mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-23 10:37:33 +03:00
Compare commits
33 Commits
issue-1060
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
92a6440a00 | ||
|
|
d3641394d9 | ||
|
|
53a8f4bd47 | ||
|
|
2b256952c9 | ||
|
|
12086e75de | ||
|
|
d426575622 | ||
|
|
a76b1ce0e3 | ||
|
|
5f49fb7f31 | ||
|
|
80d1104fca | ||
|
|
ae59c2624c | ||
|
|
4661f69d9f | ||
|
|
4d9901fbf4 | ||
|
|
9356c2111a | ||
|
|
45f0b87150 | ||
|
|
8480f6b43e | ||
|
|
61668f0672 | ||
|
|
d1ebbf573c | ||
|
|
16422b2d14 | ||
|
|
0f1ca87611 | ||
|
|
0dd2b2cee6 | ||
|
|
7caec5fcb4 | ||
|
|
612f8ac8d6 | ||
|
|
6aa31a09d7 | ||
|
|
b6e6a50e29 | ||
|
|
a6d48b6af3 | ||
|
|
dc4cf5631b | ||
|
|
005f133146 | ||
|
|
35fc595e6f | ||
|
|
710c920d60 | ||
|
|
0ceeb14076 | ||
|
|
adc29732f9 | ||
|
|
41ffe23b18 | ||
|
|
6229a8fe7d |
6
.github/workflows/codeql-analysis-go.yml
vendored
6
.github/workflows/codeql-analysis-go.yml
vendored
@@ -54,14 +54,14 @@ jobs:
|
||||
restore-keys: go-artifacts-${{ runner.os }}-codeql-analyze-${{ steps.go.outputs.go-version }}-
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
|
||||
uses: github/codeql-action/init@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4.36.1
|
||||
with:
|
||||
languages: go
|
||||
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
|
||||
uses: github/codeql-action/autobuild@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4.36.1
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@e46ed2cbd01164d986452f91f178727624ae40d7 # v4.35.3
|
||||
uses: github/codeql-action/analyze@87557b9c84dde89fdd9b10e88954ac2f4248e463 # v4.36.1
|
||||
with:
|
||||
category: 'language:go'
|
||||
|
||||
@@ -187,7 +187,7 @@ func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persiste
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
|
||||
func (c *client) init(argIdx int, sanitizedURL string) {
|
||||
limitReached := metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rate_limit_reached_total{url=%q}`, c.sanitizedURL))
|
||||
if bytesPerSec := rateLimit.GetOptionalArg(argIdx); bytesPerSec > 0 {
|
||||
logger.Infof("applying %d bytes per second rate limit for -remoteWrite.url=%q", bytesPerSec, sanitizedURL)
|
||||
@@ -204,11 +204,20 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
|
||||
c.packetsDropped = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_packets_dropped_total{url=%q}`, c.sanitizedURL))
|
||||
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
|
||||
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
|
||||
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
|
||||
return float64(concurrency)
|
||||
})
|
||||
for range concurrency {
|
||||
c.wg.Go(c.runWorker)
|
||||
workers := queues.GetOptionalArg(argIdx)
|
||||
if workers <= 0 {
|
||||
workers = 1
|
||||
}
|
||||
inmemoryWorkers := inmemoryQueues.GetOptionalArg(argIdx)
|
||||
for range inmemoryWorkers {
|
||||
c.wg.Go(func() {
|
||||
c.runWorker(c.fq.MustReadInMemoryBlockBlocking)
|
||||
})
|
||||
}
|
||||
for range workers {
|
||||
c.wg.Go(func() {
|
||||
c.runWorker(c.fq.MustReadBlock)
|
||||
})
|
||||
}
|
||||
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
|
||||
}
|
||||
@@ -302,12 +311,12 @@ func getAWSAPIConfig(argIdx int) (*awsapi.Config, error) {
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func (c *client) runWorker() {
|
||||
func (c *client) runWorker(readBlock func(dst []byte) ([]byte, bool)) {
|
||||
var ok bool
|
||||
var block []byte
|
||||
ch := make(chan bool, 1)
|
||||
for {
|
||||
block, ok = c.fq.MustReadBlock(block[:0])
|
||||
block, ok = readBlock(block[:0])
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mdx"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -67,6 +66,9 @@ var (
|
||||
queues = flagutil.NewArrayInt("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
"isn't enough for sending high volume of collected data to remote storage. "+
|
||||
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
|
||||
inmemoryQueues = flagutil.NewArrayInt("remoteWrite.inmemoryQueues", 0, "The number of additional workers per each -remoteWrite.url, which send only recently ingested data from the in-memory queue, "+
|
||||
"while the file-based queue at -remoteWrite.tmpDataPath is drained by workers configured via -remoteWrite.queues. "+
|
||||
"This reduces delivery lag for fresh samples when the file-based queue contains a backlog accumulated during remote storage outages.")
|
||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
maxPendingBytesPerURL = flagutil.NewArrayBytes("remoteWrite.maxDiskUsagePerURL", 0, "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
|
||||
@@ -104,9 +106,6 @@ var (
|
||||
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
|
||||
disableMetadataPerURL = flagutil.NewArrayBool("remoteWrite.disableMetadata", "Whether to disable sending metadata to the corresponding -remoteWrite.url. "+
|
||||
"By default, metadata sending is controlled by the global -enableMetadata flag")
|
||||
|
||||
enableMdx = flagutil.NewArrayBool("remoteWrite.mdx.enable", "Whether to only retain metrics from VictoriaMetrics services before sending them to the corresponding -remoteWrite.url. "+
|
||||
"Please see https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -308,10 +307,6 @@ func initRemoteWriteCtxs(urls []string) {
|
||||
}
|
||||
fs.RegisterPathFsMetrics(*tmpDataPath)
|
||||
|
||||
if slices.Contains(*enableMdx, true) && *shardByURL {
|
||||
logger.Fatalf("-remoteWrite.mdx.enable and -remoteWrite.shardByURL cannot be set to true simultaneously.")
|
||||
}
|
||||
|
||||
if *shardByURL {
|
||||
consistentHashNodes := make([]string, 0, len(urls))
|
||||
for i, url := range urls {
|
||||
@@ -867,7 +862,6 @@ type remoteWriteCtx struct {
|
||||
|
||||
sas atomic.Pointer[streamaggr.Aggregators]
|
||||
deduplicator *streamaggr.Deduplicator
|
||||
mdxFilter *mdx.Filter
|
||||
|
||||
streamAggrKeepInput bool
|
||||
streamAggrDropInput bool
|
||||
@@ -882,7 +876,6 @@ type remoteWriteCtx struct {
|
||||
|
||||
rowsPushedAfterRelabel *metrics.Counter
|
||||
rowsDroppedByRelabel *metrics.Counter
|
||||
mdxRowsPreserved *metrics.Counter
|
||||
|
||||
pushFailures *metrics.Counter
|
||||
metadataDroppedOnPushFailure *metrics.Counter
|
||||
@@ -916,7 +909,8 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
}
|
||||
|
||||
isPQDisabled := disableOnDiskQueue.GetOptionalArg(argIdx)
|
||||
queuesSize := queues.GetOptionalArg(argIdx)
|
||||
inmemoryQueueSize := inmemoryQueues.GetOptionalArg(argIdx)
|
||||
queuesSize := queues.GetOptionalArg(argIdx) + inmemoryQueueSize
|
||||
if queuesSize > maxQueues {
|
||||
queuesSize = maxQueues
|
||||
} else if queuesSize <= 0 {
|
||||
@@ -933,7 +927,13 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
if maxInmemoryBlocks < 2 {
|
||||
maxInmemoryBlocks = 2
|
||||
}
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, isPQDisabled)
|
||||
fqOpts := persistentqueue.OpenFastQueueOpts{
|
||||
MaxInmemoryBlocks: maxInmemoryBlocks,
|
||||
MaxPendingBytes: maxPendingBytes,
|
||||
IsPQDisabled: isPQDisabled,
|
||||
PrioritizeInmemoryData: inmemoryQueueSize > 0,
|
||||
}
|
||||
fq := persistentqueue.MustOpenFastQueueWithOpts(queuePath, sanitizedURL, fqOpts)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetPendingBytes())
|
||||
})
|
||||
@@ -946,6 +946,9 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
}
|
||||
return 0
|
||||
})
|
||||
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, sanitizedURL), func() float64 {
|
||||
return float64(queuesSize)
|
||||
})
|
||||
|
||||
var c *client
|
||||
switch remoteWriteURL.Scheme {
|
||||
@@ -954,7 +957,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
default:
|
||||
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
|
||||
}
|
||||
c.init(argIdx, queuesSize, sanitizedURL)
|
||||
c.init(argIdx, sanitizedURL)
|
||||
|
||||
// Initialize pss
|
||||
sf := significantFigures.GetOptionalArg(argIdx)
|
||||
@@ -969,6 +972,7 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq, &c.useVMProto, sf, rd)
|
||||
}
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
@@ -985,16 +989,6 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
}
|
||||
rwctx.initStreamAggrConfig()
|
||||
|
||||
if enableMdx.GetOptionalArg(argIdx) {
|
||||
mdxFilter := mdx.NewFilter()
|
||||
rwctx.mdxFilter = mdxFilter
|
||||
rwctx.mdxRowsPreserved = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_mdx_rows_preserved_total{path=%q,url=%q}`, queuePath, sanitizedURL))
|
||||
_ = metrics.NewGauge(fmt.Sprintf(`vmagent_mdx_tracked_vm_instances{path=%q,url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(mdxFilter.VmInstancesCount())
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
return rwctx
|
||||
}
|
||||
|
||||
@@ -1008,11 +1002,6 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
rwctx.deduplicator.MustStop()
|
||||
rwctx.deduplicator = nil
|
||||
}
|
||||
if rwctx.mdxFilter != nil {
|
||||
rwctx.mdxFilter.MustStop()
|
||||
rwctx.mdxFilter = nil
|
||||
rwctx.mdxRowsPreserved = nil
|
||||
}
|
||||
|
||||
for _, ps := range rwctx.pss {
|
||||
ps.MustStop()
|
||||
@@ -1028,7 +1017,6 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
|
||||
rwctx.rowsPushedAfterRelabel = nil
|
||||
rwctx.rowsDroppedByRelabel = nil
|
||||
|
||||
}
|
||||
|
||||
// TryPushTimeSeries sends tss series to the configured remote write endpoint
|
||||
@@ -1036,13 +1024,8 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
// TryPushTimeSeries doesn't modify tss, so tss can be passed concurrently to TryPush across distinct rwctx instances.
|
||||
func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDropSamplesOnFailure bool) bool {
|
||||
var rctx *relabelCtx
|
||||
var mctx *mdx.Ctx
|
||||
var v *[]prompb.TimeSeries
|
||||
defer func() {
|
||||
if mctx != nil {
|
||||
mctx.Reset()
|
||||
mdx.CtxPool.Put(mctx)
|
||||
}
|
||||
if rctx == nil {
|
||||
return
|
||||
}
|
||||
@@ -1051,22 +1034,6 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
||||
putRelabelCtx(rctx)
|
||||
}()
|
||||
|
||||
if rwctx.mdxFilter != nil {
|
||||
mctx = mdx.CtxPool.Get().(*mdx.Ctx)
|
||||
mctx.Reset()
|
||||
tssResP := tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tssRes := rwctx.mdxFilter.Filter(tss, *tssResP, mctx)
|
||||
defer func() {
|
||||
*tssResP = prompb.ResetTimeSeries(tssRes)
|
||||
tssPool.Put(tssResP)
|
||||
}()
|
||||
|
||||
if len(tssRes) == 0 {
|
||||
return true
|
||||
}
|
||||
tss = tssRes
|
||||
}
|
||||
|
||||
// Apply relabeling
|
||||
rcs := allRelabelConfigs.Load()
|
||||
pcs := rcs.perURL[rwctx.idx]
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
See vmctl docs [here](https://docs.victoriametrics.com/victoriametrics/vmctl/).
|
||||
|
||||
vmctl docs can be edited at [docs/vmctl.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victoriametrics/vmctl.md).
|
||||
vmctl docs can be edited at [docs/vmctl.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victoriametrics/vmctl/vmctl.md).
|
||||
|
||||
@@ -259,7 +259,7 @@ func (cr *ChunkedResponse) Next() ([]int64, []float64, error) {
|
||||
|
||||
fieldValues, ok := r.values[cr.field]
|
||||
if !ok {
|
||||
return nil, nil, fmt.Errorf("response doesn't contain filed %q", cr.field)
|
||||
return nil, nil, fmt.Errorf("response doesn't contain field %q", cr.field)
|
||||
}
|
||||
values := make([]float64, len(fieldValues))
|
||||
for i, fv := range fieldValues {
|
||||
|
||||
@@ -563,11 +563,11 @@ func main() {
|
||||
}()
|
||||
|
||||
err = app.Run(os.Args)
|
||||
pushmetrics.StopAndPush()
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
log.Printf("Total time: %v", time.Since(start))
|
||||
pushmetrics.StopAndPush()
|
||||
}
|
||||
|
||||
func initConfigVM(c *cli.Context) (vm.Config, error) {
|
||||
|
||||
@@ -405,7 +405,16 @@ func buildMatchWithFilter(filter string, metricName string) (string, error) {
|
||||
if len(tf.Key) == 0 {
|
||||
continue
|
||||
}
|
||||
a = append(a, tf.String())
|
||||
switch {
|
||||
case tf.IsNegative && tf.IsRegexp:
|
||||
a = append(a, fmt.Sprintf("%s!~%q", tf.Key, tf.Value))
|
||||
case tf.IsNegative:
|
||||
a = append(a, fmt.Sprintf("%s!=%q", tf.Key, tf.Value))
|
||||
case tf.IsRegexp:
|
||||
a = append(a, fmt.Sprintf("%s=~%q", tf.Key, tf.Value))
|
||||
default:
|
||||
a = append(a, fmt.Sprintf("%s=%q", tf.Key, tf.Value))
|
||||
}
|
||||
}
|
||||
a = append(a, nameFilter)
|
||||
filters = append(filters, strings.Join(a, ","))
|
||||
|
||||
@@ -15,7 +15,7 @@ See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-metric-m
|
||||
currentItem := 0
|
||||
%}
|
||||
{% for _, row := range result %}
|
||||
"{%s string(row.MetricFamilyName) %}": [
|
||||
{%q= string(row.MetricFamilyName) %}: [
|
||||
{
|
||||
"type": {%q= row.Type.String() %},
|
||||
{% if len(row.Unit) > 0 -%}
|
||||
|
||||
@@ -35,12 +35,10 @@ func StreamMetadataResponse(qw422016 *qt422016.Writer, result []*metricsmetadata
|
||||
|
||||
//line app/vmselect/prometheus/metadata_response.qtpl:17
|
||||
for _, row := range result {
|
||||
//line app/vmselect/prometheus/metadata_response.qtpl:17
|
||||
qw422016.N().S(`"`)
|
||||
//line app/vmselect/prometheus/metadata_response.qtpl:18
|
||||
qw422016.E().S(string(row.MetricFamilyName))
|
||||
qw422016.N().Q(string(row.MetricFamilyName))
|
||||
//line app/vmselect/prometheus/metadata_response.qtpl:18
|
||||
qw422016.N().S(`": [{"type":`)
|
||||
qw422016.N().S(`: [{"type":`)
|
||||
//line app/vmselect/prometheus/metadata_response.qtpl:20
|
||||
qw422016.N().Q(row.Type.String())
|
||||
//line app/vmselect/prometheus/metadata_response.qtpl:20
|
||||
|
||||
File diff suppressed because one or more lines are too long
197
app/vmselect/vmui/assets/index-CusQvJzs.js
Normal file
197
app/vmselect/vmui/assets/index-CusQvJzs.js
Normal file
File diff suppressed because one or more lines are too long
@@ -1 +0,0 @@
|
||||
var e=Object.create,t=Object.defineProperty,n=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,i=Object.getPrototypeOf,a=Object.prototype.hasOwnProperty,o=(e,t)=>()=>(e&&(t=e(e=0)),t),s=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),c=(e,n)=>{let r={};for(var i in e)t(r,i,{get:e[i],enumerable:!0});return n||t(r,Symbol.toStringTag,{value:`Module`}),r},l=(e,i,o,s)=>{if(i&&typeof i==`object`||typeof i==`function`)for(var c=r(i),l=0,u=c.length,d;l<u;l++)d=c[l],!a.call(e,d)&&d!==o&&t(e,d,{get:(e=>i[e]).bind(null,d),enumerable:!(s=n(i,d))||s.enumerable});return e},u=(n,r,a)=>(a=n==null?{}:e(i(n)),l(r||!n||!n.__esModule?t(a,`default`,{value:n,enumerable:!0}):a,n)),d=e=>a.call(e,`module.exports`)?e[`module.exports`]:l(t({},`__esModule`,{value:!0}),e);export{u as a,d as i,o as n,c as r,s as t};
|
||||
1
app/vmselect/vmui/assets/rolldown-runtime-Cyuzqnbw.js
Normal file
1
app/vmselect/vmui/assets/rolldown-runtime-Cyuzqnbw.js
Normal file
@@ -0,0 +1 @@
|
||||
var e=Object.create,t=Object.defineProperty,n=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,i=Object.getPrototypeOf,a=Object.prototype.hasOwnProperty,o=(e,t)=>()=>(e&&(t=e(e=0)),t),s=(e,t)=>()=>(t||(e((t={exports:{}}).exports,t),e=null),t.exports),c=(e,n)=>{let r={};for(var i in e)t(r,i,{get:e[i],enumerable:!0});return n||t(r,Symbol.toStringTag,{value:`Module`}),r},l=(e,i,o,s)=>{if(i&&typeof i==`object`||typeof i==`function`)for(var c=r(i),l=0,u=c.length,d;l<u;l++)d=c[l],!a.call(e,d)&&d!==o&&t(e,d,{get:(e=>i[e]).bind(null,d),enumerable:!(s=n(i,d))||s.enumerable});return e},u=(n,r,a)=>(a=n==null?{}:e(i(n)),l(r||!n||!n.__esModule?t(a,`default`,{value:n,enumerable:!0}):a,n)),d=e=>a.call(e,`module.exports`)?e[`module.exports`]:l(t({},`__esModule`,{value:!0}),e);export{u as a,d as i,o as n,c as r,s as t};
|
||||
78
app/vmselect/vmui/assets/vendor-B83wxFqK.js
Normal file
78
app/vmselect/vmui/assets/vendor-B83wxFqK.js
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -37,9 +37,9 @@
|
||||
<meta property="og:title" content="UI for VictoriaMetrics">
|
||||
<meta property="og:url" content="https://victoriametrics.com/">
|
||||
<meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data">
|
||||
<script type="module" crossorigin src="./assets/index-CoGukb-x.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/rolldown-runtime-COnpUsM8.js">
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-C8Kwp93_.js">
|
||||
<script type="module" crossorigin src="./assets/index-CusQvJzs.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/rolldown-runtime-Cyuzqnbw.js">
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-B83wxFqK.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-CnsZ1jie.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-BBUnmLOr.css">
|
||||
</head>
|
||||
|
||||
@@ -45,11 +45,13 @@ func TestSingleMetricsMetadata(t *testing.T) {
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: "metric_name_4"}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: "metric_name_5"}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: "metric_name_6"}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: `metric_name_7_!@"_suffix`}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
},
|
||||
Metadata: []prompb.MetricMetadata{
|
||||
{MetricFamilyName: "metric_name_4", Help: "some help message", Type: prompb.MetricTypeSummary},
|
||||
{MetricFamilyName: "metric_name_5", Help: "some help message", Type: prompb.MetricTypeSummary},
|
||||
{MetricFamilyName: "metric_name_6", Help: "some help message", Type: prompb.MetricTypeStateset},
|
||||
{MetricFamilyName: `metric_name_7_!@"_suffix`, Help: "some help message", Type: prompb.MetricTypeStateset},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -59,12 +61,13 @@ func TestSingleMetricsMetadata(t *testing.T) {
|
||||
expected := &apptest.PrometheusAPIV1Metadata{
|
||||
Status: "success",
|
||||
Data: map[string][]apptest.MetadataEntry{
|
||||
"metric_name_1": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_2": {{Help: "some help message", Type: "counter"}},
|
||||
"metric_name_3": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_4": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_5": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_6": {{Help: "some help message", Type: "stateset"}},
|
||||
"metric_name_1": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_2": {{Help: "some help message", Type: "counter"}},
|
||||
"metric_name_3": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_4": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_5": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_6": {{Help: "some help message", Type: "stateset"}},
|
||||
`metric_name_7_!@"_suffix`: {{Help: "some help message", Type: "stateset"}},
|
||||
},
|
||||
}
|
||||
gotStats := sut.PrometheusAPIV1Metadata(t, "", 0, apptest.QueryOpts{})
|
||||
@@ -154,11 +157,13 @@ func TestClusterMetricsMetadata(t *testing.T) {
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: "metric_name_4"}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: "metric_name_5"}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: "metric_name_6"}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
{Labels: []prompb.Label{{Name: "__name__", Value: `metric_name_7_!@"_suffix`}}, Samples: []prompb.Sample{{Value: 40, Timestamp: ingestTimestamp}}},
|
||||
},
|
||||
Metadata: []prompb.MetricMetadata{
|
||||
{MetricFamilyName: "metric_name_4", Help: "some help message", Type: prompb.MetricTypeSummary},
|
||||
{MetricFamilyName: "metric_name_5", Help: "some help message", Type: prompb.MetricTypeSummary},
|
||||
{MetricFamilyName: "metric_name_6", Help: "some help message", Type: prompb.MetricTypeStateset},
|
||||
{MetricFamilyName: `metric_name_7_!@"_suffix`, Help: "some help message", Type: prompb.MetricTypeStateset},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -171,12 +176,13 @@ func TestClusterMetricsMetadata(t *testing.T) {
|
||||
expected := &apptest.PrometheusAPIV1Metadata{
|
||||
Status: "success",
|
||||
Data: map[string][]apptest.MetadataEntry{
|
||||
"metric_name_1": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_2": {{Help: "some help message", Type: "counter"}},
|
||||
"metric_name_3": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_4": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_5": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_6": {{Help: "some help message", Type: "stateset"}},
|
||||
"metric_name_1": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_2": {{Help: "some help message", Type: "counter"}},
|
||||
"metric_name_3": {{Help: "some help message", Type: "gauge"}},
|
||||
"metric_name_4": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_5": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_6": {{Help: "some help message", Type: "stateset"}},
|
||||
`metric_name_7_!@"_suffix`: {{Help: "some help message", Type: "stateset"}},
|
||||
},
|
||||
}
|
||||
gotStats := vmselect.PrometheusAPIV1Metadata(t, "", 0, apptest.QueryOpts{Tenant: tenantID})
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -332,13 +333,11 @@ func TestSingleVMAgentDropOnOverload(t *testing.T) {
|
||||
vmagent.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.RemoteWriteRequests(t, url1) == 1 && vmagent.RemoteWriteRequests(t, url2) == 1
|
||||
},
|
||||
)
|
||||
|
||||
// Send 2 more requests, the first RW endpoint should receive everything, the second should add them to the queue
|
||||
// since worker is busy with the first request.
|
||||
for i := range 2 {
|
||||
@@ -641,3 +640,116 @@ func TestSingleVMAgentMultitenancy(t *testing.T) {
|
||||
t.Fatalf("expected vmagent_tenant_inserted_rows_total to have value 1 for accountID=5, projectID=0")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSingleVMAgentPriorizeRecentData(t *testing.T) {
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
remoteWriteSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}))
|
||||
defer remoteWriteSrv.Close()
|
||||
|
||||
var mustRW2ReturnError atomic.Bool
|
||||
mustRW2ReturnError.Store(true)
|
||||
|
||||
remoteWriteSrv2 := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if mustRW2ReturnError.Load() {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}))
|
||||
defer remoteWriteSrv2.Close()
|
||||
|
||||
vmagent := tc.MustStartDefaultRWVmagent("vmagent", []string{
|
||||
fmt.Sprintf(`-remoteWrite.url=%s/api/v1/write`, remoteWriteSrv.URL),
|
||||
fmt.Sprintf(`-remoteWrite.url=%s/api/v1/write`, remoteWriteSrv2.URL),
|
||||
"-remoteWrite.disableOnDiskQueue=true",
|
||||
// use only 1 worker to get a full queue faster
|
||||
"-remoteWrite.queues=1",
|
||||
"-remoteWrite.flushInterval=1ms",
|
||||
"-remoteWrite.inmemoryQueues=1",
|
||||
// fastqueue size is roughly memory.Allowed() / len(urls) / *maxRowsPerBlock / 100
|
||||
// Use very large maxRowsPerBlock to get fastqueue of minimal length(2).
|
||||
// See initRemoteWriteCtxs function in remotewrite.go for details.
|
||||
"-remoteWrite.maxRowsPerBlock=1000000000",
|
||||
"-remoteWrite.tmpDataPath=" + tc.Dir() + "/vmagent",
|
||||
|
||||
// Delay retry logic to avoid race conditions with waitFor assertions.
|
||||
// It improves the test stability on resource-constrained runners.
|
||||
"-remoteWrite.retryMinInterval=3s",
|
||||
"-remoteWrite.retryMaxTime=3s",
|
||||
})
|
||||
|
||||
const (
|
||||
retries = 20
|
||||
period = 200 * time.Millisecond
|
||||
)
|
||||
|
||||
waitFor := func(f func() bool) {
|
||||
t.Helper()
|
||||
for range retries {
|
||||
if f() {
|
||||
return
|
||||
}
|
||||
time.Sleep(period)
|
||||
}
|
||||
t.Fatalf("timed out waiting for retry #%d", retries)
|
||||
}
|
||||
|
||||
// Real remote write URLs are hidden in metrics
|
||||
url1 := "1:secret-url"
|
||||
url2 := "2:secret-url"
|
||||
|
||||
// Wait until first request got flushed to remote write server
|
||||
vmagent.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.RemoteWriteRequests(t, url1) == 1 && vmagent.RemoteWriteRequests(t, url2) == 1
|
||||
},
|
||||
)
|
||||
// Wait until second request got flushed to remote write server
|
||||
// since there are 2 independent queues (general and in-memory) with minimal capacity of 1
|
||||
vmagent.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.RemoteWriteRequests(t, url1) == 2 && vmagent.RemoteWriteRequests(t, url2) == 2
|
||||
},
|
||||
)
|
||||
// Send 2 more requests, the first RW endpoint should receive everything, the second should add them to the queue
|
||||
// since worker is busy with the first request.
|
||||
for i := range 2 {
|
||||
vmagent.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.RemoteWriteRequests(t, url1) == 3+i && vmagent.RemoteWritePendingInmemoryBlocks(t, url2) == 1+i
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
// Send one more request.
|
||||
vmagent.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.RemoteWriteRequests(t, url1) == 5 && vmagent.RemoteWriteSamplesDropped(t, url2) > 0
|
||||
},
|
||||
)
|
||||
mustRW2ReturnError.Store(false)
|
||||
// ensure that inmemory data correctly flushed to the remote write
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.RemoteWritePendingInmemoryBlocks(t, url2) == 0
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
@@ -6201,7 +6201,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "The rate of ignored samples during aggregation. \nStream aggregation will drop samples with NaN values, or samples with too old timestamps. See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples ",
|
||||
"description": "The rate of dropped samples during aggregation. \nStream aggregation will drop samples with NaN values, too old timestamps or samples identified as duplicates during deduplication. See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples ",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -6282,14 +6282,14 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_streamaggr_ignored_samples_total{job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]) > 0) without (instance, pod)",
|
||||
"expr": "sum(rate({__name__=~\"vm_streamaggr_ignored_samples_total|vm_streamaggr_dedup_dropped_samples_total\", job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]) > 0) without (instance, pod)",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Ignored samples ($instance)",
|
||||
"title": "Dropped samples ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
|
||||
@@ -6200,7 +6200,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "The rate of ignored samples during aggregation. \nStream aggregation will drop samples with NaN values, or samples with too old timestamps. See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples ",
|
||||
"description": "The rate of dropped samples during aggregation. \nStream aggregation will drop samples with NaN values, too old timestamps or samples identified as duplicates during deduplication. See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples ",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -6281,14 +6281,14 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_streamaggr_ignored_samples_total{job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]) > 0) without (instance, pod)",
|
||||
"expr": "sum(rate({__name__=~\"vm_streamaggr_ignored_samples_total|vm_streamaggr_dedup_dropped_samples_total\", job=~\"$job\",instance=~\"$instance\", url=~\"$url\"}[$__rate_interval]) > 0) without (instance, pod)",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Ignored samples ($instance)",
|
||||
"title": "Dropped samples ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
image: victoriametrics/vmagent:v1.146.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -42,14 +42,14 @@ services:
|
||||
# vmstorage shards. Each shard receives 1/N of all metrics sent to vminserts,
|
||||
# where N is the number of vmstorages (2 in this case).
|
||||
vmstorage-1:
|
||||
image: victoriametrics/vmstorage:v1.145.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.146.0-cluster
|
||||
volumes:
|
||||
- strgdata-1:/storage
|
||||
command:
|
||||
- "--storageDataPath=/storage"
|
||||
restart: always
|
||||
vmstorage-2:
|
||||
image: victoriametrics/vmstorage:v1.145.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.146.0-cluster
|
||||
volumes:
|
||||
- strgdata-2:/storage
|
||||
command:
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
# vminsert is an ingestion frontend. It receives metrics pushed by vmagent,
|
||||
# pre-processes them and distributes them across configured vmstorage shards.
|
||||
vminsert-1:
|
||||
image: victoriametrics/vminsert:v1.145.0-cluster
|
||||
image: victoriametrics/vminsert:v1.146.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -68,7 +68,7 @@ services:
|
||||
- "--storageNode=vmstorage-2:8400"
|
||||
restart: always
|
||||
vminsert-2:
|
||||
image: victoriametrics/vminsert:v1.145.0-cluster
|
||||
image: victoriametrics/vminsert:v1.146.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -80,7 +80,7 @@ services:
|
||||
# vmselect is a query frontend. It serves read queries in MetricsQL or PromQL.
|
||||
# vmselect collects results from configured `--storageNode` shards.
|
||||
vmselect-1:
|
||||
image: victoriametrics/vmselect:v1.145.0-cluster
|
||||
image: victoriametrics/vmselect:v1.146.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -90,7 +90,7 @@ services:
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
restart: always
|
||||
vmselect-2:
|
||||
image: victoriametrics/vmselect:v1.145.0-cluster
|
||||
image: victoriametrics/vmselect:v1.146.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -105,7 +105,7 @@ services:
|
||||
# read requests from Grafana, vmui, vmalert among vmselects.
|
||||
# It can be used as an authentication proxy.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.145.0
|
||||
image: victoriametrics/vmauth:v1.146.0
|
||||
depends_on:
|
||||
- "vmselect-1"
|
||||
- "vmselect-2"
|
||||
@@ -119,7 +119,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
image: victoriametrics/vmalert:v1.146.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
image: victoriametrics/vmagent:v1.146.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# storing metrics and serving read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
image: victoriametrics/victoria-metrics:v1.146.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
- 8089:8089
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
image: victoriametrics/vmalert:v1.146.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
image: victoriametrics/vmagent:v1.146.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -14,7 +14,7 @@ services:
|
||||
restart: always
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
image: victoriametrics/victoria-metrics:v1.146.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
restart: always
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
image: victoriametrics/vmalert:v1.146.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
- '--external.alert.source=explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr": },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]'
|
||||
restart: always
|
||||
vmanomaly:
|
||||
image: victoriametrics/vmanomaly:v1.29.5
|
||||
image: victoriametrics/vmanomaly:v1.29.6
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -32,6 +32,17 @@ docs-image:
|
||||
--platform $(DOCKER_PLATFORM) \
|
||||
vmdocs
|
||||
|
||||
docs-check-links: docs-image
|
||||
rm -rf vmdocs/public
|
||||
docker run \
|
||||
--rm \
|
||||
--platform $(DOCKER_PLATFORM) \
|
||||
-v ./vmdocs:/opt/docs \
|
||||
$(shell for d in ./docs/*/; do printf ' -v %s:/opt/docs/content/%s' "$${d}" "$$(basename $${d})"; done) \
|
||||
--entrypoint /bin/sh \
|
||||
vmdocs-docker-package \
|
||||
-c "yarn install && hugo --minify && yarn run check-links"
|
||||
|
||||
docs-debug: docs docs-image
|
||||
docker run \
|
||||
--rm \
|
||||
|
||||
@@ -14,6 +14,13 @@ aliases:
|
||||
---
|
||||
Please find the changelog for VictoriaMetrics Anomaly Detection below.
|
||||
|
||||
## v1.29.6
|
||||
Released: 2026-06-17
|
||||
|
||||
- BUGFIX: Fixed `VLogsReader` startup and query execution when `tenant_id` is omitted or provided in short account-only form such as `"0"`. Omitted or empty tenant IDs are treated as single-node/no-tenant mode, and account-only tenant IDs are expanded to `accountID:0` before adding VictoriaLogs `AccountID`/`ProjectID` params or VM tenant labels.
|
||||
|
||||
- BUGFIX: Hardened [`OnlineMADModel`](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-mad) anomaly scoring for perfectly constant time series (all values identical). The model now keeps a small deterministic prediction interval when the learned MAD is zero, so values deviating from an unknown constant baseline can produce `anomaly_score > 1` (previously, all anomaly scores were `0`).
|
||||
|
||||
## v1.29.5
|
||||
Released: 2026-06-11
|
||||
|
||||
|
||||
@@ -423,7 +423,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.5
|
||||
image: victoriametrics/vmanomaly:v1.29.6
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
@@ -641,7 +641,7 @@ options:
|
||||
Here’s an example of using the config splitter to divide configurations based on the `extra_filters` argument from the reader section:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.5 && docker image tag victoriametrics/vmanomaly:v1.29.5 vmanomaly
|
||||
docker pull victoriametrics/vmanomaly:v1.29.6 && docker image tag victoriametrics/vmanomaly:v1.29.6 vmanomaly
|
||||
```
|
||||
|
||||
```sh
|
||||
|
||||
@@ -45,7 +45,7 @@ There are 2 types of compatibility to consider when migrating in stateful mode:
|
||||
|
||||
| Group start | Group end | Compatibility | Notes |
|
||||
|---------|--------- |------------|-------|
|
||||
| [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291) | [v1.29.5](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1295) | Fully Compatible | - |
|
||||
| [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291) | [v1.29.6](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1296) | Fully Compatible | - |
|
||||
| [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) | Partially compatible* | Dumped models of class [prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) and [seasonal quantile](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile) have problems with loading to [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) due to dropped `pytz` library. **Upgrading directly from v1.28.7 to [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291) with a fix is suggested** |
|
||||
| [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262) | [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | Fully Compatible | [v1.28.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1280) introduced [rolling](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-models) model class drop in favor of [online](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) models (`rolling_quantile` and `std` models), however, it does not impact compatibility, as artifacts were not produced by default for rolling models. Also, offline `mad` and `zscore` models are redirecting to their respective online counterparts since [v1.28.4](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1284). |
|
||||
| [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) | [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270) | Partially Compatible* | [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) introduced `forecast_at` argument for base [univariate](https://docs.victoriametrics.com/anomaly-detection/components/models/#univariate-models) and `Prophet` [models](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet), however, itself remains backward-reversible from newer states like [v1.26.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262), [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270). (All models except `isolation_forest_multivariate` class will be dropped) |
|
||||
|
||||
@@ -132,7 +132,7 @@ Below are the steps to get `vmanomaly` up and running inside a Docker container:
|
||||
1. Pull Docker image:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.5
|
||||
docker pull victoriametrics/vmanomaly:v1.29.6
|
||||
```
|
||||
|
||||
2. Create the license file with your license key.
|
||||
@@ -152,7 +152,7 @@ docker run -it \
|
||||
-v ./license:/license \
|
||||
-v ./config.yaml:/config.yaml \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.5 \
|
||||
victoriametrics/vmanomaly:v1.29.6 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -169,7 +169,7 @@ docker run -it \
|
||||
-e VMANOMALY_DATA_DUMPS_DIR=/tmp/vmanomaly/data \
|
||||
-e VMANOMALY_MODEL_DUMPS_DIR=/tmp/vmanomaly/models \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.5 \
|
||||
victoriametrics/vmanomaly:v1.29.6 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -182,7 +182,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.5
|
||||
image: victoriametrics/vmanomaly:v1.29.6
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
|
||||
@@ -315,7 +315,7 @@ docker run -it --rm \
|
||||
-e VMANOMALY_MCP_SERVER_URL=http://mcp-vmanomaly:8081/mcp \
|
||||
-p 8080:8080 \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.5 \
|
||||
victoriametrics/vmanomaly:v1.29.6 \
|
||||
vmanomaly_config.yaml
|
||||
```
|
||||
|
||||
|
||||
@@ -1265,7 +1265,7 @@ monitoring:
|
||||
Let's pull the docker image for `vmanomaly`:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.5
|
||||
docker pull victoriametrics/vmanomaly:v1.29.6
|
||||
```
|
||||
|
||||
Now we can run the Docker container, mounting both the config and the model file as volumes:
|
||||
@@ -1279,7 +1279,7 @@ docker run -it \
|
||||
-v $(PWD)/license:/license \
|
||||
-v $(PWD)/custom_model.py:/vmanomaly/model/custom.py \
|
||||
-v $(PWD)/custom.yaml:/config.yaml \
|
||||
victoriametrics/vmanomaly:v1.29.5 /config.yaml \
|
||||
victoriametrics/vmanomaly:v1.29.6 /config.yaml \
|
||||
--licenseFile=/license \
|
||||
--watch
|
||||
```
|
||||
|
||||
@@ -10,9 +10,9 @@ sitemap:
|
||||
|
||||
- To use *vmanomaly*, part of the enterprise package, a license key is required. Obtain your key [here](https://victoriametrics.com/products/enterprise/trial/) for this tutorial or for enterprise use.
|
||||
- In the tutorial, we'll be using the following VictoriaMetrics components:
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.145.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.145.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.145.0)
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.146.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.146.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.146.0)
|
||||
- [Grafana](https://grafana.com/) (v12.2.0)
|
||||
- [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/)
|
||||
- [Node exporter](https://github.com/prometheus/node_exporter#node-exporter) (v1.9.1) and [Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) (v0.28.1)
|
||||
@@ -323,7 +323,7 @@ Let's wrap it all up together into the `docker-compose.yml` file.
|
||||
services:
|
||||
vmagent:
|
||||
container_name: vmagent
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
image: victoriametrics/vmagent:v1.146.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -340,7 +340,7 @@ services:
|
||||
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
image: victoriametrics/victoria-metrics:v1.146.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -373,7 +373,7 @@ services:
|
||||
|
||||
vmalert:
|
||||
container_name: vmalert
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
image: victoriametrics/vmalert:v1.146.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -395,7 +395,7 @@ services:
|
||||
restart: always
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.5
|
||||
image: victoriametrics/vmanomaly:v1.29.6
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -240,23 +240,23 @@ vmagent will write data into VictoriaMetrics single-node and cluster (with tenan
|
||||
# compose.yaml
|
||||
services:
|
||||
vmsingle:
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
image: victoriametrics/victoria-metrics:v1.146.0
|
||||
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.145.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.146.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.145.0-cluster
|
||||
image: victoriametrics/vminsert:v1.146.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.145.0-cluster
|
||||
image: victoriametrics/vmselect:v1.146.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
image: victoriametrics/vmagent:v1.146.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
@@ -308,7 +308,7 @@ Now add the vmauth service to `compose.yaml`:
|
||||
# compose.yaml
|
||||
services:
|
||||
vmauth:
|
||||
image: docker.io/victoriametrics/vmauth:v1.145.0
|
||||
image: docker.io/victoriametrics/vmauth:v1.146.0
|
||||
ports:
|
||||
- 8427:8427
|
||||
volumes:
|
||||
|
||||
@@ -155,15 +155,15 @@ These services will store and query the metrics scraped by vmagent.
|
||||
# compose.yaml
|
||||
services:
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.145.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.146.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.145.0-cluster
|
||||
image: victoriametrics/vminsert:v1.146.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.145.0-cluster
|
||||
image: victoriametrics/vmselect:v1.146.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
ports:
|
||||
@@ -196,7 +196,7 @@ Add the vmauth service to `compose.yaml`:
|
||||
# compose.yaml
|
||||
services:
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.145.0-enterprise
|
||||
image: victoriametrics/vmauth:v1.146.0-enterprise
|
||||
ports:
|
||||
- 8427:8427
|
||||
volumes:
|
||||
@@ -251,7 +251,7 @@ Add the vmagent service to `compose.yaml` with OAuth2 configuration:
|
||||
# compose.yaml
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
image: victoriametrics/vmagent:v1.146.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
|
||||
@@ -107,7 +107,7 @@ The final piece is the Docker Compose file. This ties all the services together
|
||||
# compose.yml
|
||||
services:
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
image: victoriametrics/victoria-metrics:v1.146.0
|
||||
command:
|
||||
- "--storageDataPath=/victoria-metrics-data"
|
||||
- "--selfScrapeInterval=10s"
|
||||
@@ -128,7 +128,7 @@ services:
|
||||
- ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
image: victoriametrics/vmalert:v1.146.0
|
||||
depends_on:
|
||||
- victoriametrics
|
||||
- alertmanager
|
||||
|
||||
@@ -19,6 +19,7 @@ See also [case studies](https://docs.victoriametrics.com/victoriametrics/casestu
|
||||
* [Datanami: Why Roblox Picked VictoriaMetrics for Observability Data Overhaul](https://www.hpcwire.com/bigdatawire/2023/05/30/why-roblox-picked-victoriametrics-for-observability-data-overhaul/)
|
||||
* [Cloudflare: Introducing notifications for HTTP Traffic Anomalies](https://blog.cloudflare.com/introducing-http-traffic-anomalies-notifications/)
|
||||
* [Grammarly: Better, Faster, Cheaper: How Grammarly Improved Monitoring by Over 10x with VictoriaMetrics](https://www.grammarly.com/blog/engineering/monitoring-with-victoriametrics/)
|
||||
* [Xata: How we rebuilt PostgreSQL branch metrics on VictoriaMetrics, per cell](https://xata.io/blog/how-we-rebuilt-postgresql-branch-metrics-on-victoriametrics-per-cell)
|
||||
* [CERN: CMS monitoring R&D: Real-time monitoring and alerts](https://indico.cern.ch/event/877333/contributions/3696707/attachments/1972189/3281133/CMS_mon_RD_for_opInt.pdf)
|
||||
* [CERN: The CMS monitoring infrastructure and applications](https://arxiv.org/pdf/2007.03630.pdf)
|
||||
* [Forbes: The (Almost) Infinitely Scalable Open Source Monitoring Dream](https://www.forbes.com/sites/adrianbridgwater/2022/08/16/the-almost-infinitely-scalable-open-source-monitoring-dream/)
|
||||
|
||||
@@ -61,9 +61,9 @@ Download the newest available [VictoriaMetrics release](https://docs.victoriamet
|
||||
from [DockerHub](https://hub.docker.com/r/victoriametrics/victoria-metrics) or [Quay](https://quay.io/repository/victoriametrics/victoria-metrics?tab=tags):
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/victoria-metrics:v1.145.0
|
||||
docker pull victoriametrics/victoria-metrics:v1.146.0
|
||||
docker run -it --rm -v `pwd`/victoria-metrics-data:/victoria-metrics-data -p 8428:8428 \
|
||||
victoriametrics/victoria-metrics:v1.145.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
victoriametrics/victoria-metrics:v1.146.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
```
|
||||
|
||||
_For Enterprise images, see [this link](https://docs.victoriametrics.com/victoriametrics/enterprise/#docker-images)._
|
||||
|
||||
@@ -26,20 +26,34 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
|
||||
## tip
|
||||
|
||||
## [v1.146.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.146.0)
|
||||
|
||||
Released at 2026-06-22
|
||||
|
||||
* FEATURE: all VictoriaMetrics components: add `-http.header.disableServerHostname` command-line flag for disabling the `X-Server-Hostname` HTTP response header. See [#11067](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11067). Thanks to @zasdaym for contribution.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): log calls to [/api/v1/admin/tsdb/delete_series](https://docs.victoriametrics.com/victoriametrics/url-examples/#apiv1admintsdbdelete_series) API handler. This should help to identify events of metrics deletion from the database. See [#11104](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11104).
|
||||
* FEATURE: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): add `-vm-headers` and `-vm-bearer-token` flags for authenticating requests to the VictoriaMetrics import destination. The flags are available in `opentsdb`, `influx`, `remote-read`, `prometheus`, `mimir`, and `thanos` vmctl sub-commands. See [#8897](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8897).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): add the `last` value to graph legend statistics. See [#10759](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10759).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add support for [Monitoring Data eXchange (MDX)](https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange): the ability to route only metrics from VictoriaMetrics services to a specific `-remoteWrite.url`. MDX is useful for building monitoring-of-monitoring where one remote storage should receive the full metric stream and another should receive only VictoriaMetrics metrics. Enable per destination with `-remoteWrite.mdx.enable=true`. See [#10600](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10600).
|
||||
* FEATURE: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): expose `vm_streamaggr_dedup_dropped_samples_total` to allow tracking dropped old samples during [deduplication](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication).
|
||||
* FEATURE: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): use the aggregation rule interval as the default [staleness_interval](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#staleness) instead of `2*interval`, to reduce spikes when there are gaps between received samples. See [#11102](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11102).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add a new flag `-remoteWrite.inmemoryQueues` to prioritize recently ingested data over historical data stored in the file-based [persistent queue](https://docs.victoriametrics.com/victoriametrics/vmagent/#on-disk-persistence-and-data-processing-order). See [#8833](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8833).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add `-promscrape.cluster.shardByLabels` command-line flag for selecting target labels used for sharding scrape targets among `vmagent` instances in cluster mode. See [#11044](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11044).
|
||||
* FEATURE: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): add `-vm-headers` and `-vm-bearer-token` flags for authenticating requests to the VictoriaMetrics import destination. The flags are available in `opentsdb`, `influx`, `remote-read`, `prometheus`, `mimir`, and `thanos` vmctl sub-commands. See [#8897](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8897).
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): log calls to [/api/v1/admin/tsdb/delete_series](https://docs.victoriametrics.com/victoriametrics/url-examples/#apiv1admintsdbdelete_series) API handler. This should help to identify events of metrics deletion from the database. See [#11104](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11104).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): add the `last` value to graph legend statistics. See [#10759](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10759).
|
||||
|
||||
* BUGFIX: [enterprise](https://docs.victoriametrics.com/enterprise/) [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly expose metric `vm_retention_filters_partitions_scheduled_rows`. See [#11138](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11138).
|
||||
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): fix issue with producing aggregated samples with identical timestamps between flushes. See [#10808](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10808).
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/),[vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/),[vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix rare unbounded shutdown delay when config reload takes longer than `-configCheckInterval`. See [#11107](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11107). Thanks to @PleasingFungus for contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix corrupted metrics metadata when a response contains multiple rows. See [#11115](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11115). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): fix potential corruption of remote-write metadata `Unit` values. See [#11120](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11120). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): do not fail backup list if directory is absent while using `fs://` destination to align with other protocols. See [6c3c548](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/6c3c548ddb0385b749e731f52276f130e2a4e4a8)
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/),[vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/),[vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix rare unbounded shutdown delay when config reload takes longer than `-configCheckInterval`. See [#11107](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11107). Thanks to @PleasingFungus for contribution.
|
||||
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): do not fail backup list if directory is absent while using `fs://` destination to align with other protocols. See [6c3c548d](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/6c3c548ddb0385b749e731f52276f130e2a4e4a8).
|
||||
* BUGFIX: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): push metrics to configured `-pushmetrics.url` on shutdown when migration fails. Previously, metrics were not pushed if vmctl exited with an error. See [#11081](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11081). Thanks to @zasdaym for contribution.
|
||||
* BUGFIX: [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/): disallow restoring parts outside the configured `-storageDataPath` directory. See [710c920d](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/710c920d6083327042a309e449fae4383617d817).
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): correctly apply long tenant filters. Previously, such filters could be truncated, causing tenants to be matched incorrectly. See [#11096](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11096). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix corrupted metrics metadata when a response contains multiple rows. See [#11115](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11115). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): don't cache empty responses for tenant IDs discovery during [multitenant queries](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenant-reads). This problem was visible during integration tests when multitenant queries were executed before the first ingestion happened. See [#10982](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10982).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly escape `metricFamilyName` in the metrics metadata response. See [#11129](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11129). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): prevent more cases of panic during directory deletion on `NFS`-based mounts. See [#11060](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11060).
|
||||
|
||||
|
||||
## [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0)
|
||||
|
||||
Released at 2026-06-08
|
||||
@@ -278,6 +292,25 @@ It enables back `Discovered targets` debug UI by default.
|
||||
* BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly apply `extra_filters[]` filter when querying `vm_account_id` or `vm_project_id` labels via [multitenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) request for `/api/v1/label/…/values` API. Before, `extra_filters` was ignored. See [#10503](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10503).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): revert the use of rollup result cache for [instant queries](https://docs.victoriametrics.com/keyConcepts.html#instant-query) that contain [`rate`](https://docs.victoriametrics.com/MetricsQL.html#rate) function with a lookbehind window larger than `-search.minWindowForInstantRollupOptimization`. The cache usage was removed since [v1.132.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.132.0). See [#10098](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10098#issuecomment-3895011084) for more details.
|
||||
|
||||
## [v1.136.12](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.12)
|
||||
|
||||
Released at 2026-06-19
|
||||
|
||||
**v1.136.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.136.x line will be supported for at least 12 months since [v1.136.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11360) release**
|
||||
|
||||
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): fix issue with producing aggregated samples with identical timestamps between flushes. See [#10808](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10808).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): fix potential corruption of remote-write metadata `Unit` values. See [#11120](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11120). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/), [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/), [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix rare unbounded shutdown delay when config reload takes longer than `-configCheckInterval`. See [#11107](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11107). Thanks to @PleasingFungus for the contribution.
|
||||
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): do not fail backup list if directory is absent while using `fs://` destination to align with other protocols. See [6c3c548d](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/6c3c548ddb0385b749e731f52276f130e2a4e4a8).
|
||||
* BUGFIX: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): push metrics to configured `-pushmetrics.url` on shutdown when migration fails. Previously, metrics were not pushed if vmctl exited with an error. See [#11081](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11081). Thanks to @zasdaym for the contribution.
|
||||
* BUGFIX: [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/): disallow restoring parts outside the configured `-storageDataPath` directory. See [710c920d](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/710c920d6083327042a309e449fae4383617d817).
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix corrupted metrics metadata when a response contains multiple rows. See [#11115](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11115). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly escape `metricFamilyName` at metrics metadata response. See [#11129](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11129). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): correctly apply long tenant filters. Previously, such filters could be truncated, causing tenants to be matched incorrectly. See [#11096](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11096). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): prevent more cases of panic during directory deletion on `NFS`-based mounts. See [#11060](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11060).
|
||||
|
||||
## [v1.136.11](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.11)
|
||||
|
||||
Released at 2026-06-05
|
||||
@@ -639,6 +672,20 @@ See changes [here](https://docs.victoriametrics.com/victoriametrics/changelog/ch
|
||||
|
||||
See changes [here](https://docs.victoriametrics.com/victoriametrics/changelog/changelog_2025/#v11230)
|
||||
|
||||
## [v1.122.25](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.25)
|
||||
|
||||
Released at 2026-06-19
|
||||
|
||||
**v1.122.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.122.x line will be supported for at least 12 months since [v1.122.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11220) release**
|
||||
|
||||
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): fix issue with producing aggregated samples with identical timestamps between flushes. See [#10808](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10808).
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/), [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/), [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): fix rare unbounded shutdown delay when config reload takes longer than `-configCheckInterval`. See [#11107](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11107). Thanks to @PleasingFungus for the contribution.
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix corrupted metrics metadata when a response contains multiple rows. See [#11115](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11115). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): do not fail backup list if directory is absent while using `fs://` destination to align with other protocols. See [6c3c548d](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/6c3c548ddb0385b749e731f52276f130e2a4e4a8).
|
||||
* BUGFIX: [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/): disallow restoring parts outside the configured `-storageDataPath` directory. See [710c920d](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/710c920d6083327042a309e449fae4383617d817).
|
||||
|
||||
## [v1.122.24](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.24)
|
||||
|
||||
Released at 2026-06-05
|
||||
|
||||
@@ -121,7 +121,7 @@ It is allowed to run Enterprise components in [cases listed here](https://docs.v
|
||||
Binary releases of Enterprise components are available at [the releases page for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest),
|
||||
[the releases page for VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/releases/latest)
|
||||
and [the releases page for VictoriaTraces](https://github.com/VictoriaMetrics/VictoriaTraces/releases/latest).
|
||||
Enterprise binaries and packages have `enterprise` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.145.0-enterprise.tar.gz`.
|
||||
Enterprise binaries and packages have `enterprise` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.146.0-enterprise.tar.gz`.
|
||||
|
||||
In order to run binary release of Enterprise component, please download the `*-enterprise.tar.gz` archive for your OS and architecture
|
||||
from the corresponding releases page and unpack it. Then run the unpacked binary.
|
||||
@@ -139,8 +139,8 @@ For example, the following command runs VictoriaMetrics Enterprise binary with t
|
||||
obtained at [this page](https://victoriametrics.com/products/enterprise/trial/):
|
||||
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.145.0/victoria-metrics-linux-amd64-v1.145.0-enterprise.tar.gz
|
||||
tar -xzf victoria-metrics-linux-amd64-v1.145.0-enterprise.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.146.0/victoria-metrics-linux-amd64-v1.146.0-enterprise.tar.gz
|
||||
tar -xzf victoria-metrics-linux-amd64-v1.146.0-enterprise.tar.gz
|
||||
./victoria-metrics-prod -license=BASE64_ENCODED_LICENSE_KEY
|
||||
```
|
||||
|
||||
@@ -155,7 +155,7 @@ Alternatively, VictoriaMetrics Enterprise license can be stored in the file and
|
||||
It is allowed to run Enterprise components in [cases listed here](https://docs.victoriametrics.com/victoriametrics/enterprise/#valid-cases-for-victoriametrics-enterprise).
|
||||
|
||||
Docker images for Enterprise components are available at [VictoriaMetrics Docker Hub](https://hub.docker.com/u/victoriametrics) and [VictoriaMetrics Quay](https://quay.io/organization/victoriametrics).
|
||||
Enterprise docker images have `enterprise` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.145.0-enterprise`.
|
||||
Enterprise docker images have `enterprise` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.146.0-enterprise`.
|
||||
|
||||
In order to run Docker image of VictoriaMetrics Enterprise component, it is required to provide the license key via the command-line
|
||||
flag as described in the [binary-releases](https://docs.victoriametrics.com/victoriametrics/enterprise/#binary-releases) section.
|
||||
@@ -165,13 +165,13 @@ Enterprise license key can be obtained at [this page](https://victoriametrics.co
|
||||
For example, the following command runs VictoriaMetrics Enterprise Docker image with the specified license key:
|
||||
|
||||
```sh
|
||||
docker run --name=victoria-metrics victoriametrics/victoria-metrics:v1.145.0-enterprise -license=BASE64_ENCODED_LICENSE_KEY
|
||||
docker run --name=victoria-metrics victoriametrics/victoria-metrics:v1.146.0-enterprise -license=BASE64_ENCODED_LICENSE_KEY
|
||||
```
|
||||
|
||||
Alternatively, the license code can be stored in the file and then referred via `-licenseFile` command-line flag:
|
||||
|
||||
```sh
|
||||
docker run --name=victoria-metrics -v /vm-license:/vm-license victoriametrics/victoria-metrics:v1.145.0-enterprise -licenseFile=/path/to/vm-license
|
||||
docker run --name=victoria-metrics -v /vm-license:/vm-license victoriametrics/victoria-metrics:v1.146.0-enterprise -licenseFile=/path/to/vm-license
|
||||
```
|
||||
|
||||
Example docker-compose configuration:
|
||||
@@ -181,7 +181,7 @@ version: "3.5"
|
||||
services:
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
image: victoriametrics/victoria-metrics:v1.146.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -213,7 +213,7 @@ is used to provide the license key in plain-text:
|
||||
```yaml
|
||||
server:
|
||||
image:
|
||||
tag: v1.145.0-enterprise
|
||||
tag: v1.146.0-enterprise
|
||||
|
||||
license:
|
||||
key: {BASE64_ENCODED_LICENSE_KEY}
|
||||
@@ -224,7 +224,7 @@ In order to provide the license key via existing secret, the following values fi
|
||||
```yaml
|
||||
server:
|
||||
image:
|
||||
tag: v1.145.0-enterprise
|
||||
tag: v1.146.0-enterprise
|
||||
|
||||
license:
|
||||
secret:
|
||||
@@ -274,7 +274,7 @@ spec:
|
||||
license:
|
||||
key: {BASE64_ENCODED_LICENSE_KEY}
|
||||
image:
|
||||
tag: v1.145.0-enterprise
|
||||
tag: v1.146.0-enterprise
|
||||
```
|
||||
|
||||
In order to provide the license key via an existing secret, the following custom resource is used:
|
||||
@@ -291,7 +291,7 @@ spec:
|
||||
name: vm-license
|
||||
key: license
|
||||
image:
|
||||
tag: v1.145.0-enterprise
|
||||
tag: v1.146.0-enterprise
|
||||
```
|
||||
|
||||
Example secret with license key:
|
||||
@@ -342,7 +342,7 @@ Builds are available for amd64 and arm64 architectures.
|
||||
|
||||
Example archive:
|
||||
|
||||
`victoria-metrics-linux-amd64-v1.145.0-enterprise.tar.gz`
|
||||
`victoria-metrics-linux-amd64-v1.146.0-enterprise.tar.gz`
|
||||
|
||||
Includes:
|
||||
|
||||
@@ -351,7 +351,7 @@ Includes:
|
||||
|
||||
Example Docker image:
|
||||
|
||||
`victoriametrics/victoria-metrics:v1.145.0-enterprise-fips` – uses the FIPS-compatible binary and based on `scratch` image.
|
||||
`victoriametrics/victoria-metrics:v1.146.0-enterprise-fips` – uses the FIPS-compatible binary and is based on the `scratch` image.
|
||||
|
||||
## What Happens to Licensed Components When a License Expires
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ aliases:
|
||||
1. The main goal - **to help users and [clients](https://docs.victoriametrics.com/victoriametrics/enterprise/) using VictoriaMetrics products in the most efficient way**.
|
||||
1. Fixing bugs in the essential functionality of VictoriaMetrics components. Small usability bugs are usually the most annoying,
|
||||
so they **must be fixed first**. Bugs, which affect a small number of users at some rare edge cases, can be fixed later.
|
||||
1. Improving [public docs for VictoriaMetrics products](https://docs.victoriametrics.com).
|
||||
1. Improving [public docs for VictoriaMetrics products](https://docs.victoriametrics.com),
|
||||
so users could find answers to their questions via Google or any other AI-powered web search without the need
|
||||
to ask these questions at our [support channels](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#community-and-contributions).
|
||||
1. Simplifying usage of VictoriaMetrics products without breaking backwards compatibility, so users could regularly
|
||||
|
||||
@@ -35,8 +35,8 @@ scrape_configs:
|
||||
After you created the `scrape.yaml` file, download and unpack [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) to the same directory:
|
||||
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.145.0/victoria-metrics-linux-amd64-v1.145.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.145.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.146.0/victoria-metrics-linux-amd64-v1.146.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.146.0.tar.gz
|
||||
```
|
||||
|
||||
Then start VictoriaMetrics and instruct it to scrape targets defined in `scrape.yaml` and save scraped metrics
|
||||
@@ -150,8 +150,8 @@ Then start [single-node VictoriaMetrics](https://docs.victoriametrics.com/victor
|
||||
|
||||
```yaml
|
||||
# Download and unpack single-node VictoriaMetrics
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.145.0/victoria-metrics-linux-amd64-v1.145.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.145.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.146.0/victoria-metrics-linux-amd64-v1.146.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.146.0.tar.gz
|
||||
|
||||
# Run single-node VictoriaMetrics with the given scrape.yaml
|
||||
./victoria-metrics-prod -promscrape.config=scrape.yaml
|
||||
|
||||
@@ -76,7 +76,7 @@ It is better to substitute the slow recording rule with the following [stream ag
|
||||
outputs: [rate_sum]
|
||||
```
|
||||
|
||||
> Field `interval` should be set to a value at least several times higher than the matched metrics collection interval.
|
||||
> It is recommended to set the `interval` field to a value at least 2 times the matched metrics collection interval.
|
||||
|
||||
This stream aggregation generates `http_request_duration_seconds_bucket:1m_without_instance_rate_sum` output series according to [output metric naming](#output-metric-names).
|
||||
Then these series can be used in [alerting rules](https://docs.victoriametrics.com/victoriametrics/vmalert/#alerting-rules):
|
||||
@@ -396,7 +396,7 @@ before sending them to the configured `-remoteWrite.url`. The deduplication can
|
||||
|
||||
Labels can be dropped before deduplication is applied. See [these docs](#dropping-unneeded-labels).
|
||||
|
||||
Stream aggregation deduplication is applied before aggregation rules, so duplicate samples are dropped before aggregation.
|
||||
Stream aggregation deduplication is applied before aggregation rules, so duplicate samples are dropped before aggregation. The dropped old samples can be tracked with the `vm_streamaggr_dedup_dropped_samples_total` metric.
|
||||
|
||||
# Relabeling
|
||||
|
||||
@@ -444,7 +444,9 @@ outside the current [aggregation interval](https://docs.victoriametrics.com/vict
|
||||
|
||||
- To enable [aggregation windows](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#aggregation-windows).
|
||||
|
||||
The dropped old samples can be tracked with the `vm_streamaggr_ignored_samples_total{reason="too_old"}` metric.
|
||||
- To enable [deduplication](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication).
|
||||
|
||||
The dropped old samples can be tracked with the `vm_streamaggr_ignored_samples_total{reason="too_old"}` and `vm_streamaggr_dedup_dropped_samples_total` metrics.
|
||||
|
||||
## Ignore aggregation intervals on start
|
||||
|
||||
@@ -642,9 +644,9 @@ See also [why you shouldn't put an aggregator behind a load balancer](https://do
|
||||
|
||||
# Troubleshooting
|
||||
|
||||
- [Unexpected spikes for `total` or `increase` outputs](#staleness).
|
||||
- [Unexpected spikes for `total` or `increase` outputs](#data-delay-and-staleness).
|
||||
- [Excessively large values for `total*`, `increase*`, and `rate*` outputs](#counter-resets).
|
||||
- [Lower than expected values for `total_prometheus` and `increase_prometheus` outputs](#staleness).
|
||||
- [Lower than expected values for `total_prometheus` and `increase_prometheus` outputs](#data-delay-and-staleness).
|
||||
- [High memory usage and CPU usage](#high-resource-usage).
|
||||
- [Unexpected results in vmagent cluster mode](#cluster-mode).
|
||||
- [Inaccurate aggregation results for histograms](#aggregation-windows)
|
||||
@@ -677,11 +679,19 @@ the following settings:
|
||||
|
||||
If counter-specific outputs, such as `total*`, `rate*`, and `increase*`, produce values that are significantly higher than anticipated, then check the `vm_streamaggr_counter_resets_total` metric. This metric increments each time when [counter reset event](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) happens and could be caused by duplication or collision of raw samples. If you observe duplication or collision, try solving this problem by either fixing the source of these metrics or by [deduplicating](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication) these samples before aggregation.
|
||||
|
||||
## Staleness
|
||||
## Data delay and staleness {#staleness}
|
||||
|
||||
The following outputs track the last seen per-series values in order to properly calculate output values:
|
||||
Stream aggregation processes input samples in a streaming manner and flushes results once per specified `interval`. Because of this, aggregation results can be heavily affected by data delays (see the `vm_streamaggr_samples_lag_seconds_bucket` metric).
|
||||
|
||||
In particular:
|
||||
1. Stream aggregation won't produce results if input samples are delayed for multiple aggregation intervals, causing gaps in the output.
|
||||
2. Delayed and out-of-order samples can inflate or otherwise skew aggregation results.
|
||||
|
||||
Dropping delayed samples can result in missed observations in the results, while keeping delayed samples may inflate the results. It is up to the user to decide what they prefer in the produced results:
|
||||
1. If you prefer consistency in aggregation results and do not want delayed data to affect the next aggregation window, drop all potentially delayed samples via [ignore_old_samples](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#ignoring-old-samples).
|
||||
2. If you prefer to have the accumulated changes from delayed data reflected in aggregation windows after the delay, increase `staleness_interval` in the [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config).
|
||||
This is especially important for outputs that track the last seen per-series values in order to properly calculate output values:
|
||||
|
||||
- [histogram_bucket](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#histogram_bucket)
|
||||
- [increase](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#increase)
|
||||
- [increase_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#increase_prometheus)
|
||||
- [rate_avg](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#rate_avg)
|
||||
@@ -689,21 +699,19 @@ The following outputs track the last seen per-series values in order to properly
|
||||
- [total](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total)
|
||||
- [total_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total_prometheus)
|
||||
|
||||
The last seen per-series value is dropped if no new samples are received for the given time series during two consecutive aggregations
|
||||
intervals specified in [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config) via `interval` option.
|
||||
For these outputs, the last seen per-series value is dropped if no new samples are received for the given time series during consecutive aggregation intervals specified in the [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config) via `interval` option.
|
||||
If a new sample for the existing time series is received after that, then it is treated as the first sample for a new time series.
|
||||
This may lead to the following issues:
|
||||
This may lead to the following issues when data is delayed:
|
||||
|
||||
- Lower than expected results for [total_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total_prometheus) and [increase_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#increase_prometheus) outputs,
|
||||
since they ignore the first sample in a new time series.
|
||||
- Unexpected spikes for [total](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total) and [increase](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#increase) outputs, since they assume that new time series start from 0.
|
||||
- [total](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total) and [increase](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#increase) may produce unexpected spikes, since they assume that a new time series starts from `0`.
|
||||
- [total_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total_prometheus) and [increase_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#increase_prometheus) may produce lower than expected results, if you expect to see the accumulated changes reflected after the delay, since they ignore the first sample in a new time series.
|
||||
|
||||
These issues can be fixed in the following ways:
|
||||
These issues can be mitigated in the following ways:
|
||||
|
||||
- By increasing the `interval` option at [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config), so it covers the expected
|
||||
delays in data ingestion pipelines.
|
||||
- By specifying the `staleness_interval` option at [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config), so it covers the expected
|
||||
delays in data ingestion pipelines. By default, the `staleness_interval` is equal to `2 x interval`.
|
||||
delays in data ingestion pipelines. It is recommended to set `interval` to at least 2× the scrape or push interval of the input. Set it to a higher value if the input pipeline is prone to large delays.
|
||||
- By increasing the `staleness_interval` option in the [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config), so it covers the expected
|
||||
delays in data ingestion pipelines. By default, the `staleness_interval` is equal to `interval`.
|
||||
|
||||
## High resource usage
|
||||
|
||||
|
||||
@@ -66,6 +66,8 @@ specified individually per each `-remoteWrite.url`:
|
||||
|
||||
# interval is the interval for the aggregation.
|
||||
# The aggregated stats are sent to remote storage once per interval.
|
||||
# It is recommended to set `interval` to at least 2× the scrape or push interval of the input.
|
||||
# Set it to a higher value if the input pipeline is prone to large delays.
|
||||
#
|
||||
interval: 1m
|
||||
|
||||
@@ -94,7 +96,7 @@ specified individually per each `-remoteWrite.url`:
|
||||
# - total_prometheus
|
||||
# See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#staleness for more details.
|
||||
#
|
||||
# staleness_interval: 2m
|
||||
# staleness_interval: 1m
|
||||
|
||||
# ignore_first_sample_interval specifies the interval after which the agent begins sending samples.
|
||||
# By default, it is set to the staleness interval, and it helps reduce the initial sample load after an agent restart.
|
||||
@@ -291,9 +293,6 @@ The results of `histogram_bucket` is equal to the following [MetricsQL](https://
|
||||
sum(histogram_over_time(some_histogram_bucket[interval])) by (vmrange)
|
||||
```
|
||||
|
||||
Aggregating irregular and sporadic metrics (received from [Lambdas](https://aws.amazon.com/lambda/)
|
||||
or [Cloud Functions](https://cloud.google.com/functions)) can be controlled via [staleness_interval](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#staleness) option.
|
||||
|
||||
See also:
|
||||
- [quantiles](#quantiles)
|
||||
- [avg](#avg)
|
||||
@@ -507,6 +506,19 @@ See also:
|
||||
- [count_samples](#count_samples)
|
||||
- [count_series](#count_series)
|
||||
|
||||
### `sum_samples_total`
|
||||
|
||||
`sum_samples_total` sums input delta values into a cumulative [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/index.html#counter) and outputs the result at the given `interval`.
|
||||
`sum_samples_total` makes sense only for aggregating delta values from clients such as [StatsD counter](https://github.com/statsd/statsd/blob/master/docs/metric_types.md#counting).
|
||||
|
||||
The result of `sum_samples_total` is roughly equal to the following [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) query:
|
||||
|
||||
```metricsql
|
||||
sum(running_sum(some_delta_values))
|
||||
```
|
||||
|
||||
> Note: The aggregator will forget the cumulative counter if it has not seen input samples for `staleness_interval` (set to `interval` by default) per output result, so the output counter will start from `0` the next time it sees the input again. Increase the `staleness_interval` option if you want to extend the window to tolerate bigger gaps.
|
||||
|
||||
### total
|
||||
|
||||
`total` generates output [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) by summing the input counters over the given `interval`.
|
||||
|
||||
@@ -275,6 +275,10 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/
|
||||
Optional name of the cluster. If multiple vmagent clusters scrape the same targets, then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info
|
||||
-promscrape.cluster.replicationFactor int
|
||||
The number of members in the cluster, which scrape the same targets. If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info (default 1)
|
||||
-promscrape.cluster.shardByLabels array
|
||||
Optional list of target labels, which will be used for sharding targets among cluster members if -promscrape.cluster.membersCount is greater than 1. If none of the specified labels are found in a target, then all the target labels will be used for sharding. See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-promscrape.config string
|
||||
Optional path to Prometheus config file with 'scrape_configs' section containing targets to scrape. The path can point to local file and to http url. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter for details
|
||||
-promscrape.config.dryRun
|
||||
@@ -486,13 +490,13 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/
|
||||
-search.maxTSDBStatusTopNSeries int
|
||||
The maximum value of 'topN' argument that can be passed to /api/v1/status/tsdb API. This option allows limiting memory usage. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#tsdb-stats (default 1000)
|
||||
-search.maxTagKeys int
|
||||
The maximum number of tag keys returned from /api/v1/labels . See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration (default 100000)
|
||||
The maximum number of tag keys returned per search. See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration (default 100000)
|
||||
-search.maxTagValueSuffixesPerSearch int
|
||||
The maximum number of tag value suffixes returned from /metrics/find (default 100000)
|
||||
-search.maxTagValues int
|
||||
The maximum number of tag values returned from /api/v1/label/<label_name>/values . See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration (default 100000)
|
||||
The maximum number of tag values returned per search. See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration (default 100000)
|
||||
-search.maxUniqueTimeseries int
|
||||
The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage. When set to zero, the limit is automatically calculated based on -search.maxConcurrentRequests (inversely proportional) and memory available to the process (proportional).
|
||||
The maximum number of unique time series, which can be scanned during every query. This allows protecting against heavy queries, which select unexpectedly high number of series. When set to zero, the limit is automatically calculated based on -search.maxConcurrentRequests (inversely proportional) and memory available to the process (proportional). See also -search.max* command-line flags at vmselect
|
||||
-search.maxWorkersPerQuery int
|
||||
The maximum number of CPU cores a single query can use. The default value should work good for most cases. The flag can be set to lower values for improving performance of big number of concurrently executed queries. The flag can be set to bigger values for improving performance of heavy queries, which scan big number of time series (>10K) and/or big number of samples (>100M). There is no sense in setting this flag to values bigger than the number of CPU cores available on the system (default netstorage.defaultMaxWorkersPerQuery())
|
||||
-search.minStalenessInterval duration
|
||||
|
||||
@@ -268,39 +268,6 @@ for the collected samples. Examples:
|
||||
```sh
|
||||
./vmagent -remoteWrite.url=http://remote-storage/api/v1/write -streamAggr.dropInputLabels=replica -streamAggr.dedupInterval=60s
|
||||
```
|
||||
|
||||
### Monitoring Data eXchange
|
||||
|
||||
The Monitoring Data eXchange (MDX) feature allows `vmagent` to forward only VictoriaMetrics metrics to selected `-remoteWrite.url` destinations while dropping metrics from non-VictoriaMetrics services.
|
||||
|
||||
To enable MDX, set `-remoteWrite.mdx.enable=true` for the target URL and `-remoteWrite.mdx.enable=false` for other URLs:
|
||||
|
||||
```sh
|
||||
./vmagent \
|
||||
-remoteWrite.url=http://service-to-keep-all-metrics:8428/api/v1/write \
|
||||
-remoteWrite.mdx.enable=false \
|
||||
-remoteWrite.url=http://service-to-keep-only-vm-metrics:8428/api/v1/write \
|
||||
-remoteWrite.mdx.enable=true
|
||||
```
|
||||
When MDX is enabled for a `-remoteWrite.url`, `vmagent` forwards only metrics that:
|
||||
- come from the target that exposes the `vm_app_version` metric (emitted by all VictoriaMetrics components)
|
||||
- contain the `victoriametrics_app=true` label, which will be added automatically to the metrics if the instance was deployed via [VictoriaMetrics Operator](https://docs.victoriametrics.com/operator/).
|
||||
|
||||
`victoriametrics_app=true` label will be added to all metrics that are preserved by MDX if it's absent.
|
||||
|
||||
- contain the label specified via `-mdx.label`.
|
||||
|
||||
```sh
|
||||
./vmagent \
|
||||
-remoteWrite.url=http://service-to-keep-only-vm-metrics:8428/api/v1/write \
|
||||
-remoteWrite.mdx.enable=true \
|
||||
-mdx.label="service=victoriametrics"
|
||||
```
|
||||
In this configuration, metrics with the label `service=victoriametrics` are preserved even if their scrape targets do not expose `vm_app_version` metric.
|
||||
|
||||
The number of VictoriaMetrics metrics preserved by MDX is exposed as `vmagent_remotewrite_mdx_rows_preserved_total`.
|
||||
|
||||
The scope of MDX is at the per-url level, so it works after global level mechanisms, such as stream aggregation, relabeling, complexity limiter, and cardinality limiter. See [Life of a sample](https://docs.victoriametrics.com/victoriametrics/vmagent/#life-of-a-sample).
|
||||
|
||||
### Life of a sample
|
||||
|
||||
@@ -318,20 +285,18 @@ flowchart TB
|
||||
F --> G[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#replication-and-high-availability">replicate</a> to each <b>-remoteWrite.url</b><br/>or <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages">shard</a> if <b>-remoteWrite.shardByURL</b> is set]
|
||||
|
||||
%% Left branch
|
||||
G --> H1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange/">mdx filter</a><br><b>-remoteWrite.mdx.enable</b>]
|
||||
H1 --> H2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
H2 --> H3[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
H3 --> H4["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
H4 --> H5[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
H5 --> H6[[push to <b>-remoteWrite.url</b>]]
|
||||
G --> H1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
H1 --> H2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
H2 --> H3["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
H3 --> H4[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
H4 --> H5[[push to <b>-remoteWrite.url</b>]]
|
||||
|
||||
%% Right branch
|
||||
G --> R1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange">mdx filter</a><br><b>-remoteWrite.mdx.enable</b>]
|
||||
R1 --> R2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
R2 --> R3[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
R3 --> R4["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
R4 --> R5[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
R5 --> R6[[push to <b>-remoteWrite.url</b>]]
|
||||
G --> R1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
R1 --> R2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
R2 --> R3["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
R3 --> R4[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
R4 --> R5[[push to <b>-remoteWrite.url</b>]]
|
||||
```
|
||||
|
||||
Scraping has additional settings that can be applied before samples are pushed to the processing pipeline above:
|
||||
@@ -832,6 +797,12 @@ For example, the following commands spread scrape targets among a cluster of two
|
||||
The `-promscrape.cluster.memberNum` can be set to a StatefulSet pod name when `vmagent` runs in Kubernetes.
|
||||
The pod name must end with a number in the range `0 ... promscrape.cluster.membersCount-1`. For example, `-promscrape.cluster.memberNum=vmagent-0`.
|
||||
|
||||
By default, targets are sharded among `vmagent` instances by all target labels after relabeling.
|
||||
Use `-promscrape.cluster.shardByLabels` {{% available_from "v1.146.0" %}} to shard targets by specified labels instead.
|
||||
For example, with `-promscrape.cluster.shardByLabels=service`, the targets with the same `service` label value will be scraped by the same `vmagent` instance,
|
||||
which is useful when performing stream aggregation that requires all metrics with the same `service` label value to be processed on the same `vmagent` instance.
|
||||
If none of the specified labels are present in the target labels, then all target labels will be used for sharding.
|
||||
|
||||
By default, each scrape target is scraped only by a single `vmagent` instance in the cluster. If there is a need for replicating scrape targets among multiple `vmagent` instances,
|
||||
then `-promscrape.cluster.replicationFactor` command-line flag must be set to the desired number of replicas. For example, the following commands
|
||||
start a cluster of three `vmagent` instances, where two `vmagent` instances scrape each target:
|
||||
@@ -963,6 +934,29 @@ vmagent will generate the following persistent queue folders:
|
||||
2_0AAFDF53E314A72A
|
||||
```
|
||||
|
||||
### On-disk persistence and data processing order
|
||||
|
||||
By default, vmagent processes data in FIFO order. If data has been written to the on-disk queue,
|
||||
it must be flushed to the remote storage before newly ingested data can be forwarded there.
|
||||
During long outages, vmagent may accumulate large amounts of data in the file-based queue,
|
||||
which can introduce a significant lag between the moment data is collected by vmagent and the
|
||||
moment it becomes visible at the remote storage.
|
||||
|
||||
This behavior can be changed with the `-remoteWrite.inmemoryQueues` {{% available_from "v1.146.0" %}} command-line flag.
|
||||
When set to a non-zero value, vmagent starts the given number of additional workers,
|
||||
which send only recently ingested data from the in-memory queue, while the workers configured via `-remoteWrite.queues` drain the file-based backlog concurrently.
|
||||
This reduces the delivery lag for fresh samples after remote storage outages or slowdowns. The flag can be set individually for each `-remoteWrite.url`.
|
||||
|
||||
Note that these workers are started in addition to the workers configured via `-remoteWrite.queues`, so the total number of concurrent connections to
|
||||
the remote storage becomes the sum of both flags. Take this into account if the remote storage limits the number of concurrent requests.
|
||||
|
||||
This flag has the following possible limitations:
|
||||
|
||||
* Samples may arrive at the remote storage out of order, since recent data can be delivered before the older backlogged data.
|
||||
Do not use this option if the remote storage doesn't accept out-of-order samples.
|
||||
* Recent data isn't guaranteed to take the fast path: if the in-memory queue is full,
|
||||
newly ingested data is still written to the file-based queue and is delivered in FIFO order by the generic workers.
|
||||
|
||||
### Disabling On-disk persistence
|
||||
|
||||
There are cases when it is better to disable on-disk persistence for pending data on the `vmagent` side:
|
||||
|
||||
@@ -240,6 +240,10 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/vmagent/ .
|
||||
Optional name of the cluster. If multiple vmagent clusters scrape the same targets, then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info
|
||||
-promscrape.cluster.replicationFactor int
|
||||
The number of members in the cluster, which scrape the same targets. If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info (default 1)
|
||||
-promscrape.cluster.shardByLabels array
|
||||
Optional list of target labels, which will be used for sharding targets among cluster members if -promscrape.cluster.membersCount is greater than 1. If none of the specified labels are found in a target, then all the target labels will be used for sharding. See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-promscrape.config string
|
||||
Optional path to Prometheus config file with 'scrape_configs' section containing targets to scrape. The path can point to local file and to http url. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter for details
|
||||
-promscrape.config.dryRun
|
||||
@@ -435,6 +439,10 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/vmagent/ .
|
||||
Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.inmemoryQueues array
|
||||
The number of additional workers per each -remoteWrite.url, which send only recently ingested data from the in-memory queue, while the file-based queue at -remoteWrite.tmpDataPath is drained by workers configured via -remoteWrite.queues. This reduces delivery lag for fresh samples when the file-based queue contains a backlog accumulated during remote storage outages. (default 0)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.keepDanglingQueues
|
||||
Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.
|
||||
-remoteWrite.label array
|
||||
|
||||
@@ -34,9 +34,9 @@ vmctl command-line tool is available as:
|
||||
|
||||
Download and unpack vmctl:
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.145.0/vmutils-darwin-arm64-v1.145.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.146.0/vmutils-darwin-arm64-v1.146.0.tar.gz
|
||||
|
||||
tar xzf vmutils-darwin-arm64-v1.145.0.tar.gz
|
||||
tar xzf vmutils-darwin-arm64-v1.146.0.tar.gz
|
||||
```
|
||||
|
||||
Once binary is unpacked, see the full list of supported modes by running the following command:
|
||||
|
||||
@@ -46,9 +46,13 @@ OPTIONS:
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-headers value Optional HTTP headers to send with each request to the corresponding destination address.
|
||||
For example, --vm-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding destination address.
|
||||
Multiple headers must be delimited by '^^': --vm-headers='header1:value1^^header2:value2'
|
||||
--vm-bearer-token value Optional bearer auth token to use for the corresponding --vm-addr
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it equals to 0
|
||||
|
||||
@@ -42,9 +42,13 @@ OPTIONS:
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-headers value Optional HTTP headers to send with each request to the corresponding destination address.
|
||||
For example, --vm-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding destination address.
|
||||
Multiple headers must be delimited by '^^': --vm-headers='header1:value1^^header2:value2'
|
||||
--vm-bearer-token value Optional bearer auth token to use for the corresponding --vm-addr
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it equals to 0
|
||||
|
||||
@@ -41,9 +41,13 @@ OPTIONS:
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-headers value Optional HTTP headers to send with each request to the corresponding destination address.
|
||||
For example, --vm-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding destination address.
|
||||
Multiple headers must be delimited by '^^': --vm-headers='header1:value1^^header2:value2'
|
||||
--vm-bearer-token value Optional bearer auth token to use for the corresponding --vm-addr
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it equals to 0
|
||||
|
||||
@@ -34,9 +34,13 @@ OPTIONS:
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-headers value Optional HTTP headers to send with each request to the corresponding destination address.
|
||||
For example, --vm-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding destination address.
|
||||
Multiple headers must be delimited by '^^': --vm-headers='header1:value1^^header2:value2'
|
||||
--vm-bearer-token value Optional bearer auth token to use for the corresponding --vm-addr
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it equals to 0
|
||||
|
||||
@@ -49,9 +49,13 @@ OPTIONS:
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-headers value Optional HTTP headers to send with each request to the corresponding destination address.
|
||||
For example, --vm-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding destination address.
|
||||
Multiple headers must be delimited by '^^': --vm-headers='header1:value1^^header2:value2'
|
||||
--vm-bearer-token value Optional bearer auth token to use for the corresponding --vm-addr
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it equals to 0
|
||||
|
||||
@@ -34,9 +34,13 @@ OPTIONS:
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version do not forget to set additionally --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-headers value Optional HTTP headers to send with each request to the corresponding destination address.
|
||||
For example, --vm-headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding destination address.
|
||||
Multiple headers must be delimited by '^^': --vm-headers='header1:value1^^header2:value2'
|
||||
--vm-bearer-token value Optional bearer auth token to use for the corresponding --vm-addr
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it equals to 0
|
||||
|
||||
4
go.mod
4
go.mod
@@ -10,8 +10,8 @@ require (
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.121.1-0.20260616132739-c901a1e31cb3
|
||||
github.com/VictoriaMetrics/easyproto v1.2.0
|
||||
github.com/VictoriaMetrics/fastcache v1.13.3
|
||||
github.com/VictoriaMetrics/metrics v1.43.2
|
||||
github.com/VictoriaMetrics/metricsql v0.87.1
|
||||
github.com/VictoriaMetrics/metrics v1.44.0
|
||||
github.com/VictoriaMetrics/metricsql v0.87.2
|
||||
github.com/aws/aws-sdk-go-v2 v1.42.0
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.25
|
||||
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.22.27
|
||||
|
||||
10
go.sum
10
go.sum
@@ -52,18 +52,16 @@ github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapp
|
||||
github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.57.0/go.mod h1:YqwkQPrWSC7+byyc1VlKbWLBF5JsW5IoL6xUkemYSXk=
|
||||
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
|
||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.50.1-0.20260415124154-6b7a6357aec0 h1:2x1Tszv41PnCdSMumEtejz/On1RQ45kHQ+hhKT53sOk=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.50.1-0.20260415124154-6b7a6357aec0/go.mod h1:fQtmzaSUL+HJmHozeAKmnTJTOMBT+vBccv/VWQEwhUQ=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.121.1-0.20260616132739-c901a1e31cb3 h1:3eP8RRZitbga5EYiQ3IANrMPxpBwMAX4VA6akDaXwpU=
|
||||
github.com/VictoriaMetrics/VictoriaLogs v1.121.1-0.20260616132739-c901a1e31cb3/go.mod h1:H4sDxcvk6OmC6zOt++IlDyrwfbn4F1eSLwMpR+kpRt8=
|
||||
github.com/VictoriaMetrics/easyproto v1.2.0 h1:FJT9uNXA2isppFuJErbLqD306KoFlehl7Wn2dg/6oIE=
|
||||
github.com/VictoriaMetrics/easyproto v1.2.0/go.mod h1:QlGlzaJnDfFd8Lk6Ci/fuLxfTo3/GThPs2KH23mv710=
|
||||
github.com/VictoriaMetrics/fastcache v1.13.3 h1:rBabE0iIxcqKEMCwUmwHZ9dgEqXerg8FRbRDUvC7OVc=
|
||||
github.com/VictoriaMetrics/fastcache v1.13.3/go.mod h1:hHXhl4DA2fTL2HTZDJFXWgW0LNjo6B+4aj2Wmng3TjU=
|
||||
github.com/VictoriaMetrics/metrics v1.43.2 h1:+8pIQEGwchKS5CYFyvv3LKvNXGi7baZ9hmIV4RHqibY=
|
||||
github.com/VictoriaMetrics/metrics v1.43.2/go.mod h1:xDM82ULLYCYdFRgQ2JBxi8Uf1+8En1So9YUwlGTOqTc=
|
||||
github.com/VictoriaMetrics/metricsql v0.87.1 h1:GdIblCDgXsrBJcBSDtFT8SLK7P+QHijdQmcr4L/f0Go=
|
||||
github.com/VictoriaMetrics/metricsql v0.87.1/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ=
|
||||
github.com/VictoriaMetrics/metrics v1.44.0 h1:Fr8yqQSV+ZfYaDD/anqk1E8e9YPgfleSleJmAI0M0Tw=
|
||||
github.com/VictoriaMetrics/metrics v1.44.0/go.mod h1:xDM82ULLYCYdFRgQ2JBxi8Uf1+8En1So9YUwlGTOqTc=
|
||||
github.com/VictoriaMetrics/metricsql v0.87.2 h1:7OsrcDBWREWKqqpnFyIUEOM4FNv2qHvCoww2GYz3Tc0=
|
||||
github.com/VictoriaMetrics/metricsql v0.87.2/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ=
|
||||
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
|
||||
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
|
||||
github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0=
|
||||
|
||||
@@ -91,6 +91,11 @@ func (r *Restore) Run(ctx context.Context) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot list src parts: %w", err)
|
||||
}
|
||||
for _, srcPart := range srcParts {
|
||||
if !srcPart.IsLocalPathInsideDir(r.Dst.Dir) {
|
||||
return fmt.Errorf("part file %s would be written outside storage directory %s", srcPart.Path, r.Dst.Dir)
|
||||
}
|
||||
}
|
||||
logger.Infof("obtaining list of parts at %s", dst)
|
||||
dstParts, err := dst.ListParts()
|
||||
if err != nil {
|
||||
|
||||
@@ -120,6 +120,17 @@ func (p *Part) ParseFromRemotePath(remotePath string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// IsLocalPathInsideDir returns true if the part's local path resolves inside dir.
|
||||
// It resolves ../../ sequences and prevents path traversal outside dir.
|
||||
func (p *Part) IsLocalPathInsideDir(dir string) bool {
|
||||
dir = filepath.Clean(dir)
|
||||
if dir == `/` {
|
||||
return true
|
||||
}
|
||||
|
||||
return strings.HasPrefix(p.LocalPath(dir), dir+string(filepath.Separator))
|
||||
}
|
||||
|
||||
// MaxPartSize is the maximum size for each part.
|
||||
//
|
||||
// The MaxPartSize reduces bandwidth usage during retries on network errors
|
||||
|
||||
54
lib/backup/common/part_test.go
Normal file
54
lib/backup/common/part_test.go
Normal file
@@ -0,0 +1,54 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestIsLocalPathInsideDir(t *testing.T) {
|
||||
f := func(dir, path string, expected bool) {
|
||||
t.Helper()
|
||||
p := Part{Path: path}
|
||||
if got := p.IsLocalPathInsideDir(dir); got != expected {
|
||||
t.Fatalf("IsLocalPathInsideDir(%q, %q): got %v, want %v", dir, path, got, expected)
|
||||
}
|
||||
}
|
||||
|
||||
// normal path inside dir
|
||||
f("/data/storage", "parts/segment1/data.bin", true)
|
||||
|
||||
// dir with trailing slash is normalized
|
||||
f("/data/storage/", "parts/segment1/data.bin", true)
|
||||
|
||||
// deeply nested path
|
||||
f("/data/storage", "a/b/c/d/e/file.dat", true)
|
||||
|
||||
// traversal that stays inside dir
|
||||
f("/data/storage", "foo/../bar/file.dat", true)
|
||||
|
||||
// root dir allows any path
|
||||
f("/", "any/path/here", true)
|
||||
|
||||
// root dir allows traversal attempts since nothing is outside /
|
||||
f("/", "../outside/marker.txt", true)
|
||||
|
||||
// path with leading slash is treated as relative by filepath.Join and stays inside dir
|
||||
f("/data/storage", "/outside/marker.txt", true)
|
||||
|
||||
// dir with .. components is normalized; path inside resolved dir
|
||||
f("/data/storage/../foo", "parts/file.dat", true)
|
||||
|
||||
// dir with .. components is normalized; traversal outside resolved dir
|
||||
f("/data/storage/../foo", "../storage/evil.txt", false)
|
||||
|
||||
// simple traversal
|
||||
f("/data/storage", "../outside/marker.txt", false)
|
||||
|
||||
// traversal with trailing slash in dir
|
||||
f("/data/storage/", "../outside/marker.txt", false)
|
||||
|
||||
// deep traversal
|
||||
f("/data/storage", "a/../../outside/marker.txt", false)
|
||||
|
||||
// sibling directory whose name shares a prefix with dir
|
||||
f("/data/storage", "../storagefoo/evil.txt", false)
|
||||
}
|
||||
@@ -129,6 +129,10 @@ func (fs *FS) NewReadCloser(p common.Part) (io.ReadCloser, error) {
|
||||
// On platforms with preallocation, writes go to a .tmp file that must be
|
||||
// finalized with FinalizeFile.
|
||||
func (fs *FS) NewDirectWriteCloser(p common.Part) (io.WriteCloser, error) {
|
||||
if !p.IsLocalPathInsideDir(fs.Dir) {
|
||||
logger.Fatalf("BUG: part file %s would be written outside storage directory %s", p.Path, fs.Dir)
|
||||
}
|
||||
|
||||
path := fs.writePath(p)
|
||||
if err := fs.mkdirAll(path); err != nil {
|
||||
return nil, err
|
||||
|
||||
@@ -1,198 +0,0 @@
|
||||
package mdx
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
)
|
||||
|
||||
var (
	// vmLabel holds the value of the `-mdx.label` command-line flag in the form 'name=value'.
	// It is parsed by NewFilter.
	vmLabel = flag.String("mdx.label", "", "Optional label in the form 'name=value' used to identify VictoriaMetrics metrics for MDX. Metrics containing the specified label are forwarded to `-remoteWrite.url` endpoints configured with `-remoteWrite.mdx.enable=true`.")
)

// vmAppLabelName is the name of the label, which is added with the "true" value
// to the time series preserved by Filter.Filter.
//
// It is a constant, since it is never reassigned.
const vmAppLabelName = "victoriametrics_app"
|
||||
|
||||
type Ctx struct {
|
||||
// pool for labels, which are used when adding victoriametrics_app label to the original labels.
|
||||
labels []prompb.Label
|
||||
}
|
||||
|
||||
func (ctx *Ctx) Reset() {
|
||||
promrelabel.CleanLabels(ctx.labels)
|
||||
ctx.labels = ctx.labels[:0]
|
||||
}
|
||||
|
||||
var CtxPool = &sync.Pool{
|
||||
New: func() any {
|
||||
return &Ctx{}
|
||||
},
|
||||
}
|
||||
|
||||
// Filter manages the list of VictoriaMetrics instances discovered from previous data flow, and uses it to filter out metrics that are not from VictoriaMetrics instances.
|
||||
type Filter struct {
|
||||
mu sync.RWMutex
|
||||
wg sync.WaitGroup
|
||||
stopCh chan struct{}
|
||||
vmInstance map[string]*atomic.Int64
|
||||
filterByLabelName string
|
||||
filterByLabelValue string
|
||||
}
|
||||
|
||||
func NewFilter() *Filter {
|
||||
filter := &Filter{
|
||||
vmInstance: make(map[string]*atomic.Int64),
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
if len(*vmLabel) != 0 {
|
||||
n := strings.IndexByte(*vmLabel, '=')
|
||||
if n < 0 {
|
||||
logger.Fatalf("missing '=' in `-mdx.label`. It must contain label in the form `name=value`; got %q", *vmLabel)
|
||||
}
|
||||
filter.filterByLabelName = (*vmLabel)[:n]
|
||||
filter.filterByLabelValue = (*vmLabel)[n+1:]
|
||||
}
|
||||
|
||||
filter.wg.Go(filter.cleanStale)
|
||||
return filter
|
||||
}
|
||||
|
||||
func (filter *Filter) VmInstancesCount() int {
|
||||
if filter == nil {
|
||||
return 0
|
||||
}
|
||||
filter.mu.RLock()
|
||||
defer filter.mu.RUnlock()
|
||||
return len(filter.vmInstance)
|
||||
|
||||
}
|
||||
|
||||
func (filter *Filter) cleanStale() {
|
||||
entryTTL := time.Hour * 1
|
||||
ttlSec := int64(entryTTL.Seconds())
|
||||
ticker := time.NewTicker(time.Minute)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
filter.mu.Lock()
|
||||
currTs := time.Now().Unix()
|
||||
|
||||
dst := make(map[string]*atomic.Int64, len(filter.vmInstance))
|
||||
for k, v := range filter.vmInstance {
|
||||
if currTs-v.Load() < ttlSec {
|
||||
dst[k] = v
|
||||
}
|
||||
}
|
||||
if len(dst) != len(filter.vmInstance) {
|
||||
filter.vmInstance = dst
|
||||
}
|
||||
filter.mu.Unlock()
|
||||
case <-filter.stopCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (filter *Filter) MustStop() {
|
||||
if filter == nil {
|
||||
return
|
||||
}
|
||||
close(filter.stopCh)
|
||||
filter.wg.Wait()
|
||||
}
|
||||
|
||||
func (filter *Filter) Filter(tss []prompb.TimeSeries, resTss []prompb.TimeSeries, ctx *Ctx) []prompb.TimeSeries {
|
||||
currTs := time.Now().Unix()
|
||||
var identicalKey []byte
|
||||
poolLabels := ctx.labels[:0]
|
||||
maybeAddVmAppLabel := func(idx int, labels []prompb.Label) []prompb.Label {
|
||||
for j := idx + 1; j < len(labels); j++ {
|
||||
if labels[j].Name == vmAppLabelName && labels[j].Value == "true" {
|
||||
return labels
|
||||
}
|
||||
}
|
||||
poolLabelsLen := len(poolLabels)
|
||||
poolLabels = append(poolLabels, labels...)
|
||||
poolLabels = append(poolLabels, prompb.Label{Name: vmAppLabelName, Value: "true"})
|
||||
return poolLabels[poolLabelsLen:]
|
||||
}
|
||||
|
||||
nextTss:
|
||||
for _, ts := range tss {
|
||||
var hasVersionLabel, triedJobInstance bool
|
||||
var job, instance string
|
||||
for i, label := range ts.Labels {
|
||||
if label.Name == vmAppLabelName && label.Value == "true" {
|
||||
resTss = append(resTss, ts)
|
||||
continue nextTss
|
||||
}
|
||||
if filter.filterByLabelName != "" && label.Name == filter.filterByLabelName && label.Value == filter.filterByLabelValue {
|
||||
ts.Labels = maybeAddVmAppLabel(i, ts.Labels)
|
||||
resTss = append(resTss, ts)
|
||||
continue nextTss
|
||||
}
|
||||
|
||||
if label.Name == "__name__" && label.Value == "vm_app_version" {
|
||||
hasVersionLabel = true
|
||||
}
|
||||
if instance == "" && label.Name == "instance" {
|
||||
if label.Value == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
instance = label.Value
|
||||
}
|
||||
if job == "" && label.Name == "job" {
|
||||
if label.Value == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
job = label.Value
|
||||
}
|
||||
if !triedJobInstance && job != "" && instance != "" {
|
||||
identicalKey = identicalKey[:0]
|
||||
identicalKey = strconv.AppendQuote(identicalKey, job)
|
||||
identicalKey = append(identicalKey, ':')
|
||||
identicalKey = strconv.AppendQuote(identicalKey, instance)
|
||||
filter.mu.RLock()
|
||||
ptr, found := filter.vmInstance[bytesutil.ToUnsafeString(identicalKey)]
|
||||
filter.mu.RUnlock()
|
||||
if found {
|
||||
ptr.Store(currTs)
|
||||
ts.Labels = maybeAddVmAppLabel(i, ts.Labels)
|
||||
resTss = append(resTss, ts)
|
||||
continue nextTss
|
||||
}
|
||||
triedJobInstance = true
|
||||
}
|
||||
|
||||
if hasVersionLabel && job != "" && instance != "" {
|
||||
identicalKey = identicalKey[:0]
|
||||
identicalKey = strconv.AppendQuote(identicalKey, job)
|
||||
identicalKey = append(identicalKey, ':')
|
||||
identicalKey = strconv.AppendQuote(identicalKey, instance)
|
||||
|
||||
v := &atomic.Int64{}
|
||||
v.Store(currTs)
|
||||
|
||||
filter.mu.Lock()
|
||||
filter.vmInstance[string(identicalKey)] = v
|
||||
filter.mu.Unlock()
|
||||
ts.Labels = maybeAddVmAppLabel(i, ts.Labels)
|
||||
resTss = append(resTss, ts)
|
||||
continue nextTss
|
||||
}
|
||||
}
|
||||
}
|
||||
return resTss
|
||||
}
|
||||
@@ -1,359 +0,0 @@
|
||||
package mdx
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
)
|
||||
|
||||
func timeSeriessToString(tss []prompb.TimeSeries) string {
|
||||
a := make([]string, len(tss))
|
||||
for i, ts := range tss {
|
||||
a[i] = timeSeriesToString(ts)
|
||||
}
|
||||
sort.Strings(a)
|
||||
return strings.Join(a, "")
|
||||
}
|
||||
|
||||
func timeSeriesToString(ts prompb.TimeSeries) string {
|
||||
labelsString := promrelabel.LabelsToString(ts.Labels)
|
||||
|
||||
return fmt.Sprintf("%s\n", labelsString)
|
||||
}
|
||||
|
||||
func TestMdxInstanceFilter(t *testing.T) {
|
||||
originalVmLabel := *vmLabel
|
||||
*vmLabel = "service=victoriametrics"
|
||||
filter := NewFilter()
|
||||
defer filter.MustStop()
|
||||
f := func(input []prompb.TimeSeries, expectedOutput []prompb.TimeSeries, expectedInstanceMap map[string]int64) {
|
||||
t.Helper()
|
||||
ctx := Ctx{}
|
||||
output := filter.Filter(input, nil, &ctx)
|
||||
outputString := timeSeriessToString(output)
|
||||
expectedOutputString := timeSeriessToString(expectedOutput)
|
||||
if outputString != expectedOutputString {
|
||||
t.Fatalf("unexpected output; got %s; want %s", outputString, expectedOutputString)
|
||||
}
|
||||
if filter.VmInstancesCount() != len(expectedInstanceMap) {
|
||||
t.Fatalf("unexpected instance map length; got %d; want %d", len(filter.vmInstance), len(expectedInstanceMap))
|
||||
}
|
||||
for k := range expectedInstanceMap {
|
||||
if filter.vmInstance[k] == nil {
|
||||
t.Fatalf("missing instance in filter.vmInstance: %q", k)
|
||||
}
|
||||
}
|
||||
}
|
||||
// the first call
|
||||
f([]prompb.TimeSeries{
|
||||
// 1. metrics with vm_app_version and different order of labels.
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics2:8428"},
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "instance", Value: "victoria-metrics3:8428"},
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
},
|
||||
},
|
||||
// 2.
|
||||
// metrics without vm_app_version but with service=victoriametrics that is specified in `-vm.label`.
|
||||
// it will be preserved, but won't be registered in instance map in MDX
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_slow_queries_total"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
},
|
||||
},
|
||||
|
||||
// 3. metrics with vm_app_version and service=victoriametrics should be preserved.
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics5:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
},
|
||||
},
|
||||
// 4. metrics without vm_app_version and `service=victoriametrics` but with `victoriametrics_app=true`, which should be preserved.
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics6:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "__name__", Value: "vm_slow_queries_total"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
|
||||
// 5. metrics without vm_app_version and service=victoriametrics and `victoriametrics_app=true`, which should be filtered out.
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "go_gc_duration_seconds"},
|
||||
{Name: "instance", Value: "node-exporter1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "http_request_duration_seconds"},
|
||||
{Name: "instance", Value: "service1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
|
||||
// 6. metrics with vm_app_version but job or instance is empty (or missing), they should be dropped.
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: ""},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "vmagent2:8429"},
|
||||
{Name: "job", Value: ""},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "vmagent2:8429"},
|
||||
},
|
||||
},
|
||||
},
|
||||
// `victoriametrics_app=true` should be added to all preserved metrics if absent.
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics2:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics3:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_slow_queries_total"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics5:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "instance", Value: "victoria-metrics6:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "__name__", Value: "vm_slow_queries_total"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
},
|
||||
// only instances that are discovered via `vm_app_version` will be registered in instance map in MDX.
|
||||
map[string]int64{
|
||||
"\"test\":\"victoria-metrics1:8428\"": 0,
|
||||
"\"test\":\"victoria-metrics2:8428\"": 0,
|
||||
"\"test\":\"victoria-metrics3:8428\"": 0,
|
||||
})
|
||||
|
||||
// the second call
|
||||
f([]prompb.TimeSeries{
|
||||
// 1. metrics without vm_app_version, but the instances were already registered in the previous call, so it will be preserved.
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_rows_inserted_total"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vminsert_request_duration_seconds_bucket"},
|
||||
{Name: "instance", Value: "victoria-metrics2:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
// 2. metrics without vm_app_version, `service=victoriametrics` and `victoriametrics_app=true`, and the instance wasn't already registered in the previous call, so it will be dropped.
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vminsert_request_duration_seconds_bucket"},
|
||||
{Name: "instance", Value: "victoria-metrics7:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
// 3. metrics with service=victoriametrics.
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_rows_inserted_total"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vminsert_request_duration_seconds_bucket"},
|
||||
{Name: "instance", Value: "victoria-metrics2:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
{Name: "instance", Value: "victoria-metrics4:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "victoriametrics_app", Value: "true"},
|
||||
},
|
||||
},
|
||||
},
|
||||
// only instances that are discovered via `vm_app_version` will be registered in instance map in MDX.
|
||||
map[string]int64{
|
||||
"\"test\":\"victoria-metrics1:8428\"": 0,
|
||||
"\"test\":\"victoria-metrics2:8428\"": 0,
|
||||
"\"test\":\"victoria-metrics3:8428\"": 0,
|
||||
})
|
||||
|
||||
*vmLabel = originalVmLabel
|
||||
}
|
||||
|
||||
func TestMdxInstanceCleanup(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
filter := NewFilter()
|
||||
defer filter.MustStop()
|
||||
ctx := Ctx{}
|
||||
filter.Filter([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "go_gc_duration_seconds"},
|
||||
{Name: "instance", Value: "node-exporter1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "http_request_duration_seconds"},
|
||||
{Name: "instance", Value: "service1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "vmagent1:8429"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
}}, []prompb.TimeSeries{}, &ctx,
|
||||
)
|
||||
f := func(expectedInstanceMap map[string]int64) {
|
||||
t.Helper()
|
||||
if filter.VmInstancesCount() != len(expectedInstanceMap) {
|
||||
t.Fatalf("unexpected instance map length; got %d; want %d", len(filter.vmInstance), len(expectedInstanceMap))
|
||||
}
|
||||
for k := range expectedInstanceMap {
|
||||
if filter.vmInstance[k] == nil {
|
||||
t.Fatalf("missing instance in filter.vmInstance: %q", k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
time.Sleep(59 * time.Minute)
|
||||
// the entries should not be cleaned.
|
||||
f(map[string]int64{
|
||||
"\"test\":\"victoria-metrics1:8428\"": 0,
|
||||
"\"test\":\"vmagent1:8429\"": 0,
|
||||
})
|
||||
|
||||
// receive samples from victoria-metrics1:8428 after 59 minutes.
|
||||
// so the entry will be refreshed.
|
||||
ctx.Reset()
|
||||
filter.Filter([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
}}, []prompb.TimeSeries{}, &ctx,
|
||||
)
|
||||
|
||||
time.Sleep(2 * time.Minute)
|
||||
|
||||
// no samples from vmagent1:8429 in the last hour, so it should be removed from the mdx instance list.
|
||||
f(map[string]int64{
|
||||
"\"test\":\"victoria-metrics1:8428\"": 0,
|
||||
})
|
||||
})
|
||||
|
||||
}
|
||||
@@ -26,6 +26,8 @@ type FastQueue struct {
|
||||
// isPQDisabled is set to true when pq is disabled.
|
||||
isPQDisabled bool
|
||||
|
||||
prioritizeInMemoryData bool
|
||||
|
||||
// pq is file-based queue
|
||||
pq *queue
|
||||
|
||||
@@ -39,6 +41,31 @@ type FastQueue struct {
|
||||
stopDeadline uint64
|
||||
}
|
||||
|
||||
// OpenFastQueueOpts defines options for FastQueue.
//
// It is accepted by MustOpenFastQueueWithOpts.
type OpenFastQueueOpts struct {
	// MaxInmemoryBlocks defines amount of blocks to hold in memory before falling back to file-based persistence.
	MaxInmemoryBlocks int
	// MaxPendingBytes limits file-based size of the queue.
	// If MaxPendingBytes is 0, then the queue size is unlimited.
	// The oldest data is dropped when the queue
	// reaches MaxPendingBytes.
	MaxPendingBytes int64
	// IsPQDisabled defines whether file-based queue could be used.
	// If it is set to true, then write requests that exceed in-memory buffer capacity are rejected.
	// in-memory queue part can be stored on disk during graceful shutdown.
	IsPQDisabled bool
	// PrioritizeInmemoryData instructs FastQueue to write data into the in-memory queue
	// even if the file-based queue is not empty.
	// This is useful when data order doesn't matter and getting the most recent data
	// as fast as possible is more important.
	PrioritizeInmemoryData bool
}
|
||||
|
||||
// MustOpenFastQueueWithOpts opens persistent queue at the given path with given opts.
//
// The name is passed to the queue together with the path.
func MustOpenFastQueueWithOpts(path, name string, opts OpenFastQueueOpts) *FastQueue {
	return mustOpenFastQueue(path, name, opts)
}
|
||||
|
||||
// MustOpenFastQueue opens persistent queue at the given path.
|
||||
//
|
||||
// It holds up to maxInmemoryBlocks in memory before falling back to file-based persistence.
|
||||
@@ -49,11 +76,22 @@ type FastQueue struct {
|
||||
// if isPQDisabled is set to true, then write requests that exceed in-memory buffer capacity are rejected.
|
||||
// in-memory queue part can be stored on disk during graceful shutdown.
|
||||
func MustOpenFastQueue(path, name string, maxInmemoryBlocks int, maxPendingBytes int64, isPQDisabled bool) *FastQueue {
|
||||
opts := OpenFastQueueOpts{
|
||||
MaxInmemoryBlocks: maxInmemoryBlocks,
|
||||
MaxPendingBytes: maxPendingBytes,
|
||||
IsPQDisabled: isPQDisabled,
|
||||
}
|
||||
return mustOpenFastQueue(path, name, opts)
|
||||
}
|
||||
func mustOpenFastQueue(path, name string, opts OpenFastQueueOpts) *FastQueue {
|
||||
maxPendingBytes := opts.MaxPendingBytes
|
||||
isPQDisabled := opts.IsPQDisabled
|
||||
pq := mustOpen(path, name, maxPendingBytes)
|
||||
fq := &FastQueue{
|
||||
pq: pq,
|
||||
isPQDisabled: isPQDisabled,
|
||||
ch: make(chan *bytesutil.ByteBuffer, maxInmemoryBlocks),
|
||||
pq: pq,
|
||||
isPQDisabled: isPQDisabled,
|
||||
prioritizeInMemoryData: opts.PrioritizeInmemoryData,
|
||||
ch: make(chan *bytesutil.ByteBuffer, opts.MaxInmemoryBlocks),
|
||||
}
|
||||
fq.cond.L = &fq.mu
|
||||
fq.lastInmemoryBlockReadTime = fasttime.UnixTimestamp()
|
||||
@@ -81,7 +119,7 @@ func MustOpenFastQueue(path, name string, maxInmemoryBlocks int, maxPendingBytes
|
||||
if isPQDisabled {
|
||||
persistenceStatus = "disabled"
|
||||
}
|
||||
logger.Infof("opened fast queue at %q with maxInmemoryBlocks=%d, it contains %d pending bytes, persistence is %s", path, maxInmemoryBlocks, pendingBytes, persistenceStatus)
|
||||
logger.Infof("opened fast queue at %q with maxInmemoryBlocks=%d, it contains %d pending bytes, persistence is %s", path, opts.MaxInmemoryBlocks, pendingBytes, persistenceStatus)
|
||||
return fq
|
||||
}
|
||||
|
||||
@@ -97,7 +135,7 @@ func (fq *FastQueue) IsWriteBlocked() bool {
|
||||
}
|
||||
fq.mu.Lock()
|
||||
defer fq.mu.Unlock()
|
||||
return len(fq.ch) == cap(fq.ch) || fq.pq.GetPendingBytes() > 0
|
||||
return len(fq.ch) == cap(fq.ch) || (fq.pq.GetPendingBytes() > 0 && !fq.prioritizeInMemoryData)
|
||||
}
|
||||
|
||||
// UnblockAllReaders unblocks all the readers.
|
||||
@@ -193,19 +231,24 @@ func (fq *FastQueue) tryWriteBlock(block []byte, ignoreDisabledPQ bool) bool {
|
||||
defer fq.mu.Unlock()
|
||||
|
||||
isPQWriteAllowed := !fq.isPQDisabled || ignoreDisabledPQ
|
||||
|
||||
fq.flushInmemoryBlocksToFileIfNeededLocked()
|
||||
if n := fq.pq.GetPendingBytes(); n > 0 {
|
||||
// The file-based queue isn't drained yet. This means that in-memory queue cannot be used yet.
|
||||
// So put the block to file-based queue.
|
||||
if len(fq.ch) > 0 {
|
||||
logger.Panicf("BUG: the in-memory queue must be empty when the file-based queue is non-empty; it contains %d pending bytes", n)
|
||||
if !isPQWriteAllowed && fq.pq.GetPendingBytes() > 0 {
|
||||
// fast path: there is pending data at file-based queue,
|
||||
// it must be drained before in-memory queue could be used.
|
||||
// File-based queue could be non-empty after vmagent restart
|
||||
// and vmagent couldn't flush in-memory queue during shutdown.
|
||||
return false
|
||||
}
|
||||
if !fq.prioritizeInMemoryData {
|
||||
fq.flushInmemoryBlocksToFileIfNeededLocked()
|
||||
if n := fq.pq.GetPendingBytes(); n > 0 {
|
||||
// The file-based queue isn't drained yet. This means that in-memory queue cannot be used yet.
|
||||
// So put the block to file-based queue.
|
||||
if len(fq.ch) > 0 {
|
||||
logger.Panicf("BUG: the in-memory queue must be empty when the file-based queue is non-empty; it contains %d pending bytes", n)
|
||||
}
|
||||
fq.pq.MustWriteBlock(block)
|
||||
return true
|
||||
}
|
||||
if !isPQWriteAllowed {
|
||||
return false
|
||||
}
|
||||
fq.pq.MustWriteBlock(block)
|
||||
return true
|
||||
}
|
||||
if len(fq.ch) == cap(fq.ch) {
|
||||
// There is no space left in the in-memory queue. Put the data to file-based queue.
|
||||
@@ -216,7 +259,7 @@ func (fq *FastQueue) tryWriteBlock(block []byte, ignoreDisabledPQ bool) bool {
|
||||
fq.pq.MustWriteBlock(block)
|
||||
return true
|
||||
}
|
||||
// Fast path - put the block to in-memory queue.
|
||||
|
||||
bb := blockBufPool.Get()
|
||||
bb.B = append(bb.B[:0], block...)
|
||||
fq.ch <- bb
|
||||
@@ -229,12 +272,41 @@ func (fq *FastQueue) tryWriteBlock(block []byte, ignoreDisabledPQ bool) bool {
|
||||
}
|
||||
|
||||
// MustReadBlock reads the next block from fq into dst and returns it.
|
||||
// It first reads from the in-memory queue, then checks file-based queue.
|
||||
// It first reads from the file-based queue, then checks in-memory queue.
|
||||
// It blocks until a block is available or the stop deadline is exceeded, in which case it returns (dst, false).
|
||||
func (fq *FastQueue) MustReadBlock(dst []byte) ([]byte, bool) {
|
||||
fq.mu.Lock()
|
||||
defer fq.mu.Unlock()
|
||||
|
||||
for {
|
||||
if fq.stopDeadline > 0 && fasttime.UnixTimestamp() > fq.stopDeadline {
|
||||
return dst, false
|
||||
}
|
||||
if n := fq.pq.GetPendingBytes(); n > 0 {
|
||||
data, ok := fq.pq.MustReadBlockNonblocking(dst)
|
||||
if ok {
|
||||
return data, true
|
||||
}
|
||||
dst = data
|
||||
}
|
||||
if len(fq.ch) > 0 {
|
||||
return fq.mustReadInMemoryBlockLocked(dst), true
|
||||
}
|
||||
if fq.stopDeadline > 0 {
|
||||
return dst, false
|
||||
}
|
||||
// There are no blocks. Wait for new block.
|
||||
fq.pq.ResetIfEmpty()
|
||||
fq.cond.Wait()
|
||||
}
|
||||
}
|
||||
|
||||
// MustReadInMemoryBlockBlocking reads the next block from the in-memory queue into dst and returns it.
|
||||
// It blocks until a block is available or the stop deadline is exceeded, in which case it returns (dst, false).
|
||||
func (fq *FastQueue) MustReadInMemoryBlockBlocking(dst []byte) ([]byte, bool) {
|
||||
fq.mu.Lock()
|
||||
defer fq.mu.Unlock()
|
||||
|
||||
for {
|
||||
if fq.stopDeadline > 0 && fasttime.UnixTimestamp() > fq.stopDeadline {
|
||||
return dst, false
|
||||
@@ -242,19 +314,10 @@ func (fq *FastQueue) MustReadBlock(dst []byte) ([]byte, bool) {
|
||||
if len(fq.ch) > 0 {
|
||||
return fq.mustReadInMemoryBlockLocked(dst), true
|
||||
}
|
||||
if n := fq.pq.GetPendingBytes(); n > 0 {
|
||||
data, ok := fq.pq.MustReadBlockNonblocking(dst)
|
||||
if ok {
|
||||
return data, true
|
||||
}
|
||||
dst = data
|
||||
continue
|
||||
}
|
||||
if fq.stopDeadline > 0 {
|
||||
return dst, false
|
||||
}
|
||||
// There are no blocks. Wait for new block.
|
||||
fq.pq.ResetIfEmpty()
|
||||
fq.cond.Wait()
|
||||
}
|
||||
}
|
||||
@@ -277,9 +340,6 @@ func (fq *FastQueue) mustReadInMemoryBlockLocked(dst []byte) []byte {
|
||||
if len(fq.ch) == 0 {
|
||||
logger.Panicf("BUG: the function must not be called when in-memory queue is empty. Caller should verify the queue len upfront")
|
||||
}
|
||||
if n := fq.pq.GetPendingBytes(); n > 0 {
|
||||
logger.Panicf("BUG: the file-based queue must be empty when the in-memory queue is non-empty; it contains %d pending bytes", n)
|
||||
}
|
||||
bb := <-fq.ch
|
||||
fq.pendingInmemoryBytes -= uint64(len(bb.B))
|
||||
fq.lastInmemoryBlockReadTime = fasttime.UnixTimestamp()
|
||||
|
||||
@@ -364,3 +364,64 @@ func TestFastQueueWriteReadWithIgnoreDisabledPQ(t *testing.T) {
|
||||
fq.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
}
|
||||
|
||||
func TestFastQueueWriteReadWithPrioritizeInmemory(t *testing.T) {
|
||||
path := "fast-queue-write-read-inmemory-disabled-pq-force-write"
|
||||
fs.MustRemoveDir(path)
|
||||
|
||||
capacity := 20
|
||||
opts := OpenFastQueueOpts{
|
||||
MaxInmemoryBlocks: capacity,
|
||||
PrioritizeInmemoryData: true,
|
||||
}
|
||||
fq := MustOpenFastQueueWithOpts(path, "foobar", opts)
|
||||
if n := fq.GetInmemoryQueueLen(); n != 0 {
|
||||
t.Fatalf("unexpected non-zero inmemory queue size: %d", n)
|
||||
}
|
||||
var blocks []string
|
||||
for i := range capacity {
|
||||
block := fmt.Sprintf("block %d", i)
|
||||
if !fq.TryWriteBlock([]byte(block)) {
|
||||
t.Fatalf("TryWriteBlock must return true in this context")
|
||||
}
|
||||
blocks = append(blocks, block)
|
||||
}
|
||||
if n := fq.GetInmemoryQueueLen(); n != capacity {
|
||||
t.Fatalf("unexpected non-zero inmemory queue size: %d: %d", n, capacity)
|
||||
}
|
||||
for i := range capacity {
|
||||
block := fmt.Sprintf("block %d-%d", i, i)
|
||||
if !fq.TryWriteBlock([]byte(block)) {
|
||||
t.Fatalf("TryWriteBlock must return true in this context")
|
||||
}
|
||||
blocks = append(blocks, block)
|
||||
}
|
||||
|
||||
// in case of capacity exceed last element is written into file-based queue
|
||||
if n := fq.GetInmemoryQueueLen(); n != capacity-1 {
|
||||
t.Fatalf("unexpected non-zero inmemory queue size: %d: %d", n, capacity)
|
||||
}
|
||||
|
||||
// make sure that recently ingested elemements returned first
|
||||
for idx := capacity + 1; idx < capacity*2; idx++ {
|
||||
buf, ok := fq.MustReadInMemoryBlockBlocking(nil)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected ok=false")
|
||||
}
|
||||
if string(buf) != blocks[idx] {
|
||||
t.Fatalf("unexpected block read; got %q; want %q: %d", buf, blocks[idx], idx)
|
||||
}
|
||||
}
|
||||
blocks = blocks[:capacity+1]
|
||||
for _, block := range blocks {
|
||||
buf, ok := fq.MustReadBlock(nil)
|
||||
if !ok {
|
||||
t.Fatalf("unexpected ok=false")
|
||||
}
|
||||
if string(buf) != block {
|
||||
t.Fatalf("unexpected block read; got %q; want %q", buf, block)
|
||||
}
|
||||
}
|
||||
fq.MustClose()
|
||||
fs.MustRemoveDir(path)
|
||||
}
|
||||
|
||||
@@ -50,6 +50,17 @@ func (ie *IfExpression) Parse(s string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ParseFromMetricExpr parses if from given MetricExpr
|
||||
func (ie *IfExpression) ParseFromMetricExpr(me *metricsql.MetricExpr) error {
|
||||
var ieLocal ifExpression
|
||||
if err := ieLocal.parseFromMetricExpr(me); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ie.ies = []*ifExpression{&ieLocal}
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalJSON unmarshals ie from JSON data.
|
||||
func (ie *IfExpression) UnmarshalJSON(data []byte) error {
|
||||
var v any
|
||||
@@ -182,6 +193,16 @@ func (ie *ifExpression) Parse(s string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ie *ifExpression) parseFromMetricExpr(me *metricsql.MetricExpr) error {
|
||||
lfss, err := metricExprToLabelFilterss(me)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse series selector: %w", err)
|
||||
}
|
||||
ie.s = string(me.AppendString(nil))
|
||||
ie.lfss = lfss
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalJSON unmarshals ie from JSON data.
|
||||
func (ie *ifExpression) UnmarshalJSON(data []byte) error {
|
||||
var s string
|
||||
|
||||
@@ -76,6 +76,9 @@ var (
|
||||
"Every %d occurrence in the template is substituted with -promscrape.cluster.memberNum at urls to vmagent instances responsible for scraping the given target "+
|
||||
"at /service-discovery page. For example -promscrape.cluster.memberURLTemplate='http://vmagent-%d:8429/targets'. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more details")
|
||||
clusterShardByLabels = flagutil.NewArrayString("promscrape.cluster.shardByLabels", "Optional list of target labels, which will be used for sharding targets among cluster members "+
|
||||
"if -promscrape.cluster.membersCount is greater than 1. If none of the specified labels are found in a target, then all the target labels will be used for sharding. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info")
|
||||
clusterReplicationFactor = flag.Int("promscrape.cluster.replicationFactor", 1, "The number of members in the cluster, which scrape the same targets. "+
|
||||
"If the replication factor is greater than 1, then the deduplication must be enabled at remote storage side. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets for more info")
|
||||
@@ -86,7 +89,10 @@ var (
|
||||
"Bigger uncompressed responses are rejected. See also max_scrape_size option at https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs")
|
||||
)
|
||||
|
||||
var clusterMemberID int
|
||||
var (
|
||||
clusterMemberID int
|
||||
clusterShardByLabelsSorted []string
|
||||
)
|
||||
|
||||
func mustInitClusterMemberID() {
|
||||
s := *clusterMemberNum
|
||||
@@ -110,6 +116,15 @@ func mustInitClusterMemberID() {
|
||||
clusterMemberID = n
|
||||
}
|
||||
|
||||
func initClusterShardByLabels() {
|
||||
if len(*clusterShardByLabels) == 0 {
|
||||
clusterShardByLabelsSorted = nil
|
||||
return
|
||||
}
|
||||
clusterShardByLabelsSorted = slices.Clone(*clusterShardByLabels)
|
||||
slices.Sort(clusterShardByLabelsSorted)
|
||||
}
|
||||
|
||||
// Config represents essential parts from Prometheus config defined at https://prometheus.io/docs/prometheus/latest/configuration/configuration/
|
||||
type Config struct {
|
||||
Global GlobalConfig `yaml:"global,omitempty"`
|
||||
@@ -1138,12 +1153,28 @@ func (stc *StaticConfig) appendScrapeWork(dst []*ScrapeWork, swc *scrapeWorkConf
|
||||
}
|
||||
|
||||
func appendScrapeWorkKey(dst []byte, labels *promutil.Labels) []byte {
|
||||
for _, label := range labels.GetLabels() {
|
||||
// Do not use strconv.AppendQuote, since it is slow according to CPU profile.
|
||||
dst = append(dst, label.Name...)
|
||||
dst = append(dst, '=')
|
||||
dst = append(dst, label.Value...)
|
||||
dst = append(dst, ',')
|
||||
originalDstLen := len(dst)
|
||||
for _, targetLabelName := range clusterShardByLabelsSorted {
|
||||
for _, label := range labels.GetLabels() {
|
||||
if label.Name == targetLabelName {
|
||||
// Do not use strconv.AppendQuote, since it is slow according to CPU profile.
|
||||
dst = append(dst, label.Name...)
|
||||
dst = append(dst, '=')
|
||||
dst = append(dst, label.Value...)
|
||||
dst = append(dst, ',')
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// Use all labels to compute the key if `promscrape.cluster.shardByLabels` is not configured or none of the configured labels is present in the target
|
||||
if len(dst) == originalDstLen {
|
||||
for _, label := range labels.GetLabels() {
|
||||
dst = append(dst, label.Name...)
|
||||
dst = append(dst, '=')
|
||||
dst = append(dst, label.Value...)
|
||||
dst = append(dst, ',')
|
||||
}
|
||||
return dst
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
@@ -148,6 +148,77 @@ func TestGetClusterMemberNumsForScrapeWork(t *testing.T) {
|
||||
f("foo", 3, 2, []int{2, 0})
|
||||
}
|
||||
|
||||
func TestAppendScrapeWorkKeyShardByLabels(t *testing.T) {
|
||||
f := func(labels map[string]string, shardByLabels []string, expectedKey string) {
|
||||
t.Helper()
|
||||
originValue := *clusterShardByLabels
|
||||
*clusterShardByLabels = shardByLabels
|
||||
defer func() {
|
||||
*clusterShardByLabels = originValue
|
||||
}()
|
||||
initClusterShardByLabels()
|
||||
outputKey := string(appendScrapeWorkKey(nil, promutil.NewLabelsFromMap(labels)))
|
||||
if expectedKey != outputKey {
|
||||
t.Fatalf("unexpected sharding key:%q for target labels:%v with shardByLabels=%q, expect: %q",
|
||||
outputKey, labels, shardByLabels, expectedKey)
|
||||
}
|
||||
}
|
||||
|
||||
// didn't specify -promscrape.cluster.shardByLabels, so all labels will be used for sharding
|
||||
f(
|
||||
map[string]string{
|
||||
"a": "aa",
|
||||
"b": "bb",
|
||||
"c": "cc",
|
||||
"d": "dd"},
|
||||
[]string{},
|
||||
"a=aa,b=bb,c=cc,d=dd,",
|
||||
)
|
||||
|
||||
// match all labels in -promscrape.cluster.shardByLabels, so label "a" and "c" will be used for sharding
|
||||
f(
|
||||
map[string]string{
|
||||
"a": "aa",
|
||||
"b": "bb",
|
||||
"c": "cc",
|
||||
"d": "dd"},
|
||||
[]string{"a", "c"},
|
||||
"a=aa,c=cc,",
|
||||
)
|
||||
|
||||
// match all labels in -promscrape.cluster.shardByLabels, so label "a" and "c" will be used for sharding even if they're not in order in -promscrape.cluster.shardByLabels.
|
||||
f(
|
||||
map[string]string{
|
||||
"a": "aa",
|
||||
"b": "bb",
|
||||
"c": "cc",
|
||||
"d": "dd"},
|
||||
[]string{"c", "a"},
|
||||
"a=aa,c=cc,",
|
||||
)
|
||||
|
||||
// match part of labels in -promscrape.cluster.shardByLabels, label "a" and "c" will be used for sharding
|
||||
f(
|
||||
map[string]string{
|
||||
"a": "aa",
|
||||
"c": "cc",
|
||||
"d": "dd"},
|
||||
|
||||
[]string{"a", "b", "c"},
|
||||
"a=aa,c=cc,",
|
||||
)
|
||||
|
||||
// none of labels in -promscrape.cluster.shardByLabels is matched, so all labels will be used for sharding
|
||||
f(
|
||||
map[string]string{
|
||||
"d": "dd",
|
||||
"e": "ee"},
|
||||
[]string{"a", "b", "c"},
|
||||
"d=dd,e=ee,",
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
func TestLoadStaticConfigs(t *testing.T) {
|
||||
scs, err := loadStaticConfigs("testdata/file_sd.json")
|
||||
if err != nil {
|
||||
|
||||
@@ -66,6 +66,7 @@ func CheckConfig() error {
|
||||
// Scraped data is passed to pushData.
|
||||
func Init(pushData func(at *auth.Token, wr *prompb.WriteRequest)) {
|
||||
mustInitClusterMemberID()
|
||||
initClusterShardByLabels()
|
||||
globalStopChan = make(chan struct{})
|
||||
scraperWG.Go(func() {
|
||||
runScraper(*promscrapeConfigFile, pushData, globalStopChan)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package streamaggr
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"unsafe"
|
||||
@@ -17,9 +18,10 @@ import (
|
||||
const dedupAggrShardsCount = 128
|
||||
|
||||
type dedupAggr struct {
|
||||
shards []dedupAggrShard
|
||||
flushDuration *metrics.Histogram
|
||||
flushTimeouts *metrics.Counter
|
||||
shards []dedupAggrShard
|
||||
flushDuration *metrics.Histogram
|
||||
flushTimeouts *metrics.Counter
|
||||
droppedSamples *metrics.Counter
|
||||
}
|
||||
|
||||
type dedupAggrShard struct {
|
||||
@@ -47,10 +49,20 @@ type dedupAggrSample struct {
|
||||
timestamp int64
|
||||
}
|
||||
|
||||
func newDedupAggr() *dedupAggr {
|
||||
return &dedupAggr{
|
||||
shards: make([]dedupAggrShard, dedupAggrShardsCount),
|
||||
}
|
||||
func newDedupAggr(ms *metrics.Set, metricLabels string) *dedupAggr {
|
||||
var d dedupAggr
|
||||
d.shards = make([]dedupAggrShard, dedupAggrShardsCount)
|
||||
_ = ms.NewGauge(fmt.Sprintf(`vm_streamaggr_dedup_state_size_bytes{%s}`, metricLabels), func() float64 {
|
||||
return float64(d.sizeBytes())
|
||||
})
|
||||
_ = ms.NewGauge(fmt.Sprintf(`vm_streamaggr_dedup_state_items_count{%s}`, metricLabels), func() float64 {
|
||||
return float64(d.itemsCount())
|
||||
})
|
||||
|
||||
d.flushDuration = ms.NewHistogram(fmt.Sprintf(`vm_streamaggr_dedup_flush_duration_seconds{%s}`, metricLabels))
|
||||
d.flushTimeouts = ms.NewCounter(fmt.Sprintf(`vm_streamaggr_dedup_flush_timeouts_total{%s}`, metricLabels))
|
||||
d.droppedSamples = ms.NewCounter(fmt.Sprintf(`vm_streamaggr_dedup_dropped_samples_total{%s}`, metricLabels))
|
||||
return &d
|
||||
}
|
||||
|
||||
func (da *dedupAggr) sizeBytes() uint64 {
|
||||
@@ -87,7 +99,8 @@ func (da *dedupAggr) pushSamples(samples []pushSample, _ int64, isGreen bool) {
|
||||
if len(shardSamples) == 0 {
|
||||
continue
|
||||
}
|
||||
da.shards[i].pushSamples(shardSamples, isGreen)
|
||||
deduplicatedSamples := da.shards[i].pushSamples(shardSamples, isGreen)
|
||||
da.droppedSamples.Add(deduplicatedSamples)
|
||||
}
|
||||
putPerShardSamples(pss)
|
||||
}
|
||||
@@ -167,8 +180,9 @@ func putPerShardSamples(pss *perShardSamples) {
|
||||
|
||||
var perShardSamplesPool sync.Pool
|
||||
|
||||
func (das *dedupAggrShard) pushSamples(samples []pushSample, isGreen bool) {
|
||||
func (das *dedupAggrShard) pushSamples(samples []pushSample, isGreen bool) int {
|
||||
var state *dedupAggrState
|
||||
var deduplicatedSamples int
|
||||
|
||||
if isGreen {
|
||||
state = &das.green
|
||||
@@ -198,8 +212,10 @@ func (das *dedupAggrShard) pushSamples(samples []pushSample, isGreen bool) {
|
||||
continue
|
||||
}
|
||||
s.timestamp, s.value = deduplicateSamples(s.timestamp, sample.timestamp, s.value, sample.value)
|
||||
deduplicatedSamples++
|
||||
}
|
||||
state.samplesBuf = samplesBuf
|
||||
return deduplicatedSamples
|
||||
}
|
||||
|
||||
// deduplicateSamples returns deduplicated timestamp and value results.
|
||||
|
||||
@@ -7,11 +7,13 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
)
|
||||
|
||||
func TestDedupAggrSerial(t *testing.T) {
|
||||
da := newDedupAggr()
|
||||
da := newDedupAggr(metrics.NewSet(), "")
|
||||
|
||||
const seriesCount = 100_000
|
||||
expectedSamplesMap := make(map[string]pushSample)
|
||||
@@ -59,7 +61,7 @@ func TestDedupAggrSerial(t *testing.T) {
|
||||
func TestDedupAggrConcurrent(_ *testing.T) {
|
||||
const concurrency = 5
|
||||
const seriesCount = 10_000
|
||||
da := newDedupAggr()
|
||||
da := newDedupAggr(metrics.NewSet(), "")
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for range concurrency {
|
||||
|
||||
@@ -5,6 +5,8 @@ import (
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
@@ -23,7 +25,7 @@ func benchmarkDedupAggr(b *testing.B, samplesPerPush int) {
|
||||
|
||||
const loops = 2
|
||||
benchSamples := newBenchSamples(samplesPerPush)
|
||||
da := newDedupAggr()
|
||||
da := newDedupAggr(metrics.NewSet(), "")
|
||||
|
||||
b.ResetTimer()
|
||||
b.ReportAllocs()
|
||||
|
||||
@@ -44,7 +44,6 @@ type Deduplicator struct {
|
||||
// MustStop must be called on the returned deduplicator in order to free up occupied resources.
|
||||
func NewDeduplicator(pushFunc PushFunc, enableWindows bool, interval time.Duration, dropLabels []string, alias string) *Deduplicator {
|
||||
d := &Deduplicator{
|
||||
da: newDedupAggr(),
|
||||
dropLabels: dropLabels,
|
||||
interval: interval,
|
||||
enableWindows: enableWindows,
|
||||
@@ -64,16 +63,7 @@ func NewDeduplicator(pushFunc PushFunc, enableWindows bool, interval time.Durati
|
||||
ms := d.ms
|
||||
|
||||
metricLabels := fmt.Sprintf(`name="dedup",url=%q`, alias)
|
||||
|
||||
_ = ms.NewGauge(fmt.Sprintf(`vm_streamaggr_dedup_state_size_bytes{%s}`, metricLabels), func() float64 {
|
||||
return float64(d.da.sizeBytes())
|
||||
})
|
||||
_ = ms.NewGauge(fmt.Sprintf(`vm_streamaggr_dedup_state_items_count{%s}`, metricLabels), func() float64 {
|
||||
return float64(d.da.itemsCount())
|
||||
})
|
||||
|
||||
d.da.flushDuration = ms.NewHistogram(fmt.Sprintf(`vm_streamaggr_dedup_flush_duration_seconds{%s}`, metricLabels))
|
||||
d.da.flushTimeouts = ms.NewCounter(fmt.Sprintf(`vm_streamaggr_dedup_flush_timeouts_total{%s}`, metricLabels))
|
||||
d.da = newDedupAggr(ms, metricLabels)
|
||||
|
||||
metrics.RegisterSet(ms)
|
||||
|
||||
@@ -120,6 +110,7 @@ func (d *Deduplicator) Push(tss []prompb.TimeSeries) {
|
||||
key := bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
for _, s := range ts.Samples {
|
||||
if d.enableWindows && minDeadline > s.Timestamp {
|
||||
d.da.droppedSamples.Inc()
|
||||
continue
|
||||
} else if d.enableWindows && s.Timestamp <= cs.maxDeadline == cs.isGreen {
|
||||
ctx.green = append(ctx.green, pushSample{
|
||||
|
||||
@@ -31,12 +31,16 @@ type increaseAggrValue struct {
|
||||
}
|
||||
|
||||
func (av *increaseAggrValue) pushSample(c aggrConfig, sample *pushSample, key string, deleteDeadline int64) {
|
||||
if av.total == nil {
|
||||
av.total = new(float64)
|
||||
}
|
||||
ac := c.(*increaseAggrConfig)
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
keepFirstSample := ac.keepFirstSample && currentTime >= ac.ignoreFirstSampleDeadline
|
||||
lv, ok := av.shared[key]
|
||||
if av.total == nil {
|
||||
av.total = new(float64)
|
||||
// The last value is stale, reset it.
|
||||
if ok && lv.deleteDeadline < int64(currentTime)*1000 {
|
||||
ok = false
|
||||
}
|
||||
if ok {
|
||||
if sample.timestamp < lv.timestamp {
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
)
|
||||
|
||||
var rateAggrSharedValuePool sync.Pool
|
||||
@@ -99,6 +100,12 @@ func (av *rateAggrValue) pushSample(c aggrConfig, sample *pushSample, key string
|
||||
ac := c.(*rateAggrConfig)
|
||||
var state *rateAggrStateValue
|
||||
sv, ok := av.shared[key]
|
||||
// The last value is stale, reset it.
|
||||
if ok && sv.deleteDeadline < int64(fasttime.UnixTimestamp())*1000 {
|
||||
delete(av.shared, key)
|
||||
putRateAggrSharedValue(sv)
|
||||
ok = false
|
||||
}
|
||||
if ok {
|
||||
state = sv.getState(av.isGreen)
|
||||
if sample.timestamp < state.timestamp {
|
||||
|
||||
@@ -43,6 +43,7 @@ var supportedOutputs = []string{
|
||||
"stddev",
|
||||
"stdvar",
|
||||
"sum_samples",
|
||||
"sum_samples_total",
|
||||
"total",
|
||||
"total_prometheus",
|
||||
"unique_samples",
|
||||
@@ -172,12 +173,12 @@ type Config struct {
|
||||
DedupInterval string `yaml:"dedup_interval,omitempty"`
|
||||
|
||||
// StalenessInterval is the interval after which the series state is reset if no samples have been sent during it.
|
||||
// The parameter is only relevant for outputs: total, total_prometheus, increase, increase_prometheus and histogram_bucket.
|
||||
// The parameter is only relevant for outputs: total, total_prometheus, increase, increase_prometheus, rate_avg and rate_sum.
|
||||
StalenessInterval string `yaml:"staleness_interval,omitempty"`
|
||||
|
||||
// IgnoreFirstSampleInterval specifies the interval after which the agent begins sending samples.
|
||||
// By default, it is set to the staleness interval, and it helps reduce the initial sample load after an agent restart.
|
||||
// This parameter is relevant only for the following outputs: total, total_prometheus, increase, increase_prometheus, and histogram_bucket.
|
||||
// This parameter is relevant only for the following outputs: total, total_prometheus, increase and increase_prometheus.
|
||||
IgnoreFirstSampleInterval string `yaml:"ignore_first_sample_interval,omitempty"`
|
||||
|
||||
// Outputs is a list of output aggregate functions to produce.
|
||||
@@ -501,8 +502,9 @@ func newAggregator(cfg *Config, path string, pushFunc PushFunc, ms *metrics.Set,
|
||||
return nil, fmt.Errorf("interval=%s must be a multiple of dedup_interval=%s", interval, dedupInterval)
|
||||
}
|
||||
|
||||
// check cfg.StalenessInterval
|
||||
stalenessInterval := interval * 2
|
||||
// set the default staleness interval as the aggregation interval, to be consistent with query lookbehind window in metricsQL,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11102
|
||||
stalenessInterval := interval
|
||||
if cfg.StalenessInterval != "" {
|
||||
stalenessInterval, err = time.ParseDuration(cfg.StalenessInterval)
|
||||
if err != nil {
|
||||
@@ -668,18 +670,7 @@ func newAggregator(cfg *Config, path string, pushFunc PushFunc, ms *metrics.Set,
|
||||
}
|
||||
|
||||
if dedupInterval > 0 {
|
||||
a.da = newDedupAggr()
|
||||
a.da.flushTimeouts = ms.NewCounter(fmt.Sprintf(`vm_streamaggr_dedup_flush_timeouts_total{%s}`, metricLabels))
|
||||
a.da.flushDuration = ms.NewHistogram(fmt.Sprintf(`vm_streamaggr_dedup_flush_duration_seconds{%s}`, metricLabels))
|
||||
|
||||
_ = ms.NewGauge(fmt.Sprintf(`vm_streamaggr_dedup_state_size_bytes{%s}`, metricLabels), func() float64 {
|
||||
n := a.da.sizeBytes()
|
||||
return float64(n)
|
||||
})
|
||||
_ = ms.NewGauge(fmt.Sprintf(`vm_streamaggr_dedup_state_items_count{%s}`, metricLabels), func() float64 {
|
||||
n := a.da.itemsCount()
|
||||
return float64(n)
|
||||
})
|
||||
a.da = newDedupAggr(ms, metricLabels)
|
||||
}
|
||||
|
||||
alignFlushToInterval := !opts.NoAlignFlushToInterval
|
||||
@@ -780,7 +771,9 @@ func newOutputConfig(ms *metrics.Set, metricLabels, output string, outputsSeen m
|
||||
case "stdvar":
|
||||
return newStdvarAggrConfig(), nil
|
||||
case "sum_samples":
|
||||
return newSumSamplesAggrConfig(), nil
|
||||
return newSumSamplesAggrConfig(true), nil
|
||||
case "sum_samples_total":
|
||||
return newSumSamplesAggrConfig(false), nil
|
||||
case "total":
|
||||
return newTotalAggrConfig(ms, metricLabels, ignoreFirstSampleIntervalSecs, true), nil
|
||||
case "total_prometheus":
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
//go:build synctest
|
||||
|
||||
package streamaggr
|
||||
|
||||
import (
|
||||
@@ -475,26 +477,125 @@ foo:1m_increase_prometheus{baz="qwe"} 15
|
||||
outputs: [increase_prometheus]
|
||||
`, "11111111")
|
||||
|
||||
// multiple aggregate configs
|
||||
// increase, increase_prometheus, total, total_prometheus outputs with different staleness intervals
|
||||
f([]string{`
|
||||
foo 5
|
||||
bar 200
|
||||
`, `
|
||||
foo 10
|
||||
bar 201
|
||||
`, ``, `
|
||||
foo 7
|
||||
bar 205
|
||||
`}, time.Minute, `bar:1m_increase 200
|
||||
bar:1m_increase 1
|
||||
bar:1m_increase 205
|
||||
bar:1m_increase_prometheus 0
|
||||
bar:1m_increase_prometheus 1
|
||||
bar:1m_increase_prometheus 0
|
||||
bar:1m_total 200
|
||||
bar:1m_total 201
|
||||
bar:1m_total 205
|
||||
bar:1m_total_prometheus 0
|
||||
bar:1m_total_prometheus 1
|
||||
bar:1m_total_prometheus 0
|
||||
bar:1m_without_non_existing_label_increase 0
|
||||
bar:1m_without_non_existing_label_increase 1
|
||||
bar:1m_without_non_existing_label_increase 4
|
||||
bar:1m_without_non_existing_label_increase_prometheus 0
|
||||
bar:1m_without_non_existing_label_increase_prometheus 1
|
||||
bar:1m_without_non_existing_label_increase_prometheus 4
|
||||
bar:1m_without_non_existing_label_total 0
|
||||
bar:1m_without_non_existing_label_total 1
|
||||
bar:1m_without_non_existing_label_total 1
|
||||
bar:1m_without_non_existing_label_total 5
|
||||
bar:1m_without_non_existing_label_total_prometheus 0
|
||||
bar:1m_without_non_existing_label_total_prometheus 1
|
||||
bar:1m_without_non_existing_label_total_prometheus 1
|
||||
bar:1m_without_non_existing_label_total_prometheus 5
|
||||
foo:1m_increase 5
|
||||
foo:1m_increase 5
|
||||
foo:1m_increase 7
|
||||
foo:1m_increase_prometheus 0
|
||||
foo:1m_increase_prometheus 5
|
||||
foo:1m_increase_prometheus 0
|
||||
foo:1m_total 5
|
||||
foo:1m_total 10
|
||||
foo:1m_total 7
|
||||
foo:1m_total_prometheus 0
|
||||
foo:1m_total_prometheus 5
|
||||
foo:1m_total_prometheus 0
|
||||
foo:1m_without_non_existing_label_increase 0
|
||||
foo:1m_without_non_existing_label_increase 5
|
||||
foo:1m_without_non_existing_label_increase 7
|
||||
foo:1m_without_non_existing_label_increase_prometheus 0
|
||||
foo:1m_without_non_existing_label_increase_prometheus 5
|
||||
foo:1m_without_non_existing_label_increase_prometheus 7
|
||||
foo:1m_without_non_existing_label_total 0
|
||||
foo:1m_without_non_existing_label_total 5
|
||||
foo:1m_without_non_existing_label_total 5
|
||||
foo:1m_without_non_existing_label_total 12
|
||||
foo:1m_without_non_existing_label_total_prometheus 0
|
||||
foo:1m_without_non_existing_label_total_prometheus 5
|
||||
foo:1m_without_non_existing_label_total_prometheus 5
|
||||
foo:1m_without_non_existing_label_total_prometheus 12
|
||||
`, `
|
||||
- interval: 1m
|
||||
ignore_first_sample_interval: 0s
|
||||
outputs: [increase, increase_prometheus, total, total_prometheus]
|
||||
- interval: 1m
|
||||
staleness_interval: 2m
|
||||
without: [non_existing_label]
|
||||
outputs: [increase, increase_prometheus, total, total_prometheus]
|
||||
`, "111111")
|
||||
|
||||
// sum_samples and sum_samples_total outputs with different staleness intervals
|
||||
f([]string{`
|
||||
foo 1
|
||||
foo 2 1
|
||||
foo{bar="baz"} 2
|
||||
foo 3.3
|
||||
`, ``, ``, ``, ``}, time.Minute, `foo:1m_count_series 1
|
||||
foo:1m_count_series{bar="baz"} 1
|
||||
foo:1m_sum_samples 0
|
||||
foo:1m_sum_samples 4.3
|
||||
foo:1m_sum_samples{bar="baz"} 0
|
||||
`, `
|
||||
foo 4
|
||||
`, ``, ``, `
|
||||
foo 6
|
||||
`, ``, ``}, time.Minute, `foo:1m_sum_samples 3
|
||||
foo:1m_sum_samples 4
|
||||
foo:1m_sum_samples 6
|
||||
foo:1m_sum_samples_total 3
|
||||
foo:1m_sum_samples_total 7
|
||||
foo:1m_sum_samples_total 6
|
||||
foo:1m_sum_samples_total{bar="baz"} 2
|
||||
foo:1m_sum_samples{bar="baz"} 2
|
||||
foo:5m_by_bar_sum_samples 4.3
|
||||
foo:1m_without_non-existing-label_sum_samples 3
|
||||
foo:1m_without_non-existing-label_sum_samples 4
|
||||
foo:1m_without_non-existing-label_sum_samples 0
|
||||
foo:1m_without_non-existing-label_sum_samples 6
|
||||
foo:1m_without_non-existing-label_sum_samples 0
|
||||
foo:1m_without_non-existing-label_sum_samples_total 3
|
||||
foo:1m_without_non-existing-label_sum_samples_total 7
|
||||
foo:1m_without_non-existing-label_sum_samples_total 7
|
||||
foo:1m_without_non-existing-label_sum_samples_total 6
|
||||
foo:1m_without_non-existing-label_sum_samples_total 6
|
||||
foo:1m_without_non-existing-label_sum_samples_total{bar="baz"} 2
|
||||
foo:1m_without_non-existing-label_sum_samples_total{bar="baz"} 2
|
||||
foo:1m_without_non-existing-label_sum_samples{bar="baz"} 2
|
||||
foo:1m_without_non-existing-label_sum_samples{bar="baz"} 0
|
||||
foo:5m_by_bar_sum_samples 13
|
||||
foo:5m_by_bar_sum_samples_total 13
|
||||
foo:5m_by_bar_sum_samples_total{bar="baz"} 2
|
||||
foo:5m_by_bar_sum_samples{bar="baz"} 2
|
||||
`, `
|
||||
- interval: 1m
|
||||
outputs: [count_series, sum_samples]
|
||||
staleness_interval: 1m
|
||||
outputs: [ sum_samples, sum_samples_total]
|
||||
- interval: 1m
|
||||
staleness_interval: 2m
|
||||
without: [non-existing-label]
|
||||
outputs: [ sum_samples, sum_samples_total]
|
||||
- interval: 5m
|
||||
by: [bar]
|
||||
outputs: [sum_samples]
|
||||
`, "111")
|
||||
outputs: [sum_samples, sum_samples_total]
|
||||
`, "11111")
|
||||
|
||||
// min and max outputs
|
||||
f([]string{`
|
||||
@@ -688,30 +789,39 @@ foo:1m_by_cde_rate_sum{cde="1"} 0.125
|
||||
outputs: [rate_sum, rate_avg]
|
||||
`, "11111")
|
||||
|
||||
// test rate_sum and rate_avg, when two aggregation intervals are empty
|
||||
// test rate_sum and rate_avg with different staleness intervals
|
||||
f([]string{`
|
||||
foo{abc="123", cde="1"} 1
|
||||
foo{abc="123", cde="1"} 2 1
|
||||
foo{abc="456", cde="1"} 7
|
||||
foo{abc="456", cde="1"} 8 1
|
||||
foo{abc="777", cde="1"} 8
|
||||
foo{abc="777", cde="1"} 9 1
|
||||
`, ``, ``, `
|
||||
foo{abc="123", cde="1"} 19
|
||||
foo{abc="123", cde="1"} 20 1
|
||||
foo{abc="456", cde="1"} 26
|
||||
foo{abc="456", cde="1"} 27 1
|
||||
foo{abc="777", cde="1"} 27
|
||||
foo{abc="777", cde="1"} 28 1
|
||||
foo{abc="456", cde="1"} 3
|
||||
foo{abc="456", cde="1"} 4 1
|
||||
foo{abc="777", cde="1"} 5
|
||||
foo{abc="777", cde="1"} 6 1
|
||||
`, ``, `
|
||||
foo{abc="123", cde="1"} 121
|
||||
foo{abc="123", cde="1"} 122 1
|
||||
foo{abc="456", cde="1"} 123
|
||||
foo{abc="456", cde="1"} 124 1
|
||||
foo{abc="777", cde="1"} 125
|
||||
foo{abc="777", cde="1"} 126 1
|
||||
`}, time.Minute, `foo:1m_by_cde_rate_avg{cde="1"} 1
|
||||
foo:1m_by_cde_rate_avg{cde="1"} 1
|
||||
foo:1m_by_cde_rate_sum{cde="1"} 3
|
||||
foo:1m_by_cde_rate_sum{cde="1"} 3
|
||||
foo:1m_without_abc_rate_avg{cde="1"} 1
|
||||
foo:1m_without_abc_rate_avg{cde="1"} 1
|
||||
foo:1m_without_abc_rate_sum{cde="1"} 3
|
||||
foo:1m_without_abc_rate_sum{cde="1"} 3
|
||||
`, `
|
||||
- interval: 1m
|
||||
by: [cde]
|
||||
outputs: [rate_sum, rate_avg]
|
||||
enable_windows: true
|
||||
- interval: 1m
|
||||
staleness_interval: 2m
|
||||
without: [abc]
|
||||
outputs: [rate_sum, rate_avg]
|
||||
enable_windows: true
|
||||
`, "111111111111")
|
||||
|
||||
// rate_sum and rate_avg with duplicated events
|
||||
|
||||
@@ -252,11 +252,15 @@ func TestAggregatorsEqual(t *testing.T) {
|
||||
}
|
||||
|
||||
func timeSeriessToString(tss []prompb.TimeSeries) string {
|
||||
a := make([]string, len(tss))
|
||||
for i, ts := range tss {
|
||||
sorted := make([]prompb.TimeSeries, len(tss))
|
||||
copy(sorted, tss)
|
||||
sort.SliceStable(sorted, func(i, j int) bool {
|
||||
return promrelabel.LabelsToString(sorted[i].Labels) < promrelabel.LabelsToString(sorted[j].Labels)
|
||||
})
|
||||
a := make([]string, len(sorted))
|
||||
for i, ts := range sorted {
|
||||
a[i] = timeSeriesToString(ts)
|
||||
}
|
||||
sort.Strings(a)
|
||||
return strings.Join(a, "")
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ var benchOutputs = []string{
|
||||
"stddev",
|
||||
"stdvar",
|
||||
"sum_samples",
|
||||
"sum_samples_total",
|
||||
"total",
|
||||
"total_prometheus",
|
||||
"unique_samples",
|
||||
|
||||
@@ -1,27 +1,44 @@
|
||||
package streamaggr
|
||||
|
||||
import (
|
||||
"math"
|
||||
)
|
||||
|
||||
type sumSamplesAggrValue struct {
|
||||
sum float64
|
||||
}
|
||||
|
||||
func (av *sumSamplesAggrValue) pushSample(_ aggrConfig, sample *pushSample, _ string, _ int64) {
|
||||
if math.Abs(av.sum) >= (1 << 53) {
|
||||
// It is time to reset the entry, since it starts losing float64 precision
|
||||
av.sum = 0
|
||||
}
|
||||
av.sum += sample.value
|
||||
}
|
||||
|
||||
func (av *sumSamplesAggrValue) flush(_ aggrConfig, ctx *flushCtx, key string, _ bool) {
|
||||
ctx.appendSeries(key, "sum_samples", av.sum)
|
||||
av.sum = 0
|
||||
func (av *sumSamplesAggrValue) flush(c aggrConfig, ctx *flushCtx, key string, _ bool) {
|
||||
ac := c.(*sumSamplesAggrConfig)
|
||||
if ac.resetTotalOnFlush {
|
||||
ctx.appendSeries(key, "sum_samples", av.sum)
|
||||
av.sum = 0
|
||||
return
|
||||
}
|
||||
ctx.appendSeries(key, "sum_samples_total", av.sum)
|
||||
}
|
||||
|
||||
func (*sumSamplesAggrValue) state() any {
|
||||
return nil
|
||||
}
|
||||
|
||||
func newSumSamplesAggrConfig() aggrConfig {
|
||||
return &sumSamplesAggrConfig{}
|
||||
func newSumSamplesAggrConfig(resetTotalOnFlush bool) aggrConfig {
|
||||
return &sumSamplesAggrConfig{
|
||||
resetTotalOnFlush: resetTotalOnFlush,
|
||||
}
|
||||
}
|
||||
|
||||
type sumSamplesAggrConfig struct{}
|
||||
type sumSamplesAggrConfig struct {
|
||||
resetTotalOnFlush bool
|
||||
}
|
||||
|
||||
func (*sumSamplesAggrConfig) getValue(_ any) aggrValue {
|
||||
return &sumSamplesAggrValue{}
|
||||
|
||||
@@ -31,7 +31,11 @@ func (av *totalAggrValue) pushSample(c aggrConfig, sample *pushSample, key strin
|
||||
currentTime := fasttime.UnixTimestamp()
|
||||
keepFirstSample := ac.keepFirstSample && currentTime >= ac.ignoreFirstSampleDeadline
|
||||
lv, ok := av.shared.lastValues[key]
|
||||
if ok || keepFirstSample {
|
||||
// The last value is stale, reset it.
|
||||
if ok && lv.deleteDeadline < int64(currentTime)*1000 {
|
||||
ok = false
|
||||
}
|
||||
if ok {
|
||||
if sample.timestamp < lv.timestamp {
|
||||
// Skip out of order sample
|
||||
return
|
||||
@@ -43,6 +47,8 @@ func (av *totalAggrValue) pushSample(c aggrConfig, sample *pushSample, key strin
|
||||
av.total += sample.value
|
||||
ac.counterResetsTotal.Inc()
|
||||
}
|
||||
} else if keepFirstSample {
|
||||
av.total += sample.value
|
||||
}
|
||||
lv.value = sample.value
|
||||
lv.timestamp = sample.timestamp
|
||||
|
||||
110
vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go
generated
vendored
110
vendor/github.com/VictoriaMetrics/metrics/process_metrics_linux.go
generated
vendored
@@ -7,6 +7,7 @@ import (
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
@@ -404,16 +405,113 @@ func readPSITotals(cgroupPath, statsName string) (uint64, uint64, error) {
|
||||
}
|
||||
|
||||
func getCgroupV2Path() string {
|
||||
data, err := ioutil.ReadFile("/proc/self/cgroup")
|
||||
cgroupData, err := os.ReadFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
tmp := strings.SplitN(string(data), "::", 2)
|
||||
if len(tmp) != 2 {
|
||||
// Read /proc/self/mountinfo with a timeout. Generating the mountinfo contents
|
||||
// can block in the kernel when a backing filesystem (e.g. a hung NFS or FUSE
|
||||
// mount) is unresponsive. Since this runs at program init via psiMetricsStart,
|
||||
// a blocking read would hang startup, so on failure or timeout fall back to the assumed /sys/fs/cgroup mountpoint instead.
|
||||
mountinfoData, _ := readFileWithTimeout("/proc/self/mountinfo", time.Second)
|
||||
return getCgroupV2PathInternal(string(cgroupData), mountinfoData)
|
||||
}
|
||||
|
||||
// readFileWithTimeout returns the contents of the file at path and true on success.
// It returns ("", false) if the read fails or doesn't finish within timeout.
//
// The read runs in a separate goroutine. If the timeout fires first, that goroutine
// stays alive until os.ReadFile returns (if ever). This is accepted as a safeguard
// against reads of pseudo-files such as /proc/self/mountinfo hanging on an unresponsive mount.
func readFileWithTimeout(path string, timeout time.Duration) (string, bool) {
	type readOutcome struct {
		contents string
		ok       bool
	}
	// A buffer of one lets the reader goroutine deliver its outcome and exit
	// even when nobody is receiving anymore because of the timeout.
	done := make(chan readOutcome, 1)
	go func() {
		raw, err := os.ReadFile(path)
		if err != nil {
			done <- readOutcome{}
			return
		}
		done <- readOutcome{contents: string(raw), ok: true}
	}()

	deadline := time.NewTimer(timeout)
	defer deadline.Stop()

	select {
	case <-deadline.C:
		return "", false
	case out := <-done:
		return out.contents, out.ok
	}
}
|
||||
|
||||
func getCgroupV2PathInternal(cgroupData, mountinfoData string) string {
|
||||
rel := getCgroupV2RelativePath(cgroupData)
|
||||
if rel == "" {
|
||||
// The process doesn't run under cgroup v2.
|
||||
return ""
|
||||
}
|
||||
path := "/sys/fs/cgroup" + strings.TrimSpace(tmp[1])
|
||||
|
||||
// Drop trailing slash if it exsits. This prevents from '//' in the constructed paths by the caller.
|
||||
return strings.TrimSuffix(path, "/")
|
||||
// Determine the actual cgroup v2 mountpoint instead of assuming /sys/fs/cgroup.
|
||||
// On systems with a hybrid cgroup hierarchy the unified cgroup v2 is mounted
|
||||
// at a different location such as /sys/fs/cgroup/unified.
|
||||
// See https://github.com/VictoriaMetrics/metrics/issues/127
|
||||
mountpoint := getCgroupV2Mountpoint(mountinfoData)
|
||||
if mountpoint == "" {
|
||||
// fallback to assumed path
|
||||
mountpoint = "/sys/fs/cgroup"
|
||||
}
|
||||
cgroupPath := path.Join(mountpoint, rel)
|
||||
// Drop trailing slash if it exists. This prevents from '//' in the constructed paths by the caller.
|
||||
return strings.TrimSuffix(cgroupPath, "/")
|
||||
}
|
||||
|
||||
// getCgroupV2RelativePath extracts the cgroup v2 path of the process from the
// contents of /proc/self/cgroup passed in cgroupData.
//
// The returned path is relative to the cgroup v2 mountpoint. An empty string
// is returned if cgroupData contains no cgroup v2 entry.
//
// Every line of /proc/self/cgroup has the form
// "hierarchy-ID:controller-list:cgroup-path". The cgroup v2 entry uses
// hierarchy ID 0 and an empty controller list, so it starts with "0::",
// e.g. "0::/the/path".
// See https://man7.org/linux/man-pages/man7/cgroups.7.html
func getCgroupV2RelativePath(cgroupData string) string {
	const v2Prefix = "0::"

	remaining := cgroupData
	for remaining != "" {
		var entry string
		entry, remaining, _ = strings.Cut(remaining, "\n")

		// Only the first line starting with "0::" is taken into account.
		if cgroupPath, found := strings.CutPrefix(entry, v2Prefix); found {
			return strings.TrimSpace(cgroupPath)
		}
	}
	return ""
}
|
||||
|
||||
// getCgroupV2Mountpoint scans mountinfoData, the contents of /proc/self/mountinfo,
// and returns the mountpoint of the first entry whose filesystem type is cgroup2.
//
// An empty string is returned if no such entry is found.
//
// A mountinfo entry looks like this:
//
//	36 35 98:0 / /sys/fs/cgroup/unified rw,... - cgroup2 cgroup2 rw,...
//
// The " - " separator terminates the optional fields; the filesystem type is
// the first field after it, while the mountpoint is the fifth field before it.
// See https://man7.org/linux/man-pages/man5/proc_pid_mountinfo.5.html
func getCgroupV2Mountpoint(mountinfoData string) string {
	const fsType = "cgroup2"

	for _, entry := range strings.Split(mountinfoData, "\n") {
		// Cheap pre-filter: skip entries which cannot describe a cgroup2 mount.
		if !strings.Contains(entry, fsType) {
			continue
		}

		mountPart, fsPart, found := strings.Cut(entry, " - ")
		if !found {
			continue
		}

		// The filesystem type must match exactly; a mere mention of "cgroup2"
		// elsewhere in the entry (e.g. in a path) isn't enough.
		if fsFields := strings.Fields(fsPart); len(fsFields) == 0 || fsFields[0] != fsType {
			continue
		}

		// The mountpoint is the fifth whitespace-separated field.
		if mountFields := strings.Fields(mountPart); len(mountFields) >= 5 {
			return mountFields[4]
		}
	}
	return ""
}
|
||||
|
||||
1
vendor/github.com/VictoriaMetrics/metricsql/transform.go
generated
vendored
1
vendor/github.com/VictoriaMetrics/metricsql/transform.go
generated
vendored
@@ -74,7 +74,6 @@ var transformFuncs = map[string]bool{
|
||||
"rand": true,
|
||||
"rand_exponential": true,
|
||||
"rand_normal": true,
|
||||
"range": true,
|
||||
"range_avg": true,
|
||||
"range_first": true,
|
||||
"range_last": true,
|
||||
|
||||
4
vendor/modules.txt
vendored
4
vendor/modules.txt
vendored
@@ -143,10 +143,10 @@ github.com/VictoriaMetrics/easyproto
|
||||
# github.com/VictoriaMetrics/fastcache v1.13.3
|
||||
## explicit; go 1.24.0
|
||||
github.com/VictoriaMetrics/fastcache
|
||||
# github.com/VictoriaMetrics/metrics v1.43.2
|
||||
# github.com/VictoriaMetrics/metrics v1.44.0
|
||||
## explicit; go 1.24.0
|
||||
github.com/VictoriaMetrics/metrics
|
||||
# github.com/VictoriaMetrics/metricsql v0.87.1
|
||||
# github.com/VictoriaMetrics/metricsql v0.87.2
|
||||
## explicit; go 1.24.2
|
||||
github.com/VictoriaMetrics/metricsql
|
||||
github.com/VictoriaMetrics/metricsql/binaryop
|
||||
|
||||
Reference in New Issue
Block a user