mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-01 16:12:18 +03:00
Compare commits
32 Commits
issue-1050
...
issue-1060
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
517c17b744 | ||
|
|
1c2622d8ae | ||
|
|
9a836dac59 | ||
|
|
799ecb0a08 | ||
|
|
c88dc19052 | ||
|
|
919049f9e2 | ||
|
|
24efe47c6a | ||
|
|
f8d99d9289 | ||
|
|
333a015be5 | ||
|
|
b6196524ba | ||
|
|
e9f1bb911c | ||
|
|
2336c7e72f | ||
|
|
b803a46e7f | ||
|
|
0e845e234f | ||
|
|
49a8dd4da6 | ||
|
|
2609a53e41 | ||
|
|
1ca4b3ba3c | ||
|
|
66b9890025 | ||
|
|
2e7591d567 | ||
|
|
ca8d9d21a9 | ||
|
|
0653b7c7b8 | ||
|
|
569197d038 | ||
|
|
5f357e6a94 | ||
|
|
c317e95ab8 | ||
|
|
a875597b09 | ||
|
|
3062f4355d | ||
|
|
aa206acd6f | ||
|
|
9a74f71a5f | ||
|
|
1dcf0f6826 | ||
|
|
727abb0b57 | ||
|
|
2c262c5ef6 | ||
|
|
12b79143dc |
4
.github/pull_request_template.md
vendored
4
.github/pull_request_template.md
vendored
@@ -1 +1,3 @@
|
||||
Before creating the PR, please read [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist) and remove this line after confirming you understand and follow them.
|
||||
**PLEASE REMOVE LINE BELOW BEFORE SUBMITTING**
|
||||
|
||||
Before creating the PR, make sure you have read and followed the [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
|
||||
|
||||
@@ -77,16 +77,6 @@ func insertRows(at *auth.Token, tss []prompb.TimeSeries, mms []prompb.MetricMeta
|
||||
|
||||
var metadataTotal int
|
||||
if prommetadata.IsEnabled() {
|
||||
var accountID, projectID uint32
|
||||
if at != nil {
|
||||
accountID = at.AccountID
|
||||
projectID = at.ProjectID
|
||||
for i := range mms {
|
||||
mm := &mms[i]
|
||||
mm.AccountID = accountID
|
||||
mm.ProjectID = projectID
|
||||
}
|
||||
}
|
||||
ctx.WriteRequest.Metadata = mms
|
||||
metadataTotal = len(mms)
|
||||
}
|
||||
|
||||
@@ -75,11 +75,6 @@ func insertRows(at *auth.Token, rows []prometheus.Row, mms []prometheus.Metadata
|
||||
Samples: samples[len(samples)-1:],
|
||||
})
|
||||
}
|
||||
var accountID, projectID uint32
|
||||
if at != nil {
|
||||
accountID = at.AccountID
|
||||
projectID = at.ProjectID
|
||||
}
|
||||
for i := range mms {
|
||||
mm := &mms[i]
|
||||
mmsDst = append(mmsDst, prompb.MetricMetadata{
|
||||
@@ -88,8 +83,6 @@ func insertRows(at *auth.Token, rows []prometheus.Row, mms []prometheus.Metadata
|
||||
Type: mm.Type,
|
||||
// there is no unit in Prometheus exposition formats
|
||||
|
||||
AccountID: accountID,
|
||||
ProjectID: projectID,
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Timeseries = tssDst
|
||||
|
||||
@@ -72,11 +72,6 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
|
||||
|
||||
var metadataTotal int
|
||||
if prommetadata.IsEnabled() {
|
||||
var accountID, projectID uint32
|
||||
if at != nil {
|
||||
accountID = at.AccountID
|
||||
projectID = at.ProjectID
|
||||
}
|
||||
for i := range mms {
|
||||
mm := &mms[i]
|
||||
mmsDst = append(mmsDst, prompb.MetricMetadata{
|
||||
@@ -85,8 +80,8 @@ func insertRows(at *auth.Token, timeseries []prompb.TimeSeries, mms []prompb.Met
|
||||
Type: mm.Type,
|
||||
Unit: mm.Unit,
|
||||
|
||||
AccountID: accountID,
|
||||
ProjectID: projectID,
|
||||
AccountID: mm.AccountID,
|
||||
ProjectID: mm.ProjectID,
|
||||
})
|
||||
}
|
||||
ctx.WriteRequest.Metadata = mmsDst
|
||||
|
||||
@@ -211,6 +211,9 @@ func (wr *writeRequest) copyMetadata(dst, src *prompb.MetricMetadata) {
|
||||
dst.Type = src.Type
|
||||
dst.Unit = src.Unit
|
||||
|
||||
dst.AccountID = src.AccountID
|
||||
dst.ProjectID = src.ProjectID
|
||||
|
||||
// Pre-allocate memory for all string fields.
|
||||
neededBufLen := len(src.MetricFamilyName) + len(src.Help)
|
||||
bufLen := len(wr.metadatabuf)
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mdx"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -102,6 +103,9 @@ var (
|
||||
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
|
||||
disableMetadataPerURL = flagutil.NewArrayBool("remoteWrite.disableMetadata", "Whether to disable sending metadata to the corresponding -remoteWrite.url. "+
|
||||
"By default, metadata sending is controlled by the global -enableMetadata flag")
|
||||
|
||||
enableMdx = flagutil.NewArrayBool("remoteWrite.mdx.enable", "Whether to only retain metrics from VictoriaMetrics services before sending them to the corresponding -remoteWrite.url. "+
|
||||
"Please see https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -286,6 +290,10 @@ func initRemoteWriteCtxs(urls []string) {
|
||||
rwctxIdx[i] = i
|
||||
}
|
||||
|
||||
if slices.Contains(*enableMdx, true) && *shardByURL {
|
||||
logger.Fatalf("-remoteWrite.mdx.enable and -remoteWrite.shardByURL cannot be set to true simultaneously.")
|
||||
}
|
||||
|
||||
if *shardByURL {
|
||||
consistentHashNodes := make([]string, 0, len(urls))
|
||||
for i, url := range urls {
|
||||
@@ -294,6 +302,10 @@ func initRemoteWriteCtxs(urls []string) {
|
||||
rwctxConsistentHashGlobal = consistenthash.NewConsistentHash(consistentHashNodes, 0)
|
||||
}
|
||||
|
||||
if slices.Contains(*enableMdx, true) {
|
||||
mdx.InitGlobalFilter()
|
||||
}
|
||||
|
||||
rwctxsGlobal = rwctxs
|
||||
rwctxsGlobalIdx = rwctxIdx
|
||||
}
|
||||
@@ -356,6 +368,7 @@ func Stop() {
|
||||
if sl := dailySeriesLimiter; sl != nil {
|
||||
sl.MustStop()
|
||||
}
|
||||
mdx.GlobalFilter.MustStop()
|
||||
}
|
||||
|
||||
// PushDropSamplesOnFailure pushes wr to the configured remote storage systems set via -remoteWrite.url
|
||||
@@ -398,7 +411,7 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
|
||||
|
||||
// Push metadata separately from time series, since it doesn't need sharding,
|
||||
// relabeling, stream aggregation, deduplication, etc.
|
||||
if !tryPushMetadataToRemoteStorages(rwctxs, mms, forceDropSamplesOnFailure) {
|
||||
if !tryPushMetadataToRemoteStorages(at, rwctxs, mms, forceDropSamplesOnFailure) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -536,11 +549,18 @@ func pushTimeSeriesToRemoteStoragesTrackDropped(tss []prompb.TimeSeries) {
|
||||
}
|
||||
}
|
||||
|
||||
func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.MetricMetadata, forceDropSamplesOnFailure bool) bool {
|
||||
func tryPushMetadataToRemoteStorages(at *auth.Token, rwctxs []*remoteWriteCtx, mms []prompb.MetricMetadata, forceDropSamplesOnFailure bool) bool {
|
||||
if len(mms) == 0 {
|
||||
// Nothing to push
|
||||
return true
|
||||
}
|
||||
if at != nil {
|
||||
for idx := range mms {
|
||||
mm := &mms[idx]
|
||||
mm.AccountID = at.AccountID
|
||||
mm.ProjectID = at.ProjectID
|
||||
}
|
||||
}
|
||||
// Do not shard metadata even if -remoteWrite.shardByURL is set, just replicate it among rwctxs.
|
||||
// Since metadata is usually small and there is no guarantee that metadata can be sent to
|
||||
// the same remote storage with the corresponding metrics.
|
||||
@@ -830,11 +850,14 @@ type remoteWriteCtx struct {
|
||||
// otherwise by the global -enableMetadata flag.
|
||||
enableMetadata bool
|
||||
|
||||
enableMdx bool
|
||||
|
||||
pss []*pendingSeries
|
||||
pssNextIdx atomic.Uint64
|
||||
|
||||
rowsPushedAfterRelabel *metrics.Counter
|
||||
rowsDroppedByRelabel *metrics.Counter
|
||||
rowsDroppedByMdx *metrics.Counter
|
||||
|
||||
pushFailures *metrics.Counter
|
||||
metadataDroppedOnPushFailure *metrics.Counter
|
||||
@@ -921,16 +944,17 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
for i := range pss {
|
||||
pss[i] = newPendingSeries(fq, &c.useVMProto, sf, rd)
|
||||
}
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
c: c,
|
||||
pss: pss,
|
||||
enableMetadata: isMetadataEnabledForURL(argIdx),
|
||||
enableMdx: enableMdx.GetOptionalArg(argIdx),
|
||||
|
||||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
||||
rowsDroppedByMdx: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_mdx_rows_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
||||
|
||||
pushFailures: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_push_failures_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
||||
metadataDroppedOnPushFailure: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_metadata_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
||||
@@ -966,6 +990,7 @@ func (rwctx *remoteWriteCtx) MustStop() {
|
||||
|
||||
rwctx.rowsPushedAfterRelabel = nil
|
||||
rwctx.rowsDroppedByRelabel = nil
|
||||
rwctx.rowsDroppedByMdx = nil
|
||||
}
|
||||
|
||||
// TryPushTimeSeries sends tss series to the configured remote write endpoint
|
||||
@@ -983,17 +1008,30 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
||||
putRelabelCtx(rctx)
|
||||
}()
|
||||
|
||||
if rwctx.enableMdx && mdx.GlobalFilter != nil {
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss
|
||||
rowsCountBeforeMdx := getRowsCount(tss)
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tss = mdx.GlobalFilter.Filter(tss, *v)
|
||||
rowsCountAfterMdx := getRowsCount(tss)
|
||||
rwctx.rowsDroppedByMdx.Add(rowsCountBeforeMdx - rowsCountAfterMdx)
|
||||
}
|
||||
|
||||
// Apply relabeling
|
||||
rcs := allRelabelConfigs.Load()
|
||||
pcs := rcs.perURL[rwctx.idx]
|
||||
if pcs.Len() > 0 {
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss before applying relabeling in order to prevent
|
||||
// from affecting time series for other remoteWrite.url configs.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/467
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/599
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
if rctx == nil {
|
||||
// Make a copy of tss before applying relabeling in order to prevent
|
||||
// from affecting time series for other remoteWrite.url configs.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/467
|
||||
// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/599
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss before dropping aggregated series
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
}
|
||||
rowsCountBeforeRelabel := getRowsCount(tss)
|
||||
tss = rctx.applyRelabeling(tss, pcs)
|
||||
rowsCountAfterRelabel := getRowsCount(tss)
|
||||
|
||||
@@ -362,40 +362,62 @@ func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
|
||||
}
|
||||
|
||||
type backendURLs struct {
|
||||
healthChecksContext context.Context
|
||||
healthChecksCancel func()
|
||||
healthChecksWG sync.WaitGroup
|
||||
|
||||
bhc backendHealthCheck
|
||||
bus []*backendURL
|
||||
}
|
||||
|
||||
type backendHealthCheck struct {
|
||||
ctx context.Context
|
||||
// mu protects fields below
|
||||
cancel func()
|
||||
mu sync.Mutex
|
||||
isStopped bool
|
||||
wg sync.WaitGroup
|
||||
}
|
||||
|
||||
func (bhc *backendHealthCheck) run(hc func()) {
|
||||
bhc.mu.Lock()
|
||||
defer bhc.mu.Unlock()
|
||||
if bhc.isStopped {
|
||||
return
|
||||
}
|
||||
bhc.wg.Go(hc)
|
||||
}
|
||||
|
||||
func (bhc *backendHealthCheck) stop() {
|
||||
bhc.mu.Lock()
|
||||
bhc.cancel()
|
||||
bhc.isStopped = true
|
||||
bhc.mu.Unlock()
|
||||
bhc.wg.Wait()
|
||||
}
|
||||
|
||||
func newBackendURLs() *backendURLs {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
return &backendURLs{
|
||||
healthChecksContext: ctx,
|
||||
healthChecksCancel: cancel,
|
||||
bhc: backendHealthCheck{
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (bus *backendURLs) add(u *url.URL) {
|
||||
bus.bus = append(bus.bus, &backendURL{
|
||||
url: u,
|
||||
healthCheckContext: bus.healthChecksContext,
|
||||
healthCheckWG: &bus.healthChecksWG,
|
||||
hasPlaceHolders: hasAnyPlaceholders(u),
|
||||
url: u,
|
||||
bhc: &bus.bhc,
|
||||
hasPlaceHolders: hasAnyPlaceholders(u),
|
||||
})
|
||||
}
|
||||
|
||||
func (bus *backendURLs) stopHealthChecks() {
|
||||
bus.healthChecksCancel()
|
||||
bus.healthChecksWG.Wait()
|
||||
bus.bhc.stop()
|
||||
}
|
||||
|
||||
type backendURL struct {
|
||||
broken atomic.Bool
|
||||
|
||||
healthCheckContext context.Context
|
||||
healthCheckWG *sync.WaitGroup
|
||||
bhc *backendHealthCheck
|
||||
|
||||
concurrentRequests atomic.Int32
|
||||
|
||||
@@ -410,7 +432,7 @@ func (bu *backendURL) isBroken() bool {
|
||||
|
||||
func (bu *backendURL) setBroken() {
|
||||
if bu.broken.CompareAndSwap(false, true) {
|
||||
bu.healthCheckWG.Go(func() {
|
||||
bu.bhc.run(func() {
|
||||
bu.runHealthCheck()
|
||||
bu.broken.Store(false)
|
||||
})
|
||||
@@ -432,11 +454,11 @@ func (bu *backendURL) runHealthCheck() {
|
||||
case <-t.C:
|
||||
// Verify network connectivity via TCP dial before marking backend healthy.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9997
|
||||
ctx, cancel := context.WithTimeout(bu.healthCheckContext, time.Second)
|
||||
ctx, cancel := context.WithTimeout(bu.bhc.ctx, time.Second)
|
||||
c, err := netutil.Dialer.DialContext(ctx, "tcp", addr)
|
||||
cancel()
|
||||
if err != nil {
|
||||
if errors.Is(bu.healthCheckContext.Err(), context.Canceled) {
|
||||
if errors.Is(bu.bhc.ctx.Err(), context.Canceled) {
|
||||
return
|
||||
}
|
||||
logger.Warnf("ignoring the backend at %s for %s because of dial error: %s", addr, *failTimeout, err)
|
||||
@@ -445,7 +467,7 @@ func (bu *backendURL) runHealthCheck() {
|
||||
|
||||
_ = c.Close()
|
||||
return
|
||||
case <-bu.healthCheckContext.Done():
|
||||
case <-bu.bhc.ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,10 +8,10 @@ import (
|
||||
"time"
|
||||
|
||||
vmetrics "github.com/VictoriaMetrics/metrics"
|
||||
"github.com/cheggaaa/pb/v3"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/cheggaaa/pb/v3"
|
||||
)
|
||||
|
||||
type otsdbProcessor struct {
|
||||
@@ -89,9 +89,6 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
|
||||
// we're going to make serieslist * queryRanges queries, so we should represent that in the progress bar
|
||||
otsdbSeriesTotal.Add(len(serieslist) * queryRanges)
|
||||
bar := pb.StartNew(len(serieslist) * queryRanges)
|
||||
defer func(bar *pb.ProgressBar) {
|
||||
bar.Finish()
|
||||
}(bar)
|
||||
var wg sync.WaitGroup
|
||||
for range op.otsdbcc {
|
||||
wg.Go(func() {
|
||||
@@ -106,41 +103,22 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
|
||||
}
|
||||
})
|
||||
}
|
||||
/*
|
||||
Loop through all series for this metric, processing all retentions and time ranges
|
||||
requested. This loop is our primary "collect data from OpenTSDB loop" and should
|
||||
be async, sending data to VictoriaMetrics over time.
|
||||
runErr := op.sendQueries(ctx, serieslist, seriesCh, errCh, startTime)
|
||||
|
||||
The idea with having the select at the inner-most loop is to ensure quick
|
||||
short-circuiting on error.
|
||||
*/
|
||||
for _, series := range serieslist {
|
||||
for _, rt := range op.oc.Retentions {
|
||||
for _, tr := range rt.QueryRanges {
|
||||
select {
|
||||
case otsdbErr := <-errCh:
|
||||
return fmt.Errorf("opentsdb error: %s", otsdbErr)
|
||||
case vmErr := <-op.im.Errors():
|
||||
otsdbErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, op.isVerbose))
|
||||
case seriesCh <- queryObj{
|
||||
Tr: tr, StartTime: startTime,
|
||||
Series: series, Rt: opentsdb.RetentionMeta{
|
||||
FirstOrder: rt.FirstOrder, SecondOrder: rt.SecondOrder, AggTime: rt.AggTime}}:
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Drain channels per metric
|
||||
// Always drain channels and wait for workers to prevent goroutine leaks
|
||||
close(seriesCh)
|
||||
wg.Wait()
|
||||
close(errCh)
|
||||
// check for any lingering errors on the query side
|
||||
for otsdbErr := range errCh {
|
||||
return fmt.Errorf("import process failed: \n%s", otsdbErr)
|
||||
if runErr == nil {
|
||||
runErr = fmt.Errorf("import process failed: \n%s", otsdbErr)
|
||||
}
|
||||
}
|
||||
bar.Finish()
|
||||
if runErr != nil {
|
||||
return runErr
|
||||
}
|
||||
log.Print(op.im.Stats())
|
||||
}
|
||||
op.im.Close()
|
||||
@@ -155,6 +133,34 @@ func (op *otsdbProcessor) run(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// sendQueries iterates over all series and retention ranges, sending queries to workers.
|
||||
// It returns early if ctx is canceled or an error is received.
|
||||
func (op *otsdbProcessor) sendQueries(ctx context.Context, serieslist []opentsdb.Meta, seriesCh chan<- queryObj, errCh <-chan error, startTime int64) error {
|
||||
for _, series := range serieslist {
|
||||
for _, rt := range op.oc.Retentions {
|
||||
for _, tr := range rt.QueryRanges {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return fmt.Errorf("context canceled: %s", ctx.Err())
|
||||
case otsdbErr := <-errCh:
|
||||
otsdbErrorsTotal.Inc()
|
||||
return fmt.Errorf("opentsdb error: %s", otsdbErr)
|
||||
case vmErr := <-op.im.Errors():
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, op.isVerbose))
|
||||
case seriesCh <- queryObj{
|
||||
Tr: tr, StartTime: startTime,
|
||||
Series: series, Rt: opentsdb.RetentionMeta{
|
||||
FirstOrder: rt.FirstOrder,
|
||||
SecondOrder: rt.SecondOrder,
|
||||
AggTime: rt.AggTime,
|
||||
}}:
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (op *otsdbProcessor) do(s queryObj) error {
|
||||
start := s.StartTime - s.Tr.Start
|
||||
end := s.StartTime - s.Tr.End
|
||||
@@ -163,6 +169,7 @@ func (op *otsdbProcessor) do(s queryObj) error {
|
||||
return fmt.Errorf("failed to collect data for %v in %v:%v :: %v", s.Series, s.Rt, s.Tr, err)
|
||||
}
|
||||
if len(data.Timestamps) < 1 || len(data.Values) < 1 {
|
||||
log.Printf("no data found for %v in %v:%v...skipping", s.Series, s.Rt, s.Tr)
|
||||
return nil
|
||||
}
|
||||
labels := make([]vm.LabelPair, 0, len(data.Tags))
|
||||
|
||||
@@ -108,10 +108,10 @@ func (c Client) FindMetrics(q string) ([]string, error) {
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to send GET request to %q: %s", q, err)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("bad return from OpenTSDB: %d: %v", resp.StatusCode, resp)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not retrieve metric data from %q: %s", q, err)
|
||||
@@ -130,12 +130,12 @@ func (c Client) FindSeries(metric string) ([]Meta, error) {
|
||||
q := fmt.Sprintf("%s/api/search/lookup?m=%s&limit=%d", c.Addr, metric, c.Limit)
|
||||
resp, err := c.c.Get(q)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to set GET request to %q: %s", q, err)
|
||||
return nil, fmt.Errorf("failed to send GET request to %q: %s", q, err)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
if resp.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("bad return from OpenTSDB: %d: %v", resp.StatusCode, resp)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("could not retrieve series data from %q: %s", q, err)
|
||||
@@ -185,6 +185,7 @@ func (c Client) GetData(series Meta, rt RetentionMeta, start int64, end int64, m
|
||||
if err != nil {
|
||||
return Metric{}, fmt.Errorf("failed to send GET request to %q: %s", q, err)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
/*
|
||||
There are three potential failures here, none of which should kill the entire
|
||||
migration run:
|
||||
@@ -196,7 +197,6 @@ func (c Client) GetData(series Meta, rt RetentionMeta, start int64, end int64, m
|
||||
log.Printf("bad response code from OpenTSDB query %v for %q...skipping", resp.StatusCode, q)
|
||||
return Metric{}, nil
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
log.Println("couldn't read response body from OpenTSDB query...skipping")
|
||||
@@ -239,27 +239,20 @@ func (c Client) GetData(series Meta, rt RetentionMeta, start int64, end int64, m
|
||||
In all "bad" cases, we don't end the migration, we just don't process that particular message
|
||||
*/
|
||||
if len(output) < 1 {
|
||||
// no results returned...return an empty object without error
|
||||
return Metric{}, nil
|
||||
}
|
||||
if len(output) > 1 {
|
||||
// multiple series returned for a single query. We can't process this right, so...
|
||||
return Metric{}, nil
|
||||
return Metric{}, fmt.Errorf("unexpected number of series returned: %d for query %q; expected 1", len(output), q)
|
||||
}
|
||||
if len(output[0].AggregateTags) > 0 {
|
||||
// This failure means we've suppressed potential series somehow...
|
||||
return Metric{}, nil
|
||||
return Metric{}, fmt.Errorf("aggregate tags %v present in response for query %q; series may be suppressed", output[0].AggregateTags, q)
|
||||
}
|
||||
data := Metric{}
|
||||
data.Metric = output[0].Metric
|
||||
data.Tags = output[0].Tags
|
||||
/*
|
||||
We evaluate data for correctness before formatting the actual values
|
||||
to skip a little bit of time if the series has invalid formatting
|
||||
*/
|
||||
data, err = modifyData(data, c.Normalize)
|
||||
if err != nil {
|
||||
return Metric{}, nil
|
||||
return Metric{}, fmt.Errorf("failed to convert metric data for query %q: %w", q, err)
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -32,7 +32,7 @@ func convertDuration(duration string) (time.Duration, error) {
|
||||
var err error
|
||||
var timeValue int
|
||||
if strings.HasSuffix(duration, "y") {
|
||||
timeValue, err = strconv.Atoi(strings.Trim(duration, "y"))
|
||||
timeValue, err = strconv.Atoi(strings.TrimSuffix(duration, "y"))
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("invalid time range: %q", duration)
|
||||
}
|
||||
@@ -42,7 +42,7 @@ func convertDuration(duration string) (time.Duration, error) {
|
||||
return 0, fmt.Errorf("invalid time range: %q", duration)
|
||||
}
|
||||
} else if strings.HasSuffix(duration, "w") {
|
||||
timeValue, err = strconv.Atoi(strings.Trim(duration, "w"))
|
||||
timeValue, err = strconv.Atoi(strings.TrimSuffix(duration, "w"))
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("invalid time range: %q", duration)
|
||||
}
|
||||
@@ -52,7 +52,7 @@ func convertDuration(duration string) (time.Duration, error) {
|
||||
return 0, fmt.Errorf("invalid time range: %q", duration)
|
||||
}
|
||||
} else if strings.HasSuffix(duration, "d") {
|
||||
timeValue, err = strconv.Atoi(strings.Trim(duration, "d"))
|
||||
timeValue, err = strconv.Atoi(strings.TrimSuffix(duration, "d"))
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("invalid time range: %q", duration)
|
||||
}
|
||||
@@ -95,6 +95,9 @@ func convertRetention(retention string, offset int64, msecTime bool) (Retention,
|
||||
if !msecTime {
|
||||
queryLength = queryLength / 1000
|
||||
}
|
||||
if queryLength <= 0 {
|
||||
return Retention{}, fmt.Errorf("ttl %q resolves to non-positive query range %d; use a larger duration", chunks[2], queryLength)
|
||||
}
|
||||
queryRange := queryLength
|
||||
// bump by the offset so we don't look at empty ranges any time offset > ttl
|
||||
queryLength += offset
|
||||
@@ -138,16 +141,29 @@ func convertRetention(retention string, offset int64, msecTime bool) (Retention,
|
||||
2. we discover the actual size of each "chunk"
|
||||
This is second division step
|
||||
*/
|
||||
querySize = int64(queryRange / (queryRange / (rowLength * 4)))
|
||||
divisor := queryRange / (rowLength * 4)
|
||||
if divisor == 0 {
|
||||
querySize = queryRange
|
||||
} else {
|
||||
querySize = queryRange / divisor
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
Unless the aggTime (how long a range of data we're requesting per individual point)
|
||||
is greater than the row size. Then we'll need to use that to determine
|
||||
how big each individual query should be
|
||||
*/
|
||||
querySize = int64(queryRange / (queryRange / (aggTime * 4)))
|
||||
divisor := queryRange / (aggTime * 4)
|
||||
if divisor == 0 {
|
||||
querySize = queryRange
|
||||
} else {
|
||||
querySize = queryRange / divisor
|
||||
}
|
||||
}
|
||||
|
||||
if querySize <= 0 {
|
||||
return Retention{}, fmt.Errorf("computed non-positive querySize=%d for retention %q; check parameters", querySize, retention)
|
||||
}
|
||||
var timeChunks []TimeRange
|
||||
var i int64
|
||||
for i = offset; i <= queryLength; i = i + querySize {
|
||||
|
||||
@@ -1223,11 +1223,7 @@ func getCommonParamsInternal(r *http.Request, startTime time.Time, requireNonEmp
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// Limit the `end` arg to the current time +2 days in the same way
|
||||
// as it is limited during data ingestion.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/ea06d2fd3ccbbb6aa4480ab3b04f7b671408be2a/lib/storage/table.go#L378
|
||||
// This should fix possible timestamp overflow - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2669
|
||||
maxTS := startTime.UnixNano()/1e6 + 2*24*3600*1000
|
||||
maxTS := int64(math.MaxInt64 / 1_000_000)
|
||||
if end > maxTS {
|
||||
end = maxTS
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
197
app/vmselect/vmui/assets/index-fsxQMWD9.js
Normal file
197
app/vmselect/vmui/assets/index-fsxQMWD9.js
Normal file
File diff suppressed because one or more lines are too long
71
app/vmselect/vmui/assets/vendor-BF3F25aG.js
Normal file
71
app/vmselect/vmui/assets/vendor-BF3F25aG.js
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -37,9 +37,9 @@
|
||||
<meta property="og:title" content="UI for VictoriaMetrics">
|
||||
<meta property="og:url" content="https://victoriametrics.com/">
|
||||
<meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data">
|
||||
<script type="module" crossorigin src="./assets/index-C24BPpD_.js"></script>
|
||||
<script type="module" crossorigin src="./assets/index-fsxQMWD9.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/rolldown-runtime-COnpUsM8.js">
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-BWBgVCcr.js">
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-BF3F25aG.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-CnsZ1jie.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-D2OEy8Ra.css">
|
||||
</head>
|
||||
|
||||
@@ -33,6 +33,8 @@ import (
|
||||
var (
|
||||
retentionPeriod = flagutil.NewRetentionDuration("retentionPeriod", "1M", "Data with timestamps outside the retentionPeriod is automatically deleted. The minimum retentionPeriod is 24h or 1d. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#retention. See also -retentionFilter")
|
||||
futureRetention = flagutil.NewRetentionDuration("futureRetention", "2d", "Data with timestamps bigger than now+futureRetention is automatically deleted. "+
|
||||
"The minimum futureRetention is 2 days. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#retention")
|
||||
snapshotAuthKey = flagutil.NewPassword("snapshotAuthKey", "authKey, which must be passed in query string to /snapshot* pages. It overrides -httpAuth.*")
|
||||
forceMergeAuthKey = flagutil.NewPassword("forceMergeAuthKey", "authKey, which must be passed in query string to /internal/force_merge pages. It overrides -httpAuth.*")
|
||||
forceFlushAuthKey = flagutil.NewPassword("forceFlushAuthKey", "authKey, which must be passed in query string to /internal/force_flush pages. It overrides -httpAuth.*")
|
||||
@@ -135,7 +137,12 @@ func Init(resetCacheIfNeeded func(mrs []storage.MetricRow)) {
|
||||
mergeset.SetDataBlocksSparseCacheSize(cacheSizeIndexDBDataBlocksSparse.IntN())
|
||||
|
||||
if retentionPeriod.Duration() < 24*time.Hour {
|
||||
logger.Fatalf("-retentionPeriod cannot be smaller than a day; got %s", retentionPeriod)
|
||||
logger.Fatalf("-retentionPeriod cannot be smaller than a day; got %s. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#retention", retentionPeriod)
|
||||
}
|
||||
if futureRetention.Duration() < 2*24*time.Hour {
|
||||
logger.Fatalf("-futureRetention cannot be smaller than 2 days; got %s. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#retention", futureRetention)
|
||||
}
|
||||
if *idbPrefillStart > 23*time.Hour {
|
||||
logger.Panicf("-storage.idbPrefillStart cannot exceed 23 hours; got %s", idbPrefillStart)
|
||||
@@ -145,6 +152,7 @@ func Init(resetCacheIfNeeded func(mrs []storage.MetricRow)) {
|
||||
WG = syncwg.WaitGroup{}
|
||||
opts := storage.OpenOptions{
|
||||
Retention: retentionPeriod.Duration(),
|
||||
FutureRetention: futureRetention.Duration(),
|
||||
MaxHourlySeries: getMaxHourlySeries(),
|
||||
MaxDailySeries: getMaxDailySeries(),
|
||||
DisablePerDayIndex: *disablePerDayIndex,
|
||||
|
||||
@@ -6,7 +6,7 @@ COPY web/ /build/
|
||||
RUN GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o web-amd64 github.com/VictoriMetrics/vmui/ && \
|
||||
GOOS=windows GOARCH=amd64 CGO_ENABLED=0 go build -o web-windows github.com/VictoriMetrics/vmui/
|
||||
|
||||
FROM alpine:3.23.3
|
||||
FROM alpine:3.23.4
|
||||
USER root
|
||||
|
||||
COPY --from=build-web-stage /build/web-amd64 /app/web
|
||||
|
||||
@@ -1,47 +1,18 @@
|
||||
import { ArrayRGB } from "../types";
|
||||
|
||||
export const baseContrastColors = [
|
||||
"#e54040",
|
||||
"#32a9dc",
|
||||
"#2ee329",
|
||||
"#7126a1",
|
||||
"#e38f0f",
|
||||
"#3d811a",
|
||||
"#ffea00",
|
||||
"#2d2d2d",
|
||||
"#da42a6",
|
||||
"#a44e0c",
|
||||
"#e6194b", // red
|
||||
"#4363d8", // blue
|
||||
"#3cb44b", // green
|
||||
"#911eb4", // purple
|
||||
"#f58231", // orange
|
||||
"#f032e6", // magenta
|
||||
"#c8a200", // dark yellow
|
||||
"#a65628", // brown
|
||||
"#42d4f4", // cyan
|
||||
"#a9a9a9", // gray
|
||||
];
|
||||
|
||||
export const hexToRGB = (hex: string): string => {
|
||||
if (hex.length != 7) return "0, 0, 0";
|
||||
const r = parseInt(hex.slice(1, 3), 16);
|
||||
const g = parseInt(hex.slice(3, 5), 16);
|
||||
const b = parseInt(hex.slice(5, 7), 16);
|
||||
return `${r}, ${g}, ${b}`;
|
||||
};
|
||||
|
||||
export const getColorFromString = (text: string): string => {
|
||||
const SEED = 16777215;
|
||||
const FACTOR = 49979693;
|
||||
|
||||
let b = 1;
|
||||
let d = 0;
|
||||
let f = 1;
|
||||
|
||||
if (text.length > 0) {
|
||||
for (let i = 0; i < text.length; i++) {
|
||||
text[i].charCodeAt(0) > d && (d = text[i].charCodeAt(0));
|
||||
f = parseInt(String(SEED / d));
|
||||
b = (b + text[i].charCodeAt(0) * f * FACTOR) % SEED;
|
||||
}
|
||||
}
|
||||
|
||||
let hex = ((b * text.length) % SEED).toString(16);
|
||||
hex = hex.padEnd(6, hex);
|
||||
return `#${hex}`;
|
||||
};
|
||||
|
||||
export const getContrastColor = (value: string) => {
|
||||
let hex = value.replace("#", "").trim();
|
||||
|
||||
@@ -70,3 +41,109 @@ export const generateGradient = (start: ArrayRGB, end: ArrayRGB, steps: number)
|
||||
}
|
||||
return gradient.map(c => `rgb(${c})`);
|
||||
};
|
||||
|
||||
const clamp = (n: number, min: number, max: number) => Math.min(max, Math.max(min, n));
|
||||
|
||||
const hexToRgb = (hex: string) => {
|
||||
let value = hex.replace("#", "").trim();
|
||||
|
||||
if (value.length === 3) {
|
||||
value = value.split("").map((c) => c + c).join("");
|
||||
}
|
||||
|
||||
if (!/^[0-9a-fA-F]{6}$/.test(value)) {
|
||||
throw new Error("Invalid HEX color.");
|
||||
}
|
||||
|
||||
return {
|
||||
r: parseInt(value.slice(0, 2), 16),
|
||||
g: parseInt(value.slice(2, 4), 16),
|
||||
b: parseInt(value.slice(4, 6), 16),
|
||||
};
|
||||
};
|
||||
|
||||
const rgbToHex = (r: number, g: number, b: number) =>
|
||||
`#${[r, g, b].map((v) => clamp(Math.round(v), 0, 255).toString(16).padStart(2, "0")).join("")}`;
|
||||
|
||||
const rgbToHsl = (r: number, g: number, b: number) => {
|
||||
r /= 255; g /= 255; b /= 255;
|
||||
const max = Math.max(r, g, b);
|
||||
const min = Math.min(r, g, b);
|
||||
const l = (max + min) / 2;
|
||||
const d = max - min;
|
||||
|
||||
let h = 0;
|
||||
let s = 0;
|
||||
|
||||
if (d !== 0) {
|
||||
s = d / (1 - Math.abs(2 * l - 1));
|
||||
|
||||
switch (max) {
|
||||
case r: h = ((g - b) / d) % 6; break;
|
||||
case g: h = (b - r) / d + 2; break;
|
||||
case b: h = (r - g) / d + 4; break;
|
||||
}
|
||||
|
||||
h *= 60;
|
||||
if (h < 0) h += 360;
|
||||
}
|
||||
|
||||
return { h, s: s * 100, l: l * 100 };
|
||||
};
|
||||
|
||||
const hslToRgb = (h: number, s: number, l: number) => {
|
||||
s /= 100;
|
||||
l /= 100;
|
||||
|
||||
const c = (1 - Math.abs(2 * l - 1)) * s;
|
||||
const x = c * (1 - Math.abs((h / 60) % 2 - 1));
|
||||
const m = l - c / 2;
|
||||
|
||||
let r: number;
|
||||
let g: number;
|
||||
let b: number;
|
||||
|
||||
if (h < 60) [r, g, b] = [c, x, 0];
|
||||
else if (h < 120) [r, g, b] = [x, c, 0];
|
||||
else if (h < 180) [r, g, b] = [0, c, x];
|
||||
else if (h < 240) [r, g, b] = [0, x, c];
|
||||
else if (h < 300) [r, g, b] = [x, 0, c];
|
||||
else [r, g, b] = [c, 0, x];
|
||||
|
||||
return {
|
||||
r: (r + m) * 255,
|
||||
g: (g + m) * 255,
|
||||
b: (b + m) * 255,
|
||||
};
|
||||
};
|
||||
|
||||
const varyColor = (hex: string, variant: number) => {
|
||||
const { r, g, b } = hexToRgb(hex);
|
||||
const { h, s, l } = rgbToHsl(r, g, b);
|
||||
|
||||
const variants = [
|
||||
{ ds: 0, dl: 0 },
|
||||
{ ds: -20, dl: -16 },
|
||||
{ ds: -16, dl: +16 },
|
||||
{ ds: +14, dl: -20 },
|
||||
];
|
||||
|
||||
const v = variants[variant % variants.length];
|
||||
|
||||
const nextS = clamp(s + v.ds, 35, 85);
|
||||
const nextL = clamp(l + v.dl, 35, 70);
|
||||
|
||||
const rgb = hslToRgb(h, nextS, nextL);
|
||||
return rgbToHex(rgb.r, rgb.g, rgb.b);
|
||||
};
|
||||
|
||||
export const getSeriesColor = (index: number) => {
|
||||
const baseCount = baseContrastColors.length;
|
||||
|
||||
const baseIndex = index % baseCount;
|
||||
const variantIndex = Math.floor(index / baseCount);
|
||||
|
||||
const base = baseContrastColors[(baseIndex + variantIndex) % baseCount];
|
||||
|
||||
return varyColor(base, variantIndex);
|
||||
};
|
||||
|
||||
@@ -2,7 +2,7 @@ import { MetricBase, MetricResult } from "../../api/types";
|
||||
import uPlot, { Series as uPlotSeries } from "uplot";
|
||||
import { getNameForMetric, promValueToNumber } from "../metric";
|
||||
import { HideSeriesArgs, LegendItemType, SeriesItem } from "../../types";
|
||||
import { baseContrastColors, getColorFromString } from "../color";
|
||||
import { getSeriesColor } from "../color";
|
||||
import { getMathStats } from "../math";
|
||||
import { formatPrettyNumber } from "./helpers";
|
||||
import { drawPoints } from "./scatter";
|
||||
@@ -17,11 +17,10 @@ export const extractFields = (metric: MetricBase["metric"]): string => {
|
||||
|
||||
export const getSeriesItemContext = (data: MetricResult[], hideSeries: string[], alias: string[], showPoints?: boolean, isRawQuery?: boolean) => {
|
||||
const colorState: {[key: string]: string} = {};
|
||||
const maxColors = Math.min(data.length, baseContrastColors.length);
|
||||
|
||||
for (let i = 0; i < maxColors; i++) {
|
||||
for (let i = 0; i < data.length; i++) {
|
||||
const label = getNameForMetric(data[i], alias[data[i].group - 1]);
|
||||
colorState[label] = baseContrastColors[i];
|
||||
colorState[label] = getSeriesColor(i);
|
||||
}
|
||||
|
||||
return (d: MetricResult): SeriesItem => {
|
||||
@@ -32,7 +31,7 @@ export const getSeriesItemContext = (data: MetricResult[], hideSeries: string[],
|
||||
label,
|
||||
hasAlias: Boolean(aliasValue),
|
||||
width: 1.4,
|
||||
stroke: colorState[label] || getColorFromString(label),
|
||||
stroke: colorState[label],
|
||||
points: getPointsSeries(showPoints, isRawQuery),
|
||||
spanGaps: false,
|
||||
freeFormFields: d.metric,
|
||||
|
||||
@@ -28,6 +28,7 @@ func TestSingleBackupRestore(t *testing.T) {
|
||||
return tc.MustStartVmsingle("vmsingle", []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-futureRetention=2y",
|
||||
})
|
||||
},
|
||||
stopSUT: func() {
|
||||
@@ -60,11 +61,13 @@ func TestClusterBackupRestore(t *testing.T) {
|
||||
Vmstorage1Flags: []string{
|
||||
"-storageDataPath=" + storage1DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-futureRetention=2y",
|
||||
},
|
||||
Vmstorage2Instance: "vmstorage2",
|
||||
Vmstorage2Flags: []string{
|
||||
"-storageDataPath=" + storage2DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-futureRetention=2y",
|
||||
},
|
||||
VminsertInstance: "vminsert",
|
||||
VminsertFlags: []string{},
|
||||
@@ -97,10 +100,16 @@ func TestClusterBackupRestore(t *testing.T) {
|
||||
func testBackupRestore(tc *apptest.TestCase, opts testBackupRestoreOpts) {
|
||||
t := tc.T()
|
||||
|
||||
genData := func(count int, prefix string, start, step int64) (recs []string, wantSeries []map[string]string, wantQueryResults []*apptest.QueryResult) {
|
||||
recs = make([]string, count)
|
||||
wantSeries = make([]map[string]string, count)
|
||||
wantQueryResults = make([]*apptest.QueryResult, count)
|
||||
type data struct {
|
||||
samples []string
|
||||
wantSeries []map[string]string
|
||||
wantQueryResults []*apptest.QueryResult
|
||||
}
|
||||
|
||||
genData := func(count int, prefix string, start, step int64) data {
|
||||
recs := make([]string, count)
|
||||
wantSeries := make([]map[string]string, count)
|
||||
wantQueryResults := make([]*apptest.QueryResult, count)
|
||||
for i := range count {
|
||||
name := fmt.Sprintf("%s_%03d", prefix, i)
|
||||
value := float64(i)
|
||||
@@ -113,7 +122,15 @@ func testBackupRestore(tc *apptest.TestCase, opts testBackupRestoreOpts) {
|
||||
Samples: []*apptest.Sample{{Timestamp: timestamp, Value: value}},
|
||||
}
|
||||
}
|
||||
return recs, wantSeries, wantQueryResults
|
||||
return data{recs, wantSeries, wantQueryResults}
|
||||
}
|
||||
|
||||
concatData := func(d1, d2 data) data {
|
||||
var d data
|
||||
d.samples = slices.Concat(d1.samples, d2.samples)
|
||||
d.wantSeries = slices.Concat(d1.wantSeries, d2.wantSeries)
|
||||
d.wantQueryResults = slices.Concat(d1.wantQueryResults, d2.wantQueryResults)
|
||||
return d
|
||||
}
|
||||
|
||||
backupBaseDir, err := filepath.Abs(filepath.Join(tc.Dir(), "backups"))
|
||||
@@ -190,10 +207,20 @@ func testBackupRestore(tc *apptest.TestCase, opts testBackupRestoreOpts) {
|
||||
// Use the same number of metrics and time range for all the data ingestions
|
||||
// below.
|
||||
const numMetrics = 1000
|
||||
// With 1000 metrics (one per minute), the time range spans 2 months.
|
||||
start := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
end := time.Date(2025, 3, 1, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
step := (end - start) / numMetrics
|
||||
batch1 := genData(numMetrics, "batch1", start, step)
|
||||
batch2 := genData(numMetrics, "batch2", start, step)
|
||||
batches12 := concatData(batch1, batch2)
|
||||
|
||||
now := time.Now().UTC()
|
||||
startFuture := time.Date(now.Year()+1, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
endFuture := time.Date(now.Year()+1, 3, 1, 0, 0, 0, 0, time.UTC).UnixMilli()
|
||||
stepFuture := (endFuture - startFuture) / numMetrics
|
||||
batch1Future := genData(numMetrics, "batch1", startFuture, stepFuture)
|
||||
batch2Future := genData(numMetrics, "batch2", startFuture, stepFuture)
|
||||
batches12Future := concatData(batch1Future, batch2Future)
|
||||
|
||||
// Verify backup/restore:
|
||||
//
|
||||
@@ -207,24 +234,25 @@ func testBackupRestore(tc *apptest.TestCase, opts testBackupRestoreOpts) {
|
||||
// - Start vmsingle
|
||||
// - Ensure that the queries return batch1 data only.
|
||||
|
||||
batch1Data, wantBatch1Series, wantBatch1QueryResults := genData(numMetrics, "batch1", start, step)
|
||||
batch2Data, wantBatch2Series, wantBatch2QueryResults := genData(numMetrics, "batch2", start, step)
|
||||
wantBatch12Series := slices.Concat(wantBatch1Series, wantBatch2Series)
|
||||
wantBatch12QueryResults := slices.Concat(wantBatch1QueryResults, wantBatch2QueryResults)
|
||||
|
||||
sut := opts.startSUT()
|
||||
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, batch1Data, apptest.QueryOpts{})
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, batch1.samples, apptest.QueryOpts{})
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, batch1Future.samples, apptest.QueryOpts{})
|
||||
sut.ForceFlush(t)
|
||||
assertSeries(sut, `{__name__=~"batch1.*"}`, start, end, wantBatch1Series)
|
||||
assertQueryResults(sut, `{__name__=~"batch1.*"}`, start, end, step, wantBatch1QueryResults)
|
||||
assertSeries(sut, `{__name__=~"batch1.*"}`, start, end, batch1.wantSeries)
|
||||
assertSeries(sut, `{__name__=~"batch1.*"}`, startFuture, endFuture, batch1Future.wantSeries)
|
||||
assertQueryResults(sut, `{__name__=~"batch1.*"}`, start, end, step, batch1.wantQueryResults)
|
||||
assertQueryResults(sut, `{__name__=~"batch1.*"}`, startFuture, endFuture, stepFuture, batch1Future.wantQueryResults)
|
||||
|
||||
createBackup(sut, "batch1")
|
||||
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, batch2Data, apptest.QueryOpts{})
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, batch2.samples, apptest.QueryOpts{})
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, batch2Future.samples, apptest.QueryOpts{})
|
||||
sut.ForceFlush(t)
|
||||
assertSeries(sut, `{__name__=~"batch(1|2).*"}`, start, end, wantBatch12Series)
|
||||
assertQueryResults(sut, `{__name__=~"batch(1|2).*"}`, start, end, step, wantBatch12QueryResults)
|
||||
assertSeries(sut, `{__name__=~"batch(1|2).*"}`, start, end, batches12.wantSeries)
|
||||
assertSeries(sut, `{__name__=~"batch(1|2).*"}`, startFuture, endFuture, batches12Future.wantSeries)
|
||||
assertQueryResults(sut, `{__name__=~"batch(1|2).*"}`, start, end, step, batches12.wantQueryResults)
|
||||
assertQueryResults(sut, `{__name__=~"batch(1|2).*"}`, startFuture, endFuture, stepFuture, batches12Future.wantQueryResults)
|
||||
createBackup(sut, "batch12")
|
||||
|
||||
opts.stopSUT()
|
||||
@@ -233,6 +261,8 @@ func testBackupRestore(tc *apptest.TestCase, opts testBackupRestoreOpts) {
|
||||
|
||||
sut = opts.startSUT()
|
||||
|
||||
assertSeries(sut, `{__name__=~"batch1.*"}`, start, end, wantBatch1Series)
|
||||
assertQueryResults(sut, `{__name__=~"batch1.*"}`, start, end, step, wantBatch1QueryResults)
|
||||
assertSeries(sut, `{__name__=~"batch(1|2).*"}`, start, end, batch1.wantSeries)
|
||||
assertSeries(sut, `{__name__=~"batch(1|2).*"}`, startFuture, endFuture, batch1Future.wantSeries)
|
||||
assertQueryResults(sut, `{__name__=~"batch(1|2).*"}`, start, end, step, batch1.wantQueryResults)
|
||||
assertQueryResults(sut, `{__name__=~"batch(1|2).*"}`, startFuture, endFuture, stepFuture, batch1Future.wantQueryResults)
|
||||
}
|
||||
|
||||
211
apptest/tests/future_timestamps_test.go
Normal file
211
apptest/tests/future_timestamps_test.go
Normal file
@@ -0,0 +1,211 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
)
|
||||
|
||||
func TestSingleFutureTimestamps(t *testing.T) {
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
opts := testFutureTimestampsOpts{
|
||||
start: func() apptest.PrometheusWriteQuerier {
|
||||
return tc.MustStartVmsingle("vmsingle", []string{
|
||||
"-storageDataPath=" + filepath.Join(tc.Dir(), "vmsingle"),
|
||||
"-retentionPeriod=100y",
|
||||
"-futureRetention=100y",
|
||||
})
|
||||
},
|
||||
stop: func() {
|
||||
tc.StopApp("vmsingle")
|
||||
},
|
||||
}
|
||||
|
||||
testFutureTimestamps(tc, opts)
|
||||
}
|
||||
|
||||
func TestClusterFutureTimestamps(t *testing.T) {
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
opts := testFutureTimestampsOpts{
|
||||
start: func() apptest.PrometheusWriteQuerier {
|
||||
return tc.MustStartCluster(&apptest.ClusterOptions{
|
||||
Vmstorage1Instance: "vmstorage1",
|
||||
Vmstorage1Flags: []string{
|
||||
"-storageDataPath=" + filepath.Join(tc.Dir(), "vmstorage1"),
|
||||
"-retentionPeriod=100y",
|
||||
"-futureRetention=100y",
|
||||
},
|
||||
Vmstorage2Instance: "vmstorage2",
|
||||
Vmstorage2Flags: []string{
|
||||
"-storageDataPath=" + filepath.Join(tc.Dir(), "vmstorage2"),
|
||||
"-retentionPeriod=100y",
|
||||
"-futureRetention=100y",
|
||||
},
|
||||
VminsertInstance: "vminsert",
|
||||
VminsertFlags: []string{},
|
||||
VmselectInstance: "vmselect",
|
||||
VmselectFlags: []string{},
|
||||
})
|
||||
},
|
||||
stop: func() {
|
||||
tc.StopApp("vminsert")
|
||||
tc.StopApp("vmselect")
|
||||
tc.StopApp("vmstorage1")
|
||||
tc.StopApp("vmstorage2")
|
||||
},
|
||||
}
|
||||
|
||||
testFutureTimestamps(tc, opts)
|
||||
}
|
||||
|
||||
type testFutureTimestampsOpts struct {
|
||||
start func() apptest.PrometheusWriteQuerier
|
||||
stop func()
|
||||
}
|
||||
|
||||
func testFutureTimestamps(tc *apptest.TestCase, opts testFutureTimestampsOpts) {
|
||||
t := tc.T()
|
||||
|
||||
// assertSeries retrieves set of all metric names from the storage and
|
||||
// compares it with the expected set.
|
||||
assertSeries := func(app apptest.PrometheusQuerier, prefix string, start, end int64, want []map[string]string) {
|
||||
t.Helper()
|
||||
|
||||
query := fmt.Sprintf(`{__name__=~"metric_%s.*"}`, prefix)
|
||||
tc.Assert(&apptest.AssertOptions{
|
||||
Msg: "unexpected /api/v1/series response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1Series(t, query, apptest.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
}).Sort()
|
||||
},
|
||||
Want: &apptest.PrometheusAPIV1SeriesResponse{
|
||||
Status: "success",
|
||||
Data: want,
|
||||
},
|
||||
FailNow: true,
|
||||
})
|
||||
}
|
||||
|
||||
// assertSeries retrieves all data from the storage and compares it with the
|
||||
// expected result.
|
||||
assertQueryResults := func(app apptest.PrometheusQuerier, prefix string, start, end, step int64, want []*apptest.QueryResult) {
|
||||
t.Helper()
|
||||
|
||||
query := fmt.Sprintf(`{__name__=~"metric_%s.*"}`, prefix)
|
||||
tc.Assert(&apptest.AssertOptions{
|
||||
Msg: "unexpected /api/v1/query_range response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1QueryRange(t, query, apptest.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
Step: fmt.Sprintf("%dms", step),
|
||||
MaxLookback: fmt.Sprintf("%dms", step-1),
|
||||
NoCache: "1",
|
||||
})
|
||||
},
|
||||
Want: &apptest.PrometheusAPIV1QueryResponse{
|
||||
Status: "success",
|
||||
Data: &apptest.QueryData{
|
||||
ResultType: "matrix",
|
||||
Result: want,
|
||||
},
|
||||
},
|
||||
FailNow: true,
|
||||
})
|
||||
}
|
||||
|
||||
f := func(prefix string, startTime, endTime time.Time, wantEmpty bool) {
|
||||
const numMetrics = 1000
|
||||
start := startTime.UnixMilli()
|
||||
end := endTime.UnixMilli()
|
||||
step := (end - start) / numMetrics
|
||||
data := genFutureTimestampsData(prefix, numMetrics, start, step)
|
||||
if wantEmpty {
|
||||
data.wantSeries = []map[string]string{}
|
||||
data.wantQueryResults = []*apptest.QueryResult{}
|
||||
}
|
||||
|
||||
// Ingest data and check query results.
|
||||
sut := opts.start()
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, data.samples, apptest.QueryOpts{})
|
||||
sut.ForceFlush(t)
|
||||
assertSeries(sut, prefix, start, end, data.wantSeries)
|
||||
assertQueryResults(sut, prefix, start, end, step, data.wantQueryResults)
|
||||
|
||||
// Ensure the queries work after restrart.
|
||||
opts.stop()
|
||||
sut = opts.start()
|
||||
assertSeries(sut, prefix, start, end, data.wantSeries)
|
||||
assertQueryResults(sut, prefix, start, end, step, data.wantQueryResults)
|
||||
|
||||
opts.stop()
|
||||
}
|
||||
|
||||
now := time.Now().UTC()
|
||||
retentionLimit := 100 * 365 * 24 * time.Hour
|
||||
var start, end time.Time
|
||||
|
||||
start = time.Date(now.Year(), now.Month(), now.Day()+1, 0, 0, 0, 0, time.UTC)
|
||||
end = time.Date(now.Year(), now.Month(), now.Day()+2, 0, 0, 0, 0, time.UTC)
|
||||
f("future_1d", start, end, false)
|
||||
|
||||
start = time.Date(now.Year(), now.Month()+1, 1, 0, 0, 0, 0, time.UTC)
|
||||
end = time.Date(now.Year(), now.Month()+2, 1, 0, 0, 0, 0, time.UTC)
|
||||
f("future_1m", start, end, false)
|
||||
|
||||
start = time.Date(now.Year()+1, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
end = time.Date(now.Year()+2, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
f("future_1y", start, end, false)
|
||||
|
||||
start = now.Add(retentionLimit - 24*time.Hour)
|
||||
end = now.Add(retentionLimit)
|
||||
f("future_1d_before_limit", start, end, false)
|
||||
|
||||
start = now.Add(retentionLimit + time.Minute)
|
||||
end = now.Add(retentionLimit + 24*time.Hour)
|
||||
f("future_1d_beyond_limit", start, end, true)
|
||||
|
||||
}
|
||||
|
||||
type futureTimestampsData struct {
|
||||
samples []string
|
||||
wantSeries []map[string]string
|
||||
wantQueryResults []*apptest.QueryResult
|
||||
}
|
||||
|
||||
func genFutureTimestampsData(prefix string, numMetrics, start, step int64) futureTimestampsData {
|
||||
samples := make([]string, numMetrics)
|
||||
wantSeries := make([]map[string]string, numMetrics)
|
||||
wantQueryResults := make([]*apptest.QueryResult, numMetrics)
|
||||
for i := range numMetrics {
|
||||
metricName := fmt.Sprintf("metric_%s_%04d", prefix, i)
|
||||
labelName := fmt.Sprintf("label_%s_%04d", prefix, i)
|
||||
labelValue := fmt.Sprintf("value_%s_%04d", prefix, i)
|
||||
value := i
|
||||
timestamp := start + i*step
|
||||
samples[i] = fmt.Sprintf(`%s{%s="value", label="%s"} %d %d`, metricName, labelName, labelValue, value, timestamp)
|
||||
wantSeries[i] = map[string]string{
|
||||
"__name__": metricName,
|
||||
labelName: "value",
|
||||
"label": labelValue,
|
||||
}
|
||||
wantQueryResults[i] = &apptest.QueryResult{
|
||||
Metric: map[string]string{
|
||||
"__name__": metricName,
|
||||
labelName: "value",
|
||||
"label": labelValue,
|
||||
},
|
||||
Samples: []*apptest.Sample{{Timestamp: timestamp, Value: float64(value)}},
|
||||
}
|
||||
}
|
||||
return futureTimestampsData{samples, wantSeries, wantQueryResults}
|
||||
}
|
||||
186
apptest/tests/opentsdb_server_test.go
Normal file
186
apptest/tests/opentsdb_server_test.go
Normal file
@@ -0,0 +1,186 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// openTSDBPoint is a single data point served by the mock OpenTSDB server.
|
||||
type openTSDBPoint struct {
|
||||
Metric string
|
||||
Tags map[string]string
|
||||
Timestamp int64
|
||||
Value float64
|
||||
}
|
||||
|
||||
// openTSDBMockServer implements the minimal subset of the OpenTSDB HTTP API
|
||||
// used by vmctl opentsdb: /api/suggest, /api/search/lookup, /api/query.
|
||||
type openTSDBMockServer struct {
|
||||
server *httptest.Server
|
||||
points []openTSDBPoint
|
||||
}
|
||||
|
||||
// newOpenTSDBMockServer starts an httptest server serving the given points.
|
||||
func newOpenTSDBMockServer(t *testing.T, points []openTSDBPoint) *openTSDBMockServer {
|
||||
t.Helper()
|
||||
s := &openTSDBMockServer{points: points}
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/api/suggest", s.handleSuggest)
|
||||
mux.HandleFunc("/api/search/lookup", s.handleLookup)
|
||||
mux.HandleFunc("/api/query", s.handleQuery)
|
||||
s.server = httptest.NewServer(mux)
|
||||
return s
|
||||
}
|
||||
|
||||
// close shuts down the server.
|
||||
func (s *openTSDBMockServer) close() { s.server.Close() }
|
||||
|
||||
// httpAddr returns the server URL.
|
||||
func (s *openTSDBMockServer) httpAddr() string { return s.server.URL }
|
||||
|
||||
// handleSuggest serves https://opentsdb.net/docs/build/html/api_http/suggest.html
|
||||
func (s *openTSDBMockServer) handleSuggest(w http.ResponseWriter, r *http.Request) {
|
||||
q := r.URL.Query().Get("q")
|
||||
seen := make(map[string]bool, len(s.points))
|
||||
var out []string
|
||||
for _, p := range s.points {
|
||||
if seen[p.Metric] {
|
||||
continue
|
||||
}
|
||||
if q != "" && !strings.Contains(p.Metric, q) {
|
||||
continue
|
||||
}
|
||||
seen[p.Metric] = true
|
||||
out = append(out, p.Metric)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(out)
|
||||
}
|
||||
|
||||
// handleLookup serves https://opentsdb.net/docs/build/html/api_http/search/lookup.html
|
||||
func (s *openTSDBMockServer) handleLookup(w http.ResponseWriter, r *http.Request) {
|
||||
metric := r.URL.Query().Get("m")
|
||||
type meta struct {
|
||||
Metric string `json:"metric"`
|
||||
Tags map[string]string `json:"tags"`
|
||||
}
|
||||
seen := make(map[string]bool, len(s.points))
|
||||
var results []meta
|
||||
for _, p := range s.points {
|
||||
if p.Metric != metric {
|
||||
continue
|
||||
}
|
||||
key := tagsKey(p.Tags)
|
||||
if seen[key] {
|
||||
continue
|
||||
}
|
||||
seen[key] = true
|
||||
results = append(results, meta{p.Metric, p.Tags})
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"type": "LOOKUP",
|
||||
"metric": metric,
|
||||
"results": results,
|
||||
})
|
||||
}
|
||||
|
||||
// handleQuery serves https://opentsdb.net/docs/build/html/api_http/query/index.html
|
||||
func (s *openTSDBMockServer) handleQuery(w http.ResponseWriter, r *http.Request) {
|
||||
m := r.URL.Query().Get("m")
|
||||
metric, tagFilter, ok := parseQuery(m)
|
||||
if !ok {
|
||||
http.Error(w, "bad query param", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
start, err := strconv.ParseInt(r.URL.Query().Get("start"), 10, 64)
|
||||
if err != nil {
|
||||
http.Error(w, "bad start param", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
end, err := strconv.ParseInt(r.URL.Query().Get("end"), 10, 64)
|
||||
if err != nil {
|
||||
http.Error(w, "bad end param", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
type resp struct {
|
||||
Metric string `json:"metric"`
|
||||
Tags map[string]string `json:"tags"`
|
||||
AggregateTags []string `json:"aggregateTags"`
|
||||
Dps map[string]float64 `json:"dps"`
|
||||
}
|
||||
grouped := make(map[string]*resp, len(s.points))
|
||||
for _, p := range s.points {
|
||||
if p.Metric != metric {
|
||||
continue
|
||||
}
|
||||
if !matchTags(p.Tags, tagFilter) {
|
||||
continue
|
||||
}
|
||||
if p.Timestamp < start || p.Timestamp > end {
|
||||
continue
|
||||
}
|
||||
key := tagsKey(p.Tags)
|
||||
if _, exists := grouped[key]; !exists {
|
||||
grouped[key] = &resp{
|
||||
Metric: p.Metric,
|
||||
Tags: p.Tags,
|
||||
AggregateTags: []string{},
|
||||
Dps: map[string]float64{},
|
||||
}
|
||||
}
|
||||
grouped[key].Dps[fmt.Sprintf("%d", p.Timestamp)] = p.Value
|
||||
}
|
||||
out := make([]*resp, 0, len(grouped))
|
||||
for _, v := range grouped {
|
||||
out = append(out, v)
|
||||
}
|
||||
_ = json.NewEncoder(w).Encode(out)
|
||||
}
|
||||
|
||||
// parseQuery parses the OpenTSDB m= query parameter.
|
||||
// Format: "<agg>:<bucket>-<agg>-none:<metric>{k=v,k=v}"
|
||||
func parseQuery(m string) (string, map[string]string, bool) {
|
||||
parts := strings.SplitN(m, ":", 3)
|
||||
if len(parts) != 3 {
|
||||
return "", nil, false
|
||||
}
|
||||
metric, tagStr, _ := strings.Cut(parts[2], "{")
|
||||
tags := make(map[string]string, 4)
|
||||
tagStr = strings.TrimSuffix(tagStr, "}")
|
||||
for _, kv := range strings.Split(tagStr, ",") {
|
||||
if k, v, ok := strings.Cut(kv, "="); ok {
|
||||
tags[k] = v
|
||||
}
|
||||
}
|
||||
return metric, tags, true
|
||||
}
|
||||
|
||||
func matchTags(got, filter map[string]string) bool {
|
||||
for k, v := range filter {
|
||||
if v == "*" {
|
||||
continue
|
||||
}
|
||||
if got[k] != v {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func tagsKey(tags map[string]string) string {
|
||||
keys := make([]string, 0, len(tags))
|
||||
for k := range tags {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
parts := make([]string, 0, len(keys))
|
||||
for _, k := range keys {
|
||||
parts = append(parts, k+"="+tags[k])
|
||||
}
|
||||
return strings.Join(parts, ",")
|
||||
}
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
// TestSingleVMAgentReloadConfigs verifies that vmagent reload new configurations on SIGHUP signal
|
||||
@@ -512,3 +513,74 @@ func TestSingleVMAgentCardinalityLimiter(t *testing.T) {
|
||||
t.Fatalf("unexpected vmagent_daily_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClusterVMAgentForwardMetricsMetadata(t *testing.T) {
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
sut := tc.MustStartDefaultCluster()
|
||||
|
||||
vmagent := tc.MustStartVmagent("vmagent", []string{
|
||||
`-remoteWrite.flushInterval=50ms`,
|
||||
`-remoteWrite.forcePromProto=true`,
|
||||
`-enableMultitenantHandlers=true`,
|
||||
"-remoteWrite.tmpDataPath=" + tc.Dir() + "/vmagent",
|
||||
fmt.Sprintf(`-remoteWrite.url=http://%s/insert/multitenant/prometheus/api/v1/write`, sut.Vminsert.HTTPAddr()),
|
||||
}, ``)
|
||||
|
||||
prometheusRemoteWriteDataSet := prompb.WriteRequest{
|
||||
Metadata: []prompb.MetricMetadata{
|
||||
{MetricFamilyName: "metric_name_4", Help: "some help message", Type: prompb.MetricTypeSummary, AccountID: 100},
|
||||
},
|
||||
}
|
||||
vmagent.PrometheusAPIV1Write(t, prometheusRemoteWriteDataSet, apptest.QueryOpts{Tenant: "multitenant"})
|
||||
|
||||
tc.Assert(&apptest.AssertOptions{
|
||||
Msg: "unexpected /api/v1/metadata response",
|
||||
Got: func() any {
|
||||
return sut.PrometheusAPIV1Metadata(t, ``, -1, apptest.QueryOpts{Tenant: "100:0"})
|
||||
},
|
||||
Want: &apptest.PrometheusAPIV1Metadata{
|
||||
Status: "success",
|
||||
Data: map[string][]apptest.MetadataEntry{
|
||||
"metric_name_4": {{Help: "some help message", Type: "summary"}},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
prometheusRemoteWriteDataSet = prompb.WriteRequest{
|
||||
Metadata: []prompb.MetricMetadata{
|
||||
{MetricFamilyName: "metric_name_6", Help: "some help message", Type: prompb.MetricTypeSummary, AccountID: 100},
|
||||
},
|
||||
}
|
||||
// enforce tenant from request uri /insert/tenant_id/prometheus/api/v1/write
|
||||
vmagent.PrometheusAPIV1Write(t, prometheusRemoteWriteDataSet, apptest.QueryOpts{Tenant: "500:500"})
|
||||
|
||||
tc.Assert(&apptest.AssertOptions{
|
||||
Msg: "unexpected /api/v1/metadata response",
|
||||
Got: func() any {
|
||||
return sut.PrometheusAPIV1Metadata(t, ``, -1, apptest.QueryOpts{Tenant: "500:500"})
|
||||
},
|
||||
Want: &apptest.PrometheusAPIV1Metadata{
|
||||
Status: "success",
|
||||
Data: map[string][]apptest.MetadataEntry{
|
||||
"metric_name_6": {{Help: "some help message", Type: "summary"}},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
tc.Assert(&apptest.AssertOptions{
|
||||
Msg: "unexpected /api/v1/metadata response",
|
||||
Got: func() any {
|
||||
return sut.PrometheusAPIV1Metadata(t, ``, -1, apptest.QueryOpts{Tenant: "multitenant"})
|
||||
},
|
||||
Want: &apptest.PrometheusAPIV1Metadata{
|
||||
Status: "success",
|
||||
Data: map[string][]apptest.MetadataEntry{
|
||||
"metric_name_4": {{Help: "some help message", Type: "summary"}},
|
||||
"metric_name_6": {{Help: "some help message", Type: "summary"}},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
167
apptest/tests/vmctl_opentsdb_migration_test.go
Normal file
167
apptest/tests/vmctl_opentsdb_migration_test.go
Normal file
@@ -0,0 +1,167 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
)
|
||||
|
||||
func TestSingleVmctlOpenTSDBProtocol(t *testing.T) {
|
||||
fs.MustRemoveDir(t.Name())
|
||||
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
vmsingleDst := tc.MustStartDefaultVmsingle()
|
||||
vmAddr := fmt.Sprintf("http://%s/", vmsingleDst.HTTPAddr())
|
||||
|
||||
// Generate 60 points at 1-minute intervals starting 2 hours ago.
|
||||
// This ensures data falls within vmctl's default query window (now - retention).
|
||||
baseTS := time.Now().Add(-2 * time.Hour).Truncate(time.Minute).Unix()
|
||||
points := make([]openTSDBPoint, 0, 60)
|
||||
for i := range 60 {
|
||||
points = append(points, openTSDBPoint{
|
||||
Metric: "test.cpu",
|
||||
Tags: map[string]string{"host": "h1", "env": "prod"},
|
||||
Timestamp: baseTS + int64(i*60),
|
||||
Value: float64(i),
|
||||
})
|
||||
}
|
||||
|
||||
otsdb := newOpenTSDBMockServer(t, points)
|
||||
defer otsdb.close()
|
||||
|
||||
vmctlFlags := []string{
|
||||
`opentsdb`,
|
||||
`--otsdb-addr=` + otsdb.httpAddr(),
|
||||
`--vm-addr=` + vmAddr,
|
||||
`--otsdb-retentions=ssum-1m-avg:1d:1d`,
|
||||
`--otsdb-filters=test`,
|
||||
`--otsdb-normalize`,
|
||||
`--disable-progress-bar=true`,
|
||||
`-s`,
|
||||
}
|
||||
|
||||
testOpenTSDBProtocol(tc, vmsingleDst, vmctlFlags, points, "test_cpu", baseTS)
|
||||
}
|
||||
|
||||
func TestClusterVmctlOpenTSDBProtocol(t *testing.T) {
|
||||
fs.MustRemoveDir(t.Name())
|
||||
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
cluster := tc.MustStartDefaultCluster()
|
||||
vmAddr := fmt.Sprintf("http://%s/", cluster.Vminsert.HTTPAddr())
|
||||
|
||||
// Generate 60 points at 1-minute intervals starting 2 hours ago.
|
||||
baseTS := time.Now().Add(-2 * time.Hour).Truncate(time.Minute).Unix()
|
||||
points := make([]openTSDBPoint, 0, 60)
|
||||
for i := range 60 {
|
||||
points = append(points, openTSDBPoint{
|
||||
Metric: "test.mem",
|
||||
Tags: map[string]string{"host": "h1"},
|
||||
Timestamp: baseTS + int64(i*60),
|
||||
Value: float64(i * 2),
|
||||
})
|
||||
}
|
||||
|
||||
otsdb := newOpenTSDBMockServer(t, points)
|
||||
defer otsdb.close()
|
||||
|
||||
vmctlFlags := []string{
|
||||
`opentsdb`,
|
||||
`--otsdb-addr=` + otsdb.httpAddr(),
|
||||
`--vm-addr=` + vmAddr,
|
||||
`--otsdb-retentions=sum-1m-avg:1d:1d`,
|
||||
`--otsdb-filters=test`,
|
||||
`--otsdb-normalize`,
|
||||
`--disable-progress-bar=true`,
|
||||
`--vm-account-id=0`,
|
||||
`-s`,
|
||||
}
|
||||
|
||||
testOpenTSDBProtocol(tc, cluster, vmctlFlags, points, "test_mem", baseTS)
|
||||
}
|
||||
|
||||
func testOpenTSDBProtocol(
|
||||
tc *apptest.TestCase,
|
||||
queries apptest.PrometheusWriteQuerier,
|
||||
vmctlFlags []string,
|
||||
points []openTSDBPoint,
|
||||
vmMetricName string,
|
||||
baseTS int64,
|
||||
) {
|
||||
t := tc.T()
|
||||
t.Helper()
|
||||
|
||||
// Build dynamic time range covering all data points with 1-hour padding.
|
||||
queryStart := time.Unix(baseTS-3600, 0).UTC().Format(time.RFC3339)
|
||||
queryEnd := time.Unix(baseTS+7200, 0).UTC().Format(time.RFC3339)
|
||||
|
||||
cmpOpt := cmpopts.IgnoreFields(apptest.PrometheusAPIV1QueryResponse{}, "Status", "Data.ResultType")
|
||||
|
||||
got := queries.PrometheusAPIV1Query(t, `{__name__=~".*"}`, apptest.QueryOpts{
|
||||
Step: "5m",
|
||||
Time: queryStart,
|
||||
})
|
||||
want := apptest.NewPrometheusAPIV1QueryResponse(t, `{"data":{"result":[]}}`)
|
||||
if diff := cmp.Diff(want, got, cmpOpt); diff != "" {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
tc.MustStartVmctl("vmctl", vmctlFlags)
|
||||
queries.ForceFlush(t)
|
||||
|
||||
expected := buildExpectedOpenTSDBResult(points, vmMetricName)
|
||||
|
||||
tc.Assert(&apptest.AssertOptions{
|
||||
Retries: 300,
|
||||
Msg: `unexpected metrics stored via opentsdb protocol`,
|
||||
Got: func() any {
|
||||
r := queries.PrometheusAPIV1Export(t, fmt.Sprintf(`{__name__=%q}`, vmMetricName), apptest.QueryOpts{
|
||||
Start: queryStart,
|
||||
End: queryEnd,
|
||||
})
|
||||
r.Sort()
|
||||
return r.Data.Result
|
||||
},
|
||||
Want: expected,
|
||||
CmpOpts: []cmp.Option{
|
||||
cmpopts.IgnoreFields(apptest.PrometheusAPIV1QueryResponse{}, "Status", "Data.ResultType"),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func buildExpectedOpenTSDBResult(points []openTSDBPoint, vmMetricName string) []*apptest.QueryResult {
|
||||
grouped := map[string]*apptest.QueryResult{}
|
||||
for _, p := range points {
|
||||
metric := map[string]string{"__name__": vmMetricName}
|
||||
for k, v := range p.Tags {
|
||||
metric[k] = v
|
||||
}
|
||||
key := tagsKey(metric)
|
||||
if _, ok := grouped[key]; !ok {
|
||||
grouped[key] = &apptest.QueryResult{Metric: metric}
|
||||
}
|
||||
grouped[key].Samples = append(grouped[key].Samples, &apptest.Sample{
|
||||
Timestamp: p.Timestamp * 1000,
|
||||
Value: p.Value,
|
||||
})
|
||||
}
|
||||
out := make([]*apptest.QueryResult, 0, len(grouped))
|
||||
for _, v := range grouped {
|
||||
out = append(out, v)
|
||||
}
|
||||
resp := apptest.PrometheusAPIV1QueryResponse{
|
||||
Data: &apptest.QueryData{Result: out},
|
||||
}
|
||||
resp.Sort()
|
||||
return resp.Data.Result
|
||||
}
|
||||
@@ -10,6 +10,10 @@ import (
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
// Vmagent holds the state of a vmagent app and provides vmagent-specific functions
|
||||
@@ -158,6 +162,31 @@ func (app *Vmagent) ReloadRelabelConfigs(t *testing.T) {
|
||||
t.Fatalf("relabel configs were not reloaded after SIGHUP signal; previous total: %f, current total: %f", prevTotal, currTotal)
|
||||
}
|
||||
|
||||
// PrometheusAPIV1Write is a test helper function that inserts a
|
||||
// collection of records in Prometheus remote-write format by sending a HTTP
|
||||
// POST request to /prometheus/api/v1/write vmagent endpoint.
|
||||
func (app *Vmagent) PrometheusAPIV1Write(t *testing.T, wr prompb.WriteRequest, opts QueryOpts) {
|
||||
t.Helper()
|
||||
|
||||
url := fmt.Sprintf("http://%s/prometheus/api/v1/write", app.httpListenAddr)
|
||||
if opts.Tenant != "" {
|
||||
url = fmt.Sprintf("http://%s/insert/%s/prometheus/api/v1/write", app.httpListenAddr, opts.Tenant)
|
||||
}
|
||||
data := snappy.Encode(nil, wr.MarshalProtobuf(nil))
|
||||
recordsCount := len(wr.Timeseries)
|
||||
if prommetadata.IsEnabled() {
|
||||
recordsCount += len(wr.Metadata)
|
||||
}
|
||||
headers := opts.getHeaders()
|
||||
headers.Set("Content-Type", "application/x-protobuf")
|
||||
app.sendBlocking(t, recordsCount, func() {
|
||||
_, statusCode := app.cli.Post(t, url, data, headers)
|
||||
if statusCode != http.StatusNoContent {
|
||||
t.Fatalf("unexpected status code: got %d, want %d", statusCode, http.StatusNoContent)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// HTTPAddr returns the address at which the vmagent process is listening
|
||||
// for http connections.
|
||||
func (app *Vmagent) HTTPAddr() string {
|
||||
@@ -176,16 +205,22 @@ func (app *Vmagent) HTTPAddr() string {
|
||||
// If it is, then the data has been sent to vmstorage.
|
||||
//
|
||||
// Unreliable if the records are inserted concurrently.
|
||||
func (app *Vmagent) sendBlocking(t *testing.T, numRecordsToSend int, send func()) {
|
||||
func (app *Vmagent) sendBlocking(t *testing.T, _ int, send func()) {
|
||||
t.Helper()
|
||||
|
||||
currRowsSentCount := app.remoteWriteRequestsTotal(t)
|
||||
|
||||
send()
|
||||
|
||||
const (
|
||||
retries = 20
|
||||
period = 100 * time.Millisecond
|
||||
)
|
||||
wantRowsSentCount := app.remoteWriteRequestsTotal(t) + numRecordsToSend
|
||||
// TODO: properly account wantRowsSentCount
|
||||
// currently vmagent doesn't expose per time-series write information
|
||||
// so we can only account number of blocks sent via remote write protocol
|
||||
// it should be suitable for tests purpose
|
||||
wantRowsSentCount := currRowsSentCount + 1
|
||||
for range retries {
|
||||
if app.remoteWriteRequestsTotal(t) >= wantRowsSentCount {
|
||||
return
|
||||
|
||||
@@ -9093,18 +9093,20 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "* `*` - unsupported query path\n* `/write` - insert into VM\n* `/metrics` - query VM system metrics\n* `/query` - query instant values\n* `/query_range` - query over a range of time\n* `/series` - match a certain label set\n* `/label/{}/values` - query a list of label values (variables mostly)",
|
||||
"description": "Shows the rate of data rows ingested from write requests. There are two kinds of rows:\n1. Raw sample: each sample consists of a value and a timestamp, see https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples. \n2. Metric metadata: refers to descriptive information about metrics. It can be disabled by setting `-enableMetadata=false`, see https://docs.victoriametrics.com/victoriametrics/vmagent/#metric-metadata.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -9113,6 +9115,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9120,6 +9123,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9129,6 +9133,7 @@
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"decimals": 2,
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
@@ -9136,7 +9141,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9152,9 +9158,9 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8385
|
||||
"y": 43
|
||||
},
|
||||
"id": 97,
|
||||
"id": 227,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
@@ -9169,11 +9175,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9181,15 +9188,30 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_http_requests_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) by (path) > 0",
|
||||
"expr": "sum(rate(vm_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (type) > 0 ",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "__auto",
|
||||
"legendFormat": "sample: {{type}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_metadata_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (type) > 0 ",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "metadata: {{type}}",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Requests rate ($instance)",
|
||||
"title": "Rows rate ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
@@ -9204,11 +9226,13 @@
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
@@ -9217,6 +9241,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9224,6 +9249,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9241,7 +9267,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9277,7 +9304,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8385
|
||||
"y": 43
|
||||
},
|
||||
"id": 99,
|
||||
"options": {
|
||||
@@ -9294,11 +9321,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9336,17 +9364,20 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "* `*` - unsupported query path\n* `/write` - insert into VM\n* `/metrics` - query VM system metrics\n* `/query` - query instant values\n* `/query_range` - query over a range of time\n* `/series` - match a certain label set\n* `/label/{}/values` - query a list of label values (variables mostly)",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -9355,6 +9386,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9362,6 +9394,116 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 51
|
||||
},
|
||||
"id": 97,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true,
|
||||
"sortBy": "Last *",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_http_requests_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) by (path) > 0",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Requests rate ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9384,7 +9526,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9399,8 +9542,8 @@
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8393
|
||||
"x": 12,
|
||||
"y": 51
|
||||
},
|
||||
"id": 185,
|
||||
"options": {
|
||||
@@ -9417,11 +9560,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9447,7 +9591,6 @@
|
||||
"exemplar": true,
|
||||
"expr": "min(\n rate(process_cpu_seconds_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_insert\", instance=~\"$instance\"}\n)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "min",
|
||||
@@ -9463,7 +9606,6 @@
|
||||
"exemplar": true,
|
||||
"expr": "quantile(0.5,\n rate(process_cpu_seconds_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_insert\", instance=~\"$instance\"}\n)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "median",
|
||||
@@ -9486,11 +9628,13 @@
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -9499,6 +9643,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9506,6 +9651,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9528,7 +9674,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9543,8 +9690,8 @@
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8393
|
||||
"x": 0,
|
||||
"y": 59
|
||||
},
|
||||
"id": 187,
|
||||
"options": {
|
||||
@@ -9561,11 +9708,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9591,7 +9739,6 @@
|
||||
"exemplar": true,
|
||||
"expr": "min(\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_insert\", instance=~\"$instance\"}\n)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "min",
|
||||
@@ -9607,7 +9754,6 @@
|
||||
"exemplar": true,
|
||||
"expr": "quantile(0.5,\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_insert\", instance=~\"$instance\"}\n)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "median",
|
||||
@@ -9623,18 +9769,20 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the saturation level of connection between vminsert and vmstorage components. \n\nIf the threshold of 0.9sec is reached, then the connection is saturated by more than 90% and vminsert won't be able to keep up. This usually means that either vminsert or vmstorage nodes are struggling with the load. Verify CPU/mem saturation of both components and network saturation between them.\nIf vminsert resources are saturated - consider adding more resources or scale vminserts horizontally.\n\nIf vminsert resources and network are fine, check vmstorage metrics for anomalies.",
|
||||
"description": "Shows when vmstorage node is unreachable for vminsert.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -9643,6 +9791,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9650,6 +9799,116 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"decimals": 0,
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 59
|
||||
},
|
||||
"id": 114,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "vm_rpc_vmstorage_is_reachable{job=~\"$job\", instance=~\"$instance\"} != 1",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}} => {{addr}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Storage reachability ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the saturation level of connection between vminsert and vmstorage components. \n\nIf the threshold of 0.9sec is reached, then the connection is saturated by more than 90% and vminsert won't be able to keep up. This usually means that either vminsert or vmstorage nodes are struggling with the load. Verify CPU/mem saturation of both components and network saturation between them.\nIf vminsert resources are saturated - consider adding more resources or scale vminserts horizontally.\n\nIf vminsert resources and network are fine, check vmstorage metrics for anomalies.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9667,7 +9926,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "transparent"
|
||||
"color": "transparent",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9683,7 +9943,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8401
|
||||
"y": 67
|
||||
},
|
||||
"id": 139,
|
||||
"options": {
|
||||
@@ -9699,11 +9959,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9729,18 +9990,20 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows when vmstorage node is unreachable for vminsert.",
|
||||
"description": "Shows network usage between vminserts and vmstorages.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -9749,6 +10012,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9756,6 +10020,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9765,15 +10030,14 @@
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"decimals": 0,
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9781,17 +10045,30 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
"unit": "bps"
|
||||
},
|
||||
"overrides": []
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/read.*/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.transform",
|
||||
"value": "negative-Y"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8401
|
||||
"y": 67
|
||||
},
|
||||
"id": 114,
|
||||
"id": 209,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
@@ -9800,14 +10077,17 @@
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
"showLegend": true,
|
||||
"sortBy": "Last *",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9815,16 +10095,28 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "vm_rpc_vmstorage_is_reachable{job=~\"$job\", instance=~\"$instance\"} != 1",
|
||||
"expr": "sum(rate(vm_tcpdialer_read_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}} => {{addr}}",
|
||||
"legendFormat": "read from vmstorage",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_tcpdialer_written_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "write to vmstorage",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Storage reachability ($instance)",
|
||||
"title": "Network usage: vmstorage ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
@@ -9839,11 +10131,13 @@
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -9852,6 +10146,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9859,6 +10154,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9874,7 +10170,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9903,7 +10200,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8409
|
||||
"y": 75
|
||||
},
|
||||
"id": 208,
|
||||
"options": {
|
||||
@@ -9919,11 +10216,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9946,7 +10244,6 @@
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_tcplistener_written_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "write to client",
|
||||
"range": true,
|
||||
@@ -9961,147 +10258,19 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows network usage between vminserts and vmstorages.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bps"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/read.*/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.transform",
|
||||
"value": "negative-Y"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8409
|
||||
},
|
||||
"id": 209,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true,
|
||||
"sortBy": "Last *",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_tcpdialer_read_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "read from vmstorage",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_tcpdialer_written_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "write to vmstorage",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Network usage: vmstorage ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -10110,6 +10279,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -10117,6 +10287,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -10134,7 +10305,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -10150,7 +10322,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8417
|
||||
"y": 75
|
||||
},
|
||||
"id": 88,
|
||||
"options": {
|
||||
@@ -10167,11 +10339,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
||||
@@ -9094,18 +9094,20 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "* `*` - unsupported query path\n* `/write` - insert into VM\n* `/metrics` - query VM system metrics\n* `/query` - query instant values\n* `/query_range` - query over a range of time\n* `/series` - match a certain label set\n* `/label/{}/values` - query a list of label values (variables mostly)",
|
||||
"description": "Shows the rate of data rows ingested from write requests. There are two kinds of rows:\n1. Raw sample: each sample consists of a value and a timestamp, see https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples. \n2. Metric metadata: refers to descriptive information about metrics. It can be disabled by setting `-enableMetadata=false`, see https://docs.victoriametrics.com/victoriametrics/vmagent/#metric-metadata.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -9114,6 +9116,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9121,6 +9124,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9130,6 +9134,7 @@
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"decimals": 2,
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
@@ -9137,7 +9142,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9153,9 +9159,9 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8385
|
||||
"y": 43
|
||||
},
|
||||
"id": 97,
|
||||
"id": 227,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
@@ -9170,11 +9176,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9182,15 +9189,30 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_http_requests_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) by (path) > 0",
|
||||
"expr": "sum(rate(vm_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (type) > 0 ",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "__auto",
|
||||
"legendFormat": "sample: {{type}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_metadata_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (type) > 0 ",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "metadata: {{type}}",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Requests rate ($instance)",
|
||||
"title": "Rows rate ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
@@ -9205,11 +9227,13 @@
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
@@ -9218,6 +9242,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9225,6 +9250,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9242,7 +9268,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9278,7 +9305,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8385
|
||||
"y": 43
|
||||
},
|
||||
"id": 99,
|
||||
"options": {
|
||||
@@ -9295,11 +9322,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9337,17 +9365,20 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "* `*` - unsupported query path\n* `/write` - insert into VM\n* `/metrics` - query VM system metrics\n* `/query` - query instant values\n* `/query_range` - query over a range of time\n* `/series` - match a certain label set\n* `/label/{}/values` - query a list of label values (variables mostly)",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -9356,6 +9387,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9363,6 +9395,116 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 51
|
||||
},
|
||||
"id": 97,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true,
|
||||
"sortBy": "Last *",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_http_requests_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) by (path) > 0",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Requests rate ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9385,7 +9527,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9400,8 +9543,8 @@
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8393
|
||||
"x": 12,
|
||||
"y": 51
|
||||
},
|
||||
"id": 185,
|
||||
"options": {
|
||||
@@ -9418,11 +9561,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9448,7 +9592,6 @@
|
||||
"exemplar": true,
|
||||
"expr": "min(\n rate(process_cpu_seconds_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_insert\", instance=~\"$instance\"}\n)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "min",
|
||||
@@ -9464,7 +9607,6 @@
|
||||
"exemplar": true,
|
||||
"expr": "quantile(0.5,\n rate(process_cpu_seconds_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_insert\", instance=~\"$instance\"}\n)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "median",
|
||||
@@ -9487,11 +9629,13 @@
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -9500,6 +9644,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9507,6 +9652,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9529,7 +9675,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9544,8 +9691,8 @@
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8393
|
||||
"x": 0,
|
||||
"y": 59
|
||||
},
|
||||
"id": 187,
|
||||
"options": {
|
||||
@@ -9562,11 +9709,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9592,7 +9740,6 @@
|
||||
"exemplar": true,
|
||||
"expr": "min(\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_insert\", instance=~\"$instance\"}\n)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "min",
|
||||
@@ -9608,7 +9755,6 @@
|
||||
"exemplar": true,
|
||||
"expr": "quantile(0.5,\n max_over_time(process_resident_memory_anon_bytes{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_insert\", instance=~\"$instance\"}\n)",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "median",
|
||||
@@ -9624,18 +9770,20 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the saturation level of connection between vminsert and vmstorage components. \n\nIf the threshold of 0.9sec is reached, then the connection is saturated by more than 90% and vminsert won't be able to keep up. This usually means that either vminsert or vmstorage nodes are struggling with the load. Verify CPU/mem saturation of both components and network saturation between them.\nIf vminsert resources are saturated - consider adding more resources or scale vminserts horizontally.\n\nIf vminsert resources and network are fine, check vmstorage metrics for anomalies.",
|
||||
"description": "Shows when vmstorage node is unreachable for vminsert.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -9644,6 +9792,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9651,6 +9800,116 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"decimals": 0,
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 59
|
||||
},
|
||||
"id": 114,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "vm_rpc_vmstorage_is_reachable{job=~\"$job\", instance=~\"$instance\"} != 1",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}} => {{addr}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Storage reachability ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the saturation level of connection between vminsert and vmstorage components. \n\nIf the threshold of 0.9sec is reached, then the connection is saturated by more than 90% and vminsert won't be able to keep up. This usually means that either vminsert or vmstorage nodes are struggling with the load. Verify CPU/mem saturation of both components and network saturation between them.\nIf vminsert resources are saturated - consider adding more resources or scale vminserts horizontally.\n\nIf vminsert resources and network are fine, check vmstorage metrics for anomalies.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9668,7 +9927,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "transparent"
|
||||
"color": "transparent",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9684,7 +9944,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8401
|
||||
"y": 67
|
||||
},
|
||||
"id": 139,
|
||||
"options": {
|
||||
@@ -9700,11 +9960,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9730,18 +9991,20 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows when vmstorage node is unreachable for vminsert.",
|
||||
"description": "Shows network usage between vminserts and vmstorages.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -9750,6 +10013,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9757,6 +10021,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9766,15 +10031,14 @@
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"decimals": 0,
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9782,17 +10046,30 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
"unit": "bps"
|
||||
},
|
||||
"overrides": []
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/read.*/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.transform",
|
||||
"value": "negative-Y"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8401
|
||||
"y": 67
|
||||
},
|
||||
"id": 114,
|
||||
"id": 209,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
@@ -9801,14 +10078,17 @@
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
"showLegend": true,
|
||||
"sortBy": "Last *",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9816,16 +10096,28 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "vm_rpc_vmstorage_is_reachable{job=~\"$job\", instance=~\"$instance\"} != 1",
|
||||
"expr": "sum(rate(vm_tcpdialer_read_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{instance}} => {{addr}}",
|
||||
"legendFormat": "read from vmstorage",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_tcpdialer_written_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "write to vmstorage",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Storage reachability ($instance)",
|
||||
"title": "Network usage: vmstorage ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
@@ -9840,11 +10132,13 @@
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -9853,6 +10147,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -9860,6 +10155,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -9875,7 +10171,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -9904,7 +10201,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8409
|
||||
"y": 75
|
||||
},
|
||||
"id": 208,
|
||||
"options": {
|
||||
@@ -9920,11 +10217,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -9947,7 +10245,6 @@
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_tcplistener_written_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "write to client",
|
||||
"range": true,
|
||||
@@ -9962,147 +10259,19 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows network usage between vminserts and vmstorages.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bps"
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byRegexp",
|
||||
"options": "/read.*/"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
"id": "custom.transform",
|
||||
"value": "negative-Y"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8409
|
||||
},
|
||||
"id": 209,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true,
|
||||
"sortBy": "Last *",
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_tcpdialer_read_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "read from vmstorage",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_tcpdialer_written_bytes_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])) * 8 > 0",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "write to vmstorage",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Network usage: vmstorage ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -10111,6 +10280,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -10118,6 +10288,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -10135,7 +10306,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -10151,7 +10323,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8417
|
||||
"y": 75
|
||||
},
|
||||
"id": 88,
|
||||
"options": {
|
||||
@@ -10168,11 +10340,12 @@
|
||||
"sortDesc": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
|
||||
@@ -4020,7 +4020,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the rate of parsed datapoints from write or scrape requests.",
|
||||
"description": "Shows the rate of data rows parsed from write requests and scraping. \nThere are two kinds of rows:\n1. Raw sample: each sample consists of a value and a timestamp, see https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples \n2. Metric metadata: refers to descriptive information about metrics. It can be disabled by setting `-enableMetadata=false`, see https://docs.victoriametrics.com/victoriametrics/vmagent/#metric-metadata.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -4050,6 +4050,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -4066,7 +4067,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -4102,7 +4104,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -4113,12 +4115,24 @@
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vm_protoparser_rows_read_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, type) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "{{ type }} ({{job}})",
|
||||
"legendFormat": "sample: {{ type }} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_protoparser_metadata_read_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, type) > 0",
|
||||
"instant": false,
|
||||
"legendFormat": "metadata: {{ type }} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Datapoints rate ($instance)",
|
||||
"title": "Rows rate ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
@@ -5722,7 +5736,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the rate of rows ingested in vmagent via push protocols.",
|
||||
"description": "Shows the rate of data rows ingested from write requests. There are two kinds of rows:\n1. Raw sample: each sample consists of a value and a timestamp, see https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples. \n2. Metric metadata: refers to descriptive information about metrics. It can be disabled by setting `-enableMetadata=false`, see https://docs.victoriametrics.com/victoriametrics/vmagent/#metric-metadata.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -5735,6 +5749,7 @@
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -5751,6 +5766,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -5767,7 +5783,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -5783,7 +5800,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3931
|
||||
"y": 41
|
||||
},
|
||||
"id": 131,
|
||||
"options": {
|
||||
@@ -5798,11 +5815,12 @@
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -5813,12 +5831,24 @@
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vmagent_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, type) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "{{ type }} ({{job}})",
|
||||
"legendFormat": "sample: {{ type }} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vmagent_metadata_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, type) > 0",
|
||||
"instant": false,
|
||||
"legendFormat": "metadata:{{ type }} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Rows rate ($instance)",
|
||||
"title": "Rows rate ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
|
||||
@@ -4019,7 +4019,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the rate of parsed datapoints from write or scrape requests.",
|
||||
"description": "Shows the rate of data rows parsed from write requests and scraping. \nThere are two kinds of rows:\n1. Raw sample: each sample consists of a value and a timestamp, see https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples \n2. Metric metadata: refers to descriptive information about metrics. It can be disabled by setting `-enableMetadata=false`, see https://docs.victoriametrics.com/victoriametrics/vmagent/#metric-metadata.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -4049,6 +4049,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -4065,7 +4066,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -4101,7 +4103,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -4112,12 +4114,24 @@
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vm_protoparser_rows_read_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, type) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "{{ type }} ({{job}})",
|
||||
"legendFormat": "sample: {{ type }} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_protoparser_metadata_read_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, type) > 0",
|
||||
"instant": false,
|
||||
"legendFormat": "metadata: {{ type }} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Datapoints rate ($instance)",
|
||||
"title": "Rows rate ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
@@ -5721,7 +5735,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the rate of rows ingested in vmagent via push protocols.",
|
||||
"description": "Shows the rate of data rows ingested from write requests. There are two kinds of rows:\n1. Raw sample: each sample consists of a value and a timestamp, see https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples. \n2. Metric metadata: refers to descriptive information about metrics. It can be disabled by setting `-enableMetadata=false`, see https://docs.victoriametrics.com/victoriametrics/vmagent/#metric-metadata.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -5734,6 +5748,7 @@
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -5750,6 +5765,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -5766,7 +5782,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -5782,7 +5799,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3931
|
||||
"y": 41
|
||||
},
|
||||
"id": 131,
|
||||
"options": {
|
||||
@@ -5797,11 +5814,12 @@
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.4.3",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -5812,12 +5830,24 @@
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vmagent_rows_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, type) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "{{ type }} ({{job}})",
|
||||
"legendFormat": "sample: {{ type }} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vmagent_metadata_inserted_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job, type) > 0",
|
||||
"instant": false,
|
||||
"legendFormat": "metadata:{{ type }} ({{job}})",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Rows rate ($instance)",
|
||||
"title": "Rows rate ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
DOCKER_REGISTRIES ?= docker.io quay.io
|
||||
DOCKER_NAMESPACE ?= victoriametrics
|
||||
|
||||
ROOT_IMAGE ?= alpine:3.23.3
|
||||
ROOT_IMAGE ?= alpine:3.23.4
|
||||
ROOT_IMAGE_SCRATCH ?= scratch
|
||||
CERTS_IMAGE := alpine:3.23.3
|
||||
CERTS_IMAGE := alpine:3.23.4
|
||||
|
||||
GO_BUILDER_IMAGE := golang:1.26.2
|
||||
|
||||
|
||||
@@ -1511,6 +1511,16 @@ value than before, then data outside the configured period will be eventually de
|
||||
|
||||
VictoriaMetrics does not support indefinite retention, but you can specify an arbitrarily high duration, e.g. `-retentionPeriod=100y`.
|
||||
|
||||
By default, VictoriaMetrics doesn't accept samples with timestamps bigger than `now+2d`, e.g. 2 days in the future.
|
||||
If you need accepting samples with bigger timestamps, then specify the desired "future retention" via `-futureRetention` command-line flag.
|
||||
This flag accepts values starting from `2d`.
|
||||
|
||||
For example, the following command starts VictoriaMetrics, which accepts samples with timestamps up to a year in the future:
|
||||
|
||||
```sh
|
||||
/path/to/victoria-metrics -futureRetention=1y
|
||||
```
|
||||
|
||||
### Multiple retentions
|
||||
|
||||
Distinct retentions for distinct time series can be configured via [retention filters](#retention-filters)
|
||||
|
||||
@@ -25,28 +25,44 @@ The sandbox cluster installation runs under the constant load generated by
|
||||
See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/).
|
||||
|
||||
## tip
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): introduce `-remoteWrite.mdx.enable` and `-mdx.instanceEntryTTL` command-line flags to support mdx service in `vmagent`, it allows `vmagent` to send only metrics from VictoriaMetrics services to the corresponding `-remoteWrite.url`. See [monitoring-data-exchange](https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange) and [#10600](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10600) for detail.
|
||||
|
||||
## [v1.141.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.141.0)
|
||||
|
||||
Released at 2026-04-24
|
||||
|
||||
* SECURITY: upgrade base docker image (Alpine) from 3.23.3 to 3.23.4. See [Alpine 3.23.4 release notes](https://www.alpinelinux.org/posts/Alpine-3.20.10-3.21.7-3.22.4-3.23.4-released.html).
|
||||
|
||||
* FEATURE: all VictoriaMetrics components: add support for reading cpu/memory limits configured via [systemd slices](https://www.freedesktop.org/software/systemd/man/latest/systemd.slice.html). Previously, only limits set directly on the process's own cgroup were detected. See [#10635](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10635). Thanks to @andriibeee for the contribution.
|
||||
* FEATURE: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): improve error handling at opentsdb migration. See [#10797](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10797)
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): now `Run query` link on the Alerting Rules page correctly propagates the alert’s interval and evaluation time. See [#10366](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10366).
|
||||
* FEATURE: [alerts](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules): add new `MetricNameStatsCacheUtilizationIsTooHigh` alerting rule to track overutilization of [Metric names usage stats tracker](https://docs.victoriametrics.com/victoriametrics/#track-ingested-metrics-usage) (used in [Cardinality Explorer](https://docs.victoriametrics.com/victoriametrics/#cardinality-explorer)). See [#10840](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10840).
|
||||
* FEATURE: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): add `vm_streamaggr_counter_resets_total` metric for `total*`, `increase*` and `rate*` outputs that is useful for aggregation behaviour tracking. These metrics help to identify issues described in [Troubleshooting: counter resets](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#counter-resets).
|
||||
* FEATURE: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): add `vm_streamaggr_counter_resets_total` metric for `total*`, `increase*` and `rate*` outputs that is useful for aggregation behaviour tracking. These metrics help to identify issues described in [Troubleshooting: counter resets](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#counter-resets). See [#10807](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10807).
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add the support of ingestion and retrieval of samples with timestamps in the future. The new `-futureRetention` flag controls how far in the future the timestamps are allowed to be. See [#827](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/827) and [#10718](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10718).
|
||||
* FEATURE: [dashboards/vmagent](https://grafana.com/grafana/dashboards/12683), [dashboards/cluster](https://grafana.com/grafana/dashboards/11176): add metadata row ingestion rate queries to related panels. See [#10868](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10868).
|
||||
|
||||
* BUGFIX: `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix increased memory usage after upgrade to [v1.140.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.140.0) by properly accounting for internal buffer count when calculating per-storage buffer size. See [#10725](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10725#issuecomment-4282256709).
|
||||
* BUGFIX: `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix increased memory usage after upgrade to [v1.140.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.140.0) by properly accounting for internal buffer count when calculating per-storage buffer size. See [#10725](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10725#issuecomment-4282256709). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: all VictoriaMetrics components: properly parse IPv6 source address when accepting connections with proxy protocol v2 enabled. See [#10839](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10839). Thanks to @andriibeee for the contribution.
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): properly attach [tenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) from `__tenant_id__` label to the scraped metadata. See [#10828](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10828).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): keep [tenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) ingested into vmagent via [prometheus remotewrite](https://docs.victoriametrics.com/victoriametrics/integrations/prometheus/) endpoint. See [#10828](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10828).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): `-maxScrapeSize` is now correctly applied when reading response bodies, including non-OK scrape error responses. See [#10804](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10804).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): fix `ec2_sd_configs` returning 401 `AuthFailure` from AWS when credentials are obtained via IRSA, instance role or `AWS_CONTAINER_CREDENTIALS_*` env vars. The regression was introduced in [v1.140.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.140.0). See [#10815](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10815).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): fix `ec2_sd_configs` returning 401 `AuthFailure` from AWS when credentials are obtained via IRSA, instance role or `AWS_CONTAINER_CREDENTIALS_*` env vars. The regression was introduced in [v1.140.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.140.0). See [#10815](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10815). Thanks to @andriibeee for the contribution.
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): fix leak of backend TCP connections, file descriptors and goroutines when the client cancels the request after the backend response has been received. See [#10833](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10833). Thanks to @andriibeee for the contribution.
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): fix a rare panic during config reload when a backend is marked as broken. See [#10806](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10806).
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): stop logging warnings about failed handshakes when the `clusternative` port receives TCP healthchecks from load balancers. See [#10786](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10786). Thanks to @andriibeee for the contribution.
|
||||
* BUGFIX: [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/): fix an issue where vmrestore could hang indefinitely when interrupted during backup download. See [#10794](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10794).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): properly execute graceful shutdown for vmsingle if `-maxIngestionRate` is configured. See [#10795](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10795).
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix time display on Alerting Rules page to use selected timezone. See [#10827](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10827).
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): use contrasting colors when displaying time series to improve visibility on light and dark themes. See [#10869](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10869).
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): delete labels from rule results if they are specified with an empty string value in rule or group labels. See [#10766](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10766).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix incorrect evaluation of binary operations caused by an ordering bug (e.g. `10 - (3 + 3 + 4)` being evaluated as `10 - 3 + 3 + 4`). The issue was introduced in v1.140.0, v1.136.4, and v1.122.19. See [#10856](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10856).
|
||||
|
||||
## [v1.140.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.140.0)
|
||||
|
||||
Released at 2026-04-10
|
||||
|
||||
**Update Note 1:** [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): [CSV export](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-export-csv-data) (`/api/v1/export/csv`) now adds a header row as the first line of the response, so existing CSV-processing scripts may need to skip this header. See [#10666](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10666).
|
||||
|
||||
**Update Note 2:** [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/): Due to an ordering bug in binary operations, some queries may produce incorrect results. For example, `10 - (3 + 3 + 4)` is evaluated as `10 - 3 + 3 + 4`. The issue was introduced in versions v1.140.0, v1.136.4, v1.122.19, and is addressed in upcoming releases. It is strongly recommended to avoid these versions entirely. See [#10856](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10856).
|
||||
|
||||
* SECURITY: upgrade Go builder from Go1.26.1 to Go1.26.2. See [the list of issues addressed in Go1.26.2](https://github.com/golang/go/issues?q=milestone%3AGo1.26.2%20label%3ACherryPickApproved).
|
||||
@@ -152,6 +168,16 @@ It enables back `Discovered targets` debug UI by default.
|
||||
* BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly apply `extra_filters[]` filter when querying `vm_account_id` or `vm_project_id` labels via [multitenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) request for `/api/v1/label/…/values` API. Before, `extra_filters` was ignored. See [#10503](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10503).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): revert the use of rollup result cache for [instant queries](https://docs.victoriametrics.com/keyConcepts.html#instant-query) that contain [`rate`](https://docs.victoriametrics.com/MetricsQL.html#rate) function with a lookbehind window larger than `-search.minWindowForInstantRollupOptimization`. The cache usage was removed since [v1.132.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.132.0). See [#10098](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10098#issuecomment-3895011084) for more details.
|
||||
|
||||
## [v1.136.5](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.5)
|
||||
|
||||
Released at 2026-04-23
|
||||
|
||||
**v1.136.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.136.x line will be supported for at least 12 months since [v1.136.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11360) release**
|
||||
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix incorrect evaluation of binary operations caused by an ordering bug (e.g. `10 - (3 + 3 + 4)` being evaluated as `10 - 3 + 3 + 4`). The issue was introduced in v1.140.0, v1.136.4, and v1.122.19. See [#10856](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10856).
|
||||
|
||||
## [v1.136.4](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.4)
|
||||
|
||||
**Update Note 1:** [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/): Due to an ordering bug in binary operations, some queries may produce incorrect results. For example, `10 - (3 + 3 + 4)` is evaluated as `10 - 3 + 3 + 4`. The issue was introduced in versions v1.140.0, v1.136.4, v1.122.19, and is addressed in upcoming releases. It is strongly recommended to avoid these versions entirely. See [#10856](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10856).
|
||||
@@ -305,9 +331,13 @@ Released at 2026-01-16
|
||||
Released at 2026-01-02
|
||||
|
||||
**Update Note 1:** [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): Upgrading to per-partition index requires registering all active time series. Expect slow down of data ingestion and queries during upgrade roll-out. This is a one-time operation. Additionally, for users with retention periods shorter than 1 month the disk usage may increase.
|
||||
|
||||
**Update Note 2:** [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): Certain data and query patterns may cause high CPU utilization due to [10154](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10154). See [10297](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10297).
|
||||
|
||||
**Update Note 3:** [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): lock contention in the ingestion path may cause frequent context switches and storage connection saturation spikes. See [10367](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10367). Addressed in `v1.135.0`.
|
||||
|
||||
**Update Note 4:** [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): TSDB status may be empty if the partition index does not have records for the requested date. See [10315](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10315). Addressed in `v1.135.0`.
|
||||
|
||||
**Update Note 5:** [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): `indexdb/tagFiltersToMetricIDs`, `indexdb/metricID` and `indexdb/date_metricID` cache metrics are not reported properly. See [10275](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10275). Addressed in `v1.135.0`.
|
||||
|
||||
* SECURITY: upgrade base docker image (Alpine) from 3.22.2 to 3.23.2. See [Alpine 3.23.2 release notes](https://www.alpinelinux.org/posts/Alpine-3.23.2-released.html).
|
||||
@@ -374,6 +404,16 @@ See changes [here](https://docs.victoriametrics.com/victoriametrics/changelog/ch
|
||||
|
||||
See changes [here](https://docs.victoriametrics.com/victoriametrics/changelog/changelog_2025/#v11230)
|
||||
|
||||
## [v1.122.20](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.20)
|
||||
|
||||
Released at 2026-04-23
|
||||
|
||||
**v1.122.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.122.x line will be supported for at least 12 months since [v1.122.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11220) release**
|
||||
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix incorrect evaluation of binary operations caused by an ordering bug (e.g. `10 - (3 + 3 + 4)` being evaluated as `10 - 3 + 3 + 4`). The issue was introduced in v1.140.0, v1.136.4, and v1.122.19. See [#10856](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10856).
|
||||
|
||||
## [v1.122.19](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.19)
|
||||
|
||||
**Update Note 1:** [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/): Due to an ordering bug in binary operations, some queries may produce incorrect results. For example, `10 - (3 + 3 + 4)` is evaluated as `10 - 3 + 3 + 4`. The issue was introduced in versions v1.140.0, v1.136.4, v1.122.19, and is addressed in upcoming releases. It is strongly recommended to avoid these versions entirely. See [#10856](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10856).
|
||||
@@ -1527,4 +1567,4 @@ See changes [here](https://docs.victoriametrics.com/victoriametrics/changelog/ch
|
||||
|
||||
## Previous releases
|
||||
|
||||
See [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
|
||||
See [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases).
|
||||
@@ -9,13 +9,29 @@ sitemap:
|
||||
[vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/)
|
||||
can aggregate incoming [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples) in streaming mode **by time** and **by labels** before data is written to remote storage
|
||||
(or local storage for single-node VictoriaMetrics).
|
||||
The aggregation is applied to all the metrics received via any [supported data ingestion protocol](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-time-series-data)
|
||||
and/or scraped from [Prometheus-compatible targets](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter),
|
||||
and allows building [flexible processing pipelines](#routing).
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["requests_total{instance=foo}"] --> V[vmagent]
|
||||
B["requests_total{instance=bar}"] --> V
|
||||
C["requests_total{instance=baz}"] --> V
|
||||
V --> D[requests_total:rate5m]
|
||||
```
|
||||
|
||||
> By default, stream aggregation ignores timestamps associated with the input [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples). It expects that the ingested samples have timestamps close to the current time. See [how to ignore old samples](#ignoring-old-samples).
|
||||
# Features
|
||||
|
||||
> If `-streamAggr.dedupInterval` is enabled, out-of-order samples (older than already received) within the configured interval are treated as duplicates and ignored. See [de-duplication](#deduplication).
|
||||
Stream aggregation has the following features:
|
||||
|
||||
- It can calculate [aggregates](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs) on ingested samples before they're sent to remote destination;
|
||||
- It is applied to all the metric samples received via any [supported data ingestion protocol](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-time-series-data)
|
||||
and/or scraped from [Prometheus-compatible targets](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter)
|
||||
- It can filter out raw samples matched by aggregation rules, so raw data will never reach the remote destination. See `--streamAggr.keepInput` and `-streamAggr.dropInput` in [aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/);
|
||||
- It allows building [flexible processing pipelines](#routing);
|
||||
|
||||
# Limitations
|
||||
|
||||
- Stream aggregation **ignores timestamps associated with the input [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples)**.
|
||||
It expects that the ingested samples have timestamps close to the current time. See [how to ignore old samples](#ignoring-old-samples).
|
||||
- Aggregation state is held in the process memory and will be lost on process restart.
|
||||
|
||||
# Use cases
|
||||
|
||||
@@ -41,35 +57,41 @@ and not available for [Statsd metrics format](https://github.com/statsd/statsd/b
|
||||
|
||||
## Recording rules alternative
|
||||
|
||||
Sometimes [alerting queries](https://docs.victoriametrics.com/victoriametrics/vmalert/#alerting-rules) may require non-trivial amounts of CPU, RAM,
|
||||
disk IO and network bandwidth at metrics storage side. For example, if `http_request_duration_seconds` histogram is generated by thousands
|
||||
of application instances, then the alerting query `histogram_quantile(0.99, sum(increase(http_request_duration_seconds_bucket[5m])) without (instance)) > 0.5`
|
||||
can become slow, since it needs to scan too big number of unique [time series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series)
|
||||
with `http_request_duration_seconds_bucket` name. This alerting query can be accelerated by pre-calculating
|
||||
the `sum(increase(http_request_duration_seconds_bucket[5m])) without (instance)` via [recording rule](https://docs.victoriametrics.com/victoriametrics/vmalert/#recording-rules).
|
||||
But this recording rule may take too much time to execute too. In this case the slow recording rule can be substituted
|
||||
with the following [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config):
|
||||
Sometimes [rules](https://docs.victoriametrics.com/victoriametrics/vmalert/#rules) may require non-trivial amounts of CPU, RAM,
|
||||
disk IO and network bandwidth for processing on the metrics storage side.
|
||||
|
||||
For example, if the `http_request_duration_seconds` histogram is generated by thousands of application instances,
|
||||
then the alerting query `histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket[2m])) without (instance)) > 0.5`
|
||||
can become slow, since it needs to scan a large number of unique [time series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series).
|
||||
|
||||
This alerting query can be accelerated by pre-calculating
|
||||
the `sum(rate(http_request_duration_seconds_bucket[5m])) without (instance)` via [recording rule](https://docs.victoriametrics.com/victoriametrics/vmalert/#recording-rules).
|
||||
But it only shifts slowness from the alerting rule to the recording rule, since calculation still has to happen somewhere.
|
||||
It is better to substitute the slow recording rule with the following [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config):
|
||||
|
||||
```yaml
|
||||
- match: 'http_request_duration_seconds_bucket'
|
||||
interval: 5m
|
||||
interval: 1m
|
||||
without: [instance]
|
||||
outputs: [total]
|
||||
outputs: [rate_sum]
|
||||
```
|
||||
|
||||
This stream aggregation generates `http_request_duration_seconds_bucket:5m_without_instance_total` output series according to [output metric naming](#output-metric-names).
|
||||
> Field `interval` should be set to a value at least several times higher than the matched metrics collection interval.
|
||||
|
||||
This stream aggregation generates `http_request_duration_seconds_bucket:1m_without_instance_rate_sum` output series according to [output metric naming](#output-metric-names).
|
||||
Then these series can be used in [alerting rules](https://docs.victoriametrics.com/victoriametrics/vmalert/#alerting-rules):
|
||||
|
||||
```metricsql
|
||||
histogram_quantile(0.99, last_over_time(http_request_duration_seconds_bucket:5m_without_instance_total[5m])) > 0.5
|
||||
histogram_quantile(0.99, avg_over_time(http_request_duration_seconds_bucket:1m_without_instance_rate_sum[5m])) > 0.5
|
||||
```
|
||||
|
||||
This query is executed much faster than the original query, because it needs to scan much lower number of time series.
|
||||
This query executes much faster than the original one because it needs to scan fewer time series.
|
||||
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at `output` field.
|
||||
See also [aggregating by labels](#aggregating-by-labels).
|
||||
> `avg_over_time(<aggregate:1m>[5m])` is similar to recording rules calculating rate over a sliding window of `5m` with `1m` interval.
|
||||
If the sliding window isn't important, then simply omit the `avg_over_time` aggregation in the expression.
|
||||
|
||||
Field `interval` is recommended to be set to a value at least several times higher than your metrics collect interval.
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at the `output` field.
|
||||
See also [aggregating by labels](#aggregating-by-labels), [aggregating histograms](#aggregating-histograms).
|
||||
|
||||
## Reducing the number of stored samples
|
||||
|
||||
@@ -89,7 +111,7 @@ to one sample per 5 minutes per each input time series (this operation is also k
|
||||
interval: 5m
|
||||
outputs: [total]
|
||||
|
||||
# Downsample other metrics with `count_samples`, `sum_samples`, `min` and `max` outputs
|
||||
# Downsample other metrics with `count_samples`, `sum_samples`, `min`, and `max` outputs
|
||||
# See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs
|
||||
- match: '{__name__!~".+_total"}'
|
||||
interval: 5m
|
||||
@@ -109,14 +131,14 @@ some_metric:5m_min
|
||||
some_metric:5m_max
|
||||
```
|
||||
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at `output` field.
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at the `output` field.
|
||||
See also [aggregating histograms](#aggregating-histograms) and [aggregating by labels](#aggregating-by-labels).
|
||||
|
||||
## Reducing the number of stored series
|
||||
|
||||
Sometimes applications may generate too many [time series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series).
|
||||
For example, the `http_requests_total` metric may have `path` or `user` label with too big number of unique values.
|
||||
In this case the following stream aggregation can be used for reducing the number metrics stored in VictoriaMetrics:
|
||||
For example, the `http_requests_total` metric may have `path` or `user` label with too many unique values.
|
||||
In this case, the following stream aggregation can be used for reducing the number of metrics stored in VictoriaMetrics:
|
||||
|
||||
```yaml
|
||||
- match: 'http_requests_total'
|
||||
@@ -134,17 +156,17 @@ The aggregated output metric has the following name according to [output metric
|
||||
http_requests_total:30s_without_path_user_total
|
||||
```
|
||||
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at `output` field.
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at the `output` field.
|
||||
See also [aggregating histograms](#aggregating-histograms).
|
||||
|
||||
## Counting input samples
|
||||
|
||||
If the monitored application generates event-based metrics, then it may be useful to count the number of such metrics
|
||||
at stream aggregation level.
|
||||
at the stream aggregation level.
|
||||
|
||||
For example, if an advertising server generates `hits{some="labels"} 1` and `clicks{some="labels"} 1` metrics
|
||||
per each incoming hit and click, then the following [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config)
|
||||
can be used for counting these metrics per every 30 second interval:
|
||||
can be used for counting these metrics every 30-second interval:
|
||||
|
||||
```yaml
|
||||
- match: '{__name__=~"hits|clicks"}'
|
||||
@@ -160,7 +182,7 @@ hits:30s_count_samples count1
|
||||
clicks:30s_count_samples count2
|
||||
```
|
||||
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at `output` field.
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at the `output` field.
|
||||
See also [aggregating by labels](#aggregating-by-labels).
|
||||
|
||||
## Summing input metrics
|
||||
@@ -186,12 +208,12 @@ hits:1m_sum_samples sum1
|
||||
clicks:1m_sum_samples sum2
|
||||
```
|
||||
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at `output` field.
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at the `output` field.
|
||||
See also [aggregating by labels](#aggregating-by-labels).
|
||||
|
||||
## Quantiles over input metrics
|
||||
|
||||
If the monitored application generates measurement metrics per each request, then it may be useful to calculate
|
||||
If the monitored application generates measurement metrics for each request, then it may be useful to calculate
|
||||
the pre-defined set of [percentiles](https://en.wikipedia.org/wiki/Percentile) over these measurements.
|
||||
|
||||
For example, if the monitored application generates `request_duration_seconds N` and `response_size_bytes M` metrics
|
||||
@@ -216,12 +238,12 @@ response_size_bytes:30s_quantiles{quantile="0.50"} value1
|
||||
response_size_bytes:30s_quantiles{quantile="0.99"} value2
|
||||
```
|
||||
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at `output` field.
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at the `output` field.
|
||||
See also [histograms over input metrics](#histograms-over-input-metrics) and [aggregating by labels](#aggregating-by-labels).
|
||||
|
||||
## Histograms over input metrics
|
||||
|
||||
If the monitored application generates measurement metrics per each request, then it may be useful to calculate
|
||||
If the monitored application generates measurement metrics for each request, then it may be useful to calculate
|
||||
a [histogram](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#histogram) over these metrics.
|
||||
|
||||
For example, if the monitored application generates `request_duration_seconds N` and `response_size_bytes M` metrics
|
||||
@@ -267,7 +289,7 @@ The resulting histogram buckets can be queried with [MetricsQL](https://docs.vic
|
||||
histogram_stddev(sum(increase(request_duration_seconds:60s_histogram_bucket[1h])) by (vmrange))
|
||||
```
|
||||
|
||||
This query uses [histogram_stddev](https://docs.victoriametrics.com/victoriametrics/metricsql/#histogram_stddev) function.
|
||||
This query uses the [histogram_stddev](https://docs.victoriametrics.com/victoriametrics/metricsql/#histogram_stddev) function.
|
||||
|
||||
1. An estimated share of requests with the duration smaller than `0.5s` over the last hour:
|
||||
|
||||
@@ -277,7 +299,7 @@ The resulting histogram buckets can be queried with [MetricsQL](https://docs.vic
|
||||
|
||||
This query uses [histogram_share](https://docs.victoriametrics.com/victoriametrics/metricsql/#histogram_share) function.
|
||||
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at `output` field.
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at the `output` field.
|
||||
See also [quantiles over input metrics](#quantiles-over-input-metrics) and [aggregating by labels](#aggregating-by-labels).
|
||||
|
||||
## Aggregating histograms
|
||||
@@ -317,19 +339,24 @@ Please note, histograms can be aggregated if their `le` labels are configured id
|
||||
[VictoriaMetrics histogram buckets](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350)
|
||||
have no such requirement.
|
||||
|
||||
It's recommended to use [aggregation windows](#aggregation-windows) when aggregating histograms if you observe [accuracy issues](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4580).
|
||||
Stream aggregation of histogram buckets is very sensitive to sample delays. Histogram is logical group of independent time series (buckets) that are supposed to be updated uniformly.
|
||||
Aggregation can't guarantee that within one aggregation intervals all samples belonging to one histogram were updated. Situations like this can cause [accuracy issues](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4580).
|
||||
The recommended ways for improving accuracy are:
|
||||
- enable [aggregation windows](#aggregation-windows);
|
||||
- increase `interval` ;
|
||||
- ensure that vmagent has no resource shortage;
|
||||
- ensure that samples delivery pipeline has no resource shortage or queue accumulation, so it can deliver samples fast.
|
||||
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at `output` field.
|
||||
See [the list of aggregate output](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs), which can be specified at the `output` field.
|
||||
See also [histograms over input metrics](#histograms-over-input-metrics) and [quantiles over input metrics](#quantiles-over-input-metrics).
|
||||
|
||||
|
||||
# Routing
|
||||
|
||||
[Single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) supports relabeling,
|
||||
deduplication and stream aggregation for all the received data, scraped or pushed.
|
||||
The processed data is then stored in local storage and **can't be forwarded further**.
|
||||
The processed data is then stored in local storage, and **can't be forwarded further**.
|
||||
|
||||
[vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) supports relabeling, deduplication and stream aggregation for all
|
||||
[vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) supports relabeling, deduplication, and stream aggregation for all
|
||||
the received data, scraped or pushed. See the [processing order for vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/#life-of-a-sample).
|
||||
|
||||
Typical scenarios for data routing with `vmagent`:
|
||||
@@ -342,42 +369,44 @@ Typical scenarios for data routing with `vmagent`:
|
||||
|
||||
# Deduplication
|
||||
|
||||
[vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) supports online [de-duplication](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) of samples
|
||||
before sending them to the configured `-remoteWrite.url`. The de-duplication can be enabled via the following options:
|
||||
If `-streamAggr.dedupInterval` is enabled, out-of-order samples (older than already received) within the configured interval are treated as duplicates and ignored. See [deduplication](#deduplication).
|
||||
|
||||
- By specifying the desired de-duplication interval via `-streamAggr.dedupInterval` command-line flag for all received data
|
||||
[vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) supports [deduplication](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) of samples
|
||||
before sending them to the configured `-remoteWrite.url`. The deduplication can be enabled via the following options:
|
||||
|
||||
- By specifying the desired deduplication interval via `-streamAggr.dedupInterval` command-line flag for all received data
|
||||
or via `-remoteWrite.streamAggr.dedupInterval` command-line flag for the particular `-remoteWrite.url` destination.
|
||||
For example, `./vmagent -remoteWrite.url=http://remote-storage/api/v1/write -remoteWrite.streamAggr.dedupInterval=30s` instructs `vmagent` to leave
|
||||
only the last sample per each seen [time series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) per every 30 seconds.
|
||||
only the last sample for each seen [time series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) every 30 seconds.
|
||||
The de-deduplication is performed after applying [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) and
|
||||
before performing the aggregation.
|
||||
|
||||
- By specifying `dedup_interval` option individually per each [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config)
|
||||
- By specifying the `dedup_interval` option individually per each [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config)
|
||||
in `-remoteWrite.streamAggr.config` or `-streamAggr.config` configs.
|
||||
|
||||
[Single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) supports two types of de-duplication:
|
||||
- After storing the duplicate samples to local storage. See [`-dedup.minScrapeInterval`](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) command-line option.
|
||||
- Before storing the duplicate samples to local storage. This type of de-duplication can be enabled via the following options:
|
||||
- By specifying the desired de-duplication interval via `-streamAggr.dedupInterval` command-line flag.
|
||||
For example, `./victoria-metrics -streamAggr.dedupInterval=30s` instructs VictoriaMetrics to leave only the last sample per each
|
||||
- After storing the duplicate samples in local storage. See [`-dedup.minScrapeInterval`](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) command-line option.
|
||||
- Before storing the duplicate samples in local storage. This type of deduplication can be enabled via the following options:
|
||||
- By specifying the desired deduplication interval via the `-streamAggr.dedupInterval` command-line flag.
|
||||
For example, `./victoria-metrics -streamAggr.dedupInterval=30s` instructs VictoriaMetrics to leave only the last sample for each
|
||||
seen [time series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) per every 30 seconds.
|
||||
The de-duplication is performed after applying `-relabelConfig` [relabeling](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#relabeling).
|
||||
The deduplication is performed after applying `-relabelConfig` [relabeling](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#relabeling).
|
||||
|
||||
- By specifying `dedup_interval` option individually per each [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config) at `-streamAggr.config`.
|
||||
|
||||
It is possible to drop the given labels before applying the de-duplication. See [these docs](#dropping-unneeded-labels).
|
||||
It is possible to drop the given labels before applying the deduplication. See [these docs](#dropping-unneeded-labels).
|
||||
|
||||
The online de-duplication uses the same logic as [`-dedup.minScrapeInterval` command-line flag](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) at VictoriaMetrics.
|
||||
The online deduplication uses the same logic as [`-dedup.minScrapeInterval` command-line flag](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) at VictoriaMetrics.
|
||||
|
||||
De-duplication is applied before stream aggregation rules and can drop samples before they get matched for aggregation.
|
||||
Deduplication is applied before stream aggregation rules and can drop samples before they get matched for aggregation.
|
||||
|
||||
# Relabeling
|
||||
|
||||
It is possible to apply [arbitrary relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) to input and output metrics
|
||||
during stream aggregation via `input_relabel_configs` and `output_relabel_configs` options in [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config).
|
||||
|
||||
Relabeling rules inside `input_relabel_configs` are applied to samples matching the `match` filters before optional [deduplication](#deduplication).
|
||||
Relabeling rules inside `output_relabel_configs` are applied to aggregated samples before sending them to the remote storage.
|
||||
Relabeling rules inside `input_relabel_configs` are applied to samples matching the `match` filters before optional [deduplication](# deduplication).
|
||||
Relabeling rules in `output_relabel_configs` are applied to aggregated samples before they are sent to the remote storage.
|
||||
|
||||
For example, the following config removes the `:1m_sum_samples` suffix added [to the output metric name](#output-metric-names):
|
||||
|
||||
@@ -444,12 +473,12 @@ For example:
|
||||
- if `interval: 1m` is set, then the aggregated data is flushed to the storage at the end of every minute
|
||||
- if `interval: 1h` is set, then the aggregated data is flushed to the storage at the end of every hour
|
||||
|
||||
If you do not need such an alignment, then set `no_align_flush_to_interval: true` option in the [aggregate config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config).
|
||||
In this case aggregated data flushes will be aligned to the `vmagent` start time or to [config reload](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#configuration-update) time.
|
||||
If you do not need such an alignment, then set the `no_align_flush_to_interval: true` option in the [aggregate config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config).
|
||||
In this case, aggregated data flushes will be aligned to the `vmagent` start time or to [config reload](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#configuration-update) time.
|
||||
|
||||
The aggregated data on the first and the last interval is dropped during `vmagent` start, restart or [config reload](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#configuration-update),
|
||||
since the first and the last aggregation intervals are incomplete, so they usually contain incomplete confusing data.
|
||||
If you need preserving the aggregated data on these intervals, then set `flush_on_shutdown: true` option in the [aggregate config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config).
|
||||
The aggregated data on the first and the last interval is dropped during `vmagent` start, restart, or [config reload](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#configuration-update),
|
||||
since the first and last aggregation intervals are incomplete, they usually contain incomplete, confusing data.
|
||||
If you need to preserve the aggregated data on these intervals, then set `flush_on_shutdown: true` option in the [aggregate config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config).
|
||||
|
||||
See also:
|
||||
|
||||
@@ -480,7 +509,7 @@ The `keep_metric_names` option can be used if only a single output is set in [`o
|
||||
|
||||
## Aggregating by labels
|
||||
|
||||
All the labels for the input metrics are preserved by default in the output metrics. For example,
|
||||
By default, all labels from the input metrics are preserved in the output metrics. For example,
|
||||
the input metric `foo{app="bar",instance="host1"}` results to the output metric `foo:1m_sum_samples{app="bar",instance="host1"}`
|
||||
when the following [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config) is used:
|
||||
|
||||
@@ -489,7 +518,7 @@ when the following [stream aggregation config](https://docs.victoriametrics.com/
|
||||
outputs: [sum_samples]
|
||||
```
|
||||
|
||||
The input labels can be removed via `without` list specified in the config. For example, the following config
|
||||
The input labels can be removed via a `without` list specified in the config. For example, the following config
|
||||
removes the `instance` label from output metrics by summing input samples across all the instances:
|
||||
|
||||
```yaml
|
||||
@@ -501,7 +530,7 @@ removes the `instance` label from output metrics by summing input samples across
|
||||
In this case the `foo{app="bar",instance="..."}` input metrics are transformed into `foo:1m_without_instance_sum_samples{app="bar"}`
|
||||
output metric according to [output metric naming](#output-metric-names).
|
||||
|
||||
It is possible specifying the exact list of labels in the output metrics via `by` list.
|
||||
It is possible to specify the exact list of labels in the output metrics via the `by` list.
|
||||
For example, the following config sums input samples by the `app` label:
|
||||
|
||||
```yaml
|
||||
@@ -513,7 +542,7 @@ For example, the following config sums input samples by the `app` label:
|
||||
In this case the `foo{app="bar",instance="..."}` input metrics are transformed into `foo:1m_by_app_sum_samples{app="bar"}`
|
||||
output metric according to [output metric naming](#output-metric-names).
|
||||
|
||||
The labels used in `by` and `without` lists can be modified via `input_relabel_configs` section - see [these docs](#relabeling).
|
||||
The labels used in `by` and `without` lists can be modified via the `input_relabel_configs` section - see [these docs](#relabeling).
|
||||
|
||||
See also [aggregation outputs](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#aggregation-outputs).
|
||||
|
||||
@@ -579,32 +608,32 @@ Below is an example of an `aggr.yaml` configuration that drops the `replica` and
|
||||
|
||||
## Aggregation windows
|
||||
|
||||
By default, stream aggregation and deduplication stores a single state per each aggregation output result.
|
||||
By default, stream aggregation and deduplication store a single state for each aggregation output result.
|
||||
The data for each aggregator is flushed independently once per aggregation interval. But there's no guarantee that
|
||||
incoming samples with timestamps close to the aggregation interval's end will get into it. For example, when aggregating
|
||||
with `interval: 1m` a data sample with timestamp 1739473078 (18:57:59) can fall into aggregation round `18:58:00` or `18:59:00`.
|
||||
It depends on network lag, load, clock synchronization, etc. In most scenarios it doesn't impact aggregation or
|
||||
deduplication results, which are consistent within margin of error. But for metrics represented as a collection of series,
|
||||
with `interval: 1m`, a data sample with timestamp 1739473078 (18:57:59) can fall into the aggregation round `18:58:00` or `18:59:00`.
|
||||
It depends on network lag, load, clock synchronization, etc. In most scenarios, it doesn't impact aggregation or
|
||||
deduplication results, which are consistent within the margin of error. But for metrics represented as a collection of series,
|
||||
like [histograms](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#histogram), such inaccuracy leads to invalid aggregation results.
|
||||
|
||||
For this case, streaming aggregation and deduplication support mode with aggregation windows for current and previous state.
|
||||
With this mode, flush doesn't happen immediately but is shifted by a calculated samples lag that improves correctness for delayed data. {{% available_from "v1.112.0" %}}
|
||||
For this case, streaming aggregation and deduplication support mode with aggregation windows for the current and previous state.
|
||||
With this mode, flush doesn't happen immediately but is shifted by a calculated sample lag that improves correctness for delayed data. {{% available_from "v1.112.0" %}}
|
||||
|
||||
Enabling of this mode has increased resource usage: memory usage is expected to double as aggregation will store two states
|
||||
instead of one. However, this significantly improves accuracy of calculations. Aggregation windows can be enabled via
|
||||
Enabling this mode has increased resource usage: memory usage is expected to double as aggregation will store two states
|
||||
instead of one. However, this significantly improves the accuracy of calculations. Aggregation windows can be enabled via
|
||||
the following settings:
|
||||
|
||||
- `-streamAggr.enableWindows` at [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/)
|
||||
and [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/). At [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/)
|
||||
`-remoteWrite.streamAggr.enableWindows` flag can be specified individually per each `-remoteWrite.url`.
|
||||
If one of these flags is set, then all aggregators will be using fixed windows. In conjunction with `-remoteWrite.streamAggr.dedupInterval` or
|
||||
`-streamAggr.dedupInterval` fixed aggregation windows are enabled on deduplicator as well.
|
||||
`-remoteWrite.streamAggr.enableWindows` flag can be specified individually for each `-remoteWrite.url`.
|
||||
If one of these flags is set, all aggregators will use fixed windows. In conjunction with `-remoteWrite.streamAggr.dedupInterval` or
|
||||
`-streamAggr.dedupInterval` fixed aggregation windows are enabled on the deduplicator as well.
|
||||
- `enable_windows` option in [aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#stream-aggregation-config).
|
||||
It allows enabling aggregation windows for a specific aggregator.
|
||||
|
||||
## Counter resets
|
||||
|
||||
If counter-specific outputs, such as `total*`, `rate*`, and `increase*`, produce values that are significantly higher than anticipated, then check the `vm_streamaggr_counter_resets_total` metric. This metric increments each time when [counter reset event](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) is happening and could be caused by duplication or collision of raw samples. If you observe duplication or collision - try solving this problem by either fixing the source of these metrics or by [deduplicating](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication) these samples before aggregation.
|
||||
If counter-specific outputs, such as `total*`, `rate*`, and `increase*`, produce values that are significantly higher than anticipated, then check the `vm_streamaggr_counter_resets_total` metric. This metric increments each time when [counter reset event](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter) happens and could be caused by duplication or collision of raw samples. If you observe duplication or collision, try solving this problem by either fixing the source of these metrics or by [deduplicating](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication) these samples before aggregation.
|
||||
|
||||
## Staleness
|
||||
|
||||
@@ -618,7 +647,7 @@ The following outputs track the last seen per-series values in order to properly
|
||||
- [total](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total)
|
||||
- [total_prometheus](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#total_prometheus)
|
||||
|
||||
The last seen per-series value is dropped if no new samples are received for the given time series during two consecutive aggregation
|
||||
The last seen per-series value is dropped if no new samples are received for the given time series during two consecutive aggregations
|
||||
intervals specified in [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config) via `interval` option.
|
||||
If a new sample for the existing time series is received after that, then it is treated as the first sample for a new time series.
|
||||
This may lead to the following issues:
|
||||
@@ -632,16 +661,16 @@ These issues can be fixed in the following ways:
|
||||
- By increasing the `interval` option at [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config), so it covers the expected
|
||||
delays in data ingestion pipelines.
|
||||
- By specifying the `staleness_interval` option at [stream aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config), so it covers the expected
|
||||
delays in data ingestion pipelines. By default, the `staleness_interval` equals to `2 x interval`.
|
||||
delays in data ingestion pipelines. By default, the `staleness_interval` is equal to `2 x interval`.
|
||||
|
||||
## High resource usage
|
||||
|
||||
The following solutions can help reducing memory usage and CPU usage during streaming aggregation:
|
||||
The following solutions can help reduce memory usage and CPU usage during streaming aggregation:
|
||||
|
||||
- To use more specific `match` filters at [streaming aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config), so only the really needed
|
||||
[raw samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples) are aggregated.
|
||||
- To increase aggregation interval by specifying bigger duration for the `interval` option at [streaming aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config).
|
||||
- To generate lower number of output time series by using less specific [`by` list](#aggregating-by-labels) or more specific [`without` list](#aggregating-by-labels).
|
||||
- To increase the aggregation interval by specifying a bigger duration for the `interval` option at [streaming aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#stream-aggregation-config).
|
||||
- To generate a lower number of output time series by using less specific [`by` list](#aggregating-by-labels) or more specific [`without` list](#aggregating-by-labels).
|
||||
- To drop unneeded long labels in input samples via [input_relabel_configs](#relabeling).
|
||||
|
||||
## Cluster mode
|
||||
@@ -654,32 +683,32 @@ For example, if more than one `vmagent` instance calculates [increase](https://d
|
||||
with `by: [path]` option, then all the `vmagent` instances will aggregate samples to the same set of time series with different `path` labels.
|
||||
The proper fix would be [adding a unique label](https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics) for all the output samples
|
||||
produced by each `vmagent`, so they are aggregated into distinct sets of [time series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series).
|
||||
These time series then can be aggregated later as needed during querying.
|
||||
These time series can then be aggregated later as needed during querying.
|
||||
|
||||
If `vmagent` instances run in Docker or Kubernetes, then you can refer `POD_NAME` or `HOSTNAME` environment variables
|
||||
If `vmagent` instances run in Docker or Kubernetes, then you can refer to `POD_NAME` or `HOSTNAME` environment variables
|
||||
as a unique label value per each `vmagent` via `-remoteWrite.label=vmagent=%{HOSTNAME}` command-line flag.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#environment-variables) on how to refer environment variables in VictoriaMetrics components.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#environment-variables) on how to refer to environment variables in VictoriaMetrics components.
|
||||
|
||||
## Common mistakes
|
||||
|
||||
### Put aggregator behind load balancer
|
||||
|
||||
When configuring the aggregation rule, make sure that `vmagent` receives all the required data to satisfy the `match` rule.
|
||||
If traffic to the vmagent goes through the load balancer, it could happen that vmagent will be receiving only fraction of the data
|
||||
When configuring the aggregation rule, ensure that `vmagent` receives all required data to satisfy the `match` rule.
|
||||
If traffic to the vmagent goes through the load balancer, it could happen that vmagent will be receiving only a fraction of the data
|
||||
and produce incomplete aggregations.
|
||||
|
||||
To keep aggregation results consistent, make sure that vmagent receives all the required data for aggregation. In case if you need to
|
||||
To keep aggregation results consistent, ensure that vmagent receives all required data for aggregation. In case you need to
|
||||
split the load across multiple vmagents, try sharding the traffic among them via metric names or labels.
|
||||
For example, see how vmagent could consistently [shard data across remote write destinations](https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages)
|
||||
via `-remoteWrite.shardByURL.labels` or `-remoteWrite.shardByURL.ignoreLabels` cmd-line flags.
|
||||
|
||||
### Create aggregator per each recording rule
|
||||
|
||||
Stream aggregation can be used as alternative for [recording rules](#recording-rules-alternative).
|
||||
But creating an aggregation rule per each recording rule can lead to elevated resource usage on the vmagent,
|
||||
Stream aggregation can be used as an alternative for [recording rules](#recording-rules-alternative).
|
||||
But creating an aggregation rule for each recording rule can lead to elevated resource usage on the vmagent,
|
||||
because the ingestion stream should be matched against every configured aggregation rule.
|
||||
|
||||
To optimize this, we recommend merging together aggregations which only differ in match expressions.
|
||||
To optimize this, we recommend merging together aggregations that only differ in match expressions.
|
||||
For example, let's see the following list of recording rules:
|
||||
|
||||
```yaml
|
||||
@@ -709,7 +738,7 @@ These rules can be effectively converted into a single aggregation rule:
|
||||
replacement: "instance:$1:rate:sum"
|
||||
```
|
||||
|
||||
**Note**: having separate aggregator for a certain `match` expression can only be justified when aggregator cannot keep up with all
|
||||
**Note**: having a separate aggregator for a certain `match` expression can only be justified when the aggregator cannot keep up with all
|
||||
the data pushed to an aggregator within an aggregation interval.
|
||||
|
||||
### Use identical --remoteWrite.streamAggr.config for all remote writes
|
||||
@@ -721,9 +750,9 @@ across multiple `-remoteWrite.url`.
|
||||
|
||||
### Use aggregated metrics like original ones
|
||||
|
||||
Stream aggregation allows keeping original metric names after aggregation by using `keep_metric_names` setting.
|
||||
But the "meaning" of aggregated metrics is usually different to original ones after the aggregation.
|
||||
Make sure that you updated queries in your alerting rules and dashboards accordingly if you used `keep_metric_names` setting.
|
||||
Stream aggregation allows keeping original metric names after aggregation by using the `keep_metric_names` setting.
|
||||
But the "meaning" of aggregated metrics is usually different from that of the original metrics.
|
||||
Make sure that you update queries in your alerting rules and dashboards accordingly if you used the `keep_metric_names` setting.
|
||||
|
||||
### Use different deduplication intervals on storage and vmagent
|
||||
|
||||
@@ -734,7 +763,7 @@ To avoid this, set `-streamAggr.dedupInterval` or `-remoteWrite.streamAggr.dedup
|
||||
|
||||
---
|
||||
|
||||
Section below contains backward-compatible anchors for links that were moved or renamed.
|
||||
The section below contains backward-compatible anchors for links that were moved or renamed.
|
||||
|
||||
###### Configuration
|
||||
|
||||
|
||||
@@ -268,6 +268,40 @@ for the collected samples. Examples:
|
||||
```sh
|
||||
./vmagent -remoteWrite.url=http://remote-storage/api/v1/write -streamAggr.dropInputLabels=replica -streamAggr.dedupInterval=60s
|
||||
```
|
||||
|
||||
### Monitoring Data eXchange
|
||||
|
||||
The MDX (Monitoring Data eXchange) is a monitoring of monitoring metrics collection and sharing feature.
|
||||
It aims to send only metrics from the VictoriaMetrics services to the corresponding `-remoteWrite.url`, discarding metrics from non-VictoriaMetrics services.
|
||||
|
||||
When enabling MDX for the `-remoteWrite.url`, `vmagent` will only forward metrics from the instances that emit `vm_app_version`, which is a metric that all VictoriaMetrics services will emit.
|
||||
The number of dropped rows from non-VictoriaMetrics services is exposed as `vmagent_remotewrite_mdx_rows_dropped_total`.
|
||||
|
||||
To enable MDX, set `-remoteWrite.mdx.enable=true` for the target URL and `-remoteWrite.mdx.enable=false` for other URLs:
|
||||
|
||||
```sh
|
||||
./vmagent \
|
||||
-remoteWrite.url=http://service-to-keep-all-metrics:8428/api/v1/write \
|
||||
-remoteWrite.mdx.enable=false \
|
||||
-remoteWrite.url=http://service-to-keep-only-vm-metrics:8428/api/v1/write \
|
||||
-remoteWrite.mdx.enable=true
|
||||
```
|
||||
|
||||
`vmagent` relies on the `vm_app_version` metric to identify the VictoriaMetrics instance. `vmagent` will not recognize an instance as a VictoriaMetrics instance and retain the instance's metrics until it receives the `vm_app_version` metric from this instance.
|
||||
|
||||
If the metrics from the VictoriaMetrics instances have specific label and value in your setup(e.g. service,app label), you can also specify the label name in `-mdx.keepMetricsWithLabel.name` and its value in `-mdx.keepMetricsWithLabel.value`. `vmagent` will retain metrics with the specified label and value to the MDX URL:
|
||||
|
||||
```sh
|
||||
./vmagent \
|
||||
-remoteWrite.url=http://service-to-keep-only-vm-metrics:8428/api/v1/write \
|
||||
-remoteWrite.mdx.enable=true \
|
||||
-mdx.keepMetricsWithLabel.name=service \
|
||||
-mdx.keepMetricsWithLabel.value=victoriametrics
|
||||
```
|
||||
Otherwise, `vmagent` will rely on the `vm_app_version` metric mechanism to automatically determine which metrics should be retained.
|
||||
|
||||
If `vmagent` does not receive metrics from a Victoriametrics instance for a period, it will remove that instance from the discovered VictoriaMetrics instances list to reduce memory usage.
|
||||
The TTL period can be configured by `-mdx.instanceEntryTTL` and defaults to 1 hour, it should work well in most scenarios. If you want to release memory more promptly, you can reduce the value, but note that the minimum recommended value is 5 times scrape interval.
|
||||
|
||||
### Life of a sample
|
||||
|
||||
@@ -285,18 +319,20 @@ flowchart TB
|
||||
F --> G[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#replication-and-high-availability">replicate</a> to each <b>-remoteWrite.url</b><br/>or <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages">shard</a> if <b>-remoteWrite.shardByURL</b> is set]
|
||||
|
||||
%% Left branch
|
||||
G --> H1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
H1 --> H2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
H2 --> H3["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
H3 --> H4[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
H4 --> H5[[push to <b>-remoteWrite.url</b>]]
|
||||
G --> H1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange/">mdx filter</a><br><b>-remoteWrite.mdx.enable</b>]
|
||||
H1 --> H2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
H2 --> H3[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
H3 --> H4["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
H4 --> H5[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
H5 --> H6[[push to <b>-remoteWrite.url</b>]]
|
||||
|
||||
%% Right branch
|
||||
G --> R1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
R1 --> R2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
R2 --> R3["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
R3 --> R4[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
R4 --> R5[[push to <b>-remoteWrite.url</b>]]
|
||||
G --> R1[per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange">mdx filter</a><br><b>-remoteWrite.mdx.enable</b>]
|
||||
R1 --> R2[per-url <a href="https://docs.victoriametrics.com/victoriametrics/relabeling/">relabeling</a><br><b>-remoteWrite.urlRelabelConfig</b>]
|
||||
R2 --> R3[per-url <a href="https://docs.victoriametrics.com/victoriametrics/stream-aggregation">aggregation</a><br><b>-remoteWrite.streamAggr.config</b><br><b>-remoteWrite.streamAggr.dedupInterval</b>]
|
||||
R3 --> R4["per-url <a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue">queue</a> (default: enabled)<br><b>-remoteWrite.disableOnDiskQueue</b>"]
|
||||
R4 --> R5[<a href="https://docs.victoriametrics.com/victoriametrics/vmagent/#adding-labels-to-metrics">add extra labels</a><br><b>-remoteWrite.label</b>]
|
||||
R5 --> R6[[push to <b>-remoteWrite.url</b>]]
|
||||
```
|
||||
|
||||
Scraping has additional settings that can be applied before samples are pushed to the processing pipeline above:
|
||||
@@ -501,9 +537,24 @@ scrape_configs:
|
||||
target_label: vm_account_id
|
||||
```
|
||||
|
||||
In addition, vmagent could obtain tenant identifier from `__tenant_id__` label at target discovery phase.
|
||||
It implicitly converts `__tenant_id__` label into `vm_account_id` and `vm_project_id` labels and attaches
|
||||
it to the scraped metrics and metrics metadata.
|
||||
For example, the following relabeling rule instructs sending metrics to `10:5` [tenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy)
|
||||
defined in the `prometheus.io/tenant_id: 10:5` annotation of Kubernetes pod deployment:
|
||||
|
||||
```yaml
|
||||
scrape_configs:
|
||||
- kubernetes_sd_configs:
|
||||
- role: pod
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_tenant_id]
|
||||
target_label: __tenant_id__
|
||||
```
|
||||
|
||||
`vmagent` can accept data via the same multitenant endpoints (`/insert/<accountID>/<suffix>`) as `vminsert` at [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/)
|
||||
does according to [these docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format) if `-enableMultitenantHandlers` command-line flag is set.
|
||||
In this case, vmagent automatically converts tenant identifiers from the URL to `vm_account_id` and `vm_project_id` labels.
|
||||
In this case, vmagent automatically converts tenant identifiers from the URL to `vm_account_id` and `vm_project_id` labels and sets tenant info in metadata.
|
||||
These tenant labels are added before applying [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) specified via `-remoteWrite.relabelConfig`
|
||||
and `-remoteWrite.urlRelabelConfig` command-line flags. Metrics with `vm_account_id` and `vm_project_id` labels can be routed to the corresponding tenants
|
||||
when specifying `-remoteWrite.url` to [multitenant url at VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels).
|
||||
@@ -665,10 +716,14 @@ e.g. it sets `scrape_series_added` metric to zero. See [these docs](#automatical
|
||||
`vmagent` accepts{{% available_from "v1.137.0" %}} metric metadata exposed by scrape targets in [Prometheus exposition format](https://github.com/prometheus/docs/blob/main/docs/instrumenting/exposition_formats.md), received via [Prometheus remote write v1](https://prometheus.io/docs/specs/prw/remote_write_spec/) or [OpenTelemetry protocol](https://github.com/open-telemetry/opentelemetry-proto/blob/v1.7.0/opentelemetry/proto/metrics/v1/metrics.proto) by default. Set `-enableMetadata=false` to disable metadata processing{{% available_from "v1.125.1" %}}.
|
||||
During processing, metadata won't be dropped or modified by [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/) or [streaming aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/).
|
||||
|
||||
When `-enableMultitenantHandlers` is enabled, vmagent adds tenant info to metadata received via the [multitenant endpoints](https://docs.victoriametrics.com/victoriametrics/vmagent/#multitenancy) (`/insert/<accountID>/<suffix>`). However, if `vm_account_id` or `vm_project_id` labels are added directly to metrics before reaching vmagent, and vmagent writes to the [vminsert multitenant endpoints](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy-via-labels), the tenant info won't be attached and the metadata will be stored under the default tenant of VictoriaMetrics cluster.
|
||||
When `-enableMultitenantHandlers` is enabled, vmagent adds tenant info to metadata specified in [multitenant endpoint](https://docs.victoriametrics.com/victoriametrics/vmagent/#multitenancy) (`/insert/<accountID>/<suffix>`).
|
||||
However, if the `/insert/multitenant/<suffix>` endpoint is used, vmagent preserves the tenant information provided in the metadata by the sender. If the sender does not specify tenant information, the default tenant `0:0` is used.
|
||||
|
||||
> Metadata requires extra memory, disk space, and network traffic.
|
||||
|
||||
Use `-remoteWrite.disableMetadata`{{% available_from "v1.140.0" %}} to fully disable sending metadata from vmagent.
|
||||
This reduces network traffic and resource usage when metadata is not required.
|
||||
|
||||
## Stream parsing mode
|
||||
|
||||
By default, `vmagent` parses the full response from the scrape target, applies [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/)
|
||||
|
||||
2
go.mod
2
go.mod
@@ -11,7 +11,7 @@ require (
|
||||
github.com/VictoriaMetrics/easyproto v1.2.0
|
||||
github.com/VictoriaMetrics/fastcache v1.13.3
|
||||
github.com/VictoriaMetrics/metrics v1.43.2
|
||||
github.com/VictoriaMetrics/metricsql v0.86.1
|
||||
github.com/VictoriaMetrics/metricsql v0.87.0
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.5
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.14
|
||||
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.22.13
|
||||
|
||||
8
go.sum
8
go.sum
@@ -58,14 +58,10 @@ github.com/VictoriaMetrics/easyproto v1.2.0 h1:FJT9uNXA2isppFuJErbLqD306KoFlehl7
|
||||
github.com/VictoriaMetrics/easyproto v1.2.0/go.mod h1:QlGlzaJnDfFd8Lk6Ci/fuLxfTo3/GThPs2KH23mv710=
|
||||
github.com/VictoriaMetrics/fastcache v1.13.3 h1:rBabE0iIxcqKEMCwUmwHZ9dgEqXerg8FRbRDUvC7OVc=
|
||||
github.com/VictoriaMetrics/fastcache v1.13.3/go.mod h1:hHXhl4DA2fTL2HTZDJFXWgW0LNjo6B+4aj2Wmng3TjU=
|
||||
github.com/VictoriaMetrics/metrics v1.43.1 h1:j3Ba4l2K1q3pkvzPqt6aSiQ2DBlAEj3VPVeBtpR3t/Y=
|
||||
github.com/VictoriaMetrics/metrics v1.43.1/go.mod h1:xDM82ULLYCYdFRgQ2JBxi8Uf1+8En1So9YUwlGTOqTc=
|
||||
github.com/VictoriaMetrics/metrics v1.43.2 h1:+8pIQEGwchKS5CYFyvv3LKvNXGi7baZ9hmIV4RHqibY=
|
||||
github.com/VictoriaMetrics/metrics v1.43.2/go.mod h1:xDM82ULLYCYdFRgQ2JBxi8Uf1+8En1So9YUwlGTOqTc=
|
||||
github.com/VictoriaMetrics/metricsql v0.86.0 h1:IFD08amp+nkW6I+pB3+iyamewkIrbEojkQP4cmEbwkU=
|
||||
github.com/VictoriaMetrics/metricsql v0.86.0/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ=
|
||||
github.com/VictoriaMetrics/metricsql v0.86.1 h1:GuNqbbIaWZ9eNa6dOCi6itG/fJ96TGOFV3KWLnAyC2o=
|
||||
github.com/VictoriaMetrics/metricsql v0.86.1/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ=
|
||||
github.com/VictoriaMetrics/metricsql v0.87.0 h1:Koxh3GkB/Z0f3O0bEChVFxiE4YZoxYyn5TzmGJfSfaw=
|
||||
github.com/VictoriaMetrics/metricsql v0.87.0/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ=
|
||||
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
|
||||
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
|
||||
github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0=
|
||||
|
||||
156
lib/mdx/filter.go
Normal file
156
lib/mdx/filter.go
Normal file
@@ -0,0 +1,156 @@
|
||||
package mdx
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var (
|
||||
mdxInstanceEntryTTL = flagutil.NewExtendedDuration("mdx.instanceEntryTTL", "1h", "After not receiving metrics for the VictoriaMetrics instance for the configured time, remove this instance from the MDX instance list."+
|
||||
"It should be several times the scrape interval for VictoriaMetrics instances. The cleanup mechanism helps release memory after a VictoriaMetrics instance is permanently taken offline, preventing the MDX instance list from growing indefinitely."+
|
||||
"It must be explicitly set when -remoteWrite.mdx.enable is set and requires explicit unit suffixes (s, m, h, d, w, y). Please see https://docs.victoriametrics.com/victoriametrics/vmagent/#monitoring-data-exchange")
|
||||
keepMetricsWithLabelName = flag.String("mdx.keepMetricsWithLabel.name", "", "Keep metrics containing specific label and label value to the `-remoteWrite.url` that configured with `-remoteWrite.mdx.enable=true`. "+
|
||||
"See also -mdx.keepMetricsWithLabel.value.")
|
||||
keepMetricsWithLabelValue = flag.String("mdx.keepMetricsWithLabel.value", "", "Keep metrics containing specific label and label value to the `-remoteWrite.url` that configured with `-remoteWrite.mdx.enable=true`. "+
|
||||
"See also -mdx.keepMetricsWithLabel.name")
|
||||
)
|
||||
|
||||
// Filter manages the list of VictoriaMetrics instances discovered from previous data flow, and uses it to filter out metrics that are not from VictoriaMetrics instances.
|
||||
type Filter struct {
|
||||
mu sync.RWMutex
|
||||
wg sync.WaitGroup
|
||||
stopCh chan struct{}
|
||||
vmInstance map[string]*atomic.Int64
|
||||
filterByLabel bool
|
||||
}
|
||||
|
||||
var GlobalFilter *Filter
|
||||
|
||||
func InitGlobalFilter() {
|
||||
GlobalFilter = &Filter{
|
||||
vmInstance: make(map[string]*atomic.Int64),
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
if len(*keepMetricsWithLabelName) > 0 && len(*keepMetricsWithLabelValue) > 0 {
|
||||
GlobalFilter.filterByLabel = true
|
||||
} else if len(*keepMetricsWithLabelName) > 0 || len(*keepMetricsWithLabelValue) > 0 {
|
||||
logger.Fatalf("Both -mdx.keepMetricsWithLabel.name and -mdx.keepMetricsWithLabel.value must be set if one of them is set.")
|
||||
}
|
||||
|
||||
_ = metrics.NewGauge("vmagent_mdx_tracked_vm_instances", func() float64 {
|
||||
GlobalFilter.mu.RLock()
|
||||
n := len(GlobalFilter.vmInstance)
|
||||
GlobalFilter.mu.RUnlock()
|
||||
return float64(n)
|
||||
})
|
||||
if mdxInstanceEntryTTL.Milliseconds() != 0 {
|
||||
GlobalFilter.wg.Go(GlobalFilter.cleanStale)
|
||||
}
|
||||
}
|
||||
|
||||
func (filter *Filter) cleanStale() {
|
||||
ttlSec := int64(mdxInstanceEntryTTL.Duration().Seconds())
|
||||
ticker := time.NewTicker(5 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
filter.mu.Lock()
|
||||
currTs := time.Now().Unix()
|
||||
|
||||
dst := make(map[string]*atomic.Int64, len(filter.vmInstance))
|
||||
for k, v := range filter.vmInstance {
|
||||
if currTs-v.Load() < ttlSec {
|
||||
dst[k] = v
|
||||
}
|
||||
}
|
||||
if len(dst) != len(filter.vmInstance) {
|
||||
filter.vmInstance = dst
|
||||
}
|
||||
filter.mu.Unlock()
|
||||
case <-filter.stopCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (filter *Filter) MustStop() {
|
||||
if filter == nil {
|
||||
return
|
||||
}
|
||||
close(filter.stopCh)
|
||||
filter.wg.Wait()
|
||||
}
|
||||
|
||||
func (filter *Filter) Filter(tss []prompb.TimeSeries, resTss []prompb.TimeSeries) []prompb.TimeSeries {
|
||||
for _, ts := range tss {
|
||||
isVmInstance := false
|
||||
var instance string
|
||||
var job string
|
||||
isStored := false
|
||||
for _, label := range ts.Labels {
|
||||
if label.Name == "__name__" && label.Value == "vm_app_version" {
|
||||
isVmInstance = true
|
||||
}
|
||||
if label.Name == "instance" {
|
||||
instance = label.Value
|
||||
}
|
||||
if label.Name == "job" {
|
||||
job = label.Value
|
||||
}
|
||||
if filter.filterByLabel {
|
||||
if label.Name == *keepMetricsWithLabelName && label.Value == *keepMetricsWithLabelValue {
|
||||
resTss = append(resTss, ts)
|
||||
isStored = true
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(job) == 0 || len(instance) == 0 {
|
||||
continue
|
||||
}
|
||||
identicalKey := fmt.Sprintf("%q:%q", job, instance)
|
||||
currTs := time.Now().Unix()
|
||||
|
||||
//fast path
|
||||
filter.mu.RLock()
|
||||
ptr, ok := filter.vmInstance[identicalKey]
|
||||
filter.mu.RUnlock()
|
||||
if ok {
|
||||
ptr.Store(currTs)
|
||||
if !isStored {
|
||||
resTss = append(resTss, ts)
|
||||
isStored = true
|
||||
}
|
||||
continue
|
||||
}
|
||||
if !isVmInstance {
|
||||
continue
|
||||
}
|
||||
|
||||
// slow path
|
||||
if !isStored {
|
||||
resTss = append(resTss, ts)
|
||||
isStored = true
|
||||
}
|
||||
filter.mu.Lock()
|
||||
if ptr, ok = filter.vmInstance[identicalKey]; ok {
|
||||
ptr.Store(currTs)
|
||||
} else {
|
||||
v := atomic.Int64{}
|
||||
v.Store(currTs)
|
||||
filter.vmInstance[identicalKey] = &v
|
||||
}
|
||||
filter.mu.Unlock()
|
||||
}
|
||||
|
||||
return resTss
|
||||
}
|
||||
226
lib/mdx/filter_test.go
Normal file
226
lib/mdx/filter_test.go
Normal file
@@ -0,0 +1,226 @@
|
||||
package mdx
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
)
|
||||
|
||||
func timeSeriessToString(tss []prompb.TimeSeries) string {
|
||||
a := make([]string, len(tss))
|
||||
for i, ts := range tss {
|
||||
a[i] = timeSeriesToString(ts)
|
||||
}
|
||||
sort.Strings(a)
|
||||
return strings.Join(a, "")
|
||||
}
|
||||
|
||||
func timeSeriesToString(ts prompb.TimeSeries) string {
|
||||
labelsString := promrelabel.LabelsToString(ts.Labels)
|
||||
|
||||
return fmt.Sprintf("%s\n", labelsString)
|
||||
}
|
||||
|
||||
func TestMdxInstanceFilter(t *testing.T) {
|
||||
filter := Filter{
|
||||
vmInstance: make(map[string]*atomic.Int64),
|
||||
}
|
||||
_ = mdxInstanceEntryTTL.Set("120s")
|
||||
|
||||
f := func(input []prompb.TimeSeries, expectedOutput []prompb.TimeSeries, expectedInstanceMap map[string]int64) {
|
||||
t.Helper()
|
||||
output := filter.Filter(input, []prompb.TimeSeries{})
|
||||
if len(output) != len(expectedOutput) {
|
||||
t.Fatalf("unexpected output length; got %d; want %d", len(output), len(expectedOutput))
|
||||
}
|
||||
if timeSeriessToString(output) != timeSeriessToString(expectedOutput) {
|
||||
t.Fatalf("unexpected output; got %s; want %s", timeSeriessToString(output), timeSeriessToString(expectedOutput))
|
||||
}
|
||||
if len(filter.vmInstance) != len(expectedInstanceMap) {
|
||||
t.Fatalf("unexpected instance map length; got %d; want %d", len(filter.vmInstance), len(expectedInstanceMap))
|
||||
}
|
||||
for k := range expectedInstanceMap {
|
||||
if filter.vmInstance[k] == nil {
|
||||
t.Fatalf("missing instance in filter.vmInstance: %q", k)
|
||||
}
|
||||
}
|
||||
}
|
||||
f([]prompb.TimeSeries{{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "go_gc_duration_seconds"},
|
||||
{Name: "instance", Value: "node-exporter1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "http_request_duration_seconds"},
|
||||
{Name: "instance", Value: "service1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "vmagent1:8429"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
}},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "vmagent1:8429"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
}, map[string]int64{
|
||||
fmt.Sprintf("%q:%q", "test", "victoria-metrics1:8428"): 0,
|
||||
fmt.Sprintf("%q:%q", "test", "vmagent1:8429"): 0,
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func TestMdxFilterByLabel(t *testing.T) {
|
||||
filter := Filter{
|
||||
vmInstance: make(map[string]*atomic.Int64),
|
||||
filterByLabel: true,
|
||||
}
|
||||
*keepMetricsWithLabelName = "service"
|
||||
*keepMetricsWithLabelValue = "victoriametrics"
|
||||
|
||||
f := func(input []prompb.TimeSeries, expectedOutput []prompb.TimeSeries) {
|
||||
t.Helper()
|
||||
output := filter.Filter(input, []prompb.TimeSeries{})
|
||||
if len(output) != len(expectedOutput) {
|
||||
t.Fatalf("unexpected output length; got %d; want %d", len(output), len(expectedOutput))
|
||||
}
|
||||
if timeSeriessToString(output) != timeSeriessToString(expectedOutput) {
|
||||
t.Fatalf("unexpected output; got %s; want %s", timeSeriessToString(output), timeSeriessToString(expectedOutput))
|
||||
}
|
||||
}
|
||||
f([]prompb.TimeSeries{{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "up"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "go_gc_duration_seconds"},
|
||||
{Name: "instance", Value: "node-exporter1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
}},
|
||||
[]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "up"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
{Name: "service", Value: "victoriametrics"},
|
||||
},
|
||||
},
|
||||
})
|
||||
*keepMetricsWithLabelName = ""
|
||||
*keepMetricsWithLabelValue = ""
|
||||
}
|
||||
|
||||
func TestMdxInstanceCleanup(t *testing.T) {
|
||||
t.Helper()
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
_ = mdxInstanceEntryTTL.Set("10s")
|
||||
InitGlobalFilter()
|
||||
|
||||
// init instance list
|
||||
GlobalFilter.Filter([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "go_gc_duration_seconds"},
|
||||
{Name: "instance", Value: "node-exporter1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "http_request_duration_seconds"},
|
||||
{Name: "instance", Value: "service1"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
},
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "vmagent1:8429"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
}}, []prompb.TimeSeries{},
|
||||
)
|
||||
f := func(expectedInstanceMap map[string]int64) {
|
||||
t.Helper()
|
||||
if len(GlobalFilter.vmInstance) != len(expectedInstanceMap) {
|
||||
t.Fatalf("unexpected instance map length; got %d; want %d", len(GlobalFilter.vmInstance), len(expectedInstanceMap))
|
||||
}
|
||||
for k := range expectedInstanceMap {
|
||||
if GlobalFilter.vmInstance[k] == nil {
|
||||
t.Fatalf("missing instance in filter.vmInstance: %q", k)
|
||||
}
|
||||
}
|
||||
}
|
||||
f(map[string]int64{
|
||||
fmt.Sprintf("%q:%q", "test", "victoria-metrics1:8428"): 0,
|
||||
fmt.Sprintf("%q:%q", "test", "vmagent1:8429"): 0,
|
||||
})
|
||||
|
||||
// receive samples from victoria-metrics1:8428 after 9 seconds.
|
||||
time.Sleep(9 * time.Second)
|
||||
GlobalFilter.Filter([]prompb.TimeSeries{
|
||||
{
|
||||
Labels: []prompb.Label{
|
||||
{Name: "__name__", Value: "vm_app_version"},
|
||||
{Name: "instance", Value: "victoria-metrics1:8428"},
|
||||
{Name: "job", Value: "test"},
|
||||
},
|
||||
}}, []prompb.TimeSeries{},
|
||||
)
|
||||
|
||||
// no samples from vmagent1:8429 in the last 10 seconds, so it should be removed from the mdx instance list.
|
||||
time.Sleep(9 * time.Second)
|
||||
f(map[string]int64{
|
||||
fmt.Sprintf("%q:%q", "test", "victoria-metrics1:8428"): 0,
|
||||
})
|
||||
GlobalFilter.MustStop()
|
||||
})
|
||||
|
||||
}
|
||||
@@ -3,6 +3,7 @@ package storage
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"math"
|
||||
"math/rand"
|
||||
"reflect"
|
||||
"regexp"
|
||||
@@ -164,7 +165,9 @@ func TestSearch_VariousTimeRanges(t *testing.T) {
|
||||
mrs[i].Value = float64(i)
|
||||
}
|
||||
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{
|
||||
FutureRetention: time.Duration(math.MaxInt64),
|
||||
})
|
||||
defer s.MustClose()
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
|
||||
@@ -33,6 +33,7 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
retention2Days = 2 * 24 * time.Hour
|
||||
retention31Days = 31 * 24 * time.Hour
|
||||
retentionMax = 100 * 12 * retention31Days
|
||||
idbPrefilStart = time.Hour
|
||||
@@ -60,9 +61,10 @@ type Storage struct {
|
||||
// indexdb rotation.
|
||||
legacyNextRotationTimestamp atomic.Int64
|
||||
|
||||
path string
|
||||
cachePath string
|
||||
retentionMsecs int64
|
||||
path string
|
||||
cachePath string
|
||||
retentionMsecs int64
|
||||
futureRetentionMsecs int64
|
||||
|
||||
// lock file for exclusive access to the storage on the given path.
|
||||
flockF *os.File
|
||||
@@ -160,6 +162,7 @@ type Storage struct {
|
||||
// OpenOptions optional args for MustOpenStorage
|
||||
type OpenOptions struct {
|
||||
Retention time.Duration
|
||||
FutureRetention time.Duration
|
||||
MaxHourlySeries int
|
||||
MaxDailySeries int
|
||||
DisablePerDayIndex bool
|
||||
@@ -181,6 +184,7 @@ func MustOpenStorage(path string, opts OpenOptions) *Storage {
|
||||
if retention <= 0 || retention > retentionMax {
|
||||
retention = retentionMax
|
||||
}
|
||||
futureRetention := max(opts.FutureRetention, retention2Days)
|
||||
idbPrefillStart := opts.IDBPrefillStart
|
||||
if idbPrefillStart <= 0 {
|
||||
idbPrefillStart = time.Hour
|
||||
@@ -189,6 +193,7 @@ func MustOpenStorage(path string, opts OpenOptions) *Storage {
|
||||
path: path,
|
||||
cachePath: filepath.Join(path, cacheDirname),
|
||||
retentionMsecs: retention.Milliseconds(),
|
||||
futureRetentionMsecs: futureRetention.Milliseconds(),
|
||||
stopCh: make(chan struct{}),
|
||||
idbPrefillStartSeconds: idbPrefillStart.Milliseconds() / 1000,
|
||||
}
|
||||
|
||||
@@ -5,11 +5,13 @@ package storage
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"testing/synctest"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
@@ -825,66 +827,320 @@ func TestStorageNextDayMetricIDs_update(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
// TestStorageLastPartitionMetrics checks that "last partition" metrics
|
||||
// correspond to the current partition and not some future partition.
|
||||
func TestStorageLastPartitionMetrics(t *testing.T) {
|
||||
defer testRemoveAll(t)
|
||||
|
||||
addRows := func(t *testing.T, s *Storage, prefix string, tr TimeRange) {
|
||||
t.Helper()
|
||||
const numSeries = 1000
|
||||
rng := rand.New(rand.NewSource(1))
|
||||
mrs := testGenerateMetricRowsWithPrefix(rng, numSeries, prefix, tr)
|
||||
want := s.newTimeseriesCreated.Load() + numSeries
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
if got := s.newTimeseriesCreated.Load(); got != want {
|
||||
t.Errorf("unexpected number of new timeseries: got %d, want %d", got, want)
|
||||
}
|
||||
// wait for merged parts to be attached to the table
|
||||
time.Sleep(time.Minute)
|
||||
}
|
||||
assertLastPartitionEmpty := func(t *testing.T, s *Storage) {
|
||||
t.Helper()
|
||||
var m Metrics
|
||||
s.UpdateMetrics(&m)
|
||||
lpm := m.TableMetrics.LastPartition
|
||||
if lpm.SmallPartsCount != 0 {
|
||||
t.Fatalf("unexpected last partition SmallPartsCount: got %d, want 0", lpm.SmallPartsCount)
|
||||
}
|
||||
if lpm.IndexDBMetrics.FileBlocksCount != 0 {
|
||||
t.Fatalf("unexpected last partition IndexDBMetrics.FileBlocksCount: got %d, want 0", lpm.IndexDBMetrics.FileBlocksCount)
|
||||
}
|
||||
}
|
||||
assertLastPartitionNonEmpty := func(t *testing.T, s *Storage) {
|
||||
t.Helper()
|
||||
var m Metrics
|
||||
s.UpdateMetrics(&m)
|
||||
lpm := m.TableMetrics.LastPartition
|
||||
if lpm.SmallPartsCount == 0 {
|
||||
t.Fatalf("unexpected last partition SmallPartsCount: got 0, want > 0")
|
||||
}
|
||||
if lpm.IndexDBMetrics.FileBlocksCount == 0 {
|
||||
t.Fatalf("unexpected last partition IndexDBMetrics.FileBlocksCount: got 0, want > 0")
|
||||
}
|
||||
}
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
// Advance current time to 2h before the next month, 2000-01-31T22:00:00Z.
|
||||
time.Sleep(31*24*time.Hour - 2*time.Hour)
|
||||
ct := time.Now().UTC()
|
||||
|
||||
// Open the storage, make sure current partition is empty.
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{
|
||||
FutureRetention: 2 * 365 * 24 * time.Hour,
|
||||
})
|
||||
defer s.MustClose()
|
||||
assertLastPartitionEmpty(t, s)
|
||||
|
||||
// Insert rows with future timestamps. Current partition must be empty.
|
||||
addRows(t, s, "future", TimeRange{
|
||||
MinTimestamp: ct.Add(365 * 24 * time.Hour).UnixMilli(),
|
||||
MaxTimestamp: ct.Add(366 * 24 * time.Hour).UnixMilli(),
|
||||
})
|
||||
assertLastPartitionEmpty(t, s)
|
||||
|
||||
// Insert rows with timestamps within current partition.
|
||||
// Current partition must be not empty.
|
||||
addRows(t, s, "current", TimeRange{
|
||||
MinTimestamp: ct.UnixMilli(),
|
||||
MaxTimestamp: ct.Add(time.Hour).UnixMilli(),
|
||||
})
|
||||
assertLastPartitionNonEmpty(t, s)
|
||||
|
||||
// Advance current time to the the next month, 2000-02-01T00:30:00Z.
|
||||
// last partition is now 2000-02 and it must be empty.
|
||||
time.Sleep(2*time.Hour + time.Minute*30)
|
||||
assertLastPartitionEmpty(t, s)
|
||||
})
|
||||
}
|
||||
|
||||
func TestStorage_futureAndHistoricalRetention(t *testing.T) {
|
||||
defer testRemoveAll(t)
|
||||
|
||||
assertData := func(t *testing.T, s *Storage, tr TimeRange, want []MetricRow) {
|
||||
t.Helper()
|
||||
tfs := NewTagFilters()
|
||||
if err := tfs.Add(nil, []byte(".*"), false, true); err != nil {
|
||||
t.Fatalf("TagFilters.Add() failed unexpectedly: %v", err)
|
||||
}
|
||||
if err := testAssertSearchResult(s, tr, tfs, want); err != nil {
|
||||
t.Fatalf("[now: %v tr: %v] search failed unexpectedly: %v", time.Now().UTC(), &tr, err)
|
||||
}
|
||||
}
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
// synctests start at 2000-01-01T00:00:00Z
|
||||
|
||||
var s *Storage
|
||||
retention := 180 * 24 * time.Hour
|
||||
futureRetention := 180 * 24 * time.Hour
|
||||
|
||||
s = MustOpenStorage(t.Name(), OpenOptions{
|
||||
Retention: retention,
|
||||
FutureRetention: futureRetention,
|
||||
})
|
||||
|
||||
// Ingest samples for previous and future year. 10 samples per day.
|
||||
const numSeries = 10
|
||||
start := time.Date(1999, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
end := time.Date(2001, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
rng := rand.New(rand.NewSource(1))
|
||||
wantData := make(map[TimeRange][]MetricRow)
|
||||
for day := start; day.Before(end); {
|
||||
prefix := fmt.Sprintf("metric_%d_%d_%d", day.Year(), day.Month(), day.Day())
|
||||
tr := TimeRange{
|
||||
MinTimestamp: day.UnixMilli(),
|
||||
MaxTimestamp: day.UnixMilli() + msecPerDay - 1,
|
||||
}
|
||||
mrs := testGenerateMetricRowsWithPrefix(rng, numSeries, prefix, tr)
|
||||
wantData[tr] = mrs
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
|
||||
day = time.Date(day.Year(), day.Month(), day.Day()+1, 0, 0, 0, 0, time.UTC)
|
||||
}
|
||||
s.DebugFlush()
|
||||
|
||||
// Advance time one partition at a time. Before each time advancement,
|
||||
// check the query results for each day between the original start and end
|
||||
// time.
|
||||
//
|
||||
// This is to test how historical and future retentions affect the
|
||||
// stored data over time.
|
||||
now := time.Now().UTC()
|
||||
dataEnd := now.Add(futureRetention - 24*time.Hour)
|
||||
for now.Before(end) {
|
||||
for day := start; day.Before(end); {
|
||||
tr := TimeRange{
|
||||
MinTimestamp: day.UnixMilli(),
|
||||
MaxTimestamp: day.UnixMilli() + msecPerDay - 1,
|
||||
}
|
||||
dataStart := now.Add(-retention)
|
||||
if day.Before(dataStart) || day.After(dataEnd) {
|
||||
assertData(t, s, tr, nil)
|
||||
} else {
|
||||
assertData(t, s, tr, wantData[tr])
|
||||
}
|
||||
day = time.Date(day.Year(), day.Month(), day.Day()+1, 0, 0, 0, 0, time.UTC)
|
||||
}
|
||||
|
||||
s.MustClose()
|
||||
nextMonth := time.Date(now.Year(), now.Month()+1, 1, 0, 0, 0, 0, time.UTC)
|
||||
time.Sleep(nextMonth.Sub(now))
|
||||
now = nextMonth
|
||||
s = MustOpenStorage(t.Name(), OpenOptions{
|
||||
Retention: retention,
|
||||
FutureRetention: futureRetention,
|
||||
})
|
||||
}
|
||||
|
||||
s.MustClose()
|
||||
})
|
||||
}
|
||||
|
||||
func TestStorage_defaultFutureRetention(t *testing.T) {
|
||||
defer testRemoveAll(t)
|
||||
|
||||
assertData := func(t *testing.T, s *Storage, tr TimeRange, want []MetricRow) {
|
||||
t.Helper()
|
||||
tfs := NewTagFilters()
|
||||
if err := tfs.Add(nil, []byte(".*"), false, true); err != nil {
|
||||
t.Fatalf("TagFilters.Add() failed unexpectedly: %v", err)
|
||||
}
|
||||
if err := testAssertSearchResult(s, tr, tfs, want); err != nil {
|
||||
t.Fatalf("[now: %v tr: %v] search failed unexpectedly: %v", time.Now().UTC(), &tr, err)
|
||||
}
|
||||
}
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
// synctests start at 2000-01-01T00:00:00Z
|
||||
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
defer s.MustClose()
|
||||
|
||||
assertLastPartitionEmpty := func() {
|
||||
t.Helper()
|
||||
var m Metrics
|
||||
s.UpdateMetrics(&m)
|
||||
lpm := m.TableMetrics.LastPartition
|
||||
if lpm.SmallPartsCount != 0 {
|
||||
t.Fatalf("unexpected last partition SmallPartsCount: got %d, want 0", lpm.SmallPartsCount)
|
||||
}
|
||||
if lpm.IndexDBMetrics.FileBlocksCount != 0 {
|
||||
t.Fatalf("unexpected last partition IndexDBMetrics.FileBlocksCount: got %d, want 0", lpm.IndexDBMetrics.FileBlocksCount)
|
||||
}
|
||||
}
|
||||
assertLastPartitionNonEmpty := func() {
|
||||
t.Helper()
|
||||
var m Metrics
|
||||
s.UpdateMetrics(&m)
|
||||
lpm := m.TableMetrics.LastPartition
|
||||
if lpm.SmallPartsCount == 0 {
|
||||
t.Fatalf("unexpected last partition SmallPartsCount: got 0, want > 0")
|
||||
}
|
||||
if lpm.IndexDBMetrics.FileBlocksCount == 0 {
|
||||
t.Fatalf("unexpected last partition IndexDBMetrics.FileBlocksCount: got 0, want > 0")
|
||||
}
|
||||
}
|
||||
|
||||
// make sure last partition is empty before ingestion
|
||||
assertLastPartitionEmpty()
|
||||
|
||||
const numSeries = 1000
|
||||
|
||||
// Ingest samples for this and several days in the future. 10 samples
|
||||
// per hour.
|
||||
const numSeries = 10
|
||||
start := time.Now().UTC()
|
||||
end := start.Add(10 * 24 * time.Hour)
|
||||
rng := rand.New(rand.NewSource(1))
|
||||
tr := TimeRange{
|
||||
MinTimestamp: ct.Add(-time.Hour).UnixMilli(),
|
||||
MaxTimestamp: ct.UnixMilli(),
|
||||
}
|
||||
mrs := testGenerateMetricRowsWithPrefix(rng, numSeries, "metric.", tr)
|
||||
s.AddRows(mrs, 1)
|
||||
s.DebugFlush()
|
||||
if got, want := s.newTimeseriesCreated.Load(), uint64(numSeries); got != want {
|
||||
t.Errorf("unexpected number of new timeseries: got %d, want %d", got, want)
|
||||
}
|
||||
// wait for merged parts to be attached to the table
|
||||
time.Sleep(time.Minute)
|
||||
wantData := make(map[TimeRange][]MetricRow)
|
||||
for ts := start; ts.Before(end); {
|
||||
prefix := fmt.Sprintf("metric_%04d_%02d_%02d_%02d", ts.Year(), ts.Month(), ts.Day(), ts.Hour())
|
||||
tr := TimeRange{
|
||||
MinTimestamp: ts.UnixMilli(),
|
||||
MaxTimestamp: ts.UnixMilli() + msecPerHour - 1,
|
||||
}
|
||||
mrs := testGenerateMetricRowsWithPrefix(rng, numSeries, prefix, tr)
|
||||
wantData[tr] = mrs
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
|
||||
ts = ts.Add(time.Hour)
|
||||
}
|
||||
s.DebugFlush()
|
||||
|
||||
dataStart := start
|
||||
dataEnd := dataStart.Add(2*24*time.Hour - time.Hour)
|
||||
for ts := start; ts.Before(end); ts = ts.Add(time.Hour) {
|
||||
tr := TimeRange{
|
||||
MinTimestamp: ts.UnixMilli(),
|
||||
MaxTimestamp: ts.UnixMilli() + msecPerHour - 1,
|
||||
}
|
||||
if ts.Before(dataStart) || ts.After(dataEnd) {
|
||||
assertData(t, s, tr, nil)
|
||||
} else {
|
||||
assertData(t, s, tr, wantData[tr])
|
||||
}
|
||||
}
|
||||
|
||||
// last created partition is empty, but we're still at current month
|
||||
assertLastPartitionNonEmpty()
|
||||
// Advance current time to the the next month, 2000-02-01T00:30:00Z.
|
||||
// last partition must be 2000-02 now
|
||||
time.Sleep(2*time.Hour + time.Minute*30)
|
||||
// current month partition has no data ingested
|
||||
assertLastPartitionEmpty()
|
||||
})
|
||||
}
|
||||
|
||||
func TestStorage_partitionsOutsideRetentionAreRemoved(t *testing.T) {
|
||||
defer testRemoveAll(t)
|
||||
|
||||
assertPathExists := func(t *testing.T, path string, want bool) {
|
||||
t.Helper()
|
||||
if got := fs.IsPathExist(path); got != want {
|
||||
t.Fatalf("unexpected path existence test result for %s: got %t, want %t", path, got, want)
|
||||
}
|
||||
}
|
||||
|
||||
assertPtExists := func(t *testing.T, pt string, want bool) {
|
||||
t.Helper()
|
||||
assertPathExists(t, filepath.Join(t.Name(), "data", "small", pt), want)
|
||||
assertPathExists(t, filepath.Join(t.Name(), "data", "big", pt), want)
|
||||
assertPathExists(t, filepath.Join(t.Name(), "data", "indexdb", pt), want)
|
||||
}
|
||||
|
||||
synctest.Test(t, func(t *testing.T) {
|
||||
// synctests start at 2000-01-01T00:00:00Z
|
||||
|
||||
retention := 80 * 24 * time.Hour
|
||||
futureRetention := 180 * 24 * time.Hour
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{
|
||||
Retention: retention,
|
||||
FutureRetention: futureRetention,
|
||||
})
|
||||
|
||||
// Ingest samples with future timestamps that span the entire retention.
|
||||
// This should create the corresponding partitions.
|
||||
rng := rand.New(rand.NewSource(1))
|
||||
mrs := testGenerateMetricRowsWithPrefix(rng, 1000, "metric", TimeRange{
|
||||
MinTimestamp: time.Now().Add(-retention).UnixMilli(),
|
||||
MaxTimestamp: time.Now().Add(futureRetention - time.Second).UnixMilli(),
|
||||
})
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
|
||||
assertPtExists(t, "1999_09", false)
|
||||
assertPtExists(t, "1999_10", true)
|
||||
assertPtExists(t, "1999_11", true)
|
||||
assertPtExists(t, "1999_12", true)
|
||||
assertPtExists(t, "2000_01", true)
|
||||
assertPtExists(t, "2000_02", true)
|
||||
assertPtExists(t, "2000_03", true)
|
||||
assertPtExists(t, "2000_04", true)
|
||||
assertPtExists(t, "2000_05", true)
|
||||
assertPtExists(t, "2000_06", true)
|
||||
assertPtExists(t, "2000_07", false)
|
||||
|
||||
// Reopen storage with smaller future retention. Future partitions
|
||||
// outside the new future retention must be removed.
|
||||
s.MustClose()
|
||||
s = MustOpenStorage(t.Name(), OpenOptions{
|
||||
Retention: retention,
|
||||
FutureRetention: 45 * 24 * time.Hour,
|
||||
})
|
||||
|
||||
// Wait for background task to remove future partitions.
|
||||
time.Sleep(2 * time.Minute)
|
||||
|
||||
assertPtExists(t, "1999_09", false)
|
||||
assertPtExists(t, "1999_10", true)
|
||||
assertPtExists(t, "1999_11", true)
|
||||
assertPtExists(t, "1999_12", true)
|
||||
assertPtExists(t, "2000_01", true)
|
||||
assertPtExists(t, "2000_02", true)
|
||||
assertPtExists(t, "2000_03", false)
|
||||
assertPtExists(t, "2000_04", false)
|
||||
assertPtExists(t, "2000_05", false)
|
||||
assertPtExists(t, "2000_06", false)
|
||||
assertPtExists(t, "2000_07", false)
|
||||
|
||||
// Reopen storage with smaller retention. Historical partitions
|
||||
// outside the new future retention must be removed.
|
||||
s.MustClose()
|
||||
s = MustOpenStorage(t.Name(), OpenOptions{
|
||||
Retention: 45 * 24 * time.Hour,
|
||||
FutureRetention: 45 * 24 * time.Hour,
|
||||
})
|
||||
|
||||
// Wait for background task to remove future partitions.
|
||||
time.Sleep(2 * time.Minute)
|
||||
|
||||
assertPtExists(t, "1999_09", false)
|
||||
assertPtExists(t, "1999_10", false)
|
||||
assertPtExists(t, "1999_11", true)
|
||||
assertPtExists(t, "1999_12", true)
|
||||
assertPtExists(t, "2000_01", true)
|
||||
assertPtExists(t, "2000_02", true)
|
||||
assertPtExists(t, "2000_03", false)
|
||||
assertPtExists(t, "2000_04", false)
|
||||
assertPtExists(t, "2000_05", false)
|
||||
assertPtExists(t, "2000_06", false)
|
||||
assertPtExists(t, "2000_07", false)
|
||||
|
||||
s.MustClose()
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1877,7 +1877,7 @@ func TestStorageRowsNotAdded(t *testing.T) {
|
||||
})
|
||||
|
||||
retention = 48 * time.Hour
|
||||
minTimestamp = time.Now().Add(7 * 24 * time.Hour).UnixMilli()
|
||||
minTimestamp = maxUnixMilli + 1
|
||||
maxTimestamp = minTimestamp + 1000
|
||||
f(&options{
|
||||
name: "TooBigTimestamps",
|
||||
@@ -2024,7 +2024,9 @@ func TestStorageSearchMetricNames_VariousTimeRanges(t *testing.T) {
|
||||
}
|
||||
slices.Sort(want)
|
||||
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{
|
||||
FutureRetention: time.Duration(math.MaxInt64),
|
||||
})
|
||||
defer s.MustClose()
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
@@ -2282,7 +2284,9 @@ func TestStorageSearchLabelNames_VariousTimeRanges(t *testing.T) {
|
||||
want = append(want, "__name__")
|
||||
slices.Sort(want)
|
||||
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{
|
||||
FutureRetention: time.Duration(math.MaxInt64),
|
||||
})
|
||||
defer s.MustClose()
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
@@ -2331,7 +2335,9 @@ func TestStorageSearchLabelValues_VariousTimeRanges(t *testing.T) {
|
||||
}
|
||||
slices.Sort(want)
|
||||
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{
|
||||
FutureRetention: time.Duration(math.MaxInt64),
|
||||
})
|
||||
defer s.MustClose()
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
@@ -2371,7 +2377,9 @@ func TestStorageSearchTagValueSuffixes_VariousTimeRanges(t *testing.T) {
|
||||
}
|
||||
slices.Sort(want)
|
||||
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{
|
||||
FutureRetention: time.Duration(math.MaxInt64),
|
||||
})
|
||||
defer s.MustClose()
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
@@ -2410,7 +2418,9 @@ func TestStorageSearchGraphitePaths_VariousTimeRanges(t *testing.T) {
|
||||
}
|
||||
slices.Sort(want)
|
||||
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{})
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{
|
||||
FutureRetention: time.Duration(math.MaxInt64),
|
||||
})
|
||||
defer s.MustClose()
|
||||
s.AddRows(mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
@@ -2458,6 +2468,42 @@ func testStorageOpOnVariousTimeRanges(t *testing.T, op func(t *testing.T, tr Tim
|
||||
MaxTimestamp: time.Date(2000, 12, 31, 23, 59, 59, 999_999_999, time.UTC).UnixMilli(),
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("future-1h", func(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
op(t, TimeRange{
|
||||
MinTimestamp: time.Date(now.Year(), now.Month(), now.Day(), now.Hour()+1, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(now.Year(), now.Month(), now.Day(), now.Hour()+2, 0, 0, 0, time.UTC).UnixMilli() - 1,
|
||||
})
|
||||
})
|
||||
t.Run("future-1d", func(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
op(t, TimeRange{
|
||||
MinTimestamp: time.Date(now.Year(), now.Month(), now.Day()+1, 1, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(now.Year(), now.Month(), now.Day()+2, 1, 0, 0, 0, time.UTC).UnixMilli() - 1,
|
||||
})
|
||||
})
|
||||
t.Run("future-1m", func(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
op(t, TimeRange{
|
||||
MinTimestamp: time.Date(now.Year(), now.Month()+1, 1, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(now.Year(), now.Month()+2, 1, 0, 0, 0, 0, time.UTC).UnixMilli() - 1,
|
||||
})
|
||||
})
|
||||
t.Run("future-1y", func(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
op(t, TimeRange{
|
||||
MinTimestamp: time.Date(now.Year()+1, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(now.Year()+2, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli() - 1,
|
||||
})
|
||||
})
|
||||
t.Run("future-limit", func(t *testing.T) {
|
||||
maxTime := time.UnixMilli(maxUnixMilli).UTC()
|
||||
op(t, TimeRange{
|
||||
MinTimestamp: maxTime.Add(-24 * time.Hour).UnixMilli(),
|
||||
MaxTimestamp: maxTime.UnixMilli(),
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func TestStorageSearchLabelValues_EmptyValuesAreNotReturned(t *testing.T) {
|
||||
@@ -4068,3 +4114,219 @@ func TestStorageMetrics_IndexDBBlockCaches(t *testing.T) {
|
||||
}
|
||||
assertMetrics(s)
|
||||
}
|
||||
|
||||
// TestStorage_futureTimestamps checks that samples with future timestamps can
|
||||
// be ingested, searched, and deleted.
|
||||
func TestStorage_futureTimestamps(t *testing.T) {
|
||||
defer testRemoveAll(t)
|
||||
|
||||
type want struct {
|
||||
mrs []MetricRow
|
||||
metricNames []string
|
||||
labelNames []string
|
||||
labelValues []string
|
||||
}
|
||||
|
||||
const numMetrics = 1000
|
||||
genData := func(prefix string, tr TimeRange) want {
|
||||
mrs := make([]MetricRow, numMetrics)
|
||||
metricNames := make([]string, numMetrics)
|
||||
labelNames := make([]string, numMetrics)
|
||||
labelValues := make([]string, numMetrics)
|
||||
step := (tr.MaxTimestamp - tr.MinTimestamp) / numMetrics
|
||||
for i := range numMetrics {
|
||||
metricName := fmt.Sprintf("metric_%s_%04d", prefix, i)
|
||||
labelName := fmt.Sprintf("label_%s_%04d", prefix, i)
|
||||
labelValue := fmt.Sprintf("value_%s_%04d", prefix, i)
|
||||
mn := MetricName{
|
||||
MetricGroup: []byte(metricName),
|
||||
Tags: []Tag{
|
||||
{Key: []byte(labelName), Value: []byte("value")},
|
||||
{Key: []byte("label"), Value: []byte(labelValue)},
|
||||
},
|
||||
}
|
||||
mrs[i].MetricNameRaw = mn.marshalRaw(nil)
|
||||
mrs[i].Timestamp = tr.MinTimestamp + int64(i)*step
|
||||
mrs[i].Value = float64(i)
|
||||
metricNames[i] = metricName
|
||||
labelNames[i] = labelName
|
||||
labelValues[i] = labelValue
|
||||
}
|
||||
labelNames = append(labelNames, "__name__", "label")
|
||||
return want{mrs, metricNames, labelNames, labelValues}
|
||||
}
|
||||
|
||||
assertMetricNames := func(t *testing.T, s *Storage, tr TimeRange, want []string) {
|
||||
t.Helper()
|
||||
tfs := NewTagFilters()
|
||||
if err := tfs.Add([]byte("__name__"), []byte(".*"), false, true); err != nil {
|
||||
t.Fatalf("unexpected error in TagFilters.Add: %v", err)
|
||||
}
|
||||
got, err := s.SearchMetricNames(nil, []*TagFilters{tfs}, tr, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("SearchMetricNames() failed unexpectedly: %v", err)
|
||||
}
|
||||
for i, name := range got {
|
||||
var mn MetricName
|
||||
if err := mn.UnmarshalString(name); err != nil {
|
||||
t.Fatalf("Could not unmarshal metric name %q: %v", name, err)
|
||||
}
|
||||
got[i] = string(mn.MetricGroup)
|
||||
}
|
||||
slices.Sort(got)
|
||||
slices.Sort(want)
|
||||
if diff := cmp.Diff(want, got); diff != "" {
|
||||
t.Fatalf("unexpected metric names (-want, +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
assertLabelNames := func(t *testing.T, s *Storage, tr TimeRange, want []string) {
|
||||
t.Helper()
|
||||
got, err := s.SearchLabelNames(nil, nil, tr, 1e9, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("SearchLabelNames() failed unexpectedly: %s", err)
|
||||
}
|
||||
slices.Sort(got)
|
||||
slices.Sort(want)
|
||||
if diff := cmp.Diff(want, got); diff != "" {
|
||||
t.Fatalf("unexpected label names (-want, +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
assertLabelValues := func(t *testing.T, s *Storage, tr TimeRange, want []string) {
|
||||
t.Helper()
|
||||
got, err := s.SearchLabelValues(nil, "label", nil, tr, 1e9, 1e9, noDeadline)
|
||||
if err != nil {
|
||||
t.Fatalf("SearchLabelValues() failed unexpectedly: %s", err)
|
||||
}
|
||||
slices.Sort(got)
|
||||
slices.Sort(want)
|
||||
if diff := cmp.Diff(want, got); diff != "" {
|
||||
t.Fatalf("unexpected label values (-want, +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
assertData := func(t *testing.T, s *Storage, tr TimeRange, want []MetricRow) {
|
||||
t.Helper()
|
||||
tfs := NewTagFilters()
|
||||
if err := tfs.Add(nil, []byte(".*"), false, true); err != nil {
|
||||
t.Fatalf("TagFilters.Add() failed unexpectedly: %v", err)
|
||||
}
|
||||
if err := testAssertSearchResult(s, tr, tfs, want); err != nil {
|
||||
t.Fatalf("search failed unexpectedly: %v", err)
|
||||
}
|
||||
}
|
||||
deleteAllSeries := func(t *testing.T, s *Storage) {
|
||||
tfs := NewTagFilters()
|
||||
if err := tfs.Add([]byte("__name__"), []byte(".*"), false, true); err != nil {
|
||||
t.Fatalf("unexpected error in TagFilters.Add: %v", err)
|
||||
}
|
||||
if _, err := s.DeleteSeries(nil, []*TagFilters{tfs}, 1e9); err != nil {
|
||||
t.Fatalf("DeleteSeries() failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
concatUniq := func(s1, s2 []string) []string {
|
||||
var s []string
|
||||
seen := make(map[string]bool)
|
||||
for _, v := range slices.Concat(s1, s2) {
|
||||
if !seen[v] {
|
||||
s = append(s, v)
|
||||
seen[v] = true
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
futureRetention := time.Duration(math.MaxInt64)
|
||||
|
||||
f := func(t *testing.T, tr TimeRange) {
|
||||
want := genData("batch1", tr)
|
||||
|
||||
s := MustOpenStorage(t.Name(), OpenOptions{
|
||||
FutureRetention: futureRetention,
|
||||
})
|
||||
s.AddRows(want.mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
assertMetricNames(t, s, tr, want.metricNames)
|
||||
assertLabelNames(t, s, tr, want.labelNames)
|
||||
assertLabelValues(t, s, tr, want.labelValues)
|
||||
assertData(t, s, tr, want.mrs)
|
||||
|
||||
// Reopen storage.
|
||||
s.MustClose()
|
||||
s = MustOpenStorage(t.Name(), OpenOptions{
|
||||
FutureRetention: futureRetention,
|
||||
})
|
||||
assertMetricNames(t, s, tr, want.metricNames)
|
||||
assertLabelNames(t, s, tr, want.labelNames)
|
||||
assertLabelValues(t, s, tr, want.labelValues)
|
||||
assertData(t, s, tr, want.mrs)
|
||||
|
||||
// Insert more data and force background merge tasks.
|
||||
want2 := genData("batch2", tr)
|
||||
s.AddRows(want2.mrs, defaultPrecisionBits)
|
||||
s.DebugFlush()
|
||||
if err := s.ForceMergePartitions(""); err != nil {
|
||||
t.Fatalf("ForceMergePartitions() failed unexpectedly: %s", err)
|
||||
}
|
||||
assertMetricNames(t, s, tr, slices.Concat(want.metricNames, want2.metricNames))
|
||||
assertLabelNames(t, s, tr, concatUniq(want.labelNames, want2.labelNames))
|
||||
assertLabelValues(t, s, tr, slices.Concat(want.labelValues, want2.labelValues))
|
||||
assertData(t, s, tr, slices.Concat(want.mrs, want2.mrs))
|
||||
|
||||
// Delete all series.
|
||||
deleteAllSeries(t, s)
|
||||
assertMetricNames(t, s, tr, []string{})
|
||||
assertLabelNames(t, s, tr, []string{})
|
||||
assertLabelValues(t, s, tr, []string{})
|
||||
assertData(t, s, tr, nil)
|
||||
|
||||
s.MustClose()
|
||||
}
|
||||
|
||||
t.Run("future-1h", func(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
f(t, TimeRange{
|
||||
MinTimestamp: time.Date(now.Year(), now.Month(), now.Day(), now.Hour()+1, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(now.Year(), now.Month(), now.Day(), now.Hour()+2, 0, 0, 0, time.UTC).UnixMilli() - 1,
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("future-1d", func(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
f(t, TimeRange{
|
||||
MinTimestamp: time.Date(now.Year(), now.Month(), now.Day()+1, 1, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(now.Year(), now.Month(), now.Day()+2, 1, 0, 0, 0, time.UTC).UnixMilli() - 1,
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("future-1m", func(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
f(t, TimeRange{
|
||||
MinTimestamp: time.Date(now.Year(), now.Month()+1, 1, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(now.Year(), now.Month()+2, 1, 0, 0, 0, 0, time.UTC).UnixMilli() - 1,
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("future-1y", func(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
f(t, TimeRange{
|
||||
MinTimestamp: time.Date(now.Year()+1, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(now.Year()+2, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli() - 1,
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("future-10y", func(t *testing.T) {
|
||||
now := time.Now().UTC()
|
||||
f(t, TimeRange{
|
||||
MinTimestamp: time.Date(now.Year()+1, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli(),
|
||||
MaxTimestamp: time.Date(now.Year()+11, 1, 1, 0, 0, 0, 0, time.UTC).UnixMilli() - 1,
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("future-limit", func(t *testing.T) {
|
||||
maxTime := time.UnixMilli(maxUnixMilli).UTC()
|
||||
f(t, TimeRange{
|
||||
MinTimestamp: maxTime.Add(-24 * time.Hour).UnixMilli(),
|
||||
MaxTimestamp: maxTime.UnixMilli(),
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
@@ -410,13 +410,13 @@ func (tb *table) MustGetIndexDBIDByHour(hour uint64) uint64 {
|
||||
func (tb *table) getMinMaxTimestamps() (int64, int64) {
|
||||
now := int64(fasttime.UnixTimestamp() * 1000)
|
||||
minTimestamp := now - tb.s.retentionMsecs
|
||||
maxTimestamp := now + 2*24*3600*1000 // allow max +2 days from now due to timezones shit :)
|
||||
if minTimestamp < 0 {
|
||||
// Negative timestamps aren't supported by the storage.
|
||||
minTimestamp = 0
|
||||
}
|
||||
if maxTimestamp < 0 {
|
||||
maxTimestamp = (1 << 63) - 1
|
||||
maxTimestamp := int64(maxUnixMilli)
|
||||
if maxUnixMilli-now > tb.s.futureRetentionMsecs {
|
||||
maxTimestamp = now + tb.s.futureRetentionMsecs
|
||||
}
|
||||
return minTimestamp, maxTimestamp
|
||||
}
|
||||
@@ -436,12 +436,14 @@ func (tb *table) retentionWatcher() {
|
||||
case <-ticker.C:
|
||||
}
|
||||
|
||||
minTimestamp := int64(fasttime.UnixTimestamp()*1000) - tb.s.retentionMsecs
|
||||
nowMsecs := int64(fasttime.UnixTimestamp() * 1000)
|
||||
minTimestamp := nowMsecs - tb.s.retentionMsecs
|
||||
maxTimestamp := nowMsecs + tb.s.futureRetentionMsecs
|
||||
var ptwsDrop []*partitionWrapper
|
||||
tb.ptwsLock.Lock()
|
||||
dst := tb.ptws[:0]
|
||||
for _, ptw := range tb.ptws {
|
||||
if ptw.pt.tr.MaxTimestamp < minTimestamp {
|
||||
if ptw.pt.tr.MaxTimestamp < minTimestamp || ptw.pt.tr.MinTimestamp > maxTimestamp {
|
||||
ptwsDrop = append(ptwsDrop, ptw)
|
||||
} else {
|
||||
dst = append(dst, ptw)
|
||||
|
||||
@@ -120,4 +120,16 @@ func (tr *TimeRange) contains(timestamp int64) bool {
|
||||
const (
|
||||
msecPerDay = 24 * 3600 * 1000
|
||||
msecPerHour = 3600 * 1000
|
||||
|
||||
// maxUnixMilli is the max millisecond that is allowed to be used as the
|
||||
// sample timestamp.
|
||||
//
|
||||
// Go's Duration is an int64 and is in nanoseconds. In order for time.Time
|
||||
// math operations and conversion to millis/micros/nanos to work correctly,
|
||||
// the max datetime must be limited to math.MaxInt64 nanoseconds, Which is
|
||||
// time.UnixMicro(math.MaxInt64/1000) == 2262-04-11 23:47:16.854775 UTC.
|
||||
//
|
||||
// Round it to the last millisecond of the last complete partition:
|
||||
// 2262-03-31 23:59:59.999 UTC.
|
||||
maxUnixMilli = 9222422399999
|
||||
)
|
||||
|
||||
@@ -229,3 +229,10 @@ func TestIsFirstHourOfDay(t *testing.T) {
|
||||
lastHourOfDay := time.Date(2000, 1, 1, 23, 59, 59, 999_999_999, time.UTC)
|
||||
f(lastHourOfDay, false)
|
||||
}
|
||||
|
||||
func TestMaxUnixMilli(t *testing.T) {
|
||||
lastFuturePtMaxTime := time.Date(2262, 3, 31, 23, 59, 59, 999_000_000, time.UTC)
|
||||
if got, want := lastFuturePtMaxTime.UnixMilli(), int64(maxUnixMilli); got != want {
|
||||
t.Fatalf("unexpected maxUnixMilli: got %d, want %d", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,17 +23,12 @@ func ParseTimeMsec(s string) (int64, error) {
|
||||
return msecs, nil
|
||||
}
|
||||
|
||||
const (
|
||||
// time.UnixNano can only store maxInt64, which is 2262
|
||||
maxValidYear = 2262
|
||||
minValidYear = 1970
|
||||
)
|
||||
|
||||
// ParseTimeAt parses time s in different formats, assuming the given currentTimestamp.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#timestamp-formats
|
||||
//
|
||||
// If s doesn't contain timezone information, then the local timezone is used.
|
||||
// The time must be in the range [1970-01-01T00:00:00Z, 2262-04-11T23:47:16Z].
|
||||
//
|
||||
// It returns unix timestamp in nanoseconds.
|
||||
func ParseTimeAt(s string, currentTimestamp int64) (int64, error) {
|
||||
@@ -83,15 +78,7 @@ func ParseTimeAt(s string, currentTimestamp int64) (int64, error) {
|
||||
}
|
||||
if len(s) == 4 {
|
||||
// Parse YYYY
|
||||
t, err := time.Parse("2006", s)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
y := t.Year()
|
||||
if y > maxValidYear || y < minValidYear {
|
||||
return 0, fmt.Errorf("cannot parse year from %q: year must in range [%d, %d]", s, minValidYear, maxValidYear)
|
||||
}
|
||||
return tzOffset + t.UnixNano(), nil
|
||||
return parseTimeAt("2006", s, tzOffset, sOrig)
|
||||
}
|
||||
if !strings.Contains(sOrig, "-") {
|
||||
nsec, ok := TryParseUnixTimestamp(sOrig)
|
||||
@@ -102,49 +89,43 @@ func ParseTimeAt(s string, currentTimestamp int64) (int64, error) {
|
||||
}
|
||||
if len(s) == 7 {
|
||||
// Parse YYYY-MM
|
||||
t, err := time.Parse("2006-01", s)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return tzOffset + t.UnixNano(), nil
|
||||
return parseTimeAt("2006-01", s, tzOffset, sOrig)
|
||||
}
|
||||
if len(s) == 10 {
|
||||
// Parse YYYY-MM-DD
|
||||
t, err := time.Parse("2006-01-02", s)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return tzOffset + t.UnixNano(), nil
|
||||
return parseTimeAt("2006-01-02", s, tzOffset, sOrig)
|
||||
}
|
||||
if len(s) == 13 {
|
||||
// Parse YYYY-MM-DDTHH
|
||||
t, err := time.Parse("2006-01-02T15", s)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return tzOffset + t.UnixNano(), nil
|
||||
return parseTimeAt("2006-01-02T15", s, tzOffset, sOrig)
|
||||
}
|
||||
if len(s) == 16 {
|
||||
// Parse YYYY-MM-DDTHH:MM
|
||||
t, err := time.Parse("2006-01-02T15:04", s)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return tzOffset + t.UnixNano(), nil
|
||||
return parseTimeAt("2006-01-02T15:04", s, tzOffset, sOrig)
|
||||
}
|
||||
if len(s) == 19 {
|
||||
// Parse YYYY-MM-DDTHH:MM:SS
|
||||
t, err := time.Parse("2006-01-02T15:04:05", s)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return tzOffset + t.UnixNano(), nil
|
||||
return parseTimeAt("2006-01-02T15:04:05", s, tzOffset, sOrig)
|
||||
}
|
||||
// Parse RFC3339
|
||||
t, err := time.Parse(time.RFC3339, sOrig)
|
||||
return parseTimeAt(time.RFC3339, sOrig, 0, sOrig)
|
||||
}
|
||||
|
||||
var (
|
||||
minTime = time.Unix(0, 0).UTC()
|
||||
maxTime = time.Unix(0, math.MaxInt64).UTC()
|
||||
)
|
||||
|
||||
func parseTimeAt(layout, value string, tzOffsetNanos int64, sOrig string) (int64, error) {
|
||||
t, err := time.Parse(layout, value)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
tzOffset := time.Duration(tzOffsetNanos)
|
||||
t = t.UTC().Add(tzOffset)
|
||||
if t.Before(minTime) || t.After(maxTime) {
|
||||
return 0, fmt.Errorf("time %s (%v) must be in the range [%v, %v]", sOrig, t, minTime, maxTime)
|
||||
}
|
||||
return t.UnixNano(), nil
|
||||
}
|
||||
|
||||
@@ -287,16 +268,25 @@ func multiplyByDecimalExp(n int64, decimalExp int64) (int64, bool) {
|
||||
|
||||
var decimalMultipliers = [...]int64{0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18}
|
||||
|
||||
const (
|
||||
maxValidSecond = math.MaxInt64 / 1_000_000_000
|
||||
maxValidMilli = math.MaxInt64 / 1_000_000
|
||||
maxValidMicro = math.MaxInt64 / 1_000
|
||||
minValidSecond = math.MinInt64 / 1_000_000_000
|
||||
minValidMilli = math.MinInt64 / 1_000_000
|
||||
minValidMicro = math.MinInt64 / 1_000
|
||||
)
|
||||
|
||||
func getUnixTimestampNanoseconds(n int64) int64 {
|
||||
if n < (1<<31) && n >= (-1<<31) {
|
||||
if n <= maxValidSecond && n >= minValidSecond {
|
||||
// The timestamp is in seconds.
|
||||
return n * 1e9
|
||||
}
|
||||
if n < 1e3*(1<<31) && n >= 1e3*(-1<<31) {
|
||||
if n <= maxValidMilli && n >= minValidMilli {
|
||||
// The timestamp is in milliseconds.
|
||||
return n * 1e6
|
||||
}
|
||||
if n < 1e6*(1<<31) && n >= 1e6*(-1<<31) {
|
||||
if n <= maxValidMicro && n >= minValidMicro {
|
||||
// The timestamp is in microseconds.
|
||||
return n * 1e3
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package timeutil
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@@ -202,6 +203,165 @@ func TestParseTimeAtSuccess(t *testing.T) {
|
||||
f("2023-05-20T04:57:43.123456789-02:30", now, 1684567663123456789)
|
||||
}
|
||||
|
||||
func TestParseTimeAtLimits(t *testing.T) {
|
||||
f := func(s string, wantTime time.Time) {
|
||||
t.Helper()
|
||||
currentTimestamp := time.Now().UnixNano()
|
||||
got, err := ParseTimeAt(s, currentTimestamp)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if want := wantTime.UnixNano(); got != want {
|
||||
t.Fatalf("unexpected result; got %d; want %d", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
location := func(t *testing.T, location string) *time.Location {
|
||||
t.Helper()
|
||||
l, err := time.LoadLocation(location)
|
||||
if err != nil {
|
||||
t.Fatalf("could not load location %q: %v", location, err)
|
||||
}
|
||||
return l
|
||||
}
|
||||
east := location(t, "Etc/GMT-14") // UTC+14:00
|
||||
west := location(t, "Etc/GMT+12") // UTC-12:00
|
||||
|
||||
// min year
|
||||
f("1970Z", time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC))
|
||||
f("1971+14:00", time.Date(1971, 1, 1, 0, 0, 0, 0, east))
|
||||
f("1970-12:00", time.Date(1970, 1, 1, 0, 0, 0, 0, west))
|
||||
|
||||
// min month
|
||||
f("1970-01Z", time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC))
|
||||
f("1970-02+14:00", time.Date(1970, 2, 1, 0, 0, 0, 0, east))
|
||||
f("1970-01-12:00", time.Date(1970, 1, 1, 0, 0, 0, 0, west))
|
||||
|
||||
// min day
|
||||
f("1970-01-01Z", time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC))
|
||||
f("1970-01-02+14:00", time.Date(1970, 1, 2, 0, 0, 0, 0, east))
|
||||
f("1970-01-01-12:00", time.Date(1970, 1, 1, 0, 0, 0, 0, west))
|
||||
|
||||
// min hour
|
||||
f("1970-01-01T00Z", time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC))
|
||||
f("1970-01-01T14+14:00", time.Date(1970, 1, 1, 14, 0, 0, 0, east))
|
||||
f("1969-12-31T12-12:00", time.Date(1969, 12, 31, 12, 0, 0, 0, west))
|
||||
|
||||
// min minute
|
||||
f("1970-01-01T00:00Z", time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC))
|
||||
f("1970-01-01T14:00+14:00", time.Date(1970, 1, 1, 14, 0, 0, 0, east))
|
||||
f("1969-12-31T12:00-12:00", time.Date(1969, 12, 31, 12, 0, 0, 0, west))
|
||||
|
||||
// min second
|
||||
f("1970-01-01T00:00:00Z", time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC))
|
||||
f("1970-01-01T14:00:00+14:00", time.Date(1970, 1, 1, 14, 0, 0, 0, east))
|
||||
f("1969-12-31T12:00:00-12:00", time.Date(1969, 12, 31, 12, 0, 0, 0, west))
|
||||
|
||||
// max year
|
||||
f("2262Z", time.Date(2262, 1, 1, 0, 0, 0, 0, time.UTC))
|
||||
f("2262+14:00", time.Date(2262, 1, 1, 0, 0, 0, 0, east))
|
||||
f("2262-12:00", time.Date(2262, 1, 1, 0, 0, 0, 0, west))
|
||||
|
||||
// max month
|
||||
f("2262-04Z", time.Date(2262, 4, 1, 0, 0, 0, 0, time.UTC))
|
||||
f("2262-04+14:00", time.Date(2262, 4, 1, 0, 0, 0, 0, east))
|
||||
f("2262-04-12:00", time.Date(2262, 4, 1, 0, 0, 0, 0, west))
|
||||
|
||||
// max day
|
||||
f("2262-04-11Z", time.Date(2262, 4, 11, 0, 0, 0, 0, time.UTC))
|
||||
f("2262-04-12+14:00", time.Date(2262, 4, 12, 0, 0, 0, 0, east))
|
||||
f("2262-04-11-12:00", time.Date(2262, 4, 11, 0, 0, 0, 0, west))
|
||||
|
||||
// max hour
|
||||
f("2262-04-11T23Z", time.Date(2262, 4, 11, 23, 0, 0, 0, time.UTC))
|
||||
f("2262-04-12T13+14:00", time.Date(2262, 4, 12, 13, 0, 0, 0, east))
|
||||
f("2262-04-11T11-12:00", time.Date(2262, 4, 11, 11, 0, 0, 0, west))
|
||||
|
||||
// max minute
|
||||
f("2262-04-11T23:47Z", time.Date(2262, 4, 11, 23, 47, 0, 0, time.UTC))
|
||||
f("2262-04-12T13:47+14:00", time.Date(2262, 4, 12, 13, 47, 0, 0, east))
|
||||
f("2262-04-11T11:47-12:00", time.Date(2262, 4, 11, 11, 47, 0, 0, west))
|
||||
|
||||
// max second
|
||||
f("2262-04-11T23:47:16Z", time.Date(2262, 4, 11, 23, 47, 16, 0, time.UTC))
|
||||
f("2262-04-12T13:47:16+14:00", time.Date(2262, 4, 12, 13, 47, 16, 0, east))
|
||||
f("2262-04-11T11:47:16-12:00", time.Date(2262, 4, 11, 11, 47, 16, 0, west))
|
||||
}
|
||||
|
||||
func TestParseTimeAtOutsideLimits(t *testing.T) {
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
currentTimestamp := time.Now().UnixNano()
|
||||
got, err := ParseTimeAt(s, currentTimestamp)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error but got %d", got)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "must be in the range") {
|
||||
t.Fatalf("expected error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// min year
|
||||
f("1969Z")
|
||||
f("1970+14:00")
|
||||
f("1969-12:00")
|
||||
|
||||
// min month
|
||||
f("1969-12Z")
|
||||
f("1970-01+14:00")
|
||||
f("1969-12-12:00")
|
||||
|
||||
// min day
|
||||
f("1969-12-31Z")
|
||||
f("1970-01-01+14:00")
|
||||
f("1969-12-31-12:00")
|
||||
|
||||
// min hour
|
||||
f("1969-12-31T23Z")
|
||||
f("1970-01-01T13+14:00")
|
||||
f("1969-12-31T11-12:00")
|
||||
|
||||
// min minute
|
||||
f("1969-12-31T23:59Z")
|
||||
f("1970-01-01T13:59+14:00")
|
||||
f("1969-12-31T11:59-12:00")
|
||||
|
||||
// min second
|
||||
f("1969-12-31T23:59:59Z")
|
||||
f("1970-01-01T13:59:59+14:00")
|
||||
f("1969-12-31T11:59:59-12:00")
|
||||
|
||||
// max year
|
||||
f("2263Z")
|
||||
f("2263+14:00")
|
||||
f("2263-12:00")
|
||||
|
||||
// max month
|
||||
f("2262-05Z")
|
||||
f("2262-05+14:00")
|
||||
f("2262-05-12:00")
|
||||
|
||||
// max day
|
||||
f("2262-04-12Z")
|
||||
f("2262-04-13+14:00")
|
||||
f("2262-04-12-12:00")
|
||||
|
||||
// max hour
|
||||
f("2262-04-12T00Z")
|
||||
f("2262-04-12T14+14:00")
|
||||
f("2262-04-11T12-12:00")
|
||||
|
||||
// max minute
|
||||
f("2262-04-11T23:48Z")
|
||||
f("2262-04-12T13:48+14:00")
|
||||
f("2262-04-11T11:48-12:00")
|
||||
|
||||
// max second
|
||||
f("2262-04-11T23:47:17Z")
|
||||
f("2262-04-12T13:47:17+14:00")
|
||||
f("2262-04-11T11:47:17-12:00")
|
||||
}
|
||||
|
||||
func TestParseTimeMsecFailure(t *testing.T) {
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
|
||||
44
vendor/github.com/VictoriaMetrics/metricsql/parser.go
generated
vendored
44
vendor/github.com/VictoriaMetrics/metricsql/parser.go
generated
vendored
@@ -1933,11 +1933,11 @@ func (be *BinaryOpExpr) appendStringNoKeepMetricNames(dst []byte) []byte {
|
||||
}
|
||||
|
||||
func (be *BinaryOpExpr) needLeftParens() bool {
|
||||
return needBinaryOpArgParens(be.Left, be.Op, false)
|
||||
return needBinaryOpArgParens(be.Left)
|
||||
}
|
||||
|
||||
func (be *BinaryOpExpr) needRightParens() bool {
|
||||
if needBinaryOpArgParens(be.Right, be.Op, true) {
|
||||
if needBinaryOpArgParens(be.Right) {
|
||||
return true
|
||||
}
|
||||
switch t := be.Right.(type) {
|
||||
@@ -1974,26 +1974,10 @@ func (be *BinaryOpExpr) appendModifiers(dst []byte) []byte {
|
||||
return dst
|
||||
}
|
||||
|
||||
func needBinaryOpArgParens(arg Expr, parentOp string, isRight bool) bool {
|
||||
func needBinaryOpArgParens(arg Expr) bool {
|
||||
switch t := arg.(type) {
|
||||
case *BinaryOpExpr:
|
||||
// Parens are required when the child op priority not equal to parent o one.
|
||||
// For example, a + b / c - d should be a + (b / c) - d.
|
||||
if binaryOpPriority(t.Op) != binaryOpPriority(parentOp) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Same op: parens are only needed when the sub-expression is not a simple leaf chain.
|
||||
if t.Op != parentOp {
|
||||
if isRight && !isRightAssociativeBinaryOp(parentOp) {
|
||||
return true
|
||||
}
|
||||
if !isRight && isRightAssociativeBinaryOp(parentOp) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return !isBinaryOpLeafSimple(t)
|
||||
return true
|
||||
case *RollupExpr:
|
||||
if be, ok := t.Expr.(*BinaryOpExpr); ok && be.KeepMetricNames {
|
||||
return true
|
||||
@@ -2004,26 +1988,6 @@ func needBinaryOpArgParens(arg Expr, parentOp string, isRight bool) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func isBinaryOpLeafSimple(arg Expr) bool {
|
||||
switch t := arg.(type) {
|
||||
case *NumberExpr:
|
||||
return true
|
||||
case *MetricExpr:
|
||||
metricName := t.getMetricName()
|
||||
// Parens should be added if metric name equals to a reserved word, such as group_left
|
||||
// For example, a + group_left should become a + (group_left). Otherwise, query won't be parsed.
|
||||
return !isReservedBinaryOpIdent(metricName)
|
||||
case *BinaryOpExpr:
|
||||
if t.GroupModifier.Op != "" || t.KeepMetricNames || t.JoinModifier.Op != "" {
|
||||
return false
|
||||
}
|
||||
|
||||
return isBinaryOpLeafSimple(t.Left) && isBinaryOpLeafSimple(t.Right)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func isReservedBinaryOpIdent(s string) bool {
|
||||
return isBinaryOpGroupModifier(s) || isBinaryOpJoinModifier(s) || isBinaryOpBoolModifier(s) || isPrefixModifier(s)
|
||||
}
|
||||
|
||||
2
vendor/modules.txt
vendored
2
vendor/modules.txt
vendored
@@ -145,7 +145,7 @@ github.com/VictoriaMetrics/fastcache
|
||||
# github.com/VictoriaMetrics/metrics v1.43.2
|
||||
## explicit; go 1.24.0
|
||||
github.com/VictoriaMetrics/metrics
|
||||
# github.com/VictoriaMetrics/metricsql v0.86.1
|
||||
# github.com/VictoriaMetrics/metricsql v0.87.0
|
||||
## explicit; go 1.24.2
|
||||
github.com/VictoriaMetrics/metricsql
|
||||
github.com/VictoriaMetrics/metricsql/binaryop
|
||||
|
||||
Reference in New Issue
Block a user