mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-22 19:26:35 +03:00
Compare commits
38 Commits
debug-grou
...
issue-1041
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6a990f4967 | ||
|
|
256eff061d | ||
|
|
fa1dd0ec0a | ||
|
|
6337dfc472 | ||
|
|
386b736670 | ||
|
|
e9e35dd8aa | ||
|
|
1db77bd3bb | ||
|
|
0a256002e5 | ||
|
|
b3c03c023c | ||
|
|
80e2f29761 | ||
|
|
df34ba3ba2 | ||
|
|
10dd45c4fd | ||
|
|
a3294b5aa2 | ||
|
|
4438454567 | ||
|
|
71af1ee5f1 | ||
|
|
e00fb7e605 | ||
|
|
5e2ee00504 | ||
|
|
de2bc4237a | ||
|
|
3e51f277bd | ||
|
|
5723339525 | ||
|
|
3b986ad326 | ||
|
|
08dd38d4a0 | ||
|
|
815cc97952 | ||
|
|
93d71e7106 | ||
|
|
577b161343 | ||
|
|
dd2d6807e4 | ||
|
|
e38e25b756 | ||
|
|
bc708c8568 | ||
|
|
cd73472a3e | ||
|
|
527d09653a | ||
|
|
28a87b90bb | ||
|
|
c445e7fcc0 | ||
|
|
9494ee103e | ||
|
|
94af588e92 | ||
|
|
e5c194cc10 | ||
|
|
7c65e3daca | ||
|
|
a6532c28b2 | ||
|
|
ec26ebb803 |
4
.github/dependabot.yml
vendored
4
.github/dependabot.yml
vendored
@@ -4,6 +4,8 @@ updates:
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
cooldown:
|
||||
default-days: 21
|
||||
- package-ecosystem: "gomod"
|
||||
directory: "/"
|
||||
schedule:
|
||||
@@ -23,6 +25,8 @@ updates:
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
cooldown:
|
||||
default-days: 21
|
||||
- package-ecosystem: "npm"
|
||||
directory: "/app/vmui/packages/vmui"
|
||||
schedule:
|
||||
|
||||
4
Makefile
4
Makefile
@@ -467,10 +467,10 @@ test-full-386:
|
||||
GOARCH=386 go test -tags 'synctest' -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
|
||||
apptest:
|
||||
$(MAKE) victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
|
||||
$(MAKE) victoria-metrics-race vmagent-race vmalert-race vmauth-race vmctl-race vmbackup-race vmrestore-race
|
||||
go test ./apptest/... -skip="^Test(Cluster|Legacy).*"
|
||||
|
||||
apptest-legacy: victoria-metrics vmbackup vmrestore
|
||||
apptest-legacy: victoria-metrics-race vmbackup-race vmrestore-race
|
||||
OS=$$(uname | tr '[:upper:]' '[:lower:]'); \
|
||||
ARCH=$$(uname -m | tr '[:upper:]' '[:lower:]' | sed 's/x86_64/amd64/'); \
|
||||
VERSION=v1.132.0; \
|
||||
|
||||
@@ -13,6 +13,9 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/golang/snappy"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/awsapi"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
@@ -21,10 +24,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -405,8 +405,7 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
// Otherwise, it tries sending the block to remote storage indefinitely.
|
||||
func (c *client) sendBlockHTTP(block []byte) bool {
|
||||
c.rl.Register(len(block))
|
||||
maxRetryDuration := timeutil.AddJitterToDuration(c.retryMaxInterval)
|
||||
retryDuration := timeutil.AddJitterToDuration(c.retryMinInterval)
|
||||
bt := timeutil.NewBackoffTimer(c.retryMinInterval, c.retryMaxInterval)
|
||||
retriesCount := 0
|
||||
|
||||
again:
|
||||
@@ -415,19 +414,10 @@ again:
|
||||
c.requestDuration.UpdateDuration(startTime)
|
||||
if err != nil {
|
||||
c.errorsCount.Inc()
|
||||
retryDuration *= 2
|
||||
if retryDuration > maxRetryDuration {
|
||||
retryDuration = maxRetryDuration
|
||||
}
|
||||
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
|
||||
len(block), c.sanitizedURL, err, retryDuration.Seconds())
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
timerpool.Put(t)
|
||||
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %s",
|
||||
len(block), c.sanitizedURL, err, bt.CurrentDelay())
|
||||
if !bt.Wait(c.stopCh) {
|
||||
return false
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
@@ -493,7 +483,10 @@ again:
|
||||
// Unexpected status code returned
|
||||
retriesCount++
|
||||
retryAfterHeader := parseRetryAfterHeader(resp.Header.Get("Retry-After"))
|
||||
retryDuration = getRetryDuration(retryAfterHeader, retryDuration, maxRetryDuration)
|
||||
// retryAfterDuration has the highest priority duration
|
||||
if retryAfterHeader > 0 {
|
||||
bt.SetDelay(retryAfterHeader)
|
||||
}
|
||||
|
||||
// Handle response
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
@@ -502,15 +495,10 @@ again:
|
||||
logger.Errorf("cannot read response body from %q during retry #%d: %s", c.sanitizedURL, retriesCount, err)
|
||||
} else {
|
||||
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+
|
||||
"re-sending the block in %.3f seconds", len(block), c.sanitizedURL, retriesCount, statusCode, body, retryDuration.Seconds())
|
||||
"re-sending the block in %s", len(block), c.sanitizedURL, retriesCount, statusCode, body, bt.CurrentDelay())
|
||||
}
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
timerpool.Put(t)
|
||||
if !bt.Wait(c.stopCh) {
|
||||
return false
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
@@ -519,27 +507,6 @@ again:
|
||||
var remoteWriteRejectedLogger = logger.WithThrottler("remoteWriteRejected", 5*time.Second)
|
||||
var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Second)
|
||||
|
||||
// getRetryDuration returns retry duration.
|
||||
// retryAfterDuration has the highest priority.
|
||||
// If retryAfterDuration is not specified, retryDuration gets doubled.
|
||||
// retryDuration can't exceed maxRetryDuration.
|
||||
//
|
||||
// Also see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6097
|
||||
func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.Duration) time.Duration {
|
||||
// retryAfterDuration has the highest priority duration
|
||||
if retryAfterDuration > 0 {
|
||||
return timeutil.AddJitterToDuration(retryAfterDuration)
|
||||
}
|
||||
|
||||
// default backoff retry policy
|
||||
retryDuration *= 2
|
||||
if retryDuration > maxRetryDuration {
|
||||
retryDuration = maxRetryDuration
|
||||
}
|
||||
|
||||
return retryDuration
|
||||
}
|
||||
|
||||
// repackBlockFromZstdToSnappy repacks the given zstd-compressed block to snappy-compressed block.
|
||||
//
|
||||
// The input block may be corrupted, for example, if vmagent was shut down ungracefully and
|
||||
@@ -570,24 +537,20 @@ func logBlockRejected(block []byte, sanitizedURL string, resp *http.Response) {
|
||||
}
|
||||
|
||||
// parseRetryAfterHeader parses `Retry-After` value retrieved from HTTP response header.
|
||||
// retryAfterString should be in either HTTP-date or a number of seconds.
|
||||
// It will return time.Duration(0) if `retryAfterString` does not follow RFC 7231.
|
||||
func parseRetryAfterHeader(retryAfterString string) (retryAfterDuration time.Duration) {
|
||||
if retryAfterString == "" {
|
||||
return retryAfterDuration
|
||||
//
|
||||
// s should be in either HTTP-date or a number of seconds.
|
||||
// It returns time.Duration(0) if s does not follow RFC 7231.
|
||||
func parseRetryAfterHeader(s string) time.Duration {
|
||||
if s == "" {
|
||||
return 0
|
||||
}
|
||||
|
||||
defer func() {
|
||||
v := retryAfterDuration.Seconds()
|
||||
logger.Infof("'Retry-After: %s' parsed into %.2f second(s)", retryAfterString, v)
|
||||
}()
|
||||
|
||||
// Retry-After could be in "Mon, 02 Jan 2006 15:04:05 GMT" format.
|
||||
if parsedTime, err := time.Parse(http.TimeFormat, retryAfterString); err == nil {
|
||||
if parsedTime, err := time.Parse(http.TimeFormat, s); err == nil {
|
||||
return time.Duration(time.Until(parsedTime).Seconds()) * time.Second
|
||||
}
|
||||
// Retry-After could be in seconds.
|
||||
if seconds, err := strconv.Atoi(retryAfterString); err == nil {
|
||||
if seconds, err := strconv.Atoi(s); err == nil {
|
||||
return time.Duration(seconds) * time.Second
|
||||
}
|
||||
|
||||
|
||||
@@ -6,66 +6,11 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/golang/snappy"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
)
|
||||
|
||||
func TestCalculateRetryDuration(t *testing.T) {
|
||||
// `testFunc` call `calculateRetryDuration` for `n` times
|
||||
// and evaluate if the result of `calculateRetryDuration` is
|
||||
// 1. >= expectMinDuration
|
||||
// 2. <= expectMinDuration + 10% (see timeutil.AddJitterToDuration)
|
||||
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
for range n {
|
||||
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
|
||||
}
|
||||
|
||||
expectMaxDuration := helper(expectMinDuration)
|
||||
expectMinDuration = expectMinDuration - (1000 * time.Millisecond) // Avoid edge case when calculating time.Until(now)
|
||||
|
||||
if retryDuration < expectMinDuration || retryDuration > expectMaxDuration {
|
||||
t.Fatalf(
|
||||
"incorrect retry duration, want (ms): [%d, %d], got (ms): %d",
|
||||
expectMinDuration.Milliseconds(), expectMaxDuration.Milliseconds(),
|
||||
retryDuration.Milliseconds(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Call calculateRetryDuration for 1 time.
|
||||
{
|
||||
// default backoff policy
|
||||
f(0, time.Second, 1, 2*time.Second)
|
||||
// default backoff policy exceed max limit"
|
||||
f(0, 10*time.Minute, 1, time.Minute)
|
||||
|
||||
// retry after > default backoff policy
|
||||
f(10*time.Second, 1*time.Second, 1, 10*time.Second)
|
||||
// retry after < default backoff policy
|
||||
f(1*time.Second, 10*time.Second, 1, 1*time.Second)
|
||||
// retry after invalid and < default backoff policy
|
||||
f(0, time.Second, 1, 2*time.Second)
|
||||
|
||||
}
|
||||
|
||||
// Call calculateRetryDuration for multiple times.
|
||||
{
|
||||
// default backoff policy 2 times
|
||||
f(0, time.Second, 2, 4*time.Second)
|
||||
// default backoff policy 3 times
|
||||
f(0, time.Second, 3, 8*time.Second)
|
||||
// default backoff policy N times exceed max limit
|
||||
f(0, time.Second, 10, time.Minute)
|
||||
|
||||
// retry after 120s 1 times
|
||||
f(120*time.Second, time.Second, 1, 120*time.Second)
|
||||
// retry after 120s 2 times
|
||||
f(120*time.Second, time.Second, 2, 120*time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRetryAfterHeader(t *testing.T) {
|
||||
f := func(retryAfterString string, expectResult time.Duration) {
|
||||
t.Helper()
|
||||
@@ -91,13 +36,6 @@ func TestParseRetryAfterHeader(t *testing.T) {
|
||||
f(time.Now().Add(10*time.Second).Format("Mon, 02 Jan 2006 15:04:05 FAKETZ"), 0)
|
||||
}
|
||||
|
||||
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
|
||||
func helper(d time.Duration) time.Duration {
|
||||
dv := min(d/10, 10*time.Second)
|
||||
|
||||
return d + dv
|
||||
}
|
||||
|
||||
func TestRepackBlockFromZstdToSnappy(t *testing.T) {
|
||||
expectedPlainBlock := []byte(`foobar`)
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ package remotewrite
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"math"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
@@ -11,6 +12,10 @@ import (
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bloomfilter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
@@ -23,6 +28,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prommetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
@@ -30,8 +36,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -80,10 +84,14 @@ var (
|
||||
`This may be needed for reducing memory usage at remote storage when the order of labels in incoming samples is random. `+
|
||||
`For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}`+
|
||||
`Enabled sorting for labels can slow down ingestion performance a bit`)
|
||||
maxHourlySeries = flag.Int("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
|
||||
maxDailySeries = flag.Int("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
|
||||
maxHourlySeries = flag.Int64("remoteWrite.maxHourlySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last hour. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series cardinality. "+
|
||||
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
|
||||
maxDailySeries = flag.Int64("remoteWrite.maxDailySeries", 0, "The maximum number of unique series vmagent can send to remote storage systems during the last 24 hours. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. "+
|
||||
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter")
|
||||
maxIngestionRate = flag.Int("maxIngestionRate", 0, "The maximum number of samples vmagent can receive per second. Data ingestion is paused when the limit is exceeded. "+
|
||||
"By default there are no limits on samples ingestion rate. See also -remoteWrite.rateLimit")
|
||||
|
||||
@@ -92,6 +100,8 @@ var (
|
||||
"See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence . See also -remoteWrite.dropSamplesOnOverload")
|
||||
dropSamplesOnOverload = flag.Bool("remoteWrite.dropSamplesOnOverload", false, "Whether to drop samples when -remoteWrite.disableOnDiskQueue is set and if the samples "+
|
||||
"cannot be pushed into the configured -remoteWrite.url systems in a timely manner. See https://docs.victoriametrics.com/victoriametrics/vmagent/#disabling-on-disk-persistence")
|
||||
disableMetadataPerURL = flagutil.NewArrayBool("remoteWrite.disableMetadata", "Whether to disable sending metadata to the corresponding -remoteWrite.url. "+
|
||||
"By default, metadata sending is controlled by the global -enableMetadata flag")
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -157,8 +167,8 @@ func Init() {
|
||||
if len(*remoteWriteURLs) == 0 {
|
||||
logger.Fatalf("at least one `-remoteWrite.url` command-line flag must be set")
|
||||
}
|
||||
if *maxHourlySeries > 0 {
|
||||
hourlySeriesLimiter = bloomfilter.NewLimiter(*maxHourlySeries, time.Hour)
|
||||
if limit := getMaxHourlySeries(); limit > 0 {
|
||||
hourlySeriesLimiter = bloomfilter.NewLimiter(limit, time.Hour)
|
||||
_ = metrics.NewGauge(`vmagent_hourly_series_limit_max_series`, func() float64 {
|
||||
return float64(hourlySeriesLimiter.MaxItems())
|
||||
})
|
||||
@@ -166,8 +176,8 @@ func Init() {
|
||||
return float64(hourlySeriesLimiter.CurrentItems())
|
||||
})
|
||||
}
|
||||
if *maxDailySeries > 0 {
|
||||
dailySeriesLimiter = bloomfilter.NewLimiter(*maxDailySeries, 24*time.Hour)
|
||||
if limit := getMaxDailySeries(); limit > 0 {
|
||||
dailySeriesLimiter = bloomfilter.NewLimiter(limit, 24*time.Hour)
|
||||
_ = metrics.NewGauge(`vmagent_daily_series_limit_max_series`, func() float64 {
|
||||
return float64(dailySeriesLimiter.MaxItems())
|
||||
})
|
||||
@@ -540,6 +550,10 @@ func tryPushMetadataToRemoteStorages(rwctxs []*remoteWriteCtx, mms []prompb.Metr
|
||||
var wg sync.WaitGroup
|
||||
var anyPushFailed atomic.Bool
|
||||
for _, rwctx := range rwctxs {
|
||||
if !rwctx.enableMetadata {
|
||||
// Skip remote storage with disabled metadata
|
||||
continue
|
||||
}
|
||||
wg.Go(func() {
|
||||
if !rwctx.tryPushMetadataInternal(mms) {
|
||||
rwctx.pushFailures.Inc()
|
||||
@@ -811,6 +825,11 @@ type remoteWriteCtx struct {
|
||||
streamAggrKeepInput bool
|
||||
streamAggrDropInput bool
|
||||
|
||||
// enableMetadata indicates whether metadata should be sent to this remote storage.
|
||||
// It is determined by -remoteWrite.enableMetadata per-URL flag if set,
|
||||
// otherwise by the global -enableMetadata flag.
|
||||
enableMetadata bool
|
||||
|
||||
pss []*pendingSeries
|
||||
pssNextIdx atomic.Uint64
|
||||
|
||||
@@ -822,6 +841,18 @@ type remoteWriteCtx struct {
|
||||
rowsDroppedOnPushFailure *metrics.Counter
|
||||
}
|
||||
|
||||
// isMetadataEnabledForURL returns true if metadata should be sent to the remote storage at argIdx.
|
||||
// It checks the per-URL -remoteWrite.disableMetadata flag first.
|
||||
// If not set, it falls back to the global -enableMetadata flag.
|
||||
func isMetadataEnabledForURL(argIdx int) bool {
|
||||
if disableMetadataPerURL.GetOptionalArg(argIdx) {
|
||||
// Metadata is explicitly disabled for this URL
|
||||
return false
|
||||
}
|
||||
// Use global -enableMetadata value
|
||||
return prommetadata.IsEnabled()
|
||||
}
|
||||
|
||||
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string) *remoteWriteCtx {
|
||||
// strip query params, otherwise changing params resets pq
|
||||
pqURL := *remoteWriteURL
|
||||
@@ -892,10 +923,11 @@ func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, sanitizedURL string)
|
||||
}
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
c: c,
|
||||
pss: pss,
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
c: c,
|
||||
pss: pss,
|
||||
enableMetadata: isMetadataEnabledForURL(argIdx),
|
||||
|
||||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_rows_pushed_after_relabel_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_relabel_metrics_dropped_total{path=%q,url=%q}`, queuePath, sanitizedURL)),
|
||||
@@ -1116,3 +1148,21 @@ func newMapFromStrings(a []string) map[string]struct{} {
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func getMaxHourlySeries() int {
|
||||
limit := *maxHourlySeries
|
||||
if limit == -1 || limit > math.MaxInt32 {
|
||||
return math.MaxInt32
|
||||
}
|
||||
|
||||
return int(limit)
|
||||
}
|
||||
|
||||
func getMaxDailySeries() int {
|
||||
limit := *maxDailySeries
|
||||
if limit == -1 || limit > math.MaxInt32 {
|
||||
return math.MaxInt32
|
||||
}
|
||||
|
||||
return int(limit)
|
||||
}
|
||||
|
||||
@@ -13,14 +13,18 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/cespare/xxhash/v2"
|
||||
"github.com/golang/snappy"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -113,8 +117,10 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
input: make(chan prompb.TimeSeries, cfg.MaxQueueSize),
|
||||
}
|
||||
|
||||
for range cc {
|
||||
c.run(ctx)
|
||||
for i := 0; i < cc; i++ {
|
||||
c.wg.Go(func() {
|
||||
c.run(ctx, i)
|
||||
})
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
@@ -156,8 +162,7 @@ func (c *Client) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Client) run(ctx context.Context) {
|
||||
ticker := time.NewTicker(c.flushInterval)
|
||||
func (c *Client) run(ctx context.Context, id int) {
|
||||
wr := &prompb.WriteRequest{}
|
||||
shutdown := func() {
|
||||
lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout)
|
||||
@@ -174,42 +179,64 @@ func (c *Client) run(ctx context.Context) {
|
||||
cancel()
|
||||
}
|
||||
|
||||
c.wg.Go(func() {
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
// add jitter to spread remote write flushes over the flush interval to avoid congestion at the remote write destination
|
||||
h := xxhash.Sum64(bytesutil.ToUnsafeBytes(fmt.Sprintf("%d", id)))
|
||||
randJitter := uint64(float64(c.flushInterval) * (float64(h) / (1 << 64)))
|
||||
timer := time.NewTimer(time.Duration(randJitter))
|
||||
addJitter:
|
||||
for {
|
||||
select {
|
||||
case <-c.doneCh:
|
||||
timer.Stop()
|
||||
shutdown()
|
||||
return
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
shutdown()
|
||||
return
|
||||
case <-timer.C:
|
||||
break addJitter
|
||||
}
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(c.flushInterval)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-c.doneCh:
|
||||
shutdown()
|
||||
return
|
||||
case <-ctx.Done():
|
||||
shutdown()
|
||||
return
|
||||
case <-ticker.C:
|
||||
c.flush(ctx, wr)
|
||||
// drain the potential stale tick to avoid small or empty flushes after a slow flush.
|
||||
select {
|
||||
case <-c.doneCh:
|
||||
shutdown()
|
||||
return
|
||||
case <-ctx.Done():
|
||||
shutdown()
|
||||
return
|
||||
case <-ticker.C:
|
||||
default:
|
||||
}
|
||||
case ts, ok := <-c.input:
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
wr.Timeseries = append(wr.Timeseries, ts)
|
||||
if len(wr.Timeseries) >= c.maxBatchSize {
|
||||
c.flush(ctx, wr)
|
||||
// drain the potential stale tick to avoid small or empty flushes after a slow flush.
|
||||
select {
|
||||
case <-ticker.C:
|
||||
default:
|
||||
}
|
||||
case ts, ok := <-c.input:
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
wr.Timeseries = append(wr.Timeseries, ts)
|
||||
if len(wr.Timeseries) >= c.maxBatchSize {
|
||||
c.flush(ctx, wr)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
rwErrors = metrics.NewCounter(`vmalert_remotewrite_errors_total`)
|
||||
rwTotal = metrics.NewCounter(`vmalert_remotewrite_total`)
|
||||
|
||||
// sentRows and sentBytes are historical counters that can now be replaced by flushedRows and flushedBytes histograms. They may be deprecated in the future after the new histograms have been adopted for some time.
|
||||
sentRows = metrics.NewCounter(`vmalert_remotewrite_sent_rows_total`)
|
||||
sentBytes = metrics.NewCounter(`vmalert_remotewrite_sent_bytes_total`)
|
||||
flushedRows = metrics.NewHistogram(`vmalert_remotewrite_sent_rows`)
|
||||
flushedBytes = metrics.NewHistogram(`vmalert_remotewrite_sent_bytes`)
|
||||
droppedRows = metrics.NewCounter(`vmalert_remotewrite_dropped_rows_total`)
|
||||
sendDuration = metrics.NewFloatCounter(`vmalert_remotewrite_send_duration_seconds_total`)
|
||||
bufferFlushDuration = metrics.NewHistogram(`vmalert_remotewrite_flush_duration_seconds`)
|
||||
@@ -235,10 +262,8 @@ func (c *Client) flush(ctx context.Context, wr *prompb.WriteRequest) {
|
||||
data := wr.MarshalProtobuf(nil)
|
||||
b := snappy.Encode(nil, data)
|
||||
|
||||
retryInterval, maxRetryInterval := *retryMinInterval, *retryMaxTime
|
||||
if retryInterval > maxRetryInterval {
|
||||
retryInterval = maxRetryInterval
|
||||
}
|
||||
maxRetryInterval := *retryMaxTime
|
||||
bt := timeutil.NewBackoffTimer(*retryMinInterval, maxRetryInterval)
|
||||
timeStart := time.Now()
|
||||
defer func() {
|
||||
sendDuration.Add(time.Since(timeStart).Seconds())
|
||||
@@ -256,6 +281,8 @@ L:
|
||||
if err == nil {
|
||||
sentRows.Add(len(wr.Timeseries))
|
||||
sentBytes.Add(len(b))
|
||||
flushedRows.Update(float64(len(wr.Timeseries)))
|
||||
flushedBytes.Update(float64(len(b)))
|
||||
return
|
||||
}
|
||||
|
||||
@@ -281,12 +308,11 @@ L:
|
||||
break
|
||||
}
|
||||
|
||||
if retryInterval > timeLeftForRetries {
|
||||
retryInterval = timeLeftForRetries
|
||||
if bt.CurrentDelay() > timeLeftForRetries {
|
||||
bt.SetDelay(timeLeftForRetries)
|
||||
}
|
||||
// sleeping to prevent remote db hammering
|
||||
time.Sleep(retryInterval)
|
||||
retryInterval *= 2
|
||||
bt.Wait(ctx.Done())
|
||||
|
||||
attempts++
|
||||
}
|
||||
|
||||
@@ -47,12 +47,13 @@ var (
|
||||
"It is recommended setting this value to values smaller than -http.idleConnTimeout set at backend services")
|
||||
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
|
||||
|
||||
requestBufferSize = flagutil.NewBytes("requestBufferSize", 32*1024, "The size of the buffer for reading the request body before proxying the request to backends. "+
|
||||
requestBufferSize = flagutil.NewBytes("requestBufferSize", defaultBodyBufferSize, "The size of the buffer for reading the request body before proxying the request to backends. "+
|
||||
"This allows reducing the consumption of backend resources when processing requests from clients connected via slow networks. "+
|
||||
"Set to 0 to disable request buffering. See https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering")
|
||||
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size to buffer in memory for potential retries at other backends. "+
|
||||
"Request bodies larger than this size cannot be retried if the backend fails. Zero or negative value disables request body buffering and retries. "+
|
||||
"See also -requestBufferSize")
|
||||
"Set to 0 to disable request buffering. See https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering. "+
|
||||
" Disabling request buffering also disables request retries configured with '-maxRequestBodySizeToRetry'")
|
||||
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", defaultBodyBufferSize, "The maximum request body size in memory for potential retries at other backends. "+
|
||||
"Request bodies larger than this size cannot be retried if the backend fails. Zero or negative value disables request retries. "+
|
||||
"See also '-requestBufferSize' flag, which controlls a size of the buffer for potential retry.")
|
||||
|
||||
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process simultaneously. "+
|
||||
"Requests exceeding this limit are queued for up to -maxQueueDuration and then rejected with '429 Too Many Requests' http status code if the limit is still reached. "+
|
||||
@@ -89,6 +90,8 @@ var (
|
||||
"Recommended when vmauth is exposed to the internet.")
|
||||
)
|
||||
|
||||
const defaultBodyBufferSize = 16 * 1024
|
||||
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
@@ -349,13 +352,25 @@ func endConcurrencyLimit() {
|
||||
<-concurrencyLimitCh
|
||||
}
|
||||
|
||||
func getMaxRequestBufSize() int {
|
||||
rbs := requestBufferSize.IntN()
|
||||
rbstr := maxRequestBodySizeToRetry.IntN()
|
||||
if rbs <= 0 {
|
||||
// request buffering disabled
|
||||
// it also disables request retries as a side effect
|
||||
return 0
|
||||
}
|
||||
// for backward compatibility we must use max value for both flags
|
||||
return max(rbs, rbstr)
|
||||
}
|
||||
|
||||
func bufferRequestBody(ctx context.Context, r io.ReadCloser, userName string) (io.ReadCloser, error) {
|
||||
if r == nil {
|
||||
// This is a GET request with nil reader.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
maxBufSize := max(requestBufferSize.IntN(), maxRequestBodySizeToRetry.IntN())
|
||||
maxBufSize := getMaxRequestBufSize()
|
||||
if maxBufSize <= 0 {
|
||||
return r, nil
|
||||
}
|
||||
@@ -386,7 +401,7 @@ func bufferRequestBody(ctx context.Context, r io.ReadCloser, userName string) (i
|
||||
}
|
||||
}
|
||||
|
||||
bb := newBufferedBody(r, buf, maxBufSize)
|
||||
bb := newBufferedBody(r, buf, maxBufSize, maxRequestBodySizeToRetry.IntN())
|
||||
return bb, nil
|
||||
}
|
||||
|
||||
@@ -763,7 +778,7 @@ var concurrentRequestsLimitReached = metrics.NewCounter("vmauth_concurrent_reque
|
||||
|
||||
func usage() {
|
||||
const s = `
|
||||
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.
|
||||
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics components or any other HTTP backends.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/victoriametrics/vmauth/ .
|
||||
`
|
||||
@@ -810,11 +825,14 @@ type bufferedBody struct {
|
||||
// bufOffset is the offset at buf for already read bytes.
|
||||
bufOffset int
|
||||
|
||||
// cannotRetry is set to true after Close() call on non-nil r.
|
||||
// cannotRetry is set to true if buf size exceeds max retry body size
|
||||
cannotRetry bool
|
||||
|
||||
// bodyWasClosed is set to true at Close if request retry is not possible
|
||||
bodyWasClosed bool
|
||||
}
|
||||
|
||||
func newBufferedBody(r io.ReadCloser, buf []byte, maxBufSize int) *bufferedBody {
|
||||
func newBufferedBody(r io.ReadCloser, buf []byte, maxBufSize, maxBufRetrySize int) *bufferedBody {
|
||||
// Do not use sync.Pool here, since http.RoundTrip may still use request body after return.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8051
|
||||
|
||||
@@ -824,14 +842,15 @@ func newBufferedBody(r io.ReadCloser, buf []byte, maxBufSize int) *bufferedBody
|
||||
}
|
||||
|
||||
return &bufferedBody{
|
||||
r: r,
|
||||
buf: buf,
|
||||
r: r,
|
||||
buf: buf,
|
||||
cannotRetry: len(buf) > maxBufRetrySize || maxBufRetrySize == 0,
|
||||
}
|
||||
}
|
||||
|
||||
// Read implements io.Reader interface.
|
||||
func (bb *bufferedBody) Read(p []byte) (int, error) {
|
||||
if bb.cannotRetry {
|
||||
if bb.bodyWasClosed {
|
||||
return 0, fmt.Errorf("cannot read already closed body")
|
||||
}
|
||||
if bb.bufOffset < len(bb.buf) {
|
||||
@@ -846,14 +865,16 @@ func (bb *bufferedBody) Read(p []byte) (int, error) {
|
||||
}
|
||||
|
||||
func (bb *bufferedBody) canRetry() bool {
|
||||
return bb.r == nil
|
||||
return !bb.cannotRetry
|
||||
}
|
||||
|
||||
// Close implements io.Closer interface.
|
||||
func (bb *bufferedBody) Close() error {
|
||||
bb.resetReader()
|
||||
if bb.cannotRetry {
|
||||
bb.bodyWasClosed = true
|
||||
}
|
||||
if bb.r != nil {
|
||||
bb.cannotRetry = true
|
||||
return bb.r.Close()
|
||||
}
|
||||
return nil
|
||||
|
||||
@@ -1714,13 +1714,6 @@ func TestBufferRequestBody_Failure(t *testing.T) {
|
||||
}
|
||||
}()
|
||||
|
||||
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
|
||||
defer func() {
|
||||
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
|
||||
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
}()
|
||||
|
||||
defaultMaxQueueDuration := *maxQueueDuration
|
||||
defer func() {
|
||||
*maxQueueDuration = defaultMaxQueueDuration
|
||||
@@ -1729,9 +1722,6 @@ func TestBufferRequestBody_Failure(t *testing.T) {
|
||||
f := func(body *mockBody, expectedResponse string) {
|
||||
t.Helper()
|
||||
|
||||
if err := maxRequestBodySizeToRetry.Set("0"); err != nil {
|
||||
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
if err := requestBufferSize.Set("2048"); err != nil {
|
||||
t.Fatalf("cannot set requestBufferSize: %s", err)
|
||||
}
|
||||
@@ -1844,16 +1834,6 @@ func TestBufferedBody_RetrySuccess(t *testing.T) {
|
||||
t.Fatalf("cannot set requestBufferSize: %s", err)
|
||||
}
|
||||
|
||||
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
|
||||
defer func() {
|
||||
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
|
||||
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
}()
|
||||
if err := maxRequestBodySizeToRetry.Set("0"); err != nil {
|
||||
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
|
||||
if err != nil {
|
||||
@@ -1902,16 +1882,6 @@ func TestBufferedBody_RetrySuccessPartialRead(t *testing.T) {
|
||||
t.Fatalf("cannot set requestBufferSize: %s", err)
|
||||
}
|
||||
|
||||
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
|
||||
defer func() {
|
||||
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
|
||||
t.Fatalf("cannot reset maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
}()
|
||||
if err := maxRequestBodySizeToRetry.Set("0"); err != nil {
|
||||
t.Fatalf("cannot set maxRequestBodySizeToRetry: %s", err)
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
rb, err := bufferRequestBody(ctx, io.NopCloser(bytes.NewBufferString(s)), "foo")
|
||||
if err != nil {
|
||||
@@ -1964,16 +1934,6 @@ func TestBufferedBody_RetryFailureTooBigBody(t *testing.T) {
|
||||
f := func(s string, maxBodySize int) {
|
||||
t.Helper()
|
||||
|
||||
defaultRequestBufferSize := requestBufferSize.String()
|
||||
defer func() {
|
||||
if err := requestBufferSize.Set(defaultRequestBufferSize); err != nil {
|
||||
t.Fatalf("cannot reset requestBufferSize: %s", err)
|
||||
}
|
||||
}()
|
||||
if err := requestBufferSize.Set("0"); err != nil {
|
||||
t.Fatalf("cannot set requestBufferSize: %s", err)
|
||||
}
|
||||
|
||||
defaultMaxRequestBodySizeToRetry := maxRequestBodySizeToRetry.String()
|
||||
defer func() {
|
||||
if err := maxRequestBodySizeToRetry.Set(defaultMaxRequestBodySizeToRetry); err != nil {
|
||||
@@ -2049,10 +2009,9 @@ func TestBufferedBody_RetryFailureZeroOrNegativeMaxBodySize(t *testing.T) {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
bb, ok := rb.(*bufferedBody)
|
||||
canRetry := !ok || bb.canRetry()
|
||||
|
||||
if !canRetry {
|
||||
t.Fatalf("canRetry() must return true before reading anything")
|
||||
canRetry := ok && bb.canRetry()
|
||||
if canRetry {
|
||||
t.Fatalf("canRetry() must return false before reading anything")
|
||||
}
|
||||
data, err := io.ReadAll(rb)
|
||||
if err != nil {
|
||||
@@ -2069,8 +2028,8 @@ func TestBufferedBody_RetryFailureZeroOrNegativeMaxBodySize(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error in io.ReadAll: %s", err)
|
||||
}
|
||||
if string(data) != s {
|
||||
t.Fatalf("unexpected data read\ngot\n%s\nwant\n%s", data, s)
|
||||
if len(data) > 0 {
|
||||
t.Fatalf("unexpected non-empty data read\ngot\n%s", data)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -416,6 +416,16 @@ const (
|
||||
promTemporaryDirPath = "prom-tmp-dir-path"
|
||||
)
|
||||
|
||||
const (
|
||||
thanosSnapshot = "thanos-snapshot"
|
||||
thanosConcurrency = "thanos-concurrency"
|
||||
thanosFilterTimeStart = "thanos-filter-time-start"
|
||||
thanosFilterTimeEnd = "thanos-filter-time-end"
|
||||
thanosFilterLabel = "thanos-filter-label"
|
||||
thanosFilterLabelValue = "thanos-filter-label-value"
|
||||
thanosAggrTypes = "thanos-aggr-types"
|
||||
)
|
||||
|
||||
var (
|
||||
promFlags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
@@ -451,6 +461,43 @@ var (
|
||||
Value: os.TempDir(),
|
||||
},
|
||||
}
|
||||
|
||||
thanosFlags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: thanosSnapshot,
|
||||
Usage: "Path to Thanos snapshot directory containing raw and/or downsampled blocks.",
|
||||
Required: true,
|
||||
},
|
||||
&cli.IntFlag{
|
||||
Name: thanosConcurrency,
|
||||
Usage: "Number of concurrently running snapshot readers",
|
||||
Value: 1,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: thanosFilterTimeStart,
|
||||
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or higher than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: thanosFilterTimeEnd,
|
||||
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: thanosFilterLabel,
|
||||
Usage: "Thanos label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: thanosFilterLabelValue,
|
||||
Usage: fmt.Sprintf("Thanos regular expression to filter label from %q flag.", thanosFilterLabel),
|
||||
Value: ".*",
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: thanosAggrTypes,
|
||||
Usage: "Aggregate types to import from Thanos downsampled blocks. Supported values: count, sum, min, max, counter. " +
|
||||
"Each aggregate will be imported as a separate metric with the aggregate type as suffix (e.g., metric_name:5m:count). " +
|
||||
"If not specified, all aggregate types will be imported from downsampled blocks.",
|
||||
Value: nil,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
const (
|
||||
|
||||
@@ -27,6 +27,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/opentsdb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/prometheus"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/thanos"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
@@ -285,6 +286,7 @@ func main() {
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create prometheus client: %s", err)
|
||||
}
|
||||
|
||||
pp := prometheusProcessor{
|
||||
cl: cl,
|
||||
im: importer,
|
||||
@@ -294,6 +296,59 @@ func main() {
|
||||
return pp.run(ctx)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "thanos",
|
||||
Usage: "Migrate time series from Thanos blocks (supports raw and downsampled data)",
|
||||
Flags: mergeFlags(globalFlags, thanosFlags, vmFlags),
|
||||
Before: beforeFn,
|
||||
Action: func(c *cli.Context) error {
|
||||
fmt.Println("Thanos import mode")
|
||||
|
||||
vmCfg, err := initConfigVM(c)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to init VM configuration: %s", err)
|
||||
}
|
||||
|
||||
importer, err = vm.NewImporter(ctx, vmCfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create VM importer: %s", err)
|
||||
}
|
||||
|
||||
thanosCfg := thanos.Config{
|
||||
Snapshot: c.String(thanosSnapshot),
|
||||
Filter: thanos.Filter{
|
||||
TimeMin: c.String(thanosFilterTimeStart),
|
||||
TimeMax: c.String(thanosFilterTimeEnd),
|
||||
Label: c.String(thanosFilterLabel),
|
||||
LabelValue: c.String(thanosFilterLabelValue),
|
||||
},
|
||||
}
|
||||
cl, err := thanos.NewClient(thanosCfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create thanos client: %s", err)
|
||||
}
|
||||
|
||||
var aggrTypes []thanos.AggrType
|
||||
if aggrTypesStr := c.StringSlice(thanosAggrTypes); len(aggrTypesStr) > 0 {
|
||||
for _, typeStr := range aggrTypesStr {
|
||||
aggrType, err := thanos.ParseAggrType(typeStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse aggregate type %q: %s", typeStr, err)
|
||||
}
|
||||
aggrTypes = append(aggrTypes, aggrType)
|
||||
}
|
||||
}
|
||||
|
||||
tp := thanosProcessor{
|
||||
cl: cl,
|
||||
im: importer,
|
||||
cc: c.Int(thanosConcurrency),
|
||||
isVerbose: c.Bool(globalVerbose),
|
||||
aggrTypes: aggrTypes,
|
||||
}
|
||||
return tp.run(ctx)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "vm-native",
|
||||
Usage: "Migrate time series between VictoriaMetrics installations",
|
||||
|
||||
233
app/vmctl/thanos/aggr_chunk.go
Normal file
233
app/vmctl/thanos/aggr_chunk.go
Normal file
@@ -0,0 +1,233 @@
|
||||
package thanos
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
||||
)
|
||||
|
||||
// ChunkEncAggr is the top level encoding byte for the AggrChunk.
|
||||
// It is defined by Thanos as 0xff to prevent collisions with Prometheus encodings.
|
||||
const ChunkEncAggr = chunkenc.Encoding(0xff)
|
||||
|
||||
// AggrType represents an aggregation type in Thanos downsampled blocks.
|
||||
type AggrType uint8
|
||||
|
||||
// AggrTypeNone indicates raw blocks with no aggregation.
|
||||
// It is used as a sentinel to distinguish raw block processing from downsampled.
|
||||
const AggrTypeNone AggrType = 255
|
||||
|
||||
// Valid aggregation types matching Thanos definitions.
|
||||
const (
|
||||
AggrCount AggrType = iota
|
||||
AggrSum
|
||||
AggrMin
|
||||
AggrMax
|
||||
AggrCounter
|
||||
)
|
||||
|
||||
// AllAggrTypes contains all supported aggregation types.
|
||||
var AllAggrTypes = []AggrType{AggrCount, AggrSum, AggrMin, AggrMax, AggrCounter}
|
||||
|
||||
func (t AggrType) String() string {
|
||||
switch t {
|
||||
case AggrCount:
|
||||
return "count"
|
||||
case AggrSum:
|
||||
return "sum"
|
||||
case AggrMin:
|
||||
return "min"
|
||||
case AggrMax:
|
||||
return "max"
|
||||
case AggrCounter:
|
||||
return "counter"
|
||||
}
|
||||
return "<unknown>"
|
||||
}
|
||||
|
||||
// ParseAggrType parses aggregate type from string.
|
||||
func ParseAggrType(s string) (AggrType, error) {
|
||||
switch s {
|
||||
case "count":
|
||||
return AggrCount, nil
|
||||
case "sum":
|
||||
return AggrSum, nil
|
||||
case "min":
|
||||
return AggrMin, nil
|
||||
case "max":
|
||||
return AggrMax, nil
|
||||
case "counter":
|
||||
return AggrCounter, nil
|
||||
}
|
||||
return 0, fmt.Errorf("unknown aggregate type: %q", s)
|
||||
}
|
||||
|
||||
// ErrAggrNotExist is returned if a requested aggregation is not present in an AggrChunk.
|
||||
var ErrAggrNotExist = errors.New("aggregate does not exist")
|
||||
|
||||
// AggrChunk is a chunk that is composed of a set of aggregates for the same underlying data.
|
||||
// Not all aggregates must be present.
|
||||
// This is a read-only implementation for decoding Thanos downsampled blocks.
|
||||
type AggrChunk []byte
|
||||
|
||||
// IsAggrChunk checks if the encoding byte indicates this is an AggrChunk.
|
||||
func IsAggrChunk(enc chunkenc.Encoding) bool {
|
||||
return enc == ChunkEncAggr
|
||||
}
|
||||
|
||||
// Get returns the sub-chunk for the given aggregate type if it exists.
|
||||
func (c AggrChunk) Get(t AggrType) (chunkenc.Chunk, error) {
|
||||
b := c[:]
|
||||
var x []byte
|
||||
|
||||
for i := AggrType(0); i <= t; i++ {
|
||||
l, n := binary.Uvarint(b)
|
||||
if n < 1 {
|
||||
return nil, errors.New("invalid size: failed to read uvarint")
|
||||
}
|
||||
if l > uint64(len(b[n:])) || l+1 > uint64(len(b[n:])) {
|
||||
if l > 0 {
|
||||
return nil, errors.New("invalid size: not enough bytes")
|
||||
}
|
||||
}
|
||||
b = b[n:]
|
||||
// If length is set to zero explicitly, that means the aggregate is unset.
|
||||
if l == 0 {
|
||||
if i == t {
|
||||
return nil, ErrAggrNotExist
|
||||
}
|
||||
continue
|
||||
}
|
||||
chunkLen := int(l) + 1
|
||||
x = b[:chunkLen]
|
||||
b = b[chunkLen:]
|
||||
}
|
||||
if len(x) == 0 {
|
||||
return nil, ErrAggrNotExist
|
||||
}
|
||||
return chunkenc.FromData(chunkenc.Encoding(x[0]), x[1:])
|
||||
}
|
||||
|
||||
// Encoding returns the encoding type for AggrChunk.
|
||||
func (c AggrChunk) Encoding() chunkenc.Encoding {
|
||||
return ChunkEncAggr
|
||||
}
|
||||
|
||||
// errIterator wraps a nop iterator but reports an error via Err().
|
||||
// It embeds chunkenc.Iterator to inherit all methods (including Seek)
|
||||
// which avoids go vet stdmethods warning about Seek signature.
|
||||
type errIterator struct {
|
||||
chunkenc.Iterator
|
||||
err error
|
||||
}
|
||||
|
||||
// Err returns the underlying error.
|
||||
func (it *errIterator) Err() error {
|
||||
return it.err
|
||||
}
|
||||
|
||||
// newAggrChunkIterator creates a new iterator for the specified aggregate type.
|
||||
// If the aggregate is not present in the chunk (ErrAggrNotExist), a nop iterator
|
||||
// is returned without error — the caller will simply see zero samples.
|
||||
// Real decoding/corruption errors are reported via the iterator's Err() method.
|
||||
func newAggrChunkIterator(data []byte, aggrType AggrType) chunkenc.Iterator {
|
||||
chunk := AggrChunk(data)
|
||||
subChunk, err := chunk.Get(aggrType)
|
||||
if err != nil {
|
||||
if errors.Is(err, ErrAggrNotExist) {
|
||||
return chunkenc.NewNopIterator()
|
||||
}
|
||||
return &errIterator{
|
||||
Iterator: chunkenc.NewNopIterator(),
|
||||
err: err,
|
||||
}
|
||||
}
|
||||
return subChunk.Iterator(nil)
|
||||
}
|
||||
|
||||
// AggrChunkWrapper wraps AggrChunk to implement chunkenc.Chunk interface.
|
||||
// It delegates iteration to a specific aggregate type.
|
||||
type AggrChunkWrapper struct {
|
||||
data []byte
|
||||
aggrType AggrType
|
||||
}
|
||||
|
||||
// NewAggrChunkWrapper creates a new AggrChunk wrapper for the specified aggregate type.
|
||||
func NewAggrChunkWrapper(data []byte, aggrType AggrType) *AggrChunkWrapper {
|
||||
return &AggrChunkWrapper{
|
||||
data: data,
|
||||
aggrType: aggrType,
|
||||
}
|
||||
}
|
||||
|
||||
// Bytes returns the underlying byte slice.
|
||||
func (c *AggrChunkWrapper) Bytes() []byte {
|
||||
return c.data
|
||||
}
|
||||
|
||||
// Encoding returns the AggrChunk encoding.
|
||||
func (c *AggrChunkWrapper) Encoding() chunkenc.Encoding {
|
||||
return ChunkEncAggr
|
||||
}
|
||||
|
||||
// Appender returns an error since AggrChunk is read-only.
|
||||
func (c *AggrChunkWrapper) Appender() (chunkenc.Appender, error) {
|
||||
return nil, errors.New("AggrChunk is read-only")
|
||||
}
|
||||
|
||||
// Iterator returns an iterator for the specified aggregate type.
|
||||
func (c *AggrChunkWrapper) Iterator(it chunkenc.Iterator) chunkenc.Iterator {
|
||||
return newAggrChunkIterator(c.data, c.aggrType)
|
||||
}
|
||||
|
||||
// NumSamples returns the number of samples in the aggregate.
|
||||
func (c *AggrChunkWrapper) NumSamples() int {
|
||||
chunk := AggrChunk(c.data)
|
||||
subChunk, err := chunk.Get(c.aggrType)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
return subChunk.NumSamples()
|
||||
}
|
||||
|
||||
// Compact is a no-op for read-only AggrChunk.
|
||||
func (c *AggrChunkWrapper) Compact() {}
|
||||
|
||||
// Reset resets the chunk with new data.
|
||||
func (c *AggrChunkWrapper) Reset(stream []byte) {
|
||||
c.data = stream
|
||||
}
|
||||
|
||||
// AggrChunkPool is a custom Pool that understands AggrChunk encoding (0xff).
|
||||
// It delegates standard encodings to the default pool and handles AggrChunk specially.
|
||||
type AggrChunkPool struct {
|
||||
defaultPool chunkenc.Pool
|
||||
aggrType AggrType
|
||||
}
|
||||
|
||||
// NewAggrChunkPool creates a new pool that handles AggrChunk encoding.
|
||||
func NewAggrChunkPool(aggrType AggrType) *AggrChunkPool {
|
||||
return &AggrChunkPool{
|
||||
defaultPool: chunkenc.NewPool(),
|
||||
aggrType: aggrType,
|
||||
}
|
||||
}
|
||||
|
||||
// Get returns a chunk for the given encoding and data.
|
||||
func (p *AggrChunkPool) Get(e chunkenc.Encoding, b []byte) (chunkenc.Chunk, error) {
|
||||
if e == ChunkEncAggr {
|
||||
return NewAggrChunkWrapper(b, p.aggrType), nil
|
||||
}
|
||||
return p.defaultPool.Get(e, b)
|
||||
}
|
||||
|
||||
// Put returns a chunk to the pool.
|
||||
func (p *AggrChunkPool) Put(c chunkenc.Chunk) error {
|
||||
if c.Encoding() == ChunkEncAggr {
|
||||
// AggrChunk wrappers are not pooled
|
||||
return nil
|
||||
}
|
||||
return p.defaultPool.Put(c)
|
||||
}
|
||||
110
app/vmctl/thanos/block_meta.go
Normal file
110
app/vmctl/thanos/block_meta.go
Normal file
@@ -0,0 +1,110 @@
|
||||
package thanos
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// BlockMeta extends Prometheus BlockMeta with Thanos-specific fields.
|
||||
type BlockMeta struct {
|
||||
// Thanos-specific metadata
|
||||
Thanos ThanosMeta `json:"thanos,omitempty"`
|
||||
}
|
||||
|
||||
// ThanosMeta contains Thanos-specific block metadata.
|
||||
type ThanosMeta struct {
|
||||
// Labels are external labels identifying the producer.
|
||||
Labels map[string]string `json:"labels,omitempty"`
|
||||
|
||||
// Downsample contains downsampling information.
|
||||
Downsample ThanosDownsample `json:"downsample,omitempty"`
|
||||
|
||||
// Source indicates where the block came from.
|
||||
Source string `json:"source,omitempty"`
|
||||
|
||||
// SegmentFiles contains list of segment files in the block.
|
||||
SegmentFiles []string `json:"segment_files,omitempty"`
|
||||
|
||||
// Files contains metadata about files in the block.
|
||||
Files []ThanosFile `json:"files,omitempty"`
|
||||
}
|
||||
|
||||
// ThanosDownsample contains downsampling resolution info.
|
||||
type ThanosDownsample struct {
|
||||
// Resolution is the downsampling resolution in milliseconds.
|
||||
// 0 means raw data (no downsampling).
|
||||
// 300000 (5 minutes) or 3600000 (1 hour) for downsampled data.
|
||||
Resolution int64 `json:"resolution"`
|
||||
}
|
||||
|
||||
// ThanosFile contains metadata about a file in the block.
|
||||
type ThanosFile struct {
|
||||
RelPath string `json:"rel_path"`
|
||||
SizeBytes int64 `json:"size_bytes,omitempty"`
|
||||
}
|
||||
|
||||
// ResolutionLevel represents the downsampling resolution.
|
||||
type ResolutionLevel int64
|
||||
|
||||
const (
|
||||
// ResolutionRaw is for raw, non-downsampled data.
|
||||
ResolutionRaw ResolutionLevel = 0
|
||||
// Resolution5m is for 5-minute downsampled data (300000 ms).
|
||||
Resolution5m ResolutionLevel = 300000
|
||||
// Resolution1h is for 1-hour downsampled data (3600000 ms).
|
||||
Resolution1h ResolutionLevel = 3600000
|
||||
)
|
||||
|
||||
// String returns human-readable resolution string.
|
||||
func (r ResolutionLevel) String() string {
|
||||
switch r {
|
||||
case ResolutionRaw:
|
||||
return "raw"
|
||||
case Resolution5m:
|
||||
return "5m"
|
||||
case Resolution1h:
|
||||
return "1h"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
// ReadBlockMeta reads Thanos-extended block metadata from meta.json.
|
||||
func ReadBlockMeta(blockDir string) (*BlockMeta, error) {
|
||||
metaPath := filepath.Join(blockDir, "meta.json")
|
||||
data, err := os.ReadFile(metaPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var meta BlockMeta
|
||||
if err := json.Unmarshal(data, &meta); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &meta, nil
|
||||
}
|
||||
|
||||
// IsDownsampled returns true if the block contains downsampled data.
|
||||
func (m *BlockMeta) IsDownsampled() bool {
|
||||
return m.Thanos.Downsample.Resolution > 0
|
||||
}
|
||||
|
||||
// Resolution returns the block's downsampling resolution.
|
||||
func (m *BlockMeta) Resolution() ResolutionLevel {
|
||||
return ResolutionLevel(m.Thanos.Downsample.Resolution)
|
||||
}
|
||||
|
||||
// ResolutionSuffix returns a suffix string for metric names based on resolution.
|
||||
// For example: ":5m" or ":1h" for downsampled data, empty for raw data.
|
||||
func (m *BlockMeta) ResolutionSuffix() string {
|
||||
switch m.Resolution() {
|
||||
case Resolution5m:
|
||||
return ":5m"
|
||||
case Resolution1h:
|
||||
return ":1h"
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
83
app/vmctl/thanos/block_reader.go
Normal file
83
app/vmctl/thanos/block_reader.go
Normal file
@@ -0,0 +1,83 @@
|
||||
package thanos
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/prometheus/prometheus/tsdb"
|
||||
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
||||
)
|
||||
|
||||
// BlockInfo contains information about a block including Thanos metadata.
|
||||
type BlockInfo struct {
|
||||
Block tsdb.BlockReader
|
||||
Resolution ResolutionLevel
|
||||
IsThanos bool
|
||||
// Closer releases the block's resources (file descriptors, mmap).
|
||||
// Must be called only after all queriers on this block have been closed.
|
||||
Closer io.Closer
|
||||
}
|
||||
|
||||
// OpenBlocksWithInfo opens all blocks and returns them with their metadata.
|
||||
// snapshotDir must be a snapshot directory containing block directories.
|
||||
func OpenBlocksWithInfo(snapshotDir string, aggrType AggrType) ([]BlockInfo, error) {
|
||||
entries, err := os.ReadDir(snapshotDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read snapshot directory: %w", err)
|
||||
}
|
||||
|
||||
var blocks []BlockInfo
|
||||
for _, entry := range entries {
|
||||
if !entry.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
blockDir := filepath.Join(snapshotDir, entry.Name())
|
||||
metaPath := filepath.Join(blockDir, "meta.json")
|
||||
|
||||
// Check if this is a valid block directory (has meta.json)
|
||||
if _, err := os.Stat(metaPath); os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
|
||||
meta, err := ReadBlockMeta(blockDir)
|
||||
if err != nil {
|
||||
CloseBlocks(blocks)
|
||||
return nil, fmt.Errorf("failed to read Thanos metadata for block %s: %w", blockDir, err)
|
||||
}
|
||||
|
||||
var pool chunkenc.Pool
|
||||
if meta.IsDownsampled() {
|
||||
// Use AggrChunkPool for downsampled blocks
|
||||
pool = NewAggrChunkPool(aggrType)
|
||||
}
|
||||
|
||||
block, err := tsdb.OpenBlock(nil, blockDir, pool, nil)
|
||||
if err != nil {
|
||||
// Close previously opened blocks before returning error
|
||||
CloseBlocks(blocks)
|
||||
return nil, fmt.Errorf("failed to open block %s: %w", blockDir, err)
|
||||
}
|
||||
|
||||
blocks = append(blocks, BlockInfo{
|
||||
Block: block,
|
||||
Resolution: meta.Resolution(),
|
||||
IsThanos: true,
|
||||
Closer: block,
|
||||
})
|
||||
}
|
||||
|
||||
return blocks, nil
|
||||
}
|
||||
|
||||
// CloseBlocks closes all blocks in the slice.
|
||||
// Must be called only after all queriers on these blocks have been closed.
|
||||
func CloseBlocks(blocks []BlockInfo) {
|
||||
for _, bi := range blocks {
|
||||
if bi.Closer != nil {
|
||||
_ = bi.Closer.Close()
|
||||
}
|
||||
}
|
||||
}
|
||||
198
app/vmctl/thanos/client.go
Normal file
198
app/vmctl/thanos/client.go
Normal file
@@ -0,0 +1,198 @@
|
||||
package thanos
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
"github.com/prometheus/prometheus/storage"
|
||||
"github.com/prometheus/prometheus/tsdb"
|
||||
)
|
||||
|
||||
// Config contains parameters for reading Thanos snapshots.
|
||||
type Config struct {
|
||||
Snapshot string
|
||||
Filter Filter
|
||||
}
|
||||
|
||||
// Filter contains configuration for filtering the timeseries.
|
||||
type Filter struct {
|
||||
TimeMin string
|
||||
TimeMax string
|
||||
Label string
|
||||
LabelValue string
|
||||
}
|
||||
|
||||
// Client reads Thanos snapshot blocks, including downsampled blocks with AggrChunk encoding.
|
||||
type Client struct {
|
||||
snapshotPath string
|
||||
filter filter
|
||||
statsPrinted bool
|
||||
}
|
||||
|
||||
type filter struct {
|
||||
min, max int64
|
||||
label string
|
||||
labelValue string
|
||||
}
|
||||
|
||||
func (f filter) inRange(minV, maxV int64) bool {
|
||||
fmin, fmax := f.min, f.max
|
||||
if fmin == 0 {
|
||||
fmin = minV
|
||||
}
|
||||
if fmax == 0 {
|
||||
fmax = maxV
|
||||
}
|
||||
return minV <= fmax && fmin <= maxV
|
||||
}
|
||||
|
||||
// NewClient creates a new Thanos snapshot client.
|
||||
func NewClient(cfg Config) (*Client, error) {
|
||||
minTime, maxTime, err := parseTime(cfg.Filter.TimeMin, cfg.Filter.TimeMax)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse time in filter: %s", err)
|
||||
}
|
||||
return &Client{
|
||||
snapshotPath: cfg.Snapshot,
|
||||
filter: filter{
|
||||
min: minTime,
|
||||
max: maxTime,
|
||||
label: cfg.Filter.Label,
|
||||
labelValue: cfg.Filter.LabelValue,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Explore fetches all available blocks from the snapshot with support for
|
||||
// Thanos AggrChunk (downsampled blocks). It opens blocks with a custom pool
|
||||
// that can decode AggrChunk encoding (0xff).
|
||||
func (c *Client) Explore(aggrType AggrType) ([]BlockInfo, error) {
|
||||
blockInfos, err := OpenBlocksWithInfo(c.snapshotPath, aggrType)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open blocks: %w", err)
|
||||
}
|
||||
|
||||
s := &Stats{
|
||||
Filtered: c.filter.min != 0 || c.filter.max != 0 || c.filter.label != "",
|
||||
Blocks: len(blockInfos),
|
||||
}
|
||||
|
||||
var blocksToImport []BlockInfo
|
||||
for _, bi := range blockInfos {
|
||||
meta := bi.Block.Meta()
|
||||
|
||||
if s.MinTime == 0 || meta.MinTime < s.MinTime {
|
||||
s.MinTime = meta.MinTime
|
||||
}
|
||||
if s.MaxTime == 0 || meta.MaxTime > s.MaxTime {
|
||||
s.MaxTime = meta.MaxTime
|
||||
}
|
||||
|
||||
if !c.filter.inRange(meta.MinTime, meta.MaxTime) {
|
||||
s.SkippedBlocks++
|
||||
if bi.Closer != nil {
|
||||
_ = bi.Closer.Close()
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
s.Samples += meta.Stats.NumSamples
|
||||
s.Series += meta.Stats.NumSeries
|
||||
blocksToImport = append(blocksToImport, bi)
|
||||
}
|
||||
if !c.statsPrinted {
|
||||
fmt.Println(s)
|
||||
c.statsPrinted = true
|
||||
}
|
||||
return blocksToImport, nil
|
||||
}
|
||||
|
||||
// querierSeriesSet wraps a SeriesSet and its underlying Querier, ensuring
|
||||
// the querier is closed once the SeriesSet has been fully consumed.
|
||||
// This releases the querier's read reference on the block, which is required
|
||||
// for Block.Close() to complete without hanging.
|
||||
type querierSeriesSet struct {
|
||||
storage.SeriesSet
|
||||
q storage.Querier
|
||||
closed bool
|
||||
}
|
||||
|
||||
// Next advances the iterator. When the underlying SeriesSet is exhausted,
|
||||
// it closes the querier to release resources.
|
||||
func (s *querierSeriesSet) Next() bool {
|
||||
if s.SeriesSet.Next() {
|
||||
return true
|
||||
}
|
||||
if !s.closed {
|
||||
_ = s.q.Close()
|
||||
s.closed = true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Close explicitly closes the underlying querier.
|
||||
// This must be called if iteration is stopped early (before Next returns false)
|
||||
// to release block read references and prevent Block.Close() from hanging.
|
||||
func (s *querierSeriesSet) Close() {
|
||||
if !s.closed {
|
||||
_ = s.q.Close()
|
||||
s.closed = true
|
||||
}
|
||||
}
|
||||
|
||||
// ClosableSeriesSet extends storage.SeriesSet with a Close method for explicit cleanup.
|
||||
type ClosableSeriesSet interface {
|
||||
storage.SeriesSet
|
||||
Close()
|
||||
}
|
||||
|
||||
// Read reads the given BlockInfo according to configured time and label filters.
|
||||
// The returned ClosableSeriesSet automatically closes the underlying querier when fully consumed,
|
||||
// but Close() should be called explicitly (e.g., via defer) to handle early returns.
|
||||
func (c *Client) Read(bi BlockInfo) (ClosableSeriesSet, error) {
|
||||
minTime, maxTime := bi.Block.Meta().MinTime, bi.Block.Meta().MaxTime
|
||||
if c.filter.min != 0 {
|
||||
minTime = c.filter.min
|
||||
}
|
||||
if c.filter.max != 0 {
|
||||
maxTime = c.filter.max
|
||||
}
|
||||
q, err := tsdb.NewBlockQuerier(bi.Block, minTime, maxTime)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ss := q.Select(
|
||||
context.Background(),
|
||||
false,
|
||||
nil,
|
||||
labels.MustNewMatcher(labels.MatchRegexp, c.filter.label, c.filter.labelValue),
|
||||
)
|
||||
return &querierSeriesSet{
|
||||
SeriesSet: ss,
|
||||
q: q,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func parseTime(start, end string) (int64, int64, error) {
|
||||
var s, e int64
|
||||
if start == "" && end == "" {
|
||||
return 0, 0, nil
|
||||
}
|
||||
if start != "" {
|
||||
v, err := time.Parse(time.RFC3339, start)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("failed to parse %q: %s", start, err)
|
||||
}
|
||||
s = v.UnixNano() / int64(time.Millisecond)
|
||||
}
|
||||
if end != "" {
|
||||
v, err := time.Parse(time.RFC3339, end)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("failed to parse %q: %s", end, err)
|
||||
}
|
||||
e = v.UnixNano() / int64(time.Millisecond)
|
||||
}
|
||||
return s, e, nil
|
||||
}
|
||||
38
app/vmctl/thanos/stats.go
Normal file
38
app/vmctl/thanos/stats.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package thanos
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Stats represents data migration stats for Thanos blocks.
|
||||
type Stats struct {
|
||||
Filtered bool
|
||||
MinTime int64
|
||||
MaxTime int64
|
||||
Samples uint64
|
||||
Series uint64
|
||||
Blocks int
|
||||
SkippedBlocks int
|
||||
}
|
||||
|
||||
// String returns string representation for s.
|
||||
func (s Stats) String() string {
|
||||
str := fmt.Sprintf("Thanos snapshot stats:\n"+
|
||||
" blocks found: %d;\n"+
|
||||
" blocks skipped by time filter: %d;\n"+
|
||||
" min time: %d (%v);\n"+
|
||||
" max time: %d (%v);\n"+
|
||||
" samples: %d;\n"+
|
||||
" series: %d.",
|
||||
s.Blocks, s.SkippedBlocks,
|
||||
s.MinTime, time.Unix(s.MinTime/1e3, 0).Format(time.RFC3339),
|
||||
s.MaxTime, time.Unix(s.MaxTime/1e3, 0).Format(time.RFC3339),
|
||||
s.Samples, s.Series)
|
||||
|
||||
if s.Filtered {
|
||||
str += "\n* Stats numbers are based on blocks meta info and don't account for applied filters."
|
||||
}
|
||||
|
||||
return str
|
||||
}
|
||||
309
app/vmctl/thanos_processor.go
Normal file
309
app/vmctl/thanos_processor.go
Normal file
@@ -0,0 +1,309 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/prometheus/prometheus/model/labels"
|
||||
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/thanos"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
)
|
||||
|
||||
type thanosProcessor struct {
|
||||
cl *thanos.Client
|
||||
im *vm.Importer
|
||||
cc int
|
||||
|
||||
isVerbose bool
|
||||
aggrTypes []thanos.AggrType
|
||||
}
|
||||
|
||||
func (tp *thanosProcessor) run(ctx context.Context) error {
|
||||
if len(tp.aggrTypes) == 0 {
|
||||
tp.aggrTypes = thanos.AllAggrTypes
|
||||
}
|
||||
|
||||
log.Printf("Processing blocks with aggregate types: %v", tp.aggrTypes)
|
||||
|
||||
// Use the first aggregate type to explore blocks (block list is the same for all types)
|
||||
blocks, err := tp.cl.Explore(tp.aggrTypes[0])
|
||||
if err != nil {
|
||||
return fmt.Errorf("explore failed: %s", err)
|
||||
}
|
||||
if len(blocks) < 1 {
|
||||
return fmt.Errorf("found no blocks to import")
|
||||
}
|
||||
|
||||
// Separate blocks into raw (resolution=0) and downsampled (resolution>0)
|
||||
var rawBlocks, downsampledBlocks []thanos.BlockInfo
|
||||
for _, block := range blocks {
|
||||
if block.Resolution == thanos.ResolutionRaw {
|
||||
rawBlocks = append(rawBlocks, block)
|
||||
} else {
|
||||
downsampledBlocks = append(downsampledBlocks, block)
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("Found %d raw blocks and %d downsampled blocks", len(rawBlocks), len(downsampledBlocks))
|
||||
|
||||
question := fmt.Sprintf("Found %d blocks to import (%d raw + %d downsampled with %d aggregate types). Continue?",
|
||||
len(blocks), len(rawBlocks), len(downsampledBlocks), len(tp.aggrTypes))
|
||||
if !prompt(ctx, question) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Calculate total number of block processing passes for the progress bar:
|
||||
// raw blocks are processed once, downsampled blocks are processed once per aggregate type.
|
||||
totalPasses := len(rawBlocks) + len(downsampledBlocks)*len(tp.aggrTypes)
|
||||
thanosBlocksTotal.Add(totalPasses)
|
||||
bar := barpool.AddWithTemplate(fmt.Sprintf(barTpl, "Processing blocks"), totalPasses)
|
||||
if err := barpool.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
defer barpool.Stop()
|
||||
|
||||
tp.im.ResetStats()
|
||||
|
||||
type phaseStats struct {
|
||||
name string
|
||||
series uint64
|
||||
samples uint64
|
||||
}
|
||||
var phases []phaseStats
|
||||
|
||||
// Process raw blocks first (no aggregate suffix)
|
||||
if len(rawBlocks) > 0 {
|
||||
log.Println("Processing raw blocks (resolution=0)...")
|
||||
stats, err := tp.processBlocks(rawBlocks, thanos.AggrTypeNone, bar)
|
||||
if err != nil {
|
||||
return fmt.Errorf("migration failed for raw blocks: %s", err)
|
||||
}
|
||||
phases = append(phases, phaseStats{
|
||||
name: "raw",
|
||||
series: stats.series,
|
||||
samples: stats.samples,
|
||||
})
|
||||
}
|
||||
|
||||
// Close blocks from the initial Explore. The querierSeriesSet wrapper
|
||||
// has already released all querier read references, so Close won't hang.
|
||||
thanos.CloseBlocks(blocks)
|
||||
|
||||
// Process downsampled blocks for each aggregate type.
|
||||
// Each type needs its own AggrChunkPool, so we reopen blocks per type.
|
||||
for _, aggrType := range tp.aggrTypes {
|
||||
if len(downsampledBlocks) < 1 {
|
||||
break
|
||||
}
|
||||
|
||||
log.Printf("Processing downsampled blocks with aggregate type: %s", aggrType)
|
||||
|
||||
aggrBlocks, err := tp.cl.Explore(aggrType)
|
||||
if err != nil {
|
||||
return fmt.Errorf("explore failed for aggr type %s: %s", aggrType, err)
|
||||
}
|
||||
|
||||
var downsampledOnly []thanos.BlockInfo
|
||||
for _, block := range aggrBlocks {
|
||||
if block.Resolution != thanos.ResolutionRaw {
|
||||
downsampledOnly = append(downsampledOnly, block)
|
||||
}
|
||||
}
|
||||
|
||||
if len(downsampledOnly) < 1 {
|
||||
log.Printf("No downsampled blocks found for aggregate type %s, skipping", aggrType)
|
||||
thanos.CloseBlocks(aggrBlocks)
|
||||
continue
|
||||
}
|
||||
|
||||
log.Printf("Processing %d blocks for aggregate type: %s", len(downsampledOnly), aggrType)
|
||||
stats, err := tp.processBlocks(downsampledOnly, aggrType, bar)
|
||||
thanos.CloseBlocks(aggrBlocks)
|
||||
if err != nil {
|
||||
return fmt.Errorf("migration failed for aggr type %s: %s", aggrType, err)
|
||||
}
|
||||
phases = append(phases, phaseStats{
|
||||
name: aggrType.String(),
|
||||
series: stats.series,
|
||||
samples: stats.samples,
|
||||
})
|
||||
}
|
||||
|
||||
// Print per-phase and total statistics
|
||||
var totalSeries, totalSamples uint64
|
||||
log.Printf("Migration statistics (%d raw blocks, %d downsampled blocks):", len(rawBlocks), len(downsampledBlocks))
|
||||
for _, p := range phases {
|
||||
log.Printf(" %s: %d series, %d samples", p.name, p.series, p.samples)
|
||||
totalSeries += p.series
|
||||
totalSamples += p.samples
|
||||
}
|
||||
log.Printf(" total: %d series, %d samples", totalSeries, totalSamples)
|
||||
|
||||
// Wait for all buffers to flush
|
||||
tp.im.Close()
|
||||
// Drain import errors channel
|
||||
for vmErr := range tp.im.Errors() {
|
||||
if vmErr.Err != nil {
|
||||
thanosErrorsTotal.Inc()
|
||||
return fmt.Errorf("import process failed: %s", wrapErr(vmErr, tp.isVerbose))
|
||||
}
|
||||
}
|
||||
|
||||
log.Println("Import finished!")
|
||||
log.Println(tp.im.Stats())
|
||||
return nil
|
||||
}
|
||||
|
||||
// processBlocksStats holds statistics collected during block processing.
|
||||
type processBlocksStats struct {
|
||||
blocks uint64
|
||||
series uint64
|
||||
samples uint64
|
||||
}
|
||||
|
||||
func (tp *thanosProcessor) processBlocks(blocks []thanos.BlockInfo, aggrType thanos.AggrType, bar barpool.Bar) (processBlocksStats, error) {
|
||||
blockReadersCh := make(chan thanos.BlockInfo)
|
||||
errCh := make(chan error, tp.cc)
|
||||
|
||||
var processedBlocks, totalSeries, totalSamples uint64
|
||||
var mu sync.Mutex
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i := range tp.cc {
|
||||
workerID := i
|
||||
wg.Go(func() {
|
||||
for bi := range blockReadersCh {
|
||||
seriesCount, samplesCount, err := tp.do(bi, aggrType)
|
||||
if err != nil {
|
||||
thanosErrorsTotal.Inc()
|
||||
errCh <- fmt.Errorf("read failed for block %q with aggr %s: %s", bi.Block.Meta().ULID, aggrType, err)
|
||||
return
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
processedBlocks++
|
||||
totalSeries += seriesCount
|
||||
totalSamples += samplesCount
|
||||
log.Printf("[Worker %d] Block %s: %d series, %d samples | Total: %d/%d blocks, %d series, %d samples",
|
||||
workerID, bi.Block.Meta().ULID.String()[:8], seriesCount, samplesCount,
|
||||
processedBlocks, len(blocks), totalSeries, totalSamples)
|
||||
mu.Unlock()
|
||||
|
||||
thanosBlocksProcessed.Inc()
|
||||
bar.Increment()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// any error breaks the import
|
||||
for _, bi := range blocks {
|
||||
select {
|
||||
case thanosErr := <-errCh:
|
||||
close(blockReadersCh)
|
||||
wg.Wait()
|
||||
return processBlocksStats{}, fmt.Errorf("thanos error: %s", thanosErr)
|
||||
case vmErr := <-tp.im.Errors():
|
||||
close(blockReadersCh)
|
||||
wg.Wait()
|
||||
thanosErrorsTotal.Inc()
|
||||
return processBlocksStats{}, fmt.Errorf("import process failed: %s", wrapErr(vmErr, tp.isVerbose))
|
||||
case blockReadersCh <- bi:
|
||||
}
|
||||
}
|
||||
|
||||
close(blockReadersCh)
|
||||
wg.Wait()
|
||||
close(errCh)
|
||||
for err := range errCh {
|
||||
return processBlocksStats{}, fmt.Errorf("import process failed: %s", err)
|
||||
}
|
||||
|
||||
return processBlocksStats{
|
||||
blocks: processedBlocks,
|
||||
series: totalSeries,
|
||||
samples: totalSamples,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (tp *thanosProcessor) do(bi thanos.BlockInfo, aggrType thanos.AggrType) (uint64, uint64, error) {
|
||||
ss, err := tp.cl.Read(bi)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("failed to read block: %s", err)
|
||||
}
|
||||
defer ss.Close() // Ensure querier is closed even on early returns
|
||||
|
||||
var it chunkenc.Iterator
|
||||
var seriesCount, samplesCount uint64
|
||||
|
||||
for ss.Next() {
|
||||
var name string
|
||||
var labelPairs []vm.LabelPair
|
||||
series := ss.At()
|
||||
|
||||
series.Labels().Range(func(label labels.Label) {
|
||||
if label.Name == "__name__" {
|
||||
name = label.Value
|
||||
return
|
||||
}
|
||||
labelPairs = append(labelPairs, vm.LabelPair{
|
||||
Name: strings.Clone(label.Name),
|
||||
Value: strings.Clone(label.Value),
|
||||
})
|
||||
})
|
||||
if name == "" {
|
||||
return seriesCount, samplesCount, fmt.Errorf("failed to find `__name__` label in labelset for block %v", bi.Block.Meta().ULID)
|
||||
}
|
||||
|
||||
// Add resolution and aggregate type suffix to metric name for downsampled blocks
|
||||
if bi.Resolution != thanos.ResolutionRaw && aggrType != thanos.AggrTypeNone {
|
||||
name = fmt.Sprintf("%s:%s:%s", name, bi.Resolution.String(), aggrType.String())
|
||||
}
|
||||
|
||||
var timestamps []int64
|
||||
var values []float64
|
||||
it = series.Iterator(it)
|
||||
for {
|
||||
typ := it.Next()
|
||||
if typ == chunkenc.ValNone {
|
||||
break
|
||||
}
|
||||
if typ != chunkenc.ValFloat {
|
||||
continue
|
||||
}
|
||||
t, v := it.At()
|
||||
timestamps = append(timestamps, t)
|
||||
values = append(values, v)
|
||||
}
|
||||
if err := it.Err(); err != nil {
|
||||
return seriesCount, samplesCount, err
|
||||
}
|
||||
|
||||
samplesCount += uint64(len(timestamps))
|
||||
seriesCount++
|
||||
|
||||
ts := vm.TimeSeries{
|
||||
Name: name,
|
||||
LabelPairs: labelPairs,
|
||||
Timestamps: timestamps,
|
||||
Values: values,
|
||||
}
|
||||
if err := tp.im.Input(&ts); err != nil {
|
||||
return seriesCount, samplesCount, err
|
||||
}
|
||||
}
|
||||
return seriesCount, samplesCount, ss.Err()
|
||||
}
|
||||
|
||||
var (
|
||||
thanosBlocksTotal = metrics.NewCounter(`vmctl_thanos_migration_blocks_total`)
|
||||
thanosBlocksProcessed = metrics.NewCounter(`vmctl_thanos_migration_blocks_processed`)
|
||||
thanosErrorsTotal = metrics.NewCounter(`vmctl_thanos_migration_errors_total`)
|
||||
)
|
||||
@@ -22,6 +22,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricnamestats"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
|
||||
)
|
||||
|
||||
@@ -1362,7 +1363,7 @@ func applyGraphiteRegexpFilter(filter string, ss []string) ([]string, error) {
|
||||
const maxFastAllocBlockSize = 32 * 1024
|
||||
|
||||
// GetMetricNamesStats returns statistic for timeseries metric names usage.
|
||||
func GetMetricNamesStats(qt *querytracer.Tracer, limit, le int, matchPattern string) (storage.MetricNamesStatsResponse, error) {
|
||||
func GetMetricNamesStats(qt *querytracer.Tracer, limit, le int, matchPattern string) (metricnamestats.StatsResult, error) {
|
||||
qt = qt.NewChild("get metric names usage statistics with limit: %d, less or equal to: %d, match pattern=%q", limit, le, matchPattern)
|
||||
defer qt.Done()
|
||||
return vmstorage.GetMetricNamesStats(qt, limit, le, matchPattern)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
{% import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricnamestats"
|
||||
) %}
|
||||
|
||||
{% stripspace %}
|
||||
@@ -34,9 +35,9 @@ TSDBStatusResponse generates response for /api/v1/status/tsdb .
|
||||
]
|
||||
{% endfunc %}
|
||||
|
||||
{% func tsdbStatusMetricNameEntries(a []storage.TopHeapEntry, queryStats []storage.MetricNamesStatsRecord) %}
|
||||
{% func tsdbStatusMetricNameEntries(a []storage.TopHeapEntry, queryStats []metricnamestats.StatRecord) %}
|
||||
{% code
|
||||
queryStatsByMetricName := make(map[string]storage.MetricNamesStatsRecord,len(queryStats))
|
||||
queryStatsByMetricName := make(map[string]metricnamestats.StatRecord,len(queryStats))
|
||||
for _, record := range queryStats{
|
||||
queryStatsByMetricName[record.MetricName] = record
|
||||
}
|
||||
|
||||
@@ -8,228 +8,229 @@ package prometheus
|
||||
import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricnamestats"
|
||||
)
|
||||
|
||||
// TSDBStatusResponse generates response for /api/v1/status/tsdb .
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:8
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:9
|
||||
import (
|
||||
qtio422016 "io"
|
||||
|
||||
qt422016 "github.com/valyala/quicktemplate"
|
||||
)
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:8
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:9
|
||||
var (
|
||||
_ = qtio422016.Copy
|
||||
_ = qt422016.AcquireByteBuffer
|
||||
)
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:8
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:9
|
||||
func StreamTSDBStatusResponse(qw422016 *qt422016.Writer, status *storage.TSDBStatus, qt *querytracer.Tracer) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:8
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:9
|
||||
qw422016.N().S(`{"status":"success","data":{"totalSeries":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:12
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:13
|
||||
qw422016.N().DUL(status.TotalSeries)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:12
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:13
|
||||
qw422016.N().S(`,"totalLabelValuePairs":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:13
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
qw422016.N().DUL(status.TotalLabelValuePairs)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:13
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
qw422016.N().S(`,"seriesCountByMetricName":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:15
|
||||
streamtsdbStatusMetricNameEntries(qw422016, status.SeriesCountByMetricName, status.SeriesQueryStatsByMetricName)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:14
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:15
|
||||
qw422016.N().S(`,"seriesCountByLabelName":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:15
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:16
|
||||
streamtsdbStatusEntries(qw422016, status.SeriesCountByLabelName)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:15
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:16
|
||||
qw422016.N().S(`,"seriesCountByFocusLabelValue":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:16
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:17
|
||||
streamtsdbStatusEntries(qw422016, status.SeriesCountByFocusLabelValue)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:16
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:17
|
||||
qw422016.N().S(`,"seriesCountByLabelValuePair":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:17
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:18
|
||||
streamtsdbStatusEntries(qw422016, status.SeriesCountByLabelValuePair)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:17
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:18
|
||||
qw422016.N().S(`,"labelValueCountByLabelName":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:18
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:19
|
||||
streamtsdbStatusEntries(qw422016, status.LabelValueCountByLabelName)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:18
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:19
|
||||
qw422016.N().S(`}`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:20
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:21
|
||||
qt.Done()
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:21
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:22
|
||||
streamdumpQueryTrace(qw422016, qt)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:21
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:22
|
||||
qw422016.N().S(`}`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
func WriteTSDBStatusResponse(qq422016 qtio422016.Writer, status *storage.TSDBStatus, qt *querytracer.Tracer) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
StreamTSDBStatusResponse(qw422016, status, qt)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
func TSDBStatusResponse(status *storage.TSDBStatus, qt *querytracer.Tracer) string {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
WriteTSDBStatusResponse(qb422016, status, qt)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
return qs422016
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:23
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:24
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:25
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
func streamtsdbStatusEntries(qw422016 *qt422016.Writer, a []storage.TopHeapEntry) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:25
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:26
|
||||
qw422016.N().S(`[`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:27
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:28
|
||||
for i, e := range a {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:27
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:28
|
||||
qw422016.N().S(`{"name":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:29
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:30
|
||||
qw422016.N().Q(e.Name)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:29
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:30
|
||||
qw422016.N().S(`,"value":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:30
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:31
|
||||
qw422016.N().D(int(e.Count))
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:30
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:31
|
||||
qw422016.N().S(`}`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:32
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:33
|
||||
if i+1 < len(a) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:32
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:33
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:32
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:33
|
||||
}
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:33
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:34
|
||||
}
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:33
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:34
|
||||
qw422016.N().S(`]`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
func writetsdbStatusEntries(qq422016 qtio422016.Writer, a []storage.TopHeapEntry) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
streamtsdbStatusEntries(qw422016, a)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
func tsdbStatusEntries(a []storage.TopHeapEntry) string {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
writetsdbStatusEntries(qb422016, a)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
return qs422016
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:35
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:36
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:37
|
||||
func streamtsdbStatusMetricNameEntries(qw422016 *qt422016.Writer, a []storage.TopHeapEntry, queryStats []storage.MetricNamesStatsRecord) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:39
|
||||
queryStatsByMetricName := make(map[string]storage.MetricNamesStatsRecord, len(queryStats))
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:38
|
||||
func streamtsdbStatusMetricNameEntries(qw422016 *qt422016.Writer, a []storage.TopHeapEntry, queryStats []metricnamestats.StatRecord) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:40
|
||||
queryStatsByMetricName := make(map[string]metricnamestats.StatRecord, len(queryStats))
|
||||
for _, record := range queryStats {
|
||||
queryStatsByMetricName[record.MetricName] = record
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:43
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:44
|
||||
qw422016.N().S(`[`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:45
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:46
|
||||
for i, e := range a {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:45
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:46
|
||||
qw422016.N().S(`{`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:48
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:49
|
||||
entry, ok := queryStatsByMetricName[e.Name]
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:49
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:50
|
||||
qw422016.N().S(`"name":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:50
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:51
|
||||
qw422016.N().Q(e.Name)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:50
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:51
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:51
|
||||
if !ok {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:51
|
||||
qw422016.N().S(`"value":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:52
|
||||
qw422016.N().D(int(e.Count))
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:53
|
||||
} else {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:53
|
||||
if !ok {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:52
|
||||
qw422016.N().S(`"value":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:54
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:53
|
||||
qw422016.N().D(int(e.Count))
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:54
|
||||
} else {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:54
|
||||
qw422016.N().S(`"value":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:55
|
||||
qw422016.N().D(int(e.Count))
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:55
|
||||
qw422016.N().S(`,"requestsCount":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:55
|
||||
qw422016.N().D(int(entry.RequestsCount))
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:55
|
||||
qw422016.N().S(`,"lastRequestTimestamp":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:56
|
||||
qw422016.N().D(int(entry.RequestsCount))
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:56
|
||||
qw422016.N().S(`,"lastRequestTimestamp":`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:57
|
||||
qw422016.N().D(int(entry.LastRequestTs))
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:57
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:58
|
||||
}
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:57
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:58
|
||||
qw422016.N().S(`}`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:59
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:60
|
||||
if i+1 < len(a) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:59
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:60
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:59
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:60
|
||||
}
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:60
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:61
|
||||
}
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:60
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:61
|
||||
qw422016.N().S(`]`)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
func writetsdbStatusMetricNameEntries(qq422016 qtio422016.Writer, a []storage.TopHeapEntry, queryStats []storage.MetricNamesStatsRecord) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
func writetsdbStatusMetricNameEntries(qq422016 qtio422016.Writer, a []storage.TopHeapEntry, queryStats []metricnamestats.StatRecord) {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
streamtsdbStatusMetricNameEntries(qw422016, a, queryStats)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
}
|
||||
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
func tsdbStatusMetricNameEntries(a []storage.TopHeapEntry, queryStats []storage.MetricNamesStatsRecord) string {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
func tsdbStatusMetricNameEntries(a []storage.TopHeapEntry, queryStats []metricnamestats.StatRecord) string {
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
writetsdbStatusMetricNameEntries(qb422016, a, queryStats)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
return qs422016
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:62
|
||||
//line app/vmselect/prometheus/tsdb_status_response.qtpl:63
|
||||
}
|
||||
|
||||
@@ -4842,6 +4842,137 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
// buckets that are consecutively empty at left and right ends will not be preserved.
|
||||
t.Run(`buckets_limit(trim_zero_preserve_empty_when_limit_not_reached)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(buckets_limit(3, (
|
||||
alias(label_set(36, "le", "+Inf"), "metric"),
|
||||
alias(label_set(36, "le", "25"), "metric"),
|
||||
alias(label_set(36, "le", "21"), "metric"),
|
||||
alias(label_set(36, "le", "19"), "metric"),
|
||||
alias(label_set(36, "le", "18"), "metric"),
|
||||
alias(label_set(36, "le", "17"), "metric"),
|
||||
alias(label_set(36, "le", "16"), "metric"),
|
||||
alias(label_set(27, "le", "12"), "metric"),
|
||||
alias(label_set(14, "le", "9"), "metric"),
|
||||
alias(label_set(0, "le", "6"), "metric"),
|
||||
alias(label_set(0, "le", "1"), "metric"),
|
||||
)))`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{14, 14, 14, 14, 14, 14},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.MetricGroup = []byte("metric")
|
||||
r1.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("9"),
|
||||
},
|
||||
}
|
||||
r2 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{27, 27, 27, 27, 27, 27},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r2.MetricName.MetricGroup = []byte("metric")
|
||||
r2.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("12"),
|
||||
},
|
||||
}
|
||||
r3 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{36, 36, 36, 36, 36, 36},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r3.MetricName.MetricGroup = []byte("metric")
|
||||
r3.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("16"),
|
||||
},
|
||||
}
|
||||
resultExpected := []netstorage.Result{r1, r2, r3}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
|
||||
// the number of non-empty bucket doesn't reach the given "limit", so some empty buckets will be preserved
|
||||
t.Run(`buckets_limit(trim_zero)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(buckets_limit(5, (
|
||||
alias(label_set(36, "le", "18"), "metric"),
|
||||
alias(label_set(36, "le", "17"), "metric"),
|
||||
alias(label_set(36, "le", "16"), "metric"),
|
||||
alias(label_set(27, "le", "12"), "metric"),
|
||||
alias(label_set(14, "le", "9"), "metric"),
|
||||
alias(label_set(0, "le", "6"), "metric"),
|
||||
alias(label_set(0, "le", "1"), "metric"),
|
||||
)))`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0, 0, 0, 0, 0, 0},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.MetricGroup = []byte("metric")
|
||||
r1.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("6"),
|
||||
},
|
||||
}
|
||||
r2 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{14, 14, 14, 14, 14, 14},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r2.MetricName.MetricGroup = []byte("metric")
|
||||
r2.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("9"),
|
||||
},
|
||||
}
|
||||
r3 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{27, 27, 27, 27, 27, 27},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r3.MetricName.MetricGroup = []byte("metric")
|
||||
r3.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("12"),
|
||||
},
|
||||
}
|
||||
r4 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{36, 36, 36, 36, 36, 36},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r4.MetricName.MetricGroup = []byte("metric")
|
||||
r4.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("16"),
|
||||
},
|
||||
}
|
||||
r5 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{36, 36, 36, 36, 36, 36},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r5.MetricName.MetricGroup = []byte("metric")
|
||||
r5.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("17"),
|
||||
},
|
||||
}
|
||||
resultExpected := []netstorage.Result{r1, r2, r3, r4, r5}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`buckets_limit(unused)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort(buckets_limit(5, (
|
||||
@@ -6228,50 +6359,6 @@ func TestExecSuccess(t *testing.T) {
|
||||
resultExpected := []netstorage.Result{r1, r2, r3, r4, r5, r6, r7}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`sum(histogram_over_time) by (vmrange)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sort_by_label(
|
||||
buckets_limit(
|
||||
3,
|
||||
sum(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s])) by (vmrange)
|
||||
), "le"
|
||||
)`
|
||||
r1 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{40, 40, 40, 40, 40, 40},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r1.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("+Inf"),
|
||||
},
|
||||
}
|
||||
r2 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{0, 0, 0, 0, 0, 0},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r2.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("1.000e+00"),
|
||||
},
|
||||
}
|
||||
r3 := netstorage.Result{
|
||||
MetricName: metricNameExpected,
|
||||
Values: []float64{40, 40, 40, 40, 40, 40},
|
||||
Timestamps: timestampsExpected,
|
||||
}
|
||||
r3.MetricName.Tags = []storage.Tag{
|
||||
{
|
||||
Key: []byte("le"),
|
||||
Value: []byte("2.448e+00"),
|
||||
},
|
||||
}
|
||||
resultExpected := []netstorage.Result{r1, r2, r3}
|
||||
f(q, resultExpected)
|
||||
})
|
||||
t.Run(`sum(histogram_over_time)`, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
q := `sum(histogram_over_time(alias(label_set(rand(0)*1.3+1.1, "foo", "bar"), "xxx")[200s:5s]))`
|
||||
|
||||
@@ -461,6 +461,41 @@ func transformBucketsLimit(tfa *transformFuncArg) ([]*timeseries, error) {
|
||||
prevValue = value
|
||||
}
|
||||
}
|
||||
|
||||
// Remove buckets that are consecutively empty at left and right ends to obtain more accurate max and min values.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10417.
|
||||
epsilon := 1e-9
|
||||
l := 0
|
||||
r := len(leGroup) - 1
|
||||
trimLeft := true
|
||||
for r-l+1 > limit {
|
||||
leftHits := math.Abs(leGroup[l].hits)
|
||||
rightHits := math.Abs(leGroup[r].hits)
|
||||
leftEmpty := !math.IsNaN(leftHits) && leftHits <= epsilon
|
||||
rightEmpty := !math.IsNaN(rightHits) && rightHits <= epsilon
|
||||
if !leftEmpty && !rightEmpty {
|
||||
break
|
||||
}
|
||||
if trimLeft {
|
||||
if leftHits < epsilon {
|
||||
l++
|
||||
}
|
||||
// switch the trim pointer to the right side if needed
|
||||
if rightHits < epsilon {
|
||||
trimLeft = false
|
||||
}
|
||||
} else {
|
||||
if rightHits < epsilon {
|
||||
r--
|
||||
}
|
||||
// switch the trim pointer to the left side if needed
|
||||
if leftHits < epsilon {
|
||||
trimLeft = true
|
||||
}
|
||||
}
|
||||
}
|
||||
leGroup = leGroup[l : r+1]
|
||||
|
||||
for len(leGroup) > limit {
|
||||
// Preserve the first and the last bucket for better accuracy for min and max values
|
||||
xxMinIdx := 1
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
{% import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricnamestats"
|
||||
) %}
|
||||
|
||||
{% stripspace %}
|
||||
MetricNamesStatsResponse generates response for /api/v1/status/metric_names_stats .
|
||||
{% func MetricNamesStatsResponse(stats *storage.MetricNamesStatsResponse, qt *querytracer.Tracer) %}
|
||||
{% func MetricNamesStatsResponse(stats *metricnamestats.StatsResult, qt *querytracer.Tracer) %}
|
||||
{
|
||||
"status":"success",
|
||||
"statsCollectedSince": {%dul= stats.CollectedSinceTs %},
|
||||
|
||||
@@ -7,7 +7,7 @@ package stats
|
||||
//line app/vmselect/stats/metric_names_usage_response.qtpl:1
|
||||
import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricnamestats"
|
||||
)
|
||||
|
||||
// MetricNamesStatsResponse generates response for /api/v1/status/metric_names_stats .
|
||||
@@ -26,7 +26,7 @@ var (
|
||||
)
|
||||
|
||||
//line app/vmselect/stats/metric_names_usage_response.qtpl:8
|
||||
func StreamMetricNamesStatsResponse(qw422016 *qt422016.Writer, stats *storage.MetricNamesStatsResponse, qt *querytracer.Tracer) {
|
||||
func StreamMetricNamesStatsResponse(qw422016 *qt422016.Writer, stats *metricnamestats.StatsResult, qt *querytracer.Tracer) {
|
||||
//line app/vmselect/stats/metric_names_usage_response.qtpl:8
|
||||
qw422016.N().S(`{"status":"success","statsCollectedSince":`)
|
||||
//line app/vmselect/stats/metric_names_usage_response.qtpl:11
|
||||
@@ -91,7 +91,7 @@ func StreamMetricNamesStatsResponse(qw422016 *qt422016.Writer, stats *storage.Me
|
||||
}
|
||||
|
||||
//line app/vmselect/stats/metric_names_usage_response.qtpl:31
|
||||
func WriteMetricNamesStatsResponse(qq422016 qtio422016.Writer, stats *storage.MetricNamesStatsResponse, qt *querytracer.Tracer) {
|
||||
func WriteMetricNamesStatsResponse(qq422016 qtio422016.Writer, stats *metricnamestats.StatsResult, qt *querytracer.Tracer) {
|
||||
//line app/vmselect/stats/metric_names_usage_response.qtpl:31
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmselect/stats/metric_names_usage_response.qtpl:31
|
||||
@@ -102,7 +102,7 @@ func WriteMetricNamesStatsResponse(qq422016 qtio422016.Writer, stats *storage.Me
|
||||
}
|
||||
|
||||
//line app/vmselect/stats/metric_names_usage_response.qtpl:31
|
||||
func MetricNamesStatsResponse(stats *storage.MetricNamesStatsResponse, qt *querytracer.Tracer) string {
|
||||
func MetricNamesStatsResponse(stats *metricnamestats.StatsResult, qt *querytracer.Tracer) string {
|
||||
//line app/vmselect/stats/metric_names_usage_response.qtpl:31
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmselect/stats/metric_names_usage_response.qtpl:31
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -22,6 +23,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricnamestats"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/syncwg"
|
||||
@@ -55,11 +57,13 @@ var (
|
||||
denyQueriesOutsideRetention = flag.Bool("denyQueriesOutsideRetention", false, "Whether to deny queries outside the configured -retentionPeriod. "+
|
||||
"When set, then /api/v1/query_range would return '503 Service Unavailable' error for queries with 'from' value outside -retentionPeriod. "+
|
||||
"This may be useful when multiple data sources with distinct retentions are hidden behind query-tee")
|
||||
maxHourlySeries = flag.Int("storage.maxHourlySeries", 0, "The maximum number of unique series can be added to the storage during the last hour. "+
|
||||
maxHourlySeries = flag.Int64("storage.maxHourlySeries", 0, "The maximum number of unique series can be added to the storage during the last hour. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-limiter . "+
|
||||
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
|
||||
"See also -storage.maxDailySeries")
|
||||
maxDailySeries = flag.Int("storage.maxDailySeries", 0, "The maximum number of unique series can be added to the storage during the last 24 hours. "+
|
||||
maxDailySeries = flag.Int64("storage.maxDailySeries", 0, "The maximum number of unique series can be added to the storage during the last 24 hours. "+
|
||||
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-limiter . "+
|
||||
fmt.Sprintf("Setting this flag to '-1' sets limit to maximum possible value (%d) which is useful in order to enable series tracking without enforcing limits. ", math.MaxInt32)+
|
||||
"See also -storage.maxHourlySeries")
|
||||
|
||||
minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 100e6, "The minimum free disk space at -storageDataPath after which the storage stops accepting new data")
|
||||
@@ -141,8 +145,8 @@ func Init(resetCacheIfNeeded func(mrs []storage.MetricRow)) {
|
||||
WG = syncwg.WaitGroup{}
|
||||
opts := storage.OpenOptions{
|
||||
Retention: retentionPeriod.Duration(),
|
||||
MaxHourlySeries: *maxHourlySeries,
|
||||
MaxDailySeries: *maxDailySeries,
|
||||
MaxHourlySeries: getMaxHourlySeries(),
|
||||
MaxDailySeries: getMaxDailySeries(),
|
||||
DisablePerDayIndex: *disablePerDayIndex,
|
||||
TrackMetricNamesStats: *trackMetricNamesStats,
|
||||
IDBPrefillStart: *idbPrefillStart,
|
||||
@@ -233,7 +237,7 @@ func DeleteSeries(qt *querytracer.Tracer, tfss []*storage.TagFilters, maxMetrics
|
||||
}
|
||||
|
||||
// GetMetricNamesStats returns metric names usage stats with give limit and lte predicate
|
||||
func GetMetricNamesStats(qt *querytracer.Tracer, limit, le int, matchPattern string) (storage.MetricNamesStatsResponse, error) {
|
||||
func GetMetricNamesStats(qt *querytracer.Tracer, limit, le int, matchPattern string) (metricnamestats.StatsResult, error) {
|
||||
WG.Add(1)
|
||||
r := Storage.GetMetricNamesStats(qt, limit, le, matchPattern)
|
||||
WG.Done()
|
||||
@@ -602,10 +606,10 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="big_timestamp"}`, m.TooBigTimestampRows)
|
||||
metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="small_timestamp"}`, m.TooSmallTimestampRows)
|
||||
metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="invalid_raw_metric_name"}`, m.InvalidRawMetricNames)
|
||||
if *maxHourlySeries > 0 {
|
||||
if getMaxHourlySeries() > 0 {
|
||||
metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="hourly_limit_exceeded"}`, m.HourlySeriesLimitRowsDropped)
|
||||
}
|
||||
if *maxDailySeries > 0 {
|
||||
if getMaxDailySeries() > 0 {
|
||||
metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="daily_limit_exceeded"}`, m.DailySeriesLimitRowsDropped)
|
||||
}
|
||||
|
||||
@@ -615,13 +619,13 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteCounterUint64(w, `vm_slow_row_inserts_total`, m.SlowRowInserts)
|
||||
metrics.WriteCounterUint64(w, `vm_slow_per_day_index_inserts_total`, m.SlowPerDayIndexInserts)
|
||||
|
||||
if *maxHourlySeries > 0 {
|
||||
if getMaxHourlySeries() > 0 {
|
||||
metrics.WriteGaugeUint64(w, `vm_hourly_series_limit_current_series`, m.HourlySeriesLimitCurrentSeries)
|
||||
metrics.WriteGaugeUint64(w, `vm_hourly_series_limit_max_series`, m.HourlySeriesLimitMaxSeries)
|
||||
metrics.WriteCounterUint64(w, `vm_hourly_series_limit_rows_dropped_total`, m.HourlySeriesLimitRowsDropped)
|
||||
}
|
||||
|
||||
if *maxDailySeries > 0 {
|
||||
if getMaxDailySeries() > 0 {
|
||||
metrics.WriteGaugeUint64(w, `vm_daily_series_limit_current_series`, m.DailySeriesLimitCurrentSeries)
|
||||
metrics.WriteGaugeUint64(w, `vm_daily_series_limit_max_series`, m.DailySeriesLimitMaxSeries)
|
||||
metrics.WriteCounterUint64(w, `vm_daily_series_limit_rows_dropped_total`, m.DailySeriesLimitRowsDropped)
|
||||
@@ -746,3 +750,21 @@ func jsonResponseError(w http.ResponseWriter, err error) {
|
||||
errStr := err.Error()
|
||||
fmt.Fprintf(w, `{"status":"error","msg":%s}`, stringsutil.JSONString(errStr))
|
||||
}
|
||||
|
||||
func getMaxHourlySeries() int {
|
||||
limit := *maxHourlySeries
|
||||
if limit == -1 || limit > math.MaxInt32 {
|
||||
return math.MaxInt32
|
||||
}
|
||||
|
||||
return int(limit)
|
||||
}
|
||||
|
||||
func getMaxDailySeries() int {
|
||||
limit := *maxDailySeries
|
||||
if limit == -1 || limit > math.MaxInt32 {
|
||||
return math.MaxInt32
|
||||
}
|
||||
|
||||
return int(limit)
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM golang:1.26.1 AS build-web-stage
|
||||
FROM golang:1.26.2 AS build-web-stage
|
||||
COPY build /build
|
||||
|
||||
WORKDIR /build
|
||||
|
||||
@@ -87,11 +87,11 @@ func (tc *TestCase) MustStartDefaultVmsingle() *Vmsingle {
|
||||
}
|
||||
|
||||
// MustStartVmsingle is a test helper function that starts an instance of
|
||||
// vmsingle located at ../../bin/victoria-metrics and fails the test if the app
|
||||
// vmsingle located at ../../bin/victoria-metrics-race and fails the test if the app
|
||||
// fails to start.
|
||||
func (tc *TestCase) MustStartVmsingle(instance string, flags []string) *Vmsingle {
|
||||
tc.t.Helper()
|
||||
return tc.MustStartVmsingleAt(instance, "../../bin/victoria-metrics", flags)
|
||||
return tc.MustStartVmsingleAt(instance, "../../bin/victoria-metrics-race", flags)
|
||||
}
|
||||
|
||||
// MustStartVmsingleAt is a test helper function that starts an instance of
|
||||
@@ -108,11 +108,11 @@ func (tc *TestCase) MustStartVmsingleAt(instance, binary string, flags []string)
|
||||
}
|
||||
|
||||
// MustStartVmstorage is a test helper function that starts an instance of
|
||||
// vmstorage located at ../../bin/vmstorage and fails the test if the app fails
|
||||
// vmstorage located at ../../bin/vmstorage-race and fails the test if the app fails
|
||||
// to start.
|
||||
func (tc *TestCase) MustStartVmstorage(instance string, flags []string) *Vmstorage {
|
||||
tc.t.Helper()
|
||||
return tc.MustStartVmstorageAt(instance, "../../bin/vmstorage", flags)
|
||||
return tc.MustStartVmstorageAt(instance, "../../bin/vmstorage-race", flags)
|
||||
}
|
||||
|
||||
// MustStartVmstorageAt is a test helper function that starts an instance of
|
||||
@@ -293,12 +293,12 @@ func (tc *TestCase) MustStartCluster(opts *ClusterOptions) *Vmcluster {
|
||||
tc.t.Helper()
|
||||
|
||||
if opts.Vmstorage1Binary == "" {
|
||||
opts.Vmstorage1Binary = "../../bin/vmstorage"
|
||||
opts.Vmstorage1Binary = "../../bin/vmstorage-race"
|
||||
}
|
||||
vmstorage1 := tc.MustStartVmstorageAt(opts.Vmstorage1Instance, opts.Vmstorage1Binary, opts.Vmstorage1Flags)
|
||||
|
||||
if opts.Vmstorage2Binary == "" {
|
||||
opts.Vmstorage2Binary = "../../bin/vmstorage"
|
||||
opts.Vmstorage2Binary = "../../bin/vmstorage-race"
|
||||
}
|
||||
vmstorage2 := tc.MustStartVmstorageAt(opts.Vmstorage2Instance, opts.Vmstorage2Binary, opts.Vmstorage2Flags)
|
||||
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
@@ -297,6 +299,132 @@ func TestSingleIngestionProtocols(t *testing.T) {
|
||||
|
||||
}
|
||||
|
||||
func TestSingleCardinalityLimiter(t *testing.T) {
|
||||
waitFor := func(f func() bool) {
|
||||
const (
|
||||
retries = 20
|
||||
period = 100 * time.Millisecond
|
||||
)
|
||||
|
||||
t.Helper()
|
||||
|
||||
for i := 0; i < retries; i++ {
|
||||
if f() {
|
||||
return
|
||||
}
|
||||
time.Sleep(period)
|
||||
}
|
||||
t.Fatalf("timed out waiting for retry #%d", retries)
|
||||
}
|
||||
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
singleHourly := tc.MustStartVmsingle("vmsingle-hourly", []string{
|
||||
"-retentionPeriod=100y",
|
||||
"-storage.maxHourlySeries=1",
|
||||
})
|
||||
|
||||
singleHourly.PrometheusAPIV1ImportPrometheus(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
if v := singleHourly.GetIntMetric(t, "vm_hourly_series_limit_max_series"); v != 1 {
|
||||
t.Fatalf("unexpected vm_hourly_series_limit_max_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := singleHourly.GetIntMetric(t, "vm_hourly_series_limit_current_series"); v != 1 {
|
||||
t.Fatalf("unexpected vm_hourly_series_limit_current_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := singleHourly.GetIntMetric(t, "vm_hourly_series_limit_rows_dropped_total"); v != 0 {
|
||||
t.Fatalf("unexpected vm_hourly_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
|
||||
singleHourly.PrometheusAPIV1ImportPrometheus(t, []string{
|
||||
"foo_bar2 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return singleHourly.GetIntMetric(t, "vm_hourly_series_limit_rows_dropped_total") > 0
|
||||
},
|
||||
)
|
||||
|
||||
singleDaily := tc.MustStartVmsingle("vmsingle-daily", []string{
|
||||
"-retentionPeriod=100y",
|
||||
"-storage.maxDailySeries=1",
|
||||
})
|
||||
|
||||
singleDaily.PrometheusAPIV1ImportPrometheus(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
if v := singleDaily.GetIntMetric(t, "vm_daily_series_limit_max_series"); v != 1 {
|
||||
t.Fatalf("unexpected vm_daily_series_limit_max_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := singleDaily.GetIntMetric(t, "vm_daily_series_limit_current_series"); v != 1 {
|
||||
t.Fatalf("unexpected vm_daily_series_limit_current_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := singleDaily.GetIntMetric(t, "vm_daily_series_limit_rows_dropped_total"); v != 0 {
|
||||
t.Fatalf("unexpected vm_daily_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
|
||||
singleDaily.PrometheusAPIV1ImportPrometheus(t, []string{
|
||||
"foo_bar2 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return singleDaily.GetIntMetric(t, "vm_daily_series_limit_rows_dropped_total") > 0
|
||||
},
|
||||
)
|
||||
|
||||
singleUnlimited := tc.MustStartVmsingle("vmsingle-unlimited", []string{
|
||||
"-retentionPeriod=100y",
|
||||
"-storage.maxHourlySeries=-1",
|
||||
"-storage.maxDailySeries=-1",
|
||||
})
|
||||
metrics := make([]string, 0, 100)
|
||||
for i := range 100 {
|
||||
metrics = append(metrics, fmt.Sprintf("foo_bar%d 1 1652169600000", i)) // 2022-05-10T08:00:00Z
|
||||
}
|
||||
|
||||
singleUnlimited.PrometheusAPIV1ImportPrometheus(t, metrics, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return singleUnlimited.GetIntMetric(t, "vm_hourly_series_limit_current_series") > 0
|
||||
},
|
||||
)
|
||||
|
||||
if v := singleUnlimited.GetIntMetric(t, "vm_hourly_series_limit_max_series"); v == 0 {
|
||||
t.Fatalf("unexpected vm_hourly_series_limit_max_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := singleUnlimited.GetIntMetric(t, "vm_hourly_series_limit_current_series"); v != 100 {
|
||||
t.Fatalf("unexpected vm_hourly_series_limit_current_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := singleUnlimited.GetIntMetric(t, "vm_hourly_series_limit_rows_dropped_total"); v != 0 {
|
||||
t.Fatalf("unexpected vm_hourly_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
|
||||
if v := singleUnlimited.GetIntMetric(t, "vm_daily_series_limit_max_series"); v == 0 {
|
||||
t.Fatalf("unexpected vm_daily_series_limit_max_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := singleUnlimited.GetIntMetric(t, "vm_daily_series_limit_current_series"); v != 100 {
|
||||
t.Fatalf("unexpected vm_daily_series_limit_current_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := singleUnlimited.GetIntMetric(t, "vm_daily_series_limit_rows_dropped_total"); v != 0 {
|
||||
t.Fatalf("unexpected vm_daily_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClusterIngestionProtocols(t *testing.T) {
|
||||
fs.MustRemoveDir(t.Name())
|
||||
tc := apptest.NewTestCase(t)
|
||||
@@ -591,3 +719,145 @@ func TestClusterIngestionProtocols(t *testing.T) {
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func TestClusterCardinalityLimiter(t *testing.T) {
|
||||
waitFor := func(f func() bool) {
|
||||
const (
|
||||
retries = 20
|
||||
period = 100 * time.Millisecond
|
||||
)
|
||||
|
||||
t.Helper()
|
||||
|
||||
for i := 0; i < retries; i++ {
|
||||
if f() {
|
||||
return
|
||||
}
|
||||
time.Sleep(period)
|
||||
}
|
||||
t.Fatalf("timed out waiting for retry #%d", retries)
|
||||
}
|
||||
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
// Test hourly series limit
|
||||
vmstorageHourly := tc.MustStartVmstorage("vmstorage-hourly", []string{
|
||||
"-storageDataPath=" + tc.Dir() + "/vmstorage-hourly",
|
||||
"-retentionPeriod=100y",
|
||||
"-storage.maxHourlySeries=1",
|
||||
})
|
||||
vminsertHourly := tc.MustStartVminsert("vminsert-hourly", []string{
|
||||
"-storageNode=" + vmstorageHourly.VminsertAddr(),
|
||||
})
|
||||
|
||||
vminsertHourly.PrometheusAPIV1ImportPrometheus(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
if v := vmstorageHourly.GetIntMetric(t, "vm_hourly_series_limit_max_series"); v != 1 {
|
||||
t.Fatalf("unexpected vm_hourly_series_limit_max_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmstorageHourly.GetIntMetric(t, "vm_hourly_series_limit_current_series"); v != 1 {
|
||||
t.Fatalf("unexpected vm_hourly_series_limit_current_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmstorageHourly.GetIntMetric(t, "vm_hourly_series_limit_rows_dropped_total"); v != 0 {
|
||||
t.Fatalf("unexpected vm_hourly_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
|
||||
vminsertHourly.PrometheusAPIV1ImportPrometheus(t, []string{
|
||||
"foo_bar2 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmstorageHourly.GetIntMetric(t, "vm_hourly_series_limit_rows_dropped_total") > 0
|
||||
},
|
||||
)
|
||||
|
||||
// Test daily series limit
|
||||
vmstorageDaily := tc.MustStartVmstorage("vmstorage-daily", []string{
|
||||
"-storageDataPath=" + tc.Dir() + "/vmstorage-daily",
|
||||
"-retentionPeriod=100y",
|
||||
"-storage.maxDailySeries=1",
|
||||
})
|
||||
vminsertDaily := tc.MustStartVminsert("vminsert-daily", []string{
|
||||
"-storageNode=" + vmstorageDaily.VminsertAddr(),
|
||||
})
|
||||
|
||||
vminsertDaily.PrometheusAPIV1ImportPrometheus(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
if v := vmstorageDaily.GetIntMetric(t, "vm_daily_series_limit_max_series"); v != 1 {
|
||||
t.Fatalf("unexpected vm_daily_series_limit_max_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmstorageDaily.GetIntMetric(t, "vm_daily_series_limit_current_series"); v != 1 {
|
||||
t.Fatalf("unexpected vm_daily_series_limit_current_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmstorageDaily.GetIntMetric(t, "vm_daily_series_limit_rows_dropped_total"); v != 0 {
|
||||
t.Fatalf("unexpected vm_daily_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
|
||||
vminsertDaily.PrometheusAPIV1ImportPrometheus(t, []string{
|
||||
"foo_bar2 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmstorageDaily.GetIntMetric(t, "vm_daily_series_limit_rows_dropped_total") > 0
|
||||
},
|
||||
)
|
||||
|
||||
// Test unlimited series
|
||||
vmstorageUnlimited := tc.MustStartVmstorage("vmstorage-unlimited", []string{
|
||||
"-storageDataPath=" + tc.Dir() + "/vmstorage-unlimited",
|
||||
"-retentionPeriod=100y",
|
||||
"-storage.maxHourlySeries=-1",
|
||||
"-storage.maxDailySeries=-1",
|
||||
})
|
||||
vminsertUnlimited := tc.MustStartVminsert("vminsert-unlimited", []string{
|
||||
"-storageNode=" + vmstorageUnlimited.VminsertAddr(),
|
||||
})
|
||||
|
||||
metrics := make([]string, 0, 100)
|
||||
for i := range 100 {
|
||||
metrics = append(metrics, fmt.Sprintf("foo_bar%d 1 1652169600000", i)) // 2022-05-10T08:00:00Z
|
||||
}
|
||||
|
||||
vminsertUnlimited.PrometheusAPIV1ImportPrometheus(t, metrics, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmstorageUnlimited.GetIntMetric(t, "vm_hourly_series_limit_current_series") > 0
|
||||
},
|
||||
)
|
||||
|
||||
if v := vmstorageUnlimited.GetIntMetric(t, "vm_hourly_series_limit_max_series"); v == 0 {
|
||||
t.Fatalf("unexpected vm_hourly_series_limit_max_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmstorageUnlimited.GetIntMetric(t, "vm_hourly_series_limit_current_series"); v != 100 {
|
||||
t.Fatalf("unexpected vm_hourly_series_limit_current_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmstorageUnlimited.GetIntMetric(t, "vm_hourly_series_limit_rows_dropped_total"); v != 0 {
|
||||
t.Fatalf("unexpected vm_hourly_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmstorageUnlimited.GetIntMetric(t, "vm_daily_series_limit_max_series"); v == 0 {
|
||||
t.Fatalf("unexpected vm_daily_series_limit_max_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmstorageUnlimited.GetIntMetric(t, "vm_daily_series_limit_current_series"); v != 100 {
|
||||
t.Fatalf("unexpected vm_daily_series_limit_current_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmstorageUnlimited.GetIntMetric(t, "vm_daily_series_limit_rows_dropped_total"); v != 0 {
|
||||
t.Fatalf("unexpected vm_daily_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
}
|
||||
|
||||
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78P6B68DNJC87ZVPRGC3X/chunks/000001
vendored
Normal file
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78P6B68DNJC87ZVPRGC3X/chunks/000001
vendored
Normal file
Binary file not shown.
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78P6B68DNJC87ZVPRGC3X/index
vendored
Normal file
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78P6B68DNJC87ZVPRGC3X/index
vendored
Normal file
Binary file not shown.
28
apptest/tests/testdata/thanos-snapshot/01KKS78P6B68DNJC87ZVPRGC3X/meta.json
vendored
Normal file
28
apptest/tests/testdata/thanos-snapshot/01KKS78P6B68DNJC87ZVPRGC3X/meta.json
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"compaction": {
|
||||
"level": 1,
|
||||
"sources": [
|
||||
"01KKS78P6B68DNJC87ZVPRGC3X"
|
||||
]
|
||||
},
|
||||
"maxTime": 1735696740001,
|
||||
"minTime": 1735689600000,
|
||||
"stats": {
|
||||
"numChunks": 8,
|
||||
"numFloatSamples": 480,
|
||||
"numSamples": 480,
|
||||
"numSeries": 4
|
||||
},
|
||||
"thanos": {
|
||||
"downsample": {
|
||||
"resolution": 0
|
||||
},
|
||||
"labels": {
|
||||
"prometheus": "test",
|
||||
"replica": "0"
|
||||
},
|
||||
"source": "prometheus"
|
||||
},
|
||||
"ulid": "01KKS78P6B68DNJC87ZVPRGC3X",
|
||||
"version": 1
|
||||
}
|
||||
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78P6B68DNJC87ZVPRGC3X/tombstones
vendored
Normal file
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78P6B68DNJC87ZVPRGC3X/tombstones
vendored
Normal file
Binary file not shown.
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78PKKR6BQ0SQ5BARRTAKC/chunks/000001
vendored
Normal file
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78PKKR6BQ0SQ5BARRTAKC/chunks/000001
vendored
Normal file
Binary file not shown.
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78PKKR6BQ0SQ5BARRTAKC/index
vendored
Normal file
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78PKKR6BQ0SQ5BARRTAKC/index
vendored
Normal file
Binary file not shown.
27
apptest/tests/testdata/thanos-snapshot/01KKS78PKKR6BQ0SQ5BARRTAKC/meta.json
vendored
Normal file
27
apptest/tests/testdata/thanos-snapshot/01KKS78PKKR6BQ0SQ5BARRTAKC/meta.json
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"compaction": {
|
||||
"level": 1,
|
||||
"sources": [
|
||||
"01KKS78PKKR6BQ0SQ5BARRTAKC"
|
||||
]
|
||||
},
|
||||
"maxTime": 1735696500001,
|
||||
"minTime": 1735689600000,
|
||||
"stats": {
|
||||
"numChunks": 4,
|
||||
"numSamples": 4,
|
||||
"numSeries": 4
|
||||
},
|
||||
"thanos": {
|
||||
"downsample": {
|
||||
"resolution": 300000
|
||||
},
|
||||
"labels": {
|
||||
"prometheus": "test",
|
||||
"replica": "0"
|
||||
},
|
||||
"source": "prometheus"
|
||||
},
|
||||
"ulid": "01KKS78PKKR6BQ0SQ5BARRTAKC",
|
||||
"version": 1
|
||||
}
|
||||
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78PKKR6BQ0SQ5BARRTAKC/tombstones
vendored
Normal file
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78PKKR6BQ0SQ5BARRTAKC/tombstones
vendored
Normal file
Binary file not shown.
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78PXE91TK8YX406DERWV2/chunks/000001
vendored
Normal file
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78PXE91TK8YX406DERWV2/chunks/000001
vendored
Normal file
Binary file not shown.
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78PXE91TK8YX406DERWV2/index
vendored
Normal file
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78PXE91TK8YX406DERWV2/index
vendored
Normal file
Binary file not shown.
27
apptest/tests/testdata/thanos-snapshot/01KKS78PXE91TK8YX406DERWV2/meta.json
vendored
Normal file
27
apptest/tests/testdata/thanos-snapshot/01KKS78PXE91TK8YX406DERWV2/meta.json
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"compaction": {
|
||||
"level": 1,
|
||||
"sources": [
|
||||
"01KKS78PXE91TK8YX406DERWV2"
|
||||
]
|
||||
},
|
||||
"maxTime": 1735693200001,
|
||||
"minTime": 1735689600000,
|
||||
"stats": {
|
||||
"numChunks": 4,
|
||||
"numSamples": 4,
|
||||
"numSeries": 4
|
||||
},
|
||||
"thanos": {
|
||||
"downsample": {
|
||||
"resolution": 3600000
|
||||
},
|
||||
"labels": {
|
||||
"prometheus": "test",
|
||||
"replica": "0"
|
||||
},
|
||||
"source": "prometheus"
|
||||
},
|
||||
"ulid": "01KKS78PXE91TK8YX406DERWV2",
|
||||
"version": 1
|
||||
}
|
||||
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78PXE91TK8YX406DERWV2/tombstones
vendored
Normal file
BIN
apptest/tests/testdata/thanos-snapshot/01KKS78PXE91TK8YX406DERWV2/tombstones
vendored
Normal file
Binary file not shown.
1
apptest/tests/testdata/thanos-snapshot/expected_all_aggr_response.json
vendored
Normal file
1
apptest/tests/testdata/thanos-snapshot/expected_all_aggr_response.json
vendored
Normal file
File diff suppressed because one or more lines are too long
1
apptest/tests/testdata/thanos-snapshot/expected_filtered_aggr_response.json
vendored
Normal file
1
apptest/tests/testdata/thanos-snapshot/expected_filtered_aggr_response.json
vendored
Normal file
File diff suppressed because one or more lines are too long
@@ -3,6 +3,7 @@ package tests
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
@@ -362,3 +363,147 @@ func TestSingleVMAgentDropOnOverload(t *testing.T) {
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
func TestSingleVMAgentCardinalityLimiter(t *testing.T) {
|
||||
waitFor := func(f func() bool) {
|
||||
const (
|
||||
retries = 20
|
||||
period = 100 * time.Millisecond
|
||||
)
|
||||
|
||||
t.Helper()
|
||||
|
||||
for i := 0; i < retries; i++ {
|
||||
if f() {
|
||||
return
|
||||
}
|
||||
time.Sleep(period)
|
||||
}
|
||||
t.Fatalf("timed out waiting for retry #%d", retries)
|
||||
}
|
||||
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
remoteWriteSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusNoContent)
|
||||
}))
|
||||
defer remoteWriteSrv.Close()
|
||||
|
||||
// Verify hourly limit is applied
|
||||
vmagent := tc.MustStartVmagent("vmagent-hourly", []string{
|
||||
`-remoteWrite.flushInterval=50ms`,
|
||||
fmt.Sprintf(`-remoteWrite.url=%s/api/v1/write`, remoteWriteSrv.URL),
|
||||
"-remoteWrite.maxRowsPerBlock=1",
|
||||
"-remoteWrite.maxHourlySeries=1",
|
||||
"-remoteWrite.tmpDataPath=" + tc.Dir() + "/vmagent-hourly",
|
||||
}, ``)
|
||||
|
||||
vmagent.APIV1ImportPrometheus(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
if v := vmagent.GetIntMetric(t, "vmagent_hourly_series_limit_max_series"); v != 1 {
|
||||
t.Fatalf("unexpected vmagent_hourly_series_limit_max_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmagent.GetIntMetric(t, "vmagent_hourly_series_limit_current_series"); v != 1 {
|
||||
t.Fatalf("unexpected vmagent_hourly_series_limit_current_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmagent.GetIntMetric(t, "vmagent_hourly_series_limit_rows_dropped_total"); v != 0 {
|
||||
t.Fatalf("unexpected vmagent_hourly_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
|
||||
vmagent.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar2 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent.GetIntMetric(t, "vmagent_hourly_series_limit_rows_dropped_total") > 0
|
||||
},
|
||||
)
|
||||
|
||||
// Daily limits
|
||||
vmagent2 := tc.MustStartVmagent("vmagent-daily", []string{
|
||||
`-remoteWrite.flushInterval=50ms`,
|
||||
fmt.Sprintf(`-remoteWrite.url=%s/api/v1/write`, remoteWriteSrv.URL),
|
||||
"-remoteWrite.maxRowsPerBlock=1",
|
||||
"-remoteWrite.maxDailySeries=1",
|
||||
"-remoteWrite.tmpDataPath=" + tc.Dir() + "/vmagent-daily",
|
||||
}, ``)
|
||||
|
||||
vmagent2.APIV1ImportPrometheus(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
if v := vmagent2.GetIntMetric(t, "vmagent_daily_series_limit_max_series"); v != 1 {
|
||||
t.Fatalf("unexpected vmagent_daily_series_limit_max_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmagent2.GetIntMetric(t, "vmagent_daily_series_limit_current_series"); v != 1 {
|
||||
t.Fatalf("unexpected vmagent_daily_series_limit_current_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmagent2.GetIntMetric(t, "vmagent_daily_series_limit_rows_dropped_total"); v != 0 {
|
||||
t.Fatalf("unexpected vmagent_daily_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
|
||||
vmagent2.APIV1ImportPrometheusNoWaitFlush(t, []string{
|
||||
"foo_bar2 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent2.GetIntMetric(t, "vmagent_daily_series_limit_rows_dropped_total") > 0
|
||||
},
|
||||
)
|
||||
|
||||
// test running with unlimited tracker
|
||||
vmagent3 := tc.MustStartVmagent("vmagent-unlimited", []string{
|
||||
`-remoteWrite.flushInterval=50ms`,
|
||||
fmt.Sprintf(`-remoteWrite.url=%s/api/v1/write`, remoteWriteSrv.URL),
|
||||
"-remoteWrite.maxRowsPerBlock=10",
|
||||
"-remoteWrite.maxDailySeries=-1",
|
||||
"-remoteWrite.maxHourlySeries=-1",
|
||||
"-remoteWrite.tmpDataPath=" + tc.Dir() + "/vmagent-unlimited",
|
||||
}, ``)
|
||||
|
||||
metrics := make([]string, 0, 100)
|
||||
for i := range 100 {
|
||||
metrics = append(metrics, fmt.Sprintf("foo_bar%d 1 1652169600000", i)) // 2022-05-10T08:00:00Z
|
||||
}
|
||||
|
||||
vmagent3.APIV1ImportPrometheusNoWaitFlush(t, metrics, apptest.QueryOpts{})
|
||||
|
||||
waitFor(
|
||||
func() bool {
|
||||
return vmagent3.GetIntMetric(t, "vmagent_hourly_series_limit_current_series") > 0
|
||||
},
|
||||
)
|
||||
|
||||
if v := vmagent3.GetIntMetric(t, "vmagent_hourly_series_limit_max_series"); v != math.MaxInt32 {
|
||||
t.Fatalf("unexpected vmagent_hourly_series_limit_max_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmagent3.GetIntMetric(t, "vmagent_hourly_series_limit_current_series"); v != 100 {
|
||||
t.Fatalf("unexpected vmagent_hourly_series_limit_current_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmagent3.GetIntMetric(t, "vmagent_hourly_series_limit_rows_dropped_total"); v != 0 {
|
||||
t.Fatalf("unexpected vmagent_hourly_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmagent3.GetIntMetric(t, "vmagent_daily_series_limit_max_series"); v != math.MaxInt32 {
|
||||
t.Fatalf("unexpected vmagent_daily_series_limit_max_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmagent3.GetIntMetric(t, "vmagent_daily_series_limit_current_series"); v != 100 {
|
||||
t.Fatalf("unexpected vmagent_daily_series_limit_current_series value: %d", v)
|
||||
}
|
||||
|
||||
if v := vmagent3.GetIntMetric(t, "vmagent_daily_series_limit_rows_dropped_total"); v != 0 {
|
||||
t.Fatalf("unexpected vmagent_daily_series_limit_rows_dropped_total value: %d", v)
|
||||
}
|
||||
}
|
||||
|
||||
180
apptest/tests/vmctl_thanos_migration_test.go
Normal file
180
apptest/tests/vmctl_thanos_migration_test.go
Normal file
@@ -0,0 +1,180 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
)
|
||||
|
||||
const (
|
||||
// thanosSnapshot contains both raw (resolution=0) and downsampled (resolution>0) blocks
|
||||
// with Thanos metadata in meta.json.
|
||||
thanosSnapshot = "./testdata/thanos-snapshot"
|
||||
|
||||
// thanosExpectedAllAggrResponse is the expected response when all aggregate types are imported
|
||||
// (default behavior without --thanos-aggr-types flag).
|
||||
thanosExpectedAllAggrResponse = "./testdata/thanos-snapshot/expected_all_aggr_response.json"
|
||||
|
||||
// thanosExpectedFilteredAggrResponse is the expected response when only specific aggregate
|
||||
// types are imported via --thanos-aggr-types flag.
|
||||
thanosExpectedFilteredAggrResponse = "./testdata/thanos-snapshot/expected_filtered_aggr_response.json"
|
||||
|
||||
// thanosQueryFilter is the PromQL query to select the test metrics.
|
||||
thanosQueryFilter = `{__name__=~"thanos_test.*"}`
|
||||
|
||||
// thanosQueryTimeStart and thanosQueryTimeEnd define the time range for querying imported data.
|
||||
thanosQueryTimeStart = "2025-01-01T00:00:00Z"
|
||||
thanosQueryTimeEnd = "2025-01-01T02:00:00Z"
|
||||
)
|
||||
|
||||
// TestSingleVmctlThanosMigrationAllAggr tests migration of Thanos blocks without
|
||||
// --thanos-aggr-types flag. All aggregate types should be imported by default.
|
||||
func TestSingleVmctlThanosMigrationAllAggr(t *testing.T) {
|
||||
fs.MustRemoveDir(t.Name())
|
||||
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
vmsingleDst := tc.MustStartDefaultVmsingle()
|
||||
vmAddr := fmt.Sprintf("http://%s/", vmsingleDst.HTTPAddr())
|
||||
vmctlFlags := []string{
|
||||
`thanos`,
|
||||
`--thanos-snapshot=` + thanosSnapshot,
|
||||
`--vm-addr=` + vmAddr,
|
||||
`--disable-progress-bar=true`,
|
||||
}
|
||||
|
||||
testThanosMigration(tc, vmsingleDst, vmctlFlags, thanosExpectedAllAggrResponse)
|
||||
}
|
||||
|
||||
// TestClusterVmctlThanosMigrationAllAggr tests migration of Thanos blocks to cluster
|
||||
// without --thanos-aggr-types flag. All aggregate types should be imported by default.
|
||||
func TestClusterVmctlThanosMigrationAllAggr(t *testing.T) {
|
||||
fs.MustRemoveDir(t.Name())
|
||||
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
cluster := tc.MustStartDefaultCluster()
|
||||
vmAddr := fmt.Sprintf("http://%s/", cluster.Vminsert.HTTPAddr())
|
||||
vmctlFlags := []string{
|
||||
`thanos`,
|
||||
`--thanos-snapshot=` + thanosSnapshot,
|
||||
`--vm-addr=` + vmAddr,
|
||||
`--disable-progress-bar=true`,
|
||||
`--vm-account-id=0`,
|
||||
}
|
||||
|
||||
testThanosMigration(tc, cluster, vmctlFlags, thanosExpectedAllAggrResponse)
|
||||
}
|
||||
|
||||
// TestSingleVmctlThanosMigrationFilteredAggr tests migration of Thanos blocks with
|
||||
// --thanos-aggr-types flag set to specific types (e.g., count,sum).
|
||||
func TestSingleVmctlThanosMigrationFilteredAggr(t *testing.T) {
|
||||
fs.MustRemoveDir(t.Name())
|
||||
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
vmsingleDst := tc.MustStartDefaultVmsingle()
|
||||
vmAddr := fmt.Sprintf("http://%s/", vmsingleDst.HTTPAddr())
|
||||
vmctlFlags := []string{
|
||||
`thanos`,
|
||||
`--thanos-snapshot=` + thanosSnapshot,
|
||||
`--vm-addr=` + vmAddr,
|
||||
`--disable-progress-bar=true`,
|
||||
`--thanos-aggr-types=count`,
|
||||
`--thanos-aggr-types=sum`,
|
||||
}
|
||||
|
||||
testThanosMigration(tc, vmsingleDst, vmctlFlags, thanosExpectedFilteredAggrResponse)
|
||||
}
|
||||
|
||||
// TestClusterVmctlThanosMigrationFilteredAggr tests migration of Thanos blocks to cluster
|
||||
// with --thanos-aggr-types flag set to specific types (e.g., count,sum).
|
||||
func TestClusterVmctlThanosMigrationFilteredAggr(t *testing.T) {
|
||||
fs.MustRemoveDir(t.Name())
|
||||
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
cluster := tc.MustStartDefaultCluster()
|
||||
vmAddr := fmt.Sprintf("http://%s/", cluster.Vminsert.HTTPAddr())
|
||||
vmctlFlags := []string{
|
||||
`thanos`,
|
||||
`--thanos-snapshot=` + thanosSnapshot,
|
||||
`--vm-addr=` + vmAddr,
|
||||
`--disable-progress-bar=true`,
|
||||
`--vm-account-id=0`,
|
||||
`--thanos-aggr-types=count`,
|
||||
`--thanos-aggr-types=sum`,
|
||||
}
|
||||
|
||||
testThanosMigration(tc, cluster, vmctlFlags, thanosExpectedFilteredAggrResponse)
|
||||
}
|
||||
|
||||
func testThanosMigration(tc *apptest.TestCase, sut apptest.PrometheusWriteQuerier, vmctlFlags []string, expectedFile string) {
|
||||
t := tc.T()
|
||||
t.Helper()
|
||||
|
||||
cmpOpt := cmpopts.IgnoreFields(apptest.PrometheusAPIV1QueryResponse{}, "Status", "Data.ResultType")
|
||||
|
||||
// Verify no data exists before migration
|
||||
got := sut.PrometheusAPIV1Query(t, thanosQueryFilter, apptest.QueryOpts{
|
||||
Step: "5m",
|
||||
Time: thanosQueryTimeStart,
|
||||
})
|
||||
|
||||
want := apptest.NewPrometheusAPIV1QueryResponse(t, `{"data":{"result":[]}}`)
|
||||
if diff := cmp.Diff(want, got, cmpOpt); diff != "" {
|
||||
t.Errorf("unexpected response before migration (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
// Run vmctl migration
|
||||
tc.MustStartVmctl("vmctl", vmctlFlags)
|
||||
|
||||
sut.ForceFlush(t)
|
||||
|
||||
// Load expected response
|
||||
file, err := os.Open(expectedFile)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot open expected response file: %s", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
bytes, err := io.ReadAll(file)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot read expected response file: %s", err)
|
||||
}
|
||||
|
||||
var wantResponse apptest.PrometheusAPIV1QueryResponse
|
||||
if err := json.Unmarshal(bytes, &wantResponse); err != nil {
|
||||
t.Fatalf("cannot unmarshal expected response file: %s", err)
|
||||
}
|
||||
wantResponse.Sort()
|
||||
|
||||
tc.Assert(&apptest.AssertOptions{
|
||||
Retries: 300,
|
||||
Msg: "unexpected metrics stored after Thanos migration",
|
||||
Got: func() any {
|
||||
result := sut.PrometheusAPIV1Export(t, thanosQueryFilter, apptest.QueryOpts{
|
||||
Start: thanosQueryTimeStart,
|
||||
End: thanosQueryTimeEnd,
|
||||
})
|
||||
result.Sort()
|
||||
return result.Data.Result
|
||||
},
|
||||
Want: wantResponse.Data.Result,
|
||||
CmpOpts: []cmp.Option{
|
||||
cmpopts.IgnoreFields(apptest.PrometheusAPIV1QueryResponse{}, "Status", "Data.ResultType"),
|
||||
},
|
||||
})
|
||||
}
|
||||
@@ -29,7 +29,7 @@ func StartVmagent(instance string, flags []string, cli *Client, promScrapeConfig
|
||||
httpListenAddrRE,
|
||||
}
|
||||
|
||||
app, stderrExtracts, err := startApp(instance, "../../bin/vmagent", flags, &appOptions{
|
||||
app, stderrExtracts, err := startApp(instance, "../../bin/vmagent-race", flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-httpListenAddr": "127.0.0.1:0",
|
||||
"-promscrape.config": promScrapeConfigFilePath,
|
||||
|
||||
@@ -32,7 +32,7 @@ func StartVmauth(instance string, flags []string, cli *Client, configFilePath st
|
||||
httpBuilitinListenAddrRE,
|
||||
}
|
||||
|
||||
app, stderrExtracts, err := startApp(instance, "../../bin/vmauth", flags, &appOptions{
|
||||
app, stderrExtracts, err := startApp(instance, "../../bin/vmauth-race", flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-httpListenAddr": "127.0.0.1:0",
|
||||
"-auth.config": configFilePath,
|
||||
|
||||
@@ -10,6 +10,6 @@ func StartVmbackup(instance, storageDataPath, snapshotCreateURL, dst string, out
|
||||
"-snapshot.createURL=" + snapshotCreateURL,
|
||||
"-dst=" + dst,
|
||||
}
|
||||
_, _, err := startApp(instance, "../../bin/vmbackup", flags, &appOptions{wait: true, output: output})
|
||||
_, _, err := startApp(instance, "../../bin/vmbackup-race", flags, &appOptions{wait: true, output: output})
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -4,6 +4,6 @@ import "io"
|
||||
|
||||
// StartVmctl starts an instance of vmctl cli with the given flags
|
||||
func StartVmctl(instance string, flags []string, output io.Writer) error {
|
||||
_, _, err := startApp(instance, "../../bin/vmctl", flags, &appOptions{wait: true, output: output})
|
||||
_, _, err := startApp(instance, "../../bin/vmctl-race", flags, &appOptions{wait: true, output: output})
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -9,6 +9,6 @@ func StartVmrestore(instance, src, storageDataPath string, output io.Writer) err
|
||||
"-src=" + src,
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
}
|
||||
_, _, err := startApp(instance, "../../bin/vmrestore", flags, &appOptions{wait: true, output: output})
|
||||
_, _, err := startApp(instance, "../../bin/vmrestore-race", flags, &appOptions{wait: true, output: output})
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -119,7 +119,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
@@ -199,7 +200,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -208,14 +210,14 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 9,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 14
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"colorMode": "none",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"percentChangeColorMode": "standard",
|
||||
@@ -257,7 +259,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "",
|
||||
"description": "Shows the total number of loaded alerting rules across selected instances and groups.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
@@ -266,7 +268,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -275,11 +278,11 @@
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 7,
|
||||
"x": 9,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 14
|
||||
},
|
||||
"id": 4,
|
||||
"id": 8,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
@@ -320,6 +323,144 @@
|
||||
"title": "Alerting rules",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the total number of pendings alerts in selected instances and grouping groups.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 14
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"percentChangeColorMode": "standard",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"last"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showPercentChange": false,
|
||||
"text": {
|
||||
"valueSize": 80
|
||||
},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(vmalert_alerts_pending{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"})",
|
||||
"instant": false,
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Alerting pending",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the total number of firing alerts in selected instances and grouping groups.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 14
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"percentChangeColorMode": "standard",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"last"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showPercentChange": false,
|
||||
"text": {
|
||||
"valueSize": 80
|
||||
},
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "sum(vmalert_alerts_firing{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"})",
|
||||
"instant": false,
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Alerting firing",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
@@ -332,6 +473,9 @@
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"footer": {
|
||||
"reducers": []
|
||||
},
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
@@ -339,7 +483,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -352,7 +497,7 @@
|
||||
{
|
||||
"matcher": {
|
||||
"id": "byName",
|
||||
"options": "Count (sum)"
|
||||
"options": "Count"
|
||||
},
|
||||
"properties": [
|
||||
{
|
||||
@@ -372,20 +517,12 @@
|
||||
"id": 2,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"frameIndex": 1,
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Count (sum)"
|
||||
"displayName": "Count"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -398,7 +535,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk_max(100, sum(increases_over_time(vmalert_alerts_firing{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"}[$__range])) by(group, alertname) > 0)",
|
||||
"expr": "topk_max(100, sum(increases_over_time(vmalert_alerts_firing{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"}[$__range])) by(group) > 0)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"key": "Q-3934f0fb-8ad6-4519-a98d-c26d0fc6b312-0",
|
||||
@@ -414,8 +551,9 @@
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"alertname": false
|
||||
"alertname": true
|
||||
},
|
||||
"includeByName": {},
|
||||
"indexByName": {
|
||||
"Time": 0,
|
||||
"Value": 3,
|
||||
@@ -428,23 +566,6 @@
|
||||
"group": "Group"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "groupBy",
|
||||
"options": {
|
||||
"fields": {
|
||||
"Count": {
|
||||
"aggregations": [
|
||||
"sum"
|
||||
],
|
||||
"operation": "aggregate"
|
||||
},
|
||||
"Group": {
|
||||
"aggregations": [],
|
||||
"operation": "groupby"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"type": "table"
|
||||
@@ -468,7 +589,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -531,16 +653,14 @@
|
||||
"id": 1,
|
||||
"options": {
|
||||
"cellHeight": "sm",
|
||||
"footer": {
|
||||
"countRows": false,
|
||||
"fields": "",
|
||||
"reducer": [
|
||||
"sum"
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"frameIndex": 1,
|
||||
"showHeader": true
|
||||
"showHeader": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
"displayName": "Count"
|
||||
}
|
||||
]
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
|
||||
@@ -115,7 +115,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"h": 3,
|
||||
"w": 4,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
|
||||
@@ -185,7 +185,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -250,7 +250,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -313,7 +313,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -380,7 +380,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -460,7 +460,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -557,7 +557,7 @@
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -663,7 +663,7 @@
|
||||
"sort": "asc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -783,7 +783,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -889,7 +889,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -993,7 +993,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1097,7 +1097,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1222,7 +1222,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1337,7 +1337,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1447,7 +1447,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1562,7 +1562,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1695,7 +1695,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1800,7 +1800,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1921,7 +1921,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2043,7 +2043,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2151,7 +2151,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2260,7 +2260,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2368,7 +2368,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2472,7 +2472,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2592,7 +2592,7 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"pluginVersion": "10.4.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2722,7 +2722,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2827,7 +2827,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2942,7 +2942,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3044,7 +3044,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3146,7 +3146,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3247,7 +3247,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3346,7 +3346,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3462,7 +3462,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3564,7 +3564,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.0.3",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3663,7 +3663,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3707,11 +3707,13 @@
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -3720,6 +3722,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -3727,6 +3730,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3741,7 +3745,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3756,7 +3761,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 351
|
||||
"y": 30
|
||||
},
|
||||
"id": 52,
|
||||
"options": {
|
||||
@@ -3767,10 +3772,12 @@
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3778,7 +3785,7 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vmalert_remotewrite_sent_rows_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)",
|
||||
"expr": "sum(rate(vmalert_remotewrite_sent_rows_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) default sum(rate(vmalert_remotewrite_sent_rows_sum{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -3792,18 +3799,20 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the number of datapoints dropped by vmalert while sending to the configured remote write URL. vmalert performs up to 5 retries before dropping the data. Check vmalert's error logs for the specific error message.",
|
||||
"description": "Shows the global rate for number of written bytes via remote write connections.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -3812,6 +3821,115 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 30
|
||||
},
|
||||
"id": 60,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vmalert_remotewrite_conn_bytes_written_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Bytes write rate ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Data could be dropped in two scenarios:\n1. The remote write queue(configured by -remoteWrite.maxQueueSize) is full: \nThe queue can fill rapidly if heavy rules generate millions of series, or if remote write requests are unable to send data to the destination in a timely manner, resulting in data being jammed in the queue. Consider tuning -remoteWrite.maxQueueSize or -remoteWrite.concurrency.\nSee also Rows per request pannel.\n2. The request to the configured remote write URL failed(vmalert performs up to 5 retries before dropping the data).\n\nCheck vmalert's error logs for the specific error message.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -3819,6 +3937,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3833,7 +3952,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3847,8 +3967,8 @@
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 351
|
||||
"x": 0,
|
||||
"y": 38
|
||||
},
|
||||
"id": 53,
|
||||
"options": {
|
||||
@@ -3859,10 +3979,12 @@
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3879,6 +4001,105 @@
|
||||
"title": "Datapoints drop rate ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Displays the maximum 99th percentile of the number of data samples sent per request to the configured remote write URL.\nThe value is influenced by the utilization of the remote write queue. It is normal for this metric to remain low when there are no rules generating a large number of series, and to spike when heavy rules generate thousands of series.\nDuring periods of high load (when many heavy rules are generating results), the optimal value for rows per request should be high to maximize the efficiency of each push operation.\n\nIf you observe datapoint drops and consistently low values for rows per request, try checking the write latency between vmalert and the remote write destination, or increasing `-remoteWrite.flushInterval`.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 38
|
||||
},
|
||||
"id": 68,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "max(histogram_quantile(0.99, sum(increase(vmalert_remotewrite_sent_rows_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange)))",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Rows per request ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
@@ -3897,6 +4118,7 @@
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -3913,6 +4135,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3929,7 +4152,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3945,7 +4169,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 378
|
||||
"y": 46
|
||||
},
|
||||
"id": 54,
|
||||
"options": {
|
||||
@@ -3960,10 +4184,12 @@
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3981,109 +4207,6 @@
|
||||
],
|
||||
"title": "Connections ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the global rate for number of written bytes via remote write connections.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 378
|
||||
},
|
||||
"id": 60,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vmalert_remotewrite_conn_bytes_written_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Bytes write rate ($instance)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"title": "Remote write",
|
||||
|
||||
@@ -184,7 +184,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -249,7 +249,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -312,7 +312,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -379,7 +379,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -459,7 +459,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -556,7 +556,7 @@
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -662,7 +662,7 @@
|
||||
"sort": "asc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -782,7 +782,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -888,7 +888,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -992,7 +992,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1096,7 +1096,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.0.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1221,7 +1221,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1336,7 +1336,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1446,7 +1446,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1561,7 +1561,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1694,7 +1694,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1799,7 +1799,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1920,7 +1920,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2042,7 +2042,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2150,7 +2150,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2259,7 +2259,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2367,7 +2367,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2471,7 +2471,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2591,7 +2591,7 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"pluginVersion": "10.4.2",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2721,7 +2721,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2826,7 +2826,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -2941,7 +2941,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3043,7 +3043,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3145,7 +3145,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3246,7 +3246,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3345,7 +3345,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3461,7 +3461,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3563,7 +3563,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "8.0.3",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3662,7 +3662,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.2.6",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3706,11 +3706,13 @@
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -3719,6 +3721,7 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -3726,6 +3729,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3740,7 +3744,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3755,7 +3760,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 351
|
||||
"y": 30
|
||||
},
|
||||
"id": 52,
|
||||
"options": {
|
||||
@@ -3766,10 +3771,12 @@
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3777,7 +3784,7 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vmalert_remotewrite_sent_rows_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)",
|
||||
"expr": "sum(rate(vmalert_remotewrite_sent_rows_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) default sum(rate(vmalert_remotewrite_sent_rows_sum{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -3791,18 +3798,20 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the number of datapoints dropped by vmalert while sending to the configured remote write URL. vmalert performs up to 5 retries before dropping the data. Check vmalert's error logs for the specific error message.",
|
||||
"description": "Shows the global rate for number of written bytes via remote write connections.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -3811,6 +3820,115 @@
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 30
|
||||
},
|
||||
"id": 60,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vmalert_remotewrite_conn_bytes_written_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Bytes write rate ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Data could be dropped in two scenarios:\n1. The remote write queue(configured by -remoteWrite.maxQueueSize) is full: \nThe queue can fill rapidly if heavy rules generate millions of series, or if remote write requests are unable to send data to the destination in a timely manner, resulting in data being jammed in the queue. Consider tuning -remoteWrite.maxQueueSize or -remoteWrite.concurrency.\nSee also Rows per request pannel.\n2. The request to the configured remote write URL failed(vmalert performs up to 5 retries before dropping the data).\n\nCheck vmalert's error logs for the specific error message.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
@@ -3818,6 +3936,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3832,7 +3951,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3846,8 +3966,8 @@
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 351
|
||||
"x": 0,
|
||||
"y": 38
|
||||
},
|
||||
"id": 53,
|
||||
"options": {
|
||||
@@ -3858,10 +3978,12 @@
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3878,6 +4000,105 @@
|
||||
"title": "Datapoints drop rate ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Displays the maximum 99th percentile of the number of data samples sent per request to the configured remote write URL.\nThe value is influenced by the utilization of the remote write queue. It is normal for this metric to remain low when there are no rules generating a large number of series, and to spike when heavy rules generate thousands of series.\nDuring periods of high load (when many heavy rules are generating results), the optimal value for rows per request should be high to maximize the efficiency of each push operation.\n\nIf you observe datapoint drops and consistently low values for rows per request, try checking the write latency between vmalert and the remote write destination, or increasing `-remoteWrite.flushInterval`.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 38
|
||||
},
|
||||
"id": 68,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "single",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "max(histogram_quantile(0.99, sum(increase(vmalert_remotewrite_sent_rows_bucket{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance, vmrange)))",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Rows per request ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
@@ -3896,6 +4117,7 @@
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"barWidthFactor": 0.6,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
@@ -3912,6 +4134,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3928,7 +4151,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3944,7 +4168,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 378
|
||||
"y": 46
|
||||
},
|
||||
"id": 54,
|
||||
"options": {
|
||||
@@ -3959,10 +4183,12 @@
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"hideZeros": false,
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3980,109 +4206,6 @@
|
||||
],
|
||||
"title": "Connections ($instance)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the global rate for number of written bytes via remote write connections.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "decbytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 378
|
||||
},
|
||||
"id": 60,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": true,
|
||||
"expr": "sum(rate(vmalert_remotewrite_conn_bytes_written_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Bytes write rate ($instance)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"title": "Remote write",
|
||||
|
||||
@@ -7,7 +7,7 @@ ROOT_IMAGE ?= alpine:3.23.3
|
||||
ROOT_IMAGE_SCRATCH ?= scratch
|
||||
CERTS_IMAGE := alpine:3.23.3
|
||||
|
||||
GO_BUILDER_IMAGE := golang:1.26.1
|
||||
GO_BUILDER_IMAGE := golang:1.26.2
|
||||
|
||||
BUILDER_IMAGE := local/builder:2.0.0-$(shell echo $(GO_BUILDER_IMAGE) | tr :/ __)-1
|
||||
BASE_IMAGE := local/base:1.1.4-$(shell echo $(ROOT_IMAGE) | tr :/ __)-$(shell echo $(CERTS_IMAGE) | tr :/ __)
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forward them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.138.0
|
||||
image: victoriametrics/vmagent:v1.139.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -42,14 +42,14 @@ services:
|
||||
# vmstorage shards. Each shard receives 1/N of all metrics sent to vminserts,
|
||||
# where N is number of vmstorages (2 in this case).
|
||||
vmstorage-1:
|
||||
image: victoriametrics/vmstorage:v1.138.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.139.0-cluster
|
||||
volumes:
|
||||
- strgdata-1:/storage
|
||||
command:
|
||||
- "--storageDataPath=/storage"
|
||||
restart: always
|
||||
vmstorage-2:
|
||||
image: victoriametrics/vmstorage:v1.138.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.139.0-cluster
|
||||
volumes:
|
||||
- strgdata-2:/storage
|
||||
command:
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
# vminsert is ingestion frontend. It receives metrics pushed by vmagent,
|
||||
# pre-process them and distributes across configured vmstorage shards.
|
||||
vminsert-1:
|
||||
image: victoriametrics/vminsert:v1.138.0-cluster
|
||||
image: victoriametrics/vminsert:v1.139.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -68,7 +68,7 @@ services:
|
||||
- "--storageNode=vmstorage-2:8400"
|
||||
restart: always
|
||||
vminsert-2:
|
||||
image: victoriametrics/vminsert:v1.138.0-cluster
|
||||
image: victoriametrics/vminsert:v1.139.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -80,7 +80,7 @@ services:
|
||||
# vmselect is a query fronted. It serves read queries in MetricsQL or PromQL.
|
||||
# vmselect collects results from configured `--storageNode` shards.
|
||||
vmselect-1:
|
||||
image: victoriametrics/vmselect:v1.138.0-cluster
|
||||
image: victoriametrics/vmselect:v1.139.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -90,7 +90,7 @@ services:
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
restart: always
|
||||
vmselect-2:
|
||||
image: victoriametrics/vmselect:v1.138.0-cluster
|
||||
image: victoriametrics/vmselect:v1.139.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -105,7 +105,7 @@ services:
|
||||
# read requests from Grafana, vmui, vmalert among vmselects.
|
||||
# It can be used as an authentication proxy.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.138.0
|
||||
image: victoriametrics/vmauth:v1.139.0
|
||||
depends_on:
|
||||
- "vmselect-1"
|
||||
- "vmselect-2"
|
||||
@@ -119,7 +119,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.138.0
|
||||
image: victoriametrics/vmalert:v1.139.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forward them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.138.0
|
||||
image: victoriametrics/vmagent:v1.139.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# storing metrics and serve read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.138.0
|
||||
image: victoriametrics/victoria-metrics:v1.139.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
- 8089:8089
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.138.0
|
||||
image: victoriametrics/vmalert:v1.139.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.138.0
|
||||
image: victoriametrics/vmagent:v1.139.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -14,7 +14,7 @@ services:
|
||||
restart: always
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.138.0
|
||||
image: victoriametrics/victoria-metrics:v1.139.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
restart: always
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.138.0
|
||||
image: victoriametrics/vmalert:v1.139.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
- '--external.alert.source=explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr": },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]'
|
||||
restart: always
|
||||
vmanomaly:
|
||||
image: victoriametrics/vmanomaly:v1.29.1
|
||||
image: victoriametrics/vmanomaly:v1.29.2
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -14,6 +14,15 @@ aliases:
|
||||
---
|
||||
Please find the changelog for VictoriaMetrics Anomaly Detection below.
|
||||
|
||||
## v1.29.2
|
||||
Released: 2026-04-02
|
||||
|
||||
- UI: Updated [vmanomaly UI](https://docs.victoriametrics.com/anomaly-detection/ui/) from [v1.5.1](https://docs.victoriametrics.com/anomaly-detection/ui/#v151) to [v1.6.0](https://docs.victoriametrics.com/anomaly-detection/ui/#v160), see respective [release notes](https://docs.victoriametrics.com/anomaly-detection/ui/#v160) for details. Notable changes include **full UI state modification** from [AI assistant](https://docs.victoriametrics.com/anomaly-detection/ui/#ai-assistance) and [showing business boundaries](https://docs.victoriametrics.com/anomaly-detection/ui/#visualization-panel) on a graph.
|
||||
|
||||
- IMPROVEMENT: Added an option to proxy reader TLS/credential configuration from the `config.reader` to UI, allowing users to leverage the same secure connection settings for both backend and UI queries to datasource without requiring `vmauth` in front of the datasource for UI access. See [authentication section](https://docs.victoriametrics.com/anomaly-detection/ui/#authentication) for details.
|
||||
|
||||
- IMPROVEMENT: Added configurable reconnect retry handling for [`VmWriter`](https://docs.victoriametrics.com/anomaly-detection/components/writer/#vm-writer) with `connection_retry_attempts` arg after transient connection-level write failures.
|
||||
|
||||
## v1.29.1
|
||||
Released: 2026-03-25
|
||||
|
||||
|
||||
@@ -421,7 +421,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.1
|
||||
image: victoriametrics/vmanomaly:v1.29.2
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
@@ -639,7 +639,7 @@ options:
|
||||
Here’s an example of using the config splitter to divide configurations based on the `extra_filters` argument from the reader section:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.1 && docker image tag victoriametrics/vmanomaly:v1.29.1 vmanomaly
|
||||
docker pull victoriametrics/vmanomaly:v1.29.2 && docker image tag victoriametrics/vmanomaly:v1.29.2 vmanomaly
|
||||
```
|
||||
|
||||
```sh
|
||||
|
||||
@@ -45,7 +45,8 @@ There are 2 types of compatibility to consider when migrating in stateful mode:
|
||||
|
||||
| Group start | Group end | Compatibility | Notes |
|
||||
|---------|--------- |------------|-------|
|
||||
| [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291) | Latest* | Fully Compatible | Just a placeholder for new releases |
|
||||
| [v1.29.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1292) | Latest* | Fully Compatible | Just a placeholder for new releases |
|
||||
| [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291) | [v1.29.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1292) | Fully Compatible | - |
|
||||
| [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) | Partially compatible* | Dumped models of class [prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) and [seasonal quantile](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile) have problems with loading to [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) due to dropped `pytz` library. **Upgrading directly from v1.28.7 to [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291) with a fix is suggested** |
|
||||
| [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262) | [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | Fully Compatible | [v1.28.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1280) introduced [rolling](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-models) model class drop in favor of [online](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) models (`rolling_quantile` and `std` models), however, it does not impact compatibility, as artifacts were not produced by default for rolling models. Also, offline `mad` and `zscore` models are redirecting to their respective online counterparts since [v1.28.4](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1284). |
|
||||
| [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) | [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270) | Partially Compatible* | [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) introduced `forecast_at` argument for base [univariate](https://docs.victoriametrics.com/anomaly-detection/components/models/#univariate-models) and `Prophet` [models](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet), however, itself remains backward-reversible from newer states like [v1.26.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262), [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270). (All models except `isolation_forest_multivariate` class will be dropped) |
|
||||
|
||||
@@ -122,7 +122,7 @@ Below are the steps to get `vmanomaly` up and running inside a Docker container:
|
||||
1. Pull Docker image:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.1
|
||||
docker pull victoriametrics/vmanomaly:v1.29.2
|
||||
```
|
||||
|
||||
2. Create the license file with your license key.
|
||||
@@ -142,7 +142,7 @@ docker run -it \
|
||||
-v ./license:/license \
|
||||
-v ./config.yaml:/config.yaml \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.1 \
|
||||
victoriametrics/vmanomaly:v1.29.2 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -159,7 +159,7 @@ docker run -it \
|
||||
-e VMANOMALY_DATA_DUMPS_DIR=/tmp/vmanomaly/data \
|
||||
-e VMANOMALY_MODEL_DUMPS_DIR=/tmp/vmanomaly/models \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.1 \
|
||||
victoriametrics/vmanomaly:v1.29.2 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -172,7 +172,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.1
|
||||
image: victoriametrics/vmanomaly:v1.29.2
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
|
||||
@@ -119,6 +119,10 @@ To start exploring the UI, you can use embedded demo with preconfigured queries
|
||||
|
||||
## Authentication
|
||||
|
||||
> [!TIP]
|
||||
{{% available_from "v1.29.2" anomaly %}} You can proxy connection settings to UI, defined in [reader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#config-parameters) section of the configuration file, so that UI can connect to the data sources with the same settings (e.g. credentials, TLS, etc.). Set `server.use_reader_connection_settings` [arg](https://docs.victoriametrics.com/anomaly-detection/components/server/#parameters) to `true` in the configuration file to enable this feature, and UI will automatically use the connection settings from the reader configuration when connecting to the data sources. If `pass auth headers` option is enabled in the UI settings, it will take precedence over the reader-wise settings. See [example configuration](https://docs.victoriametrics.com/anomaly-detection/components/server/#example-configuration) for details.
|
||||
|
||||
|
||||
{{% available_from "v1.27.0" anomaly %}} The vmanomaly UI supports proxying authentication headers from [v1.1.0](#v110) and onwards.
|
||||
|
||||
Consider using [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/) in front of both vmanomaly (UI and API) and data sources (VictoriaMetrics / VictoriaLogs) to enforce end-to-end setup for accessing the data from the UI.
|
||||
@@ -198,11 +202,11 @@ The best applications of this mode are:
|
||||
Copilot appears as a **chat popup** anchored to the bottom-right corner of the page. The panel is resizable by dragging its left edge, and can be opened or closed by clicking the respective icon.
|
||||
|
||||
> [!TIP] Copilot is context-aware
|
||||
> It reads your active model, scheduler, and anomaly settings from the UI automatically, so you don't need to paste your config manually.
|
||||
> It reads your active model, scheduler, and anomaly settings from the UI automatically, so you don't need to paste your config manually. {{% available_from "v1.29.2" anomaly %}} It also can read and modify full UI settings, such as data source URL, query parameters, scheduler settings, and more, to provide better suggestions.
|
||||
|
||||
### Configuration
|
||||
|
||||
AI Assistant is disabled by default; enable it with `VMANOMALY_COPILOT_ENABLED=true`, then configure an LLM provider API key and, optionally, a model. Once enabled and configured, Copilot will appear as a chat popup in the bottom-right corner of the UI.
|
||||
AI Assistant is disabled by default; enable it with `VMANOMALY_COPILOT_ENABLED=true`, then configure credentials for the selected LLM provider and **explicitly set** `VMANOMALY_COPILOT_MODEL` to the exact model Copilot should use. Do not rely on an implicit default model being selected. Once enabled and configured, Copilot will appear as a chat popup in the bottom-right corner of the UI.
|
||||
|
||||
|
||||
Supported providers and model formats:
|
||||
@@ -262,12 +266,18 @@ export AWS_DEFAULT_REGION=us-east-1
|
||||
# export AWS_SESSION_TOKEN=your_session_token # if using a session token
|
||||
```
|
||||
|
||||
Optionally override the default model:
|
||||
Explicitly set the model Copilot should use:
|
||||
|
||||
```bash
|
||||
export VMANOMALY_COPILOT_MODEL=openai:gpt-5-mini
|
||||
```
|
||||
|
||||
For example, if a smaller OpenAI model is desired, set:
|
||||
|
||||
```bash
|
||||
export VMANOMALY_COPILOT_MODEL=openai:gpt-5-nano
|
||||
```
|
||||
|
||||
### MCP tools server
|
||||
|
||||
Connects Copilot to [mcp-vmanomaly](https://github.com/VictoriaMetrics/mcp-vmanomaly) for full tool access (built-in docs, models configuration and validation, alerts recommendation, service healthchecks, etc.). Full [tools list](https://github.com/VictoriaMetrics/mcp-vmanomaly?tab=readme-ov-file#toolset):
|
||||
@@ -305,7 +315,7 @@ docker run -it --rm \
|
||||
-e VMANOMALY_MCP_SERVER_URL=http://mcp-vmanomaly:8081/mcp \
|
||||
-p 8080:8080 \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.0 \
|
||||
victoriametrics/vmanomaly:v1.29.2 \
|
||||
vmanomaly_config.yaml
|
||||
```
|
||||
|
||||
@@ -351,6 +361,8 @@ The Visualization Panel has 2 modes of displaying data - either raw queried data
|
||||
|
||||
Also, timeseries (such as `y`, `y_hat`, etc.) can be toggled on/off by clicking on the legend items.
|
||||
|
||||
{{% available_from "v1.29.2" anomaly %}} Seeing model [business-boundaries](https://docs.victoriametrics.com/anomaly-detection/faq/#incorporating-domain-knowledge), such as [detection direction](https://docs.victoriametrics.com/anomaly-detection/components/models/#detection-direction) and minimal deviation from expected ([absolute](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-deviation-from-expected) and [relative](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-relative-deviation-from-expected) combined) can be turned on with "business boundaries" toggle. Showing/hiding individual bands can be done by clicking on the respective legend items, while showing/hiding all business boundaries at once can be done with "business boundaries" toggle.
|
||||
|
||||
[Back to UI navigation](#ui-navigation)
|
||||
|
||||
### Model Panel
|
||||
@@ -628,6 +640,15 @@ If the **results** look good and the **model configuration should be deployed in
|
||||
|
||||
## Changelog
|
||||
|
||||
### v1.6.0
|
||||
Released: 2026-04-02
|
||||
|
||||
vmanomaly version: [v1.29.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1292)
|
||||
|
||||
- FEATURE: Now AI assistant can see and modify main UI components state (queries, time range, model configuration, etc.) to provide more contextual and accurate suggestions and actions.
|
||||
|
||||
- FEATURE: Added support for UI to use reader-wise settings (credentials, TLS, etc.) for connecting to data sources, which otherwise would require having `vmauth` in front of both UI and data sources, just for the UI to be able to connect to them. See [Authentication](#authentication) section for details.
|
||||
|
||||
### v1.5.1
|
||||
Released: 2026-03-25
|
||||
|
||||
|
||||
@@ -1265,7 +1265,7 @@ monitoring:
|
||||
Let's pull the docker image for `vmanomaly`:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.1
|
||||
docker pull victoriametrics/vmanomaly:v1.29.2
|
||||
```
|
||||
|
||||
Now we can run the docker container putting as volumes both config and model file:
|
||||
@@ -1279,7 +1279,7 @@ docker run -it \
|
||||
-v $(PWD)/license:/license \
|
||||
-v $(PWD)/custom_model.py:/vmanomaly/model/custom.py \
|
||||
-v $(PWD)/custom.yaml:/config.yaml \
|
||||
victoriametrics/vmanomaly:v1.29.1 /config.yaml \
|
||||
victoriametrics/vmanomaly:v1.29.2 /config.yaml \
|
||||
--licenseFile=/license
|
||||
--watch
|
||||
```
|
||||
|
||||
@@ -27,9 +27,11 @@ Server component of VictoriaMetrics Anomaly Detection (`vmanomaly`) is responsib
|
||||
- `ui_default_state`: Optional [UI](https://docs.victoriametrics.com/anomaly-detection/ui/) state fragment to open on `/vmui/`. Must be URL-encoded and start with `#/?` (e.g. `#/?param=value`). See [Default State](https://docs.victoriametrics.com/anomaly-detection/ui/#default-state) section for details on constructing the value from UI state.
|
||||
- `max_concurrent_tasks`: Maximum number of concurrent anomaly detection tasks processed by the backend. Positive integer. All tasks above the limit will be cancelled if the limit is exceeded. Defaults to `2`.
|
||||
- `uvicorn_config`: Uvicorn configuration dictionary. Default is `{"log_level": "warning"}`. See [Uvicorn server settings](https://www.uvicorn.org/settings/) for details.
|
||||
- {{% available_from "v1.29.2" anomaly %}} `use_reader_connection_settings`: If set to `true`, UI will use connection settings (e.g. credentials, TLS, etc.) from the [reader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#config-parameters) configuration when connecting to data sources. This allows UI to connect to data sources with the same settings without requiring having `vmauth` in front of both UI and data sources.
|
||||
|
||||
### Example Configuration
|
||||
|
||||
> [!TIP]
|
||||
> If [hot-reloading](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#hot-reloading) is enabled in vmanomaly service, the server will automatically pick up changes made to the configuration file without requiring a restart.
|
||||
|
||||
```yaml
|
||||
@@ -45,7 +47,16 @@ server:
|
||||
uvicorn_config: # optional Uvicorn server configuration
|
||||
log_level: 'warning'
|
||||
|
||||
use_reader_connection_settings: true # if set to true, UI will use connection settings from reader configuration below when connecting to data sources, allowing it to connect with the same credentials, TLS settings, etc. without requiring having vmauth in front of both UI and data sources.
|
||||
|
||||
# other vmanomaly configuration sections, like reader, scheduler, models, etc.
|
||||
reader:
|
||||
datasource_url: %{DS_URL}
|
||||
user: %{DS_USER}
|
||||
password: %{DS_PASSWORD}
|
||||
# or
|
||||
# bearer_token: %{DS_BEARER_TOKEN}
|
||||
verify_tls: false
|
||||
```
|
||||
|
||||
### Accessing the server
|
||||
|
||||
@@ -255,6 +255,20 @@ Token is passed in the standard format with header: `Authorization: bearer {toke
|
||||
<td>
|
||||
<span>
|
||||
Path to a file, which contains token, that is passed in the standard format with header: `Authorization: bearer {token}`{{% available_from "v1.15.9" anomaly %}}
|
||||
</span> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>
|
||||
|
||||
<span style="white-space: nowrap;">`connection_retry_attempts`</span>
|
||||
</td>
|
||||
<td>
|
||||
|
||||
`1`
|
||||
</td>
|
||||
<td>
|
||||
<span>
|
||||
Number of attempts to retry the connection in case of failure {{% available_from "v1.29.2" anomaly %}}.
|
||||
</span> </td>
|
||||
</tr>
|
||||
</tbody>
|
||||
@@ -276,6 +290,7 @@ writer:
|
||||
health_path: "health"
|
||||
user: "foo"
|
||||
password: "bar"
|
||||
connection_retry_attempts: 2 # if not specified, it will be 1 by default
|
||||
```
|
||||
|
||||
### Multitenancy support
|
||||
|
||||
@@ -395,7 +395,7 @@ services:
|
||||
restart: always
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.1
|
||||
image: victoriametrics/vmanomaly:v1.29.2
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -238,23 +238,23 @@ vmagent will write data into VictoriaMetrics single-node and cluster (with tenan
|
||||
# compose.yaml
|
||||
services:
|
||||
vmsingle:
|
||||
image: victoriametrics/victoria-metrics:v1.138.0
|
||||
image: victoriametrics/victoria-metrics:v1.139.0
|
||||
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.138.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.139.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.138.0-cluster
|
||||
image: victoriametrics/vminsert:v1.139.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.138.0-cluster
|
||||
image: victoriametrics/vmselect:v1.139.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.138.0
|
||||
image: victoriametrics/vmagent:v1.139.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
@@ -306,7 +306,7 @@ Now add the vmauth service to `compose.yaml`:
|
||||
# compose.yaml
|
||||
services:
|
||||
vmauth:
|
||||
image: docker.io/victoriametrics/vmauth:v1.138.0
|
||||
image: docker.io/victoriametrics/vmauth:v1.139.0
|
||||
ports:
|
||||
- 8427:8427
|
||||
volumes:
|
||||
|
||||
@@ -7,6 +7,9 @@ sitemap:
|
||||
disable: true
|
||||
---
|
||||
|
||||
> [!NOTE] Tip
|
||||
> Store every configuration file you use during this guide in version control. You may need them for reference or to change the configuration of your installation.
|
||||
|
||||
This guide walks you through deploying a VictoriaMetrics cluster version on Kubernetes.
|
||||
|
||||
By the end of this guide, you will know:
|
||||
@@ -382,7 +385,7 @@ Consider reading these resources to complete your setup:
|
||||
- VictoriaMetrics
|
||||
- [Learn more about the cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/)
|
||||
- [Migrate existing metric data into VictoriaMetrics with vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/)
|
||||
- [Setup alerts](https://docs.victoriametrics.com/victoriametrics/vmalert/)
|
||||
- [Setup alerts](https://docs.victoriametrics.com/guides/vmalert-datasource-managed-alerts-grafana/)
|
||||
- Grafana
|
||||
- [Enable persistent storage](https://grafana.com/docs/grafana/latest/setup-grafana/installation/helm/#enable-persistent-storage-recommended)
|
||||
- [Configure private TLS authority](https://grafana.com/docs/grafana/latest/setup-grafana/installation/helm/#configure-a-private-ca-certificate-authority)
|
||||
|
||||
@@ -7,6 +7,9 @@ sitemap:
|
||||
disable: true
|
||||
---
|
||||
|
||||
> [!NOTE] Tip
|
||||
> Store every configuration file you use during this guide in version control. You may need them for reference or to change the configuration of your installation.
|
||||
|
||||
This guide walks you through deploying a [single-node version of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) on Kubernetes using Helm.
|
||||
|
||||
At the end of this guide, you will know:
|
||||
@@ -56,10 +59,19 @@ vm/victoria-metrics-anomaly 1.12.9 v1.28.2 Victoria
|
||||
|
||||
## 2. Install [VictoriaMetrics single](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) from Helm Chart
|
||||
|
||||
Download the Helm value files with the example configuration for VictoriaMetrics single-node:
|
||||
|
||||
```sh
|
||||
wget https://docs.victoriametrics.com/guides/examples/guide-vmsingle-values.yaml
|
||||
```
|
||||
|
||||
> [!NOTE] Tip
|
||||
> Keep this file in version control. You may need it to update the configuration of your VictoriaMetrics service
|
||||
|
||||
Run this command in your terminal to install [VictoriaMetrics single node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) to the default [namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/) in your cluster:
|
||||
|
||||
```shell
|
||||
helm install vmsingle vm/victoria-metrics-single -f https://docs.victoriametrics.com/guides/examples/guide-vmsingle-values.yaml
|
||||
helm install vmsingle vm/victoria-metrics-single -f guide-vmsingle-values.yaml
|
||||
```
|
||||
|
||||
Below are the key sections in the chart values file [`guide-vmsingle-values.yaml`](https://docs.victoriametrics.com/guides/examples/guide-vmsingle-values.yaml):
|
||||
@@ -315,7 +327,7 @@ Consider reading these resources to complete your setup:
|
||||
- VictoriaMetrics
|
||||
- [Learn more about the single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/)
|
||||
- [Migrate existing metric data into VictoriaMetrics with vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/)
|
||||
- [Setup alerts](https://docs.victoriametrics.com/victoriametrics/vmalert/)
|
||||
- [Setup alerts](https://docs.victoriametrics.com/guides/vmalert-datasource-managed-alerts-grafana/)
|
||||
- Grafana
|
||||
- [Enable persistent storage](https://grafana.com/docs/grafana/latest/setup-grafana/installation/helm/#enable-persistent-storage-recommended)
|
||||
- [Configure private TLS authority](https://grafana.com/docs/grafana/latest/setup-grafana/installation/helm/#configure-a-private-ca-certificate-authority)
|
||||
|
||||
@@ -125,7 +125,7 @@ As a reference, see resource consumption of VictoriaMetrics cluster on our [play
|
||||
The Retention Period is the number of days or months for storing data. It affects the disk space usage.
|
||||
The formula for calculating required disk space is the following:
|
||||
```
|
||||
Bytes Per Sample * Ingestion rate * Replication Factor * (Retention Period in Seconds +1 Retention Cycle(day or month)) * 1.2 (recommended 20% of free space for merges )
|
||||
Bytes Per Sample * Ingestion rate * Replication Factor * (Retention Period in Seconds +1 Retention Cycle(day or month)) * 1.25 (recommended 20% of free space for merges )
|
||||
```
|
||||
|
||||
The **Retention Cycle** is one **day** or one **month**. If the retention period is higher than 30 days cycle is a month; otherwise day.
|
||||
@@ -143,7 +143,7 @@ Keep at least **20%** of free space for VictoriaMetrics to remain efficient with
|
||||
|
||||
A Kubernetes environment that produces 5k time series per second with 1-year of the Retention Period and ReplicationFactor=2:
|
||||
|
||||
`(1 byte-per-sample * 5000 time series * 2 replication factor * 34128000 seconds) * 1.2 ) / 2^30 = 381 GB`
|
||||
`(1 byte-per-sample * 5000 time series * 2 replication factor * 34128000 seconds) * 1.25 ) / 2^30 = 397 GB`
|
||||
|
||||
VictoriaMetrics requires additional disk space for the index. The lower Churn Rate, the lower is disk space usage for the index.
|
||||
Usually, index takes about **20%** of the disk space for storing data. High cardinality setups may use **>50%** of storage size for index. If your indexdb looks unexpectedly large, see [FAQ: Why indexdb size is so large?](https://docs.victoriametrics.com/victoriametrics/faq/#why-indexdb-size-is-so-large) for typical ratios and troubleshooting tips.
|
||||
|
||||
@@ -155,15 +155,15 @@ These services will store and query the metrics scraped by vmagent.
|
||||
# compose.yaml
|
||||
services:
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.138.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.139.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.138.0-cluster
|
||||
image: victoriametrics/vminsert:v1.139.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.138.0-cluster
|
||||
image: victoriametrics/vmselect:v1.139.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
ports:
|
||||
@@ -196,7 +196,7 @@ Add the vmauth service to `compose.yaml`:
|
||||
# compose.yaml
|
||||
services:
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.138.0-enterprise
|
||||
image: victoriametrics/vmauth:v1.139.0-enterprise
|
||||
ports:
|
||||
- 8427:8427
|
||||
volumes:
|
||||
@@ -251,7 +251,7 @@ Add the vmagent service to `compose.yaml` with OAuth2 configuration:
|
||||
# compose.yaml
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.138.0
|
||||
image: victoriametrics/vmagent:v1.139.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
|
||||
615
docs/guides/vmalert-datasource-managed-alerts-grafana/README.md
Normal file
615
docs/guides/vmalert-datasource-managed-alerts-grafana/README.md
Normal file
@@ -0,0 +1,615 @@
|
||||
---
|
||||
build:
|
||||
list: never
|
||||
publishResources: false
|
||||
render: never
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
|
||||
Grafana offers a rich alerting UI, including rule grouping, silences, and notification history. While Grafana-managed alerts are easy to use, [they can hit performance issues without additional configuration since they depend on a relational database by default](https://grafana.com/blog/how-we-improved-grafanas-alert-state-history-to-provide-better-insights-into-your-alerting-data/).
|
||||
|
||||
By moving rule evaluation to [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/), you can move past these limitations while retaining Grafana's unified alerting UI. This guide shows the ideal topology for scalable alerting using vmalert, Alertmanager, and Grafana datasource-managed alerts.
|
||||
|
||||
## Grafana Alert Modes
|
||||
|
||||
Grafana supports two alert modes, which can run side by side:
|
||||
- Grafana-managed: alerts are created and evaluated entirely within Grafana itself. The alert state is stored in a SQL database by default, but [Grafana can be configured to store this data in a Prometheus-compatible database like VictoriaMetrics as well](https://grafana.com/docs/grafana/latest/alerting/set-up/configure-alert-state-history/#configure-prometheus-for-alert-state-grafana_alerts-metric).
|
||||
- Datasource-managed: alerts have their rules defined, stored, and evaluated in an external system like vmalert and Alertmanager, with Grafana just providing the UI. State is stored in VictoriaMetrics.
|
||||
|
||||
The following table compares the two modes:
|
||||
|
||||
| Aspect | Grafana-Managed | Data Source-Managed |
|
||||
| ------------------- | -------------------------------------------------- | ----------------------------- |
|
||||
| Where rules live | Grafana's SQL database | vmalert's YAML config |
|
||||
| Evaluation | Grafana's scheduler | vmalert |
|
||||
| Horizontal Scaling | Complex (requires HA SQL database) | Simple (add more pods) |
|
||||
| State storage | SQL backend or a Prometheus datasource | VictoriaMetrics |
|
||||
| UI Management | Full create/edit in Grafana | View-only |
|
||||
| Dependencies | SQL + Grafana | VictoriaMetrics |
|
||||
| Version control | Complex (rules must be exported/imported) | Easy (rules are in YAML file) |
|
||||
|
||||
## Datasource-managed Alert Topology
|
||||
|
||||
The proposed alert setup relies on the following services:
|
||||
|
||||
- VictoriaMetrics: provides the time-series database and persists alert status.
|
||||
- vmalert: evaluates alerting rules from its config file against VictoriaMetrics data and forwards firing alerts to Alertmanager.
|
||||
- Alertmanager: groups and routes alerts to the configured recipients.
|
||||
- Grafana: serves as the unified UI, connecting to VictoriaMetrics for rules and metrics, and to Alertmanager for notifications/silences.
|
||||
|
||||

|
||||
|
||||
## vmalert Demo with Docker {#docker}
|
||||
|
||||
In this section, we'll describe how you can try datasource-managed alerts on Grafana with Docker Compose. Follow the steps in this section to see how the Grafana UI looks in datasource-managed alerts.
|
||||
|
||||
First, create `alerts.yml`. The following configuration file creates an always-firing alert, which you can use to test datasource-managed alerts in Grafana.
|
||||
|
||||
```yaml
|
||||
# alerts.yml
|
||||
groups:
|
||||
- name: demo
|
||||
rules:
|
||||
# Always-firing demo alert so you see something immediately
|
||||
- alert: AlwaysFiring
|
||||
expr: vector(1)
|
||||
for: 10s
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Demo alert that always fires"
|
||||
description: "This is a demo alert from vmalert using vector(1)."
|
||||
|
||||
# Simple recording rule you can graph in Grafana
|
||||
- record: demo:vector_one
|
||||
expr: vector(1)
|
||||
```
|
||||
|
||||
Next, create a basic Alertmanager config called `alertmanager.yml`. This example does not forward alerts anywhere, but serves as a source for Grafana:
|
||||
|
||||
```yaml
|
||||
# alertmanager.yml
|
||||
global:
|
||||
resolve_timeout: 5m
|
||||
|
||||
route:
|
||||
receiver: "log"
|
||||
|
||||
receivers:
|
||||
- name: "log"
|
||||
```
|
||||
|
||||
Finally, create `grafana-datasources.yml` to configure Grafana to use VictoriaMetrics and Alertmanager as datasources for alerts and notifications:
|
||||
|
||||
```yaml
|
||||
# grafana-datasources.yml
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: VictoriaMetrics
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://victoriametrics:8428
|
||||
isDefault: true
|
||||
|
||||
- name: Alertmanager
|
||||
type: alertmanager
|
||||
access: proxy
|
||||
url: http://alertmanager:9093
|
||||
jsonData:
|
||||
implementation: prometheus
|
||||
```
|
||||
|
||||
The final piece is the Docker Compose file. This ties all the services together and sets up the required command-line arguments:
|
||||
|
||||
```yaml
|
||||
# compose.yml
|
||||
services:
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.139.0
|
||||
command:
|
||||
- "--storageDataPath=/victoria-metrics-data"
|
||||
- "--selfScrapeInterval=10s"
|
||||
# Proxy vmalert APIs so Grafana can see rules via VictoriaMetrics
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
ports:
|
||||
- "8428:8428"
|
||||
volumes:
|
||||
- vm-data:/victoria-metrics-data
|
||||
|
||||
alertmanager:
|
||||
image: prom/alertmanager:v0.31.1
|
||||
command:
|
||||
- "--config.file=/etc/alertmanager/alertmanager.yml"
|
||||
ports:
|
||||
- "9093:9093"
|
||||
volumes:
|
||||
- ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.139.0
|
||||
depends_on:
|
||||
- victoriametrics
|
||||
- alertmanager
|
||||
command:
|
||||
# read metrics from VictoriaMetrics
|
||||
- "--datasource.url=http://victoriametrics:8428"
|
||||
# store and retrieve alert state from VictoriaMetrics
|
||||
- "--remoteRead.url=http://victoriametrics:8428"
|
||||
- "--remoteWrite.url=http://victoriametrics:8428"
|
||||
# configure Alertmanager as the notifier
|
||||
- "--notifier.url=http://alertmanager:9093"
|
||||
- "--rule=/etc/vmalert/alerts.yml"
|
||||
- "--evaluationInterval=15s"
|
||||
# external settings link vmalerts and Alertmanager to Grafana
|
||||
- "--external.url=http://localhost:3000"
|
||||
- "--external.alert.source=explore?orgId=1&left={\"datasource\":\"VictoriaMetrics\",\"queries\":[{\"refId\":\"A\",\"expr\":\"{{.Expr|queryEscape}}\"}]}"
|
||||
ports:
|
||||
- "8880:8880"
|
||||
volumes:
|
||||
- ./alerts.yml:/etc/vmalert/alerts.yml:ro
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:12.4
|
||||
depends_on:
|
||||
- victoriametrics
|
||||
- alertmanager
|
||||
environment:
|
||||
GF_SECURITY_ADMIN_USER: admin
|
||||
GF_SECURITY_ADMIN_PASSWORD: admin
|
||||
GF_PATHS_PROVISIONING: /etc/grafana/provisioning
|
||||
ports:
|
||||
- "3000:3000"
|
||||
volumes:
|
||||
- ./grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml:ro
|
||||
- grafana-data:/var/lib/grafana
|
||||
|
||||
volumes:
|
||||
vm-data:
|
||||
grafana-data:
|
||||
```
|
||||
|
||||
Let's break down the main command line arguments that connect every component:
|
||||
|
||||
- VictoriaMetrics
|
||||
- `-vmalert.proxyURL`: forwards Grafana requests for `/api/v1/rules` and `/api/v1/alerts` to vmalert, enabling rule visibility in Grafana UI
|
||||
|
||||
- vmalert
|
||||
- `-datasource.url`: configures VictoriaMetrics as the query source for rule evaluation
|
||||
- `-remoteWrite.url`: defines VictoriaMetrics as the backend used to persist rule state across restarts
|
||||
- `-remoteRead.url`: defines VictoriaMetrics as the backend used to read historical state for pending alerts
|
||||
- `-notifier.url`: directs firing alerts to Alertmanager
|
||||
- `-external.url`: defines the base URL for alert links so users see the public URL of Alertmanager, or an external alerting UI like Karma or Grafana in notifications.
|
||||
- `-external.alert.source`: creates a template for clickable alert links for Grafana. Allows Alertmanager UI to link directly to Grafana
|
||||
|
||||
Now, start the demo with:
|
||||
|
||||
```sh
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
Open your browser at `localhost:3000` and log in to Grafana with username `admin` and password `admin`.
|
||||
|
||||
If you open the sidebar and select **Alerting** > **Alert rules**, you should be able to see one alert pending or firing.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Datasource-managed alert firing in Grafana</figcaption>
|
||||
|
||||
Open the sidebar again and go to **Alerting** > **Active notifications** to see the active alert reported by Alertmanager.
|
||||
|
||||

|
||||
|
||||
You can also see the alerts in VMUI by opening the browser in `http://localhost:8428/vmui/?#/rules`. This is possible only when we have configured `-vmalert.proxyURL` in VictoriaMetrics.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Alerts can be visualized in VMUI directly</figcaption>
|
||||
|
||||
If you open the browser in `http://localhost:9093/#/alerts`, you will see the Alertmanager UI with the firing alert.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Alertmanager UI showing the firing alert</figcaption>
|
||||
|
||||
Clicking **Source** should take you back to Grafana and display the query that generated the alert.
|
||||
|
||||
## vmalert and VictoriaMetrics Single on Kubernetes {#vmsingle}
|
||||
|
||||
This section explains how to configure datasource-managed alerts on the VictoriaMetrics single-node version on Kubernetes.
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- A Kubernetes cluster
|
||||
- VictoriaMetrics single-node
|
||||
- Grafana
|
||||
- Helm values or config files used for installation
|
||||
|
||||
You can follow this guide to install all required components: [Kubernetes monitoring via VictoriaMetrics single](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-single/).
|
||||
|
||||
### 1. Ensure VictoriaMetrics and Grafana are installed
|
||||
|
||||
Ensure you have added and updated the VictoriaMetrics Helm repository:
|
||||
|
||||
```sh
|
||||
helm repo add vm https://victoriametrics.github.io/helm-charts/
|
||||
helm repo update
|
||||
|
||||
```
|
||||
|
||||
Confirm that the VictoriaMetrics single-node version is installed (assuming the release name `vmsingle` from the [installation guide](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-single/))
|
||||
|
||||
```sh
|
||||
kubectl get pods -l app.kubernetes.io/instance=vmsingle
|
||||
```
|
||||
|
||||
You should get a single running pod:
|
||||
|
||||
```sh
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
vmsingle-victoria-metrics-single-server-0 1/1 Running 0 48m
|
||||
```
|
||||
|
||||
Do the same for Grafana:
|
||||
|
||||
```sh
|
||||
kubectl get pod -l app.kubernetes.io/name=grafana
|
||||
```
|
||||
|
||||
You should get the name of the Grafana pod running in your cluster:
|
||||
|
||||
```sh
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
my-grafana-65d6d4ccbc-nxkxq 1/1 Running 0 58m
|
||||
```
|
||||
|
||||
### 2. Install vmalert and Alertmanager
|
||||
|
||||
Create a Helm values file for vmalert and Alertmanager called `vm-alerting-values.yml`.
|
||||
|
||||
The example below comes with two demo alerts. Add your own vmalert [alerting rules](https://docs.victoriametrics.com/victoriametrics/vmalert/#rules) in the `config: alerts:` section below.
|
||||
|
||||
```sh
|
||||
cat <<EOF > vm-alerting-values.yml
|
||||
# Enable and configure Alertmanager
|
||||
alertmanager:
|
||||
enabled: true
|
||||
config:
|
||||
global:
|
||||
resolve_timeout: 5m
|
||||
route:
|
||||
group_by: ["alertname"]
|
||||
group_wait: 30s
|
||||
group_interval: 5m
|
||||
repeat_interval: 12h
|
||||
receiver: "log"
|
||||
|
||||
receivers:
|
||||
- name: "log"
|
||||
# place your default route here for notifications
|
||||
|
||||
# Configure vmalert ("server" section in this chart)
|
||||
server:
|
||||
# vmalert evaluation datasource: point at vmsingle’s Prometheus-compatible API
|
||||
datasource:
|
||||
url: http://vmsingle-victoria-metrics-single-server.default.svc.cluster.local.:8428
|
||||
|
||||
# Where vmalert stores and reads alert state (remote write/read)
|
||||
remote:
|
||||
write:
|
||||
url: http://vmsingle-victoria-metrics-single-server.default.svc.cluster.local.:8428
|
||||
read:
|
||||
url: http://vmsingle-victoria-metrics-single-server.default.svc.cluster.local.:8428
|
||||
|
||||
# Configure Alertmanager as notifier
|
||||
notifier:
|
||||
alertmanager:
|
||||
# Adjust namespace/name if you install into a non-default namespace or change the release name
|
||||
url: http://vmalert-victoria-metrics-alert-alertmanager:9093
|
||||
|
||||
# Inline demo rules. Add your alerting groups and rules here
|
||||
config:
|
||||
alerts:
|
||||
groups:
|
||||
- name: vm-health
|
||||
rules:
|
||||
- alert: TooManyRestarts
|
||||
expr: changes(process_start_time_seconds{job=~"victoriametrics|vmagent|vmalert"}[15m]) > 2
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "{{ $labels.job }} too many restarts (instance {{ $labels.instance }})"
|
||||
description: "Job {{ $labels.job }} has restarted more than twice in the last 15 minutes. It might be crashlooping."
|
||||
- alert: ServiceDown
|
||||
expr: up{job=~"victoriametrics|vmagent|vmalert"} == 0
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Service {{ $labels.job }} is down on {{ $labels.instance }}"
|
||||
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
|
||||
EOF
|
||||
```
|
||||
|
||||
Install `vmalert` and Alertmanager with:
|
||||
|
||||
```sh
|
||||
helm install vmalert vm/victoria-metrics-alert -f vm-alerting-values.yml
|
||||
```
|
||||
|
||||
### 3. Configure VictoriaMetrics single to proxy to vmalert
|
||||
|
||||
For datasource-managed alerts, Grafana talks to VictoriaMetrics, and VictoriaMetrics proxies alerting-related API calls to vmalert via the `-vmalert.proxyURL` flag.
|
||||
|
||||
First, check the service name for `vmalert`:
|
||||
|
||||
```sh
|
||||
kubectl get svc -l app.kubernetes.io/instance=vmalert,app=server
|
||||
```
|
||||
|
||||
Get the name of the `vmalert` service:
|
||||
|
||||
```sh
|
||||
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||
vmalert-victoria-metrics-alert-server ClusterIP None <none> 8880/TCP 58m
|
||||
```
|
||||
|
||||
The internal DNS name for this service, in the default namespace, will be:
|
||||
|
||||
```text
|
||||
vmalert-victoria-metrics-alert-server.default.svc.cluster.local:8880
|
||||
```
|
||||
|
||||
Next, create a Helm values file with the internal Kubernetes URL for vmalert.
|
||||
|
||||
```sh
|
||||
cat <<EOF > vm-vmalert-proxy-values.yml
|
||||
# vm-vmalert-proxy-values.yaml
|
||||
server:
|
||||
extraArgs:
|
||||
vmalert.proxyURL: http://vmalert-victoria-metrics-alert-server.default.svc.cluster.local:8880
|
||||
EOF
|
||||
|
||||
```
|
||||
|
||||
Update the configuration of VictoriaMetrics single by applying both *your original Helm values* and this new overlay. In the example below, the original values file is called `vmsingle-values-file.yml` (this is the file you used when you first installed the cluster):
|
||||
|
||||
```sh
|
||||
helm upgrade vmsingle vm/victoria-metrics-single \
|
||||
-f vmsingle-values-file.yml \
|
||||
-f vm-vmalert-proxy-values.yml
|
||||
```
|
||||
|
||||
After this upgrade, vmsingle will start proxying `/api/v1/rules`, `/api/v1/alerts`, and other `vmalert` [endpoints](https://docs.victoriametrics.com/victoriametrics/vmalert/#web) to the vmalert service, enabling Grafana’s alerting UI and API to work through the VictoriaMetrics datasource.
|
||||
|
||||
To finish the setup, jump to the [Configure Grafana](https://docs.victoriametrics.com/guides/vmalert-datasource-managed-alerts-grafana/#grafana) section
|
||||
|
||||
## vmalert and VictoriaMetrics Cluster on Kubernetes {#vmcluster}
|
||||
|
||||
This section explains how to configure datasource-managed alerts on the VictoriaMetrics cluster version on Kubernetes.
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- A Kubernetes cluster
|
||||
- VictoriaMetrics cluster
|
||||
- Grafana
|
||||
- Helm values or config files used for the installation of the cluster
|
||||
|
||||
You can follow this guide to install the cluster and Grafana first: [Kubernetes monitoring with VictoriaMetrics cluster](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/).
|
||||
|
||||
### 1. Ensure VictoriaMetrics and Grafana are installed
|
||||
|
||||
Ensure you have added and updated the VictoriaMetrics Helm repository:
|
||||
|
||||
```sh
|
||||
helm repo add vm https://victoriametrics.github.io/helm-charts/
|
||||
helm repo update
|
||||
```
|
||||
|
||||
Confirm that the VictoriaMetrics cluster is installed (assuming the release name `vmcluster` from the [installation guide](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/)):
|
||||
|
||||
```sh
|
||||
kubectl get pods -l app.kubernetes.io/instance=vmcluster
|
||||
```
|
||||
|
||||
You should see pods for `vmselect`, `vminsert`, and `vmstorage`, for example:
|
||||
|
||||
```text
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
vmcluster-victoria-metrics-cluster-vminsert-b4d494b4c-cx2m5 1/1 Running 0 3m8s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-b4d494b4c-xdv76 1/1 Running 0 3m8s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-67979c98fc-7hfdl 1/1 Running 0 3m8s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-67979c98fc-ftpzn 1/1 Running 0 3m8s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-0 1/1 Running 0 3m8s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-1 1/1 Running 0 2m48s
|
||||
```
|
||||
|
||||
VictoriaMetrics exposes its write API via the `vminsert` service on port 8480 and its read (Prometheus-compatible) API via the `vmselect` service on port 8481 by default. For a default installation, these DNS names are:
|
||||
|
||||
- Write: `vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local.:8480`
|
||||
- Read: `vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local.:8481`
|
||||
|
||||
Now, ensure Grafana is installed:
|
||||
|
||||
```sh
|
||||
kubectl get pod -l app.kubernetes.io/name=grafana
|
||||
```
|
||||
|
||||
You should get the name of the Grafana pod running in your cluster:
|
||||
|
||||
```sh
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
my-grafana-65d6d4ccbc-nxkxq 1/1 Running 0 58m
|
||||
```
|
||||
|
||||
### 2. Install vmalert and Alertmanager
|
||||
|
||||
Create a Helm values file for vmalert and Alertmanager called `vm-alerting-values.yml`.
|
||||
|
||||
The example below comes with two demo alerts. Add your own vmalert [alerting rules](https://docs.victoriametrics.com/victoriametrics/vmalert/#rules) in the `config: alerts:` section below.
|
||||
|
||||
|
||||
```sh
|
||||
cat <<EOF > vm-alerting-values.yml
|
||||
# Enable and configure Alertmanager
|
||||
alertmanager:
|
||||
enabled: true
|
||||
config:
|
||||
global:
|
||||
resolve_timeout: 5m
|
||||
route:
|
||||
group_by: ["alertname"]
|
||||
group_wait: 30s
|
||||
group_interval: 5m
|
||||
repeat_interval: 12h
|
||||
receiver: "log"
|
||||
|
||||
receivers:
|
||||
- name: "log"
|
||||
# place your notification route here
|
||||
|
||||
# Configure vmalert ("server" section in this chart)
|
||||
server:
|
||||
# vmalert evaluation datasource: point at vmselect’s Prometheus-compatible API
|
||||
datasource:
|
||||
url: http://vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local.:8481/select/multitenant/prometheus/
|
||||
|
||||
# Where vmalert stores and reads alert state (remote write/read)
|
||||
remote:
|
||||
write:
|
||||
# send ALERTS / recording rule series into vminsert
|
||||
url: http://vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local.:8480/insert/0/prometheus/
|
||||
read:
|
||||
# read back alert state from vmselect
|
||||
url: http://vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local.:8481/select/0/prometheus/
|
||||
|
||||
# Configure Alertmanager as notifier
|
||||
notifier:
|
||||
alertmanager:
|
||||
# Adjust namespace/name if you install into a non-default namespace or change the release name
|
||||
url: http://vmalert-victoria-metrics-alert-alertmanager:9093
|
||||
|
||||
# Inline demo rules. Add your alerting groups and rules here
|
||||
config:
|
||||
alerts:
|
||||
groups:
|
||||
- name: vm-health
|
||||
rules:
|
||||
- alert: TooManyRestarts
|
||||
expr: changes(process_start_time_seconds{job=~"victoriametrics|vmagent|vmalert"}[15m]) > 2
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "{{ $labels.job }} too many restarts (instance {{ $labels.instance }})"
|
||||
description: "Job {{ $labels.job }} has restarted more than twice in the last 15 minutes.
|
||||
It might be crashlooping."
|
||||
- alert: ServiceDown
|
||||
expr: up{job=~"victoriametrics|vmagent|vmalert"} == 0
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Service {{ $labels.job }} is down on {{ $labels.instance }}"
|
||||
description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
|
||||
EOF
|
||||
```
|
||||
|
||||
|
||||
The key differences from the [single-node setup](https://docs.victoriametrics.com/guides/vmalert-datasource-managed-alerts-grafana/#vmsingle) section
|
||||
- `server.datasource.url` and `server.remote.read.url` point to the `vmselect` read endpoint (`/select/multitenant/prometheus/`).
|
||||
- `server.remote.write.url` points to the `vminsert` write endpoint (`/insert/multitenant/prometheus/`).
|
||||
|
||||
Install `vmalert` and Alertmanager with:
|
||||
|
||||
```sh
|
||||
helm install vmalert vm/victoria-metrics-alert -f vm-alerting-values.yml
|
||||
```
|
||||
|
||||
### 3. Configure VictoriaMetrics Cluster to proxy to vmalert
|
||||
|
||||
For datasource-managed alerts, Grafana talks to VictoriaMetrics, and VictoriaMetrics proxies alerting-related API calls to vmalert via the `-vmalert.proxyURL` flag. In the cluster version, set this flag on `vmselect` and point it to the vmalert service so Grafana can reach the alert state.
|
||||
|
||||
First, check the service name for vmalert:
|
||||
|
||||
```sh
|
||||
kubectl get svc -l app.kubernetes.io/instance=vmalert,app=server
|
||||
```
|
||||
|
||||
You should see something like:
|
||||
|
||||
```sh
|
||||
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||
vmalert-victoria-metrics-alert-server ClusterIP None <none> 8880/TCP 58m
|
||||
```
|
||||
|
||||
The internal DNS name for this service, in the default namespace, will be:
|
||||
|
||||
```text
|
||||
vmalert-victoria-metrics-alert-server.default.svc.cluster.local:8880
|
||||
```
|
||||
|
||||
Create a Helm values overlay file for the cluster called `vmcluster-vmalert-proxy-values.yml`.
|
||||
|
||||
The `vmselect.extraArgs` map in the `victoria-metrics-cluster` chart allows you to pass arbitrary command-line flags to vmselect, including `-vmalert.proxyURL`.
|
||||
|
||||
```sh
|
||||
cat <<EOF > vmcluster-vmalert-proxy-values.yml
|
||||
vmselect:
|
||||
extraArgs:
|
||||
vmalert.proxyURL: http://vmalert-victoria-metrics-alert-server.default.svc.cluster.local:8880
|
||||
EOF
|
||||
```
|
||||
|
||||
Update the VictoriaMetrics cluster configuration by applying both your *original Helm values* and this new overlay. In the example below, the original values file is called `vmcluster-values-file.yml` (this is the file you used when you first installed the cluster):
|
||||
|
||||
```sh
|
||||
helm upgrade vmcluster vm/victoria-metrics-cluster \
|
||||
-f victoria-metrics-cluster-values.yml \
|
||||
-f vmcluster-vmalert-proxy-values.yml
|
||||
```
|
||||
|
||||
After this upgrade, vmselect will start proxying `/api/v1/rules`, `/api/v1/alerts`, and other `vmalert` endpoints to the vmalert service, enabling Grafana’s alerting UI and API to work through the VictoriaMetrics datasource.
|
||||
|
||||
To finalize the setup, continue on to the next section, [Configure Grafana](#grafana).
|
||||
|
||||
## Configure Grafana {#grafana}
|
||||
|
||||
The last step on any Kubernetes-based installation is to add Alertmanager to Grafana so that Grafana can show notifications alongside the rules it discovers via the VictoriaMetrics datasource.
|
||||
|
||||
Get the service name for Alertmanager in your cluster:
|
||||
|
||||
```sh
|
||||
kubectl get svc -l app.kubernetes.io/instance=vmalert,app=alertmanager
|
||||
```
|
||||
|
||||
You should see something like:
|
||||
|
||||
```text
|
||||
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||
vmalert-victoria-metrics-alert-alertmanager ClusterIP 10.43.114.243 <none> 9093/TCP 68m
|
||||
```
|
||||
|
||||
Next, add Alertmanager to Grafana:
|
||||
|
||||
1. Log in to your Grafana dashboard.
|
||||
2. Go to **Connections** > **Datasources**.
|
||||
3. Press **+ Add new data source**.
|
||||
4. Search and select “Alertmanager”.
|
||||
5. Fill in the following parameters (adjusting namespace/service name if needed):
|
||||
- Implementation: Prometheus
|
||||
- URL: `http://vmalert-victoria-metrics-alert-alertmanager.default.svc.cluster.local:9093`
|
||||
|
||||
Ensure the URL matches the Alertmanager service name you obtained earlier.
|
||||
|
||||
6. Press **Save & Test**.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Adding Alertmanager to Grafana</figcaption>
|
||||
|
||||
With vmselect’s `vmalert.proxyURL` set and Alertmanager configured as a datasource, Grafana should now be able to display vmalert rules and alert instances from the VictoriaMetrics Cluster datasource and show notifications managed by Alertmanager in its UI.
|
||||
|
||||
## See also
|
||||
|
||||
- [YouTube: Mathias Palmersheim - Who will be your Ruler](https://youtu.be/NfhVOEkznFY)
|
||||
- [Kubernetes monitoring via VictoriaMetrics single](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-single/)
|
||||
- [Kubernetes monitoring with VictoriaMetrics cluster](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/)
|
||||
- Learn more about [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/)
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
---
|
||||
weight: 16
|
||||
title: Datasource-Managed Alerts with vmalert and Grafana
|
||||
menu:
|
||||
docs:
|
||||
parent: "guides"
|
||||
weight: 16
|
||||
tags:
|
||||
- metrics
|
||||
- guide
|
||||
- kubernetes
|
||||
- alerts
|
||||
aliases:
|
||||
- /guides/vmalert-datasource-managed-alerts-grafana.html
|
||||
---
|
||||
{{% content "README.md" %}}
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 403 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 634 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 373 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 557 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 185 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 114 KiB |
@@ -111,6 +111,7 @@ See also [case studies](https://docs.victoriametrics.com/victoriametrics/casestu
|
||||
* [Monitoring Pipeline with Prometheus and VictoriaMetrics](https://clovisc.medium.com/monitoring-pipeline-with-blackbox-exporter-prometheus-victoriametrics-and-vmalert-0ab020c7202a)
|
||||
* [FreeBSD: monitoring with VictoriaMetrics and Grafana](https://setevoy.medium.com/freebsd-monitoring-with-victoriametrics-and-grafana-f789904f2628)
|
||||
* [QCon London 2026: Wrangling Telemetry at Scale, a Guide to Self-Hosted Observability](https://www.infoq.com/news/2026/03/self-hosted-observability/)
|
||||
* [How We Made Telemetry Queries 10x Faster: Chunk-Split Caching for Metrics, Logs, and Traces](https://mirastacklabs.ai/blog/chunk-split-caching/)
|
||||
|
||||
## Third-party articles and slides about VictoriaLogs
|
||||
|
||||
|
||||
@@ -100,6 +100,10 @@ The following steps must be performed during the upgrade / downgrade procedure:
|
||||
* Wait until the process stops. This can take a few seconds.
|
||||
* Start the upgraded VictoriaMetrics.
|
||||
|
||||
> If you'd prefer not to manage upgrades yourself, [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_bestpractices_upgrade)
|
||||
> performs version upgrades automatically during scheduled maintenance windows with no action required on your part.
|
||||
> See the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/) to get started.
|
||||
|
||||
## Backup Recommendations
|
||||
|
||||
VictoriaMetrics supports backups via [vmbackup](https://docs.victoriametrics.com/victoriametrics/vmbackup/) and [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/) tools. There is also [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/), which simplifies backup automation.
|
||||
|
||||
@@ -556,6 +556,9 @@ See more details on [how to monitor VictoriaMetrics components](https://docs.vic
|
||||
- `-storage.maxHourlySeries` is the limit on the number of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series) during the last hour.
|
||||
- `-storage.maxDailySeries` is the limit on the number of unique time series during the day. This limit can be used for limiting daily [time series churn rate](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-high-churn-rate).
|
||||
|
||||
It is possible to use `-1` as a value for these flags{{% available_from "#" %}} in order to enable series tracking but set limit to maximum possible value.
|
||||
This is useful in order to estimate the number of unique series written to `vmstorage` without enforcing limits.
|
||||
|
||||
Note that these limits are set and applied individually per each `vmstorage` node in the cluster. So, if the cluster has `N` `vmstorage` nodes, then the cluster-level limits will be `N` times bigger than the per-`vmstorage` limits.
|
||||
|
||||
See more details about cardinality limiter in [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-limiter).
|
||||
@@ -755,6 +758,10 @@ The `minimum downtime` strategy has the following benefits comparing to `no down
|
||||
- It allows minimizing the duration of config update / version upgrade for clusters with big number of nodes
|
||||
of for clusters with big `vmstorage` nodes, which may take long time for graceful restart.
|
||||
|
||||
> If you'd prefer to avoid managing cluster upgrades yourself, [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_cluster_upgrade)
|
||||
> handles version upgrades automatically during maintenance windows, and lets you create, resize, or delete deployments with a few clicks from the UI.
|
||||
> See the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/) to get started.
|
||||
|
||||
### Improving re-routing performance during restart
|
||||
|
||||
`vmstorage` nodes may experience increased usage for CPU, RAM and disk IO during
|
||||
|
||||
@@ -27,5 +27,5 @@ to [the latest available releases](https://docs.victoriametrics.com/victoriametr
|
||||
|
||||
## Currently supported LTS release lines
|
||||
|
||||
- v1.136.x - the latest one is [v1.136.2 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.2)
|
||||
- v1.122.x - the latest one is [v1.122.17 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.17)
|
||||
- v1.136.x - the latest one is [v1.136.3 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.3)
|
||||
- v1.122.x - the latest one is [v1.122.18 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.18)
|
||||
@@ -58,9 +58,9 @@ Download the newest available [VictoriaMetrics release](https://docs.victoriamet
|
||||
from [DockerHub](https://hub.docker.com/r/victoriametrics/victoria-metrics) or [Quay](https://quay.io/repository/victoriametrics/victoria-metrics?tab=tags):
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/victoria-metrics:v1.138.0
|
||||
docker pull victoriametrics/victoria-metrics:v1.139.0
|
||||
docker run -it --rm -v `pwd`/victoria-metrics-data:/victoria-metrics-data -p 8428:8428 \
|
||||
victoriametrics/victoria-metrics:v1.138.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
victoriametrics/victoria-metrics:v1.139.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
```
|
||||
|
||||
_For Enterprise images see [this link](https://docs.victoriametrics.com/victoriametrics/enterprise/#docker-images)._
|
||||
|
||||
@@ -247,6 +247,10 @@ The following steps must be performed during the upgrade / downgrade procedure:
|
||||
|
||||
Prometheus doesn't drop data during VictoriaMetrics restart. See [this article](https://grafana.com/blog/2019/03/25/whats-new-in-prometheus-2.8-wal-based-remote-write/) for details. The same applies also to [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/).
|
||||
|
||||
> If you'd prefer not to manage upgrades yourself, [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_single_upgrade)
|
||||
> performs version upgrades automatically during maintenance windows with no action required on your part.
|
||||
> See the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/) to get started.
|
||||
|
||||
## vmui
|
||||
|
||||
VictoriaMetrics provides UI for query troubleshooting and exploration. The UI is available at `http://victoriametrics:8428/vmui`
|
||||
@@ -1786,6 +1790,10 @@ via [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) or via
|
||||
See also [VictoriaMetrics Monitoring](https://victoriametrics.com/blog/victoriametrics-monitoring/)
|
||||
and [troubleshooting docs](https://docs.victoriametrics.com/victoriametrics/troubleshooting/).
|
||||
|
||||
> [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_single_monitoring)
|
||||
> provides built-in monitoring dashboards and automatic alerts when resource consumption is high or configured limits are approached,
|
||||
> so you get notified before issues impact your workload. See the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/) to get started.
|
||||
|
||||
> VictoriaMetrics components do not expose metadata `TYPE` and `HELP` fields on `/metrics` page.
|
||||
> Services like Google Cloud Managed Prometheus could require metadata to be present for scraping. In this case, pass `-metrics.exposeMetadata`
|
||||
command-line to them. See [these docs](https://cloud.google.com/stackdriver/docs/managed-prometheus/troubleshooting#missing-metric-type) for details.
|
||||
@@ -1968,6 +1976,9 @@ By default, VictoriaMetrics doesn't limit the number of stored time series. The
|
||||
|
||||
Both limits can be set simultaneously. If any of these limits is reached, then incoming samples for new time series are dropped. A sample of dropped series is put in the log with `WARNING` level.
|
||||
|
||||
It is possible to use `-1` as a value for these flags{{% available_from "#" %}} in order to enable series tracking but set limit to maximum possible value.
|
||||
This is useful in order to estimate the number of unique series which is written to VictoriaMetrics single without enforcing limits.
|
||||
|
||||
The exceeded limits can be [monitored](#monitoring) with the following metrics:
|
||||
|
||||
* `vm_hourly_series_limit_rows_dropped_total` - the number of metrics dropped due to exceeded hourly limit on the number of unique time series.
|
||||
|
||||
@@ -26,6 +26,23 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
|
||||
## tip
|
||||
|
||||
* SECURITY: upgrade Go builder from Go1.26.1 to Go1.26.2. See [the list of issues addressed in Go1.26.2](https://github.com/golang/go/issues?q=milestone%3AGo1.26.2%20label%3ACherryPickApproved).
|
||||
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add per-URL `-remoteWrite.disableMetadata` flag to disable metadata sending for specific remote storage URLs. See [#10711](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10711). Thanks to @evkuzin for the contribution.
|
||||
* FEATURE: introduce `vm_filestream_fsync_duration_seconds_total` and `vm_filestream_fsync_calls_total` metrics, which can be used for detecting slow storage if it cannot keep up with the current data ingestion rate. See [#10432](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10432). Thanks to @mehrdadbn9 for the contribution.
|
||||
* FEATURE: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): add dedicated `thanos` mode for [migrating data from Thanos](https://docs.victoriametrics.com/victoriametrics/vmctl/thanos/). This mode supports both raw and downsampled Thanos blocks, including all aggregate types (count, sum, min, max, counter). Each aggregate is imported as a separate metric with resolution and aggregate type suffixes (e.g., `metric_name:5m:count`). The new mode uses `--thanos-*` prefixed flags: `--thanos-snapshot`, `--thanos-concurrency`, `--thanos-filter-time-start`, `--thanos-filter-time-end`, `--thanos-filter-label`, `--thanos-filter-label-value`, and `--thanos-aggr-types`. See [#9262](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9262).
|
||||
* FEATURE: [dashboards/alert-statistics](https://grafana.com/grafana/dashboards/24553): add pending and firing alerts stats; fix query in `FIRING over time by group` panel. See [#10571](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10571). Thanks to @sias32 for the contribution.
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): add random jitter to concurrent periodical flushers targeting the remote write destination. This helps spread remote write flushes across the flush interval, avoiding congestion at the remote write destination and enhancing queue data consumption. See [#10729](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10729).
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): expose `vmalert_remotewrite_sent_rows` and `vmalert_remotewrite_sent_bytes` histograms to provide better visibility into remote write request sizes. See [#10727](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10727).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): allow setting `-1` value for `-remoteWrite.maxHourlySeries` and `-remoteWrite.maxDailySeries` command-line flags. This automatically sets limits to the highest possible value in order to enable tracking without enforcing any limits. This is helpful for estimating current usage before applying real limits. See [#9614](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9614).
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): allow setting `-1` value for `-storage.maxHourlySeries` and `-storage.maxDailySeries` command-line flags. This automatically sets limits to the highest possible value in order to enable tracking without enforcing any limits. This is helpful for estimating current usage before applying real limits. See [#9614](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9614).
|
||||
|
||||
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): retry the requests that failed with unexpected EOF due to unstable network to S3 service. See [#10699](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10699).
|
||||
* BUGFIX: All VictoriaMetrics components: Fix an issue where `unsupported` metric metadata type was exposed for summaries and quantiles if a summary wasn't updated within a certain time window. See [metrics#120](https://github.com/VictoriaMetrics/metrics/issues/120) and [metrics#121](https://github.com/VictoriaMetrics/metrics/pull/121).
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): align request body buffering flags - `maxRequestBodySizeToRetry` and `requestBufferSize` to the same `16KB` value. Allow disabling request buffering by setting `requestBufferSize=0`. See [#10675](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10675)
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): fix `scrape_series_added` metric to update only on successful scrapes, aligning its behavior with Prometheus. See [#10653](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10653).
|
||||
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/): improve the selection algorithm of [buckets_limit](https://docs.victoriametrics.com/victoriametrics/metricsql/#buckets_limit) to remove consecutive empty buckets at the beginning and end to obtain more accurate min and max values. See [#10417](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10417).
|
||||
|
||||
## [v1.139.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.139.0)
|
||||
|
||||
Released at 2026-03-27
|
||||
@@ -39,7 +56,7 @@ Released at 2026-03-27
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): allow specifying `basic_auth` in scrape configs without `username`. Previously this resulted in a config error. Now a warning is logged instead. See [#6956](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6956). Thanks to @andriibeee for the contribution.
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): add support for negative buckets in [OpenTelemetry](https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/) `ExponentialHistogram` during ingestion. See [#9896-comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9896#issuecomment-4037522985).
|
||||
* FEATURE: [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/): Improve restore speed on linux systems by pre-allocating files and optimizing write path. This behavior can be disabled with `-skipFilePreallocation`. See [#10661](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10661/). Thanks to @BenNF for the contribution.
|
||||
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): [yandexcloud_sd_configs](https://docs.victoriametrics.com/victoriametrics/sd_configs/#yandexcloud_sd_configs) now supports `folder_ids` for limiting discovery to specific folders. See [#10623](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10623). Thanks to @Br1an67 for the contribution.
|
||||
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): retry RPC by dialing a new connection instead of reusing a pooled one when the previous attempt fails with `io.EOF`, `broken pipe` or `reset by peer`. This reduces query failures caused by stale connections to restarted vmstorage nodes. See [#10314](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10314)
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix autocomplete dropdown not closing on the Raw Query page. See [#10665](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10665)
|
||||
@@ -48,7 +65,7 @@ Released at 2026-03-27
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): stop logging `error`-level messages when scraping targets that expose OpenMetrics `info`, `gaugehistogram`, `stateset`, or `unknown` metric types. These are valid [OpenMetrics](https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md) types and should be parsed without error. See [#10685](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10685). Thanks to @tsarna for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): prevent panic during directory deletion on `NFS`-based mounts. The bug was introduced in [83da33d8](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/83da33d8cfe8352fd0022d05a8b6346ebb48420d) and included in [v1.123.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victoriametrics/changelog/CHANGELOG_2025.md#v11230). See [#9842](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9842).
|
||||
|
||||
## [v1.138.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.138.0)
|
||||
## [v1.139.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.139.0)
|
||||
|
||||
Released at 2026-03-13
|
||||
|
||||
@@ -62,7 +79,6 @@ Released at 2026-03-13
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): support negative values for the group `eval_offset` option, which allows starting group evaluation at `groupInterval-abs(eval_offset)` within `[0...groupInterval]`. See [#10424](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10424).
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): Disable `/graphite/tags/tagSeries` and `/graphite/tags/tagMultiSeries` for Graphite tag registration since it is unlikely it is used in context of VictoriaMetrics. See [10544](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10544).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): rename debug tools buttons for clarity. See [#10453](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10453).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): [yandexcloud_sd_configs](https://docs.victoriametrics.com/victoriametrics/sd_configs/#yandexcloud_sd_configs) now supports `folder_ids` for limiting discovery to specific folders. See [#10623](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10623). Thanks to @Br1an67 for the contribution.
|
||||
|
||||
* BUGFIX: all VictoriaMetrics components: replace `histogram` with `untyped` metric metadata type for [VictoriaMetrics histograms](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#histogram) when `-metrics.exposeMetadata` is set. See [#82](https://github.com/VictoriaMetrics/metrics/issues/82).
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): properly route requests to `default_url`. Previously, `request_path` query arg could be set incorrectly during concurrent requests. See [#10626](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10626).
|
||||
@@ -104,6 +120,20 @@ It enables back `Discovered targets` debug UI by default.
|
||||
* BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly apply `extra_filters[]` filter when querying `vm_account_id` or `vm_project_id` labels via [multitenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) request for `/api/v1/label/…/values` API. Before, `extra_filters` was ignored. See [#10503](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10503).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): revert the use of rollup result cache for [instant queries](https://docs.victoriametrics.com/keyConcepts.html#instant-query) that contain [`rate`](https://docs.victoriametrics.com/MetricsQL.html#rate) function with a lookbehind window larger than `-search.minWindowForInstantRollupOptimization`. The cache usage was removed since [v1.132.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.132.0). See [#10098](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10098#issuecomment-3895011084) for more details.
|
||||
|
||||
## [v1.136.3](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.3)
|
||||
|
||||
Released at 2026-03-27
|
||||
|
||||
**v1.136.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.136.x line will be supported for at least 12 months since [v1.136.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11360) release**
|
||||
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): retry RPC by dialing a new connection instead of reusing a pooled one when the previous attempt fails with `io.EOF`, `broken pipe` or `reset by peer`. This reduces query failures caused by stale connections to restarted vmstorage nodes. See [#10314](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10314)
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix autocomplete dropdown not closing on the Raw Query page. See [#10665](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10665)
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): enforce `datasource_type=prometheus` when [proxying](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert) Grafana requests to [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/). Grafana supports only `prometheus` and `loki` alerts. Without this fix, Grafana shows `Error loading alerts` when non-Prometheus alert types are returned. See [victoriametrics-datasource#329](https://github.com/VictoriaMetrics/victoriametrics-datasource/issues/329).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): stop logging `error`-level messages when scraping targets that expose OpenMetrics `info`, `gaugehistogram`, `stateset`, or `unknown` metric types. These are valid [OpenMetrics](https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md) types and should be parsed without error. See [#10685](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10685). Thanks to @tsarna for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): prevent panic during directory deletion on `NFS`-based mounts. The bug was introduced in [83da33d8](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/83da33d8cfe8352fd0022d05a8b6346ebb48420d) and included in [v1.123.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victoriametrics/changelog/CHANGELOG_2025.md#v11230). See [#9842](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9842).
|
||||
|
||||
## [v1.136.2](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.2)
|
||||
|
||||
Released at 2026-03-13
|
||||
@@ -293,6 +323,17 @@ See changes [here](https://docs.victoriametrics.com/victoriametrics/changelog/ch
|
||||
|
||||
See changes [here](https://docs.victoriametrics.com/victoriametrics/changelog/changelog_2025/#v11230)
|
||||
|
||||
## [v1.122.18](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.18)
|
||||
|
||||
Released at 2026-03-27
|
||||
|
||||
**v1.122.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.122.x line will be supported for at least 12 months since [v1.122.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11220) release**
|
||||
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): retry RPC by dialing a new connection instead of reusing a pooled one when the previous attempt fails with `io.EOF`, `broken pipe` or `reset by peer`. This reduces query failures caused by stale connections to restarted vmstorage nodes. See [#10314](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10314)
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): enforce `datasource_type=prometheus` when [proxying](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert) Grafana requests to [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/). Grafana supports only `prometheus` and `loki` alerts. Without this fix, Grafana shows `Error loading alerts` when non-Prometheus alert types are returned. See [victoriametrics-datasource#329](https://github.com/VictoriaMetrics/victoriametrics-datasource/issues/329).
|
||||
|
||||
## [v1.122.17](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.17)
|
||||
|
||||
Released at 2026-03-13
|
||||
|
||||
@@ -117,7 +117,7 @@ It is allowed to run VictoriaMetrics and VictoriaLogs Enterprise components in [
|
||||
|
||||
Binary releases of Enterprise components are available at [the releases page for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest)
|
||||
and [the releases page for VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/releases/latest).
|
||||
Enterprise binaries and packages have `enterprise` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.138.0-enterprise.tar.gz`.
|
||||
Enterprise binaries and packages have `enterprise` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.139.0-enterprise.tar.gz`.
|
||||
|
||||
In order to run binary release of Enterprise component, please download the `*-enterprise.tar.gz` archive for your OS and architecture
|
||||
from the corresponding releases page and unpack it. Then run the unpacked binary.
|
||||
@@ -135,8 +135,8 @@ For example, the following command runs VictoriaMetrics Enterprise binary with t
|
||||
obtained at [this page](https://victoriametrics.com/products/enterprise/trial/):
|
||||
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.138.0/victoria-metrics-linux-amd64-v1.138.0-enterprise.tar.gz
|
||||
tar -xzf victoria-metrics-linux-amd64-v1.138.0-enterprise.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.139.0/victoria-metrics-linux-amd64-v1.139.0-enterprise.tar.gz
|
||||
tar -xzf victoria-metrics-linux-amd64-v1.139.0-enterprise.tar.gz
|
||||
./victoria-metrics-prod -license=BASE64_ENCODED_LICENSE_KEY
|
||||
```
|
||||
|
||||
@@ -151,7 +151,7 @@ Alternatively, VictoriaMetrics Enterprise license can be stored in the file and
|
||||
It is allowed to run VictoriaMetrics and VictoriaLogs Enterprise components in [cases listed here](#valid-cases-for-victoriametrics-enterprise).
|
||||
|
||||
Docker images for Enterprise components are available at [VictoriaMetrics Docker Hub](https://hub.docker.com/u/victoriametrics) and [VictoriaMetrics Quay](https://quay.io/organization/victoriametrics).
|
||||
Enterprise docker images have `enterprise` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.138.0-enterprise`.
|
||||
Enterprise docker images have `enterprise` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.139.0-enterprise`.
|
||||
|
||||
In order to run Docker image of VictoriaMetrics Enterprise component, it is required to provide the license key via the command-line
|
||||
flag as described in the [binary-releases](#binary-releases) section.
|
||||
@@ -161,13 +161,13 @@ Enterprise license key can be obtained at [this page](https://victoriametrics.co
|
||||
For example, the following command runs VictoriaMetrics Enterprise Docker image with the specified license key:
|
||||
|
||||
```sh
|
||||
docker run --name=victoria-metrics victoriametrics/victoria-metrics:v1.138.0-enterprise -license=BASE64_ENCODED_LICENSE_KEY
|
||||
docker run --name=victoria-metrics victoriametrics/victoria-metrics:v1.139.0-enterprise -license=BASE64_ENCODED_LICENSE_KEY
|
||||
```
|
||||
|
||||
Alternatively, the license code can be stored in the file and then referred via `-licenseFile` command-line flag:
|
||||
|
||||
```sh
|
||||
docker run --name=victoria-metrics -v /vm-license:/vm-license victoriametrics/victoria-metrics:v1.138.0-enterprise -licenseFile=/path/to/vm-license
|
||||
docker run --name=victoria-metrics -v /vm-license:/vm-license victoriametrics/victoria-metrics:v1.139.0-enterprise -licenseFile=/path/to/vm-license
|
||||
```
|
||||
|
||||
Example docker-compose configuration:
|
||||
@@ -177,7 +177,7 @@ version: "3.5"
|
||||
services:
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.138.0
|
||||
image: victoriametrics/victoria-metrics:v1.139.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -209,7 +209,7 @@ is used to provide the license key in plain-text:
|
||||
```yaml
|
||||
server:
|
||||
image:
|
||||
tag: v1.138.0-enterprise
|
||||
tag: v1.139.0-enterprise
|
||||
|
||||
license:
|
||||
key: {BASE64_ENCODED_LICENSE_KEY}
|
||||
@@ -220,7 +220,7 @@ In order to provide the license key via existing secret, the following values fi
|
||||
```yaml
|
||||
server:
|
||||
image:
|
||||
tag: v1.138.0-enterprise
|
||||
tag: v1.139.0-enterprise
|
||||
|
||||
license:
|
||||
secret:
|
||||
@@ -270,7 +270,7 @@ spec:
|
||||
license:
|
||||
key: {BASE64_ENCODED_LICENSE_KEY}
|
||||
image:
|
||||
tag: v1.138.0-enterprise
|
||||
tag: v1.139.0-enterprise
|
||||
```
|
||||
|
||||
In order to provide the license key via an existing secret, the following custom resource is used:
|
||||
@@ -287,7 +287,7 @@ spec:
|
||||
name: vm-license
|
||||
key: license
|
||||
image:
|
||||
tag: v1.138.0-enterprise
|
||||
tag: v1.139.0-enterprise
|
||||
```
|
||||
|
||||
Example secret with license key:
|
||||
@@ -338,7 +338,7 @@ Builds are available for amd64 and arm64 architectures.
|
||||
|
||||
Example archive:
|
||||
|
||||
`victoria-metrics-linux-amd64-v1.138.0-enterprise.tar.gz`
|
||||
`victoria-metrics-linux-amd64-v1.139.0-enterprise.tar.gz`
|
||||
|
||||
Includes:
|
||||
|
||||
@@ -347,7 +347,7 @@ Includes:
|
||||
|
||||
Example Docker image:
|
||||
|
||||
`victoriametrics/victoria-metrics:v1.138.0-enterprise-fips` – uses the FIPS-compatible binary and based on `scratch` image.
|
||||
`victoriametrics/victoria-metrics:v1.139.0-enterprise-fips` – uses the FIPS-compatible binary and based on `scratch` image.
|
||||
|
||||
## Monitoring license expiration
|
||||
|
||||
|
||||
@@ -35,8 +35,8 @@ scrape_configs:
|
||||
After you created the `scrape.yaml` file, download and unpack [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) to the same directory:
|
||||
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.138.0/victoria-metrics-linux-amd64-v1.138.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.138.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.139.0/victoria-metrics-linux-amd64-v1.139.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.139.0.tar.gz
|
||||
```
|
||||
|
||||
Then start VictoriaMetrics and instruct it to scrape targets defined in `scrape.yaml` and save scraped metrics
|
||||
@@ -150,8 +150,8 @@ Then start [single-node VictoriaMetrics](https://docs.victoriametrics.com/victor
|
||||
|
||||
```yaml
|
||||
# Download and unpack single-node VictoriaMetrics
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.138.0/victoria-metrics-linux-amd64-v1.138.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.138.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.139.0/victoria-metrics-linux-amd64-v1.139.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.139.0.tar.gz
|
||||
|
||||
# Run single-node VictoriaMetrics with the given scrape.yaml
|
||||
./victoria-metrics-prod -promscrape.config=scrape.yaml
|
||||
|
||||
@@ -285,7 +285,7 @@ See also [quantiles over input metrics](#quantiles-over-input-metrics) and [aggr
|
||||
[Histogram](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#histogram) is a set of [counter](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#counter)
|
||||
metrics with different `vmrange` or `le` labels. Since typical usage of histograms is to calculate quantiles over the
|
||||
buckets change via [histogram_quantile](https://docs.victoriametrics.com/victoriametrics/metricsql/#histogram_quantile) function the
|
||||
appropriate aggregation output for this is [total](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#rate_sum):
|
||||
appropriate aggregation output for this is [rate_sum](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/#rate_sum):
|
||||
|
||||
```yaml
|
||||
- match: 'http_request_duration_seconds_bucket'
|
||||
|
||||
@@ -461,7 +461,9 @@ VictoriaMetrics remote write protocol provides the following benefits comparing
|
||||
|
||||
* Reduced disk read/write IO and disk space usage at `vmagent` when the remote storage is temporarily unavailable.
|
||||
In this case `vmagent` buffers the incoming data to disk using the VictoriaMetrics remote write format.
|
||||
This reduces disk read/write IO and disk space usage by 2x-5x comparing to Prometheus remote write format.
|
||||
This reduces disk read/write IO and disk space usage by 2x-5x compared to Prometheus remote write format.
|
||||
|
||||
> See blogpost [Save network costs with VictoriaMetrics remote write protocol](https://victoriametrics.com/blog/victoriametrics-remote-write/).
|
||||
|
||||
`vmagent` uses VictoriaMetrics remote write protocol by default {{% available_from "v1.116.0" %}} when it sends data to VictoriaMetrics components such as other `vmagent` instances,
|
||||
[single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/)
|
||||
@@ -611,6 +613,8 @@ and attaches `instance`, `job` and other target-specific labels to these metrics
|
||||
`vmagent` sets `scrape_series_added` to zero when it runs with `-promscrape.noStaleMarkers` command-line flag
|
||||
or when it scrapes target with `no_stale_markers: true` option, e.g. when [staleness markers](#prometheus-staleness-markers) are disabled.
|
||||
|
||||
Restarting `vmagent` can cause `scrape_series_added` to rise because all time series are new to a newly started `vmagent`.
|
||||
|
||||
* `scrape_series_limit` - the limit on the number of unique [series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) the given target can expose according to [these docs](#cardinality-limiter).
|
||||
This metric is exposed only if the series limit is set.
|
||||
|
||||
@@ -937,6 +941,9 @@ The limit can be enforced by setting the following command-line flags:
|
||||
* `-remoteWrite.maxDailySeries` - limits the number of unique time series `vmagent` can write to remote storage systems during the last day.
|
||||
Useful for limiting daily churn rate.
|
||||
|
||||
It is possible to use `-1` as a value for these flags{{% available_from "#" %}} in order to enable series tracking but set limit to maximum possible value.
|
||||
This is useful in order to estimate the number of unique series which is written to remote storage systems without enforcing limits.
|
||||
|
||||
Both limits can be set simultaneously. If any of these limits is reached, then samples for new time series are dropped instead of sending
|
||||
them to remote storage systems. A sample of dropped series is put in the log with `WARNING` level.
|
||||
|
||||
|
||||
@@ -36,6 +36,10 @@ Pass `-help` to `vmauth` in order to see all the supported command-line flags wi
|
||||
|
||||
Feel free to [contact us](mailto:info@victoriametrics.com) if you need a customized auth proxy for VictoriaMetrics with the support of LDAP, SSO, RBAC, SAML, accounting, and rate limiting, such as [vmgateway](https://docs.victoriametrics.com/victoriametrics/vmgateway/).
|
||||
|
||||
> [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmauth_access)
|
||||
> provides built-in access control with organization-level roles, and lets you create and revoke read, write, or read/write access tokens with a couple of clicks from the UI, without the need of additional proxies.
|
||||
> See the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/) to get started.
|
||||
|
||||
## Use cases
|
||||
|
||||
* [Simple HTTP proxy](#simple-http-proxy)
|
||||
|
||||
@@ -9,7 +9,7 @@ sitemap:
|
||||
<!-- The file should not be updated manually. Run make docs-update-flags while preparing a new release to sync flags in docs from actual binaries. -->
|
||||
```shellhelp
|
||||
|
||||
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics.
|
||||
vmauth authenticates and authorizes incoming requests and proxies them to VictoriaMetrics components or any other HTTP backends.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/victoriametrics/vmauth/ .
|
||||
|
||||
|
||||
@@ -26,6 +26,10 @@ This tool simplifies the creation of hourly, daily, weekly and monthly backups.
|
||||
Enterprise binaries can be downloaded and evaluated for free from [the releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
See how to request a [free trial license](https://victoriametrics.com/products/enterprise/trial/).
|
||||
|
||||
> If you'd prefer not to manage backups at all, [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmbackup_intro)
|
||||
> handles backups and restores automatically without the need of extra configurations.
|
||||
> See the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/) to get started.
|
||||
|
||||
### Single node backup
|
||||
To make a complete backup for VictoriaMetrics single node run the following command:
|
||||
```sh
|
||||
|
||||
@@ -8,15 +8,43 @@ menu:
|
||||
weight: 5
|
||||
---
|
||||
|
||||
Thanos uses the same storage engine as Prometheus and its data layout on-disk should be the same. That means
|
||||
`vmctl` in mode [Prometheus](https://docs.victoriametrics.com/victoriametrics/vmctl/prometheus/) may be used for migrating historical data from Thanos.
|
||||
`vmctl` provides a dedicated `thanos` mode for migrating data from Thanos blocks to VictoriaMetrics.
|
||||
This mode supports both raw blocks (resolution=0) and downsampled blocks with all aggregate types.
|
||||
|
||||
## Migration via Prometheus mode
|
||||
> **Important:** Before copying Thanos blocks from object storage to local disk, stop the **Thanos Compactor**.
|
||||
> The Compactor is [not concurrency-safe](https://thanos.io/tip/components/compact.md/) and may compact,
|
||||
> downsample, or delete blocks while you are copying, resulting in an inconsistent snapshot.
|
||||
> Thanos Sidecar does not need to be stopped — it only uploads new blocks and does not modify existing ones.
|
||||
|
||||
## Migration via Thanos mode
|
||||
|
||||
The `thanos` mode reads Thanos snapshot blocks directly from disk, including:
|
||||
- **Raw blocks** (resolution=0): Original metrics data from Prometheus
|
||||
- **Downsampled blocks** (resolution=5m or 1h): Aggregated data with count, sum, min, max, and counter aggregates
|
||||
|
||||
### Downsampled data handling
|
||||
|
||||
When importing downsampled blocks, each aggregate type is imported as a separate metric with resolution and aggregate type suffixes.
|
||||
For example, a metric `http_requests_total` from a 5-minute downsampled block will be imported as:
|
||||
- `http_requests_total:5m:count` - number of raw samples aggregated into the 5-minute window
|
||||
- `http_requests_total:5m:sum` - sum of values in the 5-minute window
|
||||
- `http_requests_total:5m:min` - minimum value in the 5-minute window
|
||||
- `http_requests_total:5m:max` - maximum value in the 5-minute window
|
||||
- `http_requests_total:5m:counter` - cumulative counter value per window, intended for `rate()`/`increase()` calculations over downsampled data
|
||||
|
||||
You can filter which aggregate types to import using the `--thanos-aggr-types` flag:
|
||||
```sh
|
||||
vmctl thanos --thanos-snapshot thanos-data --vm-addr http://victoria-metrics:8428 --thanos-aggr-types=count --thanos-aggr-types=sum
|
||||
```
|
||||
|
||||
If `--thanos-aggr-types` is not specified, all aggregate types will be imported.
|
||||
|
||||
### Migration steps
|
||||
|
||||
We assume you're using **Thanos Sidecar** on your Prometheus pods, and that you have a separate Thanos Store installation.
|
||||
To migrate the data we need to start writing fresh (current) data to VictoriaMetrics, and migrate historical data in background.
|
||||
|
||||
### Current data
|
||||
#### Current data
|
||||
|
||||
1. For now, keep your Thanos Sidecar and Thanos-related Prometheus configuration, but make it to stream metrics to VictoriaMetrics:
|
||||
```yaml
|
||||
@@ -45,10 +73,10 @@ Replace `<tenant>` based on your [multitenancy settings](https://docs.victoriame
|
||||
3. Within two hours, Prometheus should finish its current data file and hand it off to Thanos Store for long term
|
||||
storage.
|
||||
|
||||
### Historical data
|
||||
#### Historical data
|
||||
|
||||
Let's assume your data is stored on S3 served by minio. You first need to copy that out to a local filesystem,
|
||||
then import it into VM using `vmctl` in [Prometheus mode](https://docs.victoriametrics.com/victoriametrics/vmctl/prometheus/).
|
||||
then import it into VM using `vmctl thanos` mode.
|
||||
|
||||
1. Copy data from minio.
|
||||
1. Run the `minio/mc` Docker container.
|
||||
@@ -79,11 +107,100 @@ then import it into VM using `vmctl` in [Prometheus mode](https://docs.victoriam
|
||||
|
||||
2. Import using `vmctl`.
|
||||
1. Follow the [Quick Start instructions](https://docs.victoriametrics.com/victoriametrics/vmctl/#quick-start) to get `vmctl` on your machine.
|
||||
1. Use [Prometheus](https://docs.victoriametrics.com/victoriametrics/vmctl/prometheus/) mode to import data:
|
||||
1. Use `thanos` mode to import data:
|
||||
```sh
|
||||
vmctl prometheus --prom-snapshot thanos-data --vm-addr http://victoria-metrics:8428
|
||||
vmctl thanos --thanos-snapshot thanos-data --vm-addr http://victoria-metrics:8428
|
||||
```
|
||||
|
||||
The output will look like this:
|
||||
```sh
|
||||
Thanos import mode
|
||||
2026/03/16 15:52:22 Processing blocks with aggregate types: [count sum min max counter]
|
||||
2026/03/16 15:52:22 Found 1 raw blocks and 2 downsampled blocks
|
||||
Thanos snapshot stats:
|
||||
blocks found: 3;
|
||||
blocks skipped by time filter: 0;
|
||||
min time: 1735689600000 (2025-01-01T01:00:00+01:00);
|
||||
max time: 1735696740001 (2025-01-01T02:59:00+01:00);
|
||||
samples: 488;
|
||||
series: 12.
|
||||
2026/03/16 15:52:22 Processing raw blocks (resolution=0)...
|
||||
2026/03/16 15:52:22 [Worker 0] Block 01KKS78P: 4 series, 480 samples | Total: 1/1 blocks, 4 series, 480 samples
|
||||
2026/03/16 15:52:22 Processing downsampled blocks with aggregate type: count
|
||||
2026/03/16 15:52:22 Processing 2 blocks for aggregate type: count
|
||||
2026/03/16 15:52:22 [Worker 0] Block 01KKS78P: 4 series, 96 samples | Total: 1/2 blocks, 4 series, 96 samples
|
||||
2026/03/16 15:52:22 [Worker 0] Block 01KKS78P: 4 series, 8 samples | Total: 2/2 blocks, 8 series, 104 samples
|
||||
2026/03/16 15:52:22 Processing downsampled blocks with aggregate type: sum
|
||||
2026/03/16 15:52:22 Processing 2 blocks for aggregate type: sum
|
||||
2026/03/16 15:52:22 [Worker 0] Block 01KKS78P: 4 series, 96 samples | Total: 1/2 blocks, 4 series, 96 samples
|
||||
2026/03/16 15:52:22 [Worker 0] Block 01KKS78P: 4 series, 8 samples | Total: 2/2 blocks, 8 series, 104 samples
|
||||
2026/03/16 15:52:22 Processing downsampled blocks with aggregate type: min
|
||||
2026/03/16 15:52:22 Processing 2 blocks for aggregate type: min
|
||||
2026/03/16 15:52:22 [Worker 0] Block 01KKS78P: 4 series, 96 samples | Total: 1/2 blocks, 4 series, 96 samples
|
||||
2026/03/16 15:52:22 [Worker 0] Block 01KKS78P: 4 series, 8 samples | Total: 2/2 blocks, 8 series, 104 samples
|
||||
2026/03/16 15:52:22 Processing downsampled blocks with aggregate type: max
|
||||
2026/03/16 15:52:22 Processing 2 blocks for aggregate type: max
|
||||
2026/03/16 15:52:22 [Worker 0] Block 01KKS78P: 4 series, 96 samples | Total: 1/2 blocks, 4 series, 96 samples
|
||||
2026/03/16 15:52:22 [Worker 0] Block 01KKS78P: 4 series, 8 samples | Total: 2/2 blocks, 8 series, 104 samples
|
||||
2026/03/16 15:52:22 Processing downsampled blocks with aggregate type: counter
|
||||
2026/03/16 15:52:22 Processing 2 blocks for aggregate type: counter
|
||||
2026/03/16 15:52:22 [Worker 0] Block 01KKS78P: 4 series, 96 samples | Total: 1/2 blocks, 4 series, 96 samples
|
||||
2026/03/16 15:52:22 [Worker 0] Block 01KKS78P: 4 series, 8 samples | Total: 2/2 blocks, 8 series, 104 samples
|
||||
2026/03/16 15:52:22 Migration statistics (1 raw blocks, 2 downsampled blocks):
|
||||
2026/03/16 15:52:22 raw: 4 series, 480 samples
|
||||
2026/03/16 15:52:22 count: 8 series, 104 samples
|
||||
2026/03/16 15:52:22 sum: 8 series, 104 samples
|
||||
2026/03/16 15:52:22 min: 8 series, 104 samples
|
||||
2026/03/16 15:52:22 max: 8 series, 104 samples
|
||||
2026/03/16 15:52:22 counter: 8 series, 104 samples
|
||||
2026/03/16 15:52:22 total: 44 series, 1000 samples
|
||||
2026/03/16 15:52:22 Import finished!
|
||||
2026/03/16 15:52:22 VictoriaMetrics importer stats:
|
||||
idle duration: 0s;
|
||||
time spent while importing: 10.314541ms;
|
||||
total samples: 1000;
|
||||
samples/s: 96950.51;
|
||||
total bytes: 23.5 kB;
|
||||
bytes/s: 2.3 MB;
|
||||
import requests: 2;
|
||||
import requests retries: 0;
|
||||
2026/03/16 15:52:22 Total time: 13.729792ms
|
||||
```
|
||||
|
||||
### Filtering data
|
||||
|
||||
You can filter the data to import using the following flags:
|
||||
- `--thanos-filter-time-start` - import only data with timestamps after this time (RFC3339 format)
|
||||
- `--thanos-filter-time-end` - import only data with timestamps before this time (RFC3339 format)
|
||||
- `--thanos-filter-label` - filter timeseries by label name (e.g., `__name__`)
|
||||
- `--thanos-filter-label-value` - regular expression to filter label values
|
||||
|
||||
Example with time filtering:
|
||||
```sh
|
||||
vmctl thanos --thanos-snapshot thanos-data --vm-addr http://victoria-metrics:8428 \
|
||||
--thanos-filter-time-start=2024-01-01T00:00:00Z \
|
||||
--thanos-filter-time-end=2024-12-31T23:59:59Z
|
||||
```
|
||||
|
||||
Example with label filtering (import only specific metrics):
|
||||
```sh
|
||||
vmctl thanos --thanos-snapshot thanos-data --vm-addr http://victoria-metrics:8428 \
|
||||
--thanos-filter-label=__name__ \
|
||||
--thanos-filter-label-value="http_requests_.*"
|
||||
```
|
||||
|
||||
## Thanos mode flags
|
||||
|
||||
| Flag | Description | Default |
|
||||
|------|-------------|---------|
|
||||
| `--thanos-snapshot` | Path to Thanos snapshot directory containing raw and/or downsampled blocks | (required) |
|
||||
| `--thanos-concurrency` | Number of concurrently running snapshot readers | 1 |
|
||||
| `--thanos-filter-time-start` | Time filter to select timeseries with timestamp equal or higher than provided value (RFC3339 format) | |
|
||||
| `--thanos-filter-time-end` | Time filter to select timeseries with timestamp equal or lower than provided value (RFC3339 format) | |
|
||||
| `--thanos-filter-label` | Label name to filter timeseries by | |
|
||||
| `--thanos-filter-label-value` | Regular expression to filter label values | `.*` |
|
||||
| `--thanos-aggr-types` | Aggregate types to import from downsampled blocks (count, sum, min, max, counter). Can be specified multiple times. If not set, all types are imported | |
|
||||
|
||||
## Remote read protocol
|
||||
|
||||
> Migration via remote read protocol allows to fetch data via API. This is usually a resource intensive operation
|
||||
@@ -142,4 +259,4 @@ Processing ranges: 8799 / 8799 [████████████████
|
||||
|
||||
See [remote-read mode](https://docs.victoriametrics.com/victoriametrics/vmctl/remoteread/) for more details.
|
||||
|
||||
See also general [vmctl migration tips](https://docs.victoriametrics.com/victoriametrics/vmctl/#migration-tips).
|
||||
See also general [vmctl migration tips](https://docs.victoriametrics.com/victoriametrics/vmctl/#migration-tips).
|
||||
|
||||
@@ -34,9 +34,9 @@ vmctl command-line tool is available as:
|
||||
|
||||
Download and unpack vmctl:
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.138.0/vmutils-darwin-arm64-v1.138.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.139.0/vmutils-darwin-arm64-v1.139.0.tar.gz
|
||||
|
||||
tar xzf vmutils-darwin-arm64-v1.138.0.tar.gz
|
||||
tar xzf vmutils-darwin-arm64-v1.139.0.tar.gz
|
||||
```
|
||||
|
||||
Once binary is unpacked, see the full list of supported modes by running the following command:
|
||||
|
||||
@@ -16,6 +16,10 @@ aliases:
|
||||
|
||||
Restore process can be interrupted at any time. It is automatically resumed when restarting `vmrestore` with the same args. If file preallocation is enabled{{% available_from "v1.139.0" %}}, it resumes from the last complete file, if file preallocation is disabled via `-skipFilePreallocation` then it resumes from the interruption point mid file.
|
||||
|
||||
> If you'd prefer not to manage backups/restores at all, [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmbackup_intro)
|
||||
> handles backups and restores automatically without the need of extra configurations.
|
||||
> See the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/) to get started.
|
||||
|
||||
## Usage
|
||||
|
||||
VictoriaMetrics must be stopped during the restore process.
|
||||
|
||||
6
go.mod
6
go.mod
@@ -1,6 +1,6 @@
|
||||
module github.com/VictoriaMetrics/VictoriaMetrics
|
||||
|
||||
go 1.26.1
|
||||
go 1.26.2
|
||||
|
||||
require (
|
||||
cloud.google.com/go/storage v1.60.0
|
||||
@@ -10,8 +10,8 @@ require (
|
||||
github.com/VictoriaMetrics/VictoriaLogs v0.0.0-20260218111324-95b48d57d032
|
||||
github.com/VictoriaMetrics/easyproto v1.2.0
|
||||
github.com/VictoriaMetrics/fastcache v1.13.3
|
||||
github.com/VictoriaMetrics/metrics v1.42.0
|
||||
github.com/VictoriaMetrics/metricsql v0.85.0
|
||||
github.com/VictoriaMetrics/metrics v1.43.1
|
||||
github.com/VictoriaMetrics/metricsql v0.86.0
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.1
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.8
|
||||
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.22.1
|
||||
|
||||
8
go.sum
8
go.sum
@@ -58,10 +58,10 @@ github.com/VictoriaMetrics/easyproto v1.2.0 h1:FJT9uNXA2isppFuJErbLqD306KoFlehl7
|
||||
github.com/VictoriaMetrics/easyproto v1.2.0/go.mod h1:QlGlzaJnDfFd8Lk6Ci/fuLxfTo3/GThPs2KH23mv710=
|
||||
github.com/VictoriaMetrics/fastcache v1.13.3 h1:rBabE0iIxcqKEMCwUmwHZ9dgEqXerg8FRbRDUvC7OVc=
|
||||
github.com/VictoriaMetrics/fastcache v1.13.3/go.mod h1:hHXhl4DA2fTL2HTZDJFXWgW0LNjo6B+4aj2Wmng3TjU=
|
||||
github.com/VictoriaMetrics/metrics v1.42.0 h1:t/OGs3BjMUYhxw/h83Z28qAss8DuA4QEVwO4NwJ9hZc=
|
||||
github.com/VictoriaMetrics/metrics v1.42.0/go.mod h1:xDM82ULLYCYdFRgQ2JBxi8Uf1+8En1So9YUwlGTOqTc=
|
||||
github.com/VictoriaMetrics/metricsql v0.85.0 h1:xI+EfqsOgY0T2yd7p8hcYQ52LOtf+1i8fQQzQ+RGtZM=
|
||||
github.com/VictoriaMetrics/metricsql v0.85.0/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ=
|
||||
github.com/VictoriaMetrics/metrics v1.43.1 h1:j3Ba4l2K1q3pkvzPqt6aSiQ2DBlAEj3VPVeBtpR3t/Y=
|
||||
github.com/VictoriaMetrics/metrics v1.43.1/go.mod h1:xDM82ULLYCYdFRgQ2JBxi8Uf1+8En1So9YUwlGTOqTc=
|
||||
github.com/VictoriaMetrics/metricsql v0.86.0 h1:IFD08amp+nkW6I+pB3+iyamewkIrbEojkQP4cmEbwkU=
|
||||
github.com/VictoriaMetrics/metricsql v0.86.0/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ=
|
||||
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
|
||||
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
|
||||
github.com/alecthomas/units v0.0.0-20240927000941-0f3dac36c52b h1:mimo19zliBX/vSQ6PWWSL9lK8qwHozUj03+zLoEB8O0=
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -164,7 +165,12 @@ func (fs *FS) Init(ctx context.Context) error {
|
||||
// See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9280
|
||||
"ExpiredToken": {},
|
||||
},
|
||||
})
|
||||
}, retry.IsErrorRetryableFunc(func(err error) aws.Ternary {
|
||||
if errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
return aws.TrueTernary
|
||||
}
|
||||
return aws.UnknownTernary
|
||||
}))
|
||||
})
|
||||
}),
|
||||
}
|
||||
|
||||
@@ -243,17 +243,12 @@ func newWriter(f *os.File, nocache bool) *Writer {
|
||||
|
||||
// MustClose syncs the underlying file to storage and then closes it.
|
||||
func (w *Writer) MustClose() {
|
||||
if err := w.bw.Flush(); err != nil {
|
||||
logger.Panicf("FATAL: cannot flush buffered data to file %q: %s", w.f.Name(), err)
|
||||
}
|
||||
w.flush()
|
||||
|
||||
putBufioWriter(w.bw)
|
||||
w.bw = nil
|
||||
|
||||
if !fsutil.IsFsyncDisabled() {
|
||||
if err := w.f.Sync(); err != nil {
|
||||
logger.Panicf("FATAL: cannot sync file %q: %s", w.f.Name(), err)
|
||||
}
|
||||
}
|
||||
w.sync()
|
||||
if err := w.st.close(); err != nil {
|
||||
logger.Panicf("FATAL: cannot close streamTracker for file %q: %s", w.f.Name(), err)
|
||||
}
|
||||
@@ -265,6 +260,24 @@ func (w *Writer) MustClose() {
|
||||
writersCount.Dec()
|
||||
}
|
||||
|
||||
func (w *Writer) flush() {
|
||||
if err := w.bw.Flush(); err != nil {
|
||||
logger.Panicf("FATAL: cannot flush buffered data to file %q: %s", w.f.Name(), err)
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Writer) sync() {
|
||||
if !fsutil.IsFsyncDisabled() {
|
||||
startTime := time.Now()
|
||||
if err := w.f.Sync(); err != nil {
|
||||
logger.Panicf("FATAL: cannot sync file %q: %s", w.f.Name(), err)
|
||||
}
|
||||
d := time.Since(startTime).Seconds()
|
||||
fsyncDuration.Add(d)
|
||||
fsyncCalls.Inc()
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
writeDuration = metrics.NewFloatCounter(`vm_filestream_write_duration_seconds_total`)
|
||||
writeCallsBuffered = metrics.NewCounter(`vm_filestream_buffered_write_calls_total`)
|
||||
@@ -272,6 +285,9 @@ var (
|
||||
writtenBytesBuffered = metrics.NewCounter(`vm_filestream_buffered_written_bytes_total`)
|
||||
writtenBytesReal = metrics.NewCounter(`vm_filestream_real_written_bytes_total`)
|
||||
writersCount = metrics.NewCounter(`vm_filestream_writers`)
|
||||
|
||||
fsyncDuration = metrics.NewFloatCounter(`vm_filestream_fsync_duration_seconds_total`)
|
||||
fsyncCalls = metrics.NewCounter(`vm_filestream_fsync_calls_total`)
|
||||
)
|
||||
|
||||
// Write writes p to the underlying file.
|
||||
@@ -292,13 +308,9 @@ func (w *Writer) Write(p []byte) (int, error) {
|
||||
//
|
||||
// if isSync is true, then the flushed data is fsynced to the underlying storage.
|
||||
func (w *Writer) MustFlush(isSync bool) {
|
||||
if err := w.bw.Flush(); err != nil {
|
||||
logger.Panicf("FATAL: cannot flush buffered data to file %q: %s", w.f.Name(), err)
|
||||
}
|
||||
w.flush()
|
||||
if isSync {
|
||||
if err := w.f.Sync(); err != nil {
|
||||
logger.Panicf("FATAL: cannot fsync data to the underlying storage for file %q: %s", w.f.Name(), err)
|
||||
}
|
||||
w.sync()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -786,23 +786,7 @@ func (uw *urlWatcher) reloadObjects() string {
|
||||
func (uw *urlWatcher) watchForUpdates() {
|
||||
gw := uw.gw
|
||||
stopCh := gw.ctx.Done()
|
||||
minBackoffDelay := timeutil.AddJitterToDuration(time.Second)
|
||||
maxBackoffDelay := timeutil.AddJitterToDuration(time.Second * 30)
|
||||
backoffDelay := minBackoffDelay
|
||||
backoffSleep := func() {
|
||||
t := timerpool.Get(backoffDelay)
|
||||
select {
|
||||
case <-stopCh:
|
||||
timerpool.Put(t)
|
||||
return
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
backoffDelay *= 2
|
||||
if backoffDelay > maxBackoffDelay {
|
||||
backoffDelay = maxBackoffDelay
|
||||
}
|
||||
}
|
||||
bt := timeutil.NewBackoffTimer(time.Second, time.Second*30)
|
||||
apiURL := uw.apiURL
|
||||
delimiter := getQueryArgsDelimiter(apiURL)
|
||||
timeoutSeconds := time.Duration(0.9 * float64(gw.client.Timeout)).Seconds()
|
||||
@@ -818,7 +802,7 @@ func (uw *urlWatcher) watchForUpdates() {
|
||||
|
||||
resourceVersion := uw.reloadObjects()
|
||||
if resourceVersion == "" {
|
||||
backoffSleep()
|
||||
bt.Wait(stopCh)
|
||||
continue
|
||||
}
|
||||
requestURL := apiURL + "&resourceVersion=" + url.QueryEscape(resourceVersion)
|
||||
@@ -826,25 +810,25 @@ func (uw *urlWatcher) watchForUpdates() {
|
||||
if err != nil {
|
||||
if !errors.Is(err, context.Canceled) {
|
||||
logger.Errorf("cannot perform request to %q: %s", requestURL, err)
|
||||
backoffSleep()
|
||||
bt.Wait(stopCh)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
if resp.StatusCode == 410 {
|
||||
// There is no need for sleep on 410 error. See https://kubernetes.io/docs/reference/using-api/api-concepts/#410-gone-responses
|
||||
backoffDelay = minBackoffDelay
|
||||
bt.Reset()
|
||||
uw.staleResourceVersions.Inc()
|
||||
uw.resourceVersion = ""
|
||||
} else {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
logger.Errorf("unexpected status code for request to %q: %d; want %d; response: %q", requestURL, resp.StatusCode, http.StatusOK, body)
|
||||
backoffSleep()
|
||||
bt.Wait(stopCh)
|
||||
}
|
||||
continue
|
||||
}
|
||||
backoffDelay = minBackoffDelay
|
||||
bt.Reset()
|
||||
err = uw.readObjectUpdateStream(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
if err != nil {
|
||||
@@ -852,7 +836,7 @@ func (uw *urlWatcher) watchForUpdates() {
|
||||
logger.Errorf("error when reading WatchEvent stream from %q: %s", requestURL, err)
|
||||
uw.resourceVersion = ""
|
||||
}
|
||||
backoffSleep()
|
||||
bt.Wait(stopCh)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user