mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-04 01:22:05 +03:00
Compare commits
100 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7f5a8af464 | ||
|
|
181a465c89 | ||
|
|
7288adab21 | ||
|
|
993a9d92d6 | ||
|
|
45c889a1cf | ||
|
|
dca5d44f2b | ||
|
|
9e4f0cc900 | ||
|
|
54dc9cc322 | ||
|
|
67e6752b82 | ||
|
|
bb54075c23 | ||
|
|
08f5220bc3 | ||
|
|
f9015da6eb | ||
|
|
539498058e | ||
|
|
b3d22403eb | ||
|
|
0fce51e3b4 | ||
|
|
9e118fe1ee | ||
|
|
3553c60399 | ||
|
|
9b54bd6e8d | ||
|
|
a90edc71c7 | ||
|
|
83deddc84c | ||
|
|
434cb7028c | ||
|
|
107b6517b7 | ||
|
|
f68f5b3113 | ||
|
|
d49b4a7550 | ||
|
|
bd8b4eb78b | ||
|
|
41558066db | ||
|
|
af064ca65a | ||
|
|
eced71a96d | ||
|
|
23fd269ccf | ||
|
|
1f5d02e059 | ||
|
|
690aaf7d2d | ||
|
|
1e0f7f0d28 | ||
|
|
c7a16e1df6 | ||
|
|
2cb909022f | ||
|
|
fe70b963e4 | ||
|
|
9bb726751c | ||
|
|
3c85ffb1e6 | ||
|
|
65cb6468ac | ||
|
|
8e645ea708 | ||
|
|
b95bdb5781 | ||
|
|
5ecc5770c2 | ||
|
|
02c03793b3 | ||
|
|
c74c4b24d7 | ||
|
|
07be0c6129 | ||
|
|
826c408e0e | ||
|
|
913b64d9b5 | ||
|
|
6b76dead5a | ||
|
|
41991edb34 | ||
|
|
eb7c21bde5 | ||
|
|
3cc8013dd9 | ||
|
|
1209f33c6d | ||
|
|
3c87e361ba | ||
|
|
f5c9c5bf01 | ||
|
|
7712a34ba6 | ||
|
|
d890bf52fe | ||
|
|
f52478dac7 | ||
|
|
bcc2c85e53 | ||
|
|
001f9218b1 | ||
|
|
f7fc897f85 | ||
|
|
e58b512305 | ||
|
|
d33efbbd95 | ||
|
|
23cb0475e9 | ||
|
|
3d3fcf8fcb | ||
|
|
d99e3e52f3 | ||
|
|
bbcfc0ce59 | ||
|
|
d9ac6867cb | ||
|
|
00712b184b | ||
|
|
30ca617960 | ||
|
|
aba5205896 | ||
|
|
aef59d9281 | ||
|
|
b1582b3012 | ||
|
|
dd769d87c0 | ||
|
|
febe9a2882 | ||
|
|
337ccd7c62 | ||
|
|
c9789b3c18 | ||
|
|
c9db487613 | ||
|
|
77fffb4dc7 | ||
|
|
8701ec0968 | ||
|
|
94f3302aca | ||
|
|
16909a2b6b | ||
|
|
51fdd885ea | ||
|
|
a213f5a423 | ||
|
|
3a812a8b28 | ||
|
|
4375699013 | ||
|
|
53a6bbfdf8 | ||
|
|
897f1b97e3 | ||
|
|
309f1898b3 | ||
|
|
8998526384 | ||
|
|
e55e2a4274 | ||
|
|
29ec5d2898 | ||
|
|
adef9693af | ||
|
|
8f01ac42a8 | ||
|
|
8223a5235f | ||
|
|
fe5f2bd5d7 | ||
|
|
00075ac4ee | ||
|
|
3f39946f99 | ||
|
|
1ddfd55e51 | ||
|
|
5bb012b67b | ||
|
|
78fb987bef | ||
|
|
a0084dc223 |
2
.github/pull_request_template.md
vendored
2
.github/pull_request_template.md
vendored
@@ -6,4 +6,4 @@ Please provide a brief description of the changes you made. Be as specific as po
|
||||
|
||||
The following checks are **mandatory**:
|
||||
|
||||
- [ ] My change adheres to [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/).
|
||||
- [ ] My change adheres to [VictoriaMetrics contributing guidelines](https://docs.victoriametrics.com/victoriametrics/contributing/#pull-request-checklist).
|
||||
|
||||
21
Makefile
21
Makefile
@@ -195,6 +195,25 @@ vmutils-crossbuild: \
|
||||
vmutils-openbsd-amd64 \
|
||||
vmutils-windows-amd64
|
||||
|
||||
publish-latest:
|
||||
PKG_TAG=$(TAG) APP_NAME=victoria-metrics $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmagent $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmalert $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmalert-tool $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmauth $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmbackup $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmrestore $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmctl $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG)-cluster APP_NAME=vminsert $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG)-cluster APP_NAME=vmselect $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG)-cluster APP_NAME=vmstorage $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG)-enterprise APP_NAME=vmgateway $(MAKE) publish-via-docker-latest
|
||||
PKG_TAG=$(TAG)-enterprise APP_NAME=vmbackupmanager $(MAKE) publish-via-docker-latest
|
||||
|
||||
publish-victoria-logs-latest:
|
||||
PKG_TAG=$(TAG) APP_NAME=victoria-logs $(MAKE) publish-via-docker-latest
|
||||
PKG_TAG=$(TAG) APP_NAME=vlogscli $(MAKE) publish-via-docker-latest
|
||||
|
||||
publish-release:
|
||||
rm -rf bin/*
|
||||
git checkout $(TAG) && $(MAKE) release && $(MAKE) publish && \
|
||||
@@ -526,7 +545,7 @@ test-full:
|
||||
test-full-386:
|
||||
GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
|
||||
integration-test: victoria-metrics vmagent vmalert vmauth vmctl
|
||||
integration-test: victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore
|
||||
go test ./apptest/... -skip="^TestCluster.*"
|
||||
|
||||
benchmark:
|
||||
|
||||
12
README.md
12
README.md
@@ -40,16 +40,16 @@ VictoriaMetrics is optimized for timeseries data, even when old time series are
|
||||
* **Easy to setup**: No dependencies, single [small binary](https://medium.com/@valyala/stripping-dependency-bloat-in-victoriametrics-docker-image-983fb5912b0d), configuration through command-line flags, but the default is also fine-tuned; backup and restore with [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282).
|
||||
* **Global query view**: Multiple Prometheus instances or any other data sources may ingest data into VictoriaMetrics, and all of it can be queried via a single query.
|
||||
* **Various Protocols**: Supports metric scraping, ingestion and backfilling in various protocols.
|
||||
* [Prometheus exporters](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter), [Prometheus remote write API](https://docs.victoriametrics.com/victoriametrics/integrations/prometheus), [Prometheus exposition format](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-prometheus-exposition-format).
|
||||
* [InfluxDB line protocol](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb) over HTTP, TCP and UDP.
|
||||
* [Prometheus exporters](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter), [Prometheus remote write API](https://docs.victoriametrics.com/victoriametrics/integrations/prometheus/), [Prometheus exposition format](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-prometheus-exposition-format).
|
||||
* [InfluxDB line protocol](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb/) over HTTP, TCP and UDP.
|
||||
* [Graphite plaintext protocol](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#ingesting) with [tags](https://graphite.readthedocs.io/en/latest/tags.html#carbon).
|
||||
* [OpenTSDB put message](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb#sending-data-via-telnet).
|
||||
* [HTTP OpenTSDB /api/put requests](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb#sending-data-via-http).
|
||||
* [OpenTSDB put message](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb/#sending-data-via-telnet).
|
||||
* [HTTP OpenTSDB /api/put requests](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb/#sending-data-via-http).
|
||||
* [JSON line format](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-json-line-format).
|
||||
* [Arbitrary CSV data](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-csv-data).
|
||||
* [Native binary format](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-native-format).
|
||||
* [DataDog agent or DogStatsD](https://docs.victoriametrics.com/victoriametrics/integrations/datadog).
|
||||
* [NewRelic infrastructure agent](https://docs.victoriametrics.com/victoriametrics/integrations/newrelic#sending-data-from-agent).
|
||||
* [DataDog agent or DogStatsD](https://docs.victoriametrics.com/victoriametrics/integrations/datadog/).
|
||||
* [NewRelic infrastructure agent](https://docs.victoriametrics.com/victoriametrics/integrations/newrelic/#sending-data-from-agent).
|
||||
* [OpenTelemetry metrics format](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#sending-data-via-opentelemetry).
|
||||
* **NFS-based storages**: Supports storing data on NFS-based storages such as Amazon EFS, Google Filestore.
|
||||
* And many other features such as metrics relabeling, cardinality limiter, etc.
|
||||
|
||||
@@ -101,9 +101,11 @@ func (lr *LineReader) readMoreData() bool {
|
||||
|
||||
bufLen := len(lr.buf)
|
||||
if bufLen >= MaxLineSizeBytes.IntN() {
|
||||
logger.Warnf("%s: the line length exceeds -insert.maxLineSizeBytes=%d; skipping it; line contents=%q", lr.name, MaxLineSizeBytes.IntN(), lr.buf)
|
||||
ok, skippedBytes := lr.skipUntilNextLine()
|
||||
logger.Warnf("%s: the line length exceeds -insert.maxLineSizeBytes=%d; skipping it; total skipped bytes=%d",
|
||||
lr.name, MaxLineSizeBytes.IntN(), skippedBytes)
|
||||
tooLongLinesSkipped.Inc()
|
||||
return lr.skipUntilNextLine()
|
||||
return ok
|
||||
}
|
||||
|
||||
lr.buf = slicesutil.SetLength(lr.buf, MaxLineSizeBytes.IntN())
|
||||
@@ -121,26 +123,35 @@ func (lr *LineReader) readMoreData() bool {
|
||||
|
||||
var tooLongLinesSkipped = metrics.NewCounter("vl_too_long_lines_skipped_total")
|
||||
|
||||
func (lr *LineReader) skipUntilNextLine() bool {
|
||||
func (lr *LineReader) skipUntilNextLine() (bool, int) {
|
||||
|
||||
// Initialize skipped bytes count with MaxLineSizeBytes because
|
||||
// we've already read that many bytes without encountering a newline,
|
||||
// indicating the line size exceeds the maximum allowed limit.
|
||||
skipSizeBytes := MaxLineSizeBytes.IntN()
|
||||
|
||||
for {
|
||||
lr.buf = slicesutil.SetLength(lr.buf, MaxLineSizeBytes.IntN())
|
||||
n, err := lr.r.Read(lr.buf)
|
||||
skipSizeBytes += n
|
||||
lr.buf = lr.buf[:n]
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
lr.eofReached = true
|
||||
lr.buf = lr.buf[:0]
|
||||
return true
|
||||
return true, skipSizeBytes
|
||||
}
|
||||
lr.err = fmt.Errorf("cannot skip the current line: %s", err)
|
||||
return false
|
||||
return false, skipSizeBytes
|
||||
}
|
||||
if n := bytes.IndexByte(lr.buf, '\n'); n >= 0 {
|
||||
// Include skipped bytes before \n, including the newline itself.
|
||||
skipSizeBytes += n + 1 - len(lr.buf)
|
||||
// Include \n in the buf, so too long line is replaced with an empty line.
|
||||
// This is needed for maintaining synchronization consistency between lines
|
||||
// in protocols such as Elasticsearch bulk import.
|
||||
lr.buf = append(lr.buf[:0], lr.buf[n:]...)
|
||||
return true
|
||||
return true, skipSizeBytes
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package internalinsert
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
@@ -18,17 +17,11 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
disableInsert = flag.Bool("internalinsert.disable", false, "Whether to disable /internal/insert HTTP endpoint")
|
||||
maxRequestSize = flagutil.NewBytes("internalinsert.maxRequestSize", 64*1024*1024, "The maximum size in bytes of a single request, which can be accepted at /internal/insert HTTP endpoint")
|
||||
)
|
||||
|
||||
// RequestHandler processes /internal/insert requests.
|
||||
func RequestHandler(w http.ResponseWriter, r *http.Request) {
|
||||
if *disableInsert {
|
||||
httpserver.Errorf(w, r, "requests to /internal/insert are disabled with -internalinsert.disable command-line flag")
|
||||
return
|
||||
}
|
||||
|
||||
startTime := time.Now()
|
||||
if r.Method != "POST" {
|
||||
w.WriteHeader(http.StatusMethodNotAllowed)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package vlinsert
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
@@ -13,6 +14,12 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/loki"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/opentelemetry"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/syslog"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
)
|
||||
|
||||
var (
|
||||
disableInsert = flag.Bool("insert.disable", false, "Whether to disable /insert/* HTTP endpoints")
|
||||
disableInternal = flag.Bool("internalinsert.disable", false, "Whether to disable /internal/insert HTTP endpoint")
|
||||
)
|
||||
|
||||
// Init initializes vlinsert
|
||||
@@ -27,19 +34,31 @@ func Stop() {
|
||||
|
||||
// RequestHandler handles insert requests for VictoriaLogs
|
||||
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
path := r.URL.Path
|
||||
path := strings.ReplaceAll(r.URL.Path, "//", "/")
|
||||
|
||||
if strings.HasPrefix(path, "/insert/") {
|
||||
if *disableInsert {
|
||||
httpserver.Errorf(w, r, "requests to /insert/* are disabled with -insert.disable command-line flag")
|
||||
return true
|
||||
}
|
||||
|
||||
return insertHandler(w, r, path)
|
||||
}
|
||||
|
||||
if path == "/internal/insert" {
|
||||
if *disableInternal || *disableInsert {
|
||||
httpserver.Errorf(w, r, "requests to /internal/insert are disabled with -internalinsert.disable or -insert.disable command-line flag")
|
||||
return true
|
||||
}
|
||||
internalinsert.RequestHandler(w, r)
|
||||
return true
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(path, "/insert/") {
|
||||
// Skip requests, which do not start with /insert/, since these aren't our requests.
|
||||
return false
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func insertHandler(w http.ResponseWriter, r *http.Request, path string) bool {
|
||||
path = strings.TrimPrefix(path, "/insert")
|
||||
path = strings.ReplaceAll(path, "//", "/")
|
||||
|
||||
switch path {
|
||||
case "/jsonline":
|
||||
@@ -69,7 +88,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case strings.HasPrefix(path, "/datadog/"):
|
||||
path = strings.TrimPrefix(path, "/datadog")
|
||||
return datadog.RequestHandler(path, w, r)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ package internalselect
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
@@ -22,15 +21,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
)
|
||||
|
||||
var disableSelect = flag.Bool("internalselect.disable", false, "Whether to disable /internal/select/* HTTP endpoints")
|
||||
|
||||
// RequestHandler processes requests to /internal/select/*
|
||||
func RequestHandler(ctx context.Context, w http.ResponseWriter, r *http.Request) {
|
||||
if *disableSelect {
|
||||
httpserver.Errorf(w, r, "requests to /internal/select/* are disabled with -internalselect.disable command-line flag")
|
||||
return
|
||||
}
|
||||
|
||||
startTime := time.Now()
|
||||
|
||||
path := r.URL.Path
|
||||
|
||||
@@ -55,7 +55,10 @@ func ProcessFacetsRequest(ctx context.Context, w http.ResponseWriter, r *http.Re
|
||||
}
|
||||
keepConstFields := httputil.GetBool(r, "keep_const_fields")
|
||||
|
||||
// Pipes must be dropped, since it is expected facets are obtained
|
||||
// from the real logs stored in the database.
|
||||
q.DropAllPipes()
|
||||
|
||||
q.AddFacetsPipe(limit, maxValuesPerField, maxValueLen, keepConstFields)
|
||||
|
||||
var mLock sync.Mutex
|
||||
@@ -156,8 +159,10 @@ func ProcessHitsRequest(ctx context.Context, w http.ResponseWriter, r *http.Requ
|
||||
fieldsLimit = 0
|
||||
}
|
||||
|
||||
// Prepare the query for hits count.
|
||||
// Pipes must be dropped, since it is expected hits are obtained
|
||||
// from the real logs stored in the database.
|
||||
q.DropAllPipes()
|
||||
|
||||
q.AddCountByTimePipe(int64(step), int64(offset), fields)
|
||||
|
||||
var mLock sync.Mutex
|
||||
@@ -290,6 +295,10 @@ func ProcessFieldNamesRequest(ctx context.Context, w http.ResponseWriter, r *htt
|
||||
return
|
||||
}
|
||||
|
||||
// Pipes must be dropped, since it is expected field names are obtained
|
||||
// from the real logs stored in the database.
|
||||
q.DropAllPipes()
|
||||
|
||||
// Obtain field names for the given query
|
||||
fieldNames, err := vlstorage.GetFieldNames(ctx, tenantIDs, q)
|
||||
if err != nil {
|
||||
@@ -329,6 +338,10 @@ func ProcessFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *ht
|
||||
limit = 0
|
||||
}
|
||||
|
||||
// Pipes must be dropped, since it is expected field values are obtained
|
||||
// from the real logs stored in the database.
|
||||
q.DropAllPipes()
|
||||
|
||||
// Obtain unique values for the given field
|
||||
values, err := vlstorage.GetFieldValues(ctx, tenantIDs, q, fieldName, uint64(limit))
|
||||
if err != nil {
|
||||
@@ -351,6 +364,10 @@ func ProcessStreamFieldNamesRequest(ctx context.Context, w http.ResponseWriter,
|
||||
return
|
||||
}
|
||||
|
||||
// Pipes must be dropped, since it is expected stream field names are obtained
|
||||
// from the real logs stored in the database.
|
||||
q.DropAllPipes()
|
||||
|
||||
// Obtain stream field names for the given query
|
||||
names, err := vlstorage.GetStreamFieldNames(ctx, tenantIDs, q)
|
||||
if err != nil {
|
||||
@@ -389,6 +406,10 @@ func ProcessStreamFieldValuesRequest(ctx context.Context, w http.ResponseWriter,
|
||||
limit = 0
|
||||
}
|
||||
|
||||
// Pipes must be dropped, since it is expected stream field values are obtained
|
||||
// from the real logs stored in the database.
|
||||
q.DropAllPipes()
|
||||
|
||||
// Obtain stream field values for the given query and the given fieldName
|
||||
values, err := vlstorage.GetStreamFieldValues(ctx, tenantIDs, q, fieldName, uint64(limit))
|
||||
if err != nil {
|
||||
@@ -420,6 +441,10 @@ func ProcessStreamIDsRequest(ctx context.Context, w http.ResponseWriter, r *http
|
||||
limit = 0
|
||||
}
|
||||
|
||||
// Pipes must be dropped, since it is expected stream ids are obtained
|
||||
// from the real logs stored in the database.
|
||||
q.DropAllPipes()
|
||||
|
||||
// Obtain streamIDs for the given query
|
||||
streamIDs, err := vlstorage.GetStreamIDs(ctx, tenantIDs, q, uint64(limit))
|
||||
if err != nil {
|
||||
@@ -451,6 +476,10 @@ func ProcessStreamsRequest(ctx context.Context, w http.ResponseWriter, r *http.R
|
||||
limit = 0
|
||||
}
|
||||
|
||||
// Pipes must be dropped, since it is expected streams are obtained
|
||||
// from the real logs stored in the database.
|
||||
q.DropAllPipes()
|
||||
|
||||
// Obtain streams for the given query
|
||||
streams, err := vlstorage.GetStreams(ctx, tenantIDs, q, uint64(limit))
|
||||
if err != nil {
|
||||
@@ -551,7 +580,7 @@ var liveTailRequests = metrics.NewCounter(`vl_live_tailing_requests`)
|
||||
const tailOffsetNsecs = 5e9
|
||||
|
||||
type logRow struct {
|
||||
timestamp string
|
||||
timestamp int64
|
||||
fields []logstorage.Field
|
||||
}
|
||||
|
||||
@@ -567,7 +596,7 @@ type tailProcessor struct {
|
||||
mu sync.Mutex
|
||||
|
||||
perStreamRows map[string][]logRow
|
||||
lastTimestamps map[string]string
|
||||
lastTimestamps map[string]int64
|
||||
|
||||
err error
|
||||
}
|
||||
@@ -577,7 +606,7 @@ func newTailProcessor(cancel func()) *tailProcessor {
|
||||
cancel: cancel,
|
||||
|
||||
perStreamRows: make(map[string][]logRow),
|
||||
lastTimestamps: make(map[string]string),
|
||||
lastTimestamps: make(map[string]int64),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -594,7 +623,7 @@ func (tp *tailProcessor) writeBlock(_ uint, db *logstorage.DataBlock) {
|
||||
}
|
||||
|
||||
// Make sure columns contain _time field, since it is needed for proper tail work.
|
||||
timestamps, ok := db.GetTimestamps()
|
||||
timestamps, ok := db.GetTimestamps(nil)
|
||||
if !ok {
|
||||
tp.err = fmt.Errorf("missing _time field")
|
||||
tp.cancel()
|
||||
@@ -1043,9 +1072,7 @@ func getLastNQueryResults(ctx context.Context, tenantIDs []logstorage.TenantID,
|
||||
}
|
||||
|
||||
func getLastNRows(rows []logRow, limit int) []logRow {
|
||||
sort.Slice(rows, func(i, j int) bool {
|
||||
return rows[i].timestamp < rows[j].timestamp
|
||||
})
|
||||
sortLogRows(rows)
|
||||
if len(rows) > limit {
|
||||
rows = rows[len(rows)-limit:]
|
||||
}
|
||||
@@ -1070,7 +1097,7 @@ func getQueryResultsWithLimit(ctx context.Context, tenantIDs []logstorage.Tenant
|
||||
clonedColumnNames[i] = strings.Clone(c.Name)
|
||||
}
|
||||
|
||||
timestamps, ok := db.GetTimestamps()
|
||||
timestamps, ok := db.GetTimestamps(nil)
|
||||
if !ok {
|
||||
missingTimeColumn.Store(true)
|
||||
cancel()
|
||||
|
||||
@@ -25,6 +25,9 @@ var (
|
||||
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the search request waits for execution when -search.maxConcurrentRequests "+
|
||||
"limit is reached; see also -search.maxQueryDuration")
|
||||
maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution. It can be overridden to a smaller value on a per-query basis via 'timeout' query arg")
|
||||
|
||||
disableSelect = flag.Bool("select.disable", false, "Whether to disable /select/* HTTP endpoints")
|
||||
disableInternal = flag.Bool("internalselect.disable", false, "Whether to disable /internal/select/* HTTP endpoints")
|
||||
)
|
||||
|
||||
func getDefaultMaxConcurrentRequests() int {
|
||||
@@ -71,13 +74,31 @@ var vmuiFileServer = http.FileServer(http.FS(vmuiFiles))
|
||||
|
||||
// RequestHandler handles select requests for VictoriaLogs
|
||||
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
path := r.URL.Path
|
||||
path := strings.ReplaceAll(r.URL.Path, "//", "/")
|
||||
|
||||
if !strings.HasPrefix(path, "/select/") && !strings.HasPrefix(path, "/internal/select/") {
|
||||
// Skip requests, which do not start with /select/, since these aren't our requests.
|
||||
return false
|
||||
if strings.HasPrefix(path, "/select/") {
|
||||
if *disableSelect {
|
||||
httpserver.Errorf(w, r, "requests to /select/* are disabled with -select.disable command-line flag")
|
||||
return true
|
||||
}
|
||||
|
||||
return selectHandler(w, r, path)
|
||||
}
|
||||
path = strings.ReplaceAll(path, "//", "/")
|
||||
|
||||
if strings.HasPrefix(path, "/internal/select/") {
|
||||
if *disableInternal || *disableSelect {
|
||||
httpserver.Errorf(w, r, "requests to /internal/select/* are disabled with -internalselect.disable or -select.disable command-line flag")
|
||||
return true
|
||||
}
|
||||
internalselect.RequestHandler(r.Context(), w, r)
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func selectHandler(w http.ResponseWriter, r *http.Request, path string) bool {
|
||||
ctx := r.Context()
|
||||
|
||||
if path == "/select/vmui" {
|
||||
// VMUI access via incomplete url without `/` in the end. Redirect to complete url.
|
||||
@@ -100,7 +121,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
ctx := r.Context()
|
||||
if path == "/select/logsql/tail" {
|
||||
logsqlTailRequests.Inc()
|
||||
// Process live tailing request without timeout, since it is OK to run live tailing requests for very long time.
|
||||
@@ -120,13 +140,6 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
defer decRequestConcurrency()
|
||||
|
||||
if strings.HasPrefix(path, "/internal/select/") {
|
||||
// Process internal request from vlselect without timeout (e.g. use ctx instead of ctxWithTimeout),
|
||||
// since the timeout must be controlled by the vlselect.
|
||||
internalselect.RequestHandler(ctx, w, r)
|
||||
return true
|
||||
}
|
||||
|
||||
ok := processSelectRequest(ctxWithTimeout, w, r, path)
|
||||
if !ok {
|
||||
return false
|
||||
|
||||
@@ -66,8 +66,8 @@ or at your own [VictoriaMetrics instance](https://docs.victoriametrics.com/victo
|
||||
The list of MetricsQL features on top of PromQL:
|
||||
|
||||
* Graphite-compatible filters can be passed via `{__graphite__="foo.*.bar"}` syntax.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite#selecting-graphite-metrics).
|
||||
VictoriaMetrics can be used as Graphite datasource in Grafana. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite#graphite-api-usage) for details.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#selecting-graphite-metrics).
|
||||
VictoriaMetrics can be used as Graphite datasource in Grafana. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#graphite-api-usage) for details.
|
||||
See also [label_graphite_group](#label_graphite_group) function, which can be used for extracting the given groups from Graphite metric name.
|
||||
* Lookbehind window in square brackets for [rollup functions](#rollup-functions) may be omitted. VictoriaMetrics automatically selects the lookbehind window
|
||||
depending on the `step` query arg passed to [/api/v1/query_range](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#range-query)
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
208
app/vlselect/vmui/assets/index-DhqzKCNf.js
Normal file
208
app/vlselect/vmui/assets/index-DhqzKCNf.js
Normal file
File diff suppressed because one or more lines are too long
@@ -35,10 +35,10 @@
|
||||
<meta property="og:title" content="UI for VictoriaLogs">
|
||||
<meta property="og:url" content="https://victoriametrics.com/products/victorialogs/">
|
||||
<meta property="og:description" content="Explore your log data with VictoriaLogs UI">
|
||||
<script type="module" crossorigin src="./assets/index-DLp5TlUn.js"></script>
|
||||
<script type="module" crossorigin src="./assets/index-DhqzKCNf.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-D8IJGiEn.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-D1GxaB_c.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-C85_NB5q.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-D5re9hC6.css">
|
||||
</head>
|
||||
<body>
|
||||
<noscript>You need to enable JavaScript to run this app.</noscript>
|
||||
|
||||
@@ -248,6 +248,9 @@ func (sn *storageNode) executeRequestAt(ctx context.Context, path string, args u
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error when creating a request: %s", err)
|
||||
}
|
||||
if err := sn.ac.SetHeaders(req, true); err != nil {
|
||||
return nil, fmt.Errorf("cannot set auth headers for %q: %w", reqURL, err)
|
||||
}
|
||||
|
||||
// send the request to the storage node
|
||||
resp, err := sn.c.Do(req)
|
||||
|
||||
@@ -120,6 +120,9 @@ func normalizeURL(uOrig *url.URL) *url.URL {
|
||||
u := *uOrig
|
||||
// Prevent attacks that use `..` in r.URL.Path
|
||||
u.Path = path.Clean(u.Path)
|
||||
if u.Path == "." {
|
||||
u.Path = "/"
|
||||
}
|
||||
if !strings.HasSuffix(u.Path, "/") && strings.HasSuffix(uOrig.Path, "/") {
|
||||
// The path.Clean() removes trailing slash.
|
||||
// Return it back if needed.
|
||||
|
||||
@@ -128,7 +128,40 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
// Simple routing with `url_prefix`
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar"),
|
||||
}, "", "http://foo.bar/.", "", "", nil, "least_loaded", 0)
|
||||
}, "", "http://foo.bar", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar"),
|
||||
}, "/", "http://foo.bar", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar"),
|
||||
}, "http://aaa///", "http://foo.bar", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/"),
|
||||
}, "/", "http://foo.bar/", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/"),
|
||||
}, "/x", "http://foo.bar/x", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/"),
|
||||
}, "/x/", "http://foo.bar/x/", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/"),
|
||||
}, "http://abc///x/", "http://foo.bar/x/", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/"),
|
||||
}, "http://foo//x", "http://foo.bar/x", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/baz"),
|
||||
}, "", "http://foo.bar/baz", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/baz"),
|
||||
}, "/", "http://foo.bar/baz", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/x/"),
|
||||
}, "/abc", "http://foo.bar/x/abc", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/x/"),
|
||||
}, "/abc/", "http://foo.bar/x/abc/", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar"),
|
||||
HeadersConf: HeadersConf{
|
||||
@@ -149,6 +182,12 @@ func TestCreateTargetURLSuccess(t *testing.T) {
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar"),
|
||||
}, "a/b?c=d", "http://foo.bar/a/b?c=d", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar"),
|
||||
}, "/a/b?c=d", "http://foo.bar/a/b?c=d", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("http://foo.bar/"),
|
||||
}, "/a/b?c=d", "http://foo.bar/a/b?c=d", "", "", nil, "least_loaded", 0)
|
||||
f(&UserInfo{
|
||||
URLPrefix: mustParseURL("https://sss:3894/x/y"),
|
||||
}, "/z", "https://sss:3894/x/y/z", "", "", nil, "least_loaded", 0)
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/barpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/native"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/remoteread"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/influx"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/opentsdb"
|
||||
@@ -44,6 +45,7 @@ func main() {
|
||||
if c.Bool(globalDisableProgressBar) {
|
||||
barpool.Disable(true)
|
||||
}
|
||||
netutil.EnableIPv6()
|
||||
return nil
|
||||
}
|
||||
app := &cli.App{
|
||||
|
||||
@@ -15,7 +15,7 @@ import (
|
||||
)
|
||||
|
||||
var maxGraphiteSeries = flag.Int("search.maxGraphiteSeries", 300e3, "The maximum number of time series, which can be scanned during queries to Graphite Render API. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/integrations/graphite#render-api")
|
||||
"See https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#render-api")
|
||||
|
||||
type evalConfig struct {
|
||||
startTime int64
|
||||
|
||||
@@ -22,9 +22,9 @@ import (
|
||||
|
||||
var (
|
||||
maxGraphiteTagKeysPerSearch = flag.Int("search.maxGraphiteTagKeys", 100e3, "The maximum number of tag keys returned from Graphite API, which returns tags. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/integrations/graphite#tags-api")
|
||||
"See https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#tags-api")
|
||||
maxGraphiteTagValuesPerSearch = flag.Int("search.maxGraphiteTagValues", 100e3, "The maximum number of tag values returned from Graphite API, which returns tag values. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/integrations/graphite#tags-api")
|
||||
"See https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#tags-api")
|
||||
)
|
||||
|
||||
// TagsDelSeriesHandler implements /tags/delSeries handler.
|
||||
|
||||
@@ -818,6 +818,7 @@ func QueryHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseWr
|
||||
LookbackDelta: lookbackDelta,
|
||||
RoundDigits: getRoundDigits(r),
|
||||
EnforcedTagFilterss: etfs,
|
||||
CacheTagFilters: etfs,
|
||||
GetRequestURI: func() string {
|
||||
return httpserver.GetRequestURI(r)
|
||||
},
|
||||
@@ -927,6 +928,7 @@ func queryRangeHandler(qt *querytracer.Tracer, startTime time.Time, w http.Respo
|
||||
LookbackDelta: lookbackDelta,
|
||||
RoundDigits: getRoundDigits(r),
|
||||
EnforcedTagFilterss: etfs,
|
||||
CacheTagFilters: etfs,
|
||||
GetRequestURI: func() string {
|
||||
return httpserver.GetRequestURI(r)
|
||||
},
|
||||
|
||||
@@ -5,8 +5,10 @@ import (
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/atomicutil"
|
||||
)
|
||||
|
||||
// callbacks for optimized incremental calculations for aggregate functions
|
||||
@@ -66,9 +68,8 @@ var incrementalAggrFuncCallbacksMap = map[string]*incrementalAggrFuncCallbacks{
|
||||
type incrementalAggrContextMap struct {
|
||||
m map[string]*incrementalAggrContext
|
||||
|
||||
// The padding prevents false sharing on widespread platforms with
|
||||
// 128 mod (cache line size) = 0 .
|
||||
_ [128 - unsafe.Sizeof(map[string]*incrementalAggrContext{})%128]byte
|
||||
// The padding prevents false sharing
|
||||
_ [atomicutil.CacheLineSize - unsafe.Sizeof(map[string]*incrementalAggrContext{})%atomicutil.CacheLineSize]byte
|
||||
}
|
||||
|
||||
type incrementalAggrFuncContext struct {
|
||||
|
||||
@@ -17,6 +17,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/atomicutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
@@ -139,6 +140,13 @@ type EvalConfig struct {
|
||||
// EnforcedTagFilterss may contain additional label filters to use in the query.
|
||||
EnforcedTagFilterss [][]storage.TagFilter
|
||||
|
||||
// CacheTagFilters stores the original tag-filter sets and extra_label from the request.
|
||||
// The slice is never modified after creation and is used only to build
|
||||
// the query-cache key.
|
||||
//
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9001
|
||||
CacheTagFilters [][]storage.TagFilter
|
||||
|
||||
// The callback, which returns the request URI during logging.
|
||||
// The request URI isn't stored here because its construction may take non-trivial amounts of CPU.
|
||||
GetRequestURI func() string
|
||||
@@ -165,6 +173,7 @@ func copyEvalConfig(src *EvalConfig) *EvalConfig {
|
||||
ec.LookbackDelta = src.LookbackDelta
|
||||
ec.RoundDigits = src.RoundDigits
|
||||
ec.EnforcedTagFilterss = src.EnforcedTagFilterss
|
||||
ec.CacheTagFilters = src.CacheTagFilters
|
||||
ec.GetRequestURI = src.GetRequestURI
|
||||
ec.QueryStats = src.QueryStats
|
||||
|
||||
@@ -1885,9 +1894,8 @@ func doRollupForTimeseries(funcName string, keepMetricNames bool, rc *rollupConf
|
||||
type timeseriesWithPadding struct {
|
||||
tss []*timeseries
|
||||
|
||||
// The padding prevents false sharing on widespread platforms with
|
||||
// 128 mod (cache line size) = 0 .
|
||||
_ [128 - unsafe.Sizeof([]*timeseries{})%128]byte
|
||||
// The padding prevents false sharing
|
||||
_ [atomicutil.CacheLineSize - unsafe.Sizeof([]*timeseries{})%atomicutil.CacheLineSize]byte
|
||||
}
|
||||
|
||||
type timeseriesByWorkerID struct {
|
||||
|
||||
@@ -529,7 +529,7 @@ type rollupFuncArg struct {
|
||||
timestamps []int64
|
||||
|
||||
// Real value preceding values.
|
||||
// Is populated if preceding value is within the -search.maxStalenessInterval (rc.LookbackDelta).
|
||||
// Is populated if preceding value is within the rc.LookbackDelta.
|
||||
realPrevValue float64
|
||||
|
||||
// Real value which goes after values.
|
||||
@@ -776,13 +776,18 @@ func (rc *rollupConfig) doInternal(dstValues []float64, tsm *timeseriesMap, valu
|
||||
rfa.realPrevValue = nan
|
||||
if i > 0 {
|
||||
prevValue, prevTimestamp := values[i-1], timestamps[i-1]
|
||||
// set realPrevValue if rc.LookbackDelta == 0
|
||||
// or if distance between datapoint in prev interval and beginning of this interval
|
||||
// set realPrevValue if rc.LookbackDelta == 0 or
|
||||
// if distance between datapoint in prev interval and first datapoint in this interval
|
||||
// doesn't exceed LookbackDelta.
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1381
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/894
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8045
|
||||
if rc.LookbackDelta == 0 || (tStart-prevTimestamp) < rc.LookbackDelta {
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8935
|
||||
currTimestamp := tStart
|
||||
if len(rfa.timestamps) > 0 {
|
||||
currTimestamp = rfa.timestamps[0]
|
||||
}
|
||||
if rc.LookbackDelta == 0 || (currTimestamp-prevTimestamp) < rc.LookbackDelta {
|
||||
rfa.realPrevValue = prevValue
|
||||
}
|
||||
}
|
||||
@@ -1826,14 +1831,18 @@ func rollupIncreasePure(rfa *rollupFuncArg) float64 {
|
||||
// There is no need in handling NaNs here, since they must be cleaned up
|
||||
// before calling rollup funcs.
|
||||
values := rfa.values
|
||||
// restore to the real value because of potential staleness reset
|
||||
prevValue := rfa.realPrevValue
|
||||
prevValue := rfa.prevValue
|
||||
if math.IsNaN(prevValue) {
|
||||
if len(values) == 0 {
|
||||
return nan
|
||||
}
|
||||
// Assume the counter starts from 0.
|
||||
prevValue = 0
|
||||
if !math.IsNaN(rfa.realPrevValue) {
|
||||
// Assume that the value didn't change during the current gap
|
||||
// if realPrevValue exists.
|
||||
prevValue = rfa.realPrevValue
|
||||
}
|
||||
}
|
||||
if len(values) == 0 {
|
||||
// Assume the counter didn't change since prevValue.
|
||||
|
||||
@@ -291,7 +291,7 @@ func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig,
|
||||
bb := bbPool.Get()
|
||||
defer bbPool.Put(bb)
|
||||
|
||||
bb.B = marshalRollupResultCacheKeyForSeries(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
|
||||
bb.B = marshalRollupResultCacheKeyForSeries(bb.B[:0], expr, window, ec.Step, ec.CacheTagFilters)
|
||||
metainfoBuf := rrc.c.Get(nil, bb.B)
|
||||
if len(metainfoBuf) == 0 {
|
||||
qt.Printf("nothing found")
|
||||
@@ -313,7 +313,7 @@ func (rrc *rollupResultCache) GetSeries(qt *querytracer.Tracer, ec *EvalConfig,
|
||||
if !ok {
|
||||
mi.RemoveKey(key)
|
||||
metainfoBuf = mi.Marshal(metainfoBuf[:0])
|
||||
bb.B = marshalRollupResultCacheKeyForSeries(bb.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
|
||||
bb.B = marshalRollupResultCacheKeyForSeries(bb.B[:0], expr, window, ec.Step, ec.CacheTagFilters)
|
||||
rrc.c.Set(bb.B, metainfoBuf)
|
||||
return nil, ec.Start
|
||||
}
|
||||
@@ -419,7 +419,7 @@ func (rrc *rollupResultCache) PutSeries(qt *querytracer.Tracer, ec *EvalConfig,
|
||||
metainfoBuf := bbPool.Get()
|
||||
defer bbPool.Put(metainfoBuf)
|
||||
|
||||
metainfoKey.B = marshalRollupResultCacheKeyForSeries(metainfoKey.B[:0], expr, window, ec.Step, ec.EnforcedTagFilterss)
|
||||
metainfoKey.B = marshalRollupResultCacheKeyForSeries(metainfoKey.B[:0], expr, window, ec.Step, ec.CacheTagFilters)
|
||||
metainfoBuf.B = rrc.c.Get(metainfoBuf.B[:0], metainfoKey.B)
|
||||
var mi rollupResultCacheMetainfo
|
||||
if len(metainfoBuf.B) > 0 {
|
||||
|
||||
@@ -1719,6 +1719,33 @@ func TestRollupDeltaWithStaleness(t *testing.T) {
|
||||
timestampsExpected := []int64{0, 10e3, 20e3, 30e3, 40e3}
|
||||
testRowsEqual(t, gotValues, rc.Timestamps, valuesExpected, timestampsExpected)
|
||||
})
|
||||
|
||||
t.Run("issue-8935", func(t *testing.T) {
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8935
|
||||
// below dataset has a gap that exceeds LookbackDelta.
|
||||
// The step is picked in a way that on [60e3-90e3] window
|
||||
// the prevValue will be NaN, but 60e3-55e3 still matches
|
||||
// timestamp=10e3 and stores its value as realPrevValue.
|
||||
// This results in an incorrect delta=1-50=-49 result.
|
||||
// The fix deducts LookbackDelta not from the window start
|
||||
// but from first captured data point in the window, so it becomes 70e3-55e3=15e3.
|
||||
// And realPrevValue becomes NaN due to staleness detection.
|
||||
timestamps = []int64{0, 10000, 70000, 80000}
|
||||
values = []float64{50, 50, 1, 1}
|
||||
rc := rollupConfig{
|
||||
Func: rollupDelta,
|
||||
Start: 0,
|
||||
End: 90e3,
|
||||
Step: 30e3,
|
||||
LookbackDelta: 55e3,
|
||||
MaxPointsPerSeries: 1e4,
|
||||
}
|
||||
rc.Timestamps = rc.getTimestamps()
|
||||
gotValues, _ := rc.Do(nil, values, timestamps)
|
||||
valuesExpected := []float64{0, 0, 0, 1}
|
||||
timestampsExpected := []int64{0, 30e3, 60e3, 90e3}
|
||||
testRowsEqual(t, gotValues, rc.Timestamps, valuesExpected, timestampsExpected)
|
||||
})
|
||||
}
|
||||
|
||||
func TestRollupIncreasePureWithStaleness(t *testing.T) {
|
||||
|
||||
@@ -66,8 +66,8 @@ or at your own [VictoriaMetrics instance](https://docs.victoriametrics.com/victo
|
||||
The list of MetricsQL features on top of PromQL:
|
||||
|
||||
* Graphite-compatible filters can be passed via `{__graphite__="foo.*.bar"}` syntax.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite#selecting-graphite-metrics).
|
||||
VictoriaMetrics can be used as Graphite datasource in Grafana. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite#graphite-api-usage) for details.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#selecting-graphite-metrics).
|
||||
VictoriaMetrics can be used as Graphite datasource in Grafana. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#graphite-api-usage) for details.
|
||||
See also [label_graphite_group](#label_graphite_group) function, which can be used for extracting the given groups from Graphite metric name.
|
||||
* Lookbehind window in square brackets for [rollup functions](#rollup-functions) may be omitted. VictoriaMetrics automatically selects the lookbehind window
|
||||
depending on the `step` query arg passed to [/api/v1/query_range](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#range-query)
|
||||
209
app/vmselect/vmui/assets/index-D-ssBbZq.js
Normal file
209
app/vmselect/vmui/assets/index-D-ssBbZq.js
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -36,10 +36,10 @@
|
||||
<meta property="og:title" content="UI for VictoriaMetrics">
|
||||
<meta property="og:url" content="https://victoriametrics.com/">
|
||||
<meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data">
|
||||
<script type="module" crossorigin src="./assets/index-xmjGcv4-.js"></script>
|
||||
<script type="module" crossorigin src="./assets/index-D-ssBbZq.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-D8IJGiEn.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-D1GxaB_c.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-C85_NB5q.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-D5re9hC6.css">
|
||||
</head>
|
||||
<body>
|
||||
<noscript>You need to enable JavaScript to run this app.</noscript>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM golang:1.24.3 AS build-web-stage
|
||||
FROM golang:1.24.4 AS build-web-stage
|
||||
COPY build /build
|
||||
|
||||
WORKDIR /build
|
||||
@@ -6,7 +6,7 @@ COPY web/ /build/
|
||||
RUN GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o web-amd64 github.com/VictoriMetrics/vmui/ && \
|
||||
GOOS=windows GOARCH=amd64 CGO_ENABLED=0 go build -o web-windows github.com/VictoriMetrics/vmui/
|
||||
|
||||
FROM alpine:3.21.3
|
||||
FROM alpine:3.22.0
|
||||
USER root
|
||||
|
||||
COPY --from=build-web-stage /build/web-amd64 /app/web
|
||||
|
||||
@@ -66,8 +66,8 @@ or at your own [VictoriaMetrics instance](https://docs.victoriametrics.com/victo
|
||||
The list of MetricsQL features on top of PromQL:
|
||||
|
||||
* Graphite-compatible filters can be passed via `{__graphite__="foo.*.bar"}` syntax.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite#selecting-graphite-metrics).
|
||||
VictoriaMetrics can be used as Graphite datasource in Grafana. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite#graphite-api-usage) for details.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#selecting-graphite-metrics).
|
||||
VictoriaMetrics can be used as Graphite datasource in Grafana. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#graphite-api-usage) for details.
|
||||
See also [label_graphite_group](#label_graphite_group) function, which can be used for extracting the given groups from Graphite metric name.
|
||||
* Lookbehind window in square brackets for [rollup functions](#rollup-functions) may be omitted. VictoriaMetrics automatically selects the lookbehind window
|
||||
depending on the `step` query arg passed to [/api/v1/query_range](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#range-query)
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
import { hasSortPipe } from "./sort";
|
||||
|
||||
describe("hasSortPipe()", () => {
|
||||
/** Queries that MUST be recognised as containing a sort/order pipe. */
|
||||
const positive: string[] = [
|
||||
// ───── basic usage ─────
|
||||
"sort by (_time)",
|
||||
"| sort by (_time)",
|
||||
"|sort(_time) desc",
|
||||
"| order by (foo desc)",
|
||||
"_time:5m | sort by (_stream, _time)",
|
||||
|
||||
// ───── documented options ─────
|
||||
"_time:1h | sort by (request_duration desc) limit 10",
|
||||
"_time:1h | sort by (request_duration desc) partition by (host) limit 3",
|
||||
"_time:5m | sort by (_time) rank as position",
|
||||
|
||||
// ───── whitespace / tabs ─────
|
||||
"|\t sort\tby (host)",
|
||||
|
||||
// ───── no space after the pipe ─────
|
||||
"foo|sort by (_time)",
|
||||
];
|
||||
|
||||
/** Queries that MUST **not** be recognised (false positives). */
|
||||
const negative: string[] = [
|
||||
"", // empty
|
||||
"error | sample 100", // no sort
|
||||
"|sorted(field)", // 'sorted' ≠ 'sort'
|
||||
"|sorter(field)", // 'sorter' ≠ 'sort'
|
||||
"my_sort(field)", // function name
|
||||
"| sorta by (field)", // 'sorta'
|
||||
"foo | orderliness by (bar)", // 'orderliness' ≠ 'order'
|
||||
];
|
||||
|
||||
it.each(positive)("detects pipe in ➜ %s", query => {
|
||||
expect(hasSortPipe(query)).toBe(true);
|
||||
});
|
||||
|
||||
it.each(negative)("does NOT detect pipe in ➜ %s", query => {
|
||||
expect(hasSortPipe(query)).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,5 @@
|
||||
// Matches the keyword `sort` or `order` (case-insensitive, as a whole word)
// located either at the very start of the query or right after a `|`,
// with optional whitespace in between.
const hasSortPipeRe = /(?:^|\|)\s*(?:sort|order)\b/i;

/**
 * Reports whether the query contains a `sort` / `order` pipe.
 *
 * @param query - query text to inspect
 * @returns `true` when the pattern above matches anywhere in `query`
 */
export function hasSortPipe(query: string): boolean {
  const match = hasSortPipeRe.exec(query);
  return match !== null;
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { FC, useMemo, useState } from "preact/compat";
|
||||
import { FC, useMemo, useState } from "preact/compat";
|
||||
import useBoolean from "../../../hooks/useBoolean";
|
||||
import { RestartIcon, SettingsIcon } from "../../Main/Icons";
|
||||
import Button from "../../Main/Button/Button";
|
||||
@@ -62,7 +62,8 @@ const GroupLogsConfigurators: FC<Props> = ({ logs }) => {
|
||||
].some(Boolean);
|
||||
|
||||
const logsKeys = useMemo(() => {
|
||||
return Array.from(new Set(logs.map(l => Object.keys(l)).flat()));
|
||||
const uniqueKeys = new Set(logs.map(l => Object.keys(l)).flat());
|
||||
return Array.from(uniqueKeys).sort((a, b) => a.localeCompare(b));
|
||||
}, [logs]);
|
||||
|
||||
const {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { FC, useEffect, useRef, useMemo } from "preact/compat";
|
||||
import { FC, useEffect, useRef, useMemo } from "preact/compat";
|
||||
import Button from "../../Main/Button/Button";
|
||||
import { SearchIcon, SettingsIcon } from "../../Main/Icons";
|
||||
import "./style.scss";
|
||||
@@ -49,8 +49,8 @@ const TableSettings: FC<TableSettingsProps> = ({
|
||||
|
||||
const filteredColumns = useMemo(() => {
|
||||
const allColumns = customColumns.concat(columns);
|
||||
if (!searchColumn) return allColumns;
|
||||
return allColumns.filter(col => col.includes(searchColumn));
|
||||
const result = searchColumn ? allColumns.filter(col => col.includes(searchColumn)) : allColumns;
|
||||
return result.sort((a, b) => a.localeCompare(b));
|
||||
}, [columns, customColumns, searchColumn]);
|
||||
|
||||
const isAllChecked = useMemo(() => {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { FC, useEffect, useMemo, useState } from "preact/compat";
|
||||
import { FC, useEffect, useMemo, useState } from "preact/compat";
|
||||
import ExploreLogsBody from "./ExploreLogsBody/ExploreLogsBody";
|
||||
import useStateSearchParams from "../../hooks/useStateSearchParams";
|
||||
import useSearchParamsFromObject from "../../hooks/useSearchParamsFromObject";
|
||||
@@ -18,6 +18,7 @@ import { useSearchParams } from "react-router-dom";
|
||||
import { useQueryDispatch, useQueryState } from "../../state/query/QueryStateContext";
|
||||
import { getUpdatedHistory } from "../../components/QueryHistory/utils";
|
||||
import { useDebounceCallback } from "../../hooks/useDebounceCallback";
|
||||
import usePrevious from "../../hooks/usePrevious";
|
||||
|
||||
const storageLimit = Number(getFromStorage("LOGS_LIMIT"));
|
||||
const defaultLimit = isNaN(storageLimit) ? LOGS_ENTRIES_LIMIT : storageLimit;
|
||||
@@ -30,6 +31,7 @@ const ExploreLogs: FC = () => {
|
||||
const { setSearchParamsFromKeys } = useSearchParamsFromObject();
|
||||
const [searchParams] = useSearchParams();
|
||||
const hideChart = useMemo(() => searchParams.get("hide_chart"), [searchParams]);
|
||||
const prevHideChart = usePrevious(hideChart);
|
||||
|
||||
const [limit, setLimit] = useStateSearchParams(defaultLimit, "limit");
|
||||
const [query, setQuery] = useStateSearchParams("*", "query");
|
||||
@@ -118,11 +120,10 @@ const ExploreLogs: FC = () => {
|
||||
}, [query, isUpdatingQuery]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!hideChart) debouncedFetchLogs(period, true);
|
||||
return () => {
|
||||
debouncedFetchLogs.cancel?.();
|
||||
};
|
||||
}, [hideChart, period]);
|
||||
if (!hideChart && prevHideChart) {
|
||||
fetchLogHits(period);
|
||||
}
|
||||
}, [hideChart, prevHideChart, period]);
|
||||
|
||||
return (
|
||||
<div className="vm-explore-logs">
|
||||
|
||||
@@ -10,6 +10,8 @@ import { useSearchParams } from "react-router-dom";
|
||||
import throttle from "lodash/throttle";
|
||||
import GroupLogsItem from "../../../GroupLogs/GroupLogsItem";
|
||||
import LiveTailingSettings from "./LiveTailingSettings";
|
||||
import Alert from "../../../../../components/Main/Alert/Alert";
|
||||
import { isDecreasing } from "../../../../../utils/array";
|
||||
|
||||
const SCROLL_THRESHOLD = 100;
|
||||
const scrollToBottom = () => window.scrollTo({
|
||||
@@ -36,7 +38,8 @@ const LiveTailingView: FC<ViewProps> = ({ settingsRef }) => {
|
||||
stopLiveTailing,
|
||||
pauseLiveTailing,
|
||||
resumeLiveTailing,
|
||||
clearLogs
|
||||
clearLogs,
|
||||
isLimitedLogsPerUpdate
|
||||
} = useLiveTailingLogs(query, rowsPerPage);
|
||||
|
||||
const displayFieldsString = searchParams.get(LOGS_URL_PARAMS.DISPLAY_FIELDS) || LOGS_DISPLAY_FIELDS;
|
||||
@@ -64,13 +67,17 @@ const LiveTailingView: FC<ViewProps> = ({ settingsRef }) => {
|
||||
const container = containerRef.current;
|
||||
if (!container) return;
|
||||
|
||||
let prevScrollTop: number[] = [];
|
||||
const handleScroll = () => {
|
||||
const { scrollTop, scrollHeight, clientHeight } = document.documentElement;
|
||||
const isBottom = Math.abs(scrollHeight - scrollTop - clientHeight) < SCROLL_THRESHOLD;
|
||||
|
||||
setIsAtBottom(isBottom);
|
||||
prevScrollTop.push(scrollTop);
|
||||
prevScrollTop = prevScrollTop.slice(-3);
|
||||
const isMoveToTop = isDecreasing(prevScrollTop);
|
||||
|
||||
if (!isBottom && !isPaused) {
|
||||
if (!isBottom && !isPaused && isMoveToTop) {
|
||||
pauseLiveTailing();
|
||||
}
|
||||
};
|
||||
@@ -89,8 +96,6 @@ const LiveTailingView: FC<ViewProps> = ({ settingsRef }) => {
|
||||
handleResumeLiveTailing();
|
||||
}, [rowsPerPage]);
|
||||
|
||||
|
||||
|
||||
if (error) {
|
||||
return <div className="vm-live-tailing-view__error">{error}</div>;
|
||||
}
|
||||
@@ -138,6 +143,7 @@ const LiveTailingView: FC<ViewProps> = ({ settingsRef }) => {
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{isLimitedLogsPerUpdate && (<Alert variant="warning">Too many logs per second detected. Large volumes of log data are difficult to process and may impact performance. We recommend adding filters to your query for better analysis and system performance.</Alert>)}
|
||||
</>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -34,9 +34,9 @@
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
overflow: auto;
|
||||
padding: $padding-global;
|
||||
min-height: 200px;
|
||||
font-family: $font-family-monospace;
|
||||
padding-bottom: $padding-medium;
|
||||
}
|
||||
|
||||
&__empty {
|
||||
|
||||
@@ -0,0 +1,147 @@
|
||||
import { act, renderHook } from "@testing-library/preact";
|
||||
import { useLiveTailingLogs } from "./useLiveTailingLogs";
|
||||
import { vi } from "vitest";
|
||||
|
||||
vi.mock("../../../../../state/common/StateContext", () => ({
|
||||
useAppState: () => ({ serverUrl: "http://localhost:8080" }),
|
||||
}));
|
||||
|
||||
vi.mock("../../../../../hooks/useTenant", () => ({
|
||||
useTenant: () => ({}),
|
||||
}));
|
||||
|
||||
// Mock dependencies
|
||||
const mockFetch = vi.fn();
|
||||
global.fetch = mockFetch;
|
||||
|
||||
const createMockStreamResponse = (logs: string[], sendCount: number = 1) => ({
|
||||
ok: true,
|
||||
body: new ReadableStream({
|
||||
async start(controller) {
|
||||
for (let i = 0; i < sendCount; i++) {
|
||||
logs.forEach((log) => {
|
||||
controller.enqueue(new TextEncoder().encode(log + "\n"));
|
||||
});
|
||||
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
}
|
||||
|
||||
controller.close();
|
||||
},
|
||||
}),
|
||||
text: async () => logs.join("\n"),
|
||||
});
|
||||
|
||||
describe("useLiveTailingLogs", () => {
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
it("should start live tailing and process logs", async () => {
|
||||
const query = "*";
|
||||
const limit = 10;
|
||||
const { result } = renderHook(() => useLiveTailingLogs(query, limit));
|
||||
|
||||
mockFetch.mockResolvedValue(createMockStreamResponse(["{\"logs\":\"test log\"}"]));
|
||||
|
||||
await act(async () => {
|
||||
const started = await result.current.startLiveTailing();
|
||||
expect(started).toBe(true);
|
||||
});
|
||||
|
||||
expect(mockFetch).toHaveBeenCalledTimes(1);
|
||||
expect(mockFetch).toHaveBeenCalledWith(
|
||||
"http://localhost:8080/select/logsql/tail",
|
||||
expect.objectContaining({
|
||||
method: "POST",
|
||||
body: new URLSearchParams({
|
||||
query: query.trim(),
|
||||
}),
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
it("should pause and resume live tailing", () => {
|
||||
const query = "*";
|
||||
const limit = 10;
|
||||
const { result } = renderHook(() => useLiveTailingLogs(query, limit));
|
||||
|
||||
act(() => {
|
||||
result.current.pauseLiveTailing();
|
||||
});
|
||||
|
||||
expect(result.current.isPaused).toBe(true);
|
||||
|
||||
act(() => {
|
||||
result.current.resumeLiveTailing();
|
||||
});
|
||||
|
||||
expect(result.current.isPaused).toBe(false);
|
||||
});
|
||||
|
||||
it("should stop live tailing", async () => {
|
||||
const query = "*";
|
||||
const limit = 10;
|
||||
const { result } = renderHook(() => useLiveTailingLogs(query, limit));
|
||||
|
||||
act(() => {
|
||||
result.current.stopLiveTailing();
|
||||
});
|
||||
|
||||
expect(result.current.logs).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("should clear logs", () => {
|
||||
const query = "*";
|
||||
const limit = 10;
|
||||
const { result } = renderHook(() => useLiveTailingLogs(query, limit));
|
||||
|
||||
act(() => {
|
||||
result.current.clearLogs();
|
||||
});
|
||||
|
||||
expect(result.current.logs).toEqual([]);
|
||||
});
|
||||
|
||||
it("should handle errors during live tailing", async () => {
|
||||
const query = "*";
|
||||
const limit = 10;
|
||||
const { result } = renderHook(() => useLiveTailingLogs(query, limit));
|
||||
|
||||
mockFetch.mockRejectedValue(new Error("Network error"));
|
||||
|
||||
await act(async () => {
|
||||
const started = await result.current.startLiveTailing();
|
||||
expect(started).toBe(false);
|
||||
});
|
||||
|
||||
expect(result.current.error).toBe("Error: Network error");
|
||||
expect(result.current.logs).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("should process high load of logs incoming at 100k logs per second", async () => {
|
||||
const query = "*";
|
||||
const limit = 1000;
|
||||
const logCount = 10000; // High log rate
|
||||
const logs = Array.from({ length: logCount }, (_, i) => `{"log": "log message ${i}"}`);
|
||||
const { result } = renderHook(() => useLiveTailingLogs(query, limit));
|
||||
|
||||
mockFetch.mockResolvedValue(createMockStreamResponse(logs, 7));
|
||||
|
||||
await act(async () => {
|
||||
const started = await result.current.startLiveTailing();
|
||||
expect(started).toBe(true);
|
||||
});
|
||||
|
||||
// Wait for logs to process
|
||||
await new Promise((resolve) => setTimeout(resolve, 7000));
|
||||
|
||||
// Verify logs are limited and processed correctly
|
||||
expect(result.current.logs.length).toBeLessThanOrEqual(limit);
|
||||
// After setting flag isLimitedLogsPerUpdate when more than 200 logs received 5 times in a row,
|
||||
// we take only the last 200 logs, so we get 800 older logs (9200 - 9999) and 200 new logs (9800-9999)
|
||||
expect(result.current.logs[0].log).toStrictEqual("log message 9200");
|
||||
expect(result.current.logs[799].log).toStrictEqual("log message 9999");
|
||||
expect(result.current.isLimitedLogsPerUpdate).toBeTruthy();
|
||||
}, { timeout: 9000 });
|
||||
});
|
||||
@@ -2,22 +2,161 @@ import { useCallback, useEffect, useRef, useState } from "preact/compat";
|
||||
import { ErrorTypes } from "../../../../../types";
|
||||
import { Logs } from "../../../../../api/types";
|
||||
import { useAppState } from "../../../../../state/common/StateContext";
|
||||
import { useSearchParams } from "react-router-dom";
|
||||
import useBoolean from "../../../../../hooks/useBoolean";
|
||||
import { useTenant } from "../../../../../hooks/useTenant";
|
||||
|
||||
/**
|
||||
* Defines the maximum number of consecutive times logs can be fetched above the threshold
|
||||
* before showing a warning notification, and vice versa:
|
||||
* - If logs are fetched above the threshold more than this many times in a row -> show warning
|
||||
* - If warning is shown, it won't disappear until logs are fetched below a threshold
|
||||
* more than this many times in a row
|
||||
*
|
||||
* This threshold helps optimize log display performance when dealing with large volumes of logs.
|
||||
* If the threshold is consistently exceeded, users will be prompted to add filters to their query
|
||||
* for better system performance and more focused log analysis.
|
||||
*/
|
||||
const MAX_ATTEMPTS_FETCH_LOGS_PER_SECOND = 5;
|
||||
/**
|
||||
* Defines the logs threshold per update; exceeding it repeatedly triggers a warning notification
|
||||
*/
|
||||
const LOGS_THRESHOLD = 200;
|
||||
const CONNECTION_TIMEOUT_MS = 5000;
const PROCESSING_INTERVAL_MS = 1000;

/**
 * Builds an async function that drains a response stream line by line.
 *
 * The returned function reads chunks from `reader`, splits the decoded text on "\n",
 * appends every complete line to `bufferLinesRef.current` and keeps the trailing
 * incomplete line in `bufferRef.current` until the next chunk arrives.
 *
 * While reading, a watchdog timer fires every CONNECTION_TIMEOUT_MS; if no chunk has
 * arrived for longer than CONNECTION_TIMEOUT_MS it calls `restartTailing()`.
 *
 * @param bufferRef - holds the not-yet-terminated tail of the last chunk
 * @param bufferLinesRef - accumulates complete lines waiting to be processed
 * @param setError - receives the stringified error for any non-abort read failure
 * @param restartTailing - invoked by the watchdog when the stream goes silent
 */
const createStreamProcessor = (
  bufferRef: React.MutableRefObject<string>,
  bufferLinesRef: React.MutableRefObject<string[]>,
  setError: (error: string) => void,
  restartTailing: () => Promise<boolean>
) => {
  return async (reader: ReadableStreamDefaultReader<Uint8Array>) => {
    let lastDataTime = Date.now();

    // One streaming decoder for the whole stream: it keeps the leading bytes of a
    // multi-byte UTF-8 character that is split across two chunks, instead of
    // decoding each half separately into U+FFFD replacement characters.
    const decoder = new TextDecoder();

    const connectionCheckInterval = setInterval(() => {
      const timeSinceLastData = Date.now() - lastDataTime;
      if (timeSinceLastData > CONNECTION_TIMEOUT_MS) {
        // Stop the watchdog before restarting so it cannot fire twice.
        clearInterval(connectionCheckInterval);
        restartTailing();
      }
    }, CONNECTION_TIMEOUT_MS);

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) {
          // Flush whatever the decoder still holds into the pending tail.
          bufferRef.current += decoder.decode();
          break;
        }
        lastDataTime = Date.now();

        const chunk = decoder.decode(value, { stream: true });
        const lines = (bufferRef.current + chunk).split("\n");
        // The last element is the unterminated remainder (or "" after a trailing "\n").
        bufferRef.current = lines.pop() || "";
        bufferLinesRef.current = [...bufferLinesRef.current, ...lines];
      }
    } catch (e) {
      // Aborts are not reported via setError.
      if (e instanceof Error && e.name !== "AbortError") {
        console.error("Stream processing error:", e);
        setError(String(e));
      }
    } finally {
      clearInterval(connectionCheckInterval);
    }
  };
};
|
||||
|
||||
/**
 * Updates the two consecutive-streak counters and decides whether limited mode is on.
 *
 * An update with more than LOGS_THRESHOLD lines extends the "above" streak and resets
 * the "low" streak; any other update does the opposite.
 *
 * @param linesCount - number of lines received in the current update
 * @param attemptsFetchLimitRef - streak counter of updates above the threshold
 * @param attemptsFetchLowRef - streak counter of updates at or below the threshold
 * @param isLimitedLogsPerUpdate - the mode currently in effect
 * @returns `true` once the "above" streak exceeds MAX_ATTEMPTS_FETCH_LOGS_PER_SECOND,
 *          `false` once the "low" streak exceeds it, otherwise the current mode unchanged
 */
const updateLimitModeTracking = (
  linesCount: number,
  attemptsFetchLimitRef: React.MutableRefObject<number>,
  attemptsFetchLowRef: React.MutableRefObject<number>,
  isLimitedLogsPerUpdate: boolean,
) => {
  const isAboveThreshold = linesCount > LOGS_THRESHOLD;
  const growingStreakRef = isAboveThreshold ? attemptsFetchLimitRef : attemptsFetchLowRef;
  const brokenStreakRef = isAboveThreshold ? attemptsFetchLowRef : attemptsFetchLimitRef;

  growingStreakRef.current++;
  brokenStreakRef.current = 0;

  if (attemptsFetchLimitRef.current > MAX_ATTEMPTS_FETCH_LOGS_PER_SECOND) return true;
  if (attemptsFetchLowRef.current > MAX_ATTEMPTS_FETCH_LOGS_PER_SECOND) return false;

  // Neither streak is long enough yet: keep whatever mode is active.
  return isLimitedLogsPerUpdate;
};
|
||||
|
||||
/**
 * Parses raw JSON lines into log entries and tags each with a unique `_log_id`.
 *
 * Blank lines are skipped silently and do not consume an id. Lines that are not valid
 * JSON, or whose JSON value is not an object, are reported via `console.error`
 * and dropped.
 *
 * @param lines - raw lines, one JSON document per line
 * @param counterRef - monotonically increasing id source; advanced once per accepted entry
 * @returns the successfully parsed entries, in input order
 */
const parseLogLines = (lines: string[], counterRef: React.MutableRefObject<bigint>): Logs[] => {
  const parsedLogs: Logs[] = [];

  for (const line of lines) {
    // An empty line is not an error: there is simply nothing to parse.
    if (!line || !line.trim()) continue;

    try {
      const parsedLine = JSON.parse(line);
      // `_log_id` can only be attached to an object; a primitive or null is not a log entry.
      if (parsedLine === null || typeof parsedLine !== "object") {
        throw new TypeError("expected a JSON object");
      }
      parsedLine._log_id = counterRef.current++;
      parsedLogs.push(parsedLine);
    } catch (e) {
      console.error(`Failed to parse "${line}" to JSON\n`, e);
    }
  }

  return parsedLogs;
};
|
||||
|
||||
/** Arguments of `processBufferedLogs`. */
interface ProcessBufferedLogsParams {
  // Raw lines collected since the previous processing pass.
  lines: string[];
  // Maximum number of log entries kept in state.
  limit: number;
  // Source of unique `_log_id` values.
  counterRef: React.MutableRefObject<bigint>;
  // Streak counter of passes above LOGS_THRESHOLD.
  attemptsFetchLimitRef: React.MutableRefObject<number>;
  // Streak counter of passes at or below LOGS_THRESHOLD.
  attemptsFetchLowRef: React.MutableRefObject<number>;
  setIsLimitedLogsPerUpdate: (isLimited: boolean) => void;
  setLogs: React.Dispatch<React.SetStateAction<Logs[]>>;
  // Line buffer that is emptied at the end of the pass.
  bufferLinesRef: React.MutableRefObject<string[]>;
  // Limited mode currently in effect.
  isLimitedLogsPerUpdate: boolean;
}

/**
 * Runs one processing pass over the buffered lines.
 *
 * Re-evaluates limited mode, keeps only the last LOGS_THRESHOLD lines when that mode
 * is active and the batch is larger, parses them, appends the result to the logs state
 * (trimmed to the last `limit` entries) and finally empties the line buffer.
 */
const processBufferedLogs = (params: ProcessBufferedLogsParams) => {
  const {
    lines,
    limit,
    counterRef,
    attemptsFetchLimitRef,
    attemptsFetchLowRef,
    setIsLimitedLogsPerUpdate,
    setLogs,
    bufferLinesRef,
    isLimitedLogsPerUpdate,
  } = params;

  const isLimitLogsMode = updateLimitModeTracking(
    lines.length,
    attemptsFetchLimitRef,
    attemptsFetchLowRef,
    isLimitedLogsPerUpdate,
  );

  let linesToParse = lines;
  if (isLimitLogsMode && lines.length > LOGS_THRESHOLD) {
    linesToParse = lines.slice(-LOGS_THRESHOLD);
  }
  const newLogs = parseLogLines(linesToParse, counterRef);

  setIsLimitedLogsPerUpdate(isLimitLogsMode);
  setLogs(prevLogs => {
    const merged = prevLogs.concat(newLogs);
    if (merged.length > limit) {
      return merged.slice(-limit);
    }
    return merged;
  });
  bufferLinesRef.current = [];
};
|
||||
|
||||
export const useLiveTailingLogs = (query: string, limit: number) => {
|
||||
const { serverUrl } = useAppState();
|
||||
const [searchParams] = useSearchParams();
|
||||
|
||||
const [logs, setLogs] = useState<Logs[]>([]);
|
||||
const { value: isPaused, setTrue: pauseLiveTailing, setFalse: resumeLiveTailing } = useBoolean(false);
|
||||
const tenant = useTenant();
|
||||
const [error, setError] = useState<ErrorTypes | string>();
|
||||
const [isLimitedLogsPerUpdate, setIsLimitedLogsPerUpdate] = useState(false);
|
||||
|
||||
const counterRef = useRef<bigint>(0n);
|
||||
const abortControllerRef = useRef(new AbortController());
|
||||
const readerRef = useRef<ReadableStreamDefaultReader<Uint8Array> | null>(null);
|
||||
const intervalRef = useRef<ReturnType<typeof setInterval> | null>(null);
|
||||
const bufferRef = useRef<string>("");
|
||||
const bufferLinesRef = useRef<string[]>([]);
|
||||
const attemptsFetchLimitLogsPerSecondCountRef = useRef<number>(0);
|
||||
const attemptsFetchLowLogsPerSecondCountRef = useRef<number>(0);
|
||||
|
||||
const stopLiveTailing = useCallback(() => {
|
||||
if (readerRef.current) {
|
||||
@@ -40,13 +179,8 @@ export const useLiveTailingLogs = (query: string, limit: number) => {
|
||||
const { signal } = abortControllerRef.current;
|
||||
|
||||
setError(undefined);
|
||||
setLogs([]);
|
||||
|
||||
try {
|
||||
const tenant = {
|
||||
AccountID: searchParams.get("accountID") || "0",
|
||||
ProjectID: searchParams.get("projectID") || "0"
|
||||
};
|
||||
const response = await fetch(`${serverUrl}/select/logsql/tail`, {
|
||||
signal,
|
||||
method: "POST",
|
||||
@@ -68,25 +202,14 @@ export const useLiveTailingLogs = (query: string, limit: number) => {
|
||||
const reader = response.body.getReader();
|
||||
readerRef.current = reader;
|
||||
|
||||
const processStream = async () => {
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
const processStream = createStreamProcessor(
|
||||
bufferRef,
|
||||
bufferLinesRef,
|
||||
setError,
|
||||
startLiveTailing
|
||||
);
|
||||
|
||||
// Convert the Uint8Array to a string
|
||||
const chunk = new TextDecoder().decode(value);
|
||||
bufferRef.current += chunk;
|
||||
}
|
||||
} catch (e) {
|
||||
if (e instanceof Error && e.name !== "AbortError") {
|
||||
console.error("Stream processing error:", e);
|
||||
setError(String(e));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
processStream();
|
||||
processStream(reader);
|
||||
return true;
|
||||
} catch (e) {
|
||||
if (e instanceof Error && e.name !== "AbortError") {
|
||||
@@ -98,42 +221,35 @@ export const useLiveTailingLogs = (query: string, limit: number) => {
|
||||
}
|
||||
}, [query, stopLiveTailing]);
|
||||
|
||||
|
||||
useEffect(() => {
|
||||
if (isPaused) return;
|
||||
if (isPaused) {
|
||||
const pauseTimerId = setInterval(() => {
|
||||
if (bufferLinesRef.current.length > limit) {
|
||||
bufferLinesRef.current = bufferLinesRef.current.slice(-limit);
|
||||
}
|
||||
}, PROCESSING_INTERVAL_MS);
|
||||
return () => {
|
||||
clearInterval(pauseTimerId);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Process incoming log data at a throttled rate (every 1s)
|
||||
* This interval-based approach prevents CPU overload by:
|
||||
* 1. Batching log processing instead of processing each chunk immediately
|
||||
* 2. Limiting UI updates to a reasonable frequency (1/sec) even when data streams in rapidly
|
||||
* 3. Reducing performance impact when handling large volumes of incoming logs
|
||||
* 4. Allowing efficient garbage collection between processing cycles
|
||||
*/
|
||||
const timerId = setInterval(() => {
|
||||
const lines = bufferRef.current.split("\n");
|
||||
bufferRef.current = lines.pop() || "";
|
||||
|
||||
const newLogs = lines
|
||||
.map(line => {
|
||||
try {
|
||||
const parsedLine = line && JSON.parse(line);
|
||||
parsedLine._log_id = counterRef.current++;
|
||||
return parsedLine;
|
||||
} catch (e) {
|
||||
console.error(`Failed to parse "${line}" to JSON\n`, e);
|
||||
return null;
|
||||
}
|
||||
})
|
||||
.filter(Boolean) as Logs[];
|
||||
|
||||
setLogs(prevLogs => {
|
||||
const combinedLogs = [...prevLogs, ...newLogs];
|
||||
return combinedLogs.length > limit ? combinedLogs.slice(-limit) : combinedLogs;
|
||||
const lines = bufferLinesRef.current;
|
||||
processBufferedLogs({
|
||||
lines,
|
||||
limit,
|
||||
counterRef,
|
||||
attemptsFetchLimitRef: attemptsFetchLimitLogsPerSecondCountRef,
|
||||
attemptsFetchLowRef: attemptsFetchLowLogsPerSecondCountRef,
|
||||
setIsLimitedLogsPerUpdate,
|
||||
isLimitedLogsPerUpdate,
|
||||
setLogs,
|
||||
bufferLinesRef
|
||||
});
|
||||
}, 1000);
|
||||
}, PROCESSING_INTERVAL_MS);
|
||||
|
||||
return () => clearInterval(timerId);
|
||||
}, [limit, isPaused]);
|
||||
}, [limit, isPaused, isLimitedLogsPerUpdate]);
|
||||
|
||||
const clearLogs = useCallback(() => {
|
||||
setLogs([]);
|
||||
@@ -147,6 +263,7 @@ export const useLiveTailingLogs = (query: string, limit: number) => {
|
||||
stopLiveTailing,
|
||||
pauseLiveTailing,
|
||||
resumeLiveTailing,
|
||||
clearLogs
|
||||
clearLogs,
|
||||
isLimitedLogsPerUpdate
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
@@ -27,7 +27,7 @@ const TableView: FC<ViewProps> = ({ data, settingsRef }) => {
|
||||
keys.add(key);
|
||||
}
|
||||
}
|
||||
return Array.from(keys);
|
||||
return Array.from(keys).sort((a,b) => a.localeCompare(b));
|
||||
}, [data]);
|
||||
|
||||
const handleSetRowsPerPage = (limit: number) => {
|
||||
@@ -77,4 +77,4 @@ const TableView: FC<ViewProps> = ({ data, settingsRef }) => {
|
||||
);
|
||||
};
|
||||
|
||||
export default TableView;
|
||||
export default TableView;
|
||||
|
||||
@@ -17,9 +17,9 @@ import Pagination from "../../../components/Main/Pagination/Pagination";
|
||||
import SelectLimit from "../../../components/Main/Pagination/SelectLimit/SelectLimit";
|
||||
import { usePaginateGroups } from "../hooks/usePaginateGroups";
|
||||
import { GroupLogsType } from "../../../types";
|
||||
import { getNanoTimestamp } from "../../../utils/time";
|
||||
import useDeviceDetect from "../../../hooks/useDeviceDetect";
|
||||
import DownloadLogsButton from "../DownloadLogsButton/DownloadLogsButton";
|
||||
import { hasSortPipe } from "../../../components/Configurators/QueryEditor/LogsQL/utils/sort";
|
||||
|
||||
interface Props {
|
||||
logs: Logs[];
|
||||
@@ -30,6 +30,9 @@ const GroupLogs: FC<Props> = ({ logs, settingsRef }) => {
|
||||
const { isMobile } = useDeviceDetect();
|
||||
const [searchParams, setSearchParams] = useSearchParams();
|
||||
|
||||
const query = searchParams.get("query") || "";
|
||||
const queryHasSort = hasSortPipe(query);
|
||||
|
||||
const [page, setPage] = useState(1);
|
||||
const [expandGroups, setExpandGroups] = useState<boolean[]>([]);
|
||||
|
||||
@@ -47,15 +50,10 @@ const GroupLogs: FC<Props> = ({ logs, settingsRef }) => {
|
||||
const streamValue = item.values[0]?.[groupBy] || "";
|
||||
const pairs = getStreamPairs(streamValue);
|
||||
|
||||
// values sorting by time
|
||||
const values = item.values.sort((a, b) => {
|
||||
const aTimestamp = getNanoTimestamp(a._time);
|
||||
const bTimestamp = getNanoTimestamp(b._time);
|
||||
|
||||
if (aTimestamp < bTimestamp) return 1;
|
||||
if (aTimestamp > bTimestamp) return -1;
|
||||
return 0;
|
||||
});
|
||||
// VictoriaLogs sends rows oldest → newest when the query has no `| sort` pipe,
|
||||
// so we reverse the array to put the newest entries first.
|
||||
// If a sort is already specified, keep the original order.
|
||||
const values = queryHasSort ? item.values : item.values.toReversed();
|
||||
|
||||
return {
|
||||
keys: item.keys,
|
||||
@@ -64,8 +62,8 @@ const GroupLogs: FC<Props> = ({ logs, settingsRef }) => {
|
||||
pairs,
|
||||
total: values.length,
|
||||
};
|
||||
}).sort((a, b) => b.values.length - a.values.length); // groups sorting
|
||||
}, [logs, groupBy]);
|
||||
}).sort((a, b) => b.total - a.total); // groups sorting
|
||||
}, [logs, groupBy, queryHasSort]);
|
||||
|
||||
const paginatedGroups = usePaginateGroups(groupData, page, rowsPerPage);
|
||||
|
||||
@@ -164,7 +162,7 @@ const GroupLogs: FC<Props> = ({ logs, settingsRef }) => {
|
||||
ariaLabel={expandAll ? "Collapse All" : "Expand All"}
|
||||
/>
|
||||
</Tooltip>
|
||||
<DownloadLogsButton getLogs={getLogs} />
|
||||
<DownloadLogsButton getLogs={getLogs}/>
|
||||
<GroupLogsConfigurators logs={logs}/>
|
||||
</div>
|
||||
), settingsRef.current)}
|
||||
|
||||
@@ -154,13 +154,13 @@ const Relabel: FC = () => {
|
||||
<div className="vm-relabeling-steps-item__row">
|
||||
<span>Input Labels:</span>
|
||||
<code>
|
||||
<pre dangerouslySetInnerHTML={{ __html: step.inLabels }}/>
|
||||
<pre dangerouslySetInnerHTML={{ __html: step.errors?.inLabels || step.inLabels }}/>
|
||||
</code>
|
||||
</div>
|
||||
<div className="vm-relabeling-steps-item__row">
|
||||
<span>Output labels:</span>
|
||||
<code>
|
||||
<pre dangerouslySetInnerHTML={{ __html: step.outLabels }}/>
|
||||
<pre dangerouslySetInnerHTML={{ __html: step.errors?.outLabels || step.outLabels }}/>
|
||||
</code>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -138,6 +138,10 @@ export interface RelabelStep {
|
||||
rule: string;
|
||||
inLabels: string;
|
||||
outLabels: string;
|
||||
errors: {
|
||||
inLabels: string;
|
||||
outLabels: string;
|
||||
}
|
||||
}
|
||||
|
||||
export interface RelabelData {
|
||||
|
||||
36
app/vmui/packages/vmui/src/utils/array.test.ts
Normal file
36
app/vmui/packages/vmui/src/utils/array.test.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { isDecreasing } from "./array";
|
||||
|
||||
describe("isDecreasing", () => {
  // Each entry is [test title, input array, expected result].
  const cases: [string, number[], boolean][] = [
    ["should return true for an array with strictly decreasing numbers", [5, 4, 3, 2, 1], true],
    ["should return false for an array with increasing numbers", [1, 2, 3, 4, 5], false],
    ["should return false for an array with equal consecutive numbers", [5, 5, 4, 3, 2], false],
    ["should return false for an empty array", [], false],
    ["should return false for an array with a single element", [1], false],
    ["should return false for an array with both increasing and decreasing numbers", [5, 3, 4, 2, 1], false],
    ["should return true for an array with negative strictly decreasing numbers", [-1, -2, -3, -4], true],
    ["should return false for an array with a mix of positive and negative numbers that do not strictly decrease", [3, 2, -1, -1], false],
  ];

  for (const [title, input, expected] of cases) {
    it(title, () => {
      expect(isDecreasing(input)).toBe(expected);
    });
  }
});
|
||||
@@ -1,4 +1,4 @@
|
||||
export const arrayEquals = (a: (string|number)[], b: (string|number)[]) => {
|
||||
export const arrayEquals = (a: (string | number)[], b: (string | number)[]) => {
|
||||
return a.length === b.length && a.every((val, index) => val === b[index]);
|
||||
};
|
||||
|
||||
@@ -17,3 +17,8 @@ export function groupByMultipleKeys<T>(items: T[], keys: (keyof T)[]): { keys: s
|
||||
}));
|
||||
}
|
||||
|
||||
/**
 * Reports whether `arr` is strictly decreasing, i.e. every element is smaller
 * than the one before it. Arrays with fewer than two elements yield `false`.
 */
export const isDecreasing = (arr: number[]): boolean => {
  if (arr.length <= 1) {
    return false;
  }

  // The sequence is strictly decreasing when no element (past the first)
  // fails the "smaller than its predecessor" check.
  const hasViolation = arr.some((value, index) => index > 0 && !(value < arr[index - 1]));
  return !hasViolation;
};
|
||||
|
||||
@@ -21,6 +21,7 @@ type PrometheusQuerier interface {
|
||||
PrometheusAPIV1Query(t *testing.T, query string, opts QueryOpts) *PrometheusAPIV1QueryResponse
|
||||
PrometheusAPIV1QueryRange(t *testing.T, query string, opts QueryOpts) *PrometheusAPIV1QueryResponse
|
||||
PrometheusAPIV1Series(t *testing.T, matchQuery string, opts QueryOpts) *PrometheusAPIV1SeriesResponse
|
||||
PrometheusAPIV1ExportNative(t *testing.T, query string, opts QueryOpts) []byte
|
||||
}
|
||||
|
||||
// Writer contains methods for writing new data
|
||||
@@ -29,6 +30,7 @@ type Writer interface {
|
||||
PrometheusAPIV1Write(t *testing.T, records []pb.TimeSeries, opts QueryOpts)
|
||||
PrometheusAPIV1ImportPrometheus(t *testing.T, records []string, opts QueryOpts)
|
||||
PrometheusAPIV1ImportCSV(t *testing.T, records []string, opts QueryOpts)
|
||||
PrometheusAPIV1ImportNative(t *testing.T, data []byte, opts QueryOpts)
|
||||
|
||||
// Graphite APIs
|
||||
GraphiteWrite(t *testing.T, records []string, opts QueryOpts)
|
||||
|
||||
@@ -28,6 +28,7 @@ type Stopper interface {
|
||||
|
||||
// NewTestCase creates a new test case.
|
||||
func NewTestCase(t *testing.T) *TestCase {
|
||||
t.Parallel()
|
||||
return &TestCase{t, NewClient(), make(map[string]Stopper)}
|
||||
}
|
||||
|
||||
@@ -188,9 +189,39 @@ func (tc *TestCase) MustStartVmauth(instance string, flags []string, configFileY
|
||||
return app
|
||||
}
|
||||
|
||||
// MustStartVmbackup is a test helper that starts an instance of vmbackup
|
||||
// and waits until the app exits. It fails the test if the app fails to start or
|
||||
// exits with non zero code.
|
||||
func (tc *TestCase) MustStartVmbackup(instance, storageDataPath, snapshotCreateURL, dst string) {
|
||||
tc.t.Helper()
|
||||
|
||||
if err := StartVmbackup(instance, storageDataPath, snapshotCreateURL, dst); err != nil {
|
||||
tc.t.Fatalf("vmbackup %q failed to start or exited with non-zero code: %v", instance, err)
|
||||
}
|
||||
|
||||
// Do not add the process to the list of running apps using
|
||||
// tc.addApp(instance, app), because the method blocks until the process
|
||||
// exits.
|
||||
}
|
||||
|
||||
// MustStartVmrestore is a test helper that starts an instance of vmrestore
|
||||
// and waits until the app exits. It fails the test if the app fails to start or
|
||||
// exits with non zero code.
|
||||
func (tc *TestCase) MustStartVmrestore(instance, src, storageDataPath string) {
|
||||
tc.t.Helper()
|
||||
|
||||
if err := StartVmrestore(instance, src, storageDataPath); err != nil {
|
||||
tc.t.Fatalf("vmrestore %q failed to start or exited with non-zero code: %v", instance, err)
|
||||
}
|
||||
|
||||
// Do not add the process to the list of running apps using
|
||||
// tc.addApp(instance, app), because the method blocks until the process
|
||||
// exits.
|
||||
}
|
||||
|
||||
// MustStartDefaultCluster starts a typical cluster configuration with default
|
||||
// flags.
|
||||
func (tc *TestCase) MustStartDefaultCluster() PrometheusWriteQuerier {
|
||||
func (tc *TestCase) MustStartDefaultCluster() *Vmcluster {
|
||||
tc.t.Helper()
|
||||
|
||||
return tc.MustStartCluster(&ClusterOptions{
|
||||
@@ -224,7 +255,7 @@ type ClusterOptions struct {
|
||||
}
|
||||
|
||||
// MustStartCluster starts a typical cluster configuration with custom flags.
|
||||
func (tc *TestCase) MustStartCluster(opts *ClusterOptions) PrometheusWriteQuerier {
|
||||
func (tc *TestCase) MustStartCluster(opts *ClusterOptions) *Vmcluster {
|
||||
tc.t.Helper()
|
||||
|
||||
opts.Vmstorage1Flags = append(opts.Vmstorage1Flags, []string{
|
||||
|
||||
239
apptest/tests/backup_restore_test.go
Normal file
239
apptest/tests/backup_restore_test.go
Normal file
@@ -0,0 +1,239 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
at "github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
)
|
||||
|
||||
type testBackupRestoreOpts struct {
|
||||
startSUT func() at.PrometheusWriteQuerier
|
||||
stopSUT func()
|
||||
storageDataPaths []string
|
||||
snapshotCreateURLs func(at.PrometheusWriteQuerier) []string
|
||||
}
|
||||
|
||||
func TestSingleBackupRestore(t *testing.T) {
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
storageDataPath := filepath.Join(tc.Dir(), "vmsingle")
|
||||
|
||||
opts := testBackupRestoreOpts{
|
||||
startSUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartVmsingle("vmsingle", []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-search.maxStalenessInterval=1m",
|
||||
})
|
||||
},
|
||||
stopSUT: func() {
|
||||
tc.StopApp("vmsingle")
|
||||
},
|
||||
storageDataPaths: []string{
|
||||
storageDataPath,
|
||||
},
|
||||
snapshotCreateURLs: func(sut at.PrometheusWriteQuerier) []string {
|
||||
return []string{
|
||||
sut.(*at.Vmsingle).SnapshotCreateURL(),
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
testBackupRestore(tc, opts)
|
||||
}
|
||||
|
||||
func TestClusterBackupRestore(t *testing.T) {
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
storage1DataPath := filepath.Join(tc.Dir(), "vmstorage1")
|
||||
storage2DataPath := filepath.Join(tc.Dir(), "vmstorage2")
|
||||
|
||||
opts := testBackupRestoreOpts{
|
||||
startSUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartCluster(&at.ClusterOptions{
|
||||
Vmstorage1Instance: "vmstorage1",
|
||||
Vmstorage1Flags: []string{
|
||||
"-storageDataPath=" + storage1DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
Vmstorage2Instance: "vmstorage2",
|
||||
Vmstorage2Flags: []string{
|
||||
"-storageDataPath=" + storage2DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
VminsertInstance: "vminsert",
|
||||
VminsertFlags: []string{},
|
||||
VmselectInstance: "vmselect",
|
||||
VmselectFlags: []string{
|
||||
"-search.maxStalenessInterval=1m",
|
||||
},
|
||||
})
|
||||
},
|
||||
stopSUT: func() {
|
||||
tc.StopApp("vminsert")
|
||||
tc.StopApp("vmselect")
|
||||
tc.StopApp("vmstorage1")
|
||||
tc.StopApp("vmstorage2")
|
||||
},
|
||||
storageDataPaths: []string{
|
||||
storage1DataPath,
|
||||
storage2DataPath,
|
||||
},
|
||||
snapshotCreateURLs: func(sut at.PrometheusWriteQuerier) []string {
|
||||
c := sut.(*at.Vmcluster)
|
||||
return []string{
|
||||
c.Vmstorages[0].SnapshotCreateURL(),
|
||||
c.Vmstorages[1].SnapshotCreateURL(),
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
testBackupRestore(tc, opts)
|
||||
}
|
||||
|
||||
func testBackupRestore(tc *at.TestCase, opts testBackupRestoreOpts) {
|
||||
t := tc.T()
|
||||
|
||||
const msecPerMinute = 60 * 1000
|
||||
genData := func(count int, prefix string, start int64) (recs []string, wantSeries []map[string]string, wantQueryResults []*at.QueryResult) {
|
||||
recs = make([]string, count)
|
||||
wantSeries = make([]map[string]string, count)
|
||||
wantQueryResults = make([]*at.QueryResult, count)
|
||||
for i := range count {
|
||||
name := fmt.Sprintf("%s_%03d", prefix, i)
|
||||
value := float64(i)
|
||||
timestamp := start + int64(i)*msecPerMinute
|
||||
|
||||
recs[i] = fmt.Sprintf("%s %f %d", name, value, timestamp)
|
||||
wantSeries[i] = map[string]string{"__name__": name}
|
||||
wantQueryResults[i] = &at.QueryResult{
|
||||
Metric: map[string]string{"__name__": name},
|
||||
Samples: []*at.Sample{{Timestamp: timestamp, Value: value}},
|
||||
}
|
||||
}
|
||||
return recs, wantSeries, wantQueryResults
|
||||
}
|
||||
|
||||
backupBaseDir, err := filepath.Abs(filepath.Join(tc.Dir(), "backups"))
|
||||
if err != nil {
|
||||
t.Fatalf("could not get absolute path for the backup base dir")
|
||||
}
|
||||
|
||||
// assertSeries retrieves set of all metric names from the storage and
|
||||
// compares it with the expected set.
|
||||
assertSeries := func(app at.PrometheusQuerier, query string, start, end int64, want []map[string]string) {
|
||||
t.Helper()
|
||||
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "unexpected /api/v1/series response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1Series(t, query, at.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
}).Sort()
|
||||
},
|
||||
Want: &at.PrometheusAPIV1SeriesResponse{
|
||||
Status: "success",
|
||||
Data: want,
|
||||
},
|
||||
FailNow: true,
|
||||
})
|
||||
}
|
||||
|
||||
// assertSeries retrieves all data from the storage and compares it with the
|
||||
// expected result.
|
||||
assertQueryResults := func(app at.PrometheusQuerier, query string, start, end int64, want []*at.QueryResult) {
|
||||
t.Helper()
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "unexpected /api/v1/query_range response",
|
||||
Got: func() any {
|
||||
return app.PrometheusAPIV1QueryRange(t, query, at.QueryOpts{
|
||||
Start: fmt.Sprintf("%d", start),
|
||||
End: fmt.Sprintf("%d", end),
|
||||
Step: "60s",
|
||||
})
|
||||
},
|
||||
Want: &at.PrometheusAPIV1QueryResponse{
|
||||
Status: "success",
|
||||
Data: &at.QueryData{
|
||||
ResultType: "matrix",
|
||||
Result: want,
|
||||
},
|
||||
},
|
||||
FailNow: true,
|
||||
Retries: 300,
|
||||
})
|
||||
}
|
||||
|
||||
createBackup := func(sut at.PrometheusWriteQuerier, name string) {
|
||||
for i, storageDataPath := range opts.storageDataPaths {
|
||||
replica := fmt.Sprintf("replica-%d", i)
|
||||
instance := fmt.Sprintf("vmbackup-%s-%s", name, replica)
|
||||
snapshotCreateURL := opts.snapshotCreateURLs(sut)[i]
|
||||
backupPath := "fs://" + filepath.Join(backupBaseDir, name, replica)
|
||||
tc.MustStartVmbackup(instance, storageDataPath, snapshotCreateURL, backupPath)
|
||||
}
|
||||
}
|
||||
|
||||
restoreFromBackup := func(name string) {
|
||||
for i, storageDataPath := range opts.storageDataPaths {
|
||||
replica := fmt.Sprintf("replica-%d", i)
|
||||
instance := fmt.Sprintf("vmrestore-%s-%s", name, replica)
|
||||
backupPath := "fs://" + filepath.Join(backupBaseDir, name, replica)
|
||||
tc.MustStartVmrestore(instance, backupPath, storageDataPath)
|
||||
}
|
||||
}
|
||||
|
||||
// Use the same number of metrics and time range for all the data ingestions
|
||||
// below.
|
||||
const numMetrics = 1000
|
||||
// With 1000 metrics (one per minute), the time range spans 2 months.
|
||||
end := time.Date(2025, 3, 1, 10, 0, 0, 0, time.UTC).UnixMilli()
|
||||
start := end - numMetrics*msecPerMinute
|
||||
|
||||
// Verify backup/restore:
|
||||
//
|
||||
// - Start vmsingle with empty storage data dir.
|
||||
// - Ingest first batch or records (batch1) and ensure they can be queried.
|
||||
// - Create batch1 backup
|
||||
// - Ingest second batch of records (batch2) and ensure the queries return
|
||||
// (batch1 + batch2) data.
|
||||
// - Stop vmsingle
|
||||
// - Restore batch1 from backup
|
||||
// - Start vmsingle
|
||||
// - Ensure that the queries return batch1 data only.
|
||||
|
||||
batch1Data, wantBatch1Series, wantBatch1QueryResults := genData(numMetrics, "batch1", start)
|
||||
batch2Data, wantBatch2Series, wantBatch2QueryResults := genData(numMetrics, "batch2", start)
|
||||
wantBatch12Series := slices.Concat(wantBatch1Series, wantBatch2Series)
|
||||
wantBatch12QueryResults := slices.Concat(wantBatch1QueryResults, wantBatch2QueryResults)
|
||||
|
||||
sut := opts.startSUT()
|
||||
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, batch1Data, at.QueryOpts{})
|
||||
sut.ForceFlush(t)
|
||||
assertSeries(sut, `{__name__=~"batch1.*"}`, start, end, wantBatch1Series)
|
||||
assertQueryResults(sut, `{__name__=~"batch1.*"}`, start, end, wantBatch1QueryResults)
|
||||
createBackup(sut, "batch1")
|
||||
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, batch2Data, at.QueryOpts{})
|
||||
sut.ForceFlush(t)
|
||||
assertSeries(sut, `{__name__=~"batch(1|2).*"}`, start, end, wantBatch12Series)
|
||||
assertQueryResults(sut, `{__name__=~"batch(1|2).*"}`, start, end, wantBatch12QueryResults)
|
||||
createBackup(sut, "batch12")
|
||||
|
||||
opts.stopSUT()
|
||||
|
||||
restoreFromBackup("batch1")
|
||||
|
||||
sut = opts.startSUT()
|
||||
|
||||
assertSeries(sut, `{__name__=~"batch1.*"}`, start, end, wantBatch1Series)
|
||||
assertQueryResults(sut, `{__name__=~"batch1.*"}`, start, end, wantBatch1QueryResults)
|
||||
}
|
||||
73
apptest/tests/export_import_test.go
Normal file
73
apptest/tests/export_import_test.go
Normal file
@@ -0,0 +1,73 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
at "github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
)
|
||||
|
||||
func TestSingleExportImportNative(t *testing.T) {
|
||||
os.RemoveAll(t.Name())
|
||||
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
sut := tc.MustStartDefaultVmsingle()
|
||||
|
||||
testExportImportNative(tc.T(), sut)
|
||||
}
|
||||
|
||||
func TestClusterExportImportNative(t *testing.T) {
|
||||
os.RemoveAll(t.Name())
|
||||
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
sut := tc.MustStartDefaultCluster()
|
||||
|
||||
testExportImportNative(tc.T(), sut)
|
||||
}
|
||||
|
||||
// testExportImportNative test export and import in VictoriaMetrics’ native format.
|
||||
// see: https://docs.victoriametrics.com/#how-to-import-data-in-native-format
|
||||
func testExportImportNative(t *testing.T, sut at.PrometheusWriteQuerier) {
|
||||
// create test data
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, []string{
|
||||
`native_export_import 10 1707123456700`, // 2024-02-05T08:57:36.700Z
|
||||
}, at.QueryOpts{
|
||||
ExtraLabels: []string{"el1=elv1", "el2=elv2"},
|
||||
})
|
||||
sut.ForceFlush(t)
|
||||
|
||||
// export test data via native export API
|
||||
exportResult := sut.PrometheusAPIV1ExportNative(t, "native_export_import", at.QueryOpts{
|
||||
Start: "2024-02-05T08:50:00.700Z",
|
||||
End: "2024-02-05T09:00:00.700Z",
|
||||
})
|
||||
|
||||
// re-import test data via native import API
|
||||
sut.PrometheusAPIV1ImportNative(t, exportResult, at.QueryOpts{})
|
||||
sut.ForceFlush(t)
|
||||
|
||||
// check query result
|
||||
got := sut.PrometheusAPIV1QueryRange(t, "native_export_import", at.QueryOpts{
|
||||
Start: "2024-02-05T08:57:36.700Z",
|
||||
End: "2024-02-05T08:57:36.700Z",
|
||||
Step: "60s",
|
||||
})
|
||||
|
||||
cmpOptions := []cmp.Option{
|
||||
cmpopts.IgnoreFields(at.PrometheusAPIV1QueryResponse{}, "Status", "Data.ResultType"),
|
||||
cmpopts.EquateNaNs(),
|
||||
}
|
||||
want := at.NewPrometheusAPIV1QueryResponse(t, `{"data": {"result": [{"metric": {"__name__": "native_export_import", "el1": "elv1", "el2":"elv2"}, "values": []}]}}`)
|
||||
want.Data.Result[0].Samples = []*at.Sample{
|
||||
at.NewSample(t, "2024-02-05T08:57:36.700Z", 10),
|
||||
}
|
||||
if diff := cmp.Diff(want, got, cmpOptions...); diff != "" {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
@@ -216,4 +216,15 @@ func TestClusterMultiTenantSelect(t *testing.T) {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
if got := vmselect.GetIntMetric(t, `vm_cache_requests_total{type="multitenancy/tenants"}`); got != 0 {
|
||||
t.Errorf("unexpected multitenancy tenants cache requests; got %d; want 0", got)
|
||||
}
|
||||
|
||||
if got := vmselect.GetIntMetric(t, `vm_cache_misses_total{type="multitenancy/tenants"}`); got != 0 {
|
||||
t.Errorf("unexpected multitenancy tenants cache misses; got %d; want 0", got)
|
||||
}
|
||||
|
||||
if got := vmselect.GetIntMetric(t, `vm_cache_entries{type="multitenancy/tenants"}`); got != 0 {
|
||||
t.Errorf("unexpected multitenancy tenants cache entries; got %d; want 0", got)
|
||||
}
|
||||
}
|
||||
|
||||
78
apptest/tests/rollup_result_cache_test.go
Normal file
78
apptest/tests/rollup_result_cache_test.go
Normal file
@@ -0,0 +1,78 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
)
|
||||
|
||||
func TestClusterRollupResultCache(t *testing.T) {
|
||||
os.RemoveAll(t.Name())
|
||||
|
||||
cmpOpt := cmpopts.IgnoreFields(apptest.PrometheusAPIV1QueryResponse{}, "Status", "Data.ResultType")
|
||||
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
vmstorage := tc.MustStartVmstorage("vmstorage", []string{
|
||||
"-storageDataPath=" + tc.Dir() + "/vmstorage",
|
||||
"-retentionPeriod=100y",
|
||||
})
|
||||
vminsert := tc.MustStartVminsert("vminsert", []string{
|
||||
"-storageNode=" + vmstorage.VminsertAddr(),
|
||||
})
|
||||
vmselect := tc.MustStartVmselect("vmselect", []string{
|
||||
"-storageNode=" + vmstorage.VmselectAddr(),
|
||||
"-search.tenantCacheExpireDuration=0",
|
||||
})
|
||||
|
||||
var tenantLabelsSamples = []string{
|
||||
`foo_bar{vm_account_id="5"} 1.00 1652169720000`, // 2022-05-10T08:00:00Z'
|
||||
`foo_bar{vm_account_id="5",vm_project_id="15"} 3.00 1652169720000`, // 2022-05-10T08:02:00Z
|
||||
}
|
||||
|
||||
vminsert.PrometheusAPIV1ImportPrometheus(t, tenantLabelsSamples, apptest.QueryOpts{Tenant: "multitenant"})
|
||||
vmstorage.ForceFlush(t)
|
||||
|
||||
want := apptest.NewPrometheusAPIV1QueryResponse(t,
|
||||
`{"data":
|
||||
{"result":[
|
||||
{"metric":{"__name__":"foo_bar","vm_account_id":"5","vm_project_id": "0"},"values":[[1652169720,"1"],[1652169780,"1"]]},
|
||||
{"metric":{"__name__":"foo_bar","vm_account_id":"5","vm_project_id":"15"},"values":[[1652169720,"3"],[1652169780,"3"]]}
|
||||
]
|
||||
}
|
||||
}`,
|
||||
)
|
||||
|
||||
got := vmselect.PrometheusAPIV1QueryRange(t, `foo_bar{}`, apptest.QueryOpts{
|
||||
Tenant: "multitenant",
|
||||
Start: "2022-05-10T07:59:00.000Z",
|
||||
End: "2022-05-10T08:05:00.000Z",
|
||||
Step: "1m",
|
||||
ExtraFilters: []string{`{vm_account_id="5",vm_project_id="15"}`, `{vm_account_id="5",vm_project_id="0"}`},
|
||||
})
|
||||
if diff := cmp.Diff(want, got, cmpOpt); diff != "" {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
want = apptest.NewPrometheusAPIV1QueryResponse(t,
|
||||
`{"data":
|
||||
{"result":[]}
|
||||
}`,
|
||||
)
|
||||
|
||||
got = vmselect.PrometheusAPIV1QueryRange(t, `foo_bar{}`, apptest.QueryOpts{
|
||||
Tenant: "multitenant",
|
||||
Start: "2022-05-10T07:59:00.000Z",
|
||||
End: "2022-05-10T08:05:00.000Z",
|
||||
Step: "1m",
|
||||
ExtraFilters: []string{`{vm_account_id="99",vm_project_id="99"}`},
|
||||
})
|
||||
if diff := cmp.Diff(want, got, cmpOpt); diff != "" {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
}
|
||||
@@ -5,8 +5,9 @@ import (
|
||||
"regexp"
|
||||
"testing"
|
||||
|
||||
at "github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
"github.com/google/go-cmp/cmp"
|
||||
|
||||
at "github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
)
|
||||
|
||||
// snapshotNameRE covers years 1970-2099.
|
||||
@@ -104,7 +105,7 @@ func TestClusterSnapshots_CreateListDelete(t *testing.T) {
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
sut := tc.MustStartDefaultCluster().(*at.Vmcluster)
|
||||
sut := tc.MustStartDefaultCluster()
|
||||
|
||||
// Insert some data.
|
||||
const numSamples = 1000
|
||||
|
||||
270739
apptest/tests/testdata/prometheus/expected_response.json
vendored
270739
apptest/tests/testdata/prometheus/expected_response.json
vendored
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
@@ -1,17 +0,0 @@
|
||||
{
|
||||
"ulid": "01JHWQ445Y2P1TDYB05AEKD6MC",
|
||||
"minTime": 1737204082361,
|
||||
"maxTime": 1737204302539,
|
||||
"stats": {
|
||||
"numSamples": 60275,
|
||||
"numSeries": 2792,
|
||||
"numChunks": 2792
|
||||
},
|
||||
"compaction": {
|
||||
"level": 1,
|
||||
"sources": [
|
||||
"01JHWQ445Y2P1TDYB05AEKD6MC"
|
||||
]
|
||||
},
|
||||
"version": 1
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"ulid": "01JWS713P2E4MQW7T643GYGD69",
|
||||
"minTime": 1748897918112,
|
||||
"maxTime": 1748897922411,
|
||||
"stats": {
|
||||
"numSamples": 2434,
|
||||
"numSeries": 2434,
|
||||
"numChunks": 2434
|
||||
},
|
||||
"compaction": {
|
||||
"level": 1,
|
||||
"sources": [
|
||||
"01JWS713P2E4MQW7T643GYGD69"
|
||||
]
|
||||
},
|
||||
"version": 1
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
@@ -12,23 +13,58 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
)
|
||||
|
||||
func TestVmctlPrometheusProtocolToVMSingle(t *testing.T) {
|
||||
const (
|
||||
testSnapshot = "./testdata/prometheus/snapshots/20250602T205846Z-7e03e43cf46dda03"
|
||||
expectedResponseFile = "./testdata/prometheus/expected_response.json"
|
||||
)
|
||||
|
||||
func TestSingleVmctlPrometheusProtocol(t *testing.T) {
|
||||
os.RemoveAll(t.Name())
|
||||
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
vmsingleDst := tc.MustStartDefaultVmsingle()
|
||||
vmAddr := fmt.Sprintf("http://%s/", vmsingleDst.HTTPAddr())
|
||||
vmctlFlags := []string{
|
||||
`prometheus`,
|
||||
`--prom-snapshot=` + testSnapshot,
|
||||
`--vm-addr=` + vmAddr,
|
||||
`--disable-progress-bar=true`,
|
||||
}
|
||||
|
||||
testPrometheusProtocol(tc, vmsingleDst, vmctlFlags)
|
||||
}
|
||||
|
||||
func TestClusterVmctlPrometheusProtocol(t *testing.T) {
|
||||
os.RemoveAll(t.Name())
|
||||
|
||||
tc := apptest.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
cluster := tc.MustStartDefaultCluster()
|
||||
vmAddr := fmt.Sprintf("http://%s/", cluster.Vminsert.HTTPAddr())
|
||||
vmctlFlags := []string{
|
||||
`prometheus`,
|
||||
`--prom-snapshot=` + testSnapshot,
|
||||
`--vm-addr=` + vmAddr,
|
||||
`--disable-progress-bar=true`,
|
||||
`--vm-account-id=0`,
|
||||
}
|
||||
|
||||
testPrometheusProtocol(tc, cluster, vmctlFlags)
|
||||
}
|
||||
|
||||
func testPrometheusProtocol(tc *apptest.TestCase, sut apptest.PrometheusWriteQuerier, vmctlFlags []string) {
|
||||
t := tc.T()
|
||||
t.Helper()
|
||||
|
||||
cmpOpt := cmpopts.IgnoreFields(apptest.PrometheusAPIV1QueryResponse{}, "Status", "Data.ResultType")
|
||||
|
||||
vmsingleDst := tc.MustStartVmsingle("vmsingle", []string{
|
||||
"-storageDataPath=" + tc.Dir() + "/vmsingle",
|
||||
"-retentionPeriod=100y",
|
||||
})
|
||||
|
||||
// test for empty data request
|
||||
got := vmsingleDst.PrometheusAPIV1Query(t, `{__name__=~".*"}`, apptest.QueryOpts{
|
||||
got := sut.PrometheusAPIV1Query(t, `{__name__=~".*"}`, apptest.QueryOpts{
|
||||
Step: "5m",
|
||||
Time: "2025-01-18T12:45:00Z",
|
||||
Time: "2025-06-02T17:14:00Z",
|
||||
})
|
||||
|
||||
want := apptest.NewPrometheusAPIV1QueryResponse(t, `{"data":{"result":[]}}`)
|
||||
@@ -36,19 +72,12 @@ func TestVmctlPrometheusProtocolToVMSingle(t *testing.T) {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
vmAddr := fmt.Sprintf("http://%s/", vmsingleDst.HTTPAddr())
|
||||
testSnapshot := "./testdata/prometheus/snapshots/20250118T124506Z-59d1b952d7eaf547"
|
||||
_ = tc.MustStartVmctl("vmctl", []string{
|
||||
`prometheus`,
|
||||
`--prom-snapshot=` + testSnapshot,
|
||||
`--vm-addr=` + vmAddr,
|
||||
`--disable-progress-bar=true`,
|
||||
})
|
||||
_ = tc.MustStartVmctl("vmctl", vmctlFlags)
|
||||
|
||||
vmsingleDst.ForceFlush(t)
|
||||
sut.ForceFlush(t)
|
||||
|
||||
// open the expected series response file
|
||||
file, err := os.Open("./testdata/prometheus/expected_response.json")
|
||||
file, err := os.Open(expectedResponseFile)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot open expected series response file: %s", err)
|
||||
}
|
||||
@@ -59,20 +88,25 @@ func TestVmctlPrometheusProtocolToVMSingle(t *testing.T) {
|
||||
t.Fatalf("cannot read expected series response file: %s", err)
|
||||
}
|
||||
|
||||
wantResponse := apptest.NewPrometheusAPIV1QueryResponse(t, string(bytes))
|
||||
var wantResponse apptest.PrometheusAPIV1QueryResponse
|
||||
if err := json.Unmarshal(bytes, &wantResponse); err != nil {
|
||||
t.Fatalf("cannot unmarshal expected series response file: %s", err)
|
||||
}
|
||||
wantResponse.Sort()
|
||||
|
||||
tc.Assert(&apptest.AssertOptions{
|
||||
Msg: `unexpected metrics stored on vmsingle via the prometheus protocol`,
|
||||
// For cluster version, we need to wait longer for the metrics to be stored
|
||||
Retries: 300,
|
||||
Msg: `unexpected metrics stored on vmsingle via the prometheus protocol`,
|
||||
Got: func() any {
|
||||
exported := vmsingleDst.PrometheusAPIV1Export(t, `{__name__=~".*"}`, apptest.QueryOpts{
|
||||
Start: "2025-01-18T00:45:00Z",
|
||||
End: "2025-01-18T23:46:00Z",
|
||||
expected := sut.PrometheusAPIV1Export(t, `{__name__="vm_log_messages_total", location=~"VictoriaMetrics/lib/ingestserver/opentsdb/server.go:(48|59)"}`, apptest.QueryOpts{
|
||||
Start: "2025-06-02T00:00:00Z",
|
||||
End: "2025-06-02T23:59:59Z",
|
||||
})
|
||||
exported.Sort()
|
||||
return exported
|
||||
expected.Sort()
|
||||
return expected.Data.Result
|
||||
},
|
||||
Want: &apptest.PrometheusAPIV1QueryResponse{Data: wantResponse.Data},
|
||||
Want: wantResponse.Data.Result,
|
||||
CmpOpts: []cmp.Option{
|
||||
cmpopts.IgnoreFields(apptest.PrometheusAPIV1QueryResponse{}, "Status", "Data.ResultType"),
|
||||
},
|
||||
|
||||
13
apptest/vmbackup.go
Normal file
13
apptest/vmbackup.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package apptest
|
||||
|
||||
// StartVmbackup starts an instance of vmbackup with the given flags and waits
|
||||
// until it exits.
|
||||
func StartVmbackup(instance, storageDataPath, snapshotCreateURL, dst string) error {
|
||||
flags := []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-snapshot.createURL=" + snapshotCreateURL,
|
||||
"-dst=" + dst,
|
||||
}
|
||||
_, _, err := startApp(instance, "../../bin/vmbackup", flags, &appOptions{wait: true})
|
||||
return err
|
||||
}
|
||||
@@ -8,8 +8,9 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
pb "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/golang/snappy"
|
||||
|
||||
pb "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
// Vminsert holds the state of a vminsert app and provides vminsert-specific
|
||||
@@ -87,6 +88,12 @@ func (app *Vminsert) ClusternativeListenAddr() string {
|
||||
return app.clusternativeListenAddr
|
||||
}
|
||||
|
||||
// HTTPAddr returns the address at which the vminsert process is
|
||||
// listening for incoming HTTP requests.
|
||||
func (app *Vminsert) HTTPAddr() string {
|
||||
return app.httpListenAddr
|
||||
}
|
||||
|
||||
// InfluxWrite is a test helper function that inserts a
|
||||
// collection of records in Influx line format by sending a HTTP
|
||||
// POST request to /influx/write vmsingle endpoint.
|
||||
@@ -143,6 +150,28 @@ func (app *Vminsert) PrometheusAPIV1ImportCSV(t *testing.T, records []string, op
|
||||
})
|
||||
}
|
||||
|
||||
// PrometheusAPIV1ImportNative is a test helper function that inserts a collection
|
||||
// of records in Native format for the given tenant by sending an HTTP POST
|
||||
// request to prometheus/api/v1/import/native vminsert endpoint.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/cluster-victoriametrics/#url-format
|
||||
func (app *Vminsert) PrometheusAPIV1ImportNative(t *testing.T, data []byte, opts QueryOpts) {
|
||||
t.Helper()
|
||||
|
||||
url := fmt.Sprintf("http://%s/insert/%s/prometheus/api/v1/import/native", app.httpListenAddr, opts.getTenant())
|
||||
uv := opts.asURLValues()
|
||||
uvs := uv.Encode()
|
||||
if len(uvs) > 0 {
|
||||
url += "?" + uvs
|
||||
}
|
||||
app.sendBlocking(t, 1, func() {
|
||||
_, statusCode := app.cli.Post(t, url, "text/plain", data)
|
||||
if statusCode != http.StatusNoContent {
|
||||
t.Fatalf("unexpected status code: got %d, want %d", statusCode, http.StatusNoContent)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// OpenTSDBAPIPut is a test helper function that inserts a collection of
|
||||
// records in OpenTSDB format for the given tenant by sending an HTTP POST
|
||||
// request to /opentsdb/api/put vminsert endpoint.
|
||||
|
||||
12
apptest/vmrestore.go
Normal file
12
apptest/vmrestore.go
Normal file
@@ -0,0 +1,12 @@
|
||||
package apptest
|
||||
|
||||
// StartVmrestore starts an instance of vmrestore with the given flags and waits
|
||||
// until it exits.
|
||||
func StartVmrestore(instance, src, storageDataPath string) error {
|
||||
flags := []string{
|
||||
"-src=" + src,
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
}
|
||||
_, _, err := startApp(instance, "../../bin/vmrestore", flags, &appOptions{wait: true})
|
||||
return err
|
||||
}
|
||||
@@ -71,6 +71,22 @@ func (app *Vmselect) PrometheusAPIV1Export(t *testing.T, query string, opts Quer
|
||||
return NewPrometheusAPIV1QueryResponse(t, res)
|
||||
}
|
||||
|
||||
// PrometheusAPIV1ExportNative is a test helper function that performs the export of
|
||||
// raw samples in native binary format by sending an HTTP POST request to
|
||||
// /prometheus/api/v1/export/native vmselect endpoint.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victoriametrics/url-examples/#apiv1exportnative
|
||||
func (app *Vmselect) PrometheusAPIV1ExportNative(t *testing.T, query string, opts QueryOpts) []byte {
|
||||
t.Helper()
|
||||
|
||||
exportURL := fmt.Sprintf("http://%s/select/%s/prometheus/api/v1/export/native", app.httpListenAddr, opts.getTenant())
|
||||
values := opts.asURLValues()
|
||||
values.Add("match[]", query)
|
||||
values.Add("format", "promapi")
|
||||
res, _ := app.cli.PostForm(t, exportURL, values)
|
||||
return []byte(res)
|
||||
}
|
||||
|
||||
// PrometheusAPIV1Query is a test helper function that performs PromQL/MetricsQL
|
||||
// instant query by sending a HTTP POST request to /prometheus/api/v1/query
|
||||
// vmselect endpoint.
|
||||
|
||||
@@ -35,10 +35,11 @@ type Vmsingle struct {
|
||||
prometheusAPIV1WriteURL string
|
||||
|
||||
// vmselect URLs.
|
||||
prometheusAPIV1ExportURL string
|
||||
prometheusAPIV1QueryURL string
|
||||
prometheusAPIV1QueryRangeURL string
|
||||
prometheusAPIV1SeriesURL string
|
||||
prometheusAPIV1ExportURL string
|
||||
prometheusAPIV1ExportNativeURL string
|
||||
prometheusAPIV1QueryURL string
|
||||
prometheusAPIV1QueryRangeURL string
|
||||
prometheusAPIV1SeriesURL string
|
||||
}
|
||||
|
||||
// StartVmsingle starts an instance of vmsingle with the given flags. It also
|
||||
@@ -81,6 +82,7 @@ func StartVmsingle(instance string, flags []string, cli *Client) (*Vmsingle, err
|
||||
prometheusAPIV1ImportPrometheusURL: fmt.Sprintf("http://%s/prometheus/api/v1/import/prometheus", stderrExtracts[1]),
|
||||
prometheusAPIV1WriteURL: fmt.Sprintf("http://%s/prometheus/api/v1/write", stderrExtracts[1]),
|
||||
prometheusAPIV1ExportURL: fmt.Sprintf("http://%s/prometheus/api/v1/export", stderrExtracts[1]),
|
||||
prometheusAPIV1ExportNativeURL: fmt.Sprintf("http://%s/prometheus/api/v1/export/native", stderrExtracts[1]),
|
||||
prometheusAPIV1QueryURL: fmt.Sprintf("http://%s/prometheus/api/v1/query", stderrExtracts[1]),
|
||||
prometheusAPIV1QueryRangeURL: fmt.Sprintf("http://%s/prometheus/api/v1/query_range", stderrExtracts[1]),
|
||||
prometheusAPIV1SeriesURL: fmt.Sprintf("http://%s/prometheus/api/v1/series", stderrExtracts[1]),
|
||||
@@ -161,11 +163,31 @@ func (app *Vmsingle) PrometheusAPIV1ImportCSV(t *testing.T, records []string, op
|
||||
}
|
||||
}
|
||||
|
||||
// PrometheusAPIV1ImportNative is a test helper function that inserts a collection
|
||||
// of records in native format for the given tenant by sending an HTTP POST
|
||||
// request to /api/v1/import/native vmsingle endpoint.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-native-format
|
||||
func (app *Vmsingle) PrometheusAPIV1ImportNative(t *testing.T, data []byte, opts QueryOpts) {
|
||||
t.Helper()
|
||||
|
||||
url := fmt.Sprintf("http://%s/api/v1/import/native", app.httpListenAddr)
|
||||
uv := opts.asURLValues()
|
||||
uvs := uv.Encode()
|
||||
if len(uvs) > 0 {
|
||||
url += "?" + uvs
|
||||
}
|
||||
_, statusCode := app.cli.Post(t, url, "text/plain", data)
|
||||
if statusCode != http.StatusNoContent {
|
||||
t.Fatalf("unexpected status code: got %d, want %d", statusCode, http.StatusNoContent)
|
||||
}
|
||||
}
|
||||
|
||||
// OpenTSDBAPIPut is a test helper function that inserts a collection of
|
||||
// records in OpenTSDB format for the given tenant by sending an HTTP POST
|
||||
// request to /api/put vmsingle endpoint.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb#sending-data-via-http
|
||||
// See https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb/#sending-data-via-http
|
||||
func (app *Vmsingle) OpenTSDBAPIPut(t *testing.T, records []string, opts QueryOpts) {
|
||||
t.Helper()
|
||||
|
||||
@@ -235,6 +257,23 @@ func (app *Vmsingle) PrometheusAPIV1Export(t *testing.T, query string, opts Quer
|
||||
return NewPrometheusAPIV1QueryResponse(t, res)
|
||||
}
|
||||
|
||||
// PrometheusAPIV1ExportNative is a test helper function that performs the export of
|
||||
// raw samples in native binary format by sending an HTTP POST request to
|
||||
// /prometheus/api/v1/export/native vmselect endpoint.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victoriametrics/url-examples/#apiv1exportnative
|
||||
func (app *Vmsingle) PrometheusAPIV1ExportNative(t *testing.T, query string, opts QueryOpts) []byte {
|
||||
t.Helper()
|
||||
|
||||
t.Helper()
|
||||
values := opts.asURLValues()
|
||||
values.Add("match[]", query)
|
||||
values.Add("format", "promapi")
|
||||
|
||||
res, _ := app.cli.PostForm(t, app.prometheusAPIV1ExportNativeURL, values)
|
||||
return []byte(res)
|
||||
}
|
||||
|
||||
// PrometheusAPIV1Query is a test helper function that performs PromQL/MetricsQL
|
||||
// instant query by sending a HTTP POST request to /prometheus/api/v1/query
|
||||
// vmsingle endpoint.
|
||||
@@ -324,8 +363,7 @@ func (app *Vmsingle) APIV1AdminStatusMetricNamesStatsReset(t *testing.T, opts Qu
|
||||
func (app *Vmsingle) SnapshotCreate(t *testing.T) *SnapshotCreateResponse {
|
||||
t.Helper()
|
||||
|
||||
queryURL := fmt.Sprintf("http://%s/snapshot/create", app.httpListenAddr)
|
||||
data, statusCode := app.cli.Post(t, queryURL, "", nil)
|
||||
data, statusCode := app.cli.Post(t, app.SnapshotCreateURL(), "", nil)
|
||||
if got, want := statusCode, http.StatusOK; got != want {
|
||||
t.Fatalf("unexpected status code: got %d, want %d, resp text=%q", got, want, data)
|
||||
}
|
||||
@@ -338,6 +376,11 @@ func (app *Vmsingle) SnapshotCreate(t *testing.T) *SnapshotCreateResponse {
|
||||
return &res
|
||||
}
|
||||
|
||||
// SnapshotCreateURL returns the URL for creating snapshots.
|
||||
func (app *Vmsingle) SnapshotCreateURL() string {
|
||||
return fmt.Sprintf("http://%s/snapshot/create", app.httpListenAddr)
|
||||
}
|
||||
|
||||
// APIV1AdminTSDBSnapshot creates a database snapshot by sending a query to the
|
||||
// /api/v1/admin/tsdb/snapshot endpoint.
|
||||
//
|
||||
|
||||
@@ -99,8 +99,7 @@ func (app *Vmstorage) ForceMerge(t *testing.T) {
|
||||
func (app *Vmstorage) SnapshotCreate(t *testing.T) *SnapshotCreateResponse {
|
||||
t.Helper()
|
||||
|
||||
queryURL := fmt.Sprintf("http://%s/snapshot/create", app.httpListenAddr)
|
||||
data, statusCode := app.cli.Post(t, queryURL, "", nil)
|
||||
data, statusCode := app.cli.Post(t, app.SnapshotCreateURL(), "", nil)
|
||||
if got, want := statusCode, http.StatusOK; got != want {
|
||||
t.Fatalf("unexpected status code: got %d, want %d, resp text=%q", got, want, data)
|
||||
}
|
||||
@@ -113,6 +112,11 @@ func (app *Vmstorage) SnapshotCreate(t *testing.T) *SnapshotCreateResponse {
|
||||
return &res
|
||||
}
|
||||
|
||||
// SnapshotCreateURL returns the URL for creating snapshots.
|
||||
func (app *Vmstorage) SnapshotCreateURL() string {
|
||||
return fmt.Sprintf("http://%s/snapshot/create", app.httpListenAddr)
|
||||
}
|
||||
|
||||
// SnapshotList lists existing database snapshots by sending a query to the
|
||||
// /snapshot/list endpoint.
|
||||
//
|
||||
|
||||
@@ -3631,7 +3631,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "The percentage of [slow inserts](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-a-slow-insert) compared to the total ingestion rate. \\n\\nThe lower the better. \\n\\nIn short, slow insert is a cache miss. There are following reasons for slow inserts to go up: \\n* Ingestion of completely new, not seen before time series;\\n* [Re-routing](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#cluster-availability) of series when one or more vmstorage nodes are unavailable;\\n* Not enough memory to maintain big enough caches for the current workload.\\n\\nIf percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series). \\n\\nSee [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183) for details.",
|
||||
"description": "The percentage of [slow inserts](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-a-slow-insert) compared to the total ingestion rate. \n\nThe lower the better. \n\nIn short, slow insert is a cache miss. There are following reasons for slow inserts to go up: \n* Ingestion of completely new, not seen before time series;\n* [Re-routing](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#cluster-availability) of series when one or more vmstorage nodes are unavailable;\n* Not enough memory to maintain big enough caches for the current workload.\n\nIf percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series). \n\nSee [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183) for details.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -3888,7 +3888,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "The percentage of [slow inserts](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-a-slow-insert) compared to the total ingestion rate. \\n\\nThe lower the better. \\n\\nIn short, slow insert is a cache miss. There are following reasons for slow inserts to go up: \\n* Ingestion of completely new, not seen before time series;\\n* Not enough memory to maintain big enough caches for the current workload.\\n\\nIf percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series). \\n\\nSee [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183) for details.",
|
||||
"description": "The percentage of [slow inserts](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-a-slow-insert) compared to the total ingestion rate. \n\nThe lower the better. \n\nIn short, slow insert is a cache miss. There are following reasons for slow inserts to go up: \n* Ingestion of completely new, not seen before time series;\n* Not enough memory to maintain big enough caches for the current workload.\n\nIf percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series). \n\nSee [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183) for details.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -3632,7 +3632,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "The percentage of [slow inserts](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-a-slow-insert) compared to the total ingestion rate. \\n\\nThe lower the better. \\n\\nIn short, slow insert is a cache miss. There are following reasons for slow inserts to go up: \\n* Ingestion of completely new, not seen before time series;\\n* [Re-routing](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#cluster-availability) of series when one or more vmstorage nodes are unavailable;\\n* Not enough memory to maintain big enough caches for the current workload.\\n\\nIf percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series). \\n\\nSee [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183) for details.",
|
||||
"description": "The percentage of [slow inserts](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-a-slow-insert) compared to the total ingestion rate. \n\nThe lower the better. \n\nIn short, slow insert is a cache miss. There are following reasons for slow inserts to go up: \n* Ingestion of completely new, not seen before time series;\n* [Re-routing](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#cluster-availability) of series when one or more vmstorage nodes are unavailable;\n* Not enough memory to maintain big enough caches for the current workload.\n\nIf percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series). \n\nSee [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183) for details.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -3889,7 +3889,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "The percentage of [slow inserts](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-a-slow-insert) compared to the total ingestion rate. \\n\\nThe lower the better. \\n\\nIn short, slow insert is a cache miss. There are following reasons for slow inserts to go up: \\n* Ingestion of completely new, not seen before time series;\\n* Not enough memory to maintain big enough caches for the current workload.\\n\\nIf percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series). \\n\\nSee [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183) for details.",
|
||||
"description": "The percentage of [slow inserts](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-a-slow-insert) compared to the total ingestion rate. \n\nThe lower the better. \n\nIn short, slow insert is a cache miss. There are following reasons for slow inserts to go up: \n* Ingestion of completely new, not seen before time series;\n* Not enough memory to maintain big enough caches for the current workload.\n\nIf percentage remains high (>10%) during extended periods of time, then it is likely more RAM is needed for optimal handling of the current number of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series). \n\nSee [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183) for details.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
DOCKER_REGISTRIES ?= docker.io quay.io
|
||||
DOCKER_NAMESPACE ?= victoriametrics
|
||||
|
||||
ROOT_IMAGE ?= alpine:3.21.3
|
||||
ROOT_IMAGE ?= alpine:3.22.0
|
||||
ROOT_IMAGE_SCRATCH ?= scratch
|
||||
CERTS_IMAGE := alpine:3.21.3
|
||||
CERTS_IMAGE := alpine:3.22.0
|
||||
|
||||
GO_BUILDER_IMAGE := golang:1.24.3-alpine
|
||||
GO_BUILDER_IMAGE := golang:1.24.4-alpine
|
||||
BUILDER_IMAGE := local/builder:2.0.0-$(shell echo $(GO_BUILDER_IMAGE) | tr :/ __)-1
|
||||
BASE_IMAGE := local/base:1.1.4-$(shell echo $(ROOT_IMAGE) | tr :/ __)-$(shell echo $(CERTS_IMAGE) | tr :/ __)
|
||||
DOCKER ?= docker
|
||||
@@ -129,6 +129,11 @@ publish-via-docker:
|
||||
$(APP_NAME)-linux-ppc64le-prod \
|
||||
$(APP_NAME)-linux-386-prod
|
||||
|
||||
# Tag the already published $(APP_NAME):$(PKG_TAG) image as :latest in every
# registry listed in DOCKER_REGISTRIES.
# The container CLI comes from $(DOCKER) so the file-level override knob is
# honoured, and `|| exit 1` aborts the recipe on the first registry that fails
# instead of letting a later success hide the error.
publish-via-docker-latest:
	$(foreach registry,$(DOCKER_REGISTRIES),\
		$(DOCKER) buildx imagetools create --tag $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):latest $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG) || exit 1; \
	)
|
||||
|
||||
run-via-docker: package-via-docker
|
||||
$(DOCKER_RUN) -it --rm \
|
||||
--user $(shell id -u):$(shell id -g) \
|
||||
|
||||
@@ -6,7 +6,7 @@ RUN apk add git gcc musl-dev make wget --no-cache && \
|
||||
cd /opt/cross-builder && \
|
||||
for arch in aarch64 x86_64; do \
|
||||
wget \
|
||||
https://musl.cc/${arch}-linux-musl-cross.tgz \
|
||||
https://github.com/VictoriaMetrics/muslcc-mirror/releases/download/v1.0.0/${arch}-linux-musl-cross.tgz \
|
||||
-O /opt/cross-builder/${arch}-musl.tgz \
|
||||
--no-verbose && \
|
||||
tar zxf ${arch}-musl.tgz -C ./ && \
|
||||
|
||||
@@ -36,30 +36,30 @@ services:
|
||||
user: root
|
||||
|
||||
vlinsert:
|
||||
image: victoriametrics/victoria-logs:v1.22.2-victorialogs
|
||||
image: victoriametrics/victoria-logs:v1.23.3-victorialogs
|
||||
command:
|
||||
- "--storageNode=vlstorage-1:9428"
|
||||
- "--storageNode=vlstorage-2:9428"
|
||||
|
||||
vlselect-1:
|
||||
image: victoriametrics/victoria-logs:v1.22.2-victorialogs
|
||||
image: victoriametrics/victoria-logs:v1.23.3-victorialogs
|
||||
command:
|
||||
- "--storageNode=vlstorage-1:9428"
|
||||
- "--storageNode=vlstorage-2:9428"
|
||||
vlselect-2:
|
||||
image: victoriametrics/victoria-logs:v1.22.2-victorialogs
|
||||
image: victoriametrics/victoria-logs:v1.23.3-victorialogs
|
||||
command:
|
||||
- "--storageNode=vlstorage-1:9428"
|
||||
- "--storageNode=vlstorage-2:9428"
|
||||
|
||||
vlstorage-1:
|
||||
image: victoriametrics/victoria-logs:v1.22.2-victorialogs
|
||||
image: victoriametrics/victoria-logs:v1.23.3-victorialogs
|
||||
command:
|
||||
- "--storageDataPath=/vlogs"
|
||||
volumes:
|
||||
- vldata-1:/vlogs
|
||||
vlstorage-2:
|
||||
image: victoriametrics/victoria-logs:v1.22.2-victorialogs
|
||||
image: victoriametrics/victoria-logs:v1.23.3-victorialogs
|
||||
command:
|
||||
- "--storageDataPath=/vlogs"
|
||||
volumes:
|
||||
|
||||
@@ -38,7 +38,7 @@ services:
|
||||
# VictoriaLogs instance, a single process responsible for
|
||||
# storing logs and serving read queries.
|
||||
victorialogs:
|
||||
image: victoriametrics/victoria-logs:v1.22.2-victorialogs
|
||||
image: victoriametrics/victoria-logs:v1.23.3-victorialogs
|
||||
ports:
|
||||
- "9428:9428"
|
||||
command:
|
||||
|
||||
@@ -42,13 +42,13 @@ groups:
|
||||
description: "Job \"{{ $labels.job }}\" on instance {{ $labels.instance }} fails to scrape targets for last 15m"
|
||||
|
||||
- alert: ScrapePoolHasNoTargets
|
||||
expr: sum(vm_promscrape_scrape_pool_targets) without (status) == 0
|
||||
expr: sum(vm_promscrape_scrape_pool_targets) without (status, instance, pod) == 0
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Vmagent has scrape_pool with 0 configured/discovered targets"
|
||||
description: "Vmagent \"{{ $labels.job }}\" on instance {{ $labels.instance }} has scrape_pool \"{{ $labels.scrape_job }}\"
|
||||
description: "Vmagent \"{{ $labels.job }}\" has scrape_pool \"{{ $labels.scrape_job }}\"
|
||||
with 0 discovered targets. It is likely a misconfiguration. Please follow https://docs.victoriametrics.com/victoriametrics/vmagent/#debugging-scrape-targets
|
||||
to troubleshoot the scraping config."
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
services:
|
||||
# meta service will be ignored by compose
|
||||
.victorialogs:
|
||||
image: docker.io/victoriametrics/victoria-logs:v1.22.2-victorialogs
|
||||
image: docker.io/victoriametrics/victoria-logs:v1.23.3-victorialogs
|
||||
command:
|
||||
- -storageDataPath=/vlogs
|
||||
- -loggerFormat=json
|
||||
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
- '--external.alert.source=explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr": },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]'
|
||||
restart: always
|
||||
vmanomaly:
|
||||
image: victoriametrics/vmanomaly:v1.21.0
|
||||
image: victoriametrics/vmanomaly:v1.23.2
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
- vlogs
|
||||
|
||||
generator:
|
||||
image: golang:1.24.3-alpine
|
||||
image: golang:1.24.4-alpine
|
||||
restart: always
|
||||
working_dir: /go/src/app
|
||||
volumes:
|
||||
|
||||
@@ -2,7 +2,7 @@ version: "3"
|
||||
|
||||
services:
|
||||
generator:
|
||||
image: golang:1.24.3-alpine
|
||||
image: golang:1.24.4-alpine
|
||||
restart: always
|
||||
working_dir: /go/src/app
|
||||
volumes:
|
||||
|
||||
@@ -3,7 +3,7 @@ version: "3"
|
||||
services:
|
||||
# Run `make package-victoria-logs` to build victoria-logs image
|
||||
vlogs:
|
||||
image: docker.io/victoriametrics/victoria-logs:v1.22.2-victorialogs
|
||||
image: docker.io/victoriametrics/victoria-logs:v1.23.3-victorialogs
|
||||
volumes:
|
||||
- vlogs:/vlogs
|
||||
ports:
|
||||
|
||||
@@ -4,27 +4,27 @@ set -ex
|
||||
|
||||
# Unarchived size: 5.1M Apache.log
|
||||
if [ ! -f Apache.tar.gz ]; then
|
||||
curl -o Apache.tar.gz -C - https://zenodo.org/record/3227177/files/Apache.tar.gz?download=1
|
||||
curl -o Apache.tar.gz -L -C - https://zenodo.org/records/3227177/files/Apache.tar.gz?download=1
|
||||
fi
|
||||
|
||||
# Unarchived size: 13G hadoop-*.log
|
||||
if [ ! -f HDFS_2.tar.gz ]; then
|
||||
curl -o HDFS_2.tar.gz -C - https://zenodo.org/record/3227177/files/HDFS_2.tar.gz?download=1
|
||||
curl -o HDFS_2.tar.gz -L -C - https://zenodo.org/records/3227177/files/HDFS_2.tar.gz?download=1
|
||||
fi
|
||||
|
||||
# Unarchived size: 2.3M Linux.log
|
||||
if [ ! -f Linux.tar.gz ]; then
|
||||
curl -o Linux.tar.gz -C - https://zenodo.org/record/3227177/files/Linux.tar.gz?download=1
|
||||
curl -o Linux.tar.gz -L -C - https://zenodo.org/records/3227177/files/Linux.tar.gz?download=1
|
||||
fi
|
||||
|
||||
# Unarchived size: 32G Thunderbird.log
|
||||
if [ ! -f Thunderbird.tar.gz ]; then
|
||||
curl -o Thunderbird.tar.gz -C - https://zenodo.org/record/3227177/files/Thunderbird.tar.gz?download=1
|
||||
curl -o Thunderbird.tar.gz -L -C - https://zenodo.org/records/3227177/files/Thunderbird.tar.gz?download=1
|
||||
fi
|
||||
|
||||
# Unarchived size: 73M SSH.log
|
||||
if [ ! -f SSH.tar.gz ]; then
|
||||
curl -o SSH.tar.gz -C - https://zenodo.org/record/3227177/files/SSH.tar.gz?download=1
|
||||
curl -o SSH.tar.gz -L -C - https://zenodo.org/records/3227177/files/SSH.tar.gz?download=1
|
||||
fi
|
||||
|
||||
mkdir -p logs
|
||||
|
||||
@@ -27,9 +27,9 @@ VictoriaMetrics supports metrics scraping in the same way as Prometheus does. Ch
|
||||
|
||||
Besides scraping, VictoriaMetrics accepts write requests for various ingestion protocols. This One Click app supports the following protocols:
|
||||
|
||||
- [Datadog](https://docs.victoriametrics.com/victoriametrics/integrations/datadog), [Influx (telegraph)](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb#influxdb-compatible-agents-such-as-telegraf), [JSON](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-json-line-format), [CSV](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-csv-data), [Prometheus](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-prometheus-exposition-format) on port :8428
|
||||
- [Datadog](https://docs.victoriametrics.com/victoriametrics/integrations/datadog/), [Influx (Telegraf)](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb/#influxdb-compatible-agents-such-as-telegraf), [JSON](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-json-line-format), [CSV](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-csv-data), [Prometheus](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-prometheus-exposition-format) on port :8428
|
||||
- [Graphite (statsd)](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#ingesting) on port :2003 tcp/udp
|
||||
- [OpenTSDB](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb) on port :4242
|
||||
- [OpenTSDB](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb/) on port :4242
|
||||
- Influx (Telegraf) on port :8089 tcp/udp
|
||||
|
||||
See more details and examples in [official documentation](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/).
|
||||
|
||||
@@ -27,9 +27,9 @@ VictoriaMetrics supports metrics scraping in the same way as Prometheus does. Ch
|
||||
|
||||
Besides scraping, VictoriaMetrics accepts write requests for various ingestion protocols. This One Click app supports the following protocols:
|
||||
|
||||
- [Datadog](https://docs.victoriametrics.com/victoriametrics/integrations/datadog), [Influx (telegraph)](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb#influxdb-compatible-agents-such-as-telegraf), [JSON](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-json-line-format), [CSV](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-csv-data), [Prometheus](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-prometheus-exposition-format) on port :8428
|
||||
- [Datadog](https://docs.victoriametrics.com/victoriametrics/integrations/datadog/), [Influx (Telegraf)](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb/#influxdb-compatible-agents-such-as-telegraf), [JSON](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-json-line-format), [CSV](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-csv-data), [Prometheus](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-data-in-prometheus-exposition-format) on port :8428
|
||||
- [Graphite (statsd)](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#ingesting) on port :2003 tcp/udp
|
||||
- [OpenTSDB](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb) on port :4242
|
||||
- [OpenTSDB](https://docs.victoriametrics.com/victoriametrics/integrations/opentsdb/) on port :4242
|
||||
- Influx (Telegraf) on port :8089 tcp/udp
|
||||
|
||||
See more details and examples in [official documentation](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/).
|
||||
|
||||
@@ -14,6 +14,35 @@ aliases:
|
||||
---
|
||||
Please find the changelog for VictoriaMetrics Anomaly Detection below.
|
||||
|
||||
## v1.23.2
|
||||
Released: 2025-06-09
|
||||
|
||||
- IMPROVEMENT: Increased convergence speed for [OnlineZScoreModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-z-score), [ZScoreModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#z-score), [MADModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#mad), and [OnlineMADModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-mad) models. Now it works better for tight optimization budgets (n_trials < 10, timeout < 1s)
|
||||
|
||||
- BUGFIX: Now mean and variance of [OnlineZScoreModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-z-score) with exponential `decay` < 1 [arg](https://docs.victoriametrics.com/anomaly-detection/components/models/#decay) are properly calculated for unbiased predictions.
|
||||
|
||||
## v1.23.1
|
||||
Released: 2025-06-08
|
||||
|
||||
- BUGFIX: In [sharding mode](https://docs.victoriametrics.com/anomaly-detection/scaling-vmanomaly/#horizontal-scalability) the corner case when shard number (`VMANOMALY_MEMBER_NUM`) is greater than the number of configured shards (`VMANOMALY_MEMBERS_COUNT`) is now properly handled.
|
||||
|
||||
- BUGFIX: In [sharding mode](https://docs.victoriametrics.com/anomaly-detection/scaling-vmanomaly/#horizontal-scalability), the corner case when the number of produced [sub-configurations](https://docs.victoriametrics.com/anomaly-detection/scaling-vmanomaly/#sub-configuration) is less than the number of configured shards (`VMANOMALY_MEMBERS_COUNT`) is now properly handled. Until config hot-reload is supported, such "idle" shards will be turned off with exit code 1 and respective critical message logged.
|
||||
|
||||
## v1.23.0
|
||||
Released: 2025-06-05
|
||||
|
||||
- FEATURE: Added `decay` [argument](https://docs.victoriametrics.com/anomaly-detection/components/models/#decay) to [online models](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models). This parameter allows for newer data to be weighted more heavily in online models. By default, this is set to 1, which means all data points are weighted the same to maintain backward compatibility with existing configs. The closer this value is to 0, the more important new data is.
|
||||
|
||||
- IMPROVEMENT: **Restored parallelization** in the read/fit/infer pipeline, previously disabled in [v1.22.0](#v1220-experimental) due to deadlock issues. The new implementation prevents deadlocks and allows you to control the parallelization level via `n_workers` in [settings section](https://docs.victoriametrics.com/anomaly-detection/components/settings/). It's suggested to upgrade from [v1.22.0](#v1220) - [v1.22.1](#v1221) to this version to regain the performance benefits of parallel processing.
|
||||
|
||||
- IMPROVEMENT: Added `--dryRun` [argument](https://docs.victoriametrics.com/anomaly-detection/quickstart/#command-line-arguments) to `vmanomaly` to enable dry run mode. This mode allows you to validate the configuration without executing any actual operations and doesn't require a license. It is particularly useful for testing configurations before deploying them in a production environment.
|
||||
|
||||
- IMPROVEMENT: Enhanced task scheduling to reduce locks between anomaly detection models' fit and inference calls, improving their concurrent performance.
|
||||
|
||||
- IMPROVEMENT: `min_dev_from_expected` model [common argument](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-deviation-from-expected) is now bi-directional, allowing you to set *different* thresholds for peaks and drops.
|
||||
|
||||
- BUGFIX: Now `clip_predictions` [model common arg](https://docs.victoriametrics.com/anomaly-detection/components/models/#clip-predictions) is properly used with [online models](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models), ensuring that the predictions are clipped to the respective query's `data_range` values even if the model saw *fewer datapoints* than required `min_n_samples_seen_` to produce anomaly scores (e.g., when a new model instance was created during `infer` call for new timeseries not seen at training time).
|
||||
|
||||
## v1.22.1
|
||||
Released: 2025-05-11
|
||||
|
||||
@@ -28,7 +57,7 @@ Released: 2025-04-11
|
||||
|
||||
**(Experimental Patch Release)**
|
||||
|
||||
> Important Notice - this patch disables parallelization to resolve rate but critical deadlock issue that completely halted the fit/infer pipeline (resulting in no anomaly scores, no model refits, and no log output) on multicore systems. Although this change improves resource usage by reducing peak-to-average RAM consumption, it incurs a 2–4x slowdown in fit/infer routines. We recommend upgrading only if your current deployments are experiencing deadlock-related outages. Future releases will reintroduce optimized parallelization.
|
||||
> Important Notice - this patch disables parallelization to resolve a rare but critical deadlock issue that completely halted the fit/infer pipeline (resulting in no anomaly scores, no model refits, and no log output) on multicore systems. Although this change improves resource usage by reducing peak-to-average RAM consumption, it incurs a 2–4x slowdown in fit/infer routines. We recommend upgrading only if your current deployments are experiencing deadlock-related outages. Please upgrade to [v1.23.0](#v1230) or newer for restored parallelization.
|
||||
|
||||
- BUGFIX: Resolved an intermittent deadlock in the fit/infer process that previously caused the service to freeze indefinitely, thereby preventing anomaly score production and model refits on multicore systems.
|
||||
|
||||
@@ -189,7 +218,7 @@ Released: 2024-10-22
|
||||
## v1.17.1
|
||||
Released: 2024-10-18
|
||||
|
||||
- BUGFIX: [Prophet models](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) no longer fail to train on *constant* data, data consisting of the same value and no variation across time. The bug prevented the `fit` stage from completing successfully, resulting in the model instance not being stored in the model registry, after automated model cleanup was added in [v1.17.0](#1170).
|
||||
- BUGFIX: [Prophet models](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) no longer fail to train on *constant* data, data consisting of the same value and no variation across time. The bug prevented the `fit` stage from completing successfully, resulting in the model instance not being stored in the model registry, after automated model cleanup was added in [v1.17.0](#v1170).
|
||||
|
||||
## v1.17.0
|
||||
Released: 2024-10-17
|
||||
|
||||
@@ -224,7 +224,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.21.0
|
||||
image: victoriametrics/vmanomaly:v1.23.2
|
||||
# ...
|
||||
ports:
|
||||
- "8490:8490"
|
||||
@@ -256,9 +256,10 @@ For Helm chart users, refer to the `persistentVolume` [section](https://github.c
|
||||
|
||||
With the introduction of [online models](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) {{% available_from "v1.15.0" anomaly %}} , you can additionally reduce resource consumption (e.g., flatten `fit` stage peaks by querying less data from VictoriaMetrics at once).
|
||||
|
||||
- **Reduced Latency**: Online models update incrementally, which can lead to faster response times for anomaly detection since the model continuously adapts to new data without waiting for a batch `fit`.
|
||||
- **Reduced latency**: Online models update incrementally, which can lead to faster response times for anomaly detection since the model continuously adapts to new data without waiting for a batch `fit`.
|
||||
- **Scalability**: Handling smaller data chunks at a time reduces memory and computational overhead, making it easier to scale the anomaly detection system.
|
||||
- **Improved Resource Utilization**: By spreading the computational load over time and reducing peak demands, online models make more efficient use of system resources, potentially lowering operational costs.
|
||||
- **Optimized resource utilization**: By spreading the computational load over time and reducing peak demands, online models make more efficient use of resources and induce less data transfer from VictoriaMetrics TSDB, improving overall system performance.
|
||||
- **Faster convergence**: Online models can adapt {{% available_from "v1.23.0" anomaly %}} to changes in data patterns more quickly, which is particularly beneficial in dynamic environments where data characteristics may shift frequently. See `decay` argument description [here](https://docs.victoriametrics.com/anomaly-detection/components/models/#decay).
|
||||
|
||||
Here's an example of how we can switch from (offline) [Z-score model](https://docs.victoriametrics.com/anomaly-detection/components/models/#z-score) to [Online Z-score model](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-z-score):
|
||||
|
||||
@@ -292,6 +293,7 @@ models:
|
||||
zscore_example:
|
||||
class: 'zscore_online'
|
||||
min_n_samples_seen: 120 # i.e. minimal relevant seasonality or (initial) fit_window / sampling_frequency
|
||||
decay: 0.999 # decay factor to control how fast the model adapts to new data, the lower, the faster it adapts
|
||||
schedulers: ['periodic']
|
||||
# other model params ...
|
||||
# other config sections ...
|
||||
@@ -430,7 +432,7 @@ options:
|
||||
Here’s an example of using the config splitter to divide configurations based on the `extra_filters` argument from the reader section:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.21.0 && docker image tag victoriametrics/vmanomaly:v1.21.0 vmanomaly
|
||||
docker pull victoriametrics/vmanomaly:v1.23.2 && docker image tag victoriametrics/vmanomaly:v1.23.2 vmanomaly
|
||||
```
|
||||
|
||||
```sh
|
||||
|
||||
@@ -34,24 +34,26 @@ The `vmanomaly` service supports several command-line arguments to configure its
|
||||
> `vmanomaly` supports {{% available_from "v1.18.5" anomaly %}} running on config **directories**, see the `config` positional arg description in the help message below.
|
||||
|
||||
```shellhelp
|
||||
usage: vmanomaly.py [-h] [--license STRING | --licenseFile PATH] [--license.forceOffline] [--loggerLevel {INFO,DEBUG,ERROR,WARNING,FATAL}] [--watch] config [config ...]
|
||||
usage: vmanomaly.py [-h] [--license STRING | --licenseFile PATH] [--license.forceOffline] [--loggerLevel {DEBUG,WARNING,FATAL,ERROR,INFO}] [--watch] [--dryRun] [--outputSpec PATH] config [config ...]
|
||||
|
||||
VictoriaMetrics Anomaly Detection Service
|
||||
|
||||
positional arguments:
|
||||
config YAML config file(s) or directories containing YAML files. Multiple files will recursively merge each other values so multiple configs can be combined. If a directory
|
||||
is provided, all `.yaml` files inside will be merged, without recursion. Default: vmanomaly.yaml is expected in the current directory.
|
||||
config YAML config file(s) or directories containing YAML files. Multiple files will recursively merge each other values so multiple configs can be combined. If a directory is provided,
|
||||
all `.yaml` files inside will be merged, without recursion. Default: vmanomaly.yaml is expected in the current directory.
|
||||
|
||||
options:
|
||||
-h show this help message and exit
|
||||
--license STRING License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/trial/ to obtain a trial license.
|
||||
--licenseFile PATH Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/trial/ to obtain a trial license.
|
||||
--license.forceOffline
|
||||
Whether to force offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The
|
||||
issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification.
|
||||
--loggerLevel {INFO,DEBUG,ERROR,WARNING,FATAL}
|
||||
Whether to force offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued
|
||||
license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification.
|
||||
--loggerLevel {DEBUG,WARNING,FATAL,ERROR,INFO}
|
||||
Minimum level to log. Possible values: DEBUG, INFO, WARNING, ERROR, FATAL.
|
||||
--watch [DEPRECATED SINCE v1.11.0] Watch config files for changes. This option is no longer supported and will be ignored.
|
||||
--dryRun Validate only: parse + merge all YAML(s) and run schema checks, then exit. Does not require a license to run. Does not expose metrics, or launch vmanomaly service(s).
|
||||
--outputSpec PATH Target location of .yaml output spec.
|
||||
```
|
||||
|
||||
You can specify these options when running `vmanomaly` to fine-tune logging levels or handle licensing configurations, as per your requirements.
|
||||
@@ -116,13 +118,13 @@ Below are the steps to get `vmanomaly` up and running inside a Docker container:
|
||||
1. Pull Docker image:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.20.1
|
||||
docker pull victoriametrics/vmanomaly:v1.23.2
|
||||
```
|
||||
|
||||
2. (Optional step) tag the `vmanomaly` Docker image:
|
||||
|
||||
```sh
|
||||
docker image tag victoriametrics/vmanomaly:v1.20.1 vmanomaly
|
||||
docker image tag victoriametrics/vmanomaly:v1.23.2 vmanomaly
|
||||
```
|
||||
|
||||
3. Start the `vmanomaly` Docker container with a *license file*, use the command below.
|
||||
@@ -156,7 +158,7 @@ docker run -it --user 1000:1000 \
|
||||
services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
image: victoriametrics/vmanomaly:v1.21.0
|
||||
image: victoriametrics/vmanomaly:v1.23.2
|
||||
volumes:
|
||||
$YOUR_LICENSE_FILE_PATH:/license
|
||||
$YOUR_CONFIG_FILE_PATH:/config.yml
|
||||
@@ -189,8 +191,9 @@ with [these Helm charts](https://github.com/VictoriaMetrics/helm-charts/blob/mas
|
||||
## How to configure vmanomaly
|
||||
To run `vmanomaly` you need to set up configuration file in `yaml` format.
|
||||
|
||||
Here is an example of config file that will run [Facebook Prophet](https://facebook.github.io/prophet/) model, that will be retrained every 2 hours on 14 days of previous data. It will generate [inference metrics](https://docs.victoriametrics.com/anomaly-detection/components/models#vmanomaly-output) (including `anomaly_score`) every 1 minute.
|
||||
> Before deploying, check the correctness of your configuration by validating the config file(s) with the `--dryRun` [command-line](#command-line-arguments) flag for the chosen deployment method (Docker, Kubernetes, etc.). This will parse and merge all YAML files, run schema checks, log errors and warnings (if found), and then exit without starting the service or requiring a license.
|
||||
|
||||
Here is an example of config file that will run [Facebook Prophet](https://facebook.github.io/prophet/) model, that will be retrained every 2 hours on 14 days of previous data. It will generate [inference metrics](https://docs.victoriametrics.com/anomaly-detection/components/models#vmanomaly-output) (including `anomaly_score`) every 1 minute.
|
||||
|
||||
```yaml
|
||||
schedulers:
|
||||
@@ -206,7 +209,7 @@ models:
|
||||
prophet_model:
|
||||
class: 'prophet'
|
||||
provide_series: ['anomaly_score', 'yhat', 'yhat_lower', 'yhat_upper'] # for debugging
|
||||
tz_aware: True
|
||||
tz_aware: True # set to True if your data is timezone-aware, to deal with DST changes correctly
|
||||
tz_use_cyclical_encoding: True
|
||||
tz_seasonalities: # intra-day + intra-week seasonality
|
||||
- name: 'hod' # intra-day seasonality, hour of the day
|
||||
|
||||
@@ -80,7 +80,7 @@ Additionally, a replication factor `R ≥ 1` ensures [high availability](#high-a
|
||||
|
||||
<p></p>
|
||||
|
||||
<img src="../vmanomaly-sharding-ha-diagram.webp" alt="vmanomaly-sharding-ha-diagram" width="800px"/>
|
||||

|
||||
|
||||
> Please [refer to deployment options section](#deployment-options) for the examples (Docker, Docker Compose, Helm). To avoid duplicate metrics being reported from each vmanomaly service used in sharded mode, make sure that [deduplication](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) is configured on vmsingle or vmselect and vmstorage for the VictoriaMetrics instance used in the [writer section of the configuration](https://docs.victoriametrics.com/anomaly-detection/components/writer).
|
||||
|
||||
@@ -132,7 +132,7 @@ When `VMANOMALY_REPLICATION_FACTOR` > 1, each [sub-config](#sub-configuration) `
|
||||
|
||||
<p></p>
|
||||
|
||||
<img src="../vmanomaly-sharding-ha-diagram.webp" alt="vmanomaly-sharding-ha-diagram" width="800px"/>
|
||||

|
||||
|
||||
> Please [refer to deployment options section](#deployment-options) for the examples (Docker, Docker Compose, Helm). To avoid duplicate metrics being reported from each vmanomaly service used in sharded mode, make sure that [deduplication](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) is configured on vmsingle or vmselect and vmstorage for the VictoriaMetrics instance used in the [writer section of the configuration](https://docs.victoriametrics.com/anomaly-detection/components/writer).
|
||||
|
||||
|
||||
@@ -44,13 +44,13 @@ The Grafana Dashboard is helpful for:
|
||||
|
||||
> Use the **top-level dashboard filters** to refine metrics by job, instance, or specific components for more focused monitoring. The time range filter, along with `job` and `instance` filters, is applied across all components. All other filters apply to all dashboard sections except "Instance Overview." Hover over the (i) icon for detailed filter descriptions.
|
||||
|
||||
<img src="../vmanomaly-dashboard-1-filters.webp" alt="vmanomaly-dashboard-1-filters" width="800px"/>
|
||||

|
||||
|
||||
The Grafana Dashboard for `vmanomaly` is organized into various panels that offer insights into different components and their operational metrics. The main sections are as follows:
|
||||
|
||||
### Instance Overview
|
||||
|
||||
<img src="../vmanomaly-dashboard-2-instance-overview.webp" alt="vmanomaly-dashboard-2-instance-overview" width="800px"/>
|
||||

|
||||
|
||||
This panel provides general information about the state at individual `instance` level, including metrics such as uptime, restarts, errors, license expiration, and overall status. It serves as a critical starting point for assessing the health of the anomaly detection service. If any issues are identified with a particular instance — such as a low success rate, a high number of skipped or erroneous runs, or increased resource consumption — you can drill down further by using the dashboard filter `instance={{instance}}` for more detailed analysis.
|
||||
|
||||
@@ -69,7 +69,7 @@ This global panel holds statistics related to models, filtered by the dashboard
|
||||
- Counts of successful, skipped, or erroneous model runs.
|
||||
- Average timings for different model stages.
|
||||
|
||||
<img src="../vmanomaly-dashboard-3-global-panel-models.webp" alt="vmanomaly-dashboard-3-global-panel-models" width="800px"/>
|
||||

|
||||
|
||||
**Healthy scenario**:
|
||||
- **Data Acceptance**: Should be consistently high, ideally close to 100%. This indicates that the system is successfully processing the majority of incoming data without issues (e.g., no NaNs or Inf values).
|
||||
@@ -81,7 +81,7 @@ This global panel holds statistics related to models, filtered by the dashboard
|
||||
#### I/O
|
||||
This global panel holds statistics related to I/O operations and data processing, filtered by the dashboard settings.
|
||||
|
||||
<img src="../vmanomaly-dashboard-3-global-panel-io.webp" alt="vmanomaly-dashboard-3-global-io" width="800px"/>
|
||||

|
||||
|
||||
**Healthy scenario**:
|
||||
- **I/O success, %**: Should be close to 100%.
|
||||
@@ -90,7 +90,7 @@ This global panel holds statistics related to I/O operations and data processing
|
||||
#### Latency
|
||||
This global panel holds latency statistics (reads, writes, response processing by stages), filtered by the dashboard settings.
|
||||
|
||||
<img src="../vmanomaly-dashboard-3-global-panel-latency.webp" alt="vmanomaly-dashboard-3-global-latency" width="800px"/>
|
||||

|
||||
|
||||
**Healthy scenario**:
|
||||
- **Timeseries graphs**: Should appear stable over time, without significant spikes or drops.
|
||||
@@ -100,7 +100,7 @@ This global panel holds latency statistics (reads, writes, response processing b
|
||||
|
||||
This global panel holds resource utilization (CPU, RAM, File Descriptors) on both an overall and per-`instance` level, filtered by the dashboard settings.
|
||||
|
||||
<img src="../vmanomaly-dashboard-3-global-panel-resources.webp" alt="vmanomaly-dashboard-3-global-resources" width="800px"/>
|
||||

|
||||
|
||||
**Healthy scenario**:
|
||||
- **Timeseries graphs**: Should appear stable over time, without significant spikes or drops. An absence of upward trends (e.g., trends in RAM usage may indicate a [high churn rate](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-high-churn-rate) in your input data).
|
||||
@@ -109,7 +109,7 @@ This global panel holds resource utilization (CPU, RAM, File Descriptors) on bot
|
||||
|
||||
These panels contain repeated blocks for each unique `model_alias` (a distinct entity defined in the `models` [configuration section](https://docs.victoriametrics.com/anomaly-detection/components/models)), filtered according to the current dashboard settings. They provide information on the number of unique entities (such as queries, schedulers, and instances) that a particular `model_alias` interacts with, as well as the count of active model instances available for inferring new data.
|
||||
|
||||
<img src="../vmanomaly-dashboard-4-model-sections.webp" alt="vmanomaly-dashboard-4-model-sections" width="800px"/>
|
||||

|
||||
|
||||
**Healthy scenario**:
|
||||
- **Erroneous Runs**: There should be zero erroneous runs. Any errors suggest potential issues with the service or uncaught corner cases that need immediate attention.
|
||||
@@ -130,7 +130,7 @@ The alerting rules are provided in a YAML file called [`alerts-vmanomaly.yml`](h
|
||||
|
||||
These alerting rules complement the [dashboard](#grafana-dashboard) to monitor the health of `vmanomaly`. Each alert has annotations to help understand the issue and guide troubleshooting efforts. Below are the key alerts included, grouped into 2 sections:
|
||||
|
||||
<img src="../firing-alerts-groups.webp" alt="firing-alerts-groups" width="800px"/>
|
||||

|
||||
|
||||
`vmanomaly-health` alerting group:
|
||||
- **`TooManyRestarts`**: Triggers if an instance restarts more than twice within 15 minutes, suggesting the process might be crashlooping and needs investigation.
|
||||
@@ -140,7 +140,7 @@ These alerting rules complements the [dashboard](#grafana-dashboard) to monitor
|
||||
- **`TooHighMemoryUsage`**: Alerts when RAM usage exceeds 85% for a continuous 5-minute period, indicating the need to adjust resource allocation or load.
|
||||
- **`NoSelfMonitoringMetrics`**: Alerts when the vmanomaly uptime metric has not been seen in VictoriaMetrics for 15 minutes, indicating the service is down or unable to push metrics to VictoriaMetrics.
|
||||
|
||||
<img src="../firing-alerts-example-too-many-restarts.webp" alt="firing-alerts-example-too-many-restarts" width="800px"/>
|
||||

|
||||
|
||||
`vmanomaly-issues` alerting group:
|
||||
- **`ServiceErrorsDetected`**: Alerts if model run errors are detected, indicating problems with the anomaly detection service or its dependencies.
|
||||
@@ -148,4 +148,4 @@ These alerting rules complements the [dashboard](#grafana-dashboard) to monitor
|
||||
- **`HighReadErrorRate`**: Alerts when the error rate for read operations exceeds 5% in a 5-minute window, suggesting issues with the data source, server constraints, or network.
|
||||
- **`HighWriteErrorRate`**: Alerts when the error rate for write operations exceeds 5% in a 5-minute window, indicating issues with data writing, potential server-side violations, or network problems.
|
||||
|
||||
<img src="../firing-alerts-example-skipped-runs.webp" alt="firing-alerts-example-skipped-runs" width="800px"/>
|
||||

|
||||
|
||||
@@ -5,6 +5,7 @@ This chapter describes different components, that correspond to respective secti
|
||||
- [Scheduler(s) section](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/) - Required
|
||||
- [Writer section](https://docs.victoriametrics.com/anomaly-detection/components/writer/) - Required
|
||||
- [Monitoring section](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/) - Optional
|
||||
- [Settings section](https://docs.victoriametrics.com/anomaly-detection/components/settings/) - Optional
|
||||
|
||||
> Once the service starts, automated config validation is performed{{% available_from "v1.7.2" anomaly %}}. Please see container logs for errors that need to be fixed to create fully valid config, visiting sections above for examples and documentation.
|
||||
|
||||
@@ -21,6 +22,10 @@ Below, you will find an example illustrating how the components of `vmanomaly` i
|
||||
Here's a minimalistic full config example, demonstrating many-to-many configuration (actual for [latest version](https://docs.victoriametrics.com/anomaly-detection/changelog/)):
|
||||
|
||||
```yaml
|
||||
settings:
|
||||
n_workers: 4 # number of workers to run models in parallel
|
||||
anomaly_score_outside_data_range: 5.0 # default anomaly score for anomalies outside expected data range
|
||||
|
||||
# how and when to run the models is defined by schedulers
|
||||
# https://docs.victoriametrics.com/anomaly-detection/components/scheduler/
|
||||
schedulers:
|
||||
@@ -51,7 +56,8 @@ models:
|
||||
provide_series: ['anomaly_score', 'yhat', 'yhat_lower', 'yhat_upper']
|
||||
queries: ['cpu_seconds_total']
|
||||
schedulers: ['periodic_1w'] # will be attached to 1-week schedule, fit every 1h and infer every 15m
|
||||
min_dev_from_expected: 0.01 # if |y - yhat| < 0.01, anomaly score will be 0
|
||||
min_dev_from_expected: [0.01, 0.01] # minimum deviation from expected value to be even considered as anomaly
|
||||
anomaly_score_outside_data_range: 1.5 # override default anomaly score outside expected data range
|
||||
detection_direction: 'above_expected'
|
||||
args: # model-specific arguments
|
||||
interval_width: 0.98
|
||||
|
||||
@@ -189,9 +189,11 @@ reader:
|
||||
|
||||
### Minimal deviation from expected
|
||||
|
||||
`min_dev_from_expected`{{% available_from "v1.13.0" anomaly %}} argument is designed to **reduce [false positives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-positive)** in scenarios where deviations between the actual value (`y`) and the expected value (`yhat`) are **relatively** high. Such deviations can cause models to generate high [anomaly scores](https://docs.victoriametrics.com/anomaly-detection/faq/#what-is-anomaly-score). However, these deviations may not be significant enough in **absolute values** from a business perspective to be considered anomalies. This parameter ensures that anomaly scores for data points where `|y - yhat| < min_dev_from_expected` are explicitly set to 0. By default, if this parameter is not set, it behaves as `min_dev_from_expected=0` to maintain backward compatibility.
|
||||
`min_dev_from_expected`{{% available_from "v1.13.0" anomaly %}} argument is designed to **reduce [false positives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-positive)** in scenarios where deviations between the actual value (`y`) and the expected value (`yhat`) are **relatively** high. Such deviations can cause models to generate high [anomaly scores](https://docs.victoriametrics.com/anomaly-detection/faq/#what-is-anomaly-score). However, these deviations may not be significant enough in **absolute values** from a business perspective to be considered anomalies. This parameter ensures that anomaly scores for data points where `|y - yhat| < min_dev_from_expected` are explicitly set to 0. If this parameter is not set, it defaults to `0` to maintain backward compatibility.
|
||||
|
||||
> `min_dev_from_expected` must be >= 0. The higher the value of `min_dev_from_expected`, the fewer data points will be available for anomaly detection, and vice versa.
|
||||
> {{% available_from "v1.23.0" anomaly %}} The `min_dev_from_expected` argument can be a list of two float values, allowing separate thresholds for upper and lower deviations. This is useful when the acceptable deviation varies in different directions (e.g., `min_dev_from_expected: [0.01, 0.02]` means that the lower bound is `0.01` when `y` is less than `yhat` and the upper bound is `0.02` when `y` is greater than `yhat`). If only one value is provided, it is broadcast to both directions, meaning that the same threshold is applied for both upper and lower deviations (e.g., `min_dev_from_expected: 0.01` means that the lower bound is `0.01` when `y` is less than `yhat` and the upper bound is also `0.01` when `y` is greater than `yhat`).
|
||||
|
||||
> `min_dev_from_expected` must be >= 0. The higher the value of `min_dev_from_expected`, the more significant the deviation must be to generate an anomaly score > 1. This helps in filtering out small deviations that may not be meaningful in the context of the monitored metric.
|
||||
|
||||
*Example*: Consider a scenario where CPU utilization is low and oscillates around 0.3% (0.003). A sudden spike to 1.3% (0.013) represents a +333% increase in **relative** terms, but only a +1 percentage point (0.01) increase in **absolute** terms, which may be negligible and not warrant an alert. Setting the `min_dev_from_expected` argument to `0.01` (1%) will ensure that all anomaly scores for deviations <= `0.01` are set to 0.
|
||||
|
||||
@@ -220,12 +222,13 @@ models:
|
||||
zscore_with_min_dev:
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
z_threshold: 3
|
||||
min_dev_from_expected: 5.0
|
||||
min_dev_from_expected: [5.0, 5.0]
|
||||
queries: ['need_to_include_min_dev'] # use such models on queries where domain experience confirm usefulness
|
||||
zscore_wo_min_dev:
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
z_threshold: 3
|
||||
# if not set, equals to setting min_dev_from_expected == 0
|
||||
# if not set, equals to setting min_dev_from_expected == 0 (meaning no filtering is applied)
|
||||
# min_dev_from_expected: [0.0, 0.0]
|
||||
queries: ['normal_behavior'] # use the default where it's not needed
|
||||
```
|
||||
|
||||
@@ -360,7 +363,7 @@ The `anomaly_score_outside_data_range` {{% available_from "v1.20.0" anomaly %}}
|
||||
|
||||
**How it works**
|
||||
- If **not set**, the **default value (`1.01`)** is used for backward compatibility.
|
||||
- If defined at the **service level** (`settings`), it applies to all models **unless overridden at the model level**.
|
||||
- If defined at the **service level** (`settings` [section](https://docs.victoriametrics.com/anomaly-detection/components/settings/#anomaly-score-outside-data-range)), it applies to all models **unless overridden at the model level**.
|
||||
- If set **per model**, it takes **priority over the global setting**.
|
||||
|
||||
**Example (override)**
|
||||
@@ -396,6 +399,38 @@ models:
|
||||
anomaly_score_outside_data_range: 3.0
|
||||
```
|
||||
|
||||
### Decay
|
||||
|
||||
> The `decay` argument works only in combination with [online models](#online-models) like [`ZScoreOnlineModel`](#online-z-score) or [`OnlineQuantileModel`](#online-seasonal-quantile).
|
||||
|
||||
The `decay` {{% available_from "v1.23.0" anomaly %}} argument is used to control the (exponential) **decay factor** for online models, which determines how quickly the model adapts to new data. It is a float value between `0.0` and `1.0`, where:
|
||||
- `1.0` means no decay (the model treats all data equally, without giving more weight to recent data). This is the default value for backward compatibility.
|
||||
- Less than `1.0` means that the model will give more weight to recent data, effectively "forgetting" older data over time.
|
||||
|
||||
Roughly speaking, `decay` = `d` means that the most recent N datapoints the model has processed contribute [1 - d^N] of total importance. For example, a decay of
|
||||
- `0.99` means that 100 recent datapoints will contribute as [1 - 0.99^100] = 63.40% of total importance
|
||||
- `0.999` means that 1000 recent datapoints will contribute as [1 - 0.999^1000] = 63.23% of total importance
|
||||
|
||||
For example, if the model is updated every 5 minutes (`scheduler.infer_every`) on five 1-minute datapoints, and there is a need to keep the last 1 day of data as the most impactful, setting `decay: 0.996` will ensure that the last (86400/60) = 1440 datapoints contribute [1 - 0.996^1440] ≈ 99.7% of total importance, without the need to re-train the model on all 1440 datapoints every day with `fit_every: 1d` (which would be the limitation for [offline models](#offline-models)).
|
||||
|
||||
Example config:
|
||||
|
||||
```yaml
|
||||
# other components like writer, schedulers, monitoring ...
|
||||
reader:
|
||||
# ...
|
||||
queries:
|
||||
q1: metricsql_expression1
|
||||
# ...
|
||||
|
||||
models:
|
||||
online_zscore:
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3.0
|
||||
decay: 0.996 # decay factor for online model, default is 1.0
|
||||
queries: ['q1']
|
||||
```
|
||||
|
||||
|
||||
## Model types
|
||||
|
||||
@@ -626,7 +661,7 @@ models:
|
||||
# schedulers: [all scheduler aliases defined in `scheduler` section]
|
||||
# queries: [all query aliases defined in `reader.queries` section]
|
||||
# detection_direction: 'both' # meaning both drops and spikes will be captured
|
||||
# min_dev_from_expected: 0.0 # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# min_dev_from_expected: [0.0, 0.0] # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# scale: [1.0, 1.0] # if needed, prediction intervals' width can be increased (>1) or narrowed (<1)
|
||||
# clip_predictions: False # if data_range for respective `queries` is set in reader, `yhat.*` columns will be clipped
|
||||
# anomaly_score_outside_data_range: 1.01 # auto anomaly score (1.01) if `y` (real value) is outside of data_range, if set
|
||||
@@ -656,7 +691,7 @@ models:
|
||||
# schedulers: [all scheduler aliases defined in `scheduler` section]
|
||||
# queries: [all query aliases defined in `reader.queries` section]
|
||||
# detection_direction: 'both' # meaning both drops and spikes will be captured
|
||||
# min_dev_from_expected: 0.0 # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# min_dev_from_expected: [0.0, 0.0] # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# scale: [1.0, 1.0] # if needed, prediction intervals' width can be increased (>1) or narrowed (<1)
|
||||
# clip_predictions: False # if data_range for respective `queries` is set in reader, `yhat.*` columns will be clipped
|
||||
# anomaly_score_outside_data_range: 1.01 # auto anomaly score (1.01) if `y` (real value) is outside of data_range, if set
|
||||
@@ -703,7 +738,7 @@ models:
|
||||
# schedulers: [all scheduler aliases defined in `scheduler` section]
|
||||
# queries: [all query aliases defined in `reader.queries` section]
|
||||
# detection_direction: 'both' # meaning both drops and spikes will be captured
|
||||
# min_dev_from_expected: 0.0 # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# min_dev_from_expected: [0.0, 0.0] # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# scale: [1.0, 1.0] # if needed, prediction intervals' width can be increased (>1) or narrowed (<1)
|
||||
# clip_predictions: False # if data_range for respective `queries` is set in reader, `yhat.*` columns will be clipped
|
||||
# anomaly_score_outside_data_range: 1.01 # auto anomaly score (1.01) if `y` (real value) is outside of data_range, if set
|
||||
@@ -739,7 +774,7 @@ models:
|
||||
# schedulers: [all scheduler aliases defined in `scheduler` section]
|
||||
# queries: [all query aliases defined in `reader.queries` section]
|
||||
# detection_direction: 'both' # meaning both drops and spikes will be captured
|
||||
# min_dev_from_expected: 0.0 # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# min_dev_from_expected: [0.0, 0.0] # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# scale: [1.0, 1.0] # if needed, prediction intervals' width can be increased (>1) or narrowed (<1)
|
||||
# clip_predictions: False # if data_range for respective `queries` is set in reader, `yhat.*` columns will be clipped
|
||||
# anomaly_score_outside_data_range: 1.01 # auto anomaly score (1.01) if `y` (real value) is outside of data_range, if set
|
||||
@@ -795,7 +830,7 @@ models:
|
||||
# schedulers: [all scheduler aliases defined in `scheduler` section]
|
||||
# queries: [all query aliases defined in `reader.queries` section]
|
||||
# detection_direction: 'both' # meaning both drops and spikes will be captured
|
||||
# min_dev_from_expected: 0.0 # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# min_dev_from_expected: [0.0, 0.0] # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# scale: [1.0, 1.0] # if needed, prediction intervals' width can be increased (>1) or narrowed (<1)
|
||||
# clip_predictions: False # if data_range for respective `queries` is set in reader, `yhat.*` columns will be clipped
|
||||
# anomaly_score_outside_data_range: 1.01 # auto anomaly score (1.01) if `y` (real value) is outside of data_range, if set
|
||||
@@ -831,7 +866,7 @@ models:
|
||||
# schedulers: [all scheduler aliases defined in `scheduler` section]
|
||||
# queries: [all query aliases defined in `reader.queries` section]
|
||||
# detection_direction: 'both' # meaning both drops and spikes will be captured
|
||||
# min_dev_from_expected: 0.0 # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# min_dev_from_expected: [0.0, 0.0] # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# scale: [1.0, 1.0] # if needed, prediction intervals' width can be increased (>1) or narrowed (<1)
|
||||
# clip_predictions: False # if data_range for respective `queries` is set in reader, `yhat.*` columns will be clipped
|
||||
# anomaly_score_outside_data_range: 1.01 # auto anomaly score (1.01) if `y` (real value) is outside of data_range, if set
|
||||
@@ -871,7 +906,7 @@ models:
|
||||
# schedulers: [all scheduler aliases defined in `scheduler` section]
|
||||
# queries: [all query aliases defined in `reader.queries` section]
|
||||
# detection_direction: 'both' # meaning both drops and spikes will be captured
|
||||
# min_dev_from_expected: 0.0 # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# min_dev_from_expected: [0.0, 0.0] # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# scale: [1.0, 1.0] # if needed, prediction intervals' width can be increased (>1) or narrowed (<1)
|
||||
# clip_predictions: False # if data_range for respective `queries` is set in reader, `yhat.*` columns will be clipped
|
||||
# anomaly_score_outside_data_range: 1.01 # auto anomaly score (1.01) if `y` (real value) is outside of data_range, if set
|
||||
@@ -907,7 +942,7 @@ models:
|
||||
# schedulers: [all scheduler aliases defined in `scheduler` section]
|
||||
# queries: [all query aliases defined in `reader.queries` section]
|
||||
# detection_direction: 'both' # meaning both drops and spikes will be captured
|
||||
# min_dev_from_expected: 0.0 # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# min_dev_from_expected: [0.0, 0.0] # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# scale: [1.0, 1.0] # if needed, prediction intervals' width can be increased (>1) or narrowed (<1)
|
||||
# clip_predictions: False # if data_range for respective `queries` is set in reader, `yhat.*` columns will be clipped
|
||||
# anomaly_score_outside_data_range: 1.01 # auto anomaly score (1.01) if `y` (real value) is outside of data_range, if set
|
||||
@@ -962,7 +997,7 @@ models:
|
||||
# schedulers: [all scheduler aliases defined in `scheduler` section]
|
||||
# queries: [all query aliases defined in `reader.queries` section]
|
||||
# detection_direction: 'both' # meaning both drops and spikes will be captured
|
||||
# min_dev_from_expected: 0.0 # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# min_dev_from_expected: [0.0, 0.0] # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# scale: [1.0, 1.0] # if needed, prediction intervals' width can be increased (>1) or narrowed (<1)
|
||||
# clip_predictions: False # if data_range for respective `queries` is set in reader, `yhat.*` columns will be clipped
|
||||
# anomaly_score_outside_data_range: 1.01 # auto anomaly score (1.01) if `y` (real value) is outside of data_range, if set
|
||||
@@ -999,7 +1034,7 @@ models:
|
||||
# schedulers: [all scheduler aliases defined in `scheduler` section]
|
||||
# queries: [all query aliases defined in `reader.queries` section]
|
||||
# detection_direction: 'both' # meaning both drops and spikes will be captured
|
||||
# min_dev_from_expected: 0.0 # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# min_dev_from_expected: [0.0, 0.0] # meaning, no minimal threshold is applied to prevent smaller anomalies
|
||||
# scale: [1.0, 1.0] # if needed, prediction intervals' width can be increased (>1) or narrowed (<1)
|
||||
# clip_predictions: False # if data_range for respective `queries` is set in reader, `yhat.*` columns will be clipped
|
||||
# anomaly_score_outside_data_range: 1.01 # auto anomaly score (1.01) if `y` (real value) is outside of data_range, if set
|
||||
@@ -1241,7 +1276,7 @@ monitoring:
|
||||
Let's pull the docker image for `vmanomaly`:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.21.0
|
||||
docker pull victoriametrics/vmanomaly:v1.23.2
|
||||
```
|
||||
|
||||
Now we can run the docker container putting as volumes both config and model file:
|
||||
@@ -1255,7 +1290,7 @@ docker run -it \
|
||||
-v $(PWD)/license:/license \
|
||||
-v $(PWD)/custom_model.py:/vmanomaly/model/custom.py \
|
||||
-v $(PWD)/custom.yaml:/config.yaml \
|
||||
victoriametrics/vmanomaly:v1.21.0 /config.yaml \
|
||||
victoriametrics/vmanomaly:v1.23.2 /config.yaml \
|
||||
--licenseFile=/license
|
||||
```
|
||||
|
||||
|
||||
@@ -65,8 +65,10 @@ There is change{{% available_from "v1.13.0" anomaly %}} of [`queries`](https://d
|
||||
> Having **different** individual `step` args for queries (i.e. `30s` for `q1` and `2m` for `q2`) is not yet supported for [multivariate model](https://docs.victoriametrics.com/anomaly-detection/components/models/#multivariate-models) if you want to run it on several queries simultaneously (i.e. setting [`queries`](https://docs.victoriametrics.com/anomaly-detection/components/models/#queries) arg of a model to [`q1`, `q2`]).
|
||||
|
||||
- `data_range`{{% available_from "v1.15.1" anomaly %}} (list[float | string]): It allows defining **valid** data ranges for input per individual query in `queries`, resulting in:
|
||||
- **High anomaly scores** (>1) when the *data falls outside the expected range*, indicating a data constraint violation.
|
||||
- **Lowest anomaly scores** (=0) when the *model's predictions (`yhat`) fall outside the expected range*, meaning uncertain predictions.
|
||||
- **High anomaly scores** (>1) when the *data falls outside the expected range*, indicating a data range constraint violation (e.g. an improperly configured MetricsQL query, sensor malfunction, overflows in underlying metrics, etc.). Anomaly scores can be set to a specific value, like `5`, to indicate a strong violation, using the `anomaly_score_outside_data_range` [arg](https://docs.victoriametrics.com/anomaly-detection/components/models/#score-outside-data-range) of the respective model this query is used in.
|
||||
- **Lowest anomaly scores** (=0) when the *model's predictions (`yhat`) fall outside the expected range*, meaning uncertain predictions that do not really align with the data.
|
||||
|
||||
Works together with `anomaly_score_outside_data_range` [arg](https://docs.victoriametrics.com/anomaly-detection/components/models/#score-outside-data-range) of a model to determine the anomaly score for such cases as well as with `clip_predictions` [arg](https://docs.victoriametrics.com/anomaly-detection/components/models/#clip-predictions) of a model to clip the predictions to the expected range.
|
||||
|
||||
> If not set explicitly (or if an older config style, prior to [v1.13.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1130), is used), then it is set to reader-level `data_range` arg{{% available_from "v1.18.1" anomaly %}}
|
||||
|
||||
|
||||
139
docs/anomaly-detection/components/settings.md
Normal file
139
docs/anomaly-detection/components/settings.md
Normal file
@@ -0,0 +1,139 @@
|
||||
---
|
||||
title: Settings
|
||||
weight: 6
|
||||
menu:
|
||||
docs:
|
||||
parent: "vmanomaly-components"
|
||||
weight: 6
|
||||
identifier: "vmanomaly-settings"
|
||||
tags:
|
||||
- metrics
|
||||
- enterprise
|
||||
aliases:
|
||||
- ./settings.html
|
||||
---
|
||||
|
||||
Through the **Settings** section of a config, you can configure the following parameters of the anomaly detection service:
|
||||
|
||||
## Anomaly Score Outside Data Range
|
||||
|
||||
This argument allows you to override the anomaly score for anomalies that are caused by values outside the expected **data range** of a particular [query](https://docs.victoriametrics.com/anomaly-detection/components/models#queries). The reasons for such anomalies can be various, such as improperly constructed MetricsQL queries, sensor malfunctions, or other issues that lead to unexpected values in the data and require investigation.
|
||||
|
||||
> If not set, the [anomaly score](https://docs.victoriametrics.com/anomaly-detection/faq#what-is-anomaly-score) for such anomalies defaults to `1.01` for backward compatibility. However, it is recommended to set it to a higher value, such as `5.0`, to better reflect the severity of anomalies that fall outside the expected data range, so that they are caught faster and prompt a check of the query for correctness and of the underlying data for potential issues.
|
||||
|
||||
Here's an example configuration that sets default anomaly score outside expected data range to `5.0` and overrides it for a specific model to `1.5`:
|
||||
|
||||
```yaml
|
||||
settings:
|
||||
n_workers: 4
|
||||
anomaly_score_outside_data_range: 5.0
|
||||
|
||||
schedulers:
|
||||
periodic:
|
||||
class: periodic
|
||||
fit_every: 5m
|
||||
fit_window: 3h
|
||||
infer_every: 30s
|
||||
# other schedulers
|
||||
|
||||
models:
|
||||
zscore_online_inherited:
|
||||
class: zscore_online
|
||||
z_threshold: 3.5
|
||||
clip_predictions: True
|
||||
# will be inherited from settings.anomaly_score_outside_data_range
|
||||
# anomaly_score_outside_data_range: 5.0
|
||||
zscore_online_override:
|
||||
class: zscore_online
|
||||
z_threshold: 3.5
|
||||
clip_predictions: True
|
||||
anomaly_score_outside_data_range: 1.5 # will override settings.anomaly_score_outside_data_range
|
||||
# other models
|
||||
|
||||
reader:
|
||||
class: vm
|
||||
datasource_url: 'https://play.victoriametrics.com'
|
||||
tenant_id: "0"
|
||||
queries:
|
||||
error_rate:
|
||||
expr: 'rand()*100 + rand()' # example query that generates values between 0 and 101, i.e. sometimes exceeding 100
|
||||
data_range: [0., 100.] # expected data range for the underlying query and business logic
|
||||
# other queries
|
||||
sampling_period: 30s
|
||||
latency_offset: 10ms
|
||||
query_from_last_seen_timestamp: False
|
||||
verify_tls: False
|
||||
# other reader settings
|
||||
|
||||
writer:
|
||||
class: "vm"
|
||||
datasource_url: http://localhost:8428
|
||||
metric_format:
|
||||
__name__: "$VAR"
|
||||
for: "$QUERY_KEY"
|
||||
# other writer settings
|
||||
|
||||
monitoring:
|
||||
push:
|
||||
url: http://localhost:8428
|
||||
push_frequency: 1m
|
||||
# other monitoring settings
|
||||
```
|
||||
|
||||
## Parallelization
|
||||
|
||||
The `n_workers` argument allows you to explicitly specify the number of workers for internal parallelization of the service. This can help improve performance on multicore systems by allowing the service to process multiple tasks in parallel. For backward compatibility, it's set to `1` by default, meaning that the service will run in a single-threaded mode. It should be an integer greater than or equal to `-1`, where `-1` and `0` mean that the service will automatically set the number of workers based on the number of available CPU cores.
|
||||
|
||||
Increasing the number can be particularly useful when dealing with a high volume of queries returning many (long) timeseries.
|
||||
Decreasing the number can be useful when running the service on a system with limited resources or when you want to reduce the load on the system.
|
||||
|
||||
Here's an example configuration that uses 4 workers for service's internal parallelization:
|
||||
|
||||
```yaml
|
||||
settings:
|
||||
n_workers: 4
|
||||
|
||||
schedulers:
|
||||
periodic:
|
||||
class: periodic
|
||||
fit_every: 5m
|
||||
fit_window: 3h
|
||||
infer_every: 30s
|
||||
# other schedulers
|
||||
|
||||
models:
|
||||
zscore_online_override:
|
||||
class: zscore_online
|
||||
z_threshold: 3.5
|
||||
clip_predictions: True
|
||||
# other models
|
||||
|
||||
reader:
|
||||
class: vm
|
||||
datasource_url: 'https://play.victoriametrics.com'
|
||||
tenant_id: "0"
|
||||
queries:
|
||||
example_query:
|
||||
expr: 'rand() + 1' # example query that generates random values between 1 and 2
|
||||
data_range: [1., 2.]
|
||||
# other queries
|
||||
sampling_period: 30s
|
||||
latency_offset: 10ms
|
||||
query_from_last_seen_timestamp: False
|
||||
verify_tls: False
|
||||
# other reader settings
|
||||
|
||||
writer:
|
||||
class: "vm"
|
||||
datasource_url: http://localhost:8428
|
||||
metric_format:
|
||||
__name__: "$VAR"
|
||||
for: "$QUERY_KEY"
|
||||
# other writer settings
|
||||
|
||||
monitoring:
|
||||
push:
|
||||
url: http://localhost:8428
|
||||
push_frequency: 1m
|
||||
# other monitoring settings
|
||||
```
|
||||
@@ -40,7 +40,7 @@ The value is designed to:
|
||||
- *fall between 0 and 1* if the model considers that the datapoint follows the usual pattern
|
||||
- *exceed 1* if the datapoint is abnormal
|
||||
|
||||
Then, users can enable alerting rules based on the **anomaly score** with [vmalert](#what-is-vmalert).
|
||||
Then, users can enable alerting rules based on the **anomaly score** with [vmalert](#id-2-what-is-vmalert).
|
||||
|
||||
## 2. What is vmalert?
|
||||
|
||||
@@ -387,7 +387,7 @@ services:
|
||||
restart: always
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.21.0
|
||||
image: victoriametrics/vmanomaly:v1.23.2
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -390,7 +390,7 @@ config:
|
||||
target_label: kubernetes_pod_name
|
||||
```
|
||||
|
||||
* By updating `remoteWrite` we configuring [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) to write scraped metrics into the `vminsert` service.
|
||||
* By updating `remoteWrite` we're configuring [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) to write scraped metrics into the `vminsert` service.
|
||||
* The second part of this yaml file is needed to add the `metric_relabel_configs` section that helps us to show Kubernetes metrics on the Grafana dashboard.
|
||||
|
||||
|
||||
@@ -473,7 +473,7 @@ By running this command we:
|
||||
* Provision a VictoriaMetrics data source with the url from the output above which we remembered.
|
||||
* Add [this dashboard](https://grafana.com/grafana/dashboards/11176) for [VictoriaMetrics Cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/).
|
||||
* Add [this dashboard](https://grafana.com/grafana/dashboards/12683) for [VictoriaMetrics Agent](https://docs.victoriametrics.com/victoriametrics/vmagent/).
|
||||
* Add [this dashboard](https://grafana.com/grafana/dashboards/14205) dashboard to see Kubernetes cluster metrics.
|
||||
* Add [this dashboard](https://grafana.com/grafana/dashboards/14205) to see Kubernetes cluster metrics.
|
||||
|
||||
|
||||
Please see the output log in your terminal. Copy, paste and run these commands.
|
||||
@@ -505,7 +505,7 @@ The VictoriaMetrics dashboard is also available to use:
|
||||
|
||||
vmagent has its own dashboard:
|
||||
|
||||

|
||||

|
||||
|
||||
## 6. Final thoughts
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ by VictoriaMetrics, so lookups by names or labels have the same query speed.
|
||||
|
||||
## Write data
|
||||
|
||||
VictoriaMetrics supports [InfluxDB line protocol](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb)
|
||||
VictoriaMetrics supports [InfluxDB line protocol](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb/)
|
||||
for data ingestion. For example, to write a measurement to VictoriaMetrics we need to send an HTTP POST request with
|
||||
payload in a line protocol format:
|
||||
```sh
|
||||
@@ -95,7 +95,7 @@ The expected response is the following:
|
||||
}
|
||||
```
|
||||
|
||||
_VictoriaMetrics performs additional [data mapping](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb#data-transformations)
|
||||
_VictoriaMetrics performs additional [data mapping](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb/#data-transformations)
|
||||
to the data ingested via InfluxDB line protocol._
|
||||
|
||||
VictoriaMetrics is also compatible with [Telegraf](https://github.com/influxdata/telegraf).
|
||||
@@ -117,7 +117,7 @@ querying and visualizing metrics:
|
||||
|
||||

|
||||
|
||||
VictoriaMetrics can be configured as [Grafana datasource](https://docs.victoriametrics.com/victoriametrics/integrations/grafana).
|
||||
VictoriaMetrics can be configured as [Grafana datasource](https://docs.victoriametrics.com/victoriametrics/integrations/grafana/).
|
||||
See more about [how to query data in VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#query-data).
|
||||
|
||||
### Basic concepts
|
||||
@@ -154,7 +154,7 @@ WHERE ("instance" = 'localhost')
|
||||
GROUP BY time (1m)
|
||||
```
|
||||
|
||||
Now, let's [import](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb#influxdb-compatible-agents-such-as-telegraf)
|
||||
Now, let's [import](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb/#influxdb-compatible-agents-such-as-telegraf)
|
||||
the same data sample in VictoriaMetrics and plot it in Grafana. To understand how the InfluxQL query might be translated
|
||||
to [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) let's break it into components first:
|
||||
|
||||
|
||||
@@ -100,8 +100,8 @@ It is hard to predict the amount of compute resources (CPU, Mem) or cluster size
|
||||
Active Time Series. The much better approach is to run tests for your type of load (ingestion and reads) and extrapolate
|
||||
from there.
|
||||
|
||||
For example, if you already run [Prometheus](https://docs.victoriametrics.com/victoriametrics/integrations/prometheus)
|
||||
or [Telegraf](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb#influxdb-compatible-agents-such-as-telegraf)
|
||||
For example, if you already run [Prometheus](https://docs.victoriametrics.com/victoriametrics/integrations/prometheus/)
|
||||
or [Telegraf](https://docs.victoriametrics.com/victoriametrics/integrations/influxdb/#influxdb-compatible-agents-such-as-telegraf)
|
||||
for metrics collection then just configure them (or a part of them) to replicate data to VictoriaMetrics. In this way,
|
||||
you'd have the most precise simulation of your production environment.
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user