mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-08 19:33:35 +03:00
Compare commits
13 Commits
v1.145.0-c
...
cluster
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1468b31fc9 | ||
|
|
702172ce66 | ||
|
|
c7aa1c3a8c | ||
|
|
ec3e329fb0 | ||
|
|
cbf8beee00 | ||
|
|
0d7cad418c | ||
|
|
b628b573af | ||
|
|
f52b752461 | ||
|
|
f56dfef272 | ||
|
|
0bceb58c76 | ||
|
|
4614afb4dc | ||
|
|
d5c142d7db | ||
|
|
7dd8892af1 |
@@ -26,7 +26,7 @@ var (
|
||||
// NewVMinsertServer creates and start vminsert server at the given addr
|
||||
func NewVMinsertServer(addr string, tc *tls.Config) (*vminsertapi.VMInsertServer, error) {
|
||||
api := &vminsertAPI{}
|
||||
return vminsertapi.NewVMInsertServer(addr, *vminsertConnsShutdownDuration, "clusternative", api, tc)
|
||||
return vminsertapi.NewServer(addr, *vminsertConnsShutdownDuration, "clusternative", api, tc)
|
||||
}
|
||||
|
||||
type vminsertAPI struct {
|
||||
|
||||
@@ -36,9 +36,6 @@ var (
|
||||
func NewVMSelectServer(addr string) (*vmselectapi.Server, error) {
|
||||
api := &vmstorageAPI{}
|
||||
limits := vmselectapi.Limits{
|
||||
MaxLabelNames: *maxTagKeys,
|
||||
MaxLabelValues: *maxTagValues,
|
||||
MaxTagValueSuffixes: *maxTagValueSuffixesPerSearch,
|
||||
MaxConcurrentRequests: *maxConcurrentRequests,
|
||||
MaxConcurrentRequestsFlagName: "clusternative.maxConcurrentRequests",
|
||||
MaxQueueDuration: *maxQueueDuration,
|
||||
@@ -69,6 +66,9 @@ func (api *vmstorageAPI) SearchMetricNames(qt *querytracer.Tracer, sq *storage.S
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQuery, labelName string, maxLabelValues int, deadline uint64) ([]string, error) {
|
||||
if maxLabelValues <= 0 || maxLabelValues > *maxTagValues {
|
||||
maxLabelValues = *maxTagValues
|
||||
}
|
||||
dl := searchutil.DeadlineFromTimestamp(deadline)
|
||||
labelValues, _, err := netstorage.LabelValues(qt, true, labelName, sq, maxLabelValues, dl)
|
||||
return labelValues, wrapClusterNativeError(err)
|
||||
@@ -76,12 +76,18 @@ func (api *vmstorageAPI) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQ
|
||||
|
||||
func (api *vmstorageAPI) TagValueSuffixes(qt *querytracer.Tracer, accountID, projectID uint32, tr storage.TimeRange, tagKey, tagValuePrefix string, delimiter byte,
|
||||
maxSuffixes int, deadline uint64) ([]string, error) {
|
||||
if maxSuffixes <= 0 || maxSuffixes > *maxTagValueSuffixesPerSearch {
|
||||
maxSuffixes = *maxTagValueSuffixesPerSearch
|
||||
}
|
||||
dl := searchutil.DeadlineFromTimestamp(deadline)
|
||||
suffixes, _, err := netstorage.TagValueSuffixes(qt, accountID, projectID, true, tr, tagKey, tagValuePrefix, delimiter, maxSuffixes, dl)
|
||||
return suffixes, wrapClusterNativeError(err)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) LabelNames(qt *querytracer.Tracer, sq *storage.SearchQuery, maxLabelNames int, deadline uint64) ([]string, error) {
|
||||
if maxLabelNames <= 0 || maxLabelNames > *maxTagKeys {
|
||||
maxLabelNames = *maxTagKeys
|
||||
}
|
||||
dl := searchutil.DeadlineFromTimestamp(deadline)
|
||||
labelNames, _, err := netstorage.LabelNames(qt, true, sq, maxLabelNames, dl)
|
||||
return labelNames, wrapClusterNativeError(err)
|
||||
|
||||
@@ -9,12 +9,10 @@ import (
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage/servers"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
@@ -29,7 +27,8 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vminsertapi"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vmselectapi"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -42,12 +41,23 @@ var (
|
||||
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the given -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
vminsertAddr = flag.String("vminsertAddr", ":8400", "TCP address to accept connections from vminsert services")
|
||||
vmselectAddr = flag.String("vmselectAddr", ":8401", "TCP address to accept connections from vmselect services")
|
||||
vminsertAddr = flag.String("vminsertAddr", ":8400", "TCP address to accept connections from vminsert services")
|
||||
vminsertConnsShutdownDuration = flag.Duration("storage.vminsertConnsShutdownDuration", 10*time.Second, "The time needed for gradual closing of vminsert connections during "+
|
||||
"graceful shutdown. Bigger duration reduces spikes in CPU, RAM and disk IO load on the remaining vmstorage nodes during rolling restart. "+
|
||||
"Smaller duration reduces the time needed to close all the vminsert connections, thus reducing the time for graceful shutdown. "+
|
||||
"Configured value must always be lower than the graceful shutdown period configured by the orchestration platform (terminationGracePeriodSeconds for Kubernetes). "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#improving-re-routing-performance-during-restart")
|
||||
vmselectAddr = flag.String("vmselectAddr", ":8401", "TCP address to accept connections from vmselect services")
|
||||
vmselectMaxConcurrentRequests = flag.Int("search.maxConcurrentRequests", 2*cgroup.AvailableCPUs(), "The maximum number of concurrent vmselect requests "+
|
||||
"the vmstorage can process at -vmselectAddr. It shouldn't be high, since a single request usually saturates a CPU core, and many concurrently executed requests "+
|
||||
"may require high amounts of memory. See also -search.maxQueueDuration")
|
||||
vmselectMaxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the incoming vmselect request waits for execution "+
|
||||
"when -search.maxConcurrentRequests limit is reached")
|
||||
vmselectDisableRPCCompression = flag.Bool("rpc.disableCompression", false, "Whether to disable compression of the data sent from vmstorage to vmselect. "+
|
||||
"This reduces CPU usage at the cost of higher network bandwidth usage")
|
||||
snapshotAuthKey = flagutil.NewPassword("snapshotAuthKey", "authKey, which must be passed in query string to /snapshot* pages. It overrides -httpAuth.*")
|
||||
forceMergeAuthKey = flagutil.NewPassword("forceMergeAuthKey", "authKey, which must be passed in query string to /internal/force_merge pages. It overrides -httpAuth.*")
|
||||
forceFlushAuthKey = flagutil.NewPassword("forceFlushAuthKey", "authKey, which must be passed in query string to /internal/force_flush pages. It overrides -httpAuth.*")
|
||||
snapshotsMaxAge = flagutil.NewRetentionDuration("snapshotsMaxAge", "3d", "Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted")
|
||||
_ = flag.Duration("snapshotCreateTimeout", 0, "Deprecated: this flag does nothing")
|
||||
|
||||
_ = flag.Duration("finalMergeDelay", 0, "Deprecated: this flag does nothing")
|
||||
@@ -119,7 +129,7 @@ var (
|
||||
)
|
||||
|
||||
func main() {
|
||||
// vmstoage is optimized for reduced memory allocations,
|
||||
// vmstorage is optimized for reduced memory allocations,
|
||||
// so it can run with the reduced GOGC in order to reduce the used memory,
|
||||
// while keeping CPU usage spent in GC at low levels.
|
||||
//
|
||||
@@ -178,7 +188,7 @@ func main() {
|
||||
LogNewSeries: *logNewSeries,
|
||||
}
|
||||
strg := storage.MustOpenStorage(*storageDataPath, opts)
|
||||
initStaleSnapshotsRemover(strg)
|
||||
vmStorage := newVMStorage(strg, *vmselectMaxConcurrentRequests)
|
||||
|
||||
var m storage.Metrics
|
||||
strg.UpdateMetrics(&m)
|
||||
@@ -192,19 +202,23 @@ func main() {
|
||||
|
||||
// register storage metrics
|
||||
storageMetrics := metrics.NewSet()
|
||||
storageMetrics.RegisterMetricsWriter(func(w io.Writer) {
|
||||
writeStorageMetrics(w, strg)
|
||||
})
|
||||
storageMetrics.RegisterMetricsWriter(vmStorage.writeStorageMetrics)
|
||||
metrics.RegisterSet(storageMetrics)
|
||||
|
||||
protoparserutil.StartUnmarshalWorkers()
|
||||
|
||||
servers.GetMaxUniqueTimeSeries() // for init and logging only.
|
||||
vminsertSrv, err := servers.NewVMInsertServer(*vminsertAddr, strg)
|
||||
vminsertSrv, err := vminsertapi.NewServer(*vminsertAddr, *vminsertConnsShutdownDuration, "vminsert", vmStorage, nil)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create a server with -vminsertAddr=%s: %s", *vminsertAddr, err)
|
||||
|
||||
}
|
||||
vmselectSrv, err := servers.NewVMSelectServer(*vmselectAddr, strg)
|
||||
limits := vmselectapi.Limits{
|
||||
MaxConcurrentRequests: *vmselectMaxConcurrentRequests,
|
||||
MaxConcurrentRequestsFlagName: "search.maxConcurrentRequests",
|
||||
MaxQueueDuration: *vmselectMaxQueueDuration,
|
||||
MaxQueueDurationFlagName: "search.maxQueueDuration",
|
||||
}
|
||||
vmselectSrv, err := vmselectapi.NewServer(*vmselectAddr, vmStorage, limits, *vmselectDisableRPCCompression)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create a server with -vmselectAddr=%s: %s", *vmselectAddr, err)
|
||||
}
|
||||
@@ -213,8 +227,7 @@ func main() {
|
||||
if len(listenAddrs) == 0 {
|
||||
listenAddrs = []string{":8482"}
|
||||
}
|
||||
requestHandler := newRequestHandler(strg)
|
||||
go httpserver.Serve(listenAddrs, requestHandler, httpserver.ServeOptions{UseProxyProtocol: useProxyProtocol})
|
||||
go httpserver.Serve(listenAddrs, vmStorage.requestHandler, httpserver.ServeOptions{UseProxyProtocol: useProxyProtocol})
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
@@ -234,7 +247,6 @@ func main() {
|
||||
metrics.UnregisterSet(storageMetrics, true)
|
||||
storageMetrics = nil
|
||||
|
||||
stopStaleSnapshotsRemover()
|
||||
vmselectSrv.MustStop()
|
||||
vminsertSrv.MustStop()
|
||||
protoparserutil.StopUnmarshalWorkers()
|
||||
@@ -242,31 +254,29 @@ func main() {
|
||||
|
||||
logger.Infof("gracefully closing the storage at %s", *storageDataPath)
|
||||
startTime = time.Now()
|
||||
strg.MustClose()
|
||||
vmStorage.Stop()
|
||||
logger.Infof("successfully closed the storage in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
fs.MustStopDirRemover()
|
||||
logger.Infof("the vmstorage has been stopped")
|
||||
}
|
||||
|
||||
func newRequestHandler(strg *storage.Storage) httpserver.RequestHandler {
|
||||
return func(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
if r.Method != http.MethodGet {
|
||||
return false
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
fmt.Fprintf(w, `vmstorage - a component of VictoriaMetrics cluster<br/>
|
||||
// requestHandler is a storage request handler.
|
||||
// TODO(@rtm0): Move to a separate file, request_handler.go
|
||||
func (vms *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
path := r.URL.Path
|
||||
|
||||
if path == "/" {
|
||||
if r.Method != http.MethodGet {
|
||||
return false
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
fmt.Fprintf(w, `vmstorage - a component of VictoriaMetrics cluster<br/>
|
||||
<a href="https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/">docs</a><br>
|
||||
`)
|
||||
return true
|
||||
}
|
||||
return requestHandler(w, r, strg)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storage) bool {
|
||||
path := r.URL.Path
|
||||
if path == "/internal/force_merge" {
|
||||
if !httpserver.CheckAuthFlag(w, r, forceMergeAuthKey) {
|
||||
return true
|
||||
@@ -278,7 +288,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storag
|
||||
defer activeForceMerges.Dec()
|
||||
logger.Infof("forced merge for partition_prefix=%q has been started", partitionNamePrefix)
|
||||
startTime := time.Now()
|
||||
if err := strg.ForceMergePartitions(partitionNamePrefix); err != nil {
|
||||
if err := vms.s.ForceMergePartitions(partitionNamePrefix); err != nil {
|
||||
logger.Errorf("error in forced merge for partition_prefix=%q: %s", partitionNamePrefix, err)
|
||||
return
|
||||
}
|
||||
@@ -291,7 +301,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storag
|
||||
return true
|
||||
}
|
||||
logger.Infof("flushing storage to make pending data available for reading")
|
||||
strg.DebugFlush()
|
||||
vms.s.DebugFlush()
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -311,7 +321,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storag
|
||||
}
|
||||
logger.Infof("enabling logging of new series for the next %s. This may increase resource usage during this period.", time.Duration(dealine)*time.Second)
|
||||
endTime := fasttime.UnixTimestamp() + uint64(dealine)
|
||||
strg.SetLogNewSeriesUntil(endTime)
|
||||
vms.s.SetLogNewSeriesUntil(endTime)
|
||||
fmt.Fprintf(w, `{"status":"success","data":{"logEndTime":%q}}`, time.Unix(int64(endTime), 0))
|
||||
return true
|
||||
}
|
||||
@@ -327,13 +337,13 @@ func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storag
|
||||
case "/create":
|
||||
snapshotsCreateTotal.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
snapshotName := strg.MustCreateSnapshot()
|
||||
snapshotName := vms.s.MustCreateSnapshot()
|
||||
|
||||
// Verify whether the client already closed the connection.
|
||||
// In this case it is better to drop the created snapshot, since the client isn't interested in it.
|
||||
if err := r.Context().Err(); err != nil {
|
||||
logger.Infof("deleting already created snapshot at %s because the client canceled the request", snapshotName)
|
||||
if err := deleteSnapshot(strg, snapshotName); err != nil {
|
||||
if err := vms.deleteSnapshot(snapshotName); err != nil {
|
||||
logger.Infof("cannot delete just created snapshot: %s", err)
|
||||
return true
|
||||
}
|
||||
@@ -345,7 +355,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storag
|
||||
case "/list":
|
||||
snapshotsListTotal.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
snapshots := strg.MustListSnapshots()
|
||||
snapshots := vms.s.MustListSnapshots()
|
||||
fmt.Fprintf(w, `{"status":"ok","snapshots":[`)
|
||||
if len(snapshots) > 0 {
|
||||
for _, snapshot := range snapshots[:len(snapshots)-1] {
|
||||
@@ -359,7 +369,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storag
|
||||
snapshotsDeleteTotal.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
snapshotName := r.FormValue("snapshot")
|
||||
if err := deleteSnapshot(strg, snapshotName); err != nil {
|
||||
if err := vms.deleteSnapshot(snapshotName); err != nil {
|
||||
jsonResponseError(w, err)
|
||||
snapshotsDeleteErrorsTotal.Inc()
|
||||
return true
|
||||
@@ -369,9 +379,9 @@ func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storag
|
||||
case "/delete_all":
|
||||
snapshotsDeleteAllTotal.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
snapshots := strg.MustListSnapshots()
|
||||
snapshots := vms.s.MustListSnapshots()
|
||||
for _, snapshotName := range snapshots {
|
||||
if err := strg.DeleteSnapshot(snapshotName); err != nil {
|
||||
if err := vms.s.DeleteSnapshot(snapshotName); err != nil {
|
||||
err = fmt.Errorf("cannot delete snapshot %q: %w", snapshotName, err)
|
||||
jsonResponseError(w, err)
|
||||
snapshotsDeleteAllErrorsTotal.Inc()
|
||||
@@ -385,50 +395,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storag
|
||||
}
|
||||
}
|
||||
|
||||
func deleteSnapshot(strg *storage.Storage, snapshotName string) error {
|
||||
snapshots := strg.MustListSnapshots()
|
||||
for _, snName := range snapshots {
|
||||
if snName == snapshotName {
|
||||
if err := strg.DeleteSnapshot(snName); err != nil {
|
||||
return fmt.Errorf("cannot delete snapshot %q: %w", snName, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("cannot find snapshot %q", snapshotName)
|
||||
}
|
||||
|
||||
func initStaleSnapshotsRemover(strg *storage.Storage) {
|
||||
staleSnapshotsRemoverCh = make(chan struct{})
|
||||
if snapshotsMaxAge.Duration() <= 0 {
|
||||
return
|
||||
}
|
||||
snapshotsMaxAgeDur := snapshotsMaxAge.Duration()
|
||||
staleSnapshotsRemoverWG.Go(func() {
|
||||
d := timeutil.AddJitterToDuration(time.Second * 11)
|
||||
t := time.NewTicker(d)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-staleSnapshotsRemoverCh:
|
||||
return
|
||||
case <-t.C:
|
||||
}
|
||||
strg.MustDeleteStaleSnapshots(snapshotsMaxAgeDur)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// stopStaleSnapshotsRemover closes staleSnapshotsRemoverCh and then waits
// on staleSnapshotsRemoverWG.
func stopStaleSnapshotsRemover() {
|
||||
close(staleSnapshotsRemoverCh)
|
||||
staleSnapshotsRemoverWG.Wait()
|
||||
}
|
||||
|
||||
var (
|
||||
staleSnapshotsRemoverCh chan struct{}
|
||||
staleSnapshotsRemoverWG sync.WaitGroup
|
||||
)
|
||||
|
||||
var (
|
||||
activeForceMerges = metrics.NewCounter("vm_active_force_merges")
|
||||
|
||||
@@ -443,7 +409,9 @@ var (
|
||||
snapshotsDeleteAllErrorsTotal = metrics.NewCounter(`vm_http_request_errors_total{path="/snapshot/delete_all"}`)
|
||||
)
|
||||
|
||||
func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
// TODO(@rtm0): Move to metrics.go.
|
||||
func (vms *VMStorage) writeStorageMetrics(w io.Writer) {
|
||||
strg := vms.s
|
||||
var m storage.Metrics
|
||||
strg.UpdateMetrics(&m)
|
||||
tm := &m.TableMetrics
|
||||
@@ -667,7 +635,7 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteGaugeUint64(w, `vm_downsampling_partitions_scheduled`, tm.ScheduledDownsamplingPartitions)
|
||||
metrics.WriteGaugeUint64(w, `vm_downsampling_partitions_scheduled_size_bytes`, tm.ScheduledDownsamplingPartitionsSize)
|
||||
|
||||
metrics.WriteGaugeUint64(w, `vm_search_max_unique_timeseries`, uint64(servers.GetMaxUniqueTimeSeries()))
|
||||
metrics.WriteGaugeUint64(w, `vm_search_max_unique_timeseries`, uint64(vms.maxUniqueTimeSeriesCalculated))
|
||||
|
||||
metrics.WriteGaugeUint64(w, `vm_metrics_metadata_storage_items`, m.MetadataStorageItemsCurrent)
|
||||
metrics.WriteCounterUint64(w, `vm_metrics_metadata_storage_size_bytes`, m.MetadataStorageCurrentSizeBytes)
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
package servers
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vminsertapi"
|
||||
)
|
||||
|
||||
var (
|
||||
precisionBits = flag.Int("precisionBits", 64, "The number of precision bits to store per each value. Lower precision bits improves data compression "+
|
||||
"at the cost of precision loss")
|
||||
vminsertConnsShutdownDuration = flag.Duration("storage.vminsertConnsShutdownDuration", 10*time.Second, "The time needed for gradual closing of vminsert connections during "+
|
||||
"graceful shutdown. Bigger duration reduces spikes in CPU, RAM and disk IO load on the remaining vmstorage nodes during rolling restart. "+
|
||||
"Smaller duration reduces the time needed to close all the vminsert connections, thus reducing the time for graceful shutdown. "+
|
||||
"Configured value must always be lower than the graceful shutdown period configured by the orchestration platform (terminationGracePeriodSeconds for Kubernetes). "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#improving-re-routing-performance-during-restart")
|
||||
)
|
||||
|
||||
// NewVMInsertServer starts vminsertapi.VMInsertServer at the given addr serving the given storage.
|
||||
func NewVMInsertServer(addr string, storage *storage.Storage) (*vminsertapi.VMInsertServer, error) {
|
||||
if err := encoding.CheckPrecisionBits(uint8(*precisionBits)); err != nil {
|
||||
return nil, fmt.Errorf("invalid -precisionBits: %w", err)
|
||||
}
|
||||
api := &vminsertAPI{
|
||||
storage: storage,
|
||||
}
|
||||
|
||||
return vminsertapi.NewVMInsertServer(addr, *vminsertConnsShutdownDuration, "vminsert", api, nil)
|
||||
}
|
||||
|
||||
type vminsertAPI struct {
|
||||
storage *storage.Storage
|
||||
}
|
||||
|
||||
// WriteRows implements lib/vminsertapi.API interface.
//
// It passes rows to v.storage.AddRows together with the -precisionBits flag value
// converted to uint8, and always returns nil.
|
||||
func (v *vminsertAPI) WriteRows(rows []storage.MetricRow) error {
|
||||
v.storage.AddRows(rows, uint8(*precisionBits))
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteMetadata implements lib/vminsertapi.API interface.
//
// It passes rows to v.storage.AddMetadataRows and always returns nil.
|
||||
func (v *vminsertAPI) WriteMetadata(rows []metricsmetadata.Row) error {
|
||||
v.storage.AddMetadataRows(rows)
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsReadOnly implements lib/vminsertapi.API interface.
//
// It returns the result of v.storage.IsReadOnly.
|
||||
func (v *vminsertAPI) IsReadOnly() bool {
|
||||
return v.storage.IsReadOnly()
|
||||
}
|
||||
@@ -1,312 +0,0 @@
|
||||
package servers
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricnamestats"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vmselectapi"
|
||||
)
|
||||
|
||||
var (
|
||||
maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 0, "The maximum number of unique time series, which can be scanned during every query. "+
|
||||
"This allows protecting against heavy queries, which select unexpectedly high number of series. When set to zero, the limit is automatically calculated based on -search.maxConcurrentRequests (inversely proportional) and memory available to the process (proportional). See also -search.max* command-line flags at vmselect")
|
||||
maxTagKeys = flag.Int("search.maxTagKeys", 100e3, "The maximum number of tag keys returned per search. "+
|
||||
"See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration")
|
||||
maxTagValues = flag.Int("search.maxTagValues", 100e3, "The maximum number of tag values returned per search. "+
|
||||
"See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration")
|
||||
maxTagValueSuffixesPerSearch = flag.Int("search.maxTagValueSuffixesPerSearch", 100e3, "The maximum number of tag value suffixes returned from /metrics/find")
|
||||
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", 2*cgroup.AvailableCPUs(), "The maximum number of concurrent vmselect requests "+
|
||||
"the vmstorage can process at -vmselectAddr. It shouldn't be high, since a single request usually saturates a CPU core, and many concurrently executed requests "+
|
||||
"may require high amounts of memory. See also -search.maxQueueDuration")
|
||||
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the incoming vmselect request waits for execution "+
|
||||
"when -search.maxConcurrentRequests limit is reached")
|
||||
|
||||
disableRPCCompression = flag.Bool("rpc.disableCompression", false, "Whether to disable compression of the data sent from vmstorage to vmselect. "+
|
||||
"This reduces CPU usage at the cost of higher network bandwidth usage")
|
||||
)
|
||||
|
||||
var (
|
||||
maxUniqueTimeseriesValue int
|
||||
maxUniqueTimeseriesValueOnce sync.Once
|
||||
)
|
||||
|
||||
// NewVMSelectServer starts new server at the given addr, which serves vmselect requests from the given s.
|
||||
func NewVMSelectServer(addr string, s *storage.Storage) (*vmselectapi.Server, error) {
|
||||
api := &vmstorageAPI{
|
||||
s: s,
|
||||
}
|
||||
limits := vmselectapi.Limits{
|
||||
MaxLabelNames: *maxTagKeys,
|
||||
MaxLabelValues: *maxTagValues,
|
||||
MaxTagValueSuffixes: *maxTagValueSuffixesPerSearch,
|
||||
MaxConcurrentRequests: *maxConcurrentRequests,
|
||||
MaxConcurrentRequestsFlagName: "search.maxConcurrentRequests",
|
||||
MaxQueueDuration: *maxQueueDuration,
|
||||
MaxQueueDurationFlagName: "search.maxQueueDuration",
|
||||
}
|
||||
return vmselectapi.NewServer(addr, api, limits, *disableRPCCompression)
|
||||
}
|
||||
|
||||
// vmstorageAPI implements vmselectapi.API
|
||||
type vmstorageAPI struct {
|
||||
// s is the storage the API methods delegate to.
s *storage.Storage
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) InitSearch(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (vmselectapi.BlockIterator, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := getMaxMetrics(sq.MaxMetrics)
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(tfss) == 0 {
|
||||
return nil, fmt.Errorf("missing tag filters")
|
||||
}
|
||||
bi := getBlockIterator()
|
||||
bi.sr.Init(qt, api.s, tfss, tr, maxMetrics, deadline)
|
||||
if err := bi.sr.Error(); err != nil {
|
||||
bi.MustClose()
|
||||
return nil, err
|
||||
}
|
||||
return bi, nil
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) SearchMetricNames(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) ([]string, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = GetMaxUniqueTimeSeries()
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(tfss) == 0 {
|
||||
return nil, fmt.Errorf("missing tag filters")
|
||||
}
|
||||
return api.s.SearchMetricNames(qt, tfss, tr, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQuery, labelName string, maxLabelValues int, deadline uint64) ([]string, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = GetMaxUniqueTimeSeries()
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return api.s.SearchLabelValues(qt, sq.AccountID, sq.ProjectID, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) TagValueSuffixes(qt *querytracer.Tracer, accountID, projectID uint32, tr storage.TimeRange, tagKey, tagValuePrefix string, delimiter byte,
|
||||
maxSuffixes int, deadline uint64) ([]string, error) {
|
||||
suffixes, err := api.s.SearchTagValueSuffixes(qt, accountID, projectID, tr, tagKey, tagValuePrefix, delimiter, maxSuffixes, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(suffixes) >= maxSuffixes {
|
||||
return nil, fmt.Errorf("more than -search.maxTagValueSuffixesPerSearch=%d suffixes returned; "+
|
||||
"either narrow down the search or increase -search.maxTagValueSuffixesPerSearch command-line flag value", maxSuffixes)
|
||||
}
|
||||
return suffixes, nil
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) LabelNames(qt *querytracer.Tracer, sq *storage.SearchQuery, maxLabelNames int, deadline uint64) ([]string, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = GetMaxUniqueTimeSeries()
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return api.s.SearchLabelNames(qt, sq.AccountID, sq.ProjectID, tfss, tr, maxLabelNames, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
// SeriesCount returns the result of api.s.GetSeriesCount for the given
// accountID, projectID and deadline. The tracer argument is ignored.
func (api *vmstorageAPI) SeriesCount(_ *querytracer.Tracer, accountID, projectID uint32, deadline uint64) (uint64, error) {
|
||||
return api.s.GetSeriesCount(accountID, projectID, deadline)
|
||||
}
|
||||
|
||||
// Tenants returns the result of api.s.SearchTenants for the given time range tr and deadline.
func (api *vmstorageAPI) Tenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline uint64) ([]string, error) {
|
||||
return api.s.SearchTenants(qt, tr, deadline)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) TSDBStatus(qt *querytracer.Tracer, sq *storage.SearchQuery, focusLabel string, topN int, deadline uint64) (*storage.TSDBStatus, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = GetMaxUniqueTimeSeries()
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
date := uint64(sq.MinTimestamp) / (24 * 3600 * 1000)
|
||||
return api.s.GetTSDBStatus(qt, sq.AccountID, sq.ProjectID, tfss, date, focusLabel, topN, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) DeleteSeries(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (int, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = GetMaxUniqueTimeSeries()
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if len(tfss) == 0 {
|
||||
return 0, fmt.Errorf("missing tag filters")
|
||||
}
|
||||
return api.s.DeleteSeries(qt, tfss, maxMetrics)
|
||||
}
|
||||
|
||||
// RegisterMetricNames passes mrs to api.s.RegisterMetricNames and always returns nil.
// The deadline argument is ignored.
func (api *vmstorageAPI) RegisterMetricNames(qt *querytracer.Tracer, mrs []storage.MetricRow, _ uint64) error {
|
||||
api.s.RegisterMetricNames(qt, mrs)
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetMetricNamesUsageStats returns the result of api.s.GetMetricNamesStats
// for the given tt, limit, le and matchPattern. The returned error is always nil;
// the deadline argument is ignored.
func (api *vmstorageAPI) GetMetricNamesUsageStats(qt *querytracer.Tracer, tt *storage.TenantToken, limit, le int, matchPattern string, _ uint64) (metricnamestats.StatsResult, error) {
|
||||
return api.s.GetMetricNamesStats(qt, tt, limit, le, matchPattern), nil
|
||||
}
|
||||
|
||||
// ResetMetricNamesUsageStats calls api.s.ResetMetricNamesStats and always returns nil.
// The deadline argument is ignored.
func (api *vmstorageAPI) ResetMetricNamesUsageStats(qt *querytracer.Tracer, _ uint64) error {
|
||||
api.s.ResetMetricNamesStats(qt)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) setupTfss(qt *querytracer.Tracer, sq *storage.SearchQuery, tr storage.TimeRange, maxMetrics int, deadline uint64) ([]*storage.TagFilters, error) {
|
||||
tfss := make([]*storage.TagFilters, 0, len(sq.TagFilterss))
|
||||
accountID := sq.AccountID
|
||||
projectID := sq.ProjectID
|
||||
for _, tagFilters := range sq.TagFilterss {
|
||||
tfs := storage.NewTagFilters(accountID, projectID)
|
||||
for i := range tagFilters {
|
||||
tf := &tagFilters[i]
|
||||
if string(tf.Key) == "__graphite__" {
|
||||
query := tf.Value
|
||||
qtChild := qt.NewChild("searching for series matching __graphite__=%q", query)
|
||||
paths, err := api.s.SearchGraphitePaths(qtChild, accountID, projectID, tr, query, maxMetrics, deadline)
|
||||
qtChild.Donef("found %d series", len(paths))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when searching for Graphite paths for query %q: %w", query, err)
|
||||
}
|
||||
if len(paths) >= maxMetrics {
|
||||
return nil, fmt.Errorf("more than %d time series match Graphite query %q; "+
|
||||
"either narrow down the query or increase the corresponding -search.max* command-line flag value at vmselect nodes; "+
|
||||
"see https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#resource-usage-limits", maxMetrics, query)
|
||||
}
|
||||
tfs.AddGraphiteQuery(query, paths, tf.IsNegative)
|
||||
continue
|
||||
}
|
||||
if err := tfs.Add(tf.Key, tf.Value, tf.IsNegative, tf.IsRegexp); err != nil {
|
||||
return nil, fmt.Errorf("cannot parse tag filter %s: %w", tf, err)
|
||||
}
|
||||
}
|
||||
tfss = append(tfss, tfs)
|
||||
}
|
||||
return tfss, nil
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) GetMetadataRecords(qt *querytracer.Tracer, tt *storage.TenantToken, limit int, metricName string, deadline uint64) ([]*metricsmetadata.Row, error) {
|
||||
return api.s.GetMetadataRows(qt, tt, limit, metricName, deadline)
|
||||
}
|
||||
|
||||
// blockIterator implements vmselectapi.BlockIterator
|
||||
type blockIterator struct {
|
||||
sr storage.Search
|
||||
mb storage.MetricBlock
|
||||
}
|
||||
|
||||
var blockIteratorsPool sync.Pool
|
||||
|
||||
func (bi *blockIterator) MustClose() {
|
||||
bi.sr.MustClose()
|
||||
bi.mb.MetricName = nil
|
||||
bi.mb.Block.Reset()
|
||||
blockIteratorsPool.Put(bi)
|
||||
}
|
||||
|
||||
func getBlockIterator() *blockIterator {
|
||||
v := blockIteratorsPool.Get()
|
||||
if v == nil {
|
||||
v = &blockIterator{}
|
||||
}
|
||||
return v.(*blockIterator)
|
||||
}
|
||||
|
||||
func (bi *blockIterator) NextBlock(dst []byte) ([]byte, bool) {
|
||||
if !bi.sr.NextMetricBlock() {
|
||||
return dst, false
|
||||
}
|
||||
mb := bi.mb
|
||||
mb.MetricName = bi.sr.MetricBlockRef.MetricName
|
||||
bi.sr.MetricBlockRef.BlockRef.MustReadBlock(&mb.Block)
|
||||
dst = mb.Marshal(dst[:0])
|
||||
return dst, true
|
||||
}
|
||||
|
||||
func (bi *blockIterator) Error() error {
|
||||
return bi.sr.Error()
|
||||
}
|
||||
|
||||
func getMaxMetrics(searchQueryLimit int) int {
|
||||
if searchQueryLimit <= 0 {
|
||||
return GetMaxUniqueTimeSeries()
|
||||
}
|
||||
// searchQueryLimit cannot exceed `-search.maxUniqueTimeseries`
|
||||
if *maxUniqueTimeseries != 0 && searchQueryLimit > *maxUniqueTimeseries {
|
||||
searchQueryLimit = *maxUniqueTimeseries
|
||||
}
|
||||
return searchQueryLimit
|
||||
}
|
||||
|
||||
// GetMaxUniqueTimeSeries returns `-search.maxUniqueTimeseries` or the auto-calculated value based on available resources.
|
||||
// The calculation is split into calculateMaxUniqueTimeSeriesForResource for unit testing.
|
||||
func GetMaxUniqueTimeSeries() int {
|
||||
maxUniqueTimeseriesValueOnce.Do(func() {
|
||||
maxUniqueTimeseriesValue = *maxUniqueTimeseries
|
||||
if maxUniqueTimeseriesValue <= 0 {
|
||||
maxUniqueTimeseriesValue = calculateMaxUniqueTimeSeriesForResource(*maxConcurrentRequests, memory.Remaining())
|
||||
}
|
||||
})
|
||||
return maxUniqueTimeseriesValue
|
||||
}
|
||||
|
||||
// calculateMaxUniqueTimeSeriesForResource calculate the max metrics limit calculated by available resources.
|
||||
func calculateMaxUniqueTimeSeriesForResource(maxConcurrentRequests, remainingMemory int) int {
|
||||
if maxConcurrentRequests <= 0 {
|
||||
// This line should NOT be reached unless the user has set an incorrect `search.maxConcurrentRequests`.
|
||||
// In such cases, fallback to unlimited.
|
||||
logger.Warnf("limiting -search.maxUniqueTimeseries to %v because -search.maxConcurrentRequests=%d.", 2e9, maxConcurrentRequests)
|
||||
return 2e9
|
||||
}
|
||||
|
||||
// Calculate the max metrics limit for a single request in the worst-case concurrent scenario.
|
||||
// The approximate size of 1 unique series that could occupy in the vmstorage is 200 bytes.
|
||||
mts := remainingMemory / 200 / maxConcurrentRequests
|
||||
logger.Infof("limiting -search.maxUniqueTimeseries to %d according to -search.maxConcurrentRequests=%d and remaining memory=%d bytes. To increase the limit, reduce -search.maxConcurrentRequests or increase memory available to the process.", mts, maxConcurrentRequests, remainingMemory)
|
||||
return mts
|
||||
}
|
||||
391
app/vmstorage/vmstorage.go
Normal file
391
app/vmstorage/vmstorage.go
Normal file
@@ -0,0 +1,391 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricnamestats"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vmselectapi"
|
||||
)
|
||||
|
||||
var (
|
||||
precisionBits = flag.Int("precisionBits", 64, "The number of precision bits to store per each value. Lower precision bits improves data compression "+
|
||||
"at the cost of precision loss")
|
||||
maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 0, "The maximum number of unique time series, which can be scanned during every query. "+
|
||||
"This allows protecting against heavy queries, which select unexpectedly high number of series. When set to zero, the limit is automatically calculated based on -search.maxConcurrentRequests (inversely proportional) and memory available to the process (proportional). See also -search.max* command-line flags at vmselect")
|
||||
maxTagKeys = flag.Int("search.maxTagKeys", 100e3, "The maximum number of tag keys returned per search. "+
|
||||
"See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration")
|
||||
maxTagValues = flag.Int("search.maxTagValues", 100e3, "The maximum number of tag values returned per search. "+
|
||||
"See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration")
|
||||
maxTagValueSuffixesPerSearch = flag.Int("search.maxTagValueSuffixesPerSearch", 100e3, "The maximum number of tag value suffixes returned from /metrics/find")
|
||||
snapshotsMaxAge = flagutil.NewRetentionDuration("snapshotsMaxAge", "3d", "Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted")
|
||||
)
|
||||
|
||||
// newVMStorage creates a new instance of of VMStorage.
|
||||
//
|
||||
// The created VMStorage instance takes ownership of s.
|
||||
func newVMStorage(s *storage.Storage, vmselectMaxConcurrentRequests int) *VMStorage {
|
||||
if err := encoding.CheckPrecisionBits(uint8(*precisionBits)); err != nil {
|
||||
logger.Fatalf("invalid -precisionBits=%d: %s", *precisionBits, err)
|
||||
}
|
||||
|
||||
maxUniqueTimeseriesCalculated := *maxUniqueTimeseries
|
||||
if maxUniqueTimeseriesCalculated <= 0 {
|
||||
maxUniqueTimeseriesCalculated = calculateMaxUniqueTimeseries(vmselectMaxConcurrentRequests, memory.Remaining())
|
||||
}
|
||||
|
||||
vms := &VMStorage{
|
||||
s: s,
|
||||
maxUniqueTimeseries: *maxUniqueTimeseries,
|
||||
maxUniqueTimeSeriesCalculated: maxUniqueTimeseriesCalculated,
|
||||
staleSnapshotsRemoverCh: make(chan struct{}),
|
||||
}
|
||||
vms.initStaleSnapshotsRemover()
|
||||
return vms
|
||||
}
|
||||
|
||||
// calculateMaxUniqueTimeseries calculates the maxUniqueTimeseries based on the
|
||||
// available system resources.
|
||||
func calculateMaxUniqueTimeseries(maxConcurrentRequests, remainingMemory int) int {
|
||||
if maxConcurrentRequests <= 0 {
|
||||
// This line should NOT be reached unless the user has set an incorrect `search.maxConcurrentRequests`.
|
||||
// In such cases, fallback to unlimited.
|
||||
logger.Warnf("limiting -search.maxUniqueTimeseries to %v because -search.maxConcurrentRequests=%d.", 2e9, maxConcurrentRequests)
|
||||
return 2e9
|
||||
}
|
||||
|
||||
// Calculate the max metrics limit for a single request in the worst-case concurrent scenario.
|
||||
// The approximate size of 1 unique series that could occupy in the vmstorage is 200 bytes.
|
||||
mts := remainingMemory / 200 / maxConcurrentRequests
|
||||
logger.Infof("limiting -search.maxUniqueTimeseries to %d according to -search.maxConcurrentRequests=%d and remaining memory=%d bytes. To increase the limit, reduce -search.maxConcurrentRequests or increase memory available to the process.", mts, maxConcurrentRequests, remainingMemory)
|
||||
return mts
|
||||
}
|
||||
|
||||
// VMStorage impelements vmselectapi.API and vminsertapi.API.
|
||||
type VMStorage struct {
|
||||
s *storage.Storage
|
||||
maxUniqueTimeseries int
|
||||
maxUniqueTimeSeriesCalculated int
|
||||
staleSnapshotsRemoverCh chan struct{}
|
||||
staleSnapshotsRemoverWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func (vms *VMStorage) initStaleSnapshotsRemover() {
|
||||
if snapshotsMaxAge.Duration() <= 0 {
|
||||
return
|
||||
}
|
||||
snapshotsMaxAgeDuration := snapshotsMaxAge.Duration()
|
||||
vms.staleSnapshotsRemoverWG.Go(func() {
|
||||
d := timeutil.AddJitterToDuration(time.Second * 11)
|
||||
t := time.NewTicker(d)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-vms.staleSnapshotsRemoverCh:
|
||||
return
|
||||
case <-t.C:
|
||||
}
|
||||
vms.s.MustDeleteStaleSnapshots(snapshotsMaxAgeDuration)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (vms *VMStorage) Stop() {
|
||||
close(vms.staleSnapshotsRemoverCh)
|
||||
vms.staleSnapshotsRemoverWG.Wait()
|
||||
vms.s.MustClose()
|
||||
}
|
||||
|
||||
// WriteRows writes metric rows to the storage.
|
||||
//
|
||||
// The caller should limit the number of concurrent calls to WriteRows() in
|
||||
// order to limit memory usage.
|
||||
func (vms *VMStorage) WriteRows(rows []storage.MetricRow) error {
|
||||
vms.s.AddRows(rows, uint8(*precisionBits))
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteMetadata writes metrics metadata to storage.
|
||||
//
|
||||
// The caller should limit the number of concurrent calls to WriteMetadata() in
|
||||
// order to limit memory usage.
|
||||
func (vms *VMStorage) WriteMetadata(rows []metricsmetadata.Row) error {
|
||||
vms.s.AddMetadataRows(rows)
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsReadOnly returns true is the storage is in read-only mode.
|
||||
func (vms *VMStorage) IsReadOnly() bool {
|
||||
return vms.s.IsReadOnly()
|
||||
}
|
||||
|
||||
func (vms *VMStorage) InitSearch(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (vmselectapi.BlockIterator, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := vms.getMaxMetrics(sq.MaxMetrics)
|
||||
tfss, err := vms.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(tfss) == 0 {
|
||||
return nil, fmt.Errorf("missing tag filters")
|
||||
}
|
||||
bi := getBlockIterator()
|
||||
bi.sr.Init(qt, vms.s, tfss, tr, maxMetrics, deadline)
|
||||
if err := bi.sr.Error(); err != nil {
|
||||
bi.MustClose()
|
||||
return nil, err
|
||||
}
|
||||
return bi, nil
|
||||
}
|
||||
|
||||
func (vms *VMStorage) getMaxMetrics(searchQueryLimit int) int {
|
||||
if searchQueryLimit <= 0 {
|
||||
return vms.maxUniqueTimeSeriesCalculated
|
||||
}
|
||||
// searchQueryLimit cannot exceed `-search.maxUniqueTimeseries`
|
||||
if vms.maxUniqueTimeseries != 0 && searchQueryLimit > vms.maxUniqueTimeseries {
|
||||
searchQueryLimit = vms.maxUniqueTimeseries
|
||||
}
|
||||
return searchQueryLimit
|
||||
}
|
||||
|
||||
// blockIterator implements vmselectapi.BlockIterator
|
||||
type blockIterator struct {
|
||||
sr storage.Search
|
||||
mb storage.MetricBlock
|
||||
}
|
||||
|
||||
var blockIteratorsPool sync.Pool
|
||||
|
||||
func (bi *blockIterator) MustClose() {
|
||||
bi.sr.MustClose()
|
||||
bi.mb.MetricName = nil
|
||||
bi.mb.Block.Reset()
|
||||
blockIteratorsPool.Put(bi)
|
||||
}
|
||||
|
||||
func getBlockIterator() *blockIterator {
|
||||
v := blockIteratorsPool.Get()
|
||||
if v == nil {
|
||||
v = &blockIterator{}
|
||||
}
|
||||
return v.(*blockIterator)
|
||||
}
|
||||
|
||||
func (bi *blockIterator) NextBlock(dst []byte) ([]byte, bool) {
|
||||
if !bi.sr.NextMetricBlock() {
|
||||
return dst, false
|
||||
}
|
||||
mb := bi.mb
|
||||
mb.MetricName = bi.sr.MetricBlockRef.MetricName
|
||||
bi.sr.MetricBlockRef.BlockRef.MustReadBlock(&mb.Block)
|
||||
dst = mb.Marshal(dst[:0])
|
||||
return dst, true
|
||||
}
|
||||
|
||||
func (bi *blockIterator) Error() error {
|
||||
return bi.sr.Error()
|
||||
}
|
||||
|
||||
// SearchMetricNames returns metric names for the given tfss on the given tr.
|
||||
func (vms *VMStorage) SearchMetricNames(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) ([]string, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = vms.maxUniqueTimeSeriesCalculated
|
||||
}
|
||||
tfss, err := vms.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(tfss) == 0 {
|
||||
return nil, fmt.Errorf("missing tag filters")
|
||||
}
|
||||
return vms.s.SearchMetricNames(qt, tfss, tr, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
// SearchLabelValues searches for label values for the given labelName, tfss and
|
||||
// tr.
|
||||
func (vms *VMStorage) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQuery, labelName string, maxLabelValues int, deadline uint64) ([]string, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
if maxLabelValues <= 0 || maxLabelValues > *maxTagValues {
|
||||
maxLabelValues = *maxTagValues
|
||||
}
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = vms.maxUniqueTimeSeriesCalculated
|
||||
}
|
||||
tfss, err := vms.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return vms.s.SearchLabelValues(qt, sq.AccountID, sq.ProjectID, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
// TagValueSuffixes returns all the tag value suffixes for the given tagKey and
|
||||
// tagValuePrefix on the given tr.
|
||||
//
|
||||
// This allows implementing
|
||||
// https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find or
|
||||
// similar APIs.
|
||||
func (vms *VMStorage) TagValueSuffixes(qt *querytracer.Tracer, accountID, projectID uint32, tr storage.TimeRange, tagKey, tagValuePrefix string, delimiter byte,
|
||||
maxSuffixes int, deadline uint64) ([]string, error) {
|
||||
if maxSuffixes <= 0 || maxSuffixes > *maxTagValueSuffixesPerSearch {
|
||||
maxSuffixes = *maxTagValueSuffixesPerSearch
|
||||
}
|
||||
suffixes, err := vms.s.SearchTagValueSuffixes(qt, accountID, projectID, tr, tagKey, tagValuePrefix, delimiter, maxSuffixes, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(suffixes) >= maxSuffixes {
|
||||
return nil, fmt.Errorf("more than -search.maxTagValueSuffixesPerSearch=%d suffixes returned; "+
|
||||
"either narrow down the search or increase -search.maxTagValueSuffixesPerSearch command-line flag value", maxSuffixes)
|
||||
}
|
||||
return suffixes, nil
|
||||
}
|
||||
|
||||
// SearchLabelNames searches for tag keys matching the given tfss on tr.
|
||||
func (vms *VMStorage) LabelNames(qt *querytracer.Tracer, sq *storage.SearchQuery, maxLabelNames int, deadline uint64) ([]string, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
if maxLabelNames <= 0 || maxLabelNames > *maxTagKeys {
|
||||
maxLabelNames = *maxTagKeys
|
||||
}
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = vms.maxUniqueTimeSeriesCalculated
|
||||
}
|
||||
tfss, err := vms.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return vms.s.SearchLabelNames(qt, sq.AccountID, sq.ProjectID, tfss, tr, maxLabelNames, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
func (vms *VMStorage) SeriesCount(_ *querytracer.Tracer, accountID, projectID uint32, deadline uint64) (uint64, error) {
|
||||
return vms.s.GetSeriesCount(accountID, projectID, deadline)
|
||||
}
|
||||
|
||||
func (vms *VMStorage) Tenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline uint64) ([]string, error) {
|
||||
return vms.s.SearchTenants(qt, tr, deadline)
|
||||
}
|
||||
|
||||
// GetTSDBStatus returns TSDB status for given filters on the given date.
|
||||
func (vms *VMStorage) TSDBStatus(qt *querytracer.Tracer, sq *storage.SearchQuery, focusLabel string, topN int, deadline uint64) (*storage.TSDBStatus, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = vms.maxUniqueTimeSeriesCalculated
|
||||
}
|
||||
tfss, err := vms.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
date := uint64(sq.MinTimestamp) / (24 * 3600 * 1000)
|
||||
return vms.s.GetTSDBStatus(qt, sq.AccountID, sq.ProjectID, tfss, date, focusLabel, topN, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
// DeleteSeries deletes series matching tfss.
|
||||
//
|
||||
// Returns the number of deleted series.
|
||||
func (vms *VMStorage) DeleteSeries(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (int, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = vms.maxUniqueTimeSeriesCalculated
|
||||
}
|
||||
tfss, err := vms.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if len(tfss) == 0 {
|
||||
return 0, fmt.Errorf("missing tag filters")
|
||||
}
|
||||
return vms.s.DeleteSeries(qt, tfss, maxMetrics)
|
||||
}
|
||||
|
||||
func (vms *VMStorage) RegisterMetricNames(qt *querytracer.Tracer, mrs []storage.MetricRow, _ uint64) error {
|
||||
vms.s.RegisterMetricNames(qt, mrs)
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetMetricNamesUsageStats returns metric name usage stats.
|
||||
func (vms *VMStorage) GetMetricNamesUsageStats(qt *querytracer.Tracer, tt *storage.TenantToken, limit, le int, matchPattern string, _ uint64) (metricnamestats.StatsResult, error) {
|
||||
return vms.s.GetMetricNamesStats(qt, tt, limit, le, matchPattern), nil
|
||||
}
|
||||
|
||||
// ResetMetricNamesStats resets state for metric names usage tracker
|
||||
func (vms *VMStorage) ResetMetricNamesUsageStats(qt *querytracer.Tracer, _ uint64) error {
|
||||
vms.s.ResetMetricNamesStats(qt)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (vms *VMStorage) setupTfss(qt *querytracer.Tracer, sq *storage.SearchQuery, tr storage.TimeRange, maxMetrics int, deadline uint64) ([]*storage.TagFilters, error) {
|
||||
tfss := make([]*storage.TagFilters, 0, len(sq.TagFilterss))
|
||||
accountID := sq.AccountID
|
||||
projectID := sq.ProjectID
|
||||
for _, tagFilters := range sq.TagFilterss {
|
||||
tfs := storage.NewTagFilters(accountID, projectID)
|
||||
for i := range tagFilters {
|
||||
tf := &tagFilters[i]
|
||||
if string(tf.Key) == "__graphite__" {
|
||||
query := tf.Value
|
||||
qtChild := qt.NewChild("searching for series matching __graphite__=%q", query)
|
||||
paths, err := vms.s.SearchGraphitePaths(qtChild, accountID, projectID, tr, query, maxMetrics, deadline)
|
||||
qtChild.Donef("found %d series", len(paths))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when searching for Graphite paths for query %q: %w", query, err)
|
||||
}
|
||||
if len(paths) >= maxMetrics {
|
||||
return nil, fmt.Errorf("more than %d time series match Graphite query %q; "+
|
||||
"either narrow down the query or increase the corresponding -search.max* command-line flag value at vmselect nodes; "+
|
||||
"see https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#resource-usage-limits", maxMetrics, query)
|
||||
}
|
||||
tfs.AddGraphiteQuery(query, paths, tf.IsNegative)
|
||||
continue
|
||||
}
|
||||
if err := tfs.Add(tf.Key, tf.Value, tf.IsNegative, tf.IsRegexp); err != nil {
|
||||
return nil, fmt.Errorf("cannot parse tag filter %s: %w", tf, err)
|
||||
}
|
||||
}
|
||||
tfss = append(tfss, tfs)
|
||||
}
|
||||
return tfss, nil
|
||||
}
|
||||
|
||||
func (vms *VMStorage) GetMetadataRecords(qt *querytracer.Tracer, tt *storage.TenantToken, limit int, metricName string, deadline uint64) ([]*metricsmetadata.Row, error) {
|
||||
return vms.s.GetMetadataRows(qt, tt, limit, metricName, deadline)
|
||||
}
|
||||
|
||||
// deleteSnapshot deletes a snapshot by its name.
|
||||
func (vms *VMStorage) deleteSnapshot(snapshotName string) error {
|
||||
snapshots := vms.s.MustListSnapshots()
|
||||
for _, snName := range snapshots {
|
||||
if snName == snapshotName {
|
||||
if err := vms.s.DeleteSnapshot(snName); err != nil {
|
||||
return fmt.Errorf("cannot delete snapshot %q: %w", snName, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("cannot find snapshot %q", snapshotName)
|
||||
}
|
||||
@@ -1,15 +1,19 @@
|
||||
package servers
|
||||
package main
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
)
|
||||
|
||||
func TestCalculateMaxMetricsLimitByResource(t *testing.T) {
|
||||
f := func(maxConcurrentRequest, remainingMemory, expect int) {
|
||||
t.Helper()
|
||||
maxMetricsLimit := calculateMaxUniqueTimeSeriesForResource(maxConcurrentRequest, remainingMemory)
|
||||
maxMetricsLimit := calculateMaxUniqueTimeseries(maxConcurrentRequest, remainingMemory)
|
||||
if maxMetricsLimit != expect {
|
||||
t.Fatalf("unexpected max metrics limit: got %d, want %d", maxMetricsLimit, expect)
|
||||
}
|
||||
@@ -36,11 +40,17 @@ func TestGetMaxMetrics(t *testing.T) {
|
||||
originalMaxUniqueTimeSeries := *maxUniqueTimeseries
|
||||
defer func() {
|
||||
*maxUniqueTimeseries = originalMaxUniqueTimeSeries
|
||||
fs.MustRemoveDir(t.Name())
|
||||
}()
|
||||
|
||||
maxConcurrentRequests := 2 * cgroup.AvailableCPUs()
|
||||
f := func(searchQueryLimit, storageMaxUniqueTimeseries, expect int) {
|
||||
t.Helper()
|
||||
*maxUniqueTimeseries = storageMaxUniqueTimeseries
|
||||
maxMetrics := getMaxMetrics(searchQueryLimit)
|
||||
s := storage.MustOpenStorage(t.Name(), storage.OpenOptions{})
|
||||
vms := newVMStorage(s, maxConcurrentRequests)
|
||||
defer vms.Stop()
|
||||
maxMetrics := vms.getMaxMetrics(searchQueryLimit)
|
||||
if maxMetrics != expect {
|
||||
t.Fatalf("unexpected max metrics: got %d, want %d", maxMetrics, expect)
|
||||
}
|
||||
@@ -1695,7 +1695,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running.\n\n**Lower is better.**\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -1960,7 +1960,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). Helps troubleshoot high CPU usage or throttling.\nLower is better.\n\n- waiting: The percentage of time at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: The percentage of time all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 1%. It only becomes a concern if it consistently climbs above 5–10% and aligns with latency spikes or GC slowdowns.",
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 100ms. It only becomes a concern if it consistently climbs above 50-100ms and aligns with latency spikes or GC slowdowns.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2083,7 +2083,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\nLower is better.\n\n- waiting: Time fraction where at least one thread was blocked on memory.\n- stalled: Time fraction where every thread was blocked on memory (severe pressure).\n\nIf queries slow down and both series spike, the host is likely limited by RAM or I/O throughput.\n\nSee also major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slow down the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSee major page faults rate panel in Troubleshooting section if this metric continues to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2727,7 +2727,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\nThe lower the better.\n\n- waiting: at least one runnable thread blocked on block-`I/O` (disk, NVMe, network-storage) while others could still make progress.\n- stalled: all non-idle threads simultaneously waiting on `I/O`; no useful user code ran during these periods → true `I/O` thrashing.\n\nIf stalled > 0 while querying, it's recommended to increase queue depth on NVMe, raise blk-mq budgets, or relax cgroup I/O limits.",
|
||||
"description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one runnable thread blocked on IO (disk, NVMe, network-storage) while others could still make progress.\n- stalled: all non-idle threads simultaneously waiting on `I/O`.\n\nIf stalled > 0, consider increasing queue depth on NVMe, raising blk-mq budgets, or relaxing cgroup I/O limits.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -3391,7 +3391,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running.\n\n**Lower is better.**\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -4110,7 +4110,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows cache miss ratio. Lower is better.",
|
||||
"description": "Shows cache miss ratio.\n\n**Lower is better.**",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -5715,106 +5715,6 @@
|
||||
],
|
||||
"title": "Pending",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Network usage by internal VictoriaMetrics RPC protocol",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8388
|
||||
},
|
||||
"id": 74,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_tcpdialer_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) * 8",
|
||||
"legendFormat": "network usage",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "RPC network usage ($instance)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"title": "Interconnection ($job)",
|
||||
|
||||
@@ -1954,7 +1954,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). Helps troubleshoot high CPU usage or throttling.\nLower is better.\n\n- waiting: The percentage of time at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: The percentage of time all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 1%. It only becomes a concern if it consistently climbs above 5–10% and aligns with latency spikes or GC slowdowns.",
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 100ms. It only becomes a concern if it consistently climbs above 50-100ms and aligns with latency spikes or GC slowdowns.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2063,7 +2063,7 @@
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{job}}-{{instance}} - waiting",
|
||||
"legendFormat": "{{job}}-{{instance}} - stalled",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
@@ -2388,7 +2388,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\nLower is better.\n\n- waiting: Time fraction where at least one thread was blocked on memory.\n- stalled: Time fraction where every thread was blocked on memory (severe pressure).\n\nIf queries slow down and both series spike, the host is likely limited by RAM or I/O throughput.\n\nSee also major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -3023,7 +3023,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.",
|
||||
"description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one runnable thread blocked on IO (disk, NVMe, network-storage) while others could still make progress.\n- stalled: all non-idle threads simultaneously waiting on `I/O`.\n\nIf stalled > 0, consider increasing queue depth on NVMe, raising blk-mq budgets, or relaxing cgroup I/O limits.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -3690,7 +3690,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running.\n\n**Lower is better.**\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -4510,7 +4510,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows cache miss ratio. Lower is better.",
|
||||
"description": "Shows cache miss ratio.\n\n**Lower is better.**",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -1696,7 +1696,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running.\n\n**Lower is better.**\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -1961,7 +1961,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). Helps troubleshoot high CPU usage or throttling.\nLower is better.\n\n- waiting: The percentage of time at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: The percentage of time all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 1%. It only becomes a concern if it consistently climbs above 5–10% and aligns with latency spikes or GC slowdowns.",
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 100ms. It only becomes a concern if it consistently climbs above 50-100ms and aligns with latency spikes or GC slowdowns.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2084,7 +2084,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\nLower is better.\n\n- waiting: Time fraction where at least one thread was blocked on memory.\n- stalled: Time fraction where every thread was blocked on memory (severe pressure).\n\nIf queries slow down and both series spike, the host is likely limited by RAM or I/O throughput.\n\nSee also major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2728,7 +2728,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\nThe lower the better.\n\n- waiting: at least one runnable thread blocked on block-`I/O` (disk, NVMe, network-storage) while others could still make progress.\n- stalled: all non-idle threads simultaneously waiting on `I/O`; no useful user code ran during these periods → true `I/O` thrashing.\n\nIf stalled > 0 while querying, it's recommended to increase queue depth on NVMe, raise blk-mq budgets, or relax cgroup I/O limits.",
|
||||
"description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one runnable thread blocked on IO (disk, NVMe, network-storage) while others could still make progress.\n- stalled: all non-idle threads simultaneously waiting on `I/O`.\n\nIf stalled > 0, consider increasing queue depth on NVMe, raising blk-mq budgets, or relaxing cgroup I/O limits.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -3392,7 +3392,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running.\n\n**Lower is better.**\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -4111,7 +4111,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows cache miss ratio. Lower is better.",
|
||||
"description": "Shows cache miss ratio.\n\n**Lower is better.**",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -5716,106 +5716,6 @@
|
||||
],
|
||||
"title": "Pending",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Network usage by internal VictoriaMetrics RPC protocol",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"links": [],
|
||||
"mappings": [],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8388
|
||||
},
|
||||
"id": 74,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"mean",
|
||||
"lastNotNull",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "9.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(vm_tcpdialer_written_bytes_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) * 8",
|
||||
"legendFormat": "network usage",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "RPC network usage ($instance)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"title": "Interconnection ($job)",
|
||||
|
||||
@@ -1955,7 +1955,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). Helps troubleshoot high CPU usage or throttling.\nLower is better.\n\n- waiting: The percentage of time at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: The percentage of time all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 1%. It only becomes a concern if it consistently climbs above 5–10% and aligns with latency spikes or GC slowdowns.",
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 100ms. It only becomes a concern if it consistently climbs above 50-100ms and aligns with latency spikes or GC slowdowns.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2064,7 +2064,7 @@
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{job}}-{{instance}} - waiting",
|
||||
"legendFormat": "{{job}}-{{instance}} - stalled",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
@@ -2389,7 +2389,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\nLower is better.\n\n- waiting: Time fraction where at least one thread was blocked on memory.\n- stalled: Time fraction where every thread was blocked on memory (severe pressure).\n\nIf queries slow down and both series spike, the host is likely limited by RAM or I/O throughput.\n\nSee also major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -3024,7 +3024,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.",
|
||||
"description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one runnable thread blocked on IO (disk, NVMe, network-storage) while others could still make progress.\n- stalled: all non-idle threads simultaneously waiting on `I/O`.\n\nIf stalled > 0, consider increasing queue depth on NVMe, raising blk-mq budgets, or relaxing cgroup I/O limits.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -3691,7 +3691,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running.\n\n**Lower is better.**\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -4511,7 +4511,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows cache miss ratio. Lower is better.",
|
||||
"description": "Shows cache miss ratio.\n\n**Lower is better.**",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -2042,7 +2042,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). Helps troubleshoot high CPU usage or throttling.\nLower is better.\n\n- waiting: The percentage of time at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: The percentage of time all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 1%. It only becomes a concern if it consistently climbs above 5–10% and aligns with latency spikes or GC slowdowns.",
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 100ms. It only becomes a concern if it consistently climbs above 50-100ms and aligns with latency spikes or GC slowdowns.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2165,7 +2165,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\nLower is better.\n\n- waiting: Time fraction where at least one thread was blocked on memory.\n- stalled: Time fraction where every thread was blocked on memory (severe pressure).\n\nIf queries slow down and both series spike, the host is likely limited by RAM or I/O throughput.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2542,7 +2542,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.",
|
||||
"description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one runnable thread blocked on IO (disk, NVMe, network-storage) while others could still make progress.\n- stalled: all non-idle threads simultaneously waiting on `I/O`.\n\nIf stalled > 0, consider increasing queue depth on NVMe, raising blk-mq budgets, or relaxing cgroup I/O limits.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -3015,7 +3015,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running.\n\n**Lower is better.**\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -1719,7 +1719,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). Helps troubleshoot high CPU usage or throttling.\nLower is better.\n\n- waiting: The percentage of time at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: The percentage of time all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 1%. It only becomes a concern if it consistently climbs above 5–10% and aligns with latency spikes or GC slowdowns.",
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 100ms. It only becomes a concern if it consistently climbs above 50-100ms and aligns with latency spikes or GC slowdowns.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -1840,7 +1840,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\nLower is better.\n\n- waiting: Time fraction where at least one thread was blocked on memory.\n- stalled: Time fraction where every thread was blocked on memory (severe pressure).\n\nIf queries slow down and both series spike, the host is likely limited by RAM or I/O throughput.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2177,7 +2177,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running.\n\n**Lower is better.**\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -2041,7 +2041,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). Helps troubleshoot high CPU usage or throttling.\nLower is better.\n\n- waiting: The percentage of time at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: The percentage of time all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 1%. It only becomes a concern if it consistently climbs above 5–10% and aligns with latency spikes or GC slowdowns.",
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 100ms. It only becomes a concern if it consistently climbs above 50-100ms and aligns with latency spikes or GC slowdowns.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2164,7 +2164,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\nLower is better.\n\n- waiting: Time fraction where at least one thread was blocked on memory.\n- stalled: Time fraction where every thread was blocked on memory (severe pressure).\n\nIf queries slow down and both series spike, the host is likely limited by RAM or I/O throughput.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2541,7 +2541,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\nThe lower the better.",
|
||||
"description": "Shows IO pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one runnable thread blocked on IO (disk, NVMe, network-storage) while others could still make progress.\n- stalled: all non-idle threads simultaneously waiting on `I/O`.\n\nIf stalled > 0, consider increasing queue depth on NVMe, raising blk-mq budgets, or relaxing cgroup I/O limits.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -3014,7 +3014,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running.\n\n**Lower is better.**\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -1718,7 +1718,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html). Helps troubleshoot high CPU usage or throttling.\nLower is better.\n\n- waiting: The percentage of time at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: The percentage of time all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 1%. It only becomes a concern if it consistently climbs above 5–10% and aligns with latency spikes or GC slowdowns.",
|
||||
"description": "CPU pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one task in the process was ready to run (runnable) but couldn't get scheduled on the CPU.\n- stalled: all tasks in the process (except idle ones) were unable to get CPU time — a full CPU stall.\n\nIf there's a CPU burst, it's normal to see waiting or stalled > 100ms. It only becomes a concern if it consistently climbs above 50-100ms and aligns with latency spikes or GC slowdowns.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -1839,7 +1839,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\nLower is better.\n\n- waiting: Time fraction where at least one thread was blocked on memory.\n- stalled: Time fraction where every thread was blocked on memory (severe pressure).\n\nIf queries slow down and both series spike, the host is likely limited by RAM or I/O throughput.",
|
||||
"description": "Shows memory pressure based on [Pressure Stall Information](https://docs.kernel.org/accounting/psi.html).\n\n**Lower is better.**\n\nPressure is measured as amount of time within 1sec time window the process was:\n- waiting: at least one thread was blocked on memory.\n- stalled: every thread was blocked on memory (severe pressure).\n\nElevated memory pressure can slowdown the process performance by utilizing more disk IO. Consider increasing amount of available RAM limit or decreasing the load on the process.\n\nSeу major page faults rate panel in Troubleshooting section if this metric continued to be high.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2176,7 +2176,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running. The lower is better.\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"description": "Shows the time goroutines have spent in runnable state before actually running.\n\n**Lower is better.**\n\nHigh values or values exceeding the threshold is usually a sign of insufficient CPU resources or CPU throttling. \n\nVerify that service has enough CPU resources. Otherwise, the service could work unreliably with delays in processing.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.144.0
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -42,14 +42,14 @@ services:
|
||||
# vmstorage shards. Each shard receives 1/N of all metrics sent to vminserts,
|
||||
# where N is number of vmstorages (2 in this case).
|
||||
vmstorage-1:
|
||||
image: victoriametrics/vmstorage:v1.144.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.145.0-cluster
|
||||
volumes:
|
||||
- strgdata-1:/storage
|
||||
command:
|
||||
- "--storageDataPath=/storage"
|
||||
restart: always
|
||||
vmstorage-2:
|
||||
image: victoriametrics/vmstorage:v1.144.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.145.0-cluster
|
||||
volumes:
|
||||
- strgdata-2:/storage
|
||||
command:
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
# vminsert is ingestion frontend. It receives metrics pushed by vmagent,
|
||||
# pre-process them and distributes across configured vmstorage shards.
|
||||
vminsert-1:
|
||||
image: victoriametrics/vminsert:v1.144.0-cluster
|
||||
image: victoriametrics/vminsert:v1.145.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -68,7 +68,7 @@ services:
|
||||
- "--storageNode=vmstorage-2:8400"
|
||||
restart: always
|
||||
vminsert-2:
|
||||
image: victoriametrics/vminsert:v1.144.0-cluster
|
||||
image: victoriametrics/vminsert:v1.145.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -80,7 +80,7 @@ services:
|
||||
# vmselect is a query frontend. It serves read queries in MetricsQL or PromQL.
|
||||
# vmselect collects results from configured `--storageNode` shards.
|
||||
vmselect-1:
|
||||
image: victoriametrics/vmselect:v1.144.0-cluster
|
||||
image: victoriametrics/vmselect:v1.145.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -90,7 +90,7 @@ services:
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
restart: always
|
||||
vmselect-2:
|
||||
image: victoriametrics/vmselect:v1.144.0-cluster
|
||||
image: victoriametrics/vmselect:v1.145.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -105,7 +105,7 @@ services:
|
||||
# read requests from Grafana, vmui, vmalert among vmselects.
|
||||
# It can be used as an authentication proxy.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.144.0
|
||||
image: victoriametrics/vmauth:v1.145.0
|
||||
depends_on:
|
||||
- "vmselect-1"
|
||||
- "vmselect-2"
|
||||
@@ -119,7 +119,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.144.0
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.144.0
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# storing metrics and serving read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.144.0
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
- 8089:8089
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.144.0
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.144.0
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -14,7 +14,7 @@ services:
|
||||
restart: always
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.144.0
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
restart: always
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.144.0
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -271,6 +271,8 @@ endif
|
||||
(cd /tmp/vm-opensource && ./bin/vmctl remote-read -help > /tmp/vmctl_remote-read_flags_tmp.md)
|
||||
(cd /tmp/vm-opensource && ./bin/vmctl prometheus -help > /tmp/vmctl_prometheus_flags_tmp.md)
|
||||
(cd /tmp/vm-opensource && ./bin/vmctl vm-native -help > /tmp/vmctl_vm-native_flags_tmp.md)
|
||||
(cd /tmp/vm-opensource && ./bin/vmctl thanos -help > /tmp/vmctl_thanos_flags_tmp.md)
|
||||
(cd /tmp/vm-opensource && ./bin/vmctl mimir -help > /tmp/vmctl_mimir_flags_tmp.md)
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_flags.md && \
|
||||
cat /tmp/vmctl_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_flags.md && \
|
||||
@@ -296,6 +298,14 @@ endif
|
||||
cat /tmp/vmctl_vm-native_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_vm-native_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmctl/vmctl_vm-native_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_thanos_flags.md && \
|
||||
cat /tmp/vmctl_thanos_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_thanos_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmctl/vmctl_thanos_flags.md
|
||||
|
||||
echo "$$FLAGS_HEADER" > docs/victoriametrics/vmctl/vmctl_mimir_flags.md && \
|
||||
cat /tmp/vmctl_mimir_flags_tmp.md >> docs/victoriametrics/vmctl/vmctl_mimir_flags.md && \
|
||||
printf '```\n' >> docs/victoriametrics/vmctl/vmctl_mimir_flags.md
|
||||
|
||||
# remove Total time line from all vmctl flag files to reduce diffs noise
|
||||
sed -i '/Total time:/d' docs/victoriametrics/vmctl/vmctl_flags.md
|
||||
sed -i '/Total time:/d' docs/victoriametrics/vmctl/vmctl_opentsdb_flags.md
|
||||
@@ -303,6 +313,8 @@ endif
|
||||
sed -i '/Total time:/d' docs/victoriametrics/vmctl/vmctl_remote-read_flags.md
|
||||
sed -i '/Total time:/d' docs/victoriametrics/vmctl/vmctl_prometheus_flags.md
|
||||
sed -i '/Total time:/d' docs/victoriametrics/vmctl/vmctl_vm-native_flags.md
|
||||
sed -i '/Total time:/d' docs/victoriametrics/vmctl/vmctl_thanos_flags.md
|
||||
sed -i '/Total time:/d' docs/victoriametrics/vmctl/vmctl_mimir_flags.md
|
||||
|
||||
# remove Version line and the actual version line from vmctl_flags.md to reduce diffs noise
|
||||
sed -i '/^VERSION:/,+1d' docs/victoriametrics/vmctl/vmctl_flags.md
|
||||
|
||||
@@ -130,7 +130,7 @@ Released: 2025-11-05
|
||||
## v1.27.0
|
||||
Released: 2025-10-31
|
||||
|
||||
- FEATURE: Added runtime state compatibility guard for [stateful](https://docs.victoriametrics.com/anomaly-detection/components/settings/#restore-state) deployments. The service now persists normalized versions, evaluates an [upgrade/downgrade compatibility matrix](https://docs.victoriametrics.com/anomaly-detection/migration/#compatibility-matrix), and selectively drops or reuses DB records and on-disk artifacts to keep migrations safe and automatic. Please refer to the [migration page](https://docs.victoriametrics.com/anomaly-detection/migration/) for more details.
|
||||
- FEATURE: Added runtime state compatibility guard for [stateful](https://docs.victoriametrics.com/anomaly-detection/components/settings/#state-restoration) deployments. The service now persists normalized versions, evaluates an [upgrade/downgrade compatibility matrix](https://docs.victoriametrics.com/anomaly-detection/migration/#compatibility-matrix), and selectively drops or reuses DB records and on-disk artifacts to keep migrations safe and automatic. Please refer to the [migration page](https://docs.victoriametrics.com/anomaly-detection/migration/) for more details.
|
||||
|
||||
- IMPROVEMENT: Parallelization now honours container cgroup CPU/RAM limits, so `settings.n_workers` in the [settings section](https://docs.victoriametrics.com/anomaly-detection/components/settings/#parallelization), internal routines and the `vmanomaly_available_memory_bytes`/`vmanomaly_cpu_cores_available` [startup metrics](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#startup-metrics) report or use container resources instead of host totals, keeping the [self-monitoring dashboard](https://docs.victoriametrics.com/anomaly-detection/self-monitoring/#grafana-dashboard) accurate.
|
||||
|
||||
@@ -164,7 +164,7 @@ Released: 2025-10-02
|
||||
|
||||
- FEATURE: Introduced vmui-like [UI](https://docs.victoriametrics.com/anomaly-detection/ui/) for `vmanomaly` service to simplify the configuration and backtesting of anomaly detection models before it goes to production. It provides an intuitive interface to finetune model configurations, visualize its predictions and anomaly scores, and perform backtesting on historical data. The UI is accessible via a web browser and can be run as a [standalone service](https://docs.victoriametrics.com/anomaly-detection/ui/#preset-usage) or [integrated with productionalized deployments](https://docs.victoriametrics.com/anomaly-detection/ui/#mixed-usage). For more details, refer to the [documentation](https://docs.victoriametrics.com/anomaly-detection/ui/).
|
||||
|
||||
- FEATURE: Added support for reading data from [VictoriaLogs stats queries](https://docs.victoriametrics.com/victorialogs/querying/#querying-log-range-stats) with `VLogsReader`. This reader allows querying and analyzing log data stored in VictoriaLogs, enabling anomaly detection on metrics generated from logs. It supports similar configuration options as `VmReader`, including `datasource_url`, `tenant_id`, `queries`, etc. For more details, refer to the [documentation](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vlogs-reader). It can be also used in [UI mode](https://docs.victoriametrics.com/anomaly-detection/ui/) for backtesting log-based anomaly detection configurations.
|
||||
- FEATURE: Added support for reading data from [VictoriaLogs stats queries](https://docs.victoriametrics.com/victorialogs/querying/#querying-log-range-stats) with `VLogsReader`. This reader allows querying and analyzing log data stored in VictoriaLogs, enabling anomaly detection on metrics generated from logs. It supports similar configuration options as `VmReader`, including `datasource_url`, `tenant_id`, `queries`, etc. For more details, refer to the [documentation](https://docs.victoriametrics.com/anomaly-detection/components/reader/#victorialogs-reader). It can be also used in [UI mode](https://docs.victoriametrics.com/anomaly-detection/ui/) for backtesting log-based anomaly detection configurations.
|
||||
|
||||
- IMPROVEMENT: Resolved the case in the [`IsolationForestModel`](https://docs.victoriametrics.com/anomaly-detection/components/models/#isolation-forest-multivariate) with `provide_series` common model [argument](https://docs.victoriametrics.com/anomaly-detection/components/models/#provide-series) including `yhat.*` series (prediction and confidence boundaries), which are not produced by this model. Now config validation will fail with a clear error message if such series names are requested.
|
||||
|
||||
@@ -190,7 +190,7 @@ Released: 2025-08-19
|
||||
## v1.25.2
|
||||
Released: 2025-07-30
|
||||
|
||||
- BUGFIX: Resolved inconsistent state between in-memory models and state database (if [stateful mode](https://docs.victoriametrics.com/anomaly-detection/components/settings/#stateful-mode) is enabled). This bug caused `Model instance not found` warnings during inference calls and prevented proper cleanup of stale models from disk. The fix also prevents state updates when operations are terminated mid-execution of scheduled fit/infer jobs.
|
||||
- BUGFIX: Resolved inconsistent state between in-memory models and state database (if [stateful mode](https://docs.victoriametrics.com/anomaly-detection/components/settings/#state-restoration) is enabled). This bug caused `Model instance not found` warnings during inference calls and prevented proper cleanup of stale models from disk. The fix also prevents state updates when operations are terminated mid-execution of scheduled fit/infer jobs.
|
||||
|
||||
- BUGFIX: Added explicit handling for inference calls on models that were deleted from disk by the time of their usage, but still referenced in the state database, preventing `'NoneType' object has no attribute 'infer'` rows in logs. Now a warning is logged and the inference call is skipped, which is expected behavior for deleted models.
|
||||
|
||||
@@ -210,7 +210,7 @@ Released: 2025-07-24
|
||||
|
||||
- BUGFIX: Prevented `OneOffScheduler` and `BacktestingScheduler` [schedulers](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/) from receiving no data (when [state restoration](https://docs.victoriametrics.com/anomaly-detection/components/settings/#state-restoration) is enabled). Now a warning is logged and such scheduler types are implicitly used without state restoration, which is expected behavior for these one-time-job schedulers.
|
||||
|
||||
- BUGFIX: Now the paths to artifact database (if [stateful mode](https://docs.victoriametrics.com/anomaly-detection/components/settings/#stateful-mode) is enabled) are properly resolved to absolute, preventing errors at initialization time (like `sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) unable to open database file`) or warnings (like `SAWarning: fully NULL primary key identity cannot load any object.`).
|
||||
- BUGFIX: Now the paths to artifact database (if [stateful mode](https://docs.victoriametrics.com/anomaly-detection/components/settings/#state-restoration) is enabled) are properly resolved to absolute, preventing errors at initialization time (like `sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) unable to open database file`) or warnings (like `SAWarning: fully NULL primary key identity cannot load any object.`).
|
||||
|
||||
## v1.25.0
|
||||
Released: 2025-07-17
|
||||
@@ -273,7 +273,7 @@ Released: 2025-06-05
|
||||
|
||||
- FEATURE: Added `decay` [argument](https://docs.victoriametrics.com/anomaly-detection/components/models/#decay) to [online models](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models). This parameter allows for newer data to be weighted more heavily in online models. By default this is set to 1 which means all data points are weighted the same to maintain backward compatibility with existing configs. The closer this value is to 0 the more important new data is.
|
||||
|
||||
- IMPROVEMENT: **Restored back parallelization** in the read/fit/infer pipeline, previously disabled in [v1.22.0](#v1220-experimental) due to deadlock issues. The new implementation prevents deadlocks, allowing to control the parallelization level via `n_workers` in [settings section](https://docs.victoriametrics.com/anomaly-detection/components/settings/). It's suggested to upgrade from [v1.22.0](#v1220) - [v1.22.1](#v1221) to this version to regain the performance benefits of parallel processing.
|
||||
- IMPROVEMENT: **Restored back parallelization** in the read/fit/infer pipeline, previously disabled in [v1.22.0](#v1220-experimental) due to deadlock issues. The new implementation prevents deadlocks, allowing to control the parallelization level via `n_workers` in [settings section](https://docs.victoriametrics.com/anomaly-detection/components/settings/). It's suggested to upgrade from [v1.22.0](#v1220-experimental) - [v1.22.1](#v1221) to this version to regain the performance benefits of parallel processing.
|
||||
|
||||
- IMPROVEMENT: Added `--dryRun` [argument](https://docs.victoriametrics.com/anomaly-detection/quickstart/#command-line-arguments) to `vmanomaly` to enable dry run mode. This mode allows to validate configuration without executing any actual operations and doesn't require a license. It is particularly useful to test the configurations before deploying them in a production environment.
|
||||
|
||||
@@ -559,7 +559,7 @@ Released: 2024-08-10
|
||||
- **Lowest anomaly scores** (=0) when the *model's predictions (`yhat`) fall outside the expected range*, signaling uncertain predictions.
|
||||
- For more details, please refer to the [documentation](https://docs.victoriametrics.com/anomaly-detection/components/reader/#per-query-parameters).
|
||||
|
||||
- IMPROVEMENT: Added `latency_offset` argument to the [VmReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader) to override the default `-search.latencyOffset` [flag of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/#list-of-command-line-flags) (30s). The default value is set to 1ms, which should help in cases where `sampling_frequency` is low (10-60s) and `sampling_frequency` equals `infer_every` in the [PeriodicScheduler](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#periodic-scheduler). This prevents users from receiving `service - WARNING - [Scheduler [scheduler_alias]] No data available for inference.` warnings in logs and allows for consecutive `infer` calls without gaps. To restore the backward compatible behavior, set it equal to your `-search.latencyOffset` value in [VmReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader) config section.
|
||||
- IMPROVEMENT: Added `latency_offset` argument to the [VmReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader) to override the default `-search.latencyOffset` [flag of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/#list-of-command-line-flags) (30s). The default value is set to 1ms, which should help in cases where `sampling_period` is low (10-60s) and `sampling_period` equals `infer_every` in the [PeriodicScheduler](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#periodic-scheduler). This prevents users from receiving `service - WARNING - [Scheduler [scheduler_alias]] No data available for inference.` warnings in logs and allows for consecutive `infer` calls without gaps. To restore the backward compatible behavior, set it equal to your `-search.latencyOffset` value in [VmReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader) config section.
|
||||
|
||||
- BUGFIX: Ensure the `use_transform` argument of the [`OnlineQuantileModel`](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile) functions as intended.
|
||||
- BUGFIX: Add a docstring for `query_from_last_seen_timestamp` arg of [VmReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader).
|
||||
|
||||
@@ -53,7 +53,7 @@ Please see example graph illustrating this logic below:
|
||||
|
||||
**VictoriaMetrics (metrics):** use full [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) for selection, sampling, and processing; [global filters](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-enhancements) are also supported. See the [VmReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader) for the details.
|
||||
|
||||
**VictoriaLogs (logs → metrics):** {{% available_from "v1.26.0" anomaly %}} use [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) via the [`VLogsReader`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vlogs-reader) to create log-derived or traces-derived metrics for anomaly detection (e.g., error rates, request latencies, error spans count).
|
||||
**VictoriaLogs (logs → metrics):** {{% available_from "v1.26.0" anomaly %}} use [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) via the [`VLogsReader`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#victorialogs-reader) to create log-derived or traces-derived metrics for anomaly detection (e.g., error rates, request latencies, error spans count).
|
||||
|
||||
> [!NOTE]
|
||||
> Please note that only LogsQL queries with [stats pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe) functions [subset](https://docs.victoriametrics.com/anomaly-detection/components/reader/#valid-stats-functions) are supported, as they produce **numeric** time series.
|
||||
@@ -281,7 +281,7 @@ reader:
|
||||
datasource_url: 'some_url_to_read_data_from'
|
||||
queries:
|
||||
query_alias1: 'some_metricsql_query'
|
||||
sampling_frequency: '1m' # change to whatever you need in data granularity
|
||||
sampling_period: '1m' # change to whatever you need in data granularity
|
||||
# other params if needed
|
||||
# https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader
|
||||
|
||||
@@ -294,7 +294,7 @@ writer:
|
||||
# https://docs.victoriametrics.com/anomaly-detection/components/monitoring/
|
||||
```
|
||||
|
||||
Configuration above will produce N intervals of full length (`fit_window`=14d + `fit_every`=1h) until `to_iso` timestamp is reached to run N consecutive `fit` calls to train models; Then these models will be used to produce `M = [fit_every / sampling_frequency]` infer datapoints for `fit_every` range at the end of each such interval, imitating M consecutive calls of `infer_every` in `PeriodicScheduler` [config](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#periodic-scheduler). These datapoints then will be written back to VictoriaMetrics TSDB, defined in `writer` [section](https://docs.victoriametrics.com/anomaly-detection/components/writer/#vm-writer) for further visualization (i.e. in VMUI or Grafana)
|
||||
Configuration above will produce N intervals of full length (`fit_window`=14d + `fit_every`=1h) until `to_iso` timestamp is reached to run N consecutive `fit` calls to train models; Then these models will be used to produce `M = [fit_every / sampling_period]` infer datapoints for `fit_every` range at the end of each such interval, imitating M consecutive calls of `infer_every` in `PeriodicScheduler` [config](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#periodic-scheduler). These datapoints then will be written back to VictoriaMetrics TSDB, defined in `writer` [section](https://docs.victoriametrics.com/anomaly-detection/components/writer/#vm-writer) for further visualization (i.e. in VMUI or Grafana)
|
||||
|
||||
## Forecasting
|
||||
|
||||
@@ -499,7 +499,7 @@ schedulers:
|
||||
models:
|
||||
zscore_example:
|
||||
class: 'zscore_online'
|
||||
min_n_samples_seen: 120 # i.e. minimal relevant seasonality or (initial) fit_window / sampling_frequency
|
||||
min_n_samples_seen: 120 # i.e. minimal relevant seasonality or (initial) fit_window / sampling_period
|
||||
decay: 0.999 # decay factor to control how fast the model adapts to new data, the lower, the faster it adapts
|
||||
schedulers: ['periodic']
|
||||
# other model params ...
|
||||
|
||||
@@ -406,10 +406,10 @@ For optimal service behavior, consider the following tweaks when configuring `vm
|
||||
|
||||
**Reader**:
|
||||
- Setup the datasource to read data from in the [reader](https://docs.victoriametrics.com/anomaly-detection/components/reader/) section. Include tenant ID if using a [cluster version of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) (`multitenant` value {{% available_from "v1.16.2" anomaly %}} can be also used here).
|
||||
- Define queries for input data using [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) under `reader.queries` section. Note, it's possible to override reader-level arguments at query level for increased flexibility, e.g. specifying per-query [timezone](https://docs.victoriametrics.com/anomaly-detection/faq/#handling-timezones) or [sampling period](https://docs.victoriametrics.com/anomaly-detection/components/reader/#sampling-period).
|
||||
- Define queries for input data using [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) under `reader.queries` section. Note, it's possible to override reader-level arguments at query level for increased flexibility, e.g. specifying per-query [timezone](https://docs.victoriametrics.com/anomaly-detection/faq/#handling-timezones) or [sampling period](https://docs.victoriametrics.com/anomaly-detection/components/reader/#config-parameters).
|
||||
- For longer `fit_window` intervals in scheduler, consider splitting queries into smaller time ranges to avoid excessive memory usage, timeouts and hitting server-side constraints, so they can be queried separately and reconstructed on `vmanomaly` side. Please refer to this [example](https://docs.victoriametrics.com/anomaly-detection/faq/#handling-large-queries-in-vmanomaly) for more details.
|
||||
|
||||
> If applicable - consider [`VLogsReader`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vlogs-reader) {{% available_from "v1.26.0" anomaly %}} to perform anomaly detection on **log-derived metrics**. This is particularly useful for scenarios where log data needs to be analyzed for unusual patterns or behaviors, such as error rates or request latencies.
|
||||
> If applicable - consider [`VLogsReader`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#victorialogs-reader) {{% available_from "v1.26.0" anomaly %}} to perform anomaly detection on **log-derived metrics**. This is particularly useful for scenarios where log data needs to be analyzed for unusual patterns or behaviors, such as error rates or request latencies.
|
||||
|
||||
**Writer**:
|
||||
- Specify where and how to store anomaly detection metrics in the [writer](https://docs.victoriametrics.com/anomaly-detection/components/writer/) section.
|
||||
|
||||
@@ -694,7 +694,7 @@ vmanomaly version: [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/
|
||||
|
||||
- BUGFIX: Now Visualization Panel correctly switches in between "query" and "detect" modes when respective buttons are hit in the [Visualization Panel](#visualization-panel), without showing stale results from the previous mode, once running anomaly detection task is explicitly cancelled (regression introduced in [v1.5.0](#v150)).
|
||||
|
||||
- BUGFIX: Fixed an issue with [crypto.randomUUID](https://developer.mozilla.org/en-US/docs/Web/API/Crypto/randomUUID) introduced in [v1.29.0](#v1290) in [UI copilot](https://docs.victoriametrics.com/anomaly-detection/ui/#ai-assistance) that led to the front app showing a blank page.
|
||||
- BUGFIX: Fixed an issue with [crypto.randomUUID](https://developer.mozilla.org/en-US/docs/Web/API/Crypto/randomUUID) introduced in [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) in [UI copilot](https://docs.victoriametrics.com/anomaly-detection/ui/#ai-assistance) that led to the front app showing a blank page.
|
||||
|
||||
### v1.5.0
|
||||
Released: 2026-03-05
|
||||
|
||||
@@ -41,7 +41,7 @@ settings:
|
||||
restore_state: True # restore state from previous run, if available
|
||||
retention: # how long to keep stale models on disk/in memory
|
||||
ttl: "1d" # time-to-live duration, if the model was not used for inference within this duration, it will be considered stale
|
||||
check_every: "1h" # how often to check for stale models and remove them
|
||||
check_interval: "1h" # how often to check for stale models and remove them
|
||||
|
||||
# how and when to run the models is defined by schedulers
|
||||
# https://docs.victoriametrics.com/anomaly-detection/components/scheduler/
|
||||
@@ -143,15 +143,15 @@ server:
|
||||
|
||||
> This feature is better used in conjunction with [stateful service](https://docs.victoriametrics.com/anomaly-detection/components/settings/#state-restoration) to preserve the state of the models and schedulers between restarts and reuse what can be reused, thus avoiding unnecessary re-training of models, re-initialization of schedulers and re-reading of data.
|
||||
|
||||
{{% available_from "v1.25.0" anomaly %}} Service supports hot reload of configuration files, which allows for automatic reloading of configurations on config files change filesystem events without the need of explicit service restart. This can be enabled via the `--watch` [CLI argument](https://docs.victoriametrics.com/anomaly-detection/quickstart/#command-line-arguments). `vmanomaly_hot_reload_enabled` flag in [self-monitoring metrics](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#startup-metrics) will be set to 1 (if enabled) or 0 (if disabled).
|
||||
{{% available_from "v1.25.0" anomaly %}} The service supports hot reload of configuration files: configurations are reloaded automatically when filesystem change events are detected for config files, without the need for an explicit service restart. This can be enabled via the `--watch` [CLI argument](https://docs.victoriametrics.com/anomaly-detection/quickstart/#command-line-arguments). The `vmanomaly_config_reload_enabled` flag in [self-monitoring metrics](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#startup-metrics) will be set to 1 (if enabled) or 0 (if disabled).
|
||||
|
||||
### How it works
|
||||
|
||||
It works by watching for file system events, such as modifications, creations, or deletions of `.yml|.yaml` files in the specified directories. When a change is detected, the service will attempt to reload the configuration files, rebuild the [global config](https://docs.victoriametrics.com/anomaly-detection/scaling-vmanomaly/#global-config) and reinitialize the components. If the reload is successful, the `vmanomaly_hot_reload_events_total` metric will be incremented for `status="success"` label, otherwise it will be incremented with `status="failure"` label and a respective error message on config validation failure(s) will be logged.
|
||||
It works by watching for file system events, such as modifications, creations, or deletions of `.yml|.yaml` files in the specified directories. When a change is detected, the service will attempt to reload the configuration files, rebuild the [global config](https://docs.victoriametrics.com/anomaly-detection/scaling-vmanomaly/#global-configuration) and reinitialize the components. If the reload is successful, the `vmanomaly_config_reloads_total` metric will be incremented with the `status="success"` label; otherwise, it will be incremented with the `status="failure"` label and a corresponding error message describing the config validation failure(s) will be logged.
|
||||
|
||||
> If the reload fails, the service will log an error message indicating the reason for the failure, and the **previous configuration will remain active until a successful reload occurs** to preserve the service's stability. This means that if there are errors in the new configuration, the service will continue to operate with the last valid configuration until the issues are resolved.
|
||||
|
||||
If used on [sharded setup](https://docs.victoriametrics.com/anomaly-detection/scaling-vmanomaly/#horizontal-scalability), upon [global config](https://docs.victoriametrics.com/anomaly-detection/scaling-vmanomaly/#global-config) change, all shards will be reinitialized with the new configurations.
|
||||
If used on [sharded setup](https://docs.victoriametrics.com/anomaly-detection/scaling-vmanomaly/#horizontal-scalability), upon [global config](https://docs.victoriametrics.com/anomaly-detection/scaling-vmanomaly/#global-configuration) change, all shards will be reinitialized with the new configurations.
|
||||
|
||||
> Please note that even if [state restoration](https://docs.victoriametrics.com/anomaly-detection/components/settings/#state-restoration) is enabled, the models, queries and schedulers might "migrate" to new shards if the order or the number of [sub-configs](https://docs.victoriametrics.com/anomaly-detection/scaling-vmanomaly/#sub-configuration) changes after a new config is hot-reloaded, so state restoration won't be **fully** efficient in this case.
|
||||
|
||||
@@ -219,7 +219,7 @@ reader:
|
||||
# ... (rest of the config remains unchanged)
|
||||
```
|
||||
|
||||
After saving the changes, hot reload will automatically detect the changes in `config.yaml` and attempt to reload the configuration. As the changes are valid, the service will log a success message and increment the `vmanomaly_hot_reload_events_total` metric with `status="success"` label:
|
||||
After saving the changes, hot reload will automatically detect the changes in `config.yaml` and attempt to reload the configuration. As the changes are valid, the service will log a success message and increment the `vmanomaly_config_reloads_total` metric with `status="success"` label:
|
||||
|
||||
- All the model instances of class `zscore_online`, that were trained on `host_network_receive_errors` can be reused as they are still valid and "fresh" for making inference on new datapoints until the next `fit_every` happens.
|
||||
- All the model instances of class `zscore_online`, that were trained on `cpu_seconds_total` will be re-trained with the new query expression and frequency, as old model instances are not valid anymore.
|
||||
|
||||
@@ -41,7 +41,7 @@ models:
|
||||
# ...
|
||||
```
|
||||
|
||||
Old-style configs (< [1.10.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#1100))
|
||||
Old-style configs (< [1.10.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1100))
|
||||
|
||||
```yaml
|
||||
model:
|
||||
@@ -66,7 +66,7 @@ models:
|
||||
|
||||
## Common args
|
||||
|
||||
From [1.10.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#1100), **common args**, supported by *every model (and model type)* were introduced.
|
||||
From [1.10.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1100), **common args**, supported by *every model (and model type)* were introduced.
|
||||
|
||||
### Queries
|
||||
|
||||
|
||||
@@ -369,7 +369,7 @@ If True, then query will be performed from the last seen timestamp for a given s
|
||||
`1ms`
|
||||
</td>
|
||||
<td>
|
||||
It allows overriding the default `-search.latencyOffset`{{% available_from "v1.15.1" anomaly %}} [flag of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/#list-of-command-line-flags) (30s). The default value is set to 1ms, which should help in cases where `sampling_frequency` is low (10-60s) and `sampling_frequency` equals `infer_every` in the [PeriodicScheduler](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#periodic-scheduler). This prevents users from receiving `service - WARNING - [Scheduler [scheduler_alias]] No data available for inference.` warnings in logs and allows for consecutive `infer` calls without gaps. To restore the old behavior, set it equal to your `-search.latencyOffset` [flag value](https://docs.victoriametrics.com/victoriametrics/#list-of-command-line-flags).
|
||||
It allows overriding the default `-search.latencyOffset`{{% available_from "v1.15.1" anomaly %}} [flag of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/#list-of-command-line-flags) (30s). The default value is set to 1ms, which should help in cases where `sampling_period` is low (10-60s) and `sampling_period` equals `infer_every` in the [PeriodicScheduler](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#periodic-scheduler). This prevents users from receiving `service - WARNING - [Scheduler [scheduler_alias]] No data available for inference.` warnings in logs and allows for consecutive `infer` calls without gaps. To restore the old behavior, set it equal to your `-search.latencyOffset` [flag value](https://docs.victoriametrics.com/victoriametrics/#list-of-command-line-flags).
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
@@ -760,7 +760,7 @@ Frequency of the points returned. Will be converted to `/select/stats_query_rang
|
||||
`10000`
|
||||
</td>
|
||||
<td>
|
||||
(Optional) For splitting long `fit_window` [queries](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vlogs-reader) into smaller sub-intervals. This helps users avoid hitting the timeout limits for individual queries by distributing initial query across multiple subquery requests with minimal overhead. Can be also set on [per-query](#per-query-parameters-1) basis to override reader-level settings.
|
||||
(Optional) For splitting long `fit_window` [queries](https://docs.victoriametrics.com/anomaly-detection/components/reader/#victorialogs-reader) into smaller sub-intervals. This helps users avoid hitting the timeout limits for individual queries by distributing the initial query across multiple subquery requests with minimal overhead. Can also be set on a [per-query](#per-query-parameters-1) basis to override reader-level settings.
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
|
||||
@@ -240,23 +240,23 @@ vmagent will write data into VictoriaMetrics single-node and cluster (with tenan
|
||||
# compose.yaml
|
||||
services:
|
||||
vmsingle:
|
||||
image: victoriametrics/victoria-metrics:v1.144.0
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.144.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.145.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.144.0-cluster
|
||||
image: victoriametrics/vminsert:v1.145.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.144.0-cluster
|
||||
image: victoriametrics/vmselect:v1.145.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.144.0
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
@@ -308,7 +308,7 @@ Now add the vmauth service to `compose.yaml`:
|
||||
# compose.yaml
|
||||
services:
|
||||
vmauth:
|
||||
image: docker.io/victoriametrics/vmauth:v1.144.0
|
||||
image: docker.io/victoriametrics/vmauth:v1.145.0
|
||||
ports:
|
||||
- 8427:8427
|
||||
volumes:
|
||||
|
||||
@@ -155,15 +155,15 @@ These services will store and query the metrics scraped by vmagent.
|
||||
# compose.yaml
|
||||
services:
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.144.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.145.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.144.0-cluster
|
||||
image: victoriametrics/vminsert:v1.145.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.144.0-cluster
|
||||
image: victoriametrics/vmselect:v1.145.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
ports:
|
||||
@@ -196,7 +196,7 @@ Add the vmauth service to `compose.yaml`:
|
||||
# compose.yaml
|
||||
services:
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.144.0-enterprise
|
||||
image: victoriametrics/vmauth:v1.145.0-enterprise
|
||||
ports:
|
||||
- 8427:8427
|
||||
volumes:
|
||||
@@ -251,7 +251,7 @@ Add the vmagent service to `compose.yaml` with OAuth2 configuration:
|
||||
# compose.yaml
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.144.0
|
||||
image: victoriametrics/vmagent:v1.145.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
|
||||
@@ -107,7 +107,7 @@ The final piece is the Docker Compose file. This ties all the services together
|
||||
# compose.yml
|
||||
services:
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.144.0
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
command:
|
||||
- "--storageDataPath=/victoria-metrics-data"
|
||||
- "--selfScrapeInterval=10s"
|
||||
@@ -128,7 +128,7 @@ services:
|
||||
- ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.144.0
|
||||
image: victoriametrics/vmalert:v1.145.0
|
||||
depends_on:
|
||||
- victoriametrics
|
||||
- alertmanager
|
||||
|
||||
@@ -61,9 +61,9 @@ Download the newest available [VictoriaMetrics release](https://docs.victoriamet
|
||||
from [DockerHub](https://hub.docker.com/r/victoriametrics/victoria-metrics) or [Quay](https://quay.io/repository/victoriametrics/victoria-metrics?tab=tags):
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/victoria-metrics:v1.144.0
|
||||
docker pull victoriametrics/victoria-metrics:v1.145.0
|
||||
docker run -it --rm -v `pwd`/victoria-metrics-data:/victoria-metrics-data -p 8428:8428 \
|
||||
victoriametrics/victoria-metrics:v1.144.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
victoriametrics/victoria-metrics:v1.145.0 --selfScrapeInterval=5s -storageDataPath=victoria-metrics-data
|
||||
```
|
||||
|
||||
_For Enterprise images, see [this link](https://docs.victoriametrics.com/victoriametrics/enterprise/#docker-images)._
|
||||
|
||||
@@ -28,7 +28,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
|
||||
## [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0)
|
||||
|
||||
Release candidate
|
||||
Released at 2026-06-08
|
||||
|
||||
* SECURITY: upgrade Go builder from Go1.26.3 to Go1.26.4. See [the list of issues addressed in Go1.26.4](https://github.com/golang/go/issues?q=milestone%3AGo1.26.4%20label%3ACherryPickApproved).
|
||||
|
||||
@@ -264,6 +264,24 @@ It enables back `Discovered targets` debug UI by default.
|
||||
* BUGFIX: `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly apply `extra_filters[]` filter when querying `vm_account_id` or `vm_project_id` labels via [multitenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) request for `/api/v1/label/…/values` API. Before, `extra_filters` was ignored. See [#10503](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10503).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): revert the use of rollup result cache for [instant queries](https://docs.victoriametrics.com/keyConcepts.html#instant-query) that contain [`rate`](https://docs.victoriametrics.com/MetricsQL.html#rate) function with a lookbehind window larger than `-search.minWindowForInstantRollupOptimization`. The cache usage was removed since [v1.132.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.132.0). See [#10098](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10098#issuecomment-3895011084) for more details.
|
||||
|
||||
## [v1.136.11](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.11)
|
||||
|
||||
Released at 2026-06-05
|
||||
|
||||
**v1.136.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.136.x line will be supported for at least 12 months since [v1.136.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11360) release**
|
||||
|
||||
* SECURITY: upgrade Go builder from Go1.26.3 to Go1.26.4. See [the list of issues addressed in Go1.26.4](https://github.com/golang/go/issues?q=milestone%3AGo1.26.4%20label%3ACherryPickApproved).
|
||||
|
||||
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/): `integrate()` no longer extrapolates the last sample's value past the end of the time series. Previously, querying `integrate(metric[1h])` at a timestamp where the series had already ended would keep accruing area as if the last value continued indefinitely, producing values much larger than the true integral. See [#9474](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9474). Thanks to @wtfashwin for the contribution.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): persist the `Disable deduplication` toggle under its own local storage key. Before this fix, the toggle state was lost after reload and could overwrite the `Compact view` table setting. See [#11004](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/11004). Thanks to @immanuwell for the contribution.
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): fix the `Notifiers` page in web UI appearing blank despite the API returning notifier data correctly. See [#11035](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11035).
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): reset the group evaluation timestamp if it exceeds the current host time. Previously, vmalert could use future timestamps for evaluations if the system clock was shifted backward. See [#10985](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10985).
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): avoid returning HTTP 503 for queries with partial results when a storage group is unavailable and `-search.denyPartialResponse` is disabled. See [#11009](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11009). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly escape `utf-8` label names for [/federate](https://docs.victoriametrics.com/victoriametrics/#federation) API requests. See [#10968](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10968).
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix intermittent `write: connection timed out` errors caused by silently dropped TCP connections being reused from the connection pool. See [#10735](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10735#issuecomment-4535832301).
|
||||
|
||||
## [v1.136.10](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.10)
|
||||
|
||||
Released at 2026-05-22
|
||||
@@ -607,6 +625,22 @@ See changes [here](https://docs.victoriametrics.com/victoriametrics/changelog/ch
|
||||
|
||||
See changes [here](https://docs.victoriametrics.com/victoriametrics/changelog/changelog_2025/#v11230)
|
||||
|
||||
## [v1.122.24](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.24)
|
||||
|
||||
Released at 2026-06-05
|
||||
|
||||
**v1.122.x is a line of [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/). It contains important up-to-date bugfixes for [VictoriaMetrics enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/).
|
||||
All these fixes are also included in [the latest community release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest).
|
||||
The v1.122.x line will be supported for at least 12 months since [v1.122.0](https://docs.victoriametrics.com/victoriametrics/changelog/#v11220) release**
|
||||
|
||||
* SECURITY: upgrade Go builder from Go1.26.3 to Go1.26.4. See [the list of issues addressed in Go1.26.4](https://github.com/golang/go/issues?q=milestone%3AGo1.26.4%20label%3ACherryPickApproved).
|
||||
|
||||
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/): `integrate()` no longer extrapolates the last sample's value past the end of the time series. Previously, querying `integrate(metric[1h])` at a timestamp where the series had already ended would keep accruing area as if the last value continued indefinitely, producing values much larger than the true integral. See [#9474](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9474). Thanks to @wtfashwin for the contribution.
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): reset the group evaluation timestamp if it exceeds the current host time. Previously, vmalert could use future timestamps for evaluations if the system clock was shifted backward. See [#10985](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10985).
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): avoid returning HTTP 503 for queries with partial results when a storage group is unavailable and `-search.denyPartialResponse` is disabled. See [#11009](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/11009). Thanks to @fxrlv for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly escape `utf-8` label names for [/federate](https://docs.victoriametrics.com/victoriametrics/#federation) API requests. See [#10968](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10968).
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix intermittent `write: connection timed out` errors caused by silently dropped TCP connections being reused from the connection pool. See [#10735](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10735#issuecomment-4535832301).
|
||||
|
||||
## [v1.122.23](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.23)
|
||||
|
||||
Released at 2026-05-22
|
||||
|
||||
@@ -121,7 +121,7 @@ It is allowed to run Enterprise components in [cases listed here](https://docs.v
|
||||
Binary releases of Enterprise components are available at [the releases page for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest),
|
||||
[the releases page for VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/releases/latest)
|
||||
and [the releases page for VictoriaTraces](https://github.com/VictoriaMetrics/VictoriaTraces/releases/latest).
|
||||
Enterprise binaries and packages have `enterprise` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.144.0-enterprise.tar.gz`.
|
||||
Enterprise binaries and packages have `enterprise` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.145.0-enterprise.tar.gz`.
|
||||
|
||||
In order to run binary release of Enterprise component, please download the `*-enterprise.tar.gz` archive for your OS and architecture
|
||||
from the corresponding releases page and unpack it. Then run the unpacked binary.
|
||||
@@ -139,8 +139,8 @@ For example, the following command runs VictoriaMetrics Enterprise binary with t
|
||||
obtained at [this page](https://victoriametrics.com/products/enterprise/trial/):
|
||||
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.144.0/victoria-metrics-linux-amd64-v1.144.0-enterprise.tar.gz
|
||||
tar -xzf victoria-metrics-linux-amd64-v1.144.0-enterprise.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.145.0/victoria-metrics-linux-amd64-v1.145.0-enterprise.tar.gz
|
||||
tar -xzf victoria-metrics-linux-amd64-v1.145.0-enterprise.tar.gz
|
||||
./victoria-metrics-prod -license=BASE64_ENCODED_LICENSE_KEY
|
||||
```
|
||||
|
||||
@@ -155,7 +155,7 @@ Alternatively, VictoriaMetrics Enterprise license can be stored in the file and
|
||||
It is allowed to run Enterprise components in [cases listed here](https://docs.victoriametrics.com/victoriametrics/enterprise/#valid-cases-for-victoriametrics-enterprise).
|
||||
|
||||
Docker images for Enterprise components are available at [VictoriaMetrics Docker Hub](https://hub.docker.com/u/victoriametrics) and [VictoriaMetrics Quay](https://quay.io/organization/victoriametrics).
|
||||
Enterprise docker images have `enterprise` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.144.0-enterprise`.
|
||||
Enterprise docker images have `enterprise` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.145.0-enterprise`.
|
||||
|
||||
In order to run Docker image of VictoriaMetrics Enterprise component, it is required to provide the license key via the command-line
|
||||
flag as described in the [binary-releases](https://docs.victoriametrics.com/victoriametrics/enterprise/#binary-releases) section.
|
||||
@@ -165,13 +165,13 @@ Enterprise license key can be obtained at [this page](https://victoriametrics.co
|
||||
For example, the following command runs VictoriaMetrics Enterprise Docker image with the specified license key:
|
||||
|
||||
```sh
|
||||
docker run --name=victoria-metrics victoriametrics/victoria-metrics:v1.144.0-enterprise -license=BASE64_ENCODED_LICENSE_KEY
|
||||
docker run --name=victoria-metrics victoriametrics/victoria-metrics:v1.145.0-enterprise -license=BASE64_ENCODED_LICENSE_KEY
|
||||
```
|
||||
|
||||
Alternatively, the license code can be stored in the file and then referred via `-licenseFile` command-line flag:
|
||||
|
||||
```sh
|
||||
docker run --name=victoria-metrics -v /vm-license:/vm-license victoriametrics/victoria-metrics:v1.144.0-enterprise -licenseFile=/path/to/vm-license
|
||||
docker run --name=victoria-metrics -v /vm-license:/vm-license victoriametrics/victoria-metrics:v1.145.0-enterprise -licenseFile=/path/to/vm-license
|
||||
```
|
||||
|
||||
Example docker-compose configuration:
|
||||
@@ -181,7 +181,7 @@ version: "3.5"
|
||||
services:
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.144.0
|
||||
image: victoriametrics/victoria-metrics:v1.145.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -213,7 +213,7 @@ is used to provide the license key in plain-text:
|
||||
```yaml
|
||||
server:
|
||||
image:
|
||||
tag: v1.144.0-enterprise
|
||||
tag: v1.145.0-enterprise
|
||||
|
||||
license:
|
||||
key: {BASE64_ENCODED_LICENSE_KEY}
|
||||
@@ -224,7 +224,7 @@ In order to provide the license key via existing secret, the following values fi
|
||||
```yaml
|
||||
server:
|
||||
image:
|
||||
tag: v1.144.0-enterprise
|
||||
tag: v1.145.0-enterprise
|
||||
|
||||
license:
|
||||
secret:
|
||||
@@ -274,7 +274,7 @@ spec:
|
||||
license:
|
||||
key: {BASE64_ENCODED_LICENSE_KEY}
|
||||
image:
|
||||
tag: v1.144.0-enterprise
|
||||
tag: v1.145.0-enterprise
|
||||
```
|
||||
|
||||
In order to provide the license key via an existing secret, the following custom resource is used:
|
||||
@@ -291,7 +291,7 @@ spec:
|
||||
name: vm-license
|
||||
key: license
|
||||
image:
|
||||
tag: v1.144.0-enterprise
|
||||
tag: v1.145.0-enterprise
|
||||
```
|
||||
|
||||
Example secret with license key:
|
||||
@@ -342,7 +342,7 @@ Builds are available for amd64 and arm64 architectures.
|
||||
|
||||
Example archive:
|
||||
|
||||
`victoria-metrics-linux-amd64-v1.144.0-enterprise.tar.gz`
|
||||
`victoria-metrics-linux-amd64-v1.145.0-enterprise.tar.gz`
|
||||
|
||||
Includes:
|
||||
|
||||
@@ -351,7 +351,7 @@ Includes:
|
||||
|
||||
Example Docker image:
|
||||
|
||||
`victoriametrics/victoria-metrics:v1.144.0-enterprise-fips` – uses the FIPS-compatible binary and based on `scratch` image.
|
||||
`victoriametrics/victoria-metrics:v1.145.0-enterprise-fips` – uses the FIPS-compatible binary and based on `scratch` image.
|
||||
|
||||
## What Happens to Licensed Components When a License Expires
|
||||
|
||||
|
||||
@@ -35,8 +35,8 @@ scrape_configs:
|
||||
After you created the `scrape.yaml` file, download and unpack [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) to the same directory:
|
||||
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.144.0/victoria-metrics-linux-amd64-v1.144.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.144.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.145.0/victoria-metrics-linux-amd64-v1.145.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.145.0.tar.gz
|
||||
```
|
||||
|
||||
Then start VictoriaMetrics and instruct it to scrape targets defined in `scrape.yaml` and save scraped metrics
|
||||
@@ -150,8 +150,8 @@ Then start [single-node VictoriaMetrics](https://docs.victoriametrics.com/victor
|
||||
|
||||
```sh
|
||||
# Download and unpack single-node VictoriaMetrics
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.144.0/victoria-metrics-linux-amd64-v1.144.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.144.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.145.0/victoria-metrics-linux-amd64-v1.145.0.tar.gz
|
||||
tar xzf victoria-metrics-linux-amd64-v1.145.0.tar.gz
|
||||
|
||||
# Run single-node VictoriaMetrics with the given scrape.yaml
|
||||
./victoria-metrics-prod -promscrape.config=scrape.yaml
|
||||
|
||||
@@ -219,17 +219,21 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/
|
||||
Whether to convert only metric names into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
|
||||
-opentelemetry.ignoreResourceAttributes array
|
||||
Control which resource attributes to ignore, can only be set when 'opentelemetry.promoteAllResourceAttributes' is true.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-opentelemetry.labelNameUnderscoreSanitization
|
||||
Whether to enable prepending of 'key' to labels starting with '_' when -opentelemetry.usePrometheusNaming is enabled. Reserved labels starting with '__' are not modified. See https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/ (default true)
|
||||
-opentelemetry.maxRequestSize size
|
||||
The maximum size in bytes of a single OpenTelemetry request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-opentelemetry.promoteAllResourceAttributes
|
||||
Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'.
|
||||
Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'. (default true)
|
||||
-opentelemetry.promoteResourceAttributes array
|
||||
Promote specific list of resource attributes to labels.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-opentelemetry.promoteScopeMetadata
|
||||
Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.
|
||||
Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels. (default true)
|
||||
-opentelemetry.usePrometheusNaming
|
||||
Whether to convert metric names and labels into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
|
||||
-opentsdbHTTPListenAddr string
|
||||
|
||||
@@ -186,17 +186,21 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/vmagent/ .
|
||||
Whether to convert only metric names into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
|
||||
-opentelemetry.ignoreResourceAttributes array
|
||||
Controls which resource attributes to ignore. Can only be set when 'opentelemetry.promoteAllResourceAttributes' is true.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-opentelemetry.labelNameUnderscoreSanitization
|
||||
Whether to enable prepending of 'key' to labels starting with '_' when -opentelemetry.usePrometheusNaming is enabled. Reserved labels starting with '__' are not modified. See https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/ (default true)
|
||||
-opentelemetry.maxRequestSize size
|
||||
The maximum size in bytes of a single OpenTelemetry request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-opentelemetry.promoteAllResourceAttributes
|
||||
Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'.
|
||||
Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'. (default true)
|
||||
-opentelemetry.promoteResourceAttributes array
|
||||
Promote specific list of resource attributes to labels.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-opentelemetry.promoteScopeMetadata
|
||||
Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.
|
||||
Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels. (default true)
|
||||
-opentelemetry.usePrometheusNaming
|
||||
Whether to convert metric names and labels into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
|
||||
-opentsdbHTTPListenAddr string
|
||||
|
||||
@@ -136,3 +136,7 @@ Processing blocks: 204 / 204 [██████████████████████████████████████████████████
|
||||
import requests retries: 0;
|
||||
2025/01/18 13:02:18 Total time: 18.867547083s
|
||||
```
|
||||
|
||||
See `./vmctl mimir --help` for details and full list of flags:
|
||||
|
||||
{{% content "vmctl_mimir_flags.md" %}}
|
||||
@@ -260,3 +260,7 @@ Processing ranges: 8799 / 8799 [████████████████
|
||||
See [remote-read mode](https://docs.victoriametrics.com/victoriametrics/vmctl/remoteread/) for more details.
|
||||
|
||||
See also general [vmctl migration tips](https://docs.victoriametrics.com/victoriametrics/vmctl/#migration-tips).
|
||||
|
||||
See `./vmctl thanos --help` for details and full list of flags:
|
||||
|
||||
{{% content "vmctl_thanos_flags.md" %}}
|
||||
@@ -34,9 +34,9 @@ vmctl command-line tool is available as:
|
||||
|
||||
Download and unpack vmctl:
|
||||
```sh
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.144.0/vmutils-darwin-arm64-v1.144.0.tar.gz
|
||||
wget https://github.com/VictoriaMetrics/VictoriaMetrics/releases/download/v1.145.0/vmutils-darwin-arm64-v1.145.0.tar.gz
|
||||
|
||||
tar xzf vmutils-darwin-arm64-v1.144.0.tar.gz
|
||||
tar xzf vmutils-darwin-arm64-v1.145.0.tar.gz
|
||||
```
|
||||
|
||||
Once binary is unpacked, see the full list of supported modes by running the following command:
|
||||
|
||||
@@ -20,6 +20,7 @@ COMMANDS:
|
||||
influx Migrate time series from InfluxDB
|
||||
remote-read Migrate time series via Prometheus remote-read protocol
|
||||
prometheus Migrate time series from Prometheus
|
||||
mimir Migrate time series from Mimir object storage or local filesystem
|
||||
thanos Migrate time series from Thanos blocks (supports raw and downsampled data)
|
||||
vm-native Migrate time series between VictoriaMetrics installations
|
||||
verify-block Verifies exported block with VictoriaMetrics Native format
|
||||
|
||||
68
docs/victoriametrics/vmctl/vmctl_mimir_flags.md
Normal file
68
docs/victoriametrics/vmctl/vmctl_mimir_flags.md
Normal file
@@ -0,0 +1,68 @@
|
||||
---
|
||||
build:
|
||||
list: never
|
||||
publishResources: false
|
||||
render: never
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
<!-- The file should not be updated manually. Run make docs-update-flags while preparing a new release to sync flags in docs from actual binaries. -->
|
||||
```shellhelp
|
||||
NAME:
|
||||
vmctl mimir - Migrate time series from Mimir object storage or local filesystem
|
||||
|
||||
USAGE:
|
||||
vmctl mimir [command options]
|
||||
|
||||
OPTIONS:
|
||||
-s Whether to run in silent mode. If set to true no confirmation prompts will appear. (default: false)
|
||||
--verbose Whether to enable verbosity in logs output. (default: false)
|
||||
--disable-progress-bar Whether to disable progress bar during the import. (default: false)
|
||||
--pushmetrics.url value [ --pushmetrics.url value ] Optional URL to push metrics. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics
|
||||
--pushmetrics.interval value Interval for pushing metrics to every -pushmetrics.url (default: 10s)
|
||||
--pushmetrics.extraLabel value [ --pushmetrics.extraLabel value ] Extra labels to add to pushed metrics. In case of collision, label value defined by flag will have priority. Flag can be set multiple times, to add few additional labels. For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to every -pushmetrics.url
|
||||
--pushmetrics.header value [ --pushmetrics.header value ] Optional HTTP headers to add to pushed metrics. Flag can be set multiple times, to add few additional headers.
|
||||
--pushmetrics.disableCompression Whether to disable compression when pushing metrics. (default: false)
|
||||
--mimir-path value Path to Mimir storage bucket or local folder.
|
||||
--mimir-tenant-id value Tenant ID for Mimir storage
|
||||
--mimir-concurrency value Number of concurrently running block readers (default: 1)
|
||||
--mimir-filter-time-start value The time filter in RFC3339 format to select timeseries with timestamp equal or higher than provided value. E.g. '2020-01-01T20:07:00Z'
|
||||
--mimir-filter-time-end value The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'
|
||||
--mimir-filter-label value Mimir label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.
|
||||
--mimir-filter-label-value value Regular expression to filter label from "mimir-filter-label" flag. (default: ".*")
|
||||
--mimir-creds-file-path value Path to file with GCS or S3 credentials. Credentials are loaded from default locations if not set. See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
|
||||
--mimir-config-file-path value Path to file with S3 configs. Configs are loaded from default location if not set. See https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html
|
||||
--mimir-config-profile value Profile name for S3 configs. If not set, the value of the environment variable (AWS_PROFILE or AWS_DEFAULT_PROFILE) is used; if neither is set, DefaultSharedConfigProfile is used
|
||||
--mimir-custom-s3-endpoint value Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set
|
||||
--mimir-s3-force-path-style The endpoint is prefixed with the bucket name when set to false. (default: true)
|
||||
--mimir-s3-tls-insecure-skip-verify Whether to skip TLS verification when connecting to the S3 endpoint. (default: false)
|
||||
--mimir-s3-sse-kms-key-id value SSE KMS Key ID for use with S3-compatible storages.
|
||||
--mimir-s3-sse-algorithm value SSE algorithm for use with S3-compatible storages.
|
||||
--vm-addr value VictoriaMetrics address to perform import requests.
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version, do not forget to additionally set the --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it defaults to 0
|
||||
--vm-concurrency value Number of workers concurrently performing import requests to VM (default: 2)
|
||||
--vm-compress Whether to apply gzip compression to import requests (default: true)
|
||||
--vm-batch-size value How many samples importer collects before sending the import request to VM (default: 200000)
|
||||
--vm-significant-figures value The number of significant figures to leave in metric values before importing. See https://en.wikipedia.org/wiki/Significant_figures. Zero value saves all the significant figures. This option may be used for increasing on-disk compression level for the stored metrics. See also --vm-round-digits option (default: 0)
|
||||
--vm-round-digits value Round metric values to the given number of decimal digits after the point. This option may be used for increasing on-disk compression level for the stored metrics. See also --vm-significant-figures option (default: 100)
|
||||
--vm-extra-label value [ --vm-extra-label value ] Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag will have priority. Flag can be set multiple times, to add few additional labels.
|
||||
--vm-rate-limit value Optional data transfer rate limit in bytes per second.
|
||||
By default, the rate limit is disabled. It can be useful for limiting load on the destination configured via '--vm-addr'. (default: 0)
|
||||
--vm-cert-file value Optional path to client-side TLS certificate file to use when connecting to '--vm-addr'
|
||||
--vm-key-file value Optional path to client-side TLS key to use when connecting to '--vm-addr'
|
||||
--vm-CA-file value Optional path to TLS CA file to use for verifying connections to '--vm-addr'. By default, system CA is used
|
||||
--vm-server-name value Optional TLS server name to use for connections to '--vm-addr'. By default, the server name from '--vm-addr' is used
|
||||
--vm-insecure-skip-verify Whether to skip tls verification when connecting to '--vm-addr' (default: false)
|
||||
--vm-backoff-retries value How many import retries to perform before giving up. (default: 10)
|
||||
--vm-backoff-factor value Factor to multiply the base duration after each failed import retry. Must be greater than 1.0 (default: 1.8)
|
||||
--vm-backoff-min-duration value Minimum duration to wait before the first import retry. Each subsequent import retry will be multiplied by the '--vm-backoff-factor'. (default: 2s)
|
||||
--help, -h show help
|
||||
```
|
||||
60
docs/victoriametrics/vmctl/vmctl_thanos_flags.md
Normal file
60
docs/victoriametrics/vmctl/vmctl_thanos_flags.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
build:
|
||||
list: never
|
||||
publishResources: false
|
||||
render: never
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
<!-- The file should not be updated manually. Run make docs-update-flags while preparing a new release to sync flags in docs from actual binaries. -->
|
||||
```shellhelp
|
||||
NAME:
|
||||
vmctl thanos - Migrate time series from Thanos blocks (supports raw and downsampled data)
|
||||
|
||||
USAGE:
|
||||
vmctl thanos [command options]
|
||||
|
||||
OPTIONS:
|
||||
-s Whether to run in silent mode. If set to true no confirmation prompts will appear. (default: false)
|
||||
--verbose Whether to enable verbosity in logs output. (default: false)
|
||||
--disable-progress-bar Whether to disable progress bar during the import. (default: false)
|
||||
--pushmetrics.url value [ --pushmetrics.url value ] Optional URL to push metrics. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics
|
||||
--pushmetrics.interval value Interval for pushing metrics to every -pushmetrics.url (default: 10s)
|
||||
--pushmetrics.extraLabel value [ --pushmetrics.extraLabel value ] Extra labels to add to pushed metrics. In case of collision, label value defined by flag will have priority. Flag can be set multiple times, to add few additional labels. For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to every -pushmetrics.url
|
||||
--pushmetrics.header value [ --pushmetrics.header value ] Optional HTTP headers to add to pushed metrics. Flag can be set multiple times, to add few additional headers.
|
||||
--pushmetrics.disableCompression Whether to disable compression when pushing metrics. (default: false)
|
||||
--thanos-snapshot value Path to Thanos snapshot directory containing raw and/or downsampled blocks.
|
||||
--thanos-concurrency value Number of concurrently running snapshot readers (default: 1)
|
||||
--thanos-filter-time-start value The time filter in RFC3339 format to select timeseries with timestamp equal or higher than provided value. E.g. '2020-01-01T20:07:00Z'
|
||||
--thanos-filter-time-end value The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'
|
||||
--thanos-filter-label value Thanos label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.
|
||||
--thanos-filter-label-value value Regular expression to filter the values of the label specified via the "thanos-filter-label" flag. (default: ".*")
|
||||
--thanos-aggr-types value [ --thanos-aggr-types value ] Aggregate types to import from Thanos downsampled blocks. Supported values: count, sum, min, max, counter. Each aggregate will be imported as a separate metric with the aggregate type as suffix (e.g., metric_name:5m:count). If not specified, all aggregate types will be imported from downsampled blocks.
|
||||
--vm-addr value VictoriaMetrics address to perform import requests.
|
||||
Should be the same as --httpListenAddr value for single-node version or vminsert component.
|
||||
When importing into the clustered version, do not forget to additionally set the --vm-account-id flag.
|
||||
Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint. (default: "http://localhost:8428")
|
||||
--vm-user value VictoriaMetrics username for basic auth [$VM_USERNAME]
|
||||
--vm-password value VictoriaMetrics password for basic auth [$VM_PASSWORD]
|
||||
--vm-account-id value AccountID is an arbitrary 32-bit integer identifying namespace for data ingestion (aka tenant).
|
||||
AccountID is required when importing into the clustered version of VictoriaMetrics.
|
||||
It is possible to set it as accountID:projectID, where projectID is also arbitrary 32-bit integer.
|
||||
If projectID isn't set, then it defaults to 0
|
||||
--vm-concurrency value Number of workers concurrently performing import requests to VM (default: 2)
|
||||
--vm-compress Whether to apply gzip compression to import requests (default: true)
|
||||
--vm-batch-size value How many samples importer collects before sending the import request to VM (default: 200000)
|
||||
--vm-significant-figures value The number of significant figures to leave in metric values before importing. See https://en.wikipedia.org/wiki/Significant_figures. Zero value saves all the significant figures. This option may be used for increasing on-disk compression level for the stored metrics. See also --vm-round-digits option (default: 0)
|
||||
--vm-round-digits value Round metric values to the given number of decimal digits after the point. This option may be used for increasing on-disk compression level for the stored metrics. See also --vm-significant-figures option (default: 100)
|
||||
--vm-extra-label value [ --vm-extra-label value ] Extra labels, that will be added to imported timeseries. In case of collision, label value defined by flag will have priority. Flag can be set multiple times, to add few additional labels.
|
||||
--vm-rate-limit value Optional data transfer rate limit in bytes per second.
|
||||
By default, the rate limit is disabled. It can be useful for limiting load on the destination configured via '--vm-addr'. (default: 0)
|
||||
--vm-cert-file value Optional path to client-side TLS certificate file to use when connecting to '--vm-addr'
|
||||
--vm-key-file value Optional path to client-side TLS key to use when connecting to '--vm-addr'
|
||||
--vm-CA-file value Optional path to TLS CA file to use for verifying connections to '--vm-addr'. By default, system CA is used
|
||||
--vm-server-name value Optional TLS server name to use for connections to '--vm-addr'. By default, the server name from '--vm-addr' is used
|
||||
--vm-insecure-skip-verify Whether to skip tls verification when connecting to '--vm-addr' (default: false)
|
||||
--vm-backoff-retries value How many import retries to perform before giving up. (default: 10)
|
||||
--vm-backoff-factor value Factor to multiply the base duration after each failed import retry. Must be greater than 1.0 (default: 1.8)
|
||||
--vm-backoff-min-duration value Minimum duration to wait before the first import retry. Each subsequent import retry will be multiplied by the '--vm-backoff-factor'. (default: 2s)
|
||||
--help, -h show help
|
||||
```
|
||||
@@ -186,17 +186,21 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/cluster-victori
|
||||
Whether to convert only metric names into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
|
||||
-opentelemetry.ignoreResourceAttributes array
|
||||
Controls which resource attributes to ignore. Can only be set when 'opentelemetry.promoteAllResourceAttributes' is true.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-opentelemetry.labelNameUnderscoreSanitization
|
||||
Whether to enable prepending of 'key' to labels starting with '_' when -opentelemetry.usePrometheusNaming is enabled. Reserved labels starting with '__' are not modified. See https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/ (default true)
|
||||
-opentelemetry.maxRequestSize size
|
||||
The maximum size in bytes of a single OpenTelemetry request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-opentelemetry.promoteAllResourceAttributes
|
||||
Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'.
|
||||
Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'. (default true)
|
||||
-opentelemetry.promoteResourceAttributes array
|
||||
Promote specific list of resource attributes to labels.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Each array item can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-opentelemetry.promoteScopeMetadata
|
||||
Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.
|
||||
Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels. (default true)
|
||||
-opentelemetry.usePrometheusNaming
|
||||
Whether to convert metric names and labels into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
|
||||
-opentsdbHTTPListenAddr string
|
||||
|
||||
@@ -20,7 +20,7 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/cluster-victori
|
||||
-cacheExpireDuration duration
|
||||
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
|
||||
-dedup.minScrapeInterval duration
|
||||
Leave only the last sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication for details
|
||||
Leave only the last sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. See also -streamAggr.dedupInterval and https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication
|
||||
-denyQueriesOutsideRetention
|
||||
Whether to deny queries outside the configured -retentionPeriod and -futureRetention. When set, then /api/v1/query_range will return an error for queries with 'from' value outside -retentionPeriod or 'to' value beyond -futureRetention. This may be useful when multiple data sources with distinct retentions are hidden behind query-tee
|
||||
-denyQueryTracing
|
||||
@@ -42,11 +42,11 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/cluster-victori
|
||||
Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path
|
||||
-forceFlushAuthKey value
|
||||
authKey, which must be passed in query string to /internal/force_flush pages
|
||||
authKey, which must be passed in query string to /internal/force_flush pages. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -forceFlushAuthKey=file:///abs/path/to/file or -forceFlushAuthKey=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -forceFlushAuthKey=http://host/path or -forceFlushAuthKey=https://host/path
|
||||
-forceMergeAuthKey value
|
||||
authKey, which must be passed in query string to /internal/force_merge pages
|
||||
authKey, which must be passed in query string to /internal/force_merge pages. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -forceMergeAuthKey=file:///abs/path/to/file or -forceMergeAuthKey=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -forceMergeAuthKey=http://host/path or -forceMergeAuthKey=https://host/path
|
||||
-fs.disableMincore
|
||||
@@ -193,7 +193,7 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/cluster-victori
|
||||
-smallMergeConcurrency int
|
||||
Deprecated: this flag does nothing
|
||||
-snapshotAuthKey value
|
||||
authKey, which must be passed in query string to /snapshot* pages
|
||||
authKey, which must be passed in query string to /snapshot* pages. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -snapshotAuthKey=file:///abs/path/to/file or -snapshotAuthKey=file://./relative/path/to/file.
|
||||
Flag value can be read from the given http/https url when using -snapshotAuthKey=http://host/path or -snapshotAuthKey=https://host/path
|
||||
-snapshotCreateTimeout duration
|
||||
|
||||
@@ -51,8 +51,8 @@ type VMInsertServer struct {
|
||||
vminsertMetadataRead *metrics.Counter
|
||||
}
|
||||
|
||||
// NewVMInsertServer starts VMInsertServer at the given addr serving the given storage.
|
||||
func NewVMInsertServer(addr string, connectionTimeout time.Duration, listenerName string, api API, tc *tls.Config) (*VMInsertServer, error) {
|
||||
// NewServer starts a VMInsert server at the given addr serving the given storage.
|
||||
func NewServer(addr string, connectionTimeout time.Duration, listenerName string, api API, tc *tls.Config) (*VMInsertServer, error) {
|
||||
ln, err := netutil.NewTCPListener(listenerName, addr, false, tc)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to listen vminsertAddr %s: %w", addr, err)
|
||||
|
||||
@@ -43,7 +43,7 @@ func TestProtocolMigration(t *testing.T) {
|
||||
// test old storage and new client
|
||||
{
|
||||
protoparserutil.StartUnmarshalWorkers()
|
||||
ts, err := NewVMInsertServer("localhost:0", time.Second, "vminsert-old", &testStorage, nil)
|
||||
ts, err := NewServer("localhost:0", time.Second, "vminsert-old", &testStorage, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot create server: %s", err)
|
||||
}
|
||||
@@ -68,7 +68,7 @@ func TestProtocolMigration(t *testing.T) {
|
||||
// test old client and new storage
|
||||
{
|
||||
protoparserutil.StartUnmarshalWorkers()
|
||||
ts, err := NewVMInsertServer("localhost:0", time.Second, "vminsert-new", &testStorage, nil)
|
||||
ts, err := NewServer("localhost:0", time.Second, "vminsert-new", &testStorage, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot create server: %s", err)
|
||||
}
|
||||
@@ -94,7 +94,7 @@ func TestProtocolMigration(t *testing.T) {
|
||||
// new client and new storage
|
||||
{
|
||||
protoparserutil.StartUnmarshalWorkers()
|
||||
ts, err := NewVMInsertServer("localhost:0", time.Second, "vminsert-both-new", &testStorage, nil)
|
||||
ts, err := NewServer("localhost:0", time.Second, "vminsert-both-new", &testStorage, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot create server: %s", err)
|
||||
}
|
||||
@@ -118,7 +118,7 @@ func TestProtocolMigration(t *testing.T) {
|
||||
testStorage.isReadOnly.Store(true)
|
||||
defer testStorage.isReadOnly.Store(false)
|
||||
protoparserutil.StartUnmarshalWorkers()
|
||||
ts, err := NewVMInsertServer("localhost:0", time.Second, "vminsert-read-only", &testStorage, nil)
|
||||
ts, err := NewServer("localhost:0", time.Second, "vminsert-read-only", &testStorage, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot create server: %s", err)
|
||||
}
|
||||
|
||||
@@ -76,15 +76,6 @@ type Server struct {
|
||||
|
||||
// Limits contains various limits for Server.
|
||||
type Limits struct {
|
||||
// MaxLabelNames is the maximum label names, which may be returned from labelNames request.
|
||||
MaxLabelNames int
|
||||
|
||||
// MaxLabelValues is the maximum label values, which may be returned from labelValues request.
|
||||
MaxLabelValues int
|
||||
|
||||
// MaxTagValueSuffixes is the maximum number of entries, which can be returned from tagValueSuffixes request.
|
||||
MaxTagValueSuffixes int
|
||||
|
||||
// MaxConcurrentRequests is the maximum number of concurrent requests a server can process.
|
||||
//
|
||||
// The remaining requests wait for up to MaxQueueDuration for their execution.
|
||||
@@ -683,9 +674,6 @@ func (s *Server) processLabelNames(ctx *vmselectRequestCtx) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read maxLabelNames: %w", err)
|
||||
}
|
||||
if maxLabelNames <= 0 || maxLabelNames > s.limits.MaxLabelNames {
|
||||
maxLabelNames = s.limits.MaxLabelNames
|
||||
}
|
||||
|
||||
if err := s.beginConcurrentRequest(ctx); err != nil {
|
||||
return ctx.writeErrorMessage(err)
|
||||
@@ -737,9 +725,6 @@ func (s *Server) processLabelValues(ctx *vmselectRequestCtx) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read maxLabelValues: %w", err)
|
||||
}
|
||||
if maxLabelValues <= 0 || maxLabelValues > s.limits.MaxLabelValues {
|
||||
maxLabelValues = s.limits.MaxLabelValues
|
||||
}
|
||||
|
||||
if err := s.beginConcurrentRequest(ctx); err != nil {
|
||||
return ctx.writeErrorMessage(err)
|
||||
@@ -802,9 +787,6 @@ func (s *Server) processTagValueSuffixes(ctx *vmselectRequestCtx) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read maxTagValueSuffixes: %w", err)
|
||||
}
|
||||
if maxSuffixes <= 0 || maxSuffixes > s.limits.MaxTagValueSuffixes {
|
||||
maxSuffixes = s.limits.MaxTagValueSuffixes
|
||||
}
|
||||
|
||||
if err := s.beginConcurrentRequest(ctx); err != nil {
|
||||
return ctx.writeErrorMessage(err)
|
||||
@@ -817,14 +799,6 @@ func (s *Server) processTagValueSuffixes(ctx *vmselectRequestCtx) error {
|
||||
return ctx.writeErrorMessage(err)
|
||||
}
|
||||
|
||||
if len(suffixes) >= s.limits.MaxTagValueSuffixes {
|
||||
err := fmt.Errorf("more than %d tag value suffixes found "+
|
||||
"for tagKey=%q, tagValuePrefix=%q, delimiter=%c on time range %s; "+
|
||||
"either narrow down the query or increase -search.max* command-line flag value; see https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#resource-usage-limits",
|
||||
s.limits.MaxTagValueSuffixes, tagKey, tagValuePrefix, delimiter, tr.String())
|
||||
return ctx.writeErrorMessage(err)
|
||||
}
|
||||
|
||||
// Send an empty error message to vmselect.
|
||||
if err := ctx.writeString(""); err != nil {
|
||||
return fmt.Errorf("cannot send empty error message: %w", err)
|
||||
|
||||
Reference in New Issue
Block a user