mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-02 08:32:46 +03:00
Compare commits
12 Commits
shared-vms
...
v1-143-0-d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f884ffb1e3 | ||
|
|
85a5429a2a | ||
|
|
bd836031ca | ||
|
|
0d3bd2d4e6 | ||
|
|
1fe747cf35 | ||
|
|
572f48c110 | ||
|
|
78883a3f98 | ||
|
|
96368dfcfc | ||
|
|
a0220db978 | ||
|
|
ef5e42cfa0 | ||
|
|
1730051e27 | ||
|
|
74ae5b2f0d |
4
Makefile
4
Makefile
@@ -293,8 +293,8 @@ apptest-legacy: vminsert-race vmselect-race vmstorage-race vmbackup-race vmresto
|
||||
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
|
||||
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
|
||||
); \
|
||||
VMSINGLE_V1_132_0_PATH=$${DIR}/victoria-metrics-prod \
|
||||
VMSTORAGE_V1_132_0_PATH=$${DIR}/vmstorage-prod \
|
||||
VM_LEGACY_VMSINGLE_PATH=$${DIR}/victoria-metrics-prod \
|
||||
VM_LEGACY_VMSTORAGE_PATH=$${DIR}/vmstorage-prod \
|
||||
go test ./apptest/tests -run="^TestLegacyCluster.*"
|
||||
|
||||
benchmark:
|
||||
|
||||
@@ -118,7 +118,6 @@ func main() {
|
||||
remotewrite.InitSecretFlags()
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
opentelemetry.Init()
|
||||
timeserieslimits.Init(*maxLabelsPerTimeseries, *maxLabelNameLen, *maxLabelValueLen)
|
||||
|
||||
if promscrape.IsDryRun() {
|
||||
|
||||
@@ -25,11 +25,6 @@ var (
|
||||
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
|
||||
)
|
||||
|
||||
// Init must be called after flag.Parse and before using the opentelemetry package.
|
||||
func Init() {
|
||||
stream.InitDecodeOptions()
|
||||
}
|
||||
|
||||
// InsertHandlerForReader processes metrics from given reader.
|
||||
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
|
||||
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {
|
||||
|
||||
@@ -113,15 +113,15 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
|
||||
// because correct types must be inherited after unmarshalling.
|
||||
exprValidator := g.Type.ValidateExpr
|
||||
if err := exprValidator(r.Expr); err != nil {
|
||||
return fmt.Errorf("invalid expression for rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid expression for rule %q: %w", ruleName, err)
|
||||
}
|
||||
}
|
||||
if validateTplFn != nil {
|
||||
if err := validateTplFn(r.Annotations); err != nil {
|
||||
return fmt.Errorf("invalid annotations for rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid annotations for rule %q: %w", ruleName, err)
|
||||
}
|
||||
if err := validateTplFn(r.Labels); err != nil {
|
||||
return fmt.Errorf("invalid labels for rule %q: %w", ruleName, err)
|
||||
return fmt.Errorf("invalid labels for rule %q: %w", ruleName, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -121,7 +121,7 @@ func TestParse_Failure(t *testing.T) {
|
||||
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
|
||||
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
|
||||
f([]string{"testdata/dir/rules4-bad.rules"}, "either `record` or `alert` must be set")
|
||||
f([]string{"testdata/rules/rules1-bad.rules"}, "bad GraphiteQL expr")
|
||||
f([]string{"testdata/rules/rules1-bad.rules"}, "bad graphite expr")
|
||||
f([]string{"testdata/rules/vlog-rules0-bad.rules"}, "bad LogsQL expr")
|
||||
f([]string{"testdata/dir/rules6-bad.rules"}, "missing ':' in header")
|
||||
f([]string{"testdata/rules/rules-multi-doc-bad.rules"}, "unknown fields")
|
||||
@@ -283,7 +283,7 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
Expr: "up | 0",
|
||||
},
|
||||
},
|
||||
}, true, "bad MetricsQL expr")
|
||||
}, true, "bad prometheus expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test graphite expr",
|
||||
@@ -293,7 +293,7 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
"description": "some-description",
|
||||
}},
|
||||
},
|
||||
}, true, "bad GraphiteQL expr")
|
||||
}, true, "bad graphite expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test vlogs expr",
|
||||
@@ -327,7 +327,7 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
Expr: "sum(up == 0 ) by (host)",
|
||||
},
|
||||
},
|
||||
}, true, "bad GraphiteQL expr")
|
||||
}, true, "bad graphite expr")
|
||||
|
||||
f(&Group{
|
||||
Name: "test vlogs with prometheus exp",
|
||||
@@ -351,7 +351,7 @@ func TestGroupValidate_Failure(t *testing.T) {
|
||||
For: promutil.NewDuration(10 * time.Millisecond),
|
||||
},
|
||||
},
|
||||
}, true, "bad MetricsQL expr")
|
||||
}, true, "bad prometheus expr")
|
||||
}
|
||||
|
||||
func TestGroupValidate_Success(t *testing.T) {
|
||||
|
||||
@@ -66,11 +66,11 @@ func (t *Type) ValidateExpr(expr string) error {
|
||||
switch t.String() {
|
||||
case "graphite":
|
||||
if _, err := graphiteql.Parse(expr); err != nil {
|
||||
return fmt.Errorf("bad GraphiteQL expr: %q, err: %w", expr, err)
|
||||
return fmt.Errorf("bad graphite expr: %q, err: %w", expr, err)
|
||||
}
|
||||
case "prometheus":
|
||||
if _, err := metricsql.Parse(expr); err != nil {
|
||||
return fmt.Errorf("bad MetricsQL expr: %q, err: %w", expr, err)
|
||||
return fmt.Errorf("bad prometheus expr: %q, err: %w", expr, err)
|
||||
}
|
||||
case "vlogs":
|
||||
q, err := logstorage.ParseStatsQuery(expr, 0)
|
||||
|
||||
@@ -125,7 +125,6 @@ func main() {
|
||||
|
||||
relabel.Init()
|
||||
timeserieslimits.Init(*maxLabelsPerTimeseries, *maxLabelNameLen, *maxLabelValueLen)
|
||||
opentelemetry.Init()
|
||||
protoparserutil.StartUnmarshalWorkers()
|
||||
if len(*clusternativeListenAddr) > 0 {
|
||||
s, err := clusternative.NewVMinsertServer(*clusternativeListenAddr, nil)
|
||||
|
||||
@@ -24,11 +24,6 @@ var (
|
||||
metadataInserted = metrics.NewCounter(`vm_metadata_rows_inserted_total{type="opentelemetry"}`)
|
||||
)
|
||||
|
||||
// Init must be called after flag.Parse and before using the opentelemetry package.
|
||||
func Init() {
|
||||
stream.InitDecodeOptions()
|
||||
}
|
||||
|
||||
// InsertHandler processes opentelemetry metrics.
|
||||
func InsertHandler(at *auth.Token, req *http.Request) error {
|
||||
extraLabels, err := protoparserutil.GetExtraLabels(req)
|
||||
|
||||
@@ -36,6 +36,9 @@ var (
|
||||
func NewVMSelectServer(addr string) (*vmselectapi.Server, error) {
|
||||
api := &vmstorageAPI{}
|
||||
limits := vmselectapi.Limits{
|
||||
MaxLabelNames: *maxTagKeys,
|
||||
MaxLabelValues: *maxTagValues,
|
||||
MaxTagValueSuffixes: *maxTagValueSuffixesPerSearch,
|
||||
MaxConcurrentRequests: *maxConcurrentRequests,
|
||||
MaxConcurrentRequestsFlagName: "clusternative.maxConcurrentRequests",
|
||||
MaxQueueDuration: *maxQueueDuration,
|
||||
@@ -66,9 +69,6 @@ func (api *vmstorageAPI) SearchMetricNames(qt *querytracer.Tracer, sq *storage.S
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQuery, labelName string, maxLabelValues int, deadline uint64) ([]string, error) {
|
||||
if maxLabelValues <= 0 || maxLabelValues > *maxTagValues {
|
||||
maxLabelValues = *maxTagValues
|
||||
}
|
||||
dl := searchutil.DeadlineFromTimestamp(deadline)
|
||||
labelValues, _, err := netstorage.LabelValues(qt, true, labelName, sq, maxLabelValues, dl)
|
||||
return labelValues, wrapClusterNativeError(err)
|
||||
@@ -76,18 +76,12 @@ func (api *vmstorageAPI) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQ
|
||||
|
||||
func (api *vmstorageAPI) TagValueSuffixes(qt *querytracer.Tracer, accountID, projectID uint32, tr storage.TimeRange, tagKey, tagValuePrefix string, delimiter byte,
|
||||
maxSuffixes int, deadline uint64) ([]string, error) {
|
||||
if maxSuffixes <= 0 || maxSuffixes > *maxTagValueSuffixesPerSearch {
|
||||
maxSuffixes = *maxTagValueSuffixesPerSearch
|
||||
}
|
||||
dl := searchutil.DeadlineFromTimestamp(deadline)
|
||||
suffixes, _, err := netstorage.TagValueSuffixes(qt, accountID, projectID, true, tr, tagKey, tagValuePrefix, delimiter, maxSuffixes, dl)
|
||||
return suffixes, wrapClusterNativeError(err)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) LabelNames(qt *querytracer.Tracer, sq *storage.SearchQuery, maxLabelNames int, deadline uint64) ([]string, error) {
|
||||
if maxLabelNames <= 0 || maxLabelNames > *maxTagKeys {
|
||||
maxLabelNames = *maxTagKeys
|
||||
}
|
||||
dl := searchutil.DeadlineFromTimestamp(deadline)
|
||||
labelNames, _, err := netstorage.LabelNames(qt, true, sq, maxLabelNames, dl)
|
||||
return labelNames, wrapClusterNativeError(err)
|
||||
|
||||
@@ -9,10 +9,12 @@ import (
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage/servers"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
@@ -27,12 +29,10 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vminsertapi"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vmselectapi"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
)
|
||||
|
||||
var (
|
||||
storageDataPath = flag.String("storageDataPath", "vmstorage-data", "Path to storage data")
|
||||
retentionPeriod = flagutil.NewRetentionDuration("retentionPeriod", "1M", "Data with timestamps outside the retentionPeriod is automatically deleted. The minimum retentionPeriod is 24h or 1d. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#retention. See also -retentionFilter")
|
||||
futureRetention = flagutil.NewRetentionDuration("futureRetention", "2d", "Data with timestamps bigger than now+futureRetention is automatically deleted. "+
|
||||
@@ -41,23 +41,13 @@ var (
|
||||
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the given -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
vminsertAddr = flag.String("vminsertAddr", ":8400", "TCP address to accept connections from vminsert services")
|
||||
vmselectAddr = flag.String("vmselectAddr", ":8401", "TCP address to accept connections from vmselect services")
|
||||
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", 2*cgroup.AvailableCPUs(), "The maximum number of concurrent vmselect requests "+
|
||||
"the vmstorage can process at -vmselectAddr. It shouldn't be high, since a single request usually saturates a CPU core, and many concurrently executed requests "+
|
||||
"may require high amounts of memory. See also -search.maxQueueDuration")
|
||||
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the incoming vmselect request waits for execution "+
|
||||
"when -search.maxConcurrentRequests limit is reached")
|
||||
disableRPCCompression = flag.Bool("rpc.disableCompression", false, "Whether to disable compression of the data sent from vmstorage to vmselect. "+
|
||||
"This reduces CPU usage at the cost of higher network bandwidth usage")
|
||||
vminsertConnsShutdownDuration = flag.Duration("storage.vminsertConnsShutdownDuration", 10*time.Second, "The time needed for gradual closing of vminsert connections during "+
|
||||
"graceful shutdown. Bigger duration reduces spikes in CPU, RAM and disk IO load on the remaining vmstorage nodes during rolling restart. "+
|
||||
"Smaller duration reduces the time needed to close all the vminsert connections, thus reducing the time for graceful shutdown. "+
|
||||
"Configured value must always be lower than the graceful shutdown period configured by the orchestration platform (terminationGracePeriodSeconds for Kubernetes). "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#improving-re-routing-performance-during-restart")
|
||||
snapshotAuthKey = flagutil.NewPassword("snapshotAuthKey", "authKey, which must be passed in query string to /snapshot* pages. It overrides -httpAuth.*")
|
||||
forceMergeAuthKey = flagutil.NewPassword("forceMergeAuthKey", "authKey, which must be passed in query string to /internal/force_merge pages. It overrides -httpAuth.*")
|
||||
forceFlushAuthKey = flagutil.NewPassword("forceFlushAuthKey", "authKey, which must be passed in query string to /internal/force_flush pages. It overrides -httpAuth.*")
|
||||
storageDataPath = flag.String("storageDataPath", "vmstorage-data", "Path to storage data")
|
||||
vminsertAddr = flag.String("vminsertAddr", ":8400", "TCP address to accept connections from vminsert services")
|
||||
vmselectAddr = flag.String("vmselectAddr", ":8401", "TCP address to accept connections from vmselect services")
|
||||
snapshotAuthKey = flagutil.NewPassword("snapshotAuthKey", "authKey, which must be passed in query string to /snapshot* pages")
|
||||
forceMergeAuthKey = flagutil.NewPassword("forceMergeAuthKey", "authKey, which must be passed in query string to /internal/force_merge pages")
|
||||
forceFlushAuthKey = flagutil.NewPassword("forceFlushAuthKey", "authKey, which must be passed in query string to /internal/force_flush pages")
|
||||
snapshotsMaxAge = flagutil.NewRetentionDuration("snapshotsMaxAge", "3d", "Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted")
|
||||
_ = flag.Duration("snapshotCreateTimeout", 0, "Deprecated: this flag does nothing")
|
||||
|
||||
_ = flag.Duration("finalMergeDelay", 0, "Deprecated: this flag does nothing")
|
||||
@@ -68,7 +58,7 @@ var (
|
||||
"If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. "+
|
||||
"If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)")
|
||||
minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Leave only the last sample in every time series per each discrete interval "+
|
||||
"equal to -dedup.minScrapeInterval > 0. See also -streamAggr.dedupInterval and https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication")
|
||||
"equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication for details")
|
||||
inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+
|
||||
"The saved data survives unclean shutdowns such as OOM crash, hardware reset, SIGKILL, etc. "+
|
||||
"Bigger intervals may help increase the lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+
|
||||
@@ -129,7 +119,7 @@ var (
|
||||
)
|
||||
|
||||
func main() {
|
||||
// vmstorage is optimized for reduced memory allocations,
|
||||
// vmstoage is optimized for reduced memory allocations,
|
||||
// so it can run with the reduced GOGC in order to reduce the used memory,
|
||||
// while keeping CPU usage spent in GC at low levels.
|
||||
//
|
||||
@@ -188,7 +178,7 @@ func main() {
|
||||
LogNewSeries: *logNewSeries,
|
||||
}
|
||||
strg := storage.MustOpenStorage(*storageDataPath, opts)
|
||||
vmStorage := newVMStorage(strg, *maxConcurrentRequests)
|
||||
initStaleSnapshotsRemover(strg)
|
||||
|
||||
var m storage.Metrics
|
||||
strg.UpdateMetrics(&m)
|
||||
@@ -203,24 +193,18 @@ func main() {
|
||||
// register storage metrics
|
||||
storageMetrics := metrics.NewSet()
|
||||
storageMetrics.RegisterMetricsWriter(func(w io.Writer) {
|
||||
vmStorage.writeStorageMetrics(w)
|
||||
writeStorageMetrics(w, strg)
|
||||
})
|
||||
metrics.RegisterSet(storageMetrics)
|
||||
|
||||
protoparserutil.StartUnmarshalWorkers()
|
||||
|
||||
vminsertSrv, err := vminsertapi.NewVMInsertServer(*vminsertAddr, *vminsertConnsShutdownDuration, "vminsert", vmStorage, nil)
|
||||
servers.GetMaxUniqueTimeSeries() // for init and logging only.
|
||||
vminsertSrv, err := servers.NewVMInsertServer(*vminsertAddr, strg)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create a server with -vminsertAddr=%s: %s", *vminsertAddr, err)
|
||||
|
||||
}
|
||||
limits := vmselectapi.Limits{
|
||||
MaxConcurrentRequests: *maxConcurrentRequests,
|
||||
MaxConcurrentRequestsFlagName: "search.maxConcurrentRequests",
|
||||
MaxQueueDuration: *maxQueueDuration,
|
||||
MaxQueueDurationFlagName: "search.maxQueueDuration",
|
||||
}
|
||||
vmselectSrv, err := vmselectapi.NewServer(*vmselectAddr, vmStorage, limits, *disableRPCCompression)
|
||||
vmselectSrv, err := servers.NewVMSelectServer(*vmselectAddr, strg)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create a server with -vmselectAddr=%s: %s", *vmselectAddr, err)
|
||||
}
|
||||
@@ -229,7 +213,8 @@ func main() {
|
||||
if len(listenAddrs) == 0 {
|
||||
listenAddrs = []string{":8482"}
|
||||
}
|
||||
go httpserver.Serve(listenAddrs, vmStorage.requestHandler, httpserver.ServeOptions{UseProxyProtocol: useProxyProtocol})
|
||||
requestHandler := newRequestHandler(strg)
|
||||
go httpserver.Serve(listenAddrs, requestHandler, httpserver.ServeOptions{UseProxyProtocol: useProxyProtocol})
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
@@ -249,6 +234,7 @@ func main() {
|
||||
metrics.UnregisterSet(storageMetrics, true)
|
||||
storageMetrics = nil
|
||||
|
||||
stopStaleSnapshotsRemover()
|
||||
vmselectSrv.MustStop()
|
||||
vminsertSrv.MustStop()
|
||||
protoparserutil.StopUnmarshalWorkers()
|
||||
@@ -256,29 +242,31 @@ func main() {
|
||||
|
||||
logger.Infof("gracefully closing the storage at %s", *storageDataPath)
|
||||
startTime = time.Now()
|
||||
vmStorage.Stop()
|
||||
strg.MustClose()
|
||||
logger.Infof("successfully closed the storage in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
fs.MustStopDirRemover()
|
||||
logger.Infof("the vmstorage has been stopped")
|
||||
}
|
||||
|
||||
// requestHandler is a storage request handler.
|
||||
// TODO(@rtm0): Move to a separate file, request_handler.go
|
||||
func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
path := r.URL.Path
|
||||
|
||||
if path == "/" {
|
||||
if r.Method != http.MethodGet {
|
||||
return false
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
fmt.Fprintf(w, `vmstorage - a component of VictoriaMetrics cluster<br/>
|
||||
func newRequestHandler(strg *storage.Storage) httpserver.RequestHandler {
|
||||
return func(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
if r.Method != http.MethodGet {
|
||||
return false
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
fmt.Fprintf(w, `vmstorage - a component of VictoriaMetrics cluster<br/>
|
||||
<a href="https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/">docs</a><br>
|
||||
`)
|
||||
return true
|
||||
return true
|
||||
}
|
||||
return requestHandler(w, r, strg)
|
||||
}
|
||||
}
|
||||
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storage) bool {
|
||||
path := r.URL.Path
|
||||
if path == "/internal/force_merge" {
|
||||
if !httpserver.CheckAuthFlag(w, r, forceMergeAuthKey) {
|
||||
return true
|
||||
@@ -290,7 +278,7 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
|
||||
defer activeForceMerges.Dec()
|
||||
logger.Infof("forced merge for partition_prefix=%q has been started", partitionNamePrefix)
|
||||
startTime := time.Now()
|
||||
if err := api.s.ForceMergePartitions(partitionNamePrefix); err != nil {
|
||||
if err := strg.ForceMergePartitions(partitionNamePrefix); err != nil {
|
||||
logger.Errorf("error in forced merge for partition_prefix=%q: %s", partitionNamePrefix, err)
|
||||
return
|
||||
}
|
||||
@@ -303,7 +291,7 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
|
||||
return true
|
||||
}
|
||||
logger.Infof("flushing storage to make pending data available for reading")
|
||||
api.s.DebugFlush()
|
||||
strg.DebugFlush()
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -323,7 +311,7 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
|
||||
}
|
||||
logger.Infof("enabling logging of new series for the next %s. This may increase resource usage during this period.", time.Duration(dealine)*time.Second)
|
||||
endTime := fasttime.UnixTimestamp() + uint64(dealine)
|
||||
api.s.SetLogNewSeriesUntil(endTime)
|
||||
strg.SetLogNewSeriesUntil(endTime)
|
||||
fmt.Fprintf(w, `{"status":"success","data":{"logEndTime":%q}}`, time.Unix(int64(endTime), 0))
|
||||
return true
|
||||
}
|
||||
@@ -339,13 +327,13 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
|
||||
case "/create":
|
||||
snapshotsCreateTotal.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
snapshotName := api.s.MustCreateSnapshot()
|
||||
snapshotName := strg.MustCreateSnapshot()
|
||||
|
||||
// Verify whether the client already closed the connection.
|
||||
// In this case it is better to drop the created snapshot, since the client isn't interested in it.
|
||||
if err := r.Context().Err(); err != nil {
|
||||
logger.Infof("deleting already created snapshot at %s because the client canceled the request", snapshotName)
|
||||
if err := api.deleteSnapshot(snapshotName); err != nil {
|
||||
if err := deleteSnapshot(strg, snapshotName); err != nil {
|
||||
logger.Infof("cannot delete just created snapshot: %s", err)
|
||||
return true
|
||||
}
|
||||
@@ -357,7 +345,7 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
|
||||
case "/list":
|
||||
snapshotsListTotal.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
snapshots := api.s.MustListSnapshots()
|
||||
snapshots := strg.MustListSnapshots()
|
||||
fmt.Fprintf(w, `{"status":"ok","snapshots":[`)
|
||||
if len(snapshots) > 0 {
|
||||
for _, snapshot := range snapshots[:len(snapshots)-1] {
|
||||
@@ -371,7 +359,7 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
|
||||
snapshotsDeleteTotal.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
snapshotName := r.FormValue("snapshot")
|
||||
if err := api.deleteSnapshot(snapshotName); err != nil {
|
||||
if err := deleteSnapshot(strg, snapshotName); err != nil {
|
||||
jsonResponseError(w, err)
|
||||
snapshotsDeleteErrorsTotal.Inc()
|
||||
return true
|
||||
@@ -381,10 +369,9 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
|
||||
case "/delete_all":
|
||||
snapshotsDeleteAllTotal.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
snapshots := api.s.MustListSnapshots()
|
||||
snapshots := strg.MustListSnapshots()
|
||||
for _, snapshotName := range snapshots {
|
||||
// TODO(@rtm0): Use VMStorage.deleteSnapshot()?
|
||||
if err := api.s.DeleteSnapshot(snapshotName); err != nil {
|
||||
if err := strg.DeleteSnapshot(snapshotName); err != nil {
|
||||
err = fmt.Errorf("cannot delete snapshot %q: %w", snapshotName, err)
|
||||
jsonResponseError(w, err)
|
||||
snapshotsDeleteAllErrorsTotal.Inc()
|
||||
@@ -398,6 +385,50 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
|
||||
}
|
||||
}
|
||||
|
||||
func deleteSnapshot(strg *storage.Storage, snapshotName string) error {
|
||||
snapshots := strg.MustListSnapshots()
|
||||
for _, snName := range snapshots {
|
||||
if snName == snapshotName {
|
||||
if err := strg.DeleteSnapshot(snName); err != nil {
|
||||
return fmt.Errorf("cannot delete snapshot %q: %w", snName, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("cannot find snapshot %q", snapshotName)
|
||||
}
|
||||
|
||||
func initStaleSnapshotsRemover(strg *storage.Storage) {
|
||||
staleSnapshotsRemoverCh = make(chan struct{})
|
||||
if snapshotsMaxAge.Duration() <= 0 {
|
||||
return
|
||||
}
|
||||
snapshotsMaxAgeDur := snapshotsMaxAge.Duration()
|
||||
staleSnapshotsRemoverWG.Go(func() {
|
||||
d := timeutil.AddJitterToDuration(time.Second * 11)
|
||||
t := time.NewTicker(d)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-staleSnapshotsRemoverCh:
|
||||
return
|
||||
case <-t.C:
|
||||
}
|
||||
strg.MustDeleteStaleSnapshots(snapshotsMaxAgeDur)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func stopStaleSnapshotsRemover() {
|
||||
close(staleSnapshotsRemoverCh)
|
||||
staleSnapshotsRemoverWG.Wait()
|
||||
}
|
||||
|
||||
var (
|
||||
staleSnapshotsRemoverCh chan struct{}
|
||||
staleSnapshotsRemoverWG sync.WaitGroup
|
||||
)
|
||||
|
||||
var (
|
||||
activeForceMerges = metrics.NewCounter("vm_active_force_merges")
|
||||
|
||||
@@ -412,9 +443,7 @@ var (
|
||||
snapshotsDeleteAllErrorsTotal = metrics.NewCounter(`vm_http_request_errors_total{path="/snapshot/delete_all"}`)
|
||||
)
|
||||
|
||||
// TODO(@rtm0): Move to metrics.go.
|
||||
func (api *VMStorage) writeStorageMetrics(w io.Writer) {
|
||||
strg := api.s
|
||||
func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
var m storage.Metrics
|
||||
strg.UpdateMetrics(&m)
|
||||
tm := &m.TableMetrics
|
||||
@@ -638,7 +667,7 @@ func (api *VMStorage) writeStorageMetrics(w io.Writer) {
|
||||
metrics.WriteGaugeUint64(w, `vm_downsampling_partitions_scheduled`, tm.ScheduledDownsamplingPartitions)
|
||||
metrics.WriteGaugeUint64(w, `vm_downsampling_partitions_scheduled_size_bytes`, tm.ScheduledDownsamplingPartitionsSize)
|
||||
|
||||
metrics.WriteGaugeUint64(w, `vm_search_max_unique_timeseries`, uint64(api.maxUniqueTimeSeriesCalculated))
|
||||
metrics.WriteGaugeUint64(w, `vm_search_max_unique_timeseries`, uint64(servers.GetMaxUniqueTimeSeries()))
|
||||
|
||||
metrics.WriteGaugeUint64(w, `vm_metrics_metadata_storage_items`, m.MetadataStorageItemsCurrent)
|
||||
metrics.WriteCounterUint64(w, `vm_metrics_metadata_storage_size_bytes`, m.MetadataStorageCurrentSizeBytes)
|
||||
|
||||
55
app/vmstorage/servers/vminsert.go
Normal file
55
app/vmstorage/servers/vminsert.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package servers
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vminsertapi"
|
||||
)
|
||||
|
||||
var (
|
||||
precisionBits = flag.Int("precisionBits", 64, "The number of precision bits to store per each value. Lower precision bits improves data compression "+
|
||||
"at the cost of precision loss")
|
||||
vminsertConnsShutdownDuration = flag.Duration("storage.vminsertConnsShutdownDuration", 10*time.Second, "The time needed for gradual closing of vminsert connections during "+
|
||||
"graceful shutdown. Bigger duration reduces spikes in CPU, RAM and disk IO load on the remaining vmstorage nodes during rolling restart. "+
|
||||
"Smaller duration reduces the time needed to close all the vminsert connections, thus reducing the time for graceful shutdown. "+
|
||||
"Configured value must always be lower than the graceful shutdown period configured by the orchestration platform (terminationGracePeriodSeconds for Kubernetes). "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#improving-re-routing-performance-during-restart")
|
||||
)
|
||||
|
||||
// NewVMInsertServer starts vminsertapi.VMInsertServer at the given addr serving the given storage.
|
||||
func NewVMInsertServer(addr string, storage *storage.Storage) (*vminsertapi.VMInsertServer, error) {
|
||||
if err := encoding.CheckPrecisionBits(uint8(*precisionBits)); err != nil {
|
||||
return nil, fmt.Errorf("invalid -precisionBits: %w", err)
|
||||
}
|
||||
api := &vminsertAPI{
|
||||
storage: storage,
|
||||
}
|
||||
|
||||
return vminsertapi.NewVMInsertServer(addr, *vminsertConnsShutdownDuration, "vminsert", api, nil)
|
||||
}
|
||||
|
||||
type vminsertAPI struct {
|
||||
storage *storage.Storage
|
||||
}
|
||||
|
||||
// WriteRows implements lib/vminsertapi.API interface
|
||||
func (v *vminsertAPI) WriteRows(rows []storage.MetricRow) error {
|
||||
v.storage.AddRows(rows, uint8(*precisionBits))
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteMetadata implements lib/vminsertapi.API interface
|
||||
func (v *vminsertAPI) WriteMetadata(rows []metricsmetadata.Row) error {
|
||||
v.storage.AddMetadataRows(rows)
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsReadOnly implements lib/vminsertapi.API interface
|
||||
func (v *vminsertAPI) IsReadOnly() bool {
|
||||
return v.storage.IsReadOnly()
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package main
|
||||
package servers
|
||||
|
||||
import (
|
||||
"flag"
|
||||
@@ -6,21 +6,17 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricnamestats"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vmselectapi"
|
||||
)
|
||||
|
||||
var (
|
||||
precisionBits = flag.Int("precisionBits", 64, "The number of precision bits to store per each value. Lower precision bits improves data compression "+
|
||||
"at the cost of precision loss")
|
||||
maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 0, "The maximum number of unique time series, which can be scanned during every query. "+
|
||||
"This allows protecting against heavy queries, which select unexpectedly high number of series. When set to zero, the limit is automatically calculated based on -search.maxConcurrentRequests (inversely proportional) and memory available to the process (proportional). See also -search.max* command-line flags at vmselect")
|
||||
maxTagKeys = flag.Int("search.maxTagKeys", 100e3, "The maximum number of tag keys returned per search. "+
|
||||
@@ -28,110 +24,46 @@ var (
|
||||
maxTagValues = flag.Int("search.maxTagValues", 100e3, "The maximum number of tag values returned per search. "+
|
||||
"See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration")
|
||||
maxTagValueSuffixesPerSearch = flag.Int("search.maxTagValueSuffixesPerSearch", 100e3, "The maximum number of tag value suffixes returned from /metrics/find")
|
||||
snapshotsMaxAge = flagutil.NewRetentionDuration("snapshotsMaxAge", "3d", "Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted")
|
||||
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", 2*cgroup.AvailableCPUs(), "The maximum number of concurrent vmselect requests "+
|
||||
"the vmstorage can process at -vmselectAddr. It shouldn't be high, since a single request usually saturates a CPU core, and many concurrently executed requests "+
|
||||
"may require high amounts of memory. See also -search.maxQueueDuration")
|
||||
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the incoming vmselect request waits for execution "+
|
||||
"when -search.maxConcurrentRequests limit is reached")
|
||||
|
||||
disableRPCCompression = flag.Bool("rpc.disableCompression", false, "Whether to disable compression of the data sent from vmstorage to vmselect. "+
|
||||
"This reduces CPU usage at the cost of higher network bandwidth usage")
|
||||
)
|
||||
|
||||
// newVMStorage creates a new instance of of VMStorage.
|
||||
//
|
||||
// The created VMStorage instance takes ownership of s.
|
||||
func newVMStorage(s *storage.Storage, maxConcurrentRequests int) *VMStorage {
|
||||
if err := encoding.CheckPrecisionBits(uint8(*precisionBits)); err != nil {
|
||||
logger.Fatalf("invalid -precisionBits: %d", err)
|
||||
var (
|
||||
maxUniqueTimeseriesValue int
|
||||
maxUniqueTimeseriesValueOnce sync.Once
|
||||
)
|
||||
|
||||
// NewVMSelectServer starts new server at the given addr, which serves vmselect requests from the given s.
|
||||
func NewVMSelectServer(addr string, s *storage.Storage) (*vmselectapi.Server, error) {
|
||||
api := &vmstorageAPI{
|
||||
s: s,
|
||||
}
|
||||
|
||||
maxUniqueTimeseriesCalculated := *maxUniqueTimeseries
|
||||
if maxUniqueTimeseriesCalculated <= 0 {
|
||||
maxUniqueTimeseriesCalculated = calculateMaxUniqueTimeseries(maxConcurrentRequests, memory.Remaining())
|
||||
limits := vmselectapi.Limits{
|
||||
MaxLabelNames: *maxTagKeys,
|
||||
MaxLabelValues: *maxTagValues,
|
||||
MaxTagValueSuffixes: *maxTagValueSuffixesPerSearch,
|
||||
MaxConcurrentRequests: *maxConcurrentRequests,
|
||||
MaxConcurrentRequestsFlagName: "search.maxConcurrentRequests",
|
||||
MaxQueueDuration: *maxQueueDuration,
|
||||
MaxQueueDurationFlagName: "search.maxQueueDuration",
|
||||
}
|
||||
|
||||
vms := &VMStorage{
|
||||
s: s,
|
||||
maxUniqueTimeseries: *maxUniqueTimeseries,
|
||||
maxUniqueTimeSeriesCalculated: maxUniqueTimeseriesCalculated,
|
||||
staleSnapshotsRemoverCh: make(chan struct{}),
|
||||
}
|
||||
vms.initStaleSnapshotsRemover()
|
||||
return vms
|
||||
return vmselectapi.NewServer(addr, api, limits, *disableRPCCompression)
|
||||
}
|
||||
|
||||
// calculateMaxUniqueTimeseries calculates the maxUniqueTimeseries based on the
|
||||
// available system resources.
|
||||
func calculateMaxUniqueTimeseries(maxConcurrentRequests, remainingMemory int) int {
|
||||
if maxConcurrentRequests <= 0 {
|
||||
// This line should NOT be reached unless the user has set an incorrect `search.maxConcurrentRequests`.
|
||||
// In such cases, fallback to unlimited.
|
||||
logger.Warnf("limiting -search.maxUniqueTimeseries to %v because -search.maxConcurrentRequests=%d.", 2e9, maxConcurrentRequests)
|
||||
return 2e9
|
||||
}
|
||||
|
||||
// Calculate the max metrics limit for a single request in the worst-case concurrent scenario.
|
||||
// The approximate size of 1 unique series that could occupy in the vmstorage is 200 bytes.
|
||||
mts := remainingMemory / 200 / maxConcurrentRequests
|
||||
logger.Infof("limiting -search.maxUniqueTimeseries to %d according to -search.maxConcurrentRequests=%d and remaining memory=%d bytes. To increase the limit, reduce -search.maxConcurrentRequests or increase memory available to the process.", mts, maxConcurrentRequests, remainingMemory)
|
||||
return mts
|
||||
// vmstorageAPI impelements vmselectapi.API
|
||||
type vmstorageAPI struct {
|
||||
s *storage.Storage
|
||||
}
|
||||
|
||||
// VMStorage impelements vmselectapi.API and vminsertapi.API.
|
||||
type VMStorage struct {
|
||||
s *storage.Storage
|
||||
maxUniqueTimeseries int
|
||||
maxUniqueTimeSeriesCalculated int
|
||||
staleSnapshotsRemoverCh chan struct{}
|
||||
staleSnapshotsRemoverWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func (api *VMStorage) initStaleSnapshotsRemover() {
|
||||
if snapshotsMaxAge.Duration() <= 0 {
|
||||
return
|
||||
}
|
||||
snapshotsMaxAgeDuration := snapshotsMaxAge.Duration()
|
||||
api.staleSnapshotsRemoverWG.Go(func() {
|
||||
d := timeutil.AddJitterToDuration(time.Second * 11)
|
||||
t := time.NewTicker(d)
|
||||
defer t.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-api.staleSnapshotsRemoverCh:
|
||||
return
|
||||
case <-t.C:
|
||||
}
|
||||
api.s.MustDeleteStaleSnapshots(snapshotsMaxAgeDuration)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func (api *VMStorage) Stop() {
|
||||
close(api.staleSnapshotsRemoverCh)
|
||||
api.staleSnapshotsRemoverWG.Wait()
|
||||
api.s.MustClose()
|
||||
}
|
||||
|
||||
// WriteRows writes metric rows to the storage.
|
||||
//
|
||||
// The caller should limit the number of concurrent calls to WriteRows() in
|
||||
// order to limit memory usage.
|
||||
func (api *VMStorage) WriteRows(rows []storage.MetricRow) error {
|
||||
api.s.AddRows(rows, uint8(*precisionBits))
|
||||
return nil
|
||||
}
|
||||
|
||||
// WriteMetadata writes metrics metadata to storage.
|
||||
//
|
||||
// The caller should limit the number of concurrent calls to WriteMetadata() in
|
||||
// order to limit memory usage.
|
||||
func (api *VMStorage) WriteMetadata(rows []metricsmetadata.Row) error {
|
||||
api.s.AddMetadataRows(rows)
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsReadOnly returns true is the storage is in read-only mode.
|
||||
func (api *VMStorage) IsReadOnly() bool {
|
||||
return api.s.IsReadOnly()
|
||||
}
|
||||
|
||||
func (api *VMStorage) InitSearch(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (vmselectapi.BlockIterator, error) {
|
||||
func (api *vmstorageAPI) InitSearch(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (vmselectapi.BlockIterator, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := api.getMaxMetrics(sq.MaxMetrics)
|
||||
maxMetrics := getMaxMetrics(sq.MaxMetrics)
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -148,15 +80,158 @@ func (api *VMStorage) InitSearch(qt *querytracer.Tracer, sq *storage.SearchQuery
|
||||
return bi, nil
|
||||
}
|
||||
|
||||
func (api *VMStorage) getMaxMetrics(searchQueryLimit int) int {
|
||||
if searchQueryLimit <= 0 {
|
||||
return api.maxUniqueTimeSeriesCalculated
|
||||
func (api *vmstorageAPI) SearchMetricNames(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) ([]string, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = GetMaxUniqueTimeSeries()
|
||||
}
|
||||
// searchQueryLimit cannot exceed `-search.maxUniqueTimeseries`
|
||||
if api.maxUniqueTimeseries != 0 && searchQueryLimit > api.maxUniqueTimeseries {
|
||||
searchQueryLimit = api.maxUniqueTimeseries
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return searchQueryLimit
|
||||
if len(tfss) == 0 {
|
||||
return nil, fmt.Errorf("missing tag filters")
|
||||
}
|
||||
return api.s.SearchMetricNames(qt, tfss, tr, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQuery, labelName string, maxLabelValues int, deadline uint64) ([]string, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = GetMaxUniqueTimeSeries()
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return api.s.SearchLabelValues(qt, sq.AccountID, sq.ProjectID, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) TagValueSuffixes(qt *querytracer.Tracer, accountID, projectID uint32, tr storage.TimeRange, tagKey, tagValuePrefix string, delimiter byte,
|
||||
maxSuffixes int, deadline uint64) ([]string, error) {
|
||||
suffixes, err := api.s.SearchTagValueSuffixes(qt, accountID, projectID, tr, tagKey, tagValuePrefix, delimiter, maxSuffixes, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(suffixes) >= maxSuffixes {
|
||||
return nil, fmt.Errorf("more than -search.maxTagValueSuffixesPerSearch=%d suffixes returned; "+
|
||||
"either narrow down the search or increase -search.maxTagValueSuffixesPerSearch command-line flag value", maxSuffixes)
|
||||
}
|
||||
return suffixes, nil
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) LabelNames(qt *querytracer.Tracer, sq *storage.SearchQuery, maxLabelNames int, deadline uint64) ([]string, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = GetMaxUniqueTimeSeries()
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return api.s.SearchLabelNames(qt, sq.AccountID, sq.ProjectID, tfss, tr, maxLabelNames, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) SeriesCount(_ *querytracer.Tracer, accountID, projectID uint32, deadline uint64) (uint64, error) {
|
||||
return api.s.GetSeriesCount(accountID, projectID, deadline)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) Tenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline uint64) ([]string, error) {
|
||||
return api.s.SearchTenants(qt, tr, deadline)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) TSDBStatus(qt *querytracer.Tracer, sq *storage.SearchQuery, focusLabel string, topN int, deadline uint64) (*storage.TSDBStatus, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = GetMaxUniqueTimeSeries()
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
date := uint64(sq.MinTimestamp) / (24 * 3600 * 1000)
|
||||
return api.s.GetTSDBStatus(qt, sq.AccountID, sq.ProjectID, tfss, date, focusLabel, topN, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) DeleteSeries(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (int, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = GetMaxUniqueTimeSeries()
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if len(tfss) == 0 {
|
||||
return 0, fmt.Errorf("missing tag filters")
|
||||
}
|
||||
return api.s.DeleteSeries(qt, tfss, maxMetrics)
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) RegisterMetricNames(qt *querytracer.Tracer, mrs []storage.MetricRow, _ uint64) error {
|
||||
api.s.RegisterMetricNames(qt, mrs)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) GetMetricNamesUsageStats(qt *querytracer.Tracer, tt *storage.TenantToken, limit, le int, matchPattern string, _ uint64) (metricnamestats.StatsResult, error) {
|
||||
return api.s.GetMetricNamesStats(qt, tt, limit, le, matchPattern), nil
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) ResetMetricNamesUsageStats(qt *querytracer.Tracer, _ uint64) error {
|
||||
api.s.ResetMetricNamesStats(qt)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) setupTfss(qt *querytracer.Tracer, sq *storage.SearchQuery, tr storage.TimeRange, maxMetrics int, deadline uint64) ([]*storage.TagFilters, error) {
|
||||
tfss := make([]*storage.TagFilters, 0, len(sq.TagFilterss))
|
||||
accountID := sq.AccountID
|
||||
projectID := sq.ProjectID
|
||||
for _, tagFilters := range sq.TagFilterss {
|
||||
tfs := storage.NewTagFilters(accountID, projectID)
|
||||
for i := range tagFilters {
|
||||
tf := &tagFilters[i]
|
||||
if string(tf.Key) == "__graphite__" {
|
||||
query := tf.Value
|
||||
qtChild := qt.NewChild("searching for series matching __graphite__=%q", query)
|
||||
paths, err := api.s.SearchGraphitePaths(qtChild, accountID, projectID, tr, query, maxMetrics, deadline)
|
||||
qtChild.Donef("found %d series", len(paths))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when searching for Graphite paths for query %q: %w", query, err)
|
||||
}
|
||||
if len(paths) >= maxMetrics {
|
||||
return nil, fmt.Errorf("more than %d time series match Graphite query %q; "+
|
||||
"either narrow down the query or increase the corresponding -search.max* command-line flag value at vmselect nodes; "+
|
||||
"see https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#resource-usage-limits", maxMetrics, query)
|
||||
}
|
||||
tfs.AddGraphiteQuery(query, paths, tf.IsNegative)
|
||||
continue
|
||||
}
|
||||
if err := tfs.Add(tf.Key, tf.Value, tf.IsNegative, tf.IsRegexp); err != nil {
|
||||
return nil, fmt.Errorf("cannot parse tag filter %s: %w", tf, err)
|
||||
}
|
||||
}
|
||||
tfss = append(tfss, tfs)
|
||||
}
|
||||
return tfss, nil
|
||||
}
|
||||
|
||||
func (api *vmstorageAPI) GetMetadataRecords(qt *querytracer.Tracer, tt *storage.TenantToken, limit int, metricName string, deadline uint64) ([]*metricsmetadata.Row, error) {
|
||||
return api.s.GetMetadataRows(qt, tt, limit, metricName, deadline)
|
||||
}
|
||||
|
||||
// blockIterator implements vmselectapi.BlockIterator
|
||||
@@ -197,197 +272,41 @@ func (bi *blockIterator) Error() error {
|
||||
return bi.sr.Error()
|
||||
}
|
||||
|
||||
// SearchMetricNames returns metric names for the given tfss on the given tr.
|
||||
func (api *VMStorage) SearchMetricNames(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) ([]string, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = api.maxUniqueTimeSeriesCalculated
|
||||
func getMaxMetrics(searchQueryLimit int) int {
|
||||
if searchQueryLimit <= 0 {
|
||||
return GetMaxUniqueTimeSeries()
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// searchQueryLimit cannot exceed `-search.maxUniqueTimeseries`
|
||||
if *maxUniqueTimeseries != 0 && searchQueryLimit > *maxUniqueTimeseries {
|
||||
searchQueryLimit = *maxUniqueTimeseries
|
||||
}
|
||||
if len(tfss) == 0 {
|
||||
return nil, fmt.Errorf("missing tag filters")
|
||||
}
|
||||
return api.s.SearchMetricNames(qt, tfss, tr, maxMetrics, deadline)
|
||||
return searchQueryLimit
|
||||
}
|
||||
|
||||
// SearchLabelValues searches for label values for the given labelName, tfss and
|
||||
// tr.
|
||||
func (api *VMStorage) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQuery, labelName string, maxLabelValues int, deadline uint64) ([]string, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
if maxLabelValues <= 0 || maxLabelValues > *maxTagValues {
|
||||
maxLabelValues = *maxTagValues
|
||||
}
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = api.maxUniqueTimeSeriesCalculated
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return api.s.SearchLabelValues(qt, sq.AccountID, sq.ProjectID, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
// TagValueSuffixes returns all the tag value suffixes for the given tagKey and
|
||||
// tagValuePrefix on the given tr.
|
||||
//
|
||||
// This allows implementing
|
||||
// https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find or
|
||||
// similar APIs.
|
||||
func (api *VMStorage) TagValueSuffixes(qt *querytracer.Tracer, accountID, projectID uint32, tr storage.TimeRange, tagKey, tagValuePrefix string, delimiter byte,
|
||||
maxSuffixes int, deadline uint64) ([]string, error) {
|
||||
if maxSuffixes <= 0 || maxSuffixes > *maxTagValueSuffixesPerSearch {
|
||||
maxSuffixes = *maxTagValueSuffixesPerSearch
|
||||
}
|
||||
suffixes, err := api.s.SearchTagValueSuffixes(qt, accountID, projectID, tr, tagKey, tagValuePrefix, delimiter, maxSuffixes, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(suffixes) >= maxSuffixes {
|
||||
return nil, fmt.Errorf("more than -search.maxTagValueSuffixesPerSearch=%d suffixes returned; "+
|
||||
"either narrow down the search or increase -search.maxTagValueSuffixesPerSearch command-line flag value", maxSuffixes)
|
||||
}
|
||||
return suffixes, nil
|
||||
}
|
||||
|
||||
// SearchLabelNames searches for tag keys matching the given tfss on tr.
|
||||
func (api *VMStorage) LabelNames(qt *querytracer.Tracer, sq *storage.SearchQuery, maxLabelNames int, deadline uint64) ([]string, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
if maxLabelNames <= 0 || maxLabelNames > *maxTagKeys {
|
||||
maxLabelNames = *maxTagKeys
|
||||
}
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = api.maxUniqueTimeSeriesCalculated
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return api.s.SearchLabelNames(qt, sq.AccountID, sq.ProjectID, tfss, tr, maxLabelNames, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
func (api *VMStorage) SeriesCount(_ *querytracer.Tracer, accountID, projectID uint32, deadline uint64) (uint64, error) {
|
||||
return api.s.GetSeriesCount(accountID, projectID, deadline)
|
||||
}
|
||||
|
||||
func (api *VMStorage) Tenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline uint64) ([]string, error) {
|
||||
return api.s.SearchTenants(qt, tr, deadline)
|
||||
}
|
||||
|
||||
// GetTSDBStatus returns TSDB status for given filters on the given date.
|
||||
func (api *VMStorage) TSDBStatus(qt *querytracer.Tracer, sq *storage.SearchQuery, focusLabel string, topN int, deadline uint64) (*storage.TSDBStatus, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = api.maxUniqueTimeSeriesCalculated
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
date := uint64(sq.MinTimestamp) / (24 * 3600 * 1000)
|
||||
return api.s.GetTSDBStatus(qt, sq.AccountID, sq.ProjectID, tfss, date, focusLabel, topN, maxMetrics, deadline)
|
||||
}
|
||||
|
||||
// DeleteSeries deletes series matching tfss.
|
||||
//
|
||||
// Returns the number of deleted series.
|
||||
func (api *VMStorage) DeleteSeries(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (int, error) {
|
||||
tr := sq.GetTimeRange()
|
||||
maxMetrics := sq.MaxMetrics
|
||||
if maxMetrics <= 0 {
|
||||
// fallback to maxUniqueTimeSeries if no limit is provided,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
|
||||
maxMetrics = api.maxUniqueTimeSeriesCalculated
|
||||
}
|
||||
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if len(tfss) == 0 {
|
||||
return 0, fmt.Errorf("missing tag filters")
|
||||
}
|
||||
return api.s.DeleteSeries(qt, tfss, maxMetrics)
|
||||
}
|
||||
|
||||
func (api *VMStorage) RegisterMetricNames(qt *querytracer.Tracer, mrs []storage.MetricRow, _ uint64) error {
|
||||
api.s.RegisterMetricNames(qt, mrs)
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetMetricNamesUsageStats returns metric name usage stats.
|
||||
func (api *VMStorage) GetMetricNamesUsageStats(qt *querytracer.Tracer, tt *storage.TenantToken, limit, le int, matchPattern string, _ uint64) (metricnamestats.StatsResult, error) {
|
||||
return api.s.GetMetricNamesStats(qt, tt, limit, le, matchPattern), nil
|
||||
}
|
||||
|
||||
// ResetMetricNamesStats resets state for metric names usage tracker
|
||||
func (api *VMStorage) ResetMetricNamesUsageStats(qt *querytracer.Tracer, _ uint64) error {
|
||||
api.s.ResetMetricNamesStats(qt)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (api *VMStorage) setupTfss(qt *querytracer.Tracer, sq *storage.SearchQuery, tr storage.TimeRange, maxMetrics int, deadline uint64) ([]*storage.TagFilters, error) {
|
||||
tfss := make([]*storage.TagFilters, 0, len(sq.TagFilterss))
|
||||
accountID := sq.AccountID
|
||||
projectID := sq.ProjectID
|
||||
for _, tagFilters := range sq.TagFilterss {
|
||||
tfs := storage.NewTagFilters(accountID, projectID)
|
||||
for i := range tagFilters {
|
||||
tf := &tagFilters[i]
|
||||
if string(tf.Key) == "__graphite__" {
|
||||
query := tf.Value
|
||||
qtChild := qt.NewChild("searching for series matching __graphite__=%q", query)
|
||||
paths, err := api.s.SearchGraphitePaths(qtChild, accountID, projectID, tr, query, maxMetrics, deadline)
|
||||
qtChild.Donef("found %d series", len(paths))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when searching for Graphite paths for query %q: %w", query, err)
|
||||
}
|
||||
if len(paths) >= maxMetrics {
|
||||
return nil, fmt.Errorf("more than %d time series match Graphite query %q; "+
|
||||
"either narrow down the query or increase the corresponding -search.max* command-line flag value at vmselect nodes; "+
|
||||
"see https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#resource-usage-limits", maxMetrics, query)
|
||||
}
|
||||
tfs.AddGraphiteQuery(query, paths, tf.IsNegative)
|
||||
continue
|
||||
}
|
||||
if err := tfs.Add(tf.Key, tf.Value, tf.IsNegative, tf.IsRegexp); err != nil {
|
||||
return nil, fmt.Errorf("cannot parse tag filter %s: %w", tf, err)
|
||||
}
|
||||
// GetMaxUniqueTimeSeries returns `-search.maxUniqueTimeseries` or the auto-calculated value based on available resources.
|
||||
// The calculation is split into calculateMaxUniqueTimeSeriesForResource for unit testing.
|
||||
func GetMaxUniqueTimeSeries() int {
|
||||
maxUniqueTimeseriesValueOnce.Do(func() {
|
||||
maxUniqueTimeseriesValue = *maxUniqueTimeseries
|
||||
if maxUniqueTimeseriesValue <= 0 {
|
||||
maxUniqueTimeseriesValue = calculateMaxUniqueTimeSeriesForResource(*maxConcurrentRequests, memory.Remaining())
|
||||
}
|
||||
tfss = append(tfss, tfs)
|
||||
}
|
||||
return tfss, nil
|
||||
})
|
||||
return maxUniqueTimeseriesValue
|
||||
}
|
||||
|
||||
func (api *VMStorage) GetMetadataRecords(qt *querytracer.Tracer, tt *storage.TenantToken, limit int, metricName string, deadline uint64) ([]*metricsmetadata.Row, error) {
|
||||
return api.s.GetMetadataRows(qt, tt, limit, metricName, deadline)
|
||||
}
|
||||
|
||||
// deleteSnapshot deletes a snapshot by its name.
|
||||
//
|
||||
// Callers must wrap the call with wg.Add(1)...wg.Done().
|
||||
func (api *VMStorage) deleteSnapshot(snapshotName string) error {
|
||||
snapshots := api.s.MustListSnapshots()
|
||||
for _, snName := range snapshots {
|
||||
if snName == snapshotName {
|
||||
if err := api.s.DeleteSnapshot(snName); err != nil {
|
||||
return fmt.Errorf("cannot delete snapshot %q: %w", snName, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// calculateMaxUniqueTimeSeriesForResource calculate the max metrics limit calculated by available resources.
|
||||
func calculateMaxUniqueTimeSeriesForResource(maxConcurrentRequests, remainingMemory int) int {
|
||||
if maxConcurrentRequests <= 0 {
|
||||
// This line should NOT be reached unless the user has set an incorrect `search.maxConcurrentRequests`.
|
||||
// In such cases, fallback to unlimited.
|
||||
logger.Warnf("limiting -search.maxUniqueTimeseries to %v because -search.maxConcurrentRequests=%d.", 2e9, maxConcurrentRequests)
|
||||
return 2e9
|
||||
}
|
||||
return fmt.Errorf("cannot find snapshot %q", snapshotName)
|
||||
|
||||
// Calculate the max metrics limit for a single request in the worst-case concurrent scenario.
|
||||
// The approximate size of 1 unique series that could occupy in the vmstorage is 200 bytes.
|
||||
mts := remainingMemory / 200 / maxConcurrentRequests
|
||||
logger.Infof("limiting -search.maxUniqueTimeseries to %d according to -search.maxConcurrentRequests=%d and remaining memory=%d bytes. To increase the limit, reduce -search.maxConcurrentRequests or increase memory available to the process.", mts, maxConcurrentRequests, remainingMemory)
|
||||
return mts
|
||||
}
|
||||
@@ -1,25 +1,22 @@
|
||||
package main
|
||||
package servers
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
"runtime"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
)
|
||||
|
||||
func TestCalculateMaxMetricsLimitByResource(t *testing.T) {
|
||||
f := func(maxConcurrentRequest, remainingMemory, expect int) {
|
||||
t.Helper()
|
||||
maxMetricsLimit := calculateMaxUniqueTimeseries(maxConcurrentRequest, remainingMemory)
|
||||
maxMetricsLimit := calculateMaxUniqueTimeSeriesForResource(maxConcurrentRequest, remainingMemory)
|
||||
if maxMetricsLimit != expect {
|
||||
t.Fatalf("unexpected max metrics limit: got %d, want %d", maxMetricsLimit, expect)
|
||||
}
|
||||
}
|
||||
|
||||
// 64-bit architectures support memory sizes > 4GB.
|
||||
if strconv.IntSize == 64 {
|
||||
// Skip when GOARCH=386
|
||||
if runtime.GOARCH != "386" {
|
||||
// 8 CPU & 32 GiB
|
||||
f(16, int(math.Round(32*1024*1024*1024*0.4)), 4294967)
|
||||
// 4 CPU & 32 GiB
|
||||
@@ -40,14 +37,10 @@ func TestGetMaxMetrics(t *testing.T) {
|
||||
defer func() {
|
||||
*maxUniqueTimeseries = originalMaxUniqueTimeSeries
|
||||
}()
|
||||
|
||||
maxConcurrentRequests := 2 * cgroup.AvailableCPUs()
|
||||
f := func(searchQueryLimit, storageMaxUniqueTimeseries, expect int) {
|
||||
t.Helper()
|
||||
*maxUniqueTimeseries = storageMaxUniqueTimeseries
|
||||
s := &storage.Storage{}
|
||||
vmstorage := newVMStorage(s, maxConcurrentRequests)
|
||||
maxMetrics := vmstorage.getMaxMetrics(searchQueryLimit)
|
||||
maxMetrics := getMaxMetrics(searchQueryLimit)
|
||||
if maxMetrics != expect {
|
||||
t.Fatalf("unexpected max metrics: got %d, want %d", maxMetrics, expect)
|
||||
}
|
||||
@@ -22,7 +22,6 @@ var (
|
||||
vminsertAddrRE = regexp.MustCompile(`accepting vminsert conns at (.*:\d{1,5})$`)
|
||||
vminsertClusterNativeAddrRE = regexp.MustCompile(`started TCP clusternative server at "(.*:\d{1,5})"`)
|
||||
vmselectAddrRE = regexp.MustCompile(`accepting vmselect conns at (.*:\d{1,5})$`)
|
||||
vmauthHttpListenAddrRE = regexp.MustCompile(`pprof handlers are exposed at http://(.*:\d{1,5})/debug/pprof/`)
|
||||
)
|
||||
|
||||
// app represents an instance of some VictoriaMetrics server (such as vmstorage,
|
||||
|
||||
@@ -88,11 +88,19 @@ func (tc *TestCase) MustStartDefaultVmsingle() *Vmsingle {
|
||||
}
|
||||
|
||||
// MustStartVmsingle is a test helper function that starts an instance of
|
||||
// vmsingle (latest version) and fails the test if the app fails to start.
|
||||
// vmsingle located at ../../bin/victoria-metrics-race and fails the test if the app
|
||||
// fails to start.
|
||||
func (tc *TestCase) MustStartVmsingle(instance string, flags []string) *Vmsingle {
|
||||
tc.t.Helper()
|
||||
return tc.MustStartVmsingleAt(instance, "../../bin/victoria-metrics-race", flags)
|
||||
}
|
||||
|
||||
app, err := StartVmsingle(instance, flags, tc.cli, tc.output)
|
||||
// MustStartVmsingleAt is a test helper function that starts an instance of
|
||||
// vmsingle and fails the test if the app fails to start.
|
||||
func (tc *TestCase) MustStartVmsingleAt(instance, binary string, flags []string) *Vmsingle {
|
||||
tc.t.Helper()
|
||||
|
||||
app, err := StartVmsingleAt(instance, binary, flags, tc.cli, tc.output)
|
||||
if err != nil {
|
||||
tc.t.Fatalf("Could not start %s: %v", instance, err)
|
||||
}
|
||||
@@ -101,11 +109,19 @@ func (tc *TestCase) MustStartVmsingle(instance string, flags []string) *Vmsingle
|
||||
}
|
||||
|
||||
// MustStartVmstorage is a test helper function that starts an instance of
|
||||
// vmstorage (latest version) and fails the test if the app fails to start.
|
||||
// vmstorage located at ../../bin/vmstorage-race and fails the test if the app fails
|
||||
// to start.
|
||||
func (tc *TestCase) MustStartVmstorage(instance string, flags []string) *Vmstorage {
|
||||
tc.t.Helper()
|
||||
return tc.MustStartVmstorageAt(instance, "../../bin/vmstorage-race", flags)
|
||||
}
|
||||
|
||||
app, err := StartVmstorage(instance, flags, tc.cli, tc.output)
|
||||
// MustStartVmstorageAt is a test helper function that starts an instance of
|
||||
// vmstorage and fails the test if the app fails to start.
|
||||
func (tc *TestCase) MustStartVmstorageAt(instance string, binary string, flags []string) *Vmstorage {
|
||||
tc.t.Helper()
|
||||
|
||||
app, err := StartVmstorageAt(instance, binary, flags, tc.cli, tc.output)
|
||||
if err != nil {
|
||||
tc.t.Fatalf("Could not start %s: %v", instance, err)
|
||||
}
|
||||
@@ -114,7 +130,7 @@ func (tc *TestCase) MustStartVmstorage(instance string, flags []string) *Vmstora
|
||||
}
|
||||
|
||||
// MustStartVmselect is a test helper function that starts an instance of
|
||||
// vmselect (latest version) and fails the test if the app fails to start.
|
||||
// vmselect and fails the test if the app fails to start.
|
||||
func (tc *TestCase) MustStartVmselect(instance string, flags []string) *Vmselect {
|
||||
tc.t.Helper()
|
||||
|
||||
@@ -274,8 +290,10 @@ func (tc *TestCase) MustStartDefaultCluster() *Vmcluster {
|
||||
// tests usually come paired with corresponding vmsingle tests.
|
||||
type ClusterOptions struct {
|
||||
Vmstorage1Instance string
|
||||
Vmstorage1Binary string
|
||||
Vmstorage1Flags []string
|
||||
Vmstorage2Instance string
|
||||
Vmstorage2Binary string
|
||||
Vmstorage2Flags []string
|
||||
VminsertInstance string
|
||||
VminsertFlags []string
|
||||
@@ -287,8 +305,15 @@ type ClusterOptions struct {
|
||||
func (tc *TestCase) MustStartCluster(opts *ClusterOptions) *Vmcluster {
|
||||
tc.t.Helper()
|
||||
|
||||
vmstorage1 := tc.MustStartVmstorage(opts.Vmstorage1Instance, opts.Vmstorage1Flags)
|
||||
vmstorage2 := tc.MustStartVmstorage(opts.Vmstorage2Instance, opts.Vmstorage2Flags)
|
||||
if opts.Vmstorage1Binary == "" {
|
||||
opts.Vmstorage1Binary = "../../bin/vmstorage-race"
|
||||
}
|
||||
vmstorage1 := tc.MustStartVmstorageAt(opts.Vmstorage1Instance, opts.Vmstorage1Binary, opts.Vmstorage1Flags)
|
||||
|
||||
if opts.Vmstorage2Binary == "" {
|
||||
opts.Vmstorage2Binary = "../../bin/vmstorage-race"
|
||||
}
|
||||
vmstorage2 := tc.MustStartVmstorageAt(opts.Vmstorage2Instance, opts.Vmstorage2Binary, opts.Vmstorage2Flags)
|
||||
|
||||
opts.VminsertFlags = append(opts.VminsertFlags, []string{
|
||||
"-storageNode=" + vmstorage1.VminsertAddr() + "," + vmstorage2.VminsertAddr(),
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
package apptest
|
||||
|
||||
// MustStartVmsingle_v1_132_0 is a test helper function that starts an instance
|
||||
// of vmsingle-v1.132.0 (last version that uses legacy index) and fails the test
|
||||
// if the app fails to start.
|
||||
func (tc *TestCase) MustStartVmsingle_v1_132_0(instance string, flags []string) *Vmsingle {
|
||||
tc.t.Helper()
|
||||
|
||||
app, err := StartVmsingle_v1_132_0(instance, flags, tc.cli, tc.output)
|
||||
if err != nil {
|
||||
tc.t.Fatalf("Could not start %s: %v", instance, err)
|
||||
}
|
||||
tc.addApp(instance, app)
|
||||
return app
|
||||
}
|
||||
|
||||
// MustStartVmstorage_v1_132_0 is a test helper function that starts an instance
|
||||
// of vmstorage-v1.132.0 (last version that uses legacy index) and fails the
|
||||
// test if the app fails to start.
|
||||
func (tc *TestCase) MustStartVmstorage_v1_132_0(instance string, flags []string) *Vmstorage {
|
||||
tc.t.Helper()
|
||||
|
||||
app, err := StartVmstorage_v1_132_0(instance, flags, tc.cli, tc.output)
|
||||
if err != nil {
|
||||
tc.t.Fatalf("Could not start %s: %v", instance, err)
|
||||
}
|
||||
tc.addApp(instance, app)
|
||||
return app
|
||||
}
|
||||
|
||||
// MustStartCluster_v1_132_0 starts a cluster with vmstorage-v1.132.0 with
|
||||
// custom flags.
|
||||
func (tc *TestCase) MustStartCluster_v1_132_0(opts *ClusterOptions) *Vmcluster {
|
||||
tc.t.Helper()
|
||||
|
||||
vmstorage1 := tc.MustStartVmstorage_v1_132_0(opts.Vmstorage1Instance, opts.Vmstorage1Flags)
|
||||
vmstorage2 := tc.MustStartVmstorage_v1_132_0(opts.Vmstorage2Instance, opts.Vmstorage2Flags)
|
||||
|
||||
opts.VminsertFlags = append(opts.VminsertFlags, []string{
|
||||
"-storageNode=" + vmstorage1.VminsertAddr() + "," + vmstorage2.VminsertAddr(),
|
||||
}...)
|
||||
vminsert := tc.MustStartVminsert(opts.VminsertInstance, opts.VminsertFlags)
|
||||
|
||||
opts.VmselectFlags = append(opts.VmselectFlags, []string{
|
||||
"-storageNode=" + vmstorage1.VmselectAddr() + "," + vmstorage2.VmselectAddr(),
|
||||
}...)
|
||||
vmselect := tc.MustStartVmselect(opts.VmselectInstance, opts.VmselectFlags)
|
||||
|
||||
return &Vmcluster{vminsert, vmselect, []*Vmstorage{vmstorage1, vmstorage2}}
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package tests
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"testing"
|
||||
@@ -10,6 +11,11 @@ import (
|
||||
at "github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
)
|
||||
|
||||
var (
|
||||
legacyVmsinglePath = os.Getenv("VM_LEGACY_VMSINGLE_PATH")
|
||||
legacyVmstoragePath = os.Getenv("VM_LEGACY_VMSTORAGE_PATH")
|
||||
)
|
||||
|
||||
type testLegacyDeleteSeriesOpts struct {
|
||||
startLegacySUT func() at.PrometheusWriteQuerier
|
||||
startNewSUT func() at.PrometheusWriteQuerier
|
||||
@@ -25,7 +31,7 @@ func TestLegacySingleDeleteSeries(t *testing.T) {
|
||||
|
||||
opts := testLegacyDeleteSeriesOpts{
|
||||
startLegacySUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartVmsingle_v1_132_0("vmsingle-legacy", []string{
|
||||
return tc.MustStartVmsingleAt("vmsingle-legacy", legacyVmsinglePath, []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-search.maxStalenessInterval=1m",
|
||||
@@ -58,13 +64,15 @@ func TestLegacyClusterDeleteSeries(t *testing.T) {
|
||||
|
||||
opts := testLegacyDeleteSeriesOpts{
|
||||
startLegacySUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartCluster_v1_132_0(&at.ClusterOptions{
|
||||
return tc.MustStartCluster(&at.ClusterOptions{
|
||||
Vmstorage1Instance: "vmstorage1-legacy",
|
||||
Vmstorage1Binary: legacyVmstoragePath,
|
||||
Vmstorage1Flags: []string{
|
||||
"-storageDataPath=" + storage1DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
Vmstorage2Instance: "vmstorage2-legacy",
|
||||
Vmstorage2Binary: legacyVmstoragePath,
|
||||
Vmstorage2Flags: []string{
|
||||
"-storageDataPath=" + storage2DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
@@ -247,7 +255,7 @@ func TestLegacySingleBackupRestore(t *testing.T) {
|
||||
|
||||
opts := testLegacyBackupRestoreOpts{
|
||||
startLegacySUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartVmsingle_v1_132_0("vmsingle-legacy", []string{
|
||||
return tc.MustStartVmsingleAt("vmsingle-legacy", legacyVmsinglePath, []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-search.disableCache=true",
|
||||
@@ -290,13 +298,15 @@ func TestLegacyClusterBackupRestore(t *testing.T) {
|
||||
|
||||
opts := testLegacyBackupRestoreOpts{
|
||||
startLegacySUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartCluster_v1_132_0(&at.ClusterOptions{
|
||||
return tc.MustStartCluster(&at.ClusterOptions{
|
||||
Vmstorage1Instance: "vmstorage1-legacy",
|
||||
Vmstorage1Binary: legacyVmstoragePath,
|
||||
Vmstorage1Flags: []string{
|
||||
"-storageDataPath=" + storage1DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
Vmstorage2Instance: "vmstorage2-legacy",
|
||||
Vmstorage2Binary: legacyVmstoragePath,
|
||||
Vmstorage2Flags: []string{
|
||||
"-storageDataPath=" + storage2DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
@@ -573,7 +583,7 @@ func TestLegacySingleDowngrade(t *testing.T) {
|
||||
|
||||
opts := testLegacyDowngradeOpts{
|
||||
startLegacySUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartVmsingle_v1_132_0("vmsingle-legacy", []string{
|
||||
return tc.MustStartVmsingleAt("vmsingle-legacy", legacyVmsinglePath, []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-retentionPeriod=100y",
|
||||
"-search.disableCache=true",
|
||||
@@ -608,13 +618,15 @@ func TestLegacyClusterDowngrade(t *testing.T) {
|
||||
|
||||
opts := testLegacyDowngradeOpts{
|
||||
startLegacySUT: func() at.PrometheusWriteQuerier {
|
||||
return tc.MustStartCluster_v1_132_0(&at.ClusterOptions{
|
||||
return tc.MustStartCluster(&at.ClusterOptions{
|
||||
Vmstorage1Instance: "vmstorage1-legacy",
|
||||
Vmstorage1Binary: legacyVmstoragePath,
|
||||
Vmstorage1Flags: []string{
|
||||
"-storageDataPath=" + storage1DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
},
|
||||
Vmstorage2Instance: "vmstorage2-legacy",
|
||||
Vmstorage2Binary: legacyVmstoragePath,
|
||||
Vmstorage2Flags: []string{
|
||||
"-storageDataPath=" + storage2DataPath,
|
||||
"-retentionPeriod=100y",
|
||||
|
||||
@@ -16,63 +16,43 @@ import (
|
||||
"github.com/golang/snappy"
|
||||
)
|
||||
|
||||
// StartVmagent starts the latest version of vmagent.
|
||||
//
|
||||
// The path to the binary can be provided via VMAGENT_PATH environment
|
||||
// variable. If the variable is not set, ../../bin/vmagent-race will be
|
||||
// used.
|
||||
// Vmagent holds the state of a vmagent app and provides vmagent-specific functions
|
||||
type Vmagent struct {
|
||||
*app
|
||||
*metricsClient
|
||||
|
||||
httpListenAddr string
|
||||
|
||||
cli *Client
|
||||
}
|
||||
|
||||
// StartVmagent starts an instance of vmagent with the given flags. It also
|
||||
// sets the default flags and populates the app instance state with runtime
|
||||
// values extracted from the application log (such as httpListenAddr)
|
||||
func StartVmagent(instance string, flags []string, cli *Client, promScrapeConfigFilePath string, output io.Writer) (*Vmagent, error) {
|
||||
binary := os.Getenv("VMAGENT_PATH")
|
||||
if binary == "" {
|
||||
binary = "../../bin/vmagent-race"
|
||||
extractREs := []*regexp.Regexp{
|
||||
httpListenAddrRE,
|
||||
}
|
||||
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
|
||||
|
||||
app, stderrExtracts, err := startApp(instance, "../../bin/vmagent-race", flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-httpListenAddr": "127.0.0.1:0",
|
||||
"-promscrape.config": promScrapeConfigFilePath,
|
||||
"-remoteWrite.tmpDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
|
||||
},
|
||||
extractREs: []*regexp.Regexp{
|
||||
httpListenAddrRE,
|
||||
},
|
||||
output: output,
|
||||
extractREs: extractREs,
|
||||
output: output,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newVmagent(app, cli, vmagentRuntimeValues{
|
||||
httpListenAddr: stderrExtracts[0],
|
||||
}), nil
|
||||
}
|
||||
|
||||
type vmagentRuntimeValues struct {
|
||||
httpListenAddr string
|
||||
}
|
||||
|
||||
func newVmagent(app *app, cli *Client, rt vmagentRuntimeValues) *Vmagent {
|
||||
return &Vmagent{
|
||||
app: app,
|
||||
metricsClient: newMetricsClient(cli, stderrExtracts[0]),
|
||||
httpListenAddr: stderrExtracts[0],
|
||||
cli: cli,
|
||||
metricsClient: newMetricsClient(cli, rt.httpListenAddr),
|
||||
httpListenAddr: rt.httpListenAddr,
|
||||
}
|
||||
}
|
||||
|
||||
// Vmagent holds the state of a vmagent app and provides vmagent-specific
|
||||
// functions.
|
||||
type Vmagent struct {
|
||||
*app
|
||||
*metricsClient
|
||||
|
||||
cli *Client
|
||||
httpListenAddr string
|
||||
}
|
||||
|
||||
// HTTPAddr returns the address at which the vmagent process is listening
|
||||
// for http connections.
|
||||
func (app *Vmagent) HTTPAddr() string {
|
||||
return app.httpListenAddr
|
||||
}, nil
|
||||
}
|
||||
|
||||
// APIV1ImportPrometheus is a test helper function that inserts a
|
||||
@@ -223,6 +203,12 @@ func (app *Vmagent) PrometheusAPIV1Write(t *testing.T, wr prompb.WriteRequest, o
|
||||
})
|
||||
}
|
||||
|
||||
// HTTPAddr returns the address at which the vmagent process is listening
|
||||
// for http connections.
|
||||
func (app *Vmagent) HTTPAddr() string {
|
||||
return app.httpListenAddr
|
||||
}
|
||||
|
||||
// sendBlocking sends the data to vmstorage by executing `send` function and
|
||||
// waits until the data is actually sent.
|
||||
//
|
||||
|
||||
@@ -2,7 +2,6 @@ package apptest
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
"syscall"
|
||||
"testing"
|
||||
@@ -11,48 +10,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
)
|
||||
|
||||
// StartVmauth starts the latest version of vmauth.
|
||||
//
|
||||
// The path to the binary can be provided via VMAUTH_PATH environment
|
||||
// variable. If the variable is not set, ../../bin/vmauth-race will be
|
||||
// used.
|
||||
func StartVmauth(instance string, flags []string, cli *Client, configFilePath string, output io.Writer) (*Vmauth, error) {
|
||||
binary := os.Getenv("VMAUTH_PATH")
|
||||
if binary == "" {
|
||||
binary = "../../bin/vmauth-race"
|
||||
}
|
||||
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-httpListenAddr": "127.0.0.1:0",
|
||||
"-auth.config": configFilePath,
|
||||
},
|
||||
extractREs: []*regexp.Regexp{
|
||||
vmauthHttpListenAddrRE,
|
||||
},
|
||||
output: output,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newVmauth(app, cli, configFilePath, vmauthRuntimeValues{
|
||||
httpListenAddr: stderrExtracts[0],
|
||||
}), nil
|
||||
}
|
||||
|
||||
type vmauthRuntimeValues struct {
|
||||
httpListenAddr string
|
||||
}
|
||||
|
||||
func newVmauth(app *app, cli *Client, configFilePath string, rt vmauthRuntimeValues) *Vmauth {
|
||||
return &Vmauth{
|
||||
app: app,
|
||||
metricsClient: newMetricsClient(cli, rt.httpListenAddr),
|
||||
httpListenAddr: rt.httpListenAddr,
|
||||
configFilePath: configFilePath,
|
||||
cli: cli,
|
||||
}
|
||||
}
|
||||
var httpBuilitinListenAddrRE = regexp.MustCompile(`pprof handlers are exposed at http://(.*:\d{1,5})/debug/pprof/`)
|
||||
|
||||
// Vmauth holds the state of a vmauth app and provides vmauth-specific
|
||||
// functions.
|
||||
@@ -60,14 +18,38 @@ type Vmauth struct {
|
||||
*app
|
||||
*metricsClient
|
||||
|
||||
cli *Client
|
||||
httpListenAddr string
|
||||
configFilePath string
|
||||
cli *Client
|
||||
}
|
||||
|
||||
// GetHTTPListenAddr returns listen http addr
|
||||
func (app *Vmauth) GetHTTPListenAddr() string {
|
||||
return app.httpListenAddr
|
||||
// StartVmauth starts an instance of vmauth with the given flags. It also
|
||||
// sets the default flags and populates the app instance state with runtime
|
||||
// values extracted from the application log (such as httpListenAddr)
|
||||
func StartVmauth(instance string, flags []string, cli *Client, configFilePath string, output io.Writer) (*Vmauth, error) {
|
||||
extractREs := []*regexp.Regexp{
|
||||
httpBuilitinListenAddrRE,
|
||||
}
|
||||
|
||||
app, stderrExtracts, err := startApp(instance, "../../bin/vmauth-race", flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-httpListenAddr": "127.0.0.1:0",
|
||||
"-auth.config": configFilePath,
|
||||
},
|
||||
extractREs: extractREs,
|
||||
output: output,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Vmauth{
|
||||
app: app,
|
||||
metricsClient: newMetricsClient(cli, stderrExtracts[0]),
|
||||
httpListenAddr: stderrExtracts[0],
|
||||
configFilePath: configFilePath,
|
||||
cli: cli,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// UpdateConfiguration updates the vmauth configuration file with the provided YAML content,
|
||||
@@ -97,3 +79,8 @@ func (app *Vmauth) UpdateConfiguration(t *testing.T, configFileYAML string) {
|
||||
|
||||
t.Fatalf("config were not reloaded after SIGHUP signal; previous total: %d, current total: %d", prevTotal, currTotal)
|
||||
}
|
||||
|
||||
// GetHTTPListenAddr returns listen http addr
|
||||
func (app *Vmauth) GetHTTPListenAddr() string {
|
||||
return app.httpListenAddr
|
||||
}
|
||||
|
||||
@@ -1,26 +1,15 @@
|
||||
package apptest
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
import "io"
|
||||
|
||||
// StartVmbackup starts the latest version of vmbackup with the given flags and
|
||||
// waits until it exits.
|
||||
//
|
||||
// The path to the binary can be provided via VMBACKUP_PATH environment
|
||||
// variable. If the variable is not set, ../../bin/vmbackup-race will be
|
||||
// used.
|
||||
// StartVmbackup starts an instance of vmbackup with the given flags and waits
|
||||
// until it exits.
|
||||
func StartVmbackup(instance, storageDataPath, snapshotCreateURL, dst string, output io.Writer) error {
|
||||
binary := os.Getenv("VMBACKUP_PATH")
|
||||
if binary == "" {
|
||||
binary = "../../bin/vmbackup-race"
|
||||
}
|
||||
flags := []string{
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
"-snapshot.createURL=" + snapshotCreateURL,
|
||||
"-dst=" + dst,
|
||||
}
|
||||
_, _, err := startApp(instance, binary, flags, &appOptions{wait: true, output: output})
|
||||
_, _, err := startApp(instance, "../../bin/vmbackup-race", flags, &appOptions{wait: true, output: output})
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -1,23 +1,9 @@
|
||||
package apptest
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
import "io"
|
||||
|
||||
// StartVmctl starts an instance of vmctl cli with the given flags
|
||||
|
||||
// StartVmctl starts the latest version of vmctl with the given flags and
|
||||
// waits until it exits.
|
||||
//
|
||||
// The path to the binary can be provided via VMCTL_PATH environment
|
||||
// variable. If the variable is not set, ../../bin/vmctl-race will be
|
||||
// used.
|
||||
func StartVmctl(instance string, flags []string, output io.Writer) error {
|
||||
binary := os.Getenv("VMCTL_PATH")
|
||||
if binary == "" {
|
||||
binary = "../../bin/vmctl-race"
|
||||
}
|
||||
_, _, err := startApp(instance, binary, flags, &appOptions{wait: true, output: output})
|
||||
_, _, err := startApp(instance, "../../bin/vmctl-race", flags, &appOptions{wait: true, output: output})
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -3,13 +3,23 @@ package apptest
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Vminsert holds the state of a vminsert app and provides vminsert-specific
|
||||
// functions.
|
||||
type Vminsert struct {
|
||||
*app
|
||||
*metricsClient
|
||||
*vminsertClient
|
||||
|
||||
httpListenAddr string
|
||||
clusternativeListenAddr string
|
||||
}
|
||||
|
||||
// storageNodes returns the storage node addresses passed to vminsert via
|
||||
// -storageNode command line flag.
|
||||
func storageNodes(flags []string) []string {
|
||||
@@ -21,11 +31,9 @@ func storageNodes(flags []string) []string {
|
||||
return nil
|
||||
}
|
||||
|
||||
// StartVminsert starts the latest version of vminsert.
|
||||
//
|
||||
// The path to the binary can be provided via VMINSERT_PATH environment
|
||||
// variable. If the variable is not set, ../../bin/vminsert-race will be
|
||||
// used.
|
||||
// StartVminsert starts an instance of vminsert with the given flags. It also
|
||||
// sets the default flags and populates the app instance state with runtime
|
||||
// values extracted from the application log (such as httpListenAddr)
|
||||
func StartVminsert(instance string, flags []string, cli *Client, output io.Writer) (*Vminsert, error) {
|
||||
extractREs := []*regexp.Regexp{
|
||||
httpListenAddrRE,
|
||||
@@ -40,15 +48,11 @@ func StartVminsert(instance string, flags []string, cli *Client, output io.Write
|
||||
extractREs = append(extractREs, regexp.MustCompile(logRecord))
|
||||
}
|
||||
|
||||
binary := os.Getenv("VMINSERT_PATH")
|
||||
if binary == "" {
|
||||
binary = "../../bin/vminsert-race"
|
||||
}
|
||||
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
|
||||
app, stderrExtracts, err := startApp(instance, "../../bin/vminsert-race", flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-httpListenAddr": "127.0.0.1:0",
|
||||
"-clusternativeListenAddr": "127.0.0.1:0",
|
||||
"-graphiteListenAddr": "127.0.0.1:0",
|
||||
"-graphiteListenAddr": ":0",
|
||||
"-opentsdbListenAddr": "127.0.0.1:0",
|
||||
"-clusternative.vminsertConnsShutdownDuration": "1ms",
|
||||
},
|
||||
@@ -59,56 +63,27 @@ func StartVminsert(instance string, flags []string, cli *Client, output io.Write
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newVminsert(app, cli, vminsertRuntimeValues{
|
||||
metricsClient := newMetricsClient(cli, stderrExtracts[0])
|
||||
return &Vminsert{
|
||||
app: app,
|
||||
metricsClient: metricsClient,
|
||||
vminsertClient: &vminsertClient{
|
||||
vminsertCli: cli,
|
||||
url: func(op, path string, opts QueryOpts) string {
|
||||
return getClusterPath(stderrExtracts[0], op, path, opts)
|
||||
},
|
||||
openTSDBURL: func(op, path string, opts QueryOpts) string {
|
||||
return getClusterPath(stderrExtracts[3], op, path, opts)
|
||||
},
|
||||
graphiteListenAddr: stderrExtracts[2],
|
||||
sendBlocking: func(t *testing.T, numRecordsToSend int, send func()) {
|
||||
t.Helper()
|
||||
sendBlocking(t, metricsClient, numRecordsToSend, send)
|
||||
},
|
||||
},
|
||||
httpListenAddr: stderrExtracts[0],
|
||||
clusternativeListenAddr: stderrExtracts[1],
|
||||
graphiteListenAddr: stderrExtracts[2],
|
||||
openTSDBListenAddr: stderrExtracts[3],
|
||||
}), nil
|
||||
}
|
||||
|
||||
type vminsertRuntimeValues struct {
|
||||
httpListenAddr string
|
||||
clusternativeListenAddr string
|
||||
graphiteListenAddr string
|
||||
openTSDBListenAddr string
|
||||
}
|
||||
|
||||
func newVminsert(app *app, cli *Client, rt vminsertRuntimeValues) *Vminsert {
|
||||
metricsClient := newMetricsClient(cli, rt.httpListenAddr)
|
||||
vminsertClient := &vminsertClient{
|
||||
vminsertCli: cli,
|
||||
url: func(op, path string, opts QueryOpts) string {
|
||||
return getClusterPath(rt.httpListenAddr, op, path, opts)
|
||||
},
|
||||
openTSDBURL: func(op, path string, opts QueryOpts) string {
|
||||
return getClusterPath(rt.openTSDBListenAddr, op, path, opts)
|
||||
},
|
||||
graphiteListenAddr: rt.graphiteListenAddr,
|
||||
sendBlocking: func(t *testing.T, numRecordsToSend int, send func()) {
|
||||
t.Helper()
|
||||
sendBlocking(t, metricsClient, numRecordsToSend, send)
|
||||
},
|
||||
}
|
||||
|
||||
return &Vminsert{
|
||||
app: app,
|
||||
metricsClient: metricsClient,
|
||||
vminsertClient: vminsertClient,
|
||||
httpListenAddr: rt.httpListenAddr,
|
||||
clusternativeListenAddr: rt.clusternativeListenAddr,
|
||||
}
|
||||
}
|
||||
|
||||
// Vminsert holds the state of a vminsert app and provides vminsert-specific
|
||||
// functions.
|
||||
type Vminsert struct {
|
||||
*app
|
||||
*metricsClient
|
||||
*vminsertClient
|
||||
|
||||
httpListenAddr string
|
||||
clusternativeListenAddr string
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ClusternativeListenAddr returns the address at which the vminsert process is
|
||||
|
||||
@@ -1,25 +1,14 @@
|
||||
package apptest
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
import "io"
|
||||
|
||||
// StartVmrestore starts the latest version of vmrestore with the given flags
|
||||
// and waits until it exits.
|
||||
//
|
||||
// The path to the binary can be provided via VMRESTORE_PATH environment
|
||||
// variable. If the variable is not set, ../../bin/vmrestore-race will be
|
||||
// used.
|
||||
// StartVmrestore starts an instance of vmrestore with the given flags and waits
|
||||
// until it exits.
|
||||
func StartVmrestore(instance, src, storageDataPath string, output io.Writer) error {
|
||||
binary := os.Getenv("VMRESTORE_PATH")
|
||||
if binary == "" {
|
||||
binary = "../../bin/vmrestore-race"
|
||||
}
|
||||
flags := []string{
|
||||
"-src=" + src,
|
||||
"-storageDataPath=" + storageDataPath,
|
||||
}
|
||||
_, _, err := startApp(instance, binary, flags, &appOptions{wait: true, output: output})
|
||||
_, _, err := startApp(instance, "../../bin/vmrestore-race", flags, &appOptions{wait: true, output: output})
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -3,21 +3,26 @@ package apptest
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
// StartVmselect starts the latest version of vmselect.
|
||||
//
|
||||
// The path to the binary can be provided via VMSELECT_PATH environment
|
||||
// variable. If the variable is not set, ../../bin/vmselect-race will be
|
||||
// used.
|
||||
// Vmselect holds the state of a vmselect app and provides vmselect-specific
|
||||
// functions.
|
||||
type Vmselect struct {
|
||||
*app
|
||||
*metricsClient
|
||||
*vmselectClient
|
||||
|
||||
httpListenAddr string
|
||||
clusternativeListenAddr string
|
||||
cli *Client
|
||||
}
|
||||
|
||||
// StartVmselect starts an instance of vmselect with the given flags. It also
|
||||
// sets the default flags and populates the app instance state with runtime
|
||||
// values extracted from the application log (such as httpListenAddr)
|
||||
func StartVmselect(instance string, flags []string, cli *Client, output io.Writer) (*Vmselect, error) {
|
||||
binary := os.Getenv("VMSELECT_PATH")
|
||||
if binary == "" {
|
||||
binary = "../../bin/vmselect-race"
|
||||
}
|
||||
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
|
||||
app, stderrExtracts, err := startApp(instance, "../../bin/vmselect-race", flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-httpListenAddr": "127.0.0.1:0",
|
||||
"-clusternativeListenAddr": "127.0.0.1:0",
|
||||
@@ -32,43 +37,21 @@ func StartVmselect(instance string, flags []string, cli *Client, output io.Write
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newVmselect(app, cli, vmselectRuntimeValues{
|
||||
httpListenAddr: stderrExtracts[0],
|
||||
clusternativeListenAddr: stderrExtracts[1],
|
||||
}), nil
|
||||
}
|
||||
|
||||
type vmselectRuntimeValues struct {
|
||||
httpListenAddr string
|
||||
clusternativeListenAddr string
|
||||
}
|
||||
|
||||
func newVmselect(app *app, cli *Client, rt vmselectRuntimeValues) *Vmselect {
|
||||
return &Vmselect{
|
||||
app: app,
|
||||
metricsClient: newMetricsClient(cli, rt.httpListenAddr),
|
||||
metricsClient: newMetricsClient(cli, stderrExtracts[0]),
|
||||
vmselectClient: &vmselectClient{
|
||||
vmselectCli: cli,
|
||||
url: func(op, path string, opts QueryOpts) string {
|
||||
return getClusterPath(rt.httpListenAddr, op, path, opts)
|
||||
return getClusterPath(stderrExtracts[0], op, path, opts)
|
||||
},
|
||||
metricNamesStatsResetURL: fmt.Sprintf("http://%s/admin/api/v1/admin/status/metric_names_stats/reset", rt.httpListenAddr),
|
||||
tenantsURL: fmt.Sprintf("http://%s/admin/tenants", rt.httpListenAddr),
|
||||
metricNamesStatsResetURL: fmt.Sprintf("http://%s/admin/api/v1/admin/status/metric_names_stats/reset", stderrExtracts[0]),
|
||||
tenantsURL: fmt.Sprintf("http://%s/admin/tenants", stderrExtracts[0]),
|
||||
},
|
||||
httpListenAddr: rt.httpListenAddr,
|
||||
clusternativeListenAddr: rt.clusternativeListenAddr,
|
||||
}
|
||||
}
|
||||
|
||||
// Vmselect holds the state of a vmselect app and provides vmselect-specific
|
||||
// functions.
|
||||
type Vmselect struct {
|
||||
*app
|
||||
*metricsClient
|
||||
*vmselectClient
|
||||
|
||||
httpListenAddr string
|
||||
clusternativeListenAddr string
|
||||
httpListenAddr: stderrExtracts[0],
|
||||
clusternativeListenAddr: stderrExtracts[1],
|
||||
cli: cli,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ClusternativeListenAddr returns the address at which the vmselect process is
|
||||
|
||||
@@ -9,85 +9,6 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// StartVmsingle starts the latest version of vmsingle.
|
||||
//
|
||||
// The path to the binary can be provided via VMSINGLE_PATH environment
|
||||
// variable. If the variable is not set, ../../bin/victoria-metrics-race will be
|
||||
// used.
|
||||
func StartVmsingle(instance string, flags []string, cli *Client, output io.Writer) (*Vmsingle, error) {
|
||||
binary := os.Getenv("VMSINGLE_PATH")
|
||||
if binary == "" {
|
||||
binary = "../../bin/victoria-metrics-race"
|
||||
}
|
||||
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-storageDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
|
||||
"-httpListenAddr": "127.0.0.1:0",
|
||||
"-graphiteListenAddr": "127.0.0.1:0",
|
||||
"-opentsdbListenAddr": "127.0.0.1:0",
|
||||
},
|
||||
extractREs: []*regexp.Regexp{
|
||||
storageDataPathRE,
|
||||
httpListenAddrRE,
|
||||
graphiteListenAddrRE,
|
||||
openTSDBListenAddrRE,
|
||||
},
|
||||
output: output,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newVmsingle(app, cli, vmsingleRuntimeValues{
|
||||
storageDataPath: stderrExtracts[0],
|
||||
httpListenAddr: stderrExtracts[1],
|
||||
graphiteListenAddr: stderrExtracts[2],
|
||||
openTSDBListenAddr: stderrExtracts[3],
|
||||
}), nil
|
||||
}
|
||||
|
||||
type vmsingleRuntimeValues struct {
|
||||
storageDataPath string
|
||||
httpListenAddr string
|
||||
graphiteListenAddr string
|
||||
openTSDBListenAddr string
|
||||
}
|
||||
|
||||
func newVmsingle(app *app, cli *Client, rt vmsingleRuntimeValues) *Vmsingle {
|
||||
return &Vmsingle{
|
||||
app: app,
|
||||
metricsClient: newMetricsClient(cli, rt.httpListenAddr),
|
||||
vmstorageClient: &vmstorageClient{
|
||||
vmstorageCli: cli,
|
||||
httpListenAddr: rt.httpListenAddr,
|
||||
},
|
||||
vmselectClient: &vmselectClient{
|
||||
vmselectCli: cli,
|
||||
url: func(op, path string, opts QueryOpts) string {
|
||||
return fmt.Sprintf("http://%s/%s", rt.httpListenAddr, path)
|
||||
},
|
||||
metricNamesStatsResetURL: fmt.Sprintf("http://%s/api/v1/admin/status/metric_names_stats/reset", rt.httpListenAddr),
|
||||
tenantsURL: "vmsingle-does-not-serve-tenants",
|
||||
},
|
||||
vminsertClient: &vminsertClient{
|
||||
vminsertCli: cli,
|
||||
url: func(_, path string, _ QueryOpts) string {
|
||||
return fmt.Sprintf("http://%s/%s", rt.httpListenAddr, path)
|
||||
},
|
||||
openTSDBURL: func(_, path string, _ QueryOpts) string {
|
||||
return fmt.Sprintf("http://%s/%s", rt.openTSDBListenAddr, path)
|
||||
},
|
||||
graphiteListenAddr: rt.graphiteListenAddr,
|
||||
sendBlocking: func(t *testing.T, _ int, send func()) {
|
||||
t.Helper()
|
||||
send()
|
||||
},
|
||||
},
|
||||
storageDataPath: rt.storageDataPath,
|
||||
httpListenAddr: rt.httpListenAddr,
|
||||
}
|
||||
}
|
||||
|
||||
// Vmsingle holds the state of a vmsingle app and provides vmsingle-specific
|
||||
// functions.
|
||||
type Vmsingle struct {
|
||||
@@ -101,6 +22,63 @@ type Vmsingle struct {
|
||||
httpListenAddr string
|
||||
}
|
||||
|
||||
// StartVmsingleAt starts an instance of vmsingle with the given flags. It also
|
||||
// sets the default flags and populates the app instance state with runtime
|
||||
// values extracted from the application log (such as httpListenAddr).
|
||||
func StartVmsingleAt(instance, binary string, flags []string, cli *Client, output io.Writer) (*Vmsingle, error) {
|
||||
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-storageDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
|
||||
"-httpListenAddr": "127.0.0.1:0",
|
||||
"-graphiteListenAddr": ":0",
|
||||
"-opentsdbListenAddr": "127.0.0.1:0",
|
||||
},
|
||||
extractREs: []*regexp.Regexp{
|
||||
storageDataPathRE,
|
||||
httpListenAddrRE,
|
||||
graphiteListenAddrRE,
|
||||
openTSDBListenAddrRE,
|
||||
},
|
||||
output: output,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Vmsingle{
|
||||
app: app,
|
||||
metricsClient: newMetricsClient(cli, stderrExtracts[1]),
|
||||
vmstorageClient: &vmstorageClient{
|
||||
vmstorageCli: cli,
|
||||
httpListenAddr: stderrExtracts[1],
|
||||
},
|
||||
vmselectClient: &vmselectClient{
|
||||
vmselectCli: cli,
|
||||
url: func(op, path string, opts QueryOpts) string {
|
||||
return fmt.Sprintf("http://%s/%s", stderrExtracts[1], path)
|
||||
},
|
||||
metricNamesStatsResetURL: fmt.Sprintf("http://%s/api/v1/admin/status/metric_names_stats/reset", stderrExtracts[1]),
|
||||
tenantsURL: "vmsingle-does-not-serve-tenants",
|
||||
},
|
||||
vminsertClient: &vminsertClient{
|
||||
vminsertCli: cli,
|
||||
url: func(_, path string, _ QueryOpts) string {
|
||||
return fmt.Sprintf("http://%s/%s", stderrExtracts[1], path)
|
||||
},
|
||||
openTSDBURL: func(_, path string, _ QueryOpts) string {
|
||||
return fmt.Sprintf("http://%s/%s", stderrExtracts[3], path)
|
||||
},
|
||||
graphiteListenAddr: stderrExtracts[2],
|
||||
sendBlocking: func(t *testing.T, _ int, send func()) {
|
||||
t.Helper()
|
||||
send()
|
||||
},
|
||||
},
|
||||
storageDataPath: stderrExtracts[0],
|
||||
httpListenAddr: stderrExtracts[1],
|
||||
}, nil
|
||||
}
|
||||
|
||||
// HTTPAddr returns the address at which the vminsert process is
|
||||
// listening for incoming HTTP requests.
|
||||
func (app *Vmsingle) HTTPAddr() string {
|
||||
|
||||
@@ -1,43 +0,0 @@
|
||||
package apptest
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"regexp"
|
||||
"time"
|
||||
)
|
||||
|
||||
// StartVmsingle_v1_132_0 starts vmsingle-v1.132.0 (the last version that uses
|
||||
// legacy index).
|
||||
//
|
||||
// The path to the binary must be provided via VMSINGLE_V1_132_0_PATH
|
||||
// environment variable.
|
||||
func StartVmsingle_v1_132_0(instance string, flags []string, cli *Client, output io.Writer) (*Vmsingle, error) {
|
||||
binary := os.Getenv("VMSINGLE_V1_132_0_PATH")
|
||||
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-storageDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
|
||||
"-httpListenAddr": "127.0.0.1:0",
|
||||
"-graphiteListenAddr": "127.0.0.1:0",
|
||||
"-opentsdbListenAddr": "127.0.0.1:0",
|
||||
},
|
||||
extractREs: []*regexp.Regexp{
|
||||
storageDataPathRE,
|
||||
httpListenAddrRE,
|
||||
graphiteListenAddrRE,
|
||||
openTSDBListenAddrRE,
|
||||
},
|
||||
output: output,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newVmsingle(app, cli, vmsingleRuntimeValues{
|
||||
storageDataPath: stderrExtracts[0],
|
||||
httpListenAddr: stderrExtracts[1],
|
||||
graphiteListenAddr: stderrExtracts[2],
|
||||
openTSDBListenAddr: stderrExtracts[3],
|
||||
}), nil
|
||||
}
|
||||
@@ -8,22 +8,23 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// StartVmstorage starts the latest version of vmstorage.
|
||||
//
|
||||
// The path to the binary can be provided via VMSTORAGE_PATH environment
|
||||
// variable. If the variable is not set, ../../bin/vmstorage-race will be used.
|
||||
func StartVmstorage(instance string, flags []string, cli *Client, output io.Writer) (*Vmstorage, error) {
|
||||
binary := os.Getenv("VMSTORAGE_PATH")
|
||||
if binary == "" {
|
||||
binary = "../../bin/vmstorage-race"
|
||||
}
|
||||
return startVmstorage(instance, binary, flags, cli, output)
|
||||
// Vmstorage holds the state of a vmstorage app and provides vmstorage-specific
|
||||
// functions.
|
||||
type Vmstorage struct {
|
||||
*app
|
||||
*metricsClient
|
||||
*vmstorageClient
|
||||
|
||||
storageDataPath string
|
||||
httpListenAddr string
|
||||
vminsertAddr string
|
||||
vmselectAddr string
|
||||
}
|
||||
|
||||
// startVmstorage starts an instance of vmstorage with the given flags. It also
|
||||
// StartVmstorageAt starts an instance of vmstorage with the given flags. It also
|
||||
// sets the default flags and populates the app instance state with runtime
|
||||
// values extracted from the application log (such as httpListenAddr)
|
||||
func startVmstorage(instance, binary string, flags []string, cli *Client, output io.Writer) (*Vmstorage, error) {
|
||||
func StartVmstorageAt(instance, binary string, flags []string, cli *Client, output io.Writer) (*Vmstorage, error) {
|
||||
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-storageDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
|
||||
@@ -43,47 +44,18 @@ func startVmstorage(instance, binary string, flags []string, cli *Client, output
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newVmstorage(app, cli, vmstorageRuntimeValues{
|
||||
return &Vmstorage{
|
||||
app: app,
|
||||
metricsClient: newMetricsClient(cli, stderrExtracts[1]),
|
||||
vmstorageClient: &vmstorageClient{
|
||||
vmstorageCli: cli,
|
||||
httpListenAddr: stderrExtracts[1],
|
||||
},
|
||||
storageDataPath: stderrExtracts[0],
|
||||
httpListenAddr: stderrExtracts[1],
|
||||
vminsertAddr: stderrExtracts[2],
|
||||
vmselectAddr: stderrExtracts[3],
|
||||
}), nil
|
||||
}
|
||||
|
||||
type vmstorageRuntimeValues struct {
|
||||
storageDataPath string
|
||||
httpListenAddr string
|
||||
vminsertAddr string
|
||||
vmselectAddr string
|
||||
}
|
||||
|
||||
func newVmstorage(app *app, cli *Client, rt vmstorageRuntimeValues) *Vmstorage {
|
||||
return &Vmstorage{
|
||||
app: app,
|
||||
metricsClient: newMetricsClient(cli, rt.httpListenAddr),
|
||||
vmstorageClient: &vmstorageClient{
|
||||
vmstorageCli: cli,
|
||||
httpListenAddr: rt.httpListenAddr,
|
||||
},
|
||||
storageDataPath: rt.storageDataPath,
|
||||
httpListenAddr: rt.httpListenAddr,
|
||||
vminsertAddr: rt.vminsertAddr,
|
||||
vmselectAddr: rt.vmselectAddr,
|
||||
}
|
||||
}
|
||||
|
||||
// Vmstorage holds the state of a vmstorage app and provides vmstorage-specific
|
||||
// functions.
|
||||
type Vmstorage struct {
|
||||
*app
|
||||
*metricsClient
|
||||
*vmstorageClient
|
||||
|
||||
storageDataPath string
|
||||
httpListenAddr string
|
||||
vminsertAddr string
|
||||
vmselectAddr string
|
||||
}, nil
|
||||
}
|
||||
|
||||
// VminsertAddr returns the address at which the vmstorage process is listening
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
package apptest
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
|
||||
// StartVmstorage_v1_132_0 starts vmstorage-v1.132.0 (the last version that uses
|
||||
// legacy index).
|
||||
//
|
||||
// The path to the binary must be provided via VMSTORAGE_V1_132_0_PATH
|
||||
// environment variable.
|
||||
func StartVmstorage_v1_132_0(instance string, flags []string, cli *Client, output io.Writer) (*Vmstorage, error) {
|
||||
binary := os.Getenv("VMSTORAGE_V1_132_0_PATH")
|
||||
return startVmstorage(instance, binary, flags, cli, output)
|
||||
}
|
||||
@@ -88,7 +88,6 @@ These skills provide predefined workflows and capabilities such as:
|
||||
* Multi-signal investigations
|
||||
* Cardinality optimization
|
||||
* Unused metric detection
|
||||
* Stream aggregation configuration
|
||||
|
||||
To install the available skills for AI agents, run:
|
||||
```sh
|
||||
|
||||
@@ -150,12 +150,8 @@ You can experiment with your own data during the month‑long trial without depl
|
||||
are fast-booting Linux microVMs that run on a fleet of large bare-metal servers. You can start a playground right from your browser.
|
||||
Once up and running, accessing a playground is no different from SSH-ing into a remote server rented from your favorite VPS or Cloud provider.
|
||||
|
||||
Iximiuz Labs provides various [learning-by-doing resources for VictoriaMetrics](https://labs.iximiuz.com/v/victoriametrics):
|
||||
- Tutorial:
|
||||
- [Getting Started with VictoriaMetrics on Kubernetes](https://labs.iximiuz.com/tutorials/victoriametrics-getting-started-kubernetes)
|
||||
- Playgrounds:
|
||||
- [VictoriaMetrics single node](https://labs.iximiuz.com/playgrounds/victoriametrics)
|
||||
- [VictoriaMetrics cluster](https://labs.iximiuz.com/playgrounds/victoriametrics-cluster)
|
||||
- [VictoriaMetrics on Kubernetes](https://labs.iximiuz.com/playgrounds/victoriametrics-kubernetes)
|
||||
|
||||
Iximiuz Labs requires a [free account](https://labs.iximiuz.com/signup) to access the materials.
|
||||
Iximiuz Labs provides playgrounds for VictoriaMetrics software:
|
||||
- [VictoriaMetrics single node (on Ubuntu)](https://labs.iximiuz.com/playgrounds/victoriametrics-e2f9b613)
|
||||
- [VictoriaMetrics cluster (on Ubuntu)](https://labs.iximiuz.com/playgrounds/victoriametrics-cluster-8eacb19d)
|
||||
- [Getting Started with VictoriaMetrics on Kubernetes](https://labs.iximiuz.com/tutorials/victoriametrics-getting-started-kubernetes-0e9c0993)
|
||||
- [VictoriaMetrics Operator](https://labs.iximiuz.com/playgrounds/victoriametrics-kubernetes-9eebc258)
|
||||
|
||||
@@ -43,9 +43,6 @@ Just download VictoriaMetrics and follow [these instructions](https://docs.victo
|
||||
See [available integrations](https://docs.victoriametrics.com/victoriametrics/integrations/) with other systems like
|
||||
[Prometheus](https://docs.victoriametrics.com/victoriametrics/integrations/prometheus/) or [Grafana](https://docs.victoriametrics.com/victoriametrics/integrations/grafana/).
|
||||
|
||||
> Want to see VictoriaMetrics in action, but without installing anything?
|
||||
> Try [Playgrounds](https://docs.victoriametrics.com/playgrounds/) - a list of publicly available playgrounds for VictoriaMetrics software.
|
||||
|
||||
VictoriaMetrics is developed at a fast pace, so it is recommended to periodically check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/)
|
||||
and perform [regular upgrades](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
|
||||
## tip
|
||||
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/), [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add `-opentelemetry.promoteAllResourceAttributes` and `-opentelemetry.promoteScopeMetadata` command-line flags to allow managing label promotion for resource attributes and OTel scope metadata. See [OpenTelemetry](https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/) docs and [#10931](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10931).
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix intermittent `write: connection timed out` errors caused by silently dropped TCP connections being reused from the connection pool. See [#10735-comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10735#issuecomment-4535832301).
|
||||
|
||||
## [v1.144.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.144.0)
|
||||
|
||||
|
||||
@@ -59,10 +59,6 @@ Once connected, you can build graphs and dashboards using [PromQL](https://prome
|
||||
_Creating a datasource may require [specific permissions](https://grafana.com/docs/grafana/latest/administration/data-source-management/).
|
||||
If you don't see an option to create a data source - try contacting system administrator._
|
||||
|
||||
If you run [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) and want to see its rules in [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/),
|
||||
then set configure `-vmalert.proxyURL` on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
|
||||
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert).
|
||||
|
||||
## Multi-tenant access with vmauth and OIDC
|
||||
|
||||
[vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/) can proxy Grafana datasource requests and enforce
|
||||
|
||||
@@ -28,17 +28,9 @@ The following label sanitization options can be enabled:
|
||||
|
||||
> These flags can be applied on vmagent, vminsert or VictoriaMetrics single-node.
|
||||
|
||||
## Instrumentation Scope
|
||||
|
||||
By default, VictoriaMetrics promotes [OTel scope metadata](https://opentelemetry.io/docs/specs/otel/common/instrumentation-scope/) to metric labels. This behavior can be disabled via `-opentelemetry.promoteScopeMetadata`.
|
||||
|
||||
## Resource Attributes
|
||||
|
||||
By default, VictoriaMetrics promotes all [OpenTelemetry resource](https://opentelemetry.io/docs/specs/otel/resource/data-model/) attributes to labels and attaches them to all ingested OTLP metrics.
|
||||
The following attribute promotion options can be configured:
|
||||
* `-opentelemetry.promoteAllResourceAttributes` - promotes all resource attributes to labels, except for the ones configured with `-opentelemetry.ignoreResourceAttributes`.
|
||||
* `-opentelemetry.promoteResourceAttributes` - promotes specific list of resource attributes to labels. It cannot be configured simultaneously with `-opentelemetry.promoteAllResourceAttributes`.
|
||||
* `-opentelemetry.ignoreResourceAttributes` - controls which resource attributes to ignore, can only be set when `-opentelemetry.promoteAllResourceAttributes` is true.
|
||||
|
||||
## Exponential histograms
|
||||
|
||||
|
||||
@@ -26,7 +26,6 @@ Stream aggregation has the following features:
|
||||
and/or scraped from [Prometheus-compatible targets](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter)
|
||||
- It can filter out raw samples matched by aggregation rules, so raw data will never reach the remote destination. See `-streamAggr.keepInput` and `-streamAggr.dropInput` in [aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/);
|
||||
- It allows building [flexible processing pipelines](#routing);
|
||||
- It is [horizontally scalable](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#scaling-aggregation-horizontally).
|
||||
|
||||
# Limitations
|
||||
|
||||
@@ -599,47 +598,6 @@ Below is an example of an `aggr.yaml` configuration that drops the `replica` and
|
||||
keep_metric_names: true
|
||||
```
|
||||
|
||||
## Scaling aggregation horizontally
|
||||
|
||||
Aggregation output is only correct when all contributing samples are processed by the same aggregator instance.
|
||||
|
||||
To scale the aggregation horizontally, always shard the input samples in a deterministic way. This can be achieved by
|
||||
building a two layer topology of vmagents where the first layer is responsible for sharding, and the second layer is responsible for aggregating:
|
||||
```mermaid
|
||||
flowchart LR
|
||||
V1[vmagent-shard-1] -- requests_total{env=test, pod=foo} --> SV1[vmagent-aggr-1]
|
||||
V1[vmagent-shard-1] -- requests_total{env=prod, pod=bar} --> SV2[vmagent-aggr-1]
|
||||
V2[vmagent-shard-2] -- requests_total{env=prod, pod=baz} --> SV2[vmagent-aggr-2]
|
||||
SV1 -- requests_total:5m_without_pod_total{env=test} --> x(( ))
|
||||
SV2 -- requests_total:5m_without_pod_total{env=prod} --> y(( ))
|
||||
style x fill:none,stroke:none
|
||||
style y fill:none,stroke:none
|
||||
```
|
||||
|
||||
The sharding layer of vmagents can be configured via the `-remoteWrite.shardByURL.labels` or `-remoteWrite.shardByURL.ignoreLabels`
|
||||
command line flags. See how to [shard data across remote write destinations](https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages) for more details.
|
||||
|
||||
The following requirements must be met for sharded aggregation to work correctly:
|
||||
- All sharding vmagents should have the same deterministic sharding configuration.
|
||||
- The sharding configuration must align with the `by` and `without` lists:
|
||||
- Labels listed in `by` setting should be a subset of shard's routing key `-remoteWrite.shardByURL.labels`.
|
||||
With `-remoteWrite.shardByURL.labels=env,job` aggregator's `by` should include `by: env`, `by: job` or both: `by: [env, job]`.
|
||||
This makes sure that all the samples for the same `env` and `job` are aggregated together and produce the complete output.
|
||||
- Labels listed in `without` setting should be a superset of shard's routing key `--remoteWrite.shardByURL.ignoreLabels`.
|
||||
With `-remoteWrite.shardByURL.ignoreLabels=env,job` aggegator's `without` should include at least both labels `without: [env,job]`.
|
||||
This makes sure that `requests_total{env=test, job=foo}` and `requests_total{env=prod, job=foo}` are routed to the same aggregator
|
||||
and are aggregated together. See also [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938#issuecomment-2018470324).
|
||||
- Aggregating vmagents should not produce collisions: the aggregation output should be unique across all the sharded agents.
|
||||
For example, `requests_total:5m_without_env_pod_total` produced by both `vmagent-aggr-1` and `vmagent-aggr-2` will collide
|
||||
unless they have labels uniquely identifying them. These labels should be either preserved during sharding and aggregation config,
|
||||
or enforced on the output via `-remoteWrite.label` - see [these docs](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#cluster-mode) for more details.
|
||||
|
||||
> Never shard histograms by `le` (or `vmrange` in case of VM histograms) label. A histogram is a logical group of series differing
|
||||
only in the bucket label. All of those buckets must land on the same aggregator at the same time so it can produce a
|
||||
coherent bucket set. See more about [aggregating histograms](https://docs.victoriametrics.com/stream-aggregation/#aggregating-histograms).
|
||||
|
||||
See also [why you shouldn't put an aggregator behind a load balancer](https://docs.victoriametrics.com/stream-aggregation/#put-aggregator-behind-load-balancer).
|
||||
|
||||
# Troubleshooting
|
||||
|
||||
- [Unexpected spikes for `total` or `increase` outputs](#staleness).
|
||||
|
||||
@@ -217,19 +217,11 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-opentelemetry.convertMetricNamesToPrometheus
|
||||
Whether to convert only metric names into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
|
||||
-opentelemetry.ignoreResourceAttributes array
|
||||
Control which resource attributes to ignore, can only be set when 'opentelemetry.promoteAllResourceAttributes' is true.
|
||||
-opentelemetry.labelNameUnderscoreSanitization
|
||||
Whether to enable prepending of 'key' to labels starting with '_' when -opentelemetry.usePrometheusNaming is enabled. Reserved labels starting with '__' are not modified. See https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/ (default true)
|
||||
-opentelemetry.maxRequestSize size
|
||||
The maximum size in bytes of a single OpenTelemetry request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-opentelemetry.promoteAllResourceAttributes
|
||||
Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'.
|
||||
-opentelemetry.promoteResourceAttributes array
|
||||
Promote specific list of resource attributes to labels.
|
||||
-opentelemetry.promoteScopeMetadata
|
||||
Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.
|
||||
-opentelemetry.usePrometheusNaming
|
||||
Whether to convert metric names and labels into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
|
||||
-opentsdbHTTPListenAddr string
|
||||
|
||||
@@ -182,15 +182,15 @@ among remote storage systems specified in `-remoteWrite.url`.
|
||||
> For example, if you set `-remoteWrite.url=srv+foo` and it's resolved to three addresses (`192.168.1.1`, `192.168.1.2`, `192.168.1.3`),
|
||||
> vmagent will only choose **one** randomly every time it (re-)creates the connection. In contrast, specifying the addresses manually (`-remoteWrite.url=192.168.1.1 -remoteWrite.url=192.168.1.2 -remoteWrite.url=192.168.1.3`) will shard samples across all three URLs.
|
||||
|
||||
Use `-remoteWrite.shardByURL.labels` to route metrics among `-remoteWrite.url` based on their label values.
|
||||
For example, `-remoteWrite.shardByURL.labels=instance,__name__` would shard metrics with the same name and `instance`
|
||||
label to the same `-remoteWrite.url`. This command-line flag allows specifying a comma-separated list of labels.
|
||||
Sometimes, it may be necessary to use only a particular set of labels for sharding. For example, it may be necessary to route all the metrics with the same `instance` label
|
||||
to the same `-remoteWrite.url`. In this case, you can specify a comma-separated list of these labels in the `-remoteWrite.shardByURL.labels`
|
||||
command-line flag. For example, `-remoteWrite.shardByURL.labels=instance,__name__` would shard metrics with the same name and `instance`
|
||||
label to the same `-remoteWrite.url`.
|
||||
|
||||
Alternatively, you can use `-remoteWrite.shardByURL.ignoreLabels` to route metrics among `-remoteWrite.url` based on their label values, excluding the specified labels.
|
||||
For example, `-remoteWrite.shardByURL.ignoreLabels=pod` would shard metrics `metric{pod="foo"}` and `metric{pod="bar"}` to the same `-remoteWrite.url`
|
||||
by ignoring the `pod` label. This command-line flag allows specifying a comma-separated list of labels.
|
||||
|
||||
> Command-line flags `-remoteWrite.shardByURL.labels` and `-remoteWrite.shardByURL.ignoreLabels` are mutually exclusive.
|
||||
Sometimes, it may be necessary to ignore some labels when sharding samples across multiple `-remoteWrite.url` backends.
|
||||
For example, if all the [raw samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples) with the same set of labels
|
||||
except for the labels `instance` and `pod` must be routed to the same backend. In this case the list of ignored labels must be passed to
|
||||
`-remoteWrite.shardByURL.ignoreLabels` command-line flag: `-remoteWrite.shardByURL.ignoreLabels=instance,pod`.
|
||||
|
||||
See also [how to scrape a large number of targets](#scraping-big-number-of-targets).
|
||||
|
||||
|
||||
@@ -184,19 +184,11 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/vmagent/ .
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-opentelemetry.convertMetricNamesToPrometheus
|
||||
Whether to convert only metric names into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
|
||||
-opentelemetry.ignoreResourceAttributes array
|
||||
Control which resource attributes to ignore, can only be set when 'opentelemetry.promoteAllResourceAttributes' is true.
|
||||
-opentelemetry.labelNameUnderscoreSanitization
|
||||
Whether to enable prepending of 'key' to labels starting with '_' when -opentelemetry.usePrometheusNaming is enabled. Reserved labels starting with '__' are not modified. See https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/ (default true)
|
||||
-opentelemetry.maxRequestSize size
|
||||
The maximum size in bytes of a single OpenTelemetry request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-opentelemetry.promoteAllResourceAttributes
|
||||
Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'.
|
||||
-opentelemetry.promoteResourceAttributes array
|
||||
Promote specific list of resource attributes to labels.
|
||||
-opentelemetry.promoteScopeMetadata
|
||||
Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.
|
||||
-opentelemetry.usePrometheusNaming
|
||||
Whether to convert metric names and labels into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
|
||||
-opentsdbHTTPListenAddr string
|
||||
|
||||
@@ -21,9 +21,23 @@ Recording rules results are persisted via remote write protocols and require `-r
|
||||
`vmalert` is heavily inspired by [Prometheus](https://prometheus.io/docs/alerting/latest/overview/)
|
||||
implementation and aims to be compatible with its syntax.
|
||||
|
||||
Configure `-vmalert.proxyURL` on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
|
||||
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert)
|
||||
to proxy requests to `vmalert`. Proxying is needed for the following cases:
|
||||
|
||||
* to proxy requests from [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/);
|
||||
* to access `vmalert`'s UI through [vmui](https://docs.victoriametrics.com/victoriametrics/#vmui).
|
||||
|
||||
[VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_intro)
|
||||
provides out-of-the-box alerting functionality based on `vmalert`. This service simplifies the setup
|
||||
and management of alerting and recording rules as well as the integration with Alertmanager. For more details,
|
||||
please refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/alertmanager-setup-for-deployment/).
|
||||
|
||||
## Features
|
||||
|
||||
* Integration with VictoriaMetrics, VictoriaLogs, VictoriaTraces, Graphite and Prometheus compatible storages. See [Integrations](https://docs.victoriametrics.com/victoriametrics/vmalert/#integrations) for details;
|
||||
* Integration with [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) and [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/);
|
||||
* Integration with [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) and [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/). See [this doc](https://docs.victoriametrics.com/victorialogs/vmalert/);
|
||||
* Integration with [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) which also uses [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/). See [this doc](https://docs.victoriametrics.com/victoriatraces/vmalert/);
|
||||
* Prometheus [alerting rules definition format](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/#defining-alerting-rules)
|
||||
support;
|
||||
* Integration with [Alertmanager](https://github.com/prometheus/alertmanager) starting from [Alertmanager v0.16.0-alpha](https://github.com/prometheus/alertmanager/releases/tag/v0.16.0-alpha.0);
|
||||
@@ -48,8 +62,8 @@ implementation and aims to be compatible with its syntax.
|
||||
|
||||
To start using `vmalert` you will need the following things:
|
||||
|
||||
* list of rules - PromQL/MetricsQL/LogsQL/GraphiteQL expressions to execute;
|
||||
* datasource address - a storage that [vmalert integrates with](https://docs.victoriametrics.com/victoriametrics/vmalert/#integrations) for executing queries;
|
||||
* list of rules - PromQL/MetricsQL expressions to execute;
|
||||
* datasource address - reachable endpoint with [Prometheus HTTP API](https://prometheus.io/docs/prometheus/latest/querying/api/#http-api) support for running queries against;
|
||||
* notifier address [optional] - reachable [Alert Manager](https://github.com/prometheus/alertmanager) instance for processing,
|
||||
aggregating alerts, and sending notifications. Please note, notifier address also supports Consul and DNS Service Discovery via
|
||||
[config file](https://docs.victoriametrics.com/victoriametrics/vmalert/#notifier-configuration-file).
|
||||
@@ -59,7 +73,7 @@ To start using `vmalert` you will need the following things:
|
||||
* remote read address [optional] - MetricsQL compatible datasource to restore alerts state from.
|
||||
|
||||
You can use the existing [docker-compose environment](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#victoriametrics-single-server)
|
||||
as an example. It already contains vmalert configured with the list of alerting rules and integrated with Alert Manager and VictoriaMetrics.
|
||||
as example. It already contains vmalert configured with list of alerting rules and integrated with Alert Manager and VictoriaMetrics.
|
||||
|
||||
Alternatively, build `vmalert` from sources:
|
||||
|
||||
@@ -73,7 +87,7 @@ Then run `vmalert`:
|
||||
|
||||
```sh
|
||||
./bin/vmalert -rule=alert.rules \ # Path to the file with rules configuration. Supports wildcard and HTTP URL (S3/GCS are available in Enterprise).
|
||||
-datasource.url=http://localhost:8428 \ # VictoriaMetrics URL to query for rules evaluation. See other available Integrations above.
|
||||
-datasource.url=http://localhost:8428 \ # Prometheus HTTP API compatible datasource
|
||||
-notifier.url=http://localhost:9093 \ # AlertManager URL (required if alerting rules are used)
|
||||
-notifier.url=http://127.0.0.1:9093 \ # AlertManager replica URL
|
||||
-remoteWrite.url=http://localhost:8428 \ # Remote write compatible storage to persist rules and alerts state info (required if recording rules are used)
|
||||
@@ -93,7 +107,7 @@ See also [stream aggregation](https://docs.victoriametrics.com/victoriametrics/s
|
||||
|
||||
See the full list of configuration flags in [configuration](#configuration) section.
|
||||
|
||||
If you run multiple `vmalert` services for the same datasource or AlertManager and need to distinguish the results or alerts,
|
||||
If you run multiple `vmalert` services on the same datastore or AlertManager and need to distinguish the results or alerts,
|
||||
specify different `-external.label` command-line flags to indicate which `vmalert` generated them.
|
||||
If rule result metrics have label that conflict with `-external.label`, `vmalert` will automatically rename
|
||||
it with prefix `exported_`.
|
||||
@@ -102,7 +116,6 @@ Configuration for [recording](https://prometheus.io/docs/prometheus/latest/confi
|
||||
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
|
||||
similar to Prometheus rules and configured using YAML. Configuration examples may be found
|
||||
in [testdata](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/config/testdata) folder.
|
||||
|
||||
Every `rule` belongs to a `group` and every configuration file may contain arbitrary number of groups:
|
||||
|
||||
```yaml
|
||||
@@ -110,7 +123,13 @@ groups:
|
||||
[ - <rule_group> ]
|
||||
```
|
||||
|
||||
## Groups
|
||||
> Explore how to integrate `vmalert` with [VictoriaMetrics Anomaly Detection](https://docs.victoriametrics.com/anomaly-detection/) in the following [guide](https://docs.victoriametrics.com/anomaly-detection/guides/guide-vmanomaly-vmalert/).
|
||||
|
||||
> For users of [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_config),
|
||||
> many of the configuration steps (including highly available setup of `vmalert` for cluster deployments) are handled automatically.
|
||||
> Please, refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/alertmanager-setup-for-deployment/) for more details.
|
||||
|
||||
### Groups
|
||||
|
||||
Each group has the following attributes:
|
||||
|
||||
@@ -211,11 +230,9 @@ rules:
|
||||
|
||||
### Rules
|
||||
|
||||
Every rule contains an `expr` field for the expression to evaluate against the configured datasource.
|
||||
Depending on `group.type` value or `-rule.defaultRuleType` cmd-line flag expression can be one of the following types:
|
||||
- `prometheus` (default) - [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/) or [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) expression.
|
||||
- `vlogs` - [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/vmalert/) expression.
|
||||
- `graphite` - [Graphite](https://graphite.readthedocs.io/en/stable/render_api.html) expression.
|
||||
Every rule contains `expr` field for [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/)
|
||||
or [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) expression. `vmalert` will execute the configured
|
||||
expression and then act according to the Rule type.
|
||||
|
||||
There are two types of Rules:
|
||||
|
||||
@@ -227,7 +244,8 @@ There are two types of Rules:
|
||||
`-remoteWrite.url`. Recording rules are used to precompute frequently needed or computationally
|
||||
expensive expressions and save their result as a new set of time series ([Prometheus recording rules docs](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)).
|
||||
|
||||
> `vmalert` forbids defining duplicates - rules with the same combination of name, expression and labels within one group.
|
||||
`vmalert` forbids defining duplicates - rules with the same combination of name, expression, and labels
|
||||
within one group.
|
||||
|
||||
#### Alerting rules
|
||||
|
||||
@@ -238,8 +256,8 @@ The syntax for alerting rule is the following:
|
||||
alert: <string>
|
||||
|
||||
# The expression to evaluate. The expression language depends on the type value.
|
||||
# By default, PromQL/MetricsQL expression is used. Other available types are "graphite" and "vlogs".
|
||||
# See https://docs.victoriametrics.com/victoriametrics/vmalert/#integrations
|
||||
# By default, PromQL/MetricsQL expression is used. If group.type="graphite", then the expression
|
||||
# must contain valid Graphite expression.
|
||||
expr: <string>
|
||||
|
||||
# Alerts are considered firing once they have been returned for this long.
|
||||
@@ -285,44 +303,7 @@ annotations:
|
||||
[ <labelname>: <tmpl_string> ]
|
||||
```
|
||||
|
||||
#### Recording rules
|
||||
|
||||
The syntax for recording rules is the following:
|
||||
|
||||
```yaml
|
||||
# The name of the time series to output to. Must be a valid metric name.
|
||||
record: <string>
|
||||
|
||||
# The expression to evaluate. The expression language depends on the type value.
|
||||
# By default, PromQL/MetricsQL expression is used. Other available types are "graphite" and "vlogs".
|
||||
# See https://docs.victoriametrics.com/victoriametrics/vmalert/#integrations
|
||||
expr: <string>
|
||||
|
||||
# Labels to add or overwrite labels from other external label sources, such as group labels, before storing the result.
|
||||
#
|
||||
# In case of conflicts, original labels are kept with prefix `exported_`.
|
||||
# As a special case, specifying a label with an empty string value removes the label from the result if it exists
|
||||
# in the original query result; otherwise, it is ignored.
|
||||
#
|
||||
# Labels do not support templating in https://docs.victoriametrics.com/victoriametrics/vmalert/#templating due to cardinality concerns. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8171.
|
||||
labels:
|
||||
[ <labelname>: <labelvalue> ]
|
||||
|
||||
# Whether to print debug information into logs.
|
||||
# Information includes requests sent to the datasource.
|
||||
# information - it will be printed to logs.
|
||||
# Logs are printed with INFO level, so make sure that -loggerLevel=INFO to see the output.
|
||||
[ debug: <bool> | default = false ]
|
||||
|
||||
# Defines the number of rule updates entries stored in memory
|
||||
# and available for view on rule Details page.
|
||||
# Overrides `rule.updateEntriesLimit` value for this specific rule.
|
||||
[ update_entries_limit: <integer> | default 0 ]
|
||||
```
|
||||
|
||||
For recording rules to work `-remoteWrite.url` must be specified.
|
||||
|
||||
## Templating
|
||||
#### Templating
|
||||
|
||||
It is allowed to use [Go templating](https://golang.org/pkg/text/template/) in annotations and labels(with limited support) to format data, iterate over
|
||||
or execute expressions.
|
||||
@@ -344,7 +325,7 @@ The following variables are available in templating:
|
||||
|
||||
Additionally, `vmalert` provides some extra templating functions listed in [template functions](#template-functions) and [reusable templates](#reusable-templates).
|
||||
|
||||
### Template functions
|
||||
#### Template functions
|
||||
|
||||
`vmalert` provides the following template functions, which can be used during [templating](#templating):
|
||||
|
||||
@@ -368,7 +349,7 @@ Additionally, `vmalert` provides some extra templating functions listed in [temp
|
||||
* `parseDurationTime` - parses the input string into [time.Duration](https://pkg.go.dev/time#Duration).
|
||||
* `pathEscape` - escapes the input string, so it can be safely put inside path part of URL.
|
||||
* `pathPrefix` - returns the path part of the `-external.url` command-line flag.
|
||||
* `query` - executes query against `-datasource.url` and returns the query result.
|
||||
* `query` - executes the [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) query against `-datasource.url` and returns the query result.
|
||||
For example, `{{ query "sort_desc(process_resident_memory_bytes)" | first | value }}` executes the `sort_desc(process_resident_memory_bytes)`
|
||||
query at `-datasource.url` and returns the first result.
|
||||
* `queryEscape` - escapes the input string, so it can be safely put inside [query arg](https://en.wikipedia.org/wiki/Percent-encoding) part of URL.
|
||||
@@ -388,7 +369,7 @@ Additionally, `vmalert` provides some extra templating functions listed in [temp
|
||||
* `toUpper` - converts all the chars in the input string to uppercase.
|
||||
* `value` - returns the numeric value from the input query result.
|
||||
|
||||
### Reusable templates
|
||||
#### Reusable templates
|
||||
|
||||
Like in Alertmanager you can define [reusable templates](https://prometheus.io/docs/prometheus/latest/configuration/template_examples/#defining-reusable-templates)
|
||||
to share same templates across annotations. Just define the templates in a file and
|
||||
@@ -426,8 +407,44 @@ groups:
|
||||
The `-rule.templates` flag supports wildcards so multiple files with templates can be loaded.
|
||||
The content of `-rule.templates` can be also [hot reloaded](#hot-config-reload).
|
||||
|
||||
#### Recording rules
|
||||
|
||||
## Alerts state on restarts
|
||||
The syntax for recording rules is following:
|
||||
|
||||
```yaml
|
||||
# The name of the time series to output to. Must be a valid metric name.
|
||||
record: <string>
|
||||
|
||||
# The expression to evaluate. The expression language depends on the type value.
|
||||
# By default, MetricsQL expression is used. If group.type="graphite", then the expression
|
||||
# must contain valid Graphite expression.
|
||||
expr: <string>
|
||||
|
||||
# Labels to add or overwrite labels from other external label sources, such as group labels, before storing the result.
|
||||
#
|
||||
# In case of conflicts, original labels are kept with prefix `exported_`.
|
||||
# As a special case, specifying a label with an empty string value removes the label from the result if it exists
|
||||
# in the original query result; otherwise, it is ignored.
|
||||
#
|
||||
# Labels do not support templating in https://docs.victoriametrics.com/victoriametrics/vmalert/#templating due to cardinality concerns. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8171.
|
||||
labels:
|
||||
[ <labelname>: <labelvalue> ]
|
||||
|
||||
# Whether to print debug information into logs.
|
||||
# Information includes requests sent to the datasource.
|
||||
# information - it will be printed to logs.
|
||||
# Logs are printed with INFO level, so make sure that -loggerLevel=INFO to see the output.
|
||||
[ debug: <bool> | default = false ]
|
||||
|
||||
# Defines the number of rule's updates entries stored in memory
|
||||
# and available for view on rule's Details page.
|
||||
# Overrides `rule.updateEntriesLimit` value for this specific rule.
|
||||
[ update_entries_limit: <integer> | default 0 ]
|
||||
```
|
||||
|
||||
For recording rules to work `-remoteWrite.url` must be specified.
|
||||
|
||||
### Alerts state on restarts
|
||||
|
||||
`vmalert` holds alerts state in the memory. Restart of the `vmalert` process will reset the state of all active alerts
|
||||
in the memory. To prevent `vmalert` from losing the state on restarts configure it to persist the state
|
||||
@@ -448,7 +465,7 @@ in configured `-remoteRead.url`, weren't updated in the last `1h` (controlled by
|
||||
or received state doesn't match current `vmalert` rules configuration. `vmalert` marks successfully restored rules
|
||||
with `restored` label in [web UI](#web).
|
||||
|
||||
## Link to alert source
|
||||
### Link to alert source
|
||||
|
||||
Alerting notifications sent by vmalert always contain a `source` link. By default, the link format
|
||||
is the following `http://<vmalert-addr>/vmalert/alert?group_id=<group_id>&alert_id=<alert_id>`. On click, it opens
|
||||
@@ -484,9 +501,7 @@ In addition to `source` link, some extra links could be added to alert's [annota
|
||||
field. See [how we use them](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/9751ea10983d42068487624849cac7ad6fd7e1d8/deployment/docker/rules/alerts-cluster.yml#L44)
|
||||
to link alerting rule and the corresponding panel on Grafana dashboard.
|
||||
|
||||
## Multitenancy
|
||||
|
||||
> See how to use [multitenancy in rules for VictoriaLogs](https://docs.victoriametrics.com/victorialogs/vmalert/#how-to-use-multitenancy-in-rules).
|
||||
### Multitenancy
|
||||
|
||||
There are the following approaches exist for alerting and recording rules across
|
||||
[multiple tenants](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy):
|
||||
@@ -544,7 +559,7 @@ The enterprise version of vmalert is available in `vmutils-*-enterprise.tar.gz`
|
||||
at [release page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) and in `*-enterprise`
|
||||
tags at [Docker Hub](https://hub.docker.com/r/victoriametrics/vmalert/tags) and [Quay](https://quay.io/repository/victoriametrics/vmalert?tab=tags).
|
||||
|
||||
## Reading rules from object storage
|
||||
### Reading rules from object storage
|
||||
|
||||
[Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/) of `vmalert` may read alerting and recording rules
|
||||
from object storage:
|
||||
@@ -563,7 +578,7 @@ The following [command-line flags](#flags) can be used for fine-tuning access to
|
||||
* `-s3.customEndpoint` - custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set.
|
||||
* `-s3.forcePathStyle` - prefixing endpoint with bucket name when set false, true by default.
|
||||
|
||||
## Topology examples
|
||||
### Topology examples
|
||||
|
||||
The following sections are showing how `vmalert` may be used and configured
|
||||
for different scenarios.
|
||||
@@ -574,7 +589,7 @@ Please note, not all flags in examples are required:
|
||||
you have recording rules or want to store [alerts state](#alerts-state-on-restarts) on `vmalert` restarts;
|
||||
* `-notifier.url` is optional and is needed only if you have alerting rules.
|
||||
|
||||
### Single-node VictoriaMetrics
|
||||
#### Single-node VictoriaMetrics
|
||||
|
||||
The simplest configuration where one single-node VM server is used for
|
||||
rules execution, storing recording rules results and alerts state.
|
||||
@@ -592,7 +607,7 @@ rules execution, storing recording rules results and alerts state.
|
||||

|
||||
{width="500"}
|
||||
|
||||
### Cluster VictoriaMetrics
|
||||
#### Cluster VictoriaMetrics
|
||||
|
||||
In [cluster mode](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/)
|
||||
VictoriaMetrics has separate components for writing and reading path:
|
||||
@@ -615,7 +630,7 @@ Cluster mode could have multiple `vminsert` and `vmselect` components.
|
||||
In case when you want to spread the load on these components - add balancers before them and configure
|
||||
`vmalert` with balancer addresses. Please, see more about [VictoriaMetrics cluster architecture](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#architecture-overview).
|
||||
|
||||
### HA vmalert
|
||||
#### HA vmalert
|
||||
|
||||
For High Availability(HA) user can run multiple identically configured `vmalert` instances.
|
||||
It means all of them will execute the same rules, write state and results to
|
||||
@@ -659,12 +674,12 @@ to ensure [high availability](https://github.com/prometheus/alertmanager#high-av
|
||||
This example uses single-node VM server for the sake of simplicity.
|
||||
Check how to replace it with [cluster VictoriaMetrics](#cluster-victoriametrics) if needed.
|
||||
|
||||
### Downsampling and aggregation via vmalert
|
||||
#### Downsampling and aggregation via vmalert
|
||||
|
||||
_Please note, [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/) might be more efficient
|
||||
for cases when downsampling or aggregation need to be applied **before data gets into the TSDB.**_
|
||||
|
||||
`vmalert` can't modify existing data. But it can run arbitrary queries
|
||||
`vmalert` can't modify existing data. But it can run arbitrary PromQL/MetricsQL queries
|
||||
via [recording rules](#recording-rules) and backfill results to the configured `-remoteWrite.url`.
|
||||
This ability allows to aggregate data. For example, the following rule will calculate the average value for
|
||||
metric `http_requests` on the `5m` interval:
|
||||
@@ -717,7 +732,7 @@ Flags `-remoteRead.url` and `-notifier.url` are omitted since we assume only rec
|
||||
|
||||
See also [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/) and [downsampling](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#downsampling).
|
||||
|
||||
### Multiple remote writes
|
||||
#### Multiple remote writes
|
||||
|
||||
For persisting recording or alerting rule results `vmalert` requires `-remoteWrite.url` to be set.
|
||||
But this flag supports only one destination. To persist rule results to multiple destinations
|
||||
@@ -731,72 +746,7 @@ Using `vmagent` as a proxy provides additional benefits such as
|
||||
[data persisting when storage is unreachable](https://docs.victoriametrics.com/victoriametrics/vmagent/#replication-and-high-availability),
|
||||
or time series modification via [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/).
|
||||
|
||||
## Integrations
|
||||
|
||||
vmalert can be integrated with different data sources for alerting and recording rules. But it deliberately allows
|
||||
configuring only one `datasource.url`. We recommend running separate instances of vmalert for each datasource type
|
||||
with the specified `-rule.defaultRuleType=<datasource_type>` command-line flag.
|
||||
|
||||
###### VictoriaMetrics
|
||||
|
||||
vmalert natively integrates with [VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/) for alerting and
|
||||
recording rules.
|
||||
|
||||
###### VictoriaLogs
|
||||
|
||||
vmalert integrates with [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) and allows configuring alerting and recording rules using [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/).
|
||||
Results of recording rules and alerting state should be persisted to the remote-write compatible storage, such as VictoriaMetrics.
|
||||
To enable VictoriaLogs compatibility set the `-rule.defaultRuleType=vlogs` command-line flag.
|
||||
|
||||
See [this doc](https://docs.victoriametrics.com/victorialogs/vmalert/) for details.
|
||||
|
||||
###### VictoriaTraces
|
||||
|
||||
vmalert integrates with [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) in exactly the same way as
|
||||
with [VictoriaLogs](https://docs.victoriametrics.com/victoriametrics/vmalert/#victorialogs).
|
||||
|
||||
###### Graphite
|
||||
|
||||
vmalert integrates with [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) and allows configuring alerting and recording rules.
|
||||
During evaluation, vmalert will send requests to `<-datasource.url>/render?format=json`.
|
||||
To enable Graphite compatibility set the `-rule.defaultRuleType=graphite` command-line flag.
|
||||
|
||||
Since VictoriaMetrics supports both Graphite and Prometheus APIs, it is possible to mix Graphite and VictoriaMetrics rules.
|
||||
On the group level, set the `type` field to specify to which datasource type it should belong: `prometheus` (MetricsQL) or `graphite` (GraphiteQL).
|
||||
When using vmalert with both `graphite` and `prometheus` rules configured against the cluster version of VictoriaMetrics, don't forget
|
||||
to set the `-datasource.appendTypePrefix` flag to `true`, so vmalert can adjust URL prefix automatically based on the query type.
|
||||
|
||||
###### Prometheus
|
||||
|
||||
vmalert uses [Prometheus HTTP API](https://prometheus.io/docs/prometheus/latest/querying/api/#http-api) for querying
|
||||
and [Prometheus Remote Write v1 protocol](https://prometheus.io/docs/specs/prw/remote_write_spec/) for persisting
|
||||
recording rules results and alerting state. Hence, it can be integrated with any Prometheus-compatible storage
|
||||
that supports these protocols.
|
||||
|
||||
###### Grafana
|
||||
|
||||
To proxy requests from [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/) configure `-vmalert.proxyURL`
|
||||
on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
|
||||
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert).
|
||||
|
||||
###### vmui
|
||||
|
||||
To access rules UI through [vmui](https://docs.victoriametrics.com/victoriametrics/#vmui) configure `-vmalert.proxyURL`
|
||||
on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
|
||||
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert).
|
||||
|
||||
###### vmanomaly
|
||||
|
||||
See how to integrate vmalert with [VictoriaMetrics Anomaly Detection](https://docs.victoriametrics.com/anomaly-detection/)
|
||||
in the following [guide](https://docs.victoriametrics.com/anomaly-detection/guides/guide-vmanomaly-vmalert/).
|
||||
|
||||
###### VictoriaMetrics Cloud
|
||||
|
||||
For users of [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_config),
|
||||
many of the configuration steps (including highly available setup of `vmalert` for cluster deployments) are handled automatically.
|
||||
Please refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/alertmanager-setup-for-deployment/) for more details.
|
||||
|
||||
## Web
|
||||
### Web
|
||||
|
||||
`vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
|
||||
|
||||
@@ -821,11 +771,28 @@ This may be used for better integration with Grafana unified alerting system. Se
|
||||
* [How to query vmalert from single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
|
||||
* [How to query vmalert from VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert)
|
||||
|
||||
## Graphite
|
||||
|
||||
vmalert sends requests to `<-datasource.url>/render?format=json` during evaluation of alerting and recording rules
|
||||
if the corresponding group or rule contains `type: "graphite"` config option. It is expected that the `<-datasource.url>/render`
|
||||
implements [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) for `format=json`.
|
||||
When using vmalert with both `graphite` and `prometheus` rules configured against cluster version of VM do not forget
|
||||
to set `-datasource.appendTypePrefix` flag to `true`, so vmalert can adjust URL prefix automatically based on the query type.
|
||||
|
||||
## VictoriaLogs
|
||||
|
||||
vmalert supports [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) as a datasource for writing alerting and recording rules using [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/). See [this doc](https://docs.victoriametrics.com/victorialogs/vmalert/) for details.
|
||||
|
||||
## VictoriaTraces
|
||||
|
||||
vmalert supports [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) as a (`vlogs`) datasource for writing alerting and recording rules using [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/). See [this doc](https://docs.victoriametrics.com/victoriatraces/vmalert/) for details.
|
||||
|
||||
## Rules backfilling
|
||||
|
||||
vmalert supports alerting and recording rules backfilling (aka `replay`). In replay mode vmalert
|
||||
can read the same rules configuration as normal, evaluate them on the given time range and backfill
|
||||
results via remote write to the configured storage. vmalert supports only the `prometheus` datasource type for backfilling.
|
||||
results via remote write to the configured storage. vmalert supports any PromQL/MetricsQL compatible
|
||||
data source for backfilling.
|
||||
|
||||
Please note, that response caching may lead to unexpected results during and after backfilling process.
|
||||
In order to avoid this you need to reset cache contents or disable caching when using backfilling
|
||||
@@ -885,9 +852,13 @@ vmalert respects `evaluationInterval` value set by flag or per-group during the
|
||||
vmalert automatically disables caching on VictoriaMetrics side by sending `nocache=1` param. It allows
|
||||
to prevent cache pollution and unwanted time range boundaries adjustment during backfilling.
|
||||
|
||||
Results of recording rules `replay` should match the results of normal rules evaluation.
|
||||
#### Recording rules
|
||||
|
||||
Results of alerting rules `replay` are the time series reflecting the [state of the alert](#alerts-state-on-restarts).
|
||||
The result of recording rules `replay` should match with results of normal rules evaluation.
|
||||
|
||||
#### Alerting rules
|
||||
|
||||
The result of alerting rules `replay` is time series reflecting [alert's state](#alerts-state-on-restarts).
|
||||
To see if `replayed` alert has fired in the past use the following PromQL/MetricsQL expression:
|
||||
|
||||
```
|
||||
|
||||
@@ -184,19 +184,11 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/cluster-victori
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-opentelemetry.convertMetricNamesToPrometheus
|
||||
Whether to convert only metric names into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
|
||||
-opentelemetry.ignoreResourceAttributes array
|
||||
Control which resource attributes to ignore, can only be set when 'opentelemetry.promoteAllResourceAttributes' is true.
|
||||
-opentelemetry.labelNameUnderscoreSanitization
|
||||
Whether to enable prepending of 'key' to labels starting with '_' when -opentelemetry.usePrometheusNaming is enabled. Reserved labels starting with '__' are not modified. See https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/ (default true)
|
||||
-opentelemetry.maxRequestSize size
|
||||
The maximum size in bytes of a single OpenTelemetry request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-opentelemetry.promoteAllResourceAttributes
|
||||
Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'.
|
||||
-opentelemetry.promoteResourceAttributes array
|
||||
Promote specific list of resource attributes to labels.
|
||||
-opentelemetry.promoteScopeMetadata
|
||||
Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.
|
||||
-opentelemetry.usePrometheusNaming
|
||||
Whether to convert metric names and labels into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
|
||||
-opentsdbHTTPListenAddr string
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
package netutil
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -40,6 +42,7 @@ type ConnPool struct {
|
||||
type connWithTimestamp struct {
|
||||
bc *handshake.BufferedConn
|
||||
lastActiveTime uint64
|
||||
watchCh chan bool
|
||||
}
|
||||
|
||||
var (
|
||||
@@ -202,20 +205,27 @@ func (cp *ConnPool) dialAndHandshake() (*handshake.BufferedConn, error) {
|
||||
}
|
||||
|
||||
func (cp *ConnPool) tryGetConn() (*handshake.BufferedConn, error) {
|
||||
cp.mu.Lock()
|
||||
defer cp.mu.Unlock()
|
||||
for {
|
||||
cp.mu.Lock()
|
||||
if cp.isStopped {
|
||||
cp.mu.Unlock()
|
||||
return nil, fmt.Errorf("conn pool to %s cannot be used, since it is stopped", cp.d.addr)
|
||||
}
|
||||
if len(cp.conns) == 0 {
|
||||
err := cp.lastDialError
|
||||
cp.mu.Unlock()
|
||||
return nil, err
|
||||
}
|
||||
c := cp.conns[len(cp.conns)-1]
|
||||
cp.conns = cp.conns[:len(cp.conns)-1]
|
||||
cp.mu.Unlock()
|
||||
|
||||
if cp.isStopped {
|
||||
return nil, fmt.Errorf("conn pool to %s cannot be used, since it is stopped", cp.d.addr)
|
||||
bc := c.stopWatcher()
|
||||
if bc == nil {
|
||||
continue
|
||||
}
|
||||
return bc, nil
|
||||
}
|
||||
if len(cp.conns) == 0 {
|
||||
return nil, cp.lastDialError
|
||||
}
|
||||
c := cp.conns[len(cp.conns)-1]
|
||||
bc := c.bc
|
||||
c.bc = nil
|
||||
cp.conns = cp.conns[:len(cp.conns)-1]
|
||||
return bc, nil
|
||||
}
|
||||
|
||||
// Put puts bc back to the pool.
|
||||
@@ -228,6 +238,8 @@ func (cp *ConnPool) Put(bc *handshake.BufferedConn) {
|
||||
_ = bc.Close()
|
||||
return
|
||||
}
|
||||
watchCh := make(chan bool, 1)
|
||||
go watchConn(bc, watchCh)
|
||||
cp.mu.Lock()
|
||||
if cp.isStopped {
|
||||
_ = bc.Close()
|
||||
@@ -235,6 +247,7 @@ func (cp *ConnPool) Put(bc *handshake.BufferedConn) {
|
||||
cp.conns = append(cp.conns, connWithTimestamp{
|
||||
bc: bc,
|
||||
lastActiveTime: fasttime.UnixTimestamp(),
|
||||
watchCh: watchCh,
|
||||
})
|
||||
}
|
||||
cp.mu.Unlock()
|
||||
@@ -325,3 +338,41 @@ func forEachConnPool(f func(cp *ConnPool)) {
|
||||
wg.Wait()
|
||||
connPoolsMu.Unlock()
|
||||
}
|
||||
|
||||
// watchConn blocks on Read until the connection is closed or enters an error state.
|
||||
// It sends true to watchCh if the connection is dead, false if it was stopped by a deadline.
|
||||
func watchConn(bc *handshake.BufferedConn, watchCh chan<- bool) {
|
||||
var buf [1]byte
|
||||
_, err := bc.Conn.Read(buf[:])
|
||||
watchCh <- !errors.Is(err, os.ErrDeadlineExceeded)
|
||||
}
|
||||
|
||||
// stopWatcher stops the watchConn goroutine and returns the connection if it is still alive.
|
||||
// Returns nil if the connection is dead.
|
||||
func (c *connWithTimestamp) stopWatcher() *handshake.BufferedConn {
|
||||
// Fast path: goroutine already reported the connection state.
|
||||
select {
|
||||
case dead := <-c.watchCh:
|
||||
if dead {
|
||||
_ = c.bc.Close()
|
||||
return nil
|
||||
}
|
||||
default:
|
||||
}
|
||||
|
||||
// Stop the goroutine by setting a deadline in the past, then wait for its response.
|
||||
if err := c.bc.Conn.SetReadDeadline(time.Now()); err != nil {
|
||||
_ = c.bc.Close()
|
||||
return nil
|
||||
}
|
||||
dead := <-c.watchCh
|
||||
if err := c.bc.Conn.SetReadDeadline(time.Time{}); err != nil {
|
||||
_ = c.bc.Close()
|
||||
return nil
|
||||
}
|
||||
if dead {
|
||||
_ = c.bc.Close()
|
||||
return nil
|
||||
}
|
||||
return c.bc
|
||||
}
|
||||
|
||||
@@ -14,14 +14,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
)
|
||||
|
||||
// DecodeMetricsOptions defines options for DecodeMetricsData
|
||||
type DecodeMetricsOptions struct {
|
||||
DisableScopeMetadata bool
|
||||
DisableResourceAttributes bool
|
||||
// ResourceAttributesList stored a list of resource attributes to ignore or promote based on the value of DisableResourceAttributes.
|
||||
ResourceAttributesList map[string]struct{}
|
||||
}
|
||||
|
||||
// MetricPusher must push the parsed samples and metric metadata to the underlying storage.
|
||||
type MetricPusher interface {
|
||||
// PushSample must store a sample with the given args.
|
||||
@@ -81,8 +73,8 @@ func (r *MetricsData) marshalProtobuf(mm *easyproto.MessageMarshaler) {
|
||||
}
|
||||
}
|
||||
|
||||
// DecodeMetricsData decodes metricsData with given options from src and sends the decoded data to mp.
|
||||
func DecodeMetricsData(src []byte, mp MetricPusher, options DecodeMetricsOptions) (err error) {
|
||||
// DecodeMetricsData decodes metricsData from src and sends the decoded data to mp.
|
||||
func DecodeMetricsData(src []byte, mp MetricPusher) (err error) {
|
||||
// See https://github.com/open-telemetry/opentelemetry-proto/blob/049d4332834935792fd4dbd392ecd31904f99ba2/opentelemetry/proto/metrics/v1/metrics.proto#L56
|
||||
//
|
||||
// message MetricsData {
|
||||
@@ -104,7 +96,7 @@ func DecodeMetricsData(src []byte, mp MetricPusher, options DecodeMetricsOptions
|
||||
if !ok {
|
||||
return fmt.Errorf("cannot read ResourceMetrics data")
|
||||
}
|
||||
if err := dctx.decodeResourceMetrics(data, options); err != nil {
|
||||
if err := dctx.decodeResourceMetrics(data); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal ResourceMetrics: %w", err)
|
||||
}
|
||||
}
|
||||
@@ -129,7 +121,7 @@ func (rm *ResourceMetrics) marshalProtobuf(mm *easyproto.MessageMarshaler) {
|
||||
}
|
||||
}
|
||||
|
||||
func (dctx *decoderContext) decodeResourceMetrics(src []byte, options DecodeMetricsOptions) error {
|
||||
func (dctx *decoderContext) decodeResourceMetrics(src []byte) error {
|
||||
// See https://github.com/open-telemetry/opentelemetry-proto/blob/049d4332834935792fd4dbd392ecd31904f99ba2/opentelemetry/proto/metrics/v1/metrics.proto#L66
|
||||
//
|
||||
// message ResourceMetrics {
|
||||
@@ -145,7 +137,7 @@ func (dctx *decoderContext) decodeResourceMetrics(src []byte, options DecodeMetr
|
||||
return fmt.Errorf("cannot read Resource data: %w", err)
|
||||
}
|
||||
if ok {
|
||||
if err := dctx.decodeResource(resourceData, options.DisableResourceAttributes, options.ResourceAttributesList); err != nil {
|
||||
if err := dctx.decodeResource(resourceData); err != nil {
|
||||
return fmt.Errorf("cannot decode Resource: %w", err)
|
||||
}
|
||||
}
|
||||
@@ -165,7 +157,7 @@ func (dctx *decoderContext) decodeResourceMetrics(src []byte, options DecodeMetr
|
||||
return fmt.Errorf("cannot read ScopeMetrics data")
|
||||
}
|
||||
|
||||
if err := dctx.decodeScopeMetrics(data, options.DisableScopeMetadata); err != nil {
|
||||
if err := dctx.decodeScopeMetrics(data); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal ScopeMetrics: %w", err)
|
||||
}
|
||||
|
||||
@@ -188,7 +180,7 @@ func (r *Resource) marshalProtobuf(mm *easyproto.MessageMarshaler) {
|
||||
}
|
||||
}
|
||||
|
||||
func (dctx *decoderContext) decodeResource(src []byte, disableResourceAttributes bool, attributeKeys map[string]struct{}) (err error) {
|
||||
func (dctx *decoderContext) decodeResource(src []byte) (err error) {
|
||||
// See https://github.com/open-telemetry/opentelemetry-proto/blob/049d4332834935792fd4dbd392ecd31904f99ba2/opentelemetry/proto/resource/v1/resource.proto#L28
|
||||
//
|
||||
// message Resource {
|
||||
@@ -207,34 +199,7 @@ func (dctx *decoderContext) decodeResource(src []byte, disableResourceAttributes
|
||||
if !ok {
|
||||
return fmt.Errorf("cannot read Attributes")
|
||||
}
|
||||
keySuffix, ok, err := easyproto.GetString(data, 1)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot find Key in KeyValue: %w", err)
|
||||
}
|
||||
if !ok {
|
||||
// Key is missing, skip it.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaLogs/issues/869#issuecomment-3631307996
|
||||
continue
|
||||
}
|
||||
if _, ok := attributeKeys[keySuffix]; ok != disableResourceAttributes {
|
||||
// Skip the attribute if:
|
||||
// 1. it is in the list of ignore attributes when disableResourceAttributes is false,
|
||||
// 2. it isn't in the list of promote attributes when disableResourceAttributes is true.
|
||||
continue
|
||||
}
|
||||
key := dctx.fb.formatSubFieldName("", keySuffix)
|
||||
|
||||
// Decode value
|
||||
value, ok, err := easyproto.GetMessageData(data, 2)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot find Value in KeyValue: %w", err)
|
||||
}
|
||||
if !ok {
|
||||
// Value is null, skip it.
|
||||
continue
|
||||
}
|
||||
|
||||
if err := decodeAnyValue(value, &dctx.ls, &dctx.fb, key); err != nil {
|
||||
if err := decodeKeyValue(data, &dctx.ls, &dctx.fb, ""); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal Attributes: %w", err)
|
||||
}
|
||||
}
|
||||
@@ -292,6 +257,7 @@ func decodeKeyValue(src []byte, ls *promutil.Labels, fb *fmtBuffer, keyPrefix st
|
||||
if err := decodeAnyValue(valueData, ls, fb, key); err != nil {
|
||||
return fmt.Errorf("cannot decode AnyValue: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -486,7 +452,7 @@ func (sm *ScopeMetrics) marshalProtobuf(mm *easyproto.MessageMarshaler) {
|
||||
}
|
||||
}
|
||||
|
||||
func (dctx *decoderContext) decodeScopeMetrics(src []byte, disableScopeMetadata bool) error {
|
||||
func (dctx *decoderContext) decodeScopeMetrics(src []byte) error {
|
||||
// See https://github.com/open-telemetry/opentelemetry-proto/blob/049d4332834935792fd4dbd392ecd31904f99ba2/opentelemetry/proto/metrics/v1/metrics.proto#L86
|
||||
//
|
||||
// message ScopeMetrics {
|
||||
@@ -494,22 +460,19 @@ func (dctx *decoderContext) decodeScopeMetrics(src []byte, disableScopeMetadata
|
||||
// repeated Metric metrics = 2;
|
||||
// }
|
||||
|
||||
if !disableScopeMetadata {
|
||||
scopeData, ok, err := easyproto.GetMessageData(src, 1)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read InstrumentationScope: %w", err)
|
||||
}
|
||||
if ok {
|
||||
if err := dctx.decodeInstrumentationScope(scopeData); err != nil {
|
||||
return fmt.Errorf("cannot decode InstrumentationScope: %w", err)
|
||||
}
|
||||
scopeData, ok, err := easyproto.GetMessageData(src, 1)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read InstrumentationScope: %w", err)
|
||||
}
|
||||
if ok {
|
||||
if err := dctx.decodeInstrumentationScope(scopeData); err != nil {
|
||||
return fmt.Errorf("cannot decode InstrumentationScope: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
dctxSnapshot := dctx.getSnapshot()
|
||||
|
||||
var fc easyproto.FieldContext
|
||||
var err error
|
||||
for len(src) > 0 {
|
||||
src, err = fc.NextField(src)
|
||||
if err != nil {
|
||||
|
||||
@@ -1,166 +0,0 @@
|
||||
package pb
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
)
|
||||
|
||||
type testMetricPusher struct {
|
||||
samples []testSample
|
||||
metadata []MetricMetadata
|
||||
}
|
||||
|
||||
type testSample struct {
|
||||
mm MetricMetadata
|
||||
suffix string
|
||||
labels []prompb.Label
|
||||
ts uint64
|
||||
value float64
|
||||
}
|
||||
|
||||
func (p *testMetricPusher) PushSample(mm *MetricMetadata, suffix string, ls *promutil.Labels, timestampNsecs uint64, value float64, _ uint32) {
|
||||
labels := make([]prompb.Label, len(ls.Labels))
|
||||
copy(labels, ls.Labels)
|
||||
p.samples = append(p.samples, testSample{
|
||||
mm: *mm,
|
||||
suffix: suffix,
|
||||
labels: labels,
|
||||
ts: timestampNsecs,
|
||||
value: value,
|
||||
})
|
||||
}
|
||||
|
||||
func (p *testMetricPusher) PushMetricMetadata(mm *MetricMetadata) {
|
||||
p.metadata = append(p.metadata, *mm)
|
||||
}
|
||||
|
||||
func TestDecodeScopeMetrics(t *testing.T) {
|
||||
scopeName := "my-scope"
|
||||
scopeVersion := "v1.0"
|
||||
envVal := "prod"
|
||||
intVal := int64(1)
|
||||
md := &MetricsData{
|
||||
ResourceMetrics: []*ResourceMetrics{
|
||||
{
|
||||
Resource: &Resource{
|
||||
Attributes: []*KeyValue{
|
||||
{Key: "job", Value: &AnyValue{StringValue: new("vm")}},
|
||||
{Key: "region", Value: &AnyValue{StringValue: new("us-east-1")}},
|
||||
},
|
||||
},
|
||||
ScopeMetrics: []*ScopeMetrics{
|
||||
{
|
||||
Scope: &InstrumentationScope{
|
||||
Name: &scopeName,
|
||||
Version: &scopeVersion,
|
||||
Attributes: []*KeyValue{
|
||||
{Key: "env", Value: &AnyValue{StringValue: &envVal}},
|
||||
},
|
||||
},
|
||||
Metrics: []*Metric{
|
||||
{
|
||||
Name: "my-gauge",
|
||||
Description: "a test gauge",
|
||||
Gauge: &Gauge{
|
||||
DataPoints: []*NumberDataPoint{
|
||||
{
|
||||
Attributes: []*KeyValue{{Key: "label1", Value: &AnyValue{StringValue: new("value1")}}},
|
||||
IntValue: &intVal,
|
||||
TimeUnixNano: 1000,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
data := md.MarshalProtobuf(nil)
|
||||
|
||||
f := func(options DecodeMetricsOptions, wantLabels map[string]string) {
|
||||
t.Helper()
|
||||
mp := &testMetricPusher{}
|
||||
if err := DecodeMetricsData(data, mp, options); err != nil {
|
||||
t.Fatalf("DecodeMetricsData error: %v", err)
|
||||
}
|
||||
if len(mp.samples) != 1 {
|
||||
t.Fatalf("expected 1 sample, got %d", len(mp.samples))
|
||||
}
|
||||
gotMap := make(map[string]string, len(mp.samples[0].labels))
|
||||
for _, l := range mp.samples[0].labels {
|
||||
gotMap[l.Name] = l.Value
|
||||
}
|
||||
if !reflect.DeepEqual(gotMap, wantLabels) {
|
||||
t.Errorf("unexpected labels:\n got: %v\n want: %v", gotMap, wantLabels)
|
||||
}
|
||||
}
|
||||
|
||||
// PromoteScopeMetadata=true + PromoteAllResourceAttributes=true:
|
||||
// got all scope labels and resource attrs
|
||||
f(DecodeMetricsOptions{
|
||||
DisableScopeMetadata: false,
|
||||
DisableResourceAttributes: false,
|
||||
}, map[string]string{
|
||||
"job": "vm",
|
||||
"region": "us-east-1",
|
||||
"scope.name": "my-scope",
|
||||
"scope.version": "v1.0",
|
||||
"scope.attributes.env": "prod",
|
||||
"label1": "value1",
|
||||
})
|
||||
|
||||
// PromoteScopeMetadata=false + PromoteAllResourceAttributes=true:
|
||||
// got all resource attrs, no scope labels
|
||||
f(DecodeMetricsOptions{
|
||||
DisableScopeMetadata: true,
|
||||
DisableResourceAttributes: false,
|
||||
}, map[string]string{
|
||||
"job": "vm",
|
||||
"region": "us-east-1",
|
||||
"label1": "value1",
|
||||
})
|
||||
|
||||
// PromoteScopeMetadata=true + PromoteAllResourceAttributes=false + ResourceAttributesList=[region]:
|
||||
// got only the `region` attr from resource
|
||||
f(DecodeMetricsOptions{
|
||||
DisableScopeMetadata: false,
|
||||
DisableResourceAttributes: true,
|
||||
ResourceAttributesList: map[string]struct{}{"region": {}},
|
||||
}, map[string]string{
|
||||
"region": "us-east-1",
|
||||
"scope.name": "my-scope",
|
||||
"scope.version": "v1.0",
|
||||
"scope.attributes.env": "prod",
|
||||
"label1": "value1",
|
||||
})
|
||||
|
||||
// PromoteScopeMetadata=true + PromoteAllResourceAttributes=true + ResourceAttributesList=[region]:
|
||||
// got all resource attrs except `region` (ignore list)
|
||||
f(DecodeMetricsOptions{
|
||||
DisableScopeMetadata: false,
|
||||
DisableResourceAttributes: false,
|
||||
ResourceAttributesList: map[string]struct{}{"region": {}},
|
||||
}, map[string]string{
|
||||
"job": "vm",
|
||||
"scope.name": "my-scope",
|
||||
"scope.version": "v1.0",
|
||||
"scope.attributes.env": "prod",
|
||||
"label1": "value1",
|
||||
})
|
||||
|
||||
// PromoteScopeMetadata=false + PromoteAllResourceAttributes=false + ResourceAttributesList=[job]:
|
||||
// got only `job` attr
|
||||
f(DecodeMetricsOptions{
|
||||
DisableScopeMetadata: true,
|
||||
DisableResourceAttributes: true,
|
||||
ResourceAttributesList: map[string]struct{}{"job": {}},
|
||||
}, map[string]string{
|
||||
"job": "vm",
|
||||
"label1": "value1",
|
||||
})
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
package stream
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
@@ -11,43 +10,13 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/pb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
|
||||
)
|
||||
|
||||
var (
|
||||
maxRequestSize = flagutil.NewBytes("opentelemetry.maxRequestSize", 64*1024*1024, "The maximum size in bytes of a single OpenTelemetry request")
|
||||
|
||||
promoteScopeMetadata = flag.Bool("opentelemetry.promoteScopeMetadata", true, "Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.")
|
||||
promoteAllResourceAttributes = flag.Bool("opentelemetry.promoteAllResourceAttributes", true, "Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'.")
|
||||
promoteResourceAttributes = flagutil.NewArrayString("opentelemetry.promoteResourceAttributes", "Promote specific list of resource attributes to labels.")
|
||||
ignoreResourceAttributes = flagutil.NewArrayString("opentelemetry.ignoreResourceAttributes", "Control which resource attributes to ignore, can only be set when 'opentelemetry.promoteAllResourceAttributes' is true.")
|
||||
)
|
||||
|
||||
// InitDecodeOptions configures decoding settings for the parser
|
||||
func InitDecodeOptions() {
|
||||
if *promoteAllResourceAttributes && len(*promoteResourceAttributes) > 0 {
|
||||
logger.Fatalf("cannot set both '-opentelemetry.promoteAllResourceAttributes' and '-opentelemetry.promoteResourceAttributes'")
|
||||
}
|
||||
if !*promoteAllResourceAttributes && len(*ignoreResourceAttributes) > 0 {
|
||||
logger.Fatalf("'-opentelemetry.ignoreResourceAttributes' can only be set when '-opentelemetry.promoteAllResourceAttributes' is true.")
|
||||
}
|
||||
defaultDecodeMetricsOptions.DisableScopeMetadata = !*promoteScopeMetadata
|
||||
defaultDecodeMetricsOptions.DisableResourceAttributes = !*promoteAllResourceAttributes
|
||||
attributes := *ignoreResourceAttributes
|
||||
if !*promoteAllResourceAttributes {
|
||||
attributes = *promoteResourceAttributes
|
||||
}
|
||||
defaultDecodeMetricsOptions.ResourceAttributesList = make(map[string]struct{}, len(attributes))
|
||||
for _, a := range attributes {
|
||||
defaultDecodeMetricsOptions.ResourceAttributesList[a] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
var defaultDecodeMetricsOptions = pb.DecodeMetricsOptions{}
|
||||
var maxRequestSize = flagutil.NewBytes("opentelemetry.maxRequestSize", 64*1024*1024, "The maximum size in bytes of a single OpenTelemetry request")
|
||||
|
||||
// ParseStream parses OpenTelemetry protobuf or json data from r and calls callback for the parsed rows.
|
||||
//
|
||||
@@ -78,7 +47,7 @@ func parseData(data []byte, callback func(tss []prompb.TimeSeries, mms []prompb.
|
||||
// the flushFunc will be called multiple time if the request is big, to avoid over allocating memory for such request.
|
||||
wctx.flushFunc = callback
|
||||
|
||||
if err := pb.DecodeMetricsData(data, wctx, defaultDecodeMetricsOptions); err != nil {
|
||||
if err := pb.DecodeMetricsData(data, wctx); err != nil {
|
||||
return fmt.Errorf("cannot unmarshal request from %d bytes: %w", len(data), err)
|
||||
}
|
||||
|
||||
|
||||
@@ -76,6 +76,15 @@ type Server struct {
|
||||
|
||||
// Limits contains various limits for Server.
|
||||
type Limits struct {
|
||||
// MaxLabelNames is the maximum label names, which may be returned from labelNames request.
|
||||
MaxLabelNames int
|
||||
|
||||
// MaxLabelValues is the maximum label values, which may be returned from labelValues request.
|
||||
MaxLabelValues int
|
||||
|
||||
// MaxTagValueSuffixes is the maximum number of entries, which can be returned from tagValueSuffixes request.
|
||||
MaxTagValueSuffixes int
|
||||
|
||||
// MaxConcurrentRequests is the maximum number of concurrent requests a server can process.
|
||||
//
|
||||
// The remaining requests wait for up to MaxQueueDuration for their execution.
|
||||
@@ -125,7 +134,7 @@ func NewServer(addr string, api API, limits Limits, disableResponseCompression b
|
||||
labelNamesRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="labelNames",addr=%q}`, addr)),
|
||||
labelValuesRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="labelValues",addr=%q}`, addr)),
|
||||
tagValueSuffixesRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="tagValueSuffixes",addr=%q}`, addr)),
|
||||
seriesCountRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="seriesCount",addr=%q}`, addr)),
|
||||
seriesCountRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="seriesSount",addr=%q}`, addr)),
|
||||
tsdbStatusRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="tsdbStatus",addr=%q}`, addr)),
|
||||
searchMetricNamesRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="searchMetricNames",addr=%q}`, addr)),
|
||||
searchRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="search",addr=%q}`, addr)),
|
||||
@@ -674,6 +683,9 @@ func (s *Server) processLabelNames(ctx *vmselectRequestCtx) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read maxLabelNames: %w", err)
|
||||
}
|
||||
if maxLabelNames <= 0 || maxLabelNames > s.limits.MaxLabelNames {
|
||||
maxLabelNames = s.limits.MaxLabelNames
|
||||
}
|
||||
|
||||
if err := s.beginConcurrentRequest(ctx); err != nil {
|
||||
return ctx.writeErrorMessage(err)
|
||||
@@ -725,6 +737,9 @@ func (s *Server) processLabelValues(ctx *vmselectRequestCtx) error {
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read maxLabelValues: %w", err)
|
||||
}
|
||||
if maxLabelValues <= 0 || maxLabelValues > s.limits.MaxLabelValues {
|
||||
maxLabelValues = s.limits.MaxLabelValues
|
||||
}
|
||||
|
||||
if err := s.beginConcurrentRequest(ctx); err != nil {
|
||||
return ctx.writeErrorMessage(err)
|
||||
@@ -785,7 +800,10 @@ func (s *Server) processTagValueSuffixes(ctx *vmselectRequestCtx) error {
|
||||
}
|
||||
maxSuffixes, err := ctx.readLimit()
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot read maxTagValueSuffixes: %w", err)
|
||||
return fmt.Errorf("cannot read maxTagValueSuffixes: %d", err)
|
||||
}
|
||||
if maxSuffixes <= 0 || maxSuffixes > s.limits.MaxTagValueSuffixes {
|
||||
maxSuffixes = s.limits.MaxTagValueSuffixes
|
||||
}
|
||||
|
||||
if err := s.beginConcurrentRequest(ctx); err != nil {
|
||||
@@ -799,6 +817,14 @@ func (s *Server) processTagValueSuffixes(ctx *vmselectRequestCtx) error {
|
||||
return ctx.writeErrorMessage(err)
|
||||
}
|
||||
|
||||
if len(suffixes) >= s.limits.MaxTagValueSuffixes {
|
||||
err := fmt.Errorf("more than %d tag value suffixes found "+
|
||||
"for tagKey=%q, tagValuePrefix=%q, delimiter=%c on time range %s; "+
|
||||
"either narrow down the query or increase -search.max* command-line flag value; see https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#resource-usage-limits",
|
||||
s.limits.MaxTagValueSuffixes, tagKey, tagValuePrefix, delimiter, tr.String())
|
||||
return ctx.writeErrorMessage(err)
|
||||
}
|
||||
|
||||
// Send an empty error message to vmselect.
|
||||
if err := ctx.writeString(""); err != nil {
|
||||
return fmt.Errorf("cannot send empty error message: %w", err)
|
||||
|
||||
Reference in New Issue
Block a user