Compare commits

..

12 Commits

Author SHA1 Message Date
Max Kotliar
f884ffb1e3 lib/netutil: use watch gourouting appraoch
This one actually worked. Unfortunatly, isConnAlive before get does not
work
2026-05-26 18:50:18 +03:00
Max Kotliar
85a5429a2a fix changelog 2026-05-26 17:14:27 +03:00
Max Kotliar
bd836031ca check ErrDeadlineExceeded error 2026-05-26 17:00:37 +03:00
Max Kotliar
0d3bd2d4e6 Update docs/victoriametrics/changelog/CHANGELOG.md
Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com>
Signed-off-by: Max Kotliar <kotlyar.maksim@gmail.com>
2026-05-26 16:52:19 +03:00
Max Kotliar
1fe747cf35 remove debug code 2026-05-26 15:48:21 +03:00
Max Kotliar
572f48c110 remove debug code 2026-05-26 15:24:40 +03:00
Max Kotliar
78883a3f98 remove debug code 2026-05-26 15:23:14 +03:00
Max Kotliar
96368dfcfc lib/netutil: watch idle conns proactively
lib/netutil: check pooled conn liveness before reuse

Probe each connection with a non-blocking read before returning it to a
caller. Connections silently dropped at the network layer (no RST/FIN)
stay alive from the OS perspective until TCP_USER_TIMEOUT fires. This
causes the next write to fail with ETIMEDOUT immediately, producing
user-visible query/insert errors.

```
{"ts":"2026-03-18T12:13:37.303Z","level":"warn","caller":"VictoriaMetrics/app/vmselect/main.go:357","msg":"remoteAddr:
\"10.42.156.237:54634\"; requestURI:
/select/0/prometheus/api/v1/query?query=X&time=1773836017.264876; error
when executing query="X" for (time=1773835987281, step=300000): cannot
execute \"X\": cannot evaluateX\": error occured during search: cannot
fetch query results from vmstorage nodes: cannot perform search on
vmstorage vmstorage-vmc-metrics-1.vmstorage-vmc-metrics.metrics:8401:
cannot execute funcName=\"search_v7\" on vmstorage
\"10.42.154.239:8401\": cannot flush requestData to conn: write tcp4
10.42.104.242:52460->10.42.154.239:8401: write: connection timed out"}
```

The probe sets ReadDeadline=now and reads 1 byte from the underlying TCP
conn. A timeout error means the conn is alive; any other result
(ETIMEDOUT, EOF, unexpected data) means it is dead and is discarded.

The comment
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10735#issuecomment-4535832301
described one user case where it happened.

The change would be useful by users with unstable network
infrastructure. Which could broken established connections that sits in
the pool. Check the logs for write: connection timedout` errors or
execute query:

```
sum(increase(vm_request_errors_total{action="search",type="rpcClient",name="vmselect",job="vmselect-aggregated"}[1m]))
by (job)
/
sum(increase(vm_requests_total{action="search",type="rpcClient",name="vmselect",job="vmselect-aggregated"}[1m]))
by (job)
*
100
```
2026-05-26 15:17:58 +03:00
Max Kotliar
a0220db978 Merge remote-tracking branch 'opensource/cluster' into v1-143-0-debug-rpc-write-timeout 2026-05-25 19:20:14 +03:00
Max Kotliar
ef5e42cfa0 add debug flush log message 2026-05-20 19:42:11 +03:00
Max Kotliar
1730051e27 cleanup pool completly 2026-05-20 18:46:51 +03:00
Max Kotliar
74ae5b2f0d cleanup pool completly 2026-05-20 18:04:54 +03:00
45 changed files with 871 additions and 1393 deletions

View File

@@ -293,8 +293,8 @@ apptest-legacy: vminsert-race vmselect-race vmstorage-race vmbackup-race vmresto
curl --output-dir /tmp -LO $${URL}/$${VMSINGLE} && tar xzf /tmp/$${VMSINGLE} -C $${DIR} && \
curl --output-dir /tmp -LO $${URL}/$${VMCLUSTER} && tar xzf /tmp/$${VMCLUSTER} -C $${DIR} \
); \
VMSINGLE_V1_132_0_PATH=$${DIR}/victoria-metrics-prod \
VMSTORAGE_V1_132_0_PATH=$${DIR}/vmstorage-prod \
VM_LEGACY_VMSINGLE_PATH=$${DIR}/victoria-metrics-prod \
VM_LEGACY_VMSTORAGE_PATH=$${DIR}/vmstorage-prod \
go test ./apptest/tests -run="^TestLegacyCluster.*"
benchmark:

View File

@@ -118,7 +118,6 @@ func main() {
remotewrite.InitSecretFlags()
buildinfo.Init()
logger.Init()
opentelemetry.Init()
timeserieslimits.Init(*maxLabelsPerTimeseries, *maxLabelNameLen, *maxLabelValueLen)
if promscrape.IsDryRun() {

View File

@@ -25,11 +25,6 @@ var (
rowsPerInsert = metrics.NewHistogram(`vmagent_rows_per_insert{type="opentelemetry"}`)
)
// Init must be called after flag.Parse and before using the opentelemetry package.
func Init() {
stream.InitDecodeOptions()
}
// InsertHandlerForReader processes metrics from given reader.
func InsertHandlerForReader(at *auth.Token, r io.Reader, encoding string) error {
return stream.ParseStream(r, encoding, nil, func(tss []prompb.TimeSeries, mms []prompb.MetricMetadata) error {

View File

@@ -113,15 +113,15 @@ func (g *Group) Validate(validateTplFn ValidateTplFn, validateExpressions bool)
// because correct types must be inherited after unmarshalling.
exprValidator := g.Type.ValidateExpr
if err := exprValidator(r.Expr); err != nil {
return fmt.Errorf("invalid expression for rule %q: %w", ruleName, err)
return fmt.Errorf("invalid expression for rule %q: %w", ruleName, err)
}
}
if validateTplFn != nil {
if err := validateTplFn(r.Annotations); err != nil {
return fmt.Errorf("invalid annotations for rule %q: %w", ruleName, err)
return fmt.Errorf("invalid annotations for rule %q: %w", ruleName, err)
}
if err := validateTplFn(r.Labels); err != nil {
return fmt.Errorf("invalid labels for rule %q: %w", ruleName, err)
return fmt.Errorf("invalid labels for rule %q: %w", ruleName, err)
}
}
}

View File

@@ -121,7 +121,7 @@ func TestParse_Failure(t *testing.T) {
f([]string{"testdata/dir/rules2-bad.rules"}, "function \"unknown\" not defined")
f([]string{"testdata/dir/rules3-bad.rules"}, "either `record` or `alert` must be set")
f([]string{"testdata/dir/rules4-bad.rules"}, "either `record` or `alert` must be set")
f([]string{"testdata/rules/rules1-bad.rules"}, "bad GraphiteQL expr")
f([]string{"testdata/rules/rules1-bad.rules"}, "bad graphite expr")
f([]string{"testdata/rules/vlog-rules0-bad.rules"}, "bad LogsQL expr")
f([]string{"testdata/dir/rules6-bad.rules"}, "missing ':' in header")
f([]string{"testdata/rules/rules-multi-doc-bad.rules"}, "unknown fields")
@@ -283,7 +283,7 @@ func TestGroupValidate_Failure(t *testing.T) {
Expr: "up | 0",
},
},
}, true, "bad MetricsQL expr")
}, true, "bad prometheus expr")
f(&Group{
Name: "test graphite expr",
@@ -293,7 +293,7 @@ func TestGroupValidate_Failure(t *testing.T) {
"description": "some-description",
}},
},
}, true, "bad GraphiteQL expr")
}, true, "bad graphite expr")
f(&Group{
Name: "test vlogs expr",
@@ -327,7 +327,7 @@ func TestGroupValidate_Failure(t *testing.T) {
Expr: "sum(up == 0 ) by (host)",
},
},
}, true, "bad GraphiteQL expr")
}, true, "bad graphite expr")
f(&Group{
Name: "test vlogs with prometheus exp",
@@ -351,7 +351,7 @@ func TestGroupValidate_Failure(t *testing.T) {
For: promutil.NewDuration(10 * time.Millisecond),
},
},
}, true, "bad MetricsQL expr")
}, true, "bad prometheus expr")
}
func TestGroupValidate_Success(t *testing.T) {

View File

@@ -66,11 +66,11 @@ func (t *Type) ValidateExpr(expr string) error {
switch t.String() {
case "graphite":
if _, err := graphiteql.Parse(expr); err != nil {
return fmt.Errorf("bad GraphiteQL expr: %q, err: %w", expr, err)
return fmt.Errorf("bad graphite expr: %q, err: %w", expr, err)
}
case "prometheus":
if _, err := metricsql.Parse(expr); err != nil {
return fmt.Errorf("bad MetricsQL expr: %q, err: %w", expr, err)
return fmt.Errorf("bad prometheus expr: %q, err: %w", expr, err)
}
case "vlogs":
q, err := logstorage.ParseStatsQuery(expr, 0)

View File

@@ -125,7 +125,6 @@ func main() {
relabel.Init()
timeserieslimits.Init(*maxLabelsPerTimeseries, *maxLabelNameLen, *maxLabelValueLen)
opentelemetry.Init()
protoparserutil.StartUnmarshalWorkers()
if len(*clusternativeListenAddr) > 0 {
s, err := clusternative.NewVMinsertServer(*clusternativeListenAddr, nil)

View File

@@ -24,11 +24,6 @@ var (
metadataInserted = metrics.NewCounter(`vm_metadata_rows_inserted_total{type="opentelemetry"}`)
)
// Init must be called after flag.Parse and before using the opentelemetry package.
func Init() {
stream.InitDecodeOptions()
}
// InsertHandler processes opentelemetry metrics.
func InsertHandler(at *auth.Token, req *http.Request) error {
extraLabels, err := protoparserutil.GetExtraLabels(req)

View File

@@ -36,6 +36,9 @@ var (
func NewVMSelectServer(addr string) (*vmselectapi.Server, error) {
api := &vmstorageAPI{}
limits := vmselectapi.Limits{
MaxLabelNames: *maxTagKeys,
MaxLabelValues: *maxTagValues,
MaxTagValueSuffixes: *maxTagValueSuffixesPerSearch,
MaxConcurrentRequests: *maxConcurrentRequests,
MaxConcurrentRequestsFlagName: "clusternative.maxConcurrentRequests",
MaxQueueDuration: *maxQueueDuration,
@@ -66,9 +69,6 @@ func (api *vmstorageAPI) SearchMetricNames(qt *querytracer.Tracer, sq *storage.S
}
func (api *vmstorageAPI) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQuery, labelName string, maxLabelValues int, deadline uint64) ([]string, error) {
if maxLabelValues <= 0 || maxLabelValues > *maxTagValues {
maxLabelValues = *maxTagValues
}
dl := searchutil.DeadlineFromTimestamp(deadline)
labelValues, _, err := netstorage.LabelValues(qt, true, labelName, sq, maxLabelValues, dl)
return labelValues, wrapClusterNativeError(err)
@@ -76,18 +76,12 @@ func (api *vmstorageAPI) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQ
func (api *vmstorageAPI) TagValueSuffixes(qt *querytracer.Tracer, accountID, projectID uint32, tr storage.TimeRange, tagKey, tagValuePrefix string, delimiter byte,
maxSuffixes int, deadline uint64) ([]string, error) {
if maxSuffixes <= 0 || maxSuffixes > *maxTagValueSuffixesPerSearch {
maxSuffixes = *maxTagValueSuffixesPerSearch
}
dl := searchutil.DeadlineFromTimestamp(deadline)
suffixes, _, err := netstorage.TagValueSuffixes(qt, accountID, projectID, true, tr, tagKey, tagValuePrefix, delimiter, maxSuffixes, dl)
return suffixes, wrapClusterNativeError(err)
}
func (api *vmstorageAPI) LabelNames(qt *querytracer.Tracer, sq *storage.SearchQuery, maxLabelNames int, deadline uint64) ([]string, error) {
if maxLabelNames <= 0 || maxLabelNames > *maxTagKeys {
maxLabelNames = *maxTagKeys
}
dl := searchutil.DeadlineFromTimestamp(deadline)
labelNames, _, err := netstorage.LabelNames(qt, true, sq, maxLabelNames, dl)
return labelNames, wrapClusterNativeError(err)

View File

@@ -9,10 +9,12 @@ import (
"os"
"strconv"
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage/servers"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
@@ -27,12 +29,10 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vminsertapi"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vmselectapi"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
)
var (
storageDataPath = flag.String("storageDataPath", "vmstorage-data", "Path to storage data")
retentionPeriod = flagutil.NewRetentionDuration("retentionPeriod", "1M", "Data with timestamps outside the retentionPeriod is automatically deleted. The minimum retentionPeriod is 24h or 1d. "+
"See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#retention. See also -retentionFilter")
futureRetention = flagutil.NewRetentionDuration("futureRetention", "2d", "Data with timestamps bigger than now+futureRetention is automatically deleted. "+
@@ -41,23 +41,13 @@ var (
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the given -httpListenAddr . "+
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
vminsertAddr = flag.String("vminsertAddr", ":8400", "TCP address to accept connections from vminsert services")
vmselectAddr = flag.String("vmselectAddr", ":8401", "TCP address to accept connections from vmselect services")
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", 2*cgroup.AvailableCPUs(), "The maximum number of concurrent vmselect requests "+
"the vmstorage can process at -vmselectAddr. It shouldn't be high, since a single request usually saturates a CPU core, and many concurrently executed requests "+
"may require high amounts of memory. See also -search.maxQueueDuration")
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the incoming vmselect request waits for execution "+
"when -search.maxConcurrentRequests limit is reached")
disableRPCCompression = flag.Bool("rpc.disableCompression", false, "Whether to disable compression of the data sent from vmstorage to vmselect. "+
"This reduces CPU usage at the cost of higher network bandwidth usage")
vminsertConnsShutdownDuration = flag.Duration("storage.vminsertConnsShutdownDuration", 10*time.Second, "The time needed for gradual closing of vminsert connections during "+
"graceful shutdown. Bigger duration reduces spikes in CPU, RAM and disk IO load on the remaining vmstorage nodes during rolling restart. "+
"Smaller duration reduces the time needed to close all the vminsert connections, thus reducing the time for graceful shutdown. "+
"Configured value must always be lower than the graceful shutdown period configured by the orchestration platform (terminationGracePeriodSeconds for Kubernetes). "+
"See https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#improving-re-routing-performance-during-restart")
snapshotAuthKey = flagutil.NewPassword("snapshotAuthKey", "authKey, which must be passed in query string to /snapshot* pages. It overrides -httpAuth.*")
forceMergeAuthKey = flagutil.NewPassword("forceMergeAuthKey", "authKey, which must be passed in query string to /internal/force_merge pages. It overrides -httpAuth.*")
forceFlushAuthKey = flagutil.NewPassword("forceFlushAuthKey", "authKey, which must be passed in query string to /internal/force_flush pages. It overrides -httpAuth.*")
storageDataPath = flag.String("storageDataPath", "vmstorage-data", "Path to storage data")
vminsertAddr = flag.String("vminsertAddr", ":8400", "TCP address to accept connections from vminsert services")
vmselectAddr = flag.String("vmselectAddr", ":8401", "TCP address to accept connections from vmselect services")
snapshotAuthKey = flagutil.NewPassword("snapshotAuthKey", "authKey, which must be passed in query string to /snapshot* pages")
forceMergeAuthKey = flagutil.NewPassword("forceMergeAuthKey", "authKey, which must be passed in query string to /internal/force_merge pages")
forceFlushAuthKey = flagutil.NewPassword("forceFlushAuthKey", "authKey, which must be passed in query string to /internal/force_flush pages")
snapshotsMaxAge = flagutil.NewRetentionDuration("snapshotsMaxAge", "3d", "Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted")
_ = flag.Duration("snapshotCreateTimeout", 0, "Deprecated: this flag does nothing")
_ = flag.Duration("finalMergeDelay", 0, "Deprecated: this flag does nothing")
@@ -68,7 +58,7 @@ var (
"If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. "+
"If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)")
minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Leave only the last sample in every time series per each discrete interval "+
"equal to -dedup.minScrapeInterval > 0. See also -streamAggr.dedupInterval and https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication")
"equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication for details")
inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+
"The saved data survives unclean shutdowns such as OOM crash, hardware reset, SIGKILL, etc. "+
"Bigger intervals may help increase the lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+
@@ -129,7 +119,7 @@ var (
)
func main() {
// vmstorage is optimized for reduced memory allocations,
// vmstoage is optimized for reduced memory allocations,
// so it can run with the reduced GOGC in order to reduce the used memory,
// while keeping CPU usage spent in GC at low levels.
//
@@ -188,7 +178,7 @@ func main() {
LogNewSeries: *logNewSeries,
}
strg := storage.MustOpenStorage(*storageDataPath, opts)
vmStorage := newVMStorage(strg, *maxConcurrentRequests)
initStaleSnapshotsRemover(strg)
var m storage.Metrics
strg.UpdateMetrics(&m)
@@ -203,24 +193,18 @@ func main() {
// register storage metrics
storageMetrics := metrics.NewSet()
storageMetrics.RegisterMetricsWriter(func(w io.Writer) {
vmStorage.writeStorageMetrics(w)
writeStorageMetrics(w, strg)
})
metrics.RegisterSet(storageMetrics)
protoparserutil.StartUnmarshalWorkers()
vminsertSrv, err := vminsertapi.NewVMInsertServer(*vminsertAddr, *vminsertConnsShutdownDuration, "vminsert", vmStorage, nil)
servers.GetMaxUniqueTimeSeries() // for init and logging only.
vminsertSrv, err := servers.NewVMInsertServer(*vminsertAddr, strg)
if err != nil {
logger.Fatalf("cannot create a server with -vminsertAddr=%s: %s", *vminsertAddr, err)
}
limits := vmselectapi.Limits{
MaxConcurrentRequests: *maxConcurrentRequests,
MaxConcurrentRequestsFlagName: "search.maxConcurrentRequests",
MaxQueueDuration: *maxQueueDuration,
MaxQueueDurationFlagName: "search.maxQueueDuration",
}
vmselectSrv, err := vmselectapi.NewServer(*vmselectAddr, vmStorage, limits, *disableRPCCompression)
vmselectSrv, err := servers.NewVMSelectServer(*vmselectAddr, strg)
if err != nil {
logger.Fatalf("cannot create a server with -vmselectAddr=%s: %s", *vmselectAddr, err)
}
@@ -229,7 +213,8 @@ func main() {
if len(listenAddrs) == 0 {
listenAddrs = []string{":8482"}
}
go httpserver.Serve(listenAddrs, vmStorage.requestHandler, httpserver.ServeOptions{UseProxyProtocol: useProxyProtocol})
requestHandler := newRequestHandler(strg)
go httpserver.Serve(listenAddrs, requestHandler, httpserver.ServeOptions{UseProxyProtocol: useProxyProtocol})
pushmetrics.Init()
sig := procutil.WaitForSigterm()
@@ -249,6 +234,7 @@ func main() {
metrics.UnregisterSet(storageMetrics, true)
storageMetrics = nil
stopStaleSnapshotsRemover()
vmselectSrv.MustStop()
vminsertSrv.MustStop()
protoparserutil.StopUnmarshalWorkers()
@@ -256,29 +242,31 @@ func main() {
logger.Infof("gracefully closing the storage at %s", *storageDataPath)
startTime = time.Now()
vmStorage.Stop()
strg.MustClose()
logger.Infof("successfully closed the storage in %.3f seconds", time.Since(startTime).Seconds())
fs.MustStopDirRemover()
logger.Infof("the vmstorage has been stopped")
}
// requestHandler is a storage request handler.
// TODO(@rtm0): Move to a separate file, request_handler.go
func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) bool {
path := r.URL.Path
if path == "/" {
if r.Method != http.MethodGet {
return false
}
w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, `vmstorage - a component of VictoriaMetrics cluster<br/>
func newRequestHandler(strg *storage.Storage) httpserver.RequestHandler {
return func(w http.ResponseWriter, r *http.Request) bool {
if r.URL.Path == "/" {
if r.Method != http.MethodGet {
return false
}
w.Header().Add("Content-Type", "text/html; charset=utf-8")
fmt.Fprintf(w, `vmstorage - a component of VictoriaMetrics cluster<br/>
<a href="https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/">docs</a><br>
`)
return true
return true
}
return requestHandler(w, r, strg)
}
}
func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storage) bool {
path := r.URL.Path
if path == "/internal/force_merge" {
if !httpserver.CheckAuthFlag(w, r, forceMergeAuthKey) {
return true
@@ -290,7 +278,7 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
defer activeForceMerges.Dec()
logger.Infof("forced merge for partition_prefix=%q has been started", partitionNamePrefix)
startTime := time.Now()
if err := api.s.ForceMergePartitions(partitionNamePrefix); err != nil {
if err := strg.ForceMergePartitions(partitionNamePrefix); err != nil {
logger.Errorf("error in forced merge for partition_prefix=%q: %s", partitionNamePrefix, err)
return
}
@@ -303,7 +291,7 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
return true
}
logger.Infof("flushing storage to make pending data available for reading")
api.s.DebugFlush()
strg.DebugFlush()
return true
}
@@ -323,7 +311,7 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
}
logger.Infof("enabling logging of new series for the next %s. This may increase resource usage during this period.", time.Duration(dealine)*time.Second)
endTime := fasttime.UnixTimestamp() + uint64(dealine)
api.s.SetLogNewSeriesUntil(endTime)
strg.SetLogNewSeriesUntil(endTime)
fmt.Fprintf(w, `{"status":"success","data":{"logEndTime":%q}}`, time.Unix(int64(endTime), 0))
return true
}
@@ -339,13 +327,13 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
case "/create":
snapshotsCreateTotal.Inc()
w.Header().Set("Content-Type", "application/json")
snapshotName := api.s.MustCreateSnapshot()
snapshotName := strg.MustCreateSnapshot()
// Verify whether the client already closed the connection.
// In this case it is better to drop the created snapshot, since the client isn't interested in it.
if err := r.Context().Err(); err != nil {
logger.Infof("deleting already created snapshot at %s because the client canceled the request", snapshotName)
if err := api.deleteSnapshot(snapshotName); err != nil {
if err := deleteSnapshot(strg, snapshotName); err != nil {
logger.Infof("cannot delete just created snapshot: %s", err)
return true
}
@@ -357,7 +345,7 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
case "/list":
snapshotsListTotal.Inc()
w.Header().Set("Content-Type", "application/json")
snapshots := api.s.MustListSnapshots()
snapshots := strg.MustListSnapshots()
fmt.Fprintf(w, `{"status":"ok","snapshots":[`)
if len(snapshots) > 0 {
for _, snapshot := range snapshots[:len(snapshots)-1] {
@@ -371,7 +359,7 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
snapshotsDeleteTotal.Inc()
w.Header().Set("Content-Type", "application/json")
snapshotName := r.FormValue("snapshot")
if err := api.deleteSnapshot(snapshotName); err != nil {
if err := deleteSnapshot(strg, snapshotName); err != nil {
jsonResponseError(w, err)
snapshotsDeleteErrorsTotal.Inc()
return true
@@ -381,10 +369,9 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
case "/delete_all":
snapshotsDeleteAllTotal.Inc()
w.Header().Set("Content-Type", "application/json")
snapshots := api.s.MustListSnapshots()
snapshots := strg.MustListSnapshots()
for _, snapshotName := range snapshots {
// TODO(@rtm0): Use VMStorage.deleteSnapshot()?
if err := api.s.DeleteSnapshot(snapshotName); err != nil {
if err := strg.DeleteSnapshot(snapshotName); err != nil {
err = fmt.Errorf("cannot delete snapshot %q: %w", snapshotName, err)
jsonResponseError(w, err)
snapshotsDeleteAllErrorsTotal.Inc()
@@ -398,6 +385,50 @@ func (api *VMStorage) requestHandler(w http.ResponseWriter, r *http.Request) boo
}
}
func deleteSnapshot(strg *storage.Storage, snapshotName string) error {
snapshots := strg.MustListSnapshots()
for _, snName := range snapshots {
if snName == snapshotName {
if err := strg.DeleteSnapshot(snName); err != nil {
return fmt.Errorf("cannot delete snapshot %q: %w", snName, err)
}
return nil
}
}
return fmt.Errorf("cannot find snapshot %q", snapshotName)
}
func initStaleSnapshotsRemover(strg *storage.Storage) {
staleSnapshotsRemoverCh = make(chan struct{})
if snapshotsMaxAge.Duration() <= 0 {
return
}
snapshotsMaxAgeDur := snapshotsMaxAge.Duration()
staleSnapshotsRemoverWG.Go(func() {
d := timeutil.AddJitterToDuration(time.Second * 11)
t := time.NewTicker(d)
defer t.Stop()
for {
select {
case <-staleSnapshotsRemoverCh:
return
case <-t.C:
}
strg.MustDeleteStaleSnapshots(snapshotsMaxAgeDur)
}
})
}
func stopStaleSnapshotsRemover() {
close(staleSnapshotsRemoverCh)
staleSnapshotsRemoverWG.Wait()
}
var (
staleSnapshotsRemoverCh chan struct{}
staleSnapshotsRemoverWG sync.WaitGroup
)
var (
activeForceMerges = metrics.NewCounter("vm_active_force_merges")
@@ -412,9 +443,7 @@ var (
snapshotsDeleteAllErrorsTotal = metrics.NewCounter(`vm_http_request_errors_total{path="/snapshot/delete_all"}`)
)
// TODO(@rtm0): Move to metrics.go.
func (api *VMStorage) writeStorageMetrics(w io.Writer) {
strg := api.s
func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
var m storage.Metrics
strg.UpdateMetrics(&m)
tm := &m.TableMetrics
@@ -638,7 +667,7 @@ func (api *VMStorage) writeStorageMetrics(w io.Writer) {
metrics.WriteGaugeUint64(w, `vm_downsampling_partitions_scheduled`, tm.ScheduledDownsamplingPartitions)
metrics.WriteGaugeUint64(w, `vm_downsampling_partitions_scheduled_size_bytes`, tm.ScheduledDownsamplingPartitionsSize)
metrics.WriteGaugeUint64(w, `vm_search_max_unique_timeseries`, uint64(api.maxUniqueTimeSeriesCalculated))
metrics.WriteGaugeUint64(w, `vm_search_max_unique_timeseries`, uint64(servers.GetMaxUniqueTimeSeries()))
metrics.WriteGaugeUint64(w, `vm_metrics_metadata_storage_items`, m.MetadataStorageItemsCurrent)
metrics.WriteCounterUint64(w, `vm_metrics_metadata_storage_size_bytes`, m.MetadataStorageCurrentSizeBytes)

View File

@@ -0,0 +1,55 @@
package servers
import (
"flag"
"fmt"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vminsertapi"
)
var (
precisionBits = flag.Int("precisionBits", 64, "The number of precision bits to store per each value. Lower precision bits improves data compression "+
"at the cost of precision loss")
vminsertConnsShutdownDuration = flag.Duration("storage.vminsertConnsShutdownDuration", 10*time.Second, "The time needed for gradual closing of vminsert connections during "+
"graceful shutdown. Bigger duration reduces spikes in CPU, RAM and disk IO load on the remaining vmstorage nodes during rolling restart. "+
"Smaller duration reduces the time needed to close all the vminsert connections, thus reducing the time for graceful shutdown. "+
"Configured value must always be lower than the graceful shutdown period configured by the orchestration platform (terminationGracePeriodSeconds for Kubernetes). "+
"See https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#improving-re-routing-performance-during-restart")
)
// NewVMInsertServer starts vminsertapi.VMInsertServer at the given addr serving the given storage.
func NewVMInsertServer(addr string, storage *storage.Storage) (*vminsertapi.VMInsertServer, error) {
if err := encoding.CheckPrecisionBits(uint8(*precisionBits)); err != nil {
return nil, fmt.Errorf("invalid -precisionBits: %w", err)
}
api := &vminsertAPI{
storage: storage,
}
return vminsertapi.NewVMInsertServer(addr, *vminsertConnsShutdownDuration, "vminsert", api, nil)
}
type vminsertAPI struct {
storage *storage.Storage
}
// WriteRows implements lib/vminsertapi.API interface
func (v *vminsertAPI) WriteRows(rows []storage.MetricRow) error {
v.storage.AddRows(rows, uint8(*precisionBits))
return nil
}
// WriteMetadata implements lib/vminsertapi.API interface
func (v *vminsertAPI) WriteMetadata(rows []metricsmetadata.Row) error {
v.storage.AddMetadataRows(rows)
return nil
}
// IsReadOnly implements lib/vminsertapi.API interface
func (v *vminsertAPI) IsReadOnly() bool {
return v.storage.IsReadOnly()
}

View File

@@ -1,4 +1,4 @@
package main
package servers
import (
"flag"
@@ -6,21 +6,17 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricnamestats"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/metricsmetadata"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/vmselectapi"
)
var (
precisionBits = flag.Int("precisionBits", 64, "The number of precision bits to store per each value. Lower precision bits improves data compression "+
"at the cost of precision loss")
maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 0, "The maximum number of unique time series, which can be scanned during every query. "+
"This allows protecting against heavy queries, which select unexpectedly high number of series. When set to zero, the limit is automatically calculated based on -search.maxConcurrentRequests (inversely proportional) and memory available to the process (proportional). See also -search.max* command-line flags at vmselect")
maxTagKeys = flag.Int("search.maxTagKeys", 100e3, "The maximum number of tag keys returned per search. "+
@@ -28,110 +24,46 @@ var (
maxTagValues = flag.Int("search.maxTagValues", 100e3, "The maximum number of tag values returned per search. "+
"See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration")
maxTagValueSuffixesPerSearch = flag.Int("search.maxTagValueSuffixesPerSearch", 100e3, "The maximum number of tag value suffixes returned from /metrics/find")
snapshotsMaxAge = flagutil.NewRetentionDuration("snapshotsMaxAge", "3d", "Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted")
maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", 2*cgroup.AvailableCPUs(), "The maximum number of concurrent vmselect requests "+
"the vmstorage can process at -vmselectAddr. It shouldn't be high, since a single request usually saturates a CPU core, and many concurrently executed requests "+
"may require high amounts of memory. See also -search.maxQueueDuration")
maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the incoming vmselect request waits for execution "+
"when -search.maxConcurrentRequests limit is reached")
disableRPCCompression = flag.Bool("rpc.disableCompression", false, "Whether to disable compression of the data sent from vmstorage to vmselect. "+
"This reduces CPU usage at the cost of higher network bandwidth usage")
)
// newVMStorage creates a new instance of of VMStorage.
//
// The created VMStorage instance takes ownership of s.
func newVMStorage(s *storage.Storage, maxConcurrentRequests int) *VMStorage {
if err := encoding.CheckPrecisionBits(uint8(*precisionBits)); err != nil {
logger.Fatalf("invalid -precisionBits: %d", err)
var (
maxUniqueTimeseriesValue int
maxUniqueTimeseriesValueOnce sync.Once
)
// NewVMSelectServer starts new server at the given addr, which serves vmselect requests from the given s.
func NewVMSelectServer(addr string, s *storage.Storage) (*vmselectapi.Server, error) {
api := &vmstorageAPI{
s: s,
}
maxUniqueTimeseriesCalculated := *maxUniqueTimeseries
if maxUniqueTimeseriesCalculated <= 0 {
maxUniqueTimeseriesCalculated = calculateMaxUniqueTimeseries(maxConcurrentRequests, memory.Remaining())
limits := vmselectapi.Limits{
MaxLabelNames: *maxTagKeys,
MaxLabelValues: *maxTagValues,
MaxTagValueSuffixes: *maxTagValueSuffixesPerSearch,
MaxConcurrentRequests: *maxConcurrentRequests,
MaxConcurrentRequestsFlagName: "search.maxConcurrentRequests",
MaxQueueDuration: *maxQueueDuration,
MaxQueueDurationFlagName: "search.maxQueueDuration",
}
vms := &VMStorage{
s: s,
maxUniqueTimeseries: *maxUniqueTimeseries,
maxUniqueTimeSeriesCalculated: maxUniqueTimeseriesCalculated,
staleSnapshotsRemoverCh: make(chan struct{}),
}
vms.initStaleSnapshotsRemover()
return vms
return vmselectapi.NewServer(addr, api, limits, *disableRPCCompression)
}
// calculateMaxUniqueTimeseries calculates the maxUniqueTimeseries based on the
// available system resources.
func calculateMaxUniqueTimeseries(maxConcurrentRequests, remainingMemory int) int {
if maxConcurrentRequests <= 0 {
// This line should NOT be reached unless the user has set an incorrect `search.maxConcurrentRequests`.
// In such cases, fallback to unlimited.
logger.Warnf("limiting -search.maxUniqueTimeseries to %v because -search.maxConcurrentRequests=%d.", 2e9, maxConcurrentRequests)
return 2e9
}
// Calculate the max metrics limit for a single request in the worst-case concurrent scenario.
// The approximate size of 1 unique series that could occupy in the vmstorage is 200 bytes.
mts := remainingMemory / 200 / maxConcurrentRequests
logger.Infof("limiting -search.maxUniqueTimeseries to %d according to -search.maxConcurrentRequests=%d and remaining memory=%d bytes. To increase the limit, reduce -search.maxConcurrentRequests or increase memory available to the process.", mts, maxConcurrentRequests, remainingMemory)
return mts
// vmstorageAPI impelements vmselectapi.API
type vmstorageAPI struct {
s *storage.Storage
}
// VMStorage impelements vmselectapi.API and vminsertapi.API.
type VMStorage struct {
s *storage.Storage
maxUniqueTimeseries int
maxUniqueTimeSeriesCalculated int
staleSnapshotsRemoverCh chan struct{}
staleSnapshotsRemoverWG sync.WaitGroup
}
func (api *VMStorage) initStaleSnapshotsRemover() {
if snapshotsMaxAge.Duration() <= 0 {
return
}
snapshotsMaxAgeDuration := snapshotsMaxAge.Duration()
api.staleSnapshotsRemoverWG.Go(func() {
d := timeutil.AddJitterToDuration(time.Second * 11)
t := time.NewTicker(d)
defer t.Stop()
for {
select {
case <-api.staleSnapshotsRemoverCh:
return
case <-t.C:
}
api.s.MustDeleteStaleSnapshots(snapshotsMaxAgeDuration)
}
})
}
func (api *VMStorage) Stop() {
close(api.staleSnapshotsRemoverCh)
api.staleSnapshotsRemoverWG.Wait()
api.s.MustClose()
}
// WriteRows writes metric rows to the storage.
//
// The caller should limit the number of concurrent calls to WriteRows() in
// order to limit memory usage.
func (api *VMStorage) WriteRows(rows []storage.MetricRow) error {
api.s.AddRows(rows, uint8(*precisionBits))
return nil
}
// WriteMetadata writes metrics metadata to storage.
//
// The caller should limit the number of concurrent calls to WriteMetadata() in
// order to limit memory usage.
func (api *VMStorage) WriteMetadata(rows []metricsmetadata.Row) error {
api.s.AddMetadataRows(rows)
return nil
}
// IsReadOnly returns true is the storage is in read-only mode.
func (api *VMStorage) IsReadOnly() bool {
return api.s.IsReadOnly()
}
func (api *VMStorage) InitSearch(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (vmselectapi.BlockIterator, error) {
func (api *vmstorageAPI) InitSearch(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (vmselectapi.BlockIterator, error) {
tr := sq.GetTimeRange()
maxMetrics := api.getMaxMetrics(sq.MaxMetrics)
maxMetrics := getMaxMetrics(sq.MaxMetrics)
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
if err != nil {
return nil, err
@@ -148,15 +80,158 @@ func (api *VMStorage) InitSearch(qt *querytracer.Tracer, sq *storage.SearchQuery
return bi, nil
}
func (api *VMStorage) getMaxMetrics(searchQueryLimit int) int {
if searchQueryLimit <= 0 {
return api.maxUniqueTimeSeriesCalculated
func (api *vmstorageAPI) SearchMetricNames(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) ([]string, error) {
tr := sq.GetTimeRange()
maxMetrics := sq.MaxMetrics
if maxMetrics <= 0 {
// fallback to maxUniqueTimeSeries if no limit is provided,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
maxMetrics = GetMaxUniqueTimeSeries()
}
// searchQueryLimit cannot exceed `-search.maxUniqueTimeseries`
if api.maxUniqueTimeseries != 0 && searchQueryLimit > api.maxUniqueTimeseries {
searchQueryLimit = api.maxUniqueTimeseries
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
if err != nil {
return nil, err
}
return searchQueryLimit
if len(tfss) == 0 {
return nil, fmt.Errorf("missing tag filters")
}
return api.s.SearchMetricNames(qt, tfss, tr, maxMetrics, deadline)
}
func (api *vmstorageAPI) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQuery, labelName string, maxLabelValues int, deadline uint64) ([]string, error) {
tr := sq.GetTimeRange()
maxMetrics := sq.MaxMetrics
if maxMetrics <= 0 {
// fallback to maxUniqueTimeSeries if no limit is provided,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
maxMetrics = GetMaxUniqueTimeSeries()
}
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
if err != nil {
return nil, err
}
return api.s.SearchLabelValues(qt, sq.AccountID, sq.ProjectID, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline)
}
func (api *vmstorageAPI) TagValueSuffixes(qt *querytracer.Tracer, accountID, projectID uint32, tr storage.TimeRange, tagKey, tagValuePrefix string, delimiter byte,
maxSuffixes int, deadline uint64) ([]string, error) {
suffixes, err := api.s.SearchTagValueSuffixes(qt, accountID, projectID, tr, tagKey, tagValuePrefix, delimiter, maxSuffixes, deadline)
if err != nil {
return nil, err
}
if len(suffixes) >= maxSuffixes {
return nil, fmt.Errorf("more than -search.maxTagValueSuffixesPerSearch=%d suffixes returned; "+
"either narrow down the search or increase -search.maxTagValueSuffixesPerSearch command-line flag value", maxSuffixes)
}
return suffixes, nil
}
func (api *vmstorageAPI) LabelNames(qt *querytracer.Tracer, sq *storage.SearchQuery, maxLabelNames int, deadline uint64) ([]string, error) {
tr := sq.GetTimeRange()
maxMetrics := sq.MaxMetrics
if maxMetrics <= 0 {
// fallback to maxUniqueTimeSeries if no limit is provided,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
maxMetrics = GetMaxUniqueTimeSeries()
}
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
if err != nil {
return nil, err
}
return api.s.SearchLabelNames(qt, sq.AccountID, sq.ProjectID, tfss, tr, maxLabelNames, maxMetrics, deadline)
}
func (api *vmstorageAPI) SeriesCount(_ *querytracer.Tracer, accountID, projectID uint32, deadline uint64) (uint64, error) {
return api.s.GetSeriesCount(accountID, projectID, deadline)
}
func (api *vmstorageAPI) Tenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline uint64) ([]string, error) {
return api.s.SearchTenants(qt, tr, deadline)
}
func (api *vmstorageAPI) TSDBStatus(qt *querytracer.Tracer, sq *storage.SearchQuery, focusLabel string, topN int, deadline uint64) (*storage.TSDBStatus, error) {
tr := sq.GetTimeRange()
maxMetrics := sq.MaxMetrics
if maxMetrics <= 0 {
// fallback to maxUniqueTimeSeries if no limit is provided,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
maxMetrics = GetMaxUniqueTimeSeries()
}
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
if err != nil {
return nil, err
}
date := uint64(sq.MinTimestamp) / (24 * 3600 * 1000)
return api.s.GetTSDBStatus(qt, sq.AccountID, sq.ProjectID, tfss, date, focusLabel, topN, maxMetrics, deadline)
}
func (api *vmstorageAPI) DeleteSeries(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (int, error) {
tr := sq.GetTimeRange()
maxMetrics := sq.MaxMetrics
if maxMetrics <= 0 {
// fallback to maxUniqueTimeSeries if no limit is provided,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
maxMetrics = GetMaxUniqueTimeSeries()
}
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
if err != nil {
return 0, err
}
if len(tfss) == 0 {
return 0, fmt.Errorf("missing tag filters")
}
return api.s.DeleteSeries(qt, tfss, maxMetrics)
}
func (api *vmstorageAPI) RegisterMetricNames(qt *querytracer.Tracer, mrs []storage.MetricRow, _ uint64) error {
api.s.RegisterMetricNames(qt, mrs)
return nil
}
func (api *vmstorageAPI) GetMetricNamesUsageStats(qt *querytracer.Tracer, tt *storage.TenantToken, limit, le int, matchPattern string, _ uint64) (metricnamestats.StatsResult, error) {
return api.s.GetMetricNamesStats(qt, tt, limit, le, matchPattern), nil
}
func (api *vmstorageAPI) ResetMetricNamesUsageStats(qt *querytracer.Tracer, _ uint64) error {
api.s.ResetMetricNamesStats(qt)
return nil
}
func (api *vmstorageAPI) setupTfss(qt *querytracer.Tracer, sq *storage.SearchQuery, tr storage.TimeRange, maxMetrics int, deadline uint64) ([]*storage.TagFilters, error) {
tfss := make([]*storage.TagFilters, 0, len(sq.TagFilterss))
accountID := sq.AccountID
projectID := sq.ProjectID
for _, tagFilters := range sq.TagFilterss {
tfs := storage.NewTagFilters(accountID, projectID)
for i := range tagFilters {
tf := &tagFilters[i]
if string(tf.Key) == "__graphite__" {
query := tf.Value
qtChild := qt.NewChild("searching for series matching __graphite__=%q", query)
paths, err := api.s.SearchGraphitePaths(qtChild, accountID, projectID, tr, query, maxMetrics, deadline)
qtChild.Donef("found %d series", len(paths))
if err != nil {
return nil, fmt.Errorf("error when searching for Graphite paths for query %q: %w", query, err)
}
if len(paths) >= maxMetrics {
return nil, fmt.Errorf("more than %d time series match Graphite query %q; "+
"either narrow down the query or increase the corresponding -search.max* command-line flag value at vmselect nodes; "+
"see https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#resource-usage-limits", maxMetrics, query)
}
tfs.AddGraphiteQuery(query, paths, tf.IsNegative)
continue
}
if err := tfs.Add(tf.Key, tf.Value, tf.IsNegative, tf.IsRegexp); err != nil {
return nil, fmt.Errorf("cannot parse tag filter %s: %w", tf, err)
}
}
tfss = append(tfss, tfs)
}
return tfss, nil
}
func (api *vmstorageAPI) GetMetadataRecords(qt *querytracer.Tracer, tt *storage.TenantToken, limit int, metricName string, deadline uint64) ([]*metricsmetadata.Row, error) {
return api.s.GetMetadataRows(qt, tt, limit, metricName, deadline)
}
// blockIterator implements vmselectapi.BlockIterator
@@ -197,197 +272,41 @@ func (bi *blockIterator) Error() error {
return bi.sr.Error()
}
// SearchMetricNames returns metric names for the given tfss on the given tr.
func (api *VMStorage) SearchMetricNames(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) ([]string, error) {
tr := sq.GetTimeRange()
maxMetrics := sq.MaxMetrics
if maxMetrics <= 0 {
// fallback to maxUniqueTimeSeries if no limit is provided,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
maxMetrics = api.maxUniqueTimeSeriesCalculated
func getMaxMetrics(searchQueryLimit int) int {
if searchQueryLimit <= 0 {
return GetMaxUniqueTimeSeries()
}
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
if err != nil {
return nil, err
// searchQueryLimit cannot exceed `-search.maxUniqueTimeseries`
if *maxUniqueTimeseries != 0 && searchQueryLimit > *maxUniqueTimeseries {
searchQueryLimit = *maxUniqueTimeseries
}
if len(tfss) == 0 {
return nil, fmt.Errorf("missing tag filters")
}
return api.s.SearchMetricNames(qt, tfss, tr, maxMetrics, deadline)
return searchQueryLimit
}
// SearchLabelValues searches for label values for the given labelName, tfss and
// tr.
func (api *VMStorage) LabelValues(qt *querytracer.Tracer, sq *storage.SearchQuery, labelName string, maxLabelValues int, deadline uint64) ([]string, error) {
tr := sq.GetTimeRange()
if maxLabelValues <= 0 || maxLabelValues > *maxTagValues {
maxLabelValues = *maxTagValues
}
maxMetrics := sq.MaxMetrics
if maxMetrics <= 0 {
// fallback to maxUniqueTimeSeries if no limit is provided,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
maxMetrics = api.maxUniqueTimeSeriesCalculated
}
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
if err != nil {
return nil, err
}
return api.s.SearchLabelValues(qt, sq.AccountID, sq.ProjectID, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline)
}
// TagValueSuffixes returns all the tag value suffixes for the given tagKey and
// tagValuePrefix on the given tr.
//
// This allows implementing
// https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find or
// similar APIs.
func (api *VMStorage) TagValueSuffixes(qt *querytracer.Tracer, accountID, projectID uint32, tr storage.TimeRange, tagKey, tagValuePrefix string, delimiter byte,
maxSuffixes int, deadline uint64) ([]string, error) {
if maxSuffixes <= 0 || maxSuffixes > *maxTagValueSuffixesPerSearch {
maxSuffixes = *maxTagValueSuffixesPerSearch
}
suffixes, err := api.s.SearchTagValueSuffixes(qt, accountID, projectID, tr, tagKey, tagValuePrefix, delimiter, maxSuffixes, deadline)
if err != nil {
return nil, err
}
if len(suffixes) >= maxSuffixes {
return nil, fmt.Errorf("more than -search.maxTagValueSuffixesPerSearch=%d suffixes returned; "+
"either narrow down the search or increase -search.maxTagValueSuffixesPerSearch command-line flag value", maxSuffixes)
}
return suffixes, nil
}
// SearchLabelNames searches for tag keys matching the given tfss on tr.
func (api *VMStorage) LabelNames(qt *querytracer.Tracer, sq *storage.SearchQuery, maxLabelNames int, deadline uint64) ([]string, error) {
tr := sq.GetTimeRange()
if maxLabelNames <= 0 || maxLabelNames > *maxTagKeys {
maxLabelNames = *maxTagKeys
}
maxMetrics := sq.MaxMetrics
if maxMetrics <= 0 {
// fallback to maxUniqueTimeSeries if no limit is provided,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
maxMetrics = api.maxUniqueTimeSeriesCalculated
}
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
if err != nil {
return nil, err
}
return api.s.SearchLabelNames(qt, sq.AccountID, sq.ProjectID, tfss, tr, maxLabelNames, maxMetrics, deadline)
}
func (api *VMStorage) SeriesCount(_ *querytracer.Tracer, accountID, projectID uint32, deadline uint64) (uint64, error) {
return api.s.GetSeriesCount(accountID, projectID, deadline)
}
func (api *VMStorage) Tenants(qt *querytracer.Tracer, tr storage.TimeRange, deadline uint64) ([]string, error) {
return api.s.SearchTenants(qt, tr, deadline)
}
// GetTSDBStatus returns TSDB status for given filters on the given date.
func (api *VMStorage) TSDBStatus(qt *querytracer.Tracer, sq *storage.SearchQuery, focusLabel string, topN int, deadline uint64) (*storage.TSDBStatus, error) {
tr := sq.GetTimeRange()
maxMetrics := sq.MaxMetrics
if maxMetrics <= 0 {
// fallback to maxUniqueTimeSeries if no limit is provided,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
maxMetrics = api.maxUniqueTimeSeriesCalculated
}
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
if err != nil {
return nil, err
}
date := uint64(sq.MinTimestamp) / (24 * 3600 * 1000)
return api.s.GetTSDBStatus(qt, sq.AccountID, sq.ProjectID, tfss, date, focusLabel, topN, maxMetrics, deadline)
}
// DeleteSeries deletes series matching tfss.
//
// Returns the number of deleted series.
func (api *VMStorage) DeleteSeries(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline uint64) (int, error) {
tr := sq.GetTimeRange()
maxMetrics := sq.MaxMetrics
if maxMetrics <= 0 {
// fallback to maxUniqueTimeSeries if no limit is provided,
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7857
maxMetrics = api.maxUniqueTimeSeriesCalculated
}
tfss, err := api.setupTfss(qt, sq, tr, maxMetrics, deadline)
if err != nil {
return 0, err
}
if len(tfss) == 0 {
return 0, fmt.Errorf("missing tag filters")
}
return api.s.DeleteSeries(qt, tfss, maxMetrics)
}
func (api *VMStorage) RegisterMetricNames(qt *querytracer.Tracer, mrs []storage.MetricRow, _ uint64) error {
api.s.RegisterMetricNames(qt, mrs)
return nil
}
// GetMetricNamesUsageStats returns metric name usage stats.
func (api *VMStorage) GetMetricNamesUsageStats(qt *querytracer.Tracer, tt *storage.TenantToken, limit, le int, matchPattern string, _ uint64) (metricnamestats.StatsResult, error) {
return api.s.GetMetricNamesStats(qt, tt, limit, le, matchPattern), nil
}
// ResetMetricNamesStats resets state for metric names usage tracker
func (api *VMStorage) ResetMetricNamesUsageStats(qt *querytracer.Tracer, _ uint64) error {
api.s.ResetMetricNamesStats(qt)
return nil
}
func (api *VMStorage) setupTfss(qt *querytracer.Tracer, sq *storage.SearchQuery, tr storage.TimeRange, maxMetrics int, deadline uint64) ([]*storage.TagFilters, error) {
tfss := make([]*storage.TagFilters, 0, len(sq.TagFilterss))
accountID := sq.AccountID
projectID := sq.ProjectID
for _, tagFilters := range sq.TagFilterss {
tfs := storage.NewTagFilters(accountID, projectID)
for i := range tagFilters {
tf := &tagFilters[i]
if string(tf.Key) == "__graphite__" {
query := tf.Value
qtChild := qt.NewChild("searching for series matching __graphite__=%q", query)
paths, err := api.s.SearchGraphitePaths(qtChild, accountID, projectID, tr, query, maxMetrics, deadline)
qtChild.Donef("found %d series", len(paths))
if err != nil {
return nil, fmt.Errorf("error when searching for Graphite paths for query %q: %w", query, err)
}
if len(paths) >= maxMetrics {
return nil, fmt.Errorf("more than %d time series match Graphite query %q; "+
"either narrow down the query or increase the corresponding -search.max* command-line flag value at vmselect nodes; "+
"see https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#resource-usage-limits", maxMetrics, query)
}
tfs.AddGraphiteQuery(query, paths, tf.IsNegative)
continue
}
if err := tfs.Add(tf.Key, tf.Value, tf.IsNegative, tf.IsRegexp); err != nil {
return nil, fmt.Errorf("cannot parse tag filter %s: %w", tf, err)
}
// GetMaxUniqueTimeSeries returns `-search.maxUniqueTimeseries` or the auto-calculated value based on available resources.
// The calculation is split into calculateMaxUniqueTimeSeriesForResource for unit testing.
func GetMaxUniqueTimeSeries() int {
maxUniqueTimeseriesValueOnce.Do(func() {
maxUniqueTimeseriesValue = *maxUniqueTimeseries
if maxUniqueTimeseriesValue <= 0 {
maxUniqueTimeseriesValue = calculateMaxUniqueTimeSeriesForResource(*maxConcurrentRequests, memory.Remaining())
}
tfss = append(tfss, tfs)
}
return tfss, nil
})
return maxUniqueTimeseriesValue
}
func (api *VMStorage) GetMetadataRecords(qt *querytracer.Tracer, tt *storage.TenantToken, limit int, metricName string, deadline uint64) ([]*metricsmetadata.Row, error) {
return api.s.GetMetadataRows(qt, tt, limit, metricName, deadline)
}
// deleteSnapshot deletes a snapshot by its name.
//
// Callers must wrap the call with wg.Add(1)...wg.Done().
func (api *VMStorage) deleteSnapshot(snapshotName string) error {
snapshots := api.s.MustListSnapshots()
for _, snName := range snapshots {
if snName == snapshotName {
if err := api.s.DeleteSnapshot(snName); err != nil {
return fmt.Errorf("cannot delete snapshot %q: %w", snName, err)
}
return nil
}
// calculateMaxUniqueTimeSeriesForResource calculate the max metrics limit calculated by available resources.
func calculateMaxUniqueTimeSeriesForResource(maxConcurrentRequests, remainingMemory int) int {
if maxConcurrentRequests <= 0 {
// This line should NOT be reached unless the user has set an incorrect `search.maxConcurrentRequests`.
// In such cases, fallback to unlimited.
logger.Warnf("limiting -search.maxUniqueTimeseries to %v because -search.maxConcurrentRequests=%d.", 2e9, maxConcurrentRequests)
return 2e9
}
return fmt.Errorf("cannot find snapshot %q", snapshotName)
// Calculate the max metrics limit for a single request in the worst-case concurrent scenario.
// The approximate size of 1 unique series that could occupy in the vmstorage is 200 bytes.
mts := remainingMemory / 200 / maxConcurrentRequests
logger.Infof("limiting -search.maxUniqueTimeseries to %d according to -search.maxConcurrentRequests=%d and remaining memory=%d bytes. To increase the limit, reduce -search.maxConcurrentRequests or increase memory available to the process.", mts, maxConcurrentRequests, remainingMemory)
return mts
}

View File

@@ -1,25 +1,22 @@
package main
package servers
import (
"math"
"strconv"
"runtime"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
func TestCalculateMaxMetricsLimitByResource(t *testing.T) {
f := func(maxConcurrentRequest, remainingMemory, expect int) {
t.Helper()
maxMetricsLimit := calculateMaxUniqueTimeseries(maxConcurrentRequest, remainingMemory)
maxMetricsLimit := calculateMaxUniqueTimeSeriesForResource(maxConcurrentRequest, remainingMemory)
if maxMetricsLimit != expect {
t.Fatalf("unexpected max metrics limit: got %d, want %d", maxMetricsLimit, expect)
}
}
// 64-bit architectures support memory sizes > 4GB.
if strconv.IntSize == 64 {
// Skip when GOARCH=386
if runtime.GOARCH != "386" {
// 8 CPU & 32 GiB
f(16, int(math.Round(32*1024*1024*1024*0.4)), 4294967)
// 4 CPU & 32 GiB
@@ -40,14 +37,10 @@ func TestGetMaxMetrics(t *testing.T) {
defer func() {
*maxUniqueTimeseries = originalMaxUniqueTimeSeries
}()
maxConcurrentRequests := 2 * cgroup.AvailableCPUs()
f := func(searchQueryLimit, storageMaxUniqueTimeseries, expect int) {
t.Helper()
*maxUniqueTimeseries = storageMaxUniqueTimeseries
s := &storage.Storage{}
vmstorage := newVMStorage(s, maxConcurrentRequests)
maxMetrics := vmstorage.getMaxMetrics(searchQueryLimit)
maxMetrics := getMaxMetrics(searchQueryLimit)
if maxMetrics != expect {
t.Fatalf("unexpected max metrics: got %d, want %d", maxMetrics, expect)
}

View File

@@ -22,7 +22,6 @@ var (
vminsertAddrRE = regexp.MustCompile(`accepting vminsert conns at (.*:\d{1,5})$`)
vminsertClusterNativeAddrRE = regexp.MustCompile(`started TCP clusternative server at "(.*:\d{1,5})"`)
vmselectAddrRE = regexp.MustCompile(`accepting vmselect conns at (.*:\d{1,5})$`)
vmauthHttpListenAddrRE = regexp.MustCompile(`pprof handlers are exposed at http://(.*:\d{1,5})/debug/pprof/`)
)
// app represents an instance of some VictoriaMetrics server (such as vmstorage,

View File

@@ -88,11 +88,19 @@ func (tc *TestCase) MustStartDefaultVmsingle() *Vmsingle {
}
// MustStartVmsingle is a test helper function that starts an instance of
// vmsingle (latest version) and fails the test if the app fails to start.
// vmsingle located at ../../bin/victoria-metrics-race and fails the test if the app
// fails to start.
func (tc *TestCase) MustStartVmsingle(instance string, flags []string) *Vmsingle {
tc.t.Helper()
return tc.MustStartVmsingleAt(instance, "../../bin/victoria-metrics-race", flags)
}
app, err := StartVmsingle(instance, flags, tc.cli, tc.output)
// MustStartVmsingleAt is a test helper function that starts an instance of
// vmsingle and fails the test if the app fails to start.
func (tc *TestCase) MustStartVmsingleAt(instance, binary string, flags []string) *Vmsingle {
tc.t.Helper()
app, err := StartVmsingleAt(instance, binary, flags, tc.cli, tc.output)
if err != nil {
tc.t.Fatalf("Could not start %s: %v", instance, err)
}
@@ -101,11 +109,19 @@ func (tc *TestCase) MustStartVmsingle(instance string, flags []string) *Vmsingle
}
// MustStartVmstorage is a test helper function that starts an instance of
// vmstorage (latest version) and fails the test if the app fails to start.
// vmstorage located at ../../bin/vmstorage-race and fails the test if the app fails
// to start.
func (tc *TestCase) MustStartVmstorage(instance string, flags []string) *Vmstorage {
tc.t.Helper()
return tc.MustStartVmstorageAt(instance, "../../bin/vmstorage-race", flags)
}
app, err := StartVmstorage(instance, flags, tc.cli, tc.output)
// MustStartVmstorageAt is a test helper function that starts an instance of
// vmstorage and fails the test if the app fails to start.
func (tc *TestCase) MustStartVmstorageAt(instance string, binary string, flags []string) *Vmstorage {
tc.t.Helper()
app, err := StartVmstorageAt(instance, binary, flags, tc.cli, tc.output)
if err != nil {
tc.t.Fatalf("Could not start %s: %v", instance, err)
}
@@ -114,7 +130,7 @@ func (tc *TestCase) MustStartVmstorage(instance string, flags []string) *Vmstora
}
// MustStartVmselect is a test helper function that starts an instance of
// vmselect (latest version) and fails the test if the app fails to start.
// vmselect and fails the test if the app fails to start.
func (tc *TestCase) MustStartVmselect(instance string, flags []string) *Vmselect {
tc.t.Helper()
@@ -274,8 +290,10 @@ func (tc *TestCase) MustStartDefaultCluster() *Vmcluster {
// tests usually come paired with corresponding vmsingle tests.
type ClusterOptions struct {
Vmstorage1Instance string
Vmstorage1Binary string
Vmstorage1Flags []string
Vmstorage2Instance string
Vmstorage2Binary string
Vmstorage2Flags []string
VminsertInstance string
VminsertFlags []string
@@ -287,8 +305,15 @@ type ClusterOptions struct {
func (tc *TestCase) MustStartCluster(opts *ClusterOptions) *Vmcluster {
tc.t.Helper()
vmstorage1 := tc.MustStartVmstorage(opts.Vmstorage1Instance, opts.Vmstorage1Flags)
vmstorage2 := tc.MustStartVmstorage(opts.Vmstorage2Instance, opts.Vmstorage2Flags)
if opts.Vmstorage1Binary == "" {
opts.Vmstorage1Binary = "../../bin/vmstorage-race"
}
vmstorage1 := tc.MustStartVmstorageAt(opts.Vmstorage1Instance, opts.Vmstorage1Binary, opts.Vmstorage1Flags)
if opts.Vmstorage2Binary == "" {
opts.Vmstorage2Binary = "../../bin/vmstorage-race"
}
vmstorage2 := tc.MustStartVmstorageAt(opts.Vmstorage2Instance, opts.Vmstorage2Binary, opts.Vmstorage2Flags)
opts.VminsertFlags = append(opts.VminsertFlags, []string{
"-storageNode=" + vmstorage1.VminsertAddr() + "," + vmstorage2.VminsertAddr(),

View File

@@ -1,50 +0,0 @@
package apptest
// MustStartVmsingle_v1_132_0 is a test helper function that starts an instance
// of vmsingle-v1.132.0 (last version that uses legacy index) and fails the test
// if the app fails to start.
func (tc *TestCase) MustStartVmsingle_v1_132_0(instance string, flags []string) *Vmsingle {
tc.t.Helper()
app, err := StartVmsingle_v1_132_0(instance, flags, tc.cli, tc.output)
if err != nil {
tc.t.Fatalf("Could not start %s: %v", instance, err)
}
tc.addApp(instance, app)
return app
}
// MustStartVmstorage_v1_132_0 is a test helper function that starts an instance
// of vmstorage-v1.132.0 (last version that uses legacy index) and fails the
// test if the app fails to start.
func (tc *TestCase) MustStartVmstorage_v1_132_0(instance string, flags []string) *Vmstorage {
tc.t.Helper()
app, err := StartVmstorage_v1_132_0(instance, flags, tc.cli, tc.output)
if err != nil {
tc.t.Fatalf("Could not start %s: %v", instance, err)
}
tc.addApp(instance, app)
return app
}
// MustStartCluster_v1_132_0 starts a cluster with vmstorage-v1.132.0 with
// custom flags.
func (tc *TestCase) MustStartCluster_v1_132_0(opts *ClusterOptions) *Vmcluster {
tc.t.Helper()
vmstorage1 := tc.MustStartVmstorage_v1_132_0(opts.Vmstorage1Instance, opts.Vmstorage1Flags)
vmstorage2 := tc.MustStartVmstorage_v1_132_0(opts.Vmstorage2Instance, opts.Vmstorage2Flags)
opts.VminsertFlags = append(opts.VminsertFlags, []string{
"-storageNode=" + vmstorage1.VminsertAddr() + "," + vmstorage2.VminsertAddr(),
}...)
vminsert := tc.MustStartVminsert(opts.VminsertInstance, opts.VminsertFlags)
opts.VmselectFlags = append(opts.VmselectFlags, []string{
"-storageNode=" + vmstorage1.VmselectAddr() + "," + vmstorage2.VmselectAddr(),
}...)
vmselect := tc.MustStartVmselect(opts.VmselectInstance, opts.VmselectFlags)
return &Vmcluster{vminsert, vmselect, []*Vmstorage{vmstorage1, vmstorage2}}
}

View File

@@ -2,6 +2,7 @@ package tests
import (
"fmt"
"os"
"path/filepath"
"slices"
"testing"
@@ -10,6 +11,11 @@ import (
at "github.com/VictoriaMetrics/VictoriaMetrics/apptest"
)
var (
legacyVmsinglePath = os.Getenv("VM_LEGACY_VMSINGLE_PATH")
legacyVmstoragePath = os.Getenv("VM_LEGACY_VMSTORAGE_PATH")
)
type testLegacyDeleteSeriesOpts struct {
startLegacySUT func() at.PrometheusWriteQuerier
startNewSUT func() at.PrometheusWriteQuerier
@@ -25,7 +31,7 @@ func TestLegacySingleDeleteSeries(t *testing.T) {
opts := testLegacyDeleteSeriesOpts{
startLegacySUT: func() at.PrometheusWriteQuerier {
return tc.MustStartVmsingle_v1_132_0("vmsingle-legacy", []string{
return tc.MustStartVmsingleAt("vmsingle-legacy", legacyVmsinglePath, []string{
"-storageDataPath=" + storageDataPath,
"-retentionPeriod=100y",
"-search.maxStalenessInterval=1m",
@@ -58,13 +64,15 @@ func TestLegacyClusterDeleteSeries(t *testing.T) {
opts := testLegacyDeleteSeriesOpts{
startLegacySUT: func() at.PrometheusWriteQuerier {
return tc.MustStartCluster_v1_132_0(&at.ClusterOptions{
return tc.MustStartCluster(&at.ClusterOptions{
Vmstorage1Instance: "vmstorage1-legacy",
Vmstorage1Binary: legacyVmstoragePath,
Vmstorage1Flags: []string{
"-storageDataPath=" + storage1DataPath,
"-retentionPeriod=100y",
},
Vmstorage2Instance: "vmstorage2-legacy",
Vmstorage2Binary: legacyVmstoragePath,
Vmstorage2Flags: []string{
"-storageDataPath=" + storage2DataPath,
"-retentionPeriod=100y",
@@ -247,7 +255,7 @@ func TestLegacySingleBackupRestore(t *testing.T) {
opts := testLegacyBackupRestoreOpts{
startLegacySUT: func() at.PrometheusWriteQuerier {
return tc.MustStartVmsingle_v1_132_0("vmsingle-legacy", []string{
return tc.MustStartVmsingleAt("vmsingle-legacy", legacyVmsinglePath, []string{
"-storageDataPath=" + storageDataPath,
"-retentionPeriod=100y",
"-search.disableCache=true",
@@ -290,13 +298,15 @@ func TestLegacyClusterBackupRestore(t *testing.T) {
opts := testLegacyBackupRestoreOpts{
startLegacySUT: func() at.PrometheusWriteQuerier {
return tc.MustStartCluster_v1_132_0(&at.ClusterOptions{
return tc.MustStartCluster(&at.ClusterOptions{
Vmstorage1Instance: "vmstorage1-legacy",
Vmstorage1Binary: legacyVmstoragePath,
Vmstorage1Flags: []string{
"-storageDataPath=" + storage1DataPath,
"-retentionPeriod=100y",
},
Vmstorage2Instance: "vmstorage2-legacy",
Vmstorage2Binary: legacyVmstoragePath,
Vmstorage2Flags: []string{
"-storageDataPath=" + storage2DataPath,
"-retentionPeriod=100y",
@@ -573,7 +583,7 @@ func TestLegacySingleDowngrade(t *testing.T) {
opts := testLegacyDowngradeOpts{
startLegacySUT: func() at.PrometheusWriteQuerier {
return tc.MustStartVmsingle_v1_132_0("vmsingle-legacy", []string{
return tc.MustStartVmsingleAt("vmsingle-legacy", legacyVmsinglePath, []string{
"-storageDataPath=" + storageDataPath,
"-retentionPeriod=100y",
"-search.disableCache=true",
@@ -608,13 +618,15 @@ func TestLegacyClusterDowngrade(t *testing.T) {
opts := testLegacyDowngradeOpts{
startLegacySUT: func() at.PrometheusWriteQuerier {
return tc.MustStartCluster_v1_132_0(&at.ClusterOptions{
return tc.MustStartCluster(&at.ClusterOptions{
Vmstorage1Instance: "vmstorage1-legacy",
Vmstorage1Binary: legacyVmstoragePath,
Vmstorage1Flags: []string{
"-storageDataPath=" + storage1DataPath,
"-retentionPeriod=100y",
},
Vmstorage2Instance: "vmstorage2-legacy",
Vmstorage2Binary: legacyVmstoragePath,
Vmstorage2Flags: []string{
"-storageDataPath=" + storage2DataPath,
"-retentionPeriod=100y",

View File

@@ -16,63 +16,43 @@ import (
"github.com/golang/snappy"
)
// StartVmagent starts the latest version of vmagent.
//
// The path to the binary can be provided via VMAGENT_PATH environment
// variable. If the variable is not set, ../../bin/vmagent-race will be
// used.
// Vmagent holds the state of a vmagent app and provides vmagent-specific functions
type Vmagent struct {
*app
*metricsClient
httpListenAddr string
cli *Client
}
// StartVmagent starts an instance of vmagent with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr)
func StartVmagent(instance string, flags []string, cli *Client, promScrapeConfigFilePath string, output io.Writer) (*Vmagent, error) {
binary := os.Getenv("VMAGENT_PATH")
if binary == "" {
binary = "../../bin/vmagent-race"
extractREs := []*regexp.Regexp{
httpListenAddrRE,
}
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
app, stderrExtracts, err := startApp(instance, "../../bin/vmagent-race", flags, &appOptions{
defaultFlags: map[string]string{
"-httpListenAddr": "127.0.0.1:0",
"-promscrape.config": promScrapeConfigFilePath,
"-remoteWrite.tmpDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
},
extractREs: []*regexp.Regexp{
httpListenAddrRE,
},
output: output,
extractREs: extractREs,
output: output,
})
if err != nil {
return nil, err
}
return newVmagent(app, cli, vmagentRuntimeValues{
httpListenAddr: stderrExtracts[0],
}), nil
}
type vmagentRuntimeValues struct {
httpListenAddr string
}
func newVmagent(app *app, cli *Client, rt vmagentRuntimeValues) *Vmagent {
return &Vmagent{
app: app,
metricsClient: newMetricsClient(cli, stderrExtracts[0]),
httpListenAddr: stderrExtracts[0],
cli: cli,
metricsClient: newMetricsClient(cli, rt.httpListenAddr),
httpListenAddr: rt.httpListenAddr,
}
}
// Vmagent holds the state of a vmagent app and provides vmagent-specific
// functions.
type Vmagent struct {
*app
*metricsClient
cli *Client
httpListenAddr string
}
// HTTPAddr returns the address at which the vmagent process is listening
// for http connections.
func (app *Vmagent) HTTPAddr() string {
return app.httpListenAddr
}, nil
}
// APIV1ImportPrometheus is a test helper function that inserts a
@@ -223,6 +203,12 @@ func (app *Vmagent) PrometheusAPIV1Write(t *testing.T, wr prompb.WriteRequest, o
})
}
// HTTPAddr returns the address at which the vmagent process is listening
// for http connections.
func (app *Vmagent) HTTPAddr() string {
return app.httpListenAddr
}
// sendBlocking sends the data to vmstorage by executing `send` function and
// waits until the data is actually sent.
//

View File

@@ -2,7 +2,6 @@ package apptest
import (
"io"
"os"
"regexp"
"syscall"
"testing"
@@ -11,48 +10,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
)
// StartVmauth starts the latest version of vmauth.
//
// The path to the binary can be provided via VMAUTH_PATH environment
// variable. If the variable is not set, ../../bin/vmauth-race will be
// used.
func StartVmauth(instance string, flags []string, cli *Client, configFilePath string, output io.Writer) (*Vmauth, error) {
binary := os.Getenv("VMAUTH_PATH")
if binary == "" {
binary = "../../bin/vmauth-race"
}
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
defaultFlags: map[string]string{
"-httpListenAddr": "127.0.0.1:0",
"-auth.config": configFilePath,
},
extractREs: []*regexp.Regexp{
vmauthHttpListenAddrRE,
},
output: output,
})
if err != nil {
return nil, err
}
return newVmauth(app, cli, configFilePath, vmauthRuntimeValues{
httpListenAddr: stderrExtracts[0],
}), nil
}
type vmauthRuntimeValues struct {
httpListenAddr string
}
func newVmauth(app *app, cli *Client, configFilePath string, rt vmauthRuntimeValues) *Vmauth {
return &Vmauth{
app: app,
metricsClient: newMetricsClient(cli, rt.httpListenAddr),
httpListenAddr: rt.httpListenAddr,
configFilePath: configFilePath,
cli: cli,
}
}
var httpBuilitinListenAddrRE = regexp.MustCompile(`pprof handlers are exposed at http://(.*:\d{1,5})/debug/pprof/`)
// Vmauth holds the state of a vmauth app and provides vmauth-specific
// functions.
@@ -60,14 +18,38 @@ type Vmauth struct {
*app
*metricsClient
cli *Client
httpListenAddr string
configFilePath string
cli *Client
}
// GetHTTPListenAddr returns listen http addr
func (app *Vmauth) GetHTTPListenAddr() string {
return app.httpListenAddr
// StartVmauth starts an instance of vmauth with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr)
func StartVmauth(instance string, flags []string, cli *Client, configFilePath string, output io.Writer) (*Vmauth, error) {
extractREs := []*regexp.Regexp{
httpBuilitinListenAddrRE,
}
app, stderrExtracts, err := startApp(instance, "../../bin/vmauth-race", flags, &appOptions{
defaultFlags: map[string]string{
"-httpListenAddr": "127.0.0.1:0",
"-auth.config": configFilePath,
},
extractREs: extractREs,
output: output,
})
if err != nil {
return nil, err
}
return &Vmauth{
app: app,
metricsClient: newMetricsClient(cli, stderrExtracts[0]),
httpListenAddr: stderrExtracts[0],
configFilePath: configFilePath,
cli: cli,
}, nil
}
// UpdateConfiguration updates the vmauth configuration file with the provided YAML content,
@@ -97,3 +79,8 @@ func (app *Vmauth) UpdateConfiguration(t *testing.T, configFileYAML string) {
t.Fatalf("config were not reloaded after SIGHUP signal; previous total: %d, current total: %d", prevTotal, currTotal)
}
// GetHTTPListenAddr returns listen http addr
func (app *Vmauth) GetHTTPListenAddr() string {
return app.httpListenAddr
}

View File

@@ -1,26 +1,15 @@
package apptest
import (
"io"
"os"
)
import "io"
// StartVmbackup starts the latest version of vmbackup with the given flags and
// waits until it exits.
//
// The path to the binary can be provided via VMBACKUP_PATH environment
// variable. If the variable is not set, ../../bin/vmbackup-race will be
// used.
// StartVmbackup starts an instance of vmbackup with the given flags and waits
// until it exits.
func StartVmbackup(instance, storageDataPath, snapshotCreateURL, dst string, output io.Writer) error {
binary := os.Getenv("VMBACKUP_PATH")
if binary == "" {
binary = "../../bin/vmbackup-race"
}
flags := []string{
"-storageDataPath=" + storageDataPath,
"-snapshot.createURL=" + snapshotCreateURL,
"-dst=" + dst,
}
_, _, err := startApp(instance, binary, flags, &appOptions{wait: true, output: output})
_, _, err := startApp(instance, "../../bin/vmbackup-race", flags, &appOptions{wait: true, output: output})
return err
}

View File

@@ -1,23 +1,9 @@
package apptest
import (
"io"
"os"
)
import "io"
// StartVmctl starts an instance of vmctl cli with the given flags
// StartVmctl starts the latest version of vmctl with the given flags and
// waits until it exits.
//
// The path to the binary can be provided via VMCTL_PATH environment
// variable. If the variable is not set, ../../bin/vmctl-race will be
// used.
func StartVmctl(instance string, flags []string, output io.Writer) error {
binary := os.Getenv("VMCTL_PATH")
if binary == "" {
binary = "../../bin/vmctl-race"
}
_, _, err := startApp(instance, binary, flags, &appOptions{wait: true, output: output})
_, _, err := startApp(instance, "../../bin/vmctl-race", flags, &appOptions{wait: true, output: output})
return err
}

View File

@@ -3,13 +3,23 @@ package apptest
import (
"fmt"
"io"
"os"
"regexp"
"strings"
"testing"
"time"
)
// Vminsert holds the state of a vminsert app and provides vminsert-specific
// functions.
type Vminsert struct {
*app
*metricsClient
*vminsertClient
httpListenAddr string
clusternativeListenAddr string
}
// storageNodes returns the storage node addresses passed to vminsert via
// -storageNode command line flag.
func storageNodes(flags []string) []string {
@@ -21,11 +31,9 @@ func storageNodes(flags []string) []string {
return nil
}
// StartVminsert starts the latest version of vminsert.
//
// The path to the binary can be provided via VMINSERT_PATH environment
// variable. If the variable is not set, ../../bin/vminsert-race will be
// used.
// StartVminsert starts an instance of vminsert with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr)
func StartVminsert(instance string, flags []string, cli *Client, output io.Writer) (*Vminsert, error) {
extractREs := []*regexp.Regexp{
httpListenAddrRE,
@@ -40,15 +48,11 @@ func StartVminsert(instance string, flags []string, cli *Client, output io.Write
extractREs = append(extractREs, regexp.MustCompile(logRecord))
}
binary := os.Getenv("VMINSERT_PATH")
if binary == "" {
binary = "../../bin/vminsert-race"
}
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
app, stderrExtracts, err := startApp(instance, "../../bin/vminsert-race", flags, &appOptions{
defaultFlags: map[string]string{
"-httpListenAddr": "127.0.0.1:0",
"-clusternativeListenAddr": "127.0.0.1:0",
"-graphiteListenAddr": "127.0.0.1:0",
"-graphiteListenAddr": ":0",
"-opentsdbListenAddr": "127.0.0.1:0",
"-clusternative.vminsertConnsShutdownDuration": "1ms",
},
@@ -59,56 +63,27 @@ func StartVminsert(instance string, flags []string, cli *Client, output io.Write
return nil, err
}
return newVminsert(app, cli, vminsertRuntimeValues{
metricsClient := newMetricsClient(cli, stderrExtracts[0])
return &Vminsert{
app: app,
metricsClient: metricsClient,
vminsertClient: &vminsertClient{
vminsertCli: cli,
url: func(op, path string, opts QueryOpts) string {
return getClusterPath(stderrExtracts[0], op, path, opts)
},
openTSDBURL: func(op, path string, opts QueryOpts) string {
return getClusterPath(stderrExtracts[3], op, path, opts)
},
graphiteListenAddr: stderrExtracts[2],
sendBlocking: func(t *testing.T, numRecordsToSend int, send func()) {
t.Helper()
sendBlocking(t, metricsClient, numRecordsToSend, send)
},
},
httpListenAddr: stderrExtracts[0],
clusternativeListenAddr: stderrExtracts[1],
graphiteListenAddr: stderrExtracts[2],
openTSDBListenAddr: stderrExtracts[3],
}), nil
}
type vminsertRuntimeValues struct {
httpListenAddr string
clusternativeListenAddr string
graphiteListenAddr string
openTSDBListenAddr string
}
func newVminsert(app *app, cli *Client, rt vminsertRuntimeValues) *Vminsert {
metricsClient := newMetricsClient(cli, rt.httpListenAddr)
vminsertClient := &vminsertClient{
vminsertCli: cli,
url: func(op, path string, opts QueryOpts) string {
return getClusterPath(rt.httpListenAddr, op, path, opts)
},
openTSDBURL: func(op, path string, opts QueryOpts) string {
return getClusterPath(rt.openTSDBListenAddr, op, path, opts)
},
graphiteListenAddr: rt.graphiteListenAddr,
sendBlocking: func(t *testing.T, numRecordsToSend int, send func()) {
t.Helper()
sendBlocking(t, metricsClient, numRecordsToSend, send)
},
}
return &Vminsert{
app: app,
metricsClient: metricsClient,
vminsertClient: vminsertClient,
httpListenAddr: rt.httpListenAddr,
clusternativeListenAddr: rt.clusternativeListenAddr,
}
}
// Vminsert holds the state of a vminsert app and provides vminsert-specific
// functions.
type Vminsert struct {
*app
*metricsClient
*vminsertClient
httpListenAddr string
clusternativeListenAddr string
}, nil
}
// ClusternativeListenAddr returns the address at which the vminsert process is

View File

@@ -1,25 +1,14 @@
package apptest
import (
"io"
"os"
)
import "io"
// StartVmrestore starts the latest version of vmrestore with the given flags
// and waits until it exits.
//
// The path to the binary can be provided via VMRESTORE_PATH environment
// variable. If the variable is not set, ../../bin/vmrestore-race will be
// used.
// StartVmrestore starts an instance of vmrestore with the given flags and waits
// until it exits.
func StartVmrestore(instance, src, storageDataPath string, output io.Writer) error {
binary := os.Getenv("VMRESTORE_PATH")
if binary == "" {
binary = "../../bin/vmrestore-race"
}
flags := []string{
"-src=" + src,
"-storageDataPath=" + storageDataPath,
}
_, _, err := startApp(instance, binary, flags, &appOptions{wait: true, output: output})
_, _, err := startApp(instance, "../../bin/vmrestore-race", flags, &appOptions{wait: true, output: output})
return err
}

View File

@@ -3,21 +3,26 @@ package apptest
import (
"fmt"
"io"
"os"
"regexp"
)
// StartVmselect starts the latest version of vmselect.
//
// The path to the binary can be provided via VMSELECT_PATH environment
// variable. If the variable is not set, ../../bin/vmselect-race will be
// used.
// Vmselect holds the state of a vmselect app and provides vmselect-specific
// functions.
type Vmselect struct {
*app
*metricsClient
*vmselectClient
httpListenAddr string
clusternativeListenAddr string
cli *Client
}
// StartVmselect starts an instance of vmselect with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr)
func StartVmselect(instance string, flags []string, cli *Client, output io.Writer) (*Vmselect, error) {
binary := os.Getenv("VMSELECT_PATH")
if binary == "" {
binary = "../../bin/vmselect-race"
}
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
app, stderrExtracts, err := startApp(instance, "../../bin/vmselect-race", flags, &appOptions{
defaultFlags: map[string]string{
"-httpListenAddr": "127.0.0.1:0",
"-clusternativeListenAddr": "127.0.0.1:0",
@@ -32,43 +37,21 @@ func StartVmselect(instance string, flags []string, cli *Client, output io.Write
return nil, err
}
return newVmselect(app, cli, vmselectRuntimeValues{
httpListenAddr: stderrExtracts[0],
clusternativeListenAddr: stderrExtracts[1],
}), nil
}
type vmselectRuntimeValues struct {
httpListenAddr string
clusternativeListenAddr string
}
func newVmselect(app *app, cli *Client, rt vmselectRuntimeValues) *Vmselect {
return &Vmselect{
app: app,
metricsClient: newMetricsClient(cli, rt.httpListenAddr),
metricsClient: newMetricsClient(cli, stderrExtracts[0]),
vmselectClient: &vmselectClient{
vmselectCli: cli,
url: func(op, path string, opts QueryOpts) string {
return getClusterPath(rt.httpListenAddr, op, path, opts)
return getClusterPath(stderrExtracts[0], op, path, opts)
},
metricNamesStatsResetURL: fmt.Sprintf("http://%s/admin/api/v1/admin/status/metric_names_stats/reset", rt.httpListenAddr),
tenantsURL: fmt.Sprintf("http://%s/admin/tenants", rt.httpListenAddr),
metricNamesStatsResetURL: fmt.Sprintf("http://%s/admin/api/v1/admin/status/metric_names_stats/reset", stderrExtracts[0]),
tenantsURL: fmt.Sprintf("http://%s/admin/tenants", stderrExtracts[0]),
},
httpListenAddr: rt.httpListenAddr,
clusternativeListenAddr: rt.clusternativeListenAddr,
}
}
// Vmselect holds the state of a vmselect app and provides vmselect-specific
// functions.
type Vmselect struct {
*app
*metricsClient
*vmselectClient
httpListenAddr string
clusternativeListenAddr string
httpListenAddr: stderrExtracts[0],
clusternativeListenAddr: stderrExtracts[1],
cli: cli,
}, nil
}
// ClusternativeListenAddr returns the address at which the vmselect process is

View File

@@ -9,85 +9,6 @@ import (
"time"
)
// StartVmsingle starts the latest version of vmsingle.
//
// The path to the binary can be provided via VMSINGLE_PATH environment
// variable. If the variable is not set, ../../bin/victoria-metrics-race will be
// used.
func StartVmsingle(instance string, flags []string, cli *Client, output io.Writer) (*Vmsingle, error) {
binary := os.Getenv("VMSINGLE_PATH")
if binary == "" {
binary = "../../bin/victoria-metrics-race"
}
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
defaultFlags: map[string]string{
"-storageDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
"-httpListenAddr": "127.0.0.1:0",
"-graphiteListenAddr": "127.0.0.1:0",
"-opentsdbListenAddr": "127.0.0.1:0",
},
extractREs: []*regexp.Regexp{
storageDataPathRE,
httpListenAddrRE,
graphiteListenAddrRE,
openTSDBListenAddrRE,
},
output: output,
})
if err != nil {
return nil, err
}
return newVmsingle(app, cli, vmsingleRuntimeValues{
storageDataPath: stderrExtracts[0],
httpListenAddr: stderrExtracts[1],
graphiteListenAddr: stderrExtracts[2],
openTSDBListenAddr: stderrExtracts[3],
}), nil
}
type vmsingleRuntimeValues struct {
storageDataPath string
httpListenAddr string
graphiteListenAddr string
openTSDBListenAddr string
}
func newVmsingle(app *app, cli *Client, rt vmsingleRuntimeValues) *Vmsingle {
return &Vmsingle{
app: app,
metricsClient: newMetricsClient(cli, rt.httpListenAddr),
vmstorageClient: &vmstorageClient{
vmstorageCli: cli,
httpListenAddr: rt.httpListenAddr,
},
vmselectClient: &vmselectClient{
vmselectCli: cli,
url: func(op, path string, opts QueryOpts) string {
return fmt.Sprintf("http://%s/%s", rt.httpListenAddr, path)
},
metricNamesStatsResetURL: fmt.Sprintf("http://%s/api/v1/admin/status/metric_names_stats/reset", rt.httpListenAddr),
tenantsURL: "vmsingle-does-not-serve-tenants",
},
vminsertClient: &vminsertClient{
vminsertCli: cli,
url: func(_, path string, _ QueryOpts) string {
return fmt.Sprintf("http://%s/%s", rt.httpListenAddr, path)
},
openTSDBURL: func(_, path string, _ QueryOpts) string {
return fmt.Sprintf("http://%s/%s", rt.openTSDBListenAddr, path)
},
graphiteListenAddr: rt.graphiteListenAddr,
sendBlocking: func(t *testing.T, _ int, send func()) {
t.Helper()
send()
},
},
storageDataPath: rt.storageDataPath,
httpListenAddr: rt.httpListenAddr,
}
}
// Vmsingle holds the state of a vmsingle app and provides vmsingle-specific
// functions.
type Vmsingle struct {
@@ -101,6 +22,63 @@ type Vmsingle struct {
httpListenAddr string
}
// StartVmsingleAt starts an instance of vmsingle with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr).
func StartVmsingleAt(instance, binary string, flags []string, cli *Client, output io.Writer) (*Vmsingle, error) {
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
defaultFlags: map[string]string{
"-storageDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
"-httpListenAddr": "127.0.0.1:0",
"-graphiteListenAddr": ":0",
"-opentsdbListenAddr": "127.0.0.1:0",
},
extractREs: []*regexp.Regexp{
storageDataPathRE,
httpListenAddrRE,
graphiteListenAddrRE,
openTSDBListenAddrRE,
},
output: output,
})
if err != nil {
return nil, err
}
return &Vmsingle{
app: app,
metricsClient: newMetricsClient(cli, stderrExtracts[1]),
vmstorageClient: &vmstorageClient{
vmstorageCli: cli,
httpListenAddr: stderrExtracts[1],
},
vmselectClient: &vmselectClient{
vmselectCli: cli,
url: func(op, path string, opts QueryOpts) string {
return fmt.Sprintf("http://%s/%s", stderrExtracts[1], path)
},
metricNamesStatsResetURL: fmt.Sprintf("http://%s/api/v1/admin/status/metric_names_stats/reset", stderrExtracts[1]),
tenantsURL: "vmsingle-does-not-serve-tenants",
},
vminsertClient: &vminsertClient{
vminsertCli: cli,
url: func(_, path string, _ QueryOpts) string {
return fmt.Sprintf("http://%s/%s", stderrExtracts[1], path)
},
openTSDBURL: func(_, path string, _ QueryOpts) string {
return fmt.Sprintf("http://%s/%s", stderrExtracts[3], path)
},
graphiteListenAddr: stderrExtracts[2],
sendBlocking: func(t *testing.T, _ int, send func()) {
t.Helper()
send()
},
},
storageDataPath: stderrExtracts[0],
httpListenAddr: stderrExtracts[1],
}, nil
}
// HTTPAddr returns the address at which the vminsert process is
// listening for incoming HTTP requests.
func (app *Vmsingle) HTTPAddr() string {

View File

@@ -1,43 +0,0 @@
package apptest
import (
"fmt"
"io"
"os"
"regexp"
"time"
)
// StartVmsingle_v1_132_0 starts vmsingle-v1.132.0 (the last version that uses
// legacy index).
//
// The path to the binary must be provided via VMSINGLE_V1_132_0_PATH
// environment variable.
func StartVmsingle_v1_132_0(instance string, flags []string, cli *Client, output io.Writer) (*Vmsingle, error) {
binary := os.Getenv("VMSINGLE_V1_132_0_PATH")
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
defaultFlags: map[string]string{
"-storageDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
"-httpListenAddr": "127.0.0.1:0",
"-graphiteListenAddr": "127.0.0.1:0",
"-opentsdbListenAddr": "127.0.0.1:0",
},
extractREs: []*regexp.Regexp{
storageDataPathRE,
httpListenAddrRE,
graphiteListenAddrRE,
openTSDBListenAddrRE,
},
output: output,
})
if err != nil {
return nil, err
}
return newVmsingle(app, cli, vmsingleRuntimeValues{
storageDataPath: stderrExtracts[0],
httpListenAddr: stderrExtracts[1],
graphiteListenAddr: stderrExtracts[2],
openTSDBListenAddr: stderrExtracts[3],
}), nil
}

View File

@@ -8,22 +8,23 @@ import (
"time"
)
// StartVmstorage starts the latest version of vmstorage.
//
// The path to the binary can be provided via VMSTORAGE_PATH environment
// variable. If the variable is not set, ../../bin/vmstorage-race will be used.
func StartVmstorage(instance string, flags []string, cli *Client, output io.Writer) (*Vmstorage, error) {
binary := os.Getenv("VMSTORAGE_PATH")
if binary == "" {
binary = "../../bin/vmstorage-race"
}
return startVmstorage(instance, binary, flags, cli, output)
// Vmstorage holds the state of a vmstorage app and provides vmstorage-specific
// functions.
type Vmstorage struct {
*app
*metricsClient
*vmstorageClient
storageDataPath string
httpListenAddr string
vminsertAddr string
vmselectAddr string
}
// startVmstorage starts an instance of vmstorage with the given flags. It also
// StartVmstorageAt starts an instance of vmstorage with the given flags. It also
// sets the default flags and populates the app instance state with runtime
// values extracted from the application log (such as httpListenAddr)
func startVmstorage(instance, binary string, flags []string, cli *Client, output io.Writer) (*Vmstorage, error) {
func StartVmstorageAt(instance, binary string, flags []string, cli *Client, output io.Writer) (*Vmstorage, error) {
app, stderrExtracts, err := startApp(instance, binary, flags, &appOptions{
defaultFlags: map[string]string{
"-storageDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
@@ -43,47 +44,18 @@ func startVmstorage(instance, binary string, flags []string, cli *Client, output
return nil, err
}
return newVmstorage(app, cli, vmstorageRuntimeValues{
return &Vmstorage{
app: app,
metricsClient: newMetricsClient(cli, stderrExtracts[1]),
vmstorageClient: &vmstorageClient{
vmstorageCli: cli,
httpListenAddr: stderrExtracts[1],
},
storageDataPath: stderrExtracts[0],
httpListenAddr: stderrExtracts[1],
vminsertAddr: stderrExtracts[2],
vmselectAddr: stderrExtracts[3],
}), nil
}
type vmstorageRuntimeValues struct {
storageDataPath string
httpListenAddr string
vminsertAddr string
vmselectAddr string
}
func newVmstorage(app *app, cli *Client, rt vmstorageRuntimeValues) *Vmstorage {
return &Vmstorage{
app: app,
metricsClient: newMetricsClient(cli, rt.httpListenAddr),
vmstorageClient: &vmstorageClient{
vmstorageCli: cli,
httpListenAddr: rt.httpListenAddr,
},
storageDataPath: rt.storageDataPath,
httpListenAddr: rt.httpListenAddr,
vminsertAddr: rt.vminsertAddr,
vmselectAddr: rt.vmselectAddr,
}
}
// Vmstorage holds the state of a vmstorage app and provides vmstorage-specific
// functions.
type Vmstorage struct {
*app
*metricsClient
*vmstorageClient
storageDataPath string
httpListenAddr string
vminsertAddr string
vmselectAddr string
}, nil
}
// VminsertAddr returns the address at which the vmstorage process is listening

View File

@@ -1,16 +0,0 @@
package apptest
import (
"io"
"os"
)
// StartVmstorage_v1_132_0 starts vmstorage-v1.132.0 (the last version that uses
// legacy index).
//
// The path to the binary must be provided via VMSTORAGE_V1_132_0_PATH
// environment variable.
func StartVmstorage_v1_132_0(instance string, flags []string, cli *Client, output io.Writer) (*Vmstorage, error) {
binary := os.Getenv("VMSTORAGE_V1_132_0_PATH")
return startVmstorage(instance, binary, flags, cli, output)
}

View File

@@ -88,7 +88,6 @@ These skills provide predefined workflows and capabilities such as:
* Multi-signal investigations
* Cardinality optimization
* Unused metric detection
* Stream aggregation configuration
To install the available skills for AI agents, run:
```sh

View File

@@ -150,12 +150,8 @@ You can experiment with your own data during the monthlong trial without depl
are fast-booting Linux microVMs that run on a fleet of large bare-metal servers. You can start a playground right from your browser.
Once up and running, accessing a playground is no different from SSH-ing into a remote server rented from your favorite VPS or Cloud provider.
Iximiuz Labs provides various [learning-by-doing resources for VictoriaMetrics](https://labs.iximiuz.com/v/victoriametrics):
- Tutorial:
- [Getting Started with VictoriaMetrics on Kubernetes](https://labs.iximiuz.com/tutorials/victoriametrics-getting-started-kubernetes)
- Playgrounds:
- [VictoriaMetrics single node](https://labs.iximiuz.com/playgrounds/victoriametrics)
- [VictoriaMetrics cluster](https://labs.iximiuz.com/playgrounds/victoriametrics-cluster)
- [VictoriaMetrics on Kubernetes](https://labs.iximiuz.com/playgrounds/victoriametrics-kubernetes)
Iximiuz Labs requires a [free account](https://labs.iximiuz.com/signup) to access the materials.
Iximiuz Labs provides playgrounds for VictoriaMetrics software:
- [VictoriaMetrics single node (on Ubuntu)](https://labs.iximiuz.com/playgrounds/victoriametrics-e2f9b613)
- [VictoriaMetrics cluster (on Ubuntu)](https://labs.iximiuz.com/playgrounds/victoriametrics-cluster-8eacb19d)
- [Getting Started with VictoriaMetrics on Kubernetes](https://labs.iximiuz.com/tutorials/victoriametrics-getting-started-kubernetes-0e9c0993)
- [VictoriaMetrics Operator](https://labs.iximiuz.com/playgrounds/victoriametrics-kubernetes-9eebc258)

View File

@@ -43,9 +43,6 @@ Just download VictoriaMetrics and follow [these instructions](https://docs.victo
See [available integrations](https://docs.victoriametrics.com/victoriametrics/integrations/) with other systems like
[Prometheus](https://docs.victoriametrics.com/victoriametrics/integrations/prometheus/) or [Grafana](https://docs.victoriametrics.com/victoriametrics/integrations/grafana/).
> Want to see VictoriaMetrics in action, but without installing anything?
> Try [Playgrounds](https://docs.victoriametrics.com/playgrounds/) - a list of publicly available playgrounds for VictoriaMetrics software.
VictoriaMetrics is developed at a fast pace, so it is recommended to periodically check the [CHANGELOG](https://docs.victoriametrics.com/victoriametrics/changelog/)
and perform [regular upgrades](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-upgrade-victoriametrics).

View File

@@ -26,7 +26,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
## tip
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/), [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vminsert` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add `-opentelemetry.promoteAllResourceAttributes` and `-opentelemetry.promoteScopeMetadata` command-line flags to allow managing label promotion for resource attributes and OTel scope metadata. See [OpenTelemetry](https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/) docs and [#10931](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10931).
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): fix intermittent `write: connection timed out` errors caused by silently dropped TCP connections being reused from the connection pool. See [#10735-comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10735#issuecomment-4535832301).
## [v1.144.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.144.0)

View File

@@ -59,10 +59,6 @@ Once connected, you can build graphs and dashboards using [PromQL](https://prome
_Creating a datasource may require [specific permissions](https://grafana.com/docs/grafana/latest/administration/data-source-management/).
If you don't see an option to create a data source - try contacting system administrator._
If you run [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) and want to see its rules in [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/),
then set configure `-vmalert.proxyURL` on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert).
## Multi-tenant access with vmauth and OIDC
[vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/) can proxy Grafana datasource requests and enforce

View File

@@ -28,17 +28,9 @@ The following label sanitization options can be enabled:
> These flags can be applied on vmagent, vminsert or VictoriaMetrics single-node.
## Instrumentation Scope
By default, VictoriaMetrics promotes [OTel scope metadata](https://opentelemetry.io/docs/specs/otel/common/instrumentation-scope/) to metric labels. This behavior can be disabled via `-opentelemetry.promoteScopeMetadata`.
## Resource Attributes
By default, VictoriaMetrics promotes all [OpenTelemetry resource](https://opentelemetry.io/docs/specs/otel/resource/data-model/) attributes to labels and attaches them to all ingested OTLP metrics.
The following attribute promotion options can be configured:
* `-opentelemetry.promoteAllResourceAttributes` - promotes all resource attributes to labels, except for the ones configured with `-opentelemetry.ignoreResourceAttributes`.
* `-opentelemetry.promoteResourceAttributes` - promotes specific list of resource attributes to labels. It cannot be configured simultaneously with `-opentelemetry.promoteAllResourceAttributes`.
* `-opentelemetry.ignoreResourceAttributes` - controls which resource attributes to ignore, can only be set when `-opentelemetry.promoteAllResourceAttributes` is true.
## Exponential histograms

View File

@@ -26,7 +26,6 @@ Stream aggregation has the following features:
and/or scraped from [Prometheus-compatible targets](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter)
- It can filter out raw samples matched by aggregation rules, so raw data will never reach the remote destination. See `-streamAggr.keepInput` and `-streamAggr.dropInput` in [aggregation config](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/configuration/);
- It allows building [flexible processing pipelines](#routing);
- It is [horizontally scalable](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#scaling-aggregation-horizontally).
# Limitations
@@ -599,47 +598,6 @@ Below is an example of an `aggr.yaml` configuration that drops the `replica` and
keep_metric_names: true
```
## Scaling aggregation horizontally
Aggregation output is only correct when all contributing samples are processed by the same aggregator instance.
To scale the aggregation horizontally, always shard the input samples in a deterministic way. This can be achieved by
building a two layer topology of vmagents where the first layer is responsible for sharding, and the second layer is responsible for aggregating:
```mermaid
flowchart LR
V1[vmagent-shard-1] -- requests_total{env=test, pod=foo} --> SV1[vmagent-aggr-1]
V1[vmagent-shard-1] -- requests_total{env=prod, pod=bar} --> SV2[vmagent-aggr-1]
V2[vmagent-shard-2] -- requests_total{env=prod, pod=baz} --> SV2[vmagent-aggr-2]
SV1 -- requests_total:5m_without_pod_total{env=test} --> x(( ))
SV2 -- requests_total:5m_without_pod_total{env=prod} --> y(( ))
style x fill:none,stroke:none
style y fill:none,stroke:none
```
The sharding layer of vmagents can be configured via the `-remoteWrite.shardByURL.labels` or `-remoteWrite.shardByURL.ignoreLabels`
command line flags. See how to [shard data across remote write destinations](https://docs.victoriametrics.com/victoriametrics/vmagent/#sharding-among-remote-storages) for more details.
The following requirements must be met for sharded aggregation to work correctly:
- All sharding vmagents should have the same deterministic sharding configuration.
- The sharding configuration must align with the `by` and `without` lists:
- Labels listed in `by` setting should be a subset of shard's routing key `-remoteWrite.shardByURL.labels`.
With `-remoteWrite.shardByURL.labels=env,job` aggregator's `by` should include `by: env`, `by: job` or both: `by: [env, job]`.
This makes sure that all the samples for the same `env` and `job` are aggregated together and produce the complete output.
- Labels listed in `without` setting should be a superset of shard's routing key `--remoteWrite.shardByURL.ignoreLabels`.
With `-remoteWrite.shardByURL.ignoreLabels=env,job` aggegator's `without` should include at least both labels `without: [env,job]`.
This makes sure that `requests_total{env=test, job=foo}` and `requests_total{env=prod, job=foo}` are routed to the same aggregator
and are aggregated together. See also [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938#issuecomment-2018470324).
- Aggregating vmagents should not produce collisions: the aggregation output should be unique across all the sharded agents.
For example, `requests_total:5m_without_env_pod_total` produced by both `vmagent-aggr-1` and `vmagent-aggr-2` will collide
unless they have labels uniquely identifying them. These labels should be either preserved during sharding and aggregation config,
or enforced on the output via `-remoteWrite.label` - see [these docs](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#cluster-mode) for more details.
> Never shard histograms by `le` (or `vmrange` in case of VM histograms) label. A histogram is a logical group of series differing
only in the bucket label. All of those buckets must land on the same aggregator at the same time so it can produce a
coherent bucket set. See more about [aggregating histograms](https://docs.victoriametrics.com/stream-aggregation/#aggregating-histograms).
See also [why you shouldn't put an aggregator behind a load balancer](https://docs.victoriametrics.com/stream-aggregation/#put-aggregator-behind-load-balancer).
# Troubleshooting
- [Unexpected spikes for `total` or `increase` outputs](#staleness).

View File

@@ -217,19 +217,11 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
-opentelemetry.convertMetricNamesToPrometheus
Whether to convert only metric names into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
-opentelemetry.ignoreResourceAttributes array
Control which resource attributes to ignore, can only be set when 'opentelemetry.promoteAllResourceAttributes' is true.
-opentelemetry.labelNameUnderscoreSanitization
Whether to enable prepending of 'key' to labels starting with '_' when -opentelemetry.usePrometheusNaming is enabled. Reserved labels starting with '__' are not modified. See https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/ (default true)
-opentelemetry.maxRequestSize size
The maximum size in bytes of a single OpenTelemetry request
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
-opentelemetry.promoteAllResourceAttributes
Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'.
-opentelemetry.promoteResourceAttributes array
Promote specific list of resource attributes to labels.
-opentelemetry.promoteScopeMetadata
Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.
-opentelemetry.usePrometheusNaming
Whether to convert metric names and labels into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
-opentsdbHTTPListenAddr string

View File

@@ -182,15 +182,15 @@ among remote storage systems specified in `-remoteWrite.url`.
> For example, if you set `-remoteWrite.url=srv+foo` and it's resolved to three addresses (`192.168.1.1`, `192.168.1.2`, `192.168.1.3`),
> vmagent will only choose **one** randomly every time it (re-)creates the connection. In contrast, specifying the addresses manually (`-remoteWrite.url=192.168.1.1 -remoteWrite.url=192.168.1.2 -remoteWrite.url=192.168.1.3`) will shard samples across all three URLs.
Use `-remoteWrite.shardByURL.labels` to route metrics among `-remoteWrite.url` based on their label values.
For example, `-remoteWrite.shardByURL.labels=instance,__name__` would shard metrics with the same name and `instance`
label to the same `-remoteWrite.url`. This command-line flag allows specifying a comma-separated list of labels.
Sometimes, it may be necessary to use only a particular set of labels for sharding. For example, it may be necessary to route all the metrics with the same `instance` label
to the same `-remoteWrite.url`. In this case, you can specify a comma-separated list of these labels in the `-remoteWrite.shardByURL.labels`
command-line flag. For example, `-remoteWrite.shardByURL.labels=instance,__name__` would shard metrics with the same name and `instance`
label to the same `-remoteWrite.url`.
Alternatively, you can use `-remoteWrite.shardByURL.ignoreLabels` to route metrics among `-remoteWrite.url` based on their label values, excluding the specified labels.
For example, `-remoteWrite.shardByURL.ignoreLabels=pod` would shard metrics `metric{pod="foo"}` and `metric{pod="bar"}` to the same `-remoteWrite.url`
by ignoring the `pod` label. This command-line flag allows specifying a comma-separated list of labels.
> Command-line flags `-remoteWrite.shardByURL.labels` and `-remoteWrite.shardByURL.ignoreLabels` are mutually exclusive.
Sometimes, it may be necessary to ignore some labels when sharding samples across multiple `-remoteWrite.url` backends.
For example, if all the [raw samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples) with the same set of labels
except for the labels `instance` and `pod` must be routed to the same backend. In this case the list of ignored labels must be passed to
`-remoteWrite.shardByURL.ignoreLabels` command-line flag: `-remoteWrite.shardByURL.ignoreLabels=instance,pod`.
See also [how to scrape a large number of targets](#scraping-big-number-of-targets).

View File

@@ -184,19 +184,11 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/vmagent/ .
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
-opentelemetry.convertMetricNamesToPrometheus
Whether to convert only metric names into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
-opentelemetry.ignoreResourceAttributes array
Control which resource attributes to ignore, can only be set when 'opentelemetry.promoteAllResourceAttributes' is true.
-opentelemetry.labelNameUnderscoreSanitization
Whether to enable prepending of 'key' to labels starting with '_' when -opentelemetry.usePrometheusNaming is enabled. Reserved labels starting with '__' are not modified. See https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/ (default true)
-opentelemetry.maxRequestSize size
The maximum size in bytes of a single OpenTelemetry request
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
-opentelemetry.promoteAllResourceAttributes
Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'.
-opentelemetry.promoteResourceAttributes array
Promote specific list of resource attributes to labels.
-opentelemetry.promoteScopeMetadata
Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.
-opentelemetry.usePrometheusNaming
Whether to convert metric names and labels into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
-opentsdbHTTPListenAddr string

View File

@@ -21,9 +21,23 @@ Recording rules results are persisted via remote write protocols and require `-r
`vmalert` is heavily inspired by [Prometheus](https://prometheus.io/docs/alerting/latest/overview/)
implementation and aims to be compatible with its syntax.
Configure `-vmalert.proxyURL` on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert)
to proxy requests to `vmalert`. Proxying is needed for the following cases:
* to proxy requests from [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/);
* to access `vmalert`'s UI through [vmui](https://docs.victoriametrics.com/victoriametrics/#vmui).
[VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_intro)
provides out-of-the-box alerting functionality based on `vmalert`. This service simplifies the setup
and management of alerting and recording rules as well as the integration with Alertmanager. For more details,
please refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/alertmanager-setup-for-deployment/).
## Features
* Integration with VictoriaMetrics, VictoriaLogs, VictoriaTraces, Graphite and Prometheus compatible storages. See [Integrations](https://docs.victoriametrics.com/victoriametrics/vmalert/#integrations) for details;
* Integration with [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics) and [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/);
* Integration with [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) and [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/). See [this doc](https://docs.victoriametrics.com/victorialogs/vmalert/);
* Integration with [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) which also uses [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/). See [this doc](https://docs.victoriametrics.com/victoriatraces/vmalert/);
* Prometheus [alerting rules definition format](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/#defining-alerting-rules)
support;
* Integration with [Alertmanager](https://github.com/prometheus/alertmanager) starting from [Alertmanager v0.16.0-alpha](https://github.com/prometheus/alertmanager/releases/tag/v0.16.0-alpha.0);
@@ -48,8 +62,8 @@ implementation and aims to be compatible with its syntax.
To start using `vmalert` you will need the following things:
* list of rules - PromQL/MetricsQL/LogsQL/GraphiteQL expressions to execute;
* datasource address - a storage that [vmalert integrates with](https://docs.victoriametrics.com/victoriametrics/vmalert/#integrations) for executing queries;
* list of rules - PromQL/MetricsQL expressions to execute;
* datasource address - reachable endpoint with [Prometheus HTTP API](https://prometheus.io/docs/prometheus/latest/querying/api/#http-api) support for running queries against;
* notifier address [optional] - reachable [Alert Manager](https://github.com/prometheus/alertmanager) instance for processing,
aggregating alerts, and sending notifications. Please note, notifier address also supports Consul and DNS Service Discovery via
[config file](https://docs.victoriametrics.com/victoriametrics/vmalert/#notifier-configuration-file).
@@ -59,7 +73,7 @@ To start using `vmalert` you will need the following things:
* remote read address [optional] - MetricsQL compatible datasource to restore alerts state from.
You can use the existing [docker-compose environment](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#victoriametrics-single-server)
as an example. It already contains vmalert configured with the list of alerting rules and integrated with Alert Manager and VictoriaMetrics.
as example. It already contains vmalert configured with list of alerting rules and integrated with Alert Manager and VictoriaMetrics.
Alternatively, build `vmalert` from sources:
@@ -73,7 +87,7 @@ Then run `vmalert`:
```sh
./bin/vmalert -rule=alert.rules \ # Path to the file with rules configuration. Supports wildcard and HTTP URL (S3/GCS are available in Enterprise).
-datasource.url=http://localhost:8428 \ # VictoriaMetrics URL to query for rules evaluation. See other available Integrations above.
-datasource.url=http://localhost:8428 \ # Prometheus HTTP API compatible datasource
-notifier.url=http://localhost:9093 \ # AlertManager URL (required if alerting rules are used)
-notifier.url=http://127.0.0.1:9093 \ # AlertManager replica URL
-remoteWrite.url=http://localhost:8428 \ # Remote write compatible storage to persist rules and alerts state info (required if recording rules are used)
@@ -93,7 +107,7 @@ See also [stream aggregation](https://docs.victoriametrics.com/victoriametrics/s
See the full list of configuration flags in [configuration](#configuration) section.
If you run multiple `vmalert` services for the same datasource or AlertManager and need to distinguish the results or alerts,
If you run multiple `vmalert` services on the same datastore or AlertManager and need to distinguish the results or alerts,
specify different `-external.label` command-line flags to indicate which `vmalert` generated them.
If rule result metrics have label that conflict with `-external.label`, `vmalert` will automatically rename
it with prefix `exported_`.
@@ -102,7 +116,6 @@ Configuration for [recording](https://prometheus.io/docs/prometheus/latest/confi
and [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) rules is very
similar to Prometheus rules and configured using YAML. Configuration examples may be found
in [testdata](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/app/vmalert/config/testdata) folder.
Every `rule` belongs to a `group` and every configuration file may contain arbitrary number of groups:
```yaml
@@ -110,7 +123,13 @@ groups:
[ - <rule_group> ]
```
## Groups
> Explore how to integrate `vmalert` with [VictoriaMetrics Anomaly Detection](https://docs.victoriametrics.com/anomaly-detection/) in the following [guide](https://docs.victoriametrics.com/anomaly-detection/guides/guide-vmanomaly-vmalert/).
> For users of [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_config),
> many of the configuration steps (including highly available setup of `vmalert` for cluster deployments) are handled automatically.
> Please, refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/alertmanager-setup-for-deployment/) for more details.
### Groups
Each group has the following attributes:
@@ -211,11 +230,9 @@ rules:
### Rules
Every rule contains an `expr` field for the expression to evaluate against the configured datasource.
Depending on `group.type` value or `-rule.defaultRuleType` cmd-line flag expression can be one of the following types:
- `prometheus` (default) - [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/) or [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) expression.
- `vlogs` - [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/vmalert/) expression.
- `graphite` - [Graphite](https://graphite.readthedocs.io/en/stable/render_api.html) expression.
Every rule contains `expr` field for [PromQL](https://prometheus.io/docs/prometheus/latest/querying/basics/)
or [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) expression. `vmalert` will execute the configured
expression and then act according to the Rule type.
There are two types of Rules:
@@ -227,7 +244,8 @@ There are two types of Rules:
`-remoteWrite.url`. Recording rules are used to precompute frequently needed or computationally
expensive expressions and save their result as a new set of time series ([Prometheus recording rules docs](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/)).
> `vmalert` forbids defining duplicates - rules with the same combination of name, expression and labels within one group.
`vmalert` forbids defining duplicates - rules with the same combination of name, expression, and labels
within one group.
#### Alerting rules
@@ -238,8 +256,8 @@ The syntax for alerting rule is the following:
alert: <string>
# The expression to evaluate. The expression language depends on the type value.
# By default, PromQL/MetricsQL expression is used. Other available types are "graphite" and "vlogs".
# See https://docs.victoriametrics.com/victoriametrics/vmalert/#integrations
# By default, PromQL/MetricsQL expression is used. If group.type="graphite", then the expression
# must contain valid Graphite expression.
expr: <string>
# Alerts are considered firing once they have been returned for this long.
@@ -285,44 +303,7 @@ annotations:
[ <labelname>: <tmpl_string> ]
```
#### Recording rules
The syntax for recording rules is the following:
```yaml
# The name of the time series to output to. Must be a valid metric name.
record: <string>
# The expression to evaluate. The expression language depends on the type value.
# By default, PromQL/MetricsQL expression is used. Other available types are "graphite" and "vlogs".
# See https://docs.victoriametrics.com/victoriametrics/vmalert/#integrations
expr: <string>
# Labels to add or overwrite labels from other external label sources, such as group labels, before storing the result.
#
# In case of conflicts, original labels are kept with prefix `exported_`.
# As a special case, specifying a label with an empty string value removes the label from the result if it exists
# in the original query result; otherwise, it is ignored.
#
# Labels do not support templating in https://docs.victoriametrics.com/victoriametrics/vmalert/#templating due to cardinality concerns. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8171.
labels:
[ <labelname>: <labelvalue> ]
# Whether to print debug information into logs.
# Information includes requests sent to the datasource.
# information - it will be printed to logs.
# Logs are printed with INFO level, so make sure that -loggerLevel=INFO to see the output.
[ debug: <bool> | default = false ]
# Defines the number of rule updates entries stored in memory
# and available for view on rule Details page.
# Overrides `rule.updateEntriesLimit` value for this specific rule.
[ update_entries_limit: <integer> | default 0 ]
```
For recording rules to work `-remoteWrite.url` must be specified.
## Templating
#### Templating
It is allowed to use [Go templating](https://golang.org/pkg/text/template/) in annotations and labels(with limited support) to format data, iterate over
or execute expressions.
@@ -344,7 +325,7 @@ The following variables are available in templating:
Additionally, `vmalert` provides some extra templating functions listed in [template functions](#template-functions) and [reusable templates](#reusable-templates).
### Template functions
#### Template functions
`vmalert` provides the following template functions, which can be used during [templating](#templating):
@@ -368,7 +349,7 @@ Additionally, `vmalert` provides some extra templating functions listed in [temp
* `parseDurationTime` - parses the input string into [time.Duration](https://pkg.go.dev/time#Duration).
* `pathEscape` - escapes the input string, so it can be safely put inside path part of URL.
* `pathPrefix` - returns the path part of the `-external.url` command-line flag.
* `query` - executes query against `-datasource.url` and returns the query result.
* `query` - executes the [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) query against `-datasource.url` and returns the query result.
For example, `{{ query "sort_desc(process_resident_memory_bytes)" | first | value }}` executes the `sort_desc(process_resident_memory_bytes)`
query at `-datasource.url` and returns the first result.
* `queryEscape` - escapes the input string, so it can be safely put inside [query arg](https://en.wikipedia.org/wiki/Percent-encoding) part of URL.
@@ -388,7 +369,7 @@ Additionally, `vmalert` provides some extra templating functions listed in [temp
* `toUpper` - converts all the chars in the input string to uppercase.
* `value` - returns the numeric value from the input query result.
### Reusable templates
#### Reusable templates
Like in Alertmanager you can define [reusable templates](https://prometheus.io/docs/prometheus/latest/configuration/template_examples/#defining-reusable-templates)
to share same templates across annotations. Just define the templates in a file and
@@ -426,8 +407,44 @@ groups:
The `-rule.templates` flag supports wildcards so multiple files with templates can be loaded.
The content of `-rule.templates` can be also [hot reloaded](#hot-config-reload).
#### Recording rules
## Alerts state on restarts
The syntax for recording rules is following:
```yaml
# The name of the time series to output to. Must be a valid metric name.
record: <string>
# The expression to evaluate. The expression language depends on the type value.
# By default, MetricsQL expression is used. If group.type="graphite", then the expression
# must contain valid Graphite expression.
expr: <string>
# Labels to add or overwrite labels from other external label sources, such as group labels, before storing the result.
#
# In case of conflicts, original labels are kept with prefix `exported_`.
# As a special case, specifying a label with an empty string value removes the label from the result if it exists
# in the original query result; otherwise, it is ignored.
#
# Labels do not support templating in https://docs.victoriametrics.com/victoriametrics/vmalert/#templating due to cardinality concerns. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8171.
labels:
[ <labelname>: <labelvalue> ]
# Whether to print debug information into logs.
# Information includes requests sent to the datasource.
# information - it will be printed to logs.
# Logs are printed with INFO level, so make sure that -loggerLevel=INFO to see the output.
[ debug: <bool> | default = false ]
# Defines the number of rule's updates entries stored in memory
# and available for view on rule's Details page.
# Overrides `rule.updateEntriesLimit` value for this specific rule.
[ update_entries_limit: <integer> | default 0 ]
```
For recording rules to work `-remoteWrite.url` must be specified.
### Alerts state on restarts
`vmalert` holds alerts state in the memory. Restart of the `vmalert` process will reset the state of all active alerts
in the memory. To prevent `vmalert` from losing the state on restarts configure it to persist the state
@@ -448,7 +465,7 @@ in configured `-remoteRead.url`, weren't updated in the last `1h` (controlled by
or received state doesn't match current `vmalert` rules configuration. `vmalert` marks successfully restored rules
with `restored` label in [web UI](#web).
## Link to alert source
### Link to alert source
Alerting notifications sent by vmalert always contain a `source` link. By default, the link format
is the following `http://<vmalert-addr>/vmalert/alert?group_id=<group_id>&alert_id=<alert_id>`. On click, it opens
@@ -484,9 +501,7 @@ In addition to `source` link, some extra links could be added to alert's [annota
field. See [how we use them](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/9751ea10983d42068487624849cac7ad6fd7e1d8/deployment/docker/rules/alerts-cluster.yml#L44)
to link alerting rule and the corresponding panel on Grafana dashboard.
## Multitenancy
> See how to use [multitenancy in rules for VictoriaLogs](https://docs.victoriametrics.com/victorialogs/vmalert/#how-to-use-multitenancy-in-rules).
### Multitenancy
There are the following approaches exist for alerting and recording rules across
[multiple tenants](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy):
@@ -544,7 +559,7 @@ The enterprise version of vmalert is available in `vmutils-*-enterprise.tar.gz`
at [release page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) and in `*-enterprise`
tags at [Docker Hub](https://hub.docker.com/r/victoriametrics/vmalert/tags) and [Quay](https://quay.io/repository/victoriametrics/vmalert?tab=tags).
## Reading rules from object storage
### Reading rules from object storage
[Enterprise version](https://docs.victoriametrics.com/victoriametrics/enterprise/) of `vmalert` may read alerting and recording rules
from object storage:
@@ -563,7 +578,7 @@ The following [command-line flags](#flags) can be used for fine-tuning access to
* `-s3.customEndpoint` - custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set.
* `-s3.forcePathStyle` - prefixing endpoint with bucket name when set false, true by default.
## Topology examples
### Topology examples
The following sections are showing how `vmalert` may be used and configured
for different scenarios.
@@ -574,7 +589,7 @@ Please note, not all flags in examples are required:
you have recording rules or want to store [alerts state](#alerts-state-on-restarts) on `vmalert` restarts;
* `-notifier.url` is optional and is needed only if you have alerting rules.
### Single-node VictoriaMetrics
#### Single-node VictoriaMetrics
The simplest configuration where one single-node VM server is used for
rules execution, storing recording rules results and alerts state.
@@ -592,7 +607,7 @@ rules execution, storing recording rules results and alerts state.
![vmalert single](vmalert_single.webp)
{width="500"}
### Cluster VictoriaMetrics
#### Cluster VictoriaMetrics
In [cluster mode](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/)
VictoriaMetrics has separate components for writing and reading path:
@@ -615,7 +630,7 @@ Cluster mode could have multiple `vminsert` and `vmselect` components.
In case when you want to spread the load on these components - add balancers before them and configure
`vmalert` with balancer addresses. Please, see more about [VictoriaMetrics cluster architecture](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#architecture-overview).
### HA vmalert
#### HA vmalert
For High Availability(HA) user can run multiple identically configured `vmalert` instances.
It means all of them will execute the same rules, write state and results to
@@ -659,12 +674,12 @@ to ensure [high availability](https://github.com/prometheus/alertmanager#high-av
This example uses single-node VM server for the sake of simplicity.
Check how to replace it with [cluster VictoriaMetrics](#cluster-victoriametrics) if needed.
### Downsampling and aggregation via vmalert
#### Downsampling and aggregation via vmalert
_Please note, [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/) might be more efficient
for cases when downsampling or aggregation need to be applied **before data gets into the TSDB.**_
`vmalert` can't modify existing data. But it can run arbitrary queries
`vmalert` can't modify existing data. But it can run arbitrary PromQL/MetricsQL queries
via [recording rules](#recording-rules) and backfill results to the configured `-remoteWrite.url`.
This ability allows to aggregate data. For example, the following rule will calculate the average value for
metric `http_requests` on the `5m` interval:
@@ -717,7 +732,7 @@ Flags `-remoteRead.url` and `-notifier.url` are omitted since we assume only rec
See also [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/) and [downsampling](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#downsampling).
### Multiple remote writes
#### Multiple remote writes
For persisting recording or alerting rule results `vmalert` requires `-remoteWrite.url` to be set.
But this flag supports only one destination. To persist rule results to multiple destinations
@@ -731,72 +746,7 @@ Using `vmagent` as a proxy provides additional benefits such as
[data persisting when storage is unreachable](https://docs.victoriametrics.com/victoriametrics/vmagent/#replication-and-high-availability),
or time series modification via [relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/).
## Integrations
vmalert can be integrated with different data sources for alerting and recording rules. But it deliberately allows
configuring only one `datasource.url`. We recommend running separate instances of vmalert for each datasource type
with the specified `-rule.defaultRuleType=<datasource_type>` command-line flag.
###### VictoriaMetrics
vmalert natively integrates with [VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/) for alerting and
recording rules.
###### VictoriaLogs
vmalert integrates with [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) and allows configuring alerting and recording rules using [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/).
Results of recording rules and alerting state should be persisted to the remote-write compatible storage, such as VictoriaMetrics.
To enable VictoriaLogs compatibility set the `-rule.defaultRuleType=vlogs` command-line flag.
See [this doc](https://docs.victoriametrics.com/victorialogs/vmalert/) for details.
###### VictoriaTraces
vmalert integrates with [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) in exactly the same way as
with [VictoriaLogs](https://docs.victoriametrics.com/victoriametrics/vmalert/#victorialogs).
###### Graphite
vmalert integrates with [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) and allows configuring alerting and recording rules.
During evaluation, vmalert will send requests to `<-datasource.url>/render?format=json`.
To enable Graphite compatibility set the `-rule.defaultRuleType=graphite` command-line flag.
Since VictoriaMetrics supports both Graphite and Prometheus APIs, it is possible to mix Graphite and VictoriaMetrics rules.
On the group level, set the `type` field to specify to which datasource type it should belong: `prometheus` (MetricsQL) or `graphite` (GraphiteQL).
When using vmalert with both `graphite` and `prometheus` rules configured against the cluster version of VictoriaMetrics, don't forget
to set the `-datasource.appendTypePrefix` flag to `true`, so vmalert can adjust URL prefix automatically based on the query type.
###### Prometheus
vmalert uses [Prometheus HTTP API](https://prometheus.io/docs/prometheus/latest/querying/api/#http-api) for querying
and [Prometheus Remote Write v1 protocol](https://prometheus.io/docs/specs/prw/remote_write_spec/) for persisting
recording rules results and alerting state. Hence, it can be integrated with any Prometheus-compatible storage
that supports these protocols.
###### Grafana
To proxy requests from [Grafana Alerting UI](https://grafana.com/docs/grafana/latest/alerting/) configure `-vmalert.proxyURL`
on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert).
###### vmui
To access rules UI through [vmui](https://docs.victoriametrics.com/victoriametrics/#vmui) configure `-vmalert.proxyURL`
on VictoriaMetrics [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
or [vmselect in cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert).
###### vmanomaly
See how to integrate vmalert with [VictoriaMetrics Anomaly Detection](https://docs.victoriametrics.com/anomaly-detection/)
in the following [guide](https://docs.victoriametrics.com/anomaly-detection/guides/guide-vmanomaly-vmalert/).
###### VictoriaMetrics Cloud
For users of [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_vmalert_config),
many of the configuration steps (including highly available setup of `vmalert` for cluster deployments) are handled automatically.
Please refer to the [VictoriaMetrics Cloud documentation](https://docs.victoriametrics.com/victoriametrics-cloud/alertmanager-setup-for-deployment/) for more details.
## Web
### Web
`vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
@@ -821,11 +771,28 @@ This may be used for better integration with Grafana unified alerting system. Se
* [How to query vmalert from single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmalert)
* [How to query vmalert from VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert)
## Graphite
vmalert sends requests to `<-datasource.url>/render?format=json` during evaluation of alerting and recording rules
if the corresponding group or rule contains `type: "graphite"` config option. It is expected that the `<-datasource.url>/render`
implements [Graphite Render API](https://graphite.readthedocs.io/en/stable/render_api.html) for `format=json`.
When using vmalert with both `graphite` and `prometheus` rules configured against cluster version of VM do not forget
to set `-datasource.appendTypePrefix` flag to `true`, so vmalert can adjust URL prefix automatically based on the query type.
## VictoriaLogs
vmalert supports [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) as a datasource for writing alerting and recording rules using [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/). See [this doc](https://docs.victoriametrics.com/victorialogs/vmalert/) for details.
## VictoriaTraces
vmalert supports [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) as a (`vlogs`) datasource for writing alerting and recording rules using [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/). See [this doc](https://docs.victoriametrics.com/victoriatraces/vmalert/) for details.
## Rules backfilling
vmalert supports alerting and recording rules backfilling (aka `replay`). In replay mode vmalert
can read the same rules configuration as normal, evaluate them on the given time range and backfill
results via remote write to the configured storage. vmalert supports only the `prometheus` datasource type for backfilling.
results via remote write to the configured storage. vmalert supports any PromQL/MetricsQL compatible
data source for backfilling.
Please note, that response caching may lead to unexpected results during and after backfilling process.
In order to avoid this you need to reset cache contents or disable caching when using backfilling
@@ -885,9 +852,13 @@ vmalert respects `evaluationInterval` value set by flag or per-group during the
vmalert automatically disables caching on VictoriaMetrics side by sending `nocache=1` param. It allows
to prevent cache pollution and unwanted time range boundaries adjustment during backfilling.
Results of recording rules `replay` should match the results of normal rules evaluation.
#### Recording rules
Results of alerting rules `replay` are the time series reflecting the [state of the alert](#alerts-state-on-restarts).
The result of recording rules `replay` should match with results of normal rules evaluation.
#### Alerting rules
The result of alerting rules `replay` is time series reflecting [alert's state](#alerts-state-on-restarts).
To see if `replayed` alert has fired in the past use the following PromQL/MetricsQL expression:
```

View File

@@ -184,19 +184,11 @@ See the docs at https://docs.victoriametrics.com/victoriametrics/cluster-victori
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
-opentelemetry.convertMetricNamesToPrometheus
Whether to convert only metric names into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
-opentelemetry.ignoreResourceAttributes array
Control which resource attributes to ignore, can only be set when 'opentelemetry.promoteAllResourceAttributes' is true.
-opentelemetry.labelNameUnderscoreSanitization
Whether to enable prepending of 'key' to labels starting with '_' when -opentelemetry.usePrometheusNaming is enabled. Reserved labels starting with '__' are not modified. See https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/ (default true)
-opentelemetry.maxRequestSize size
The maximum size in bytes of a single OpenTelemetry request
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
-opentelemetry.promoteAllResourceAttributes
Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'.
-opentelemetry.promoteResourceAttributes array
Promote specific list of resource attributes to labels.
-opentelemetry.promoteScopeMetadata
Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.
-opentelemetry.usePrometheusNaming
Whether to convert metric names and labels into Prometheus-compatible format for the metrics ingested via OpenTelemetry protocol; see https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/
-opentsdbHTTPListenAddr string

View File

@@ -1,7 +1,9 @@
package netutil
import (
"errors"
"fmt"
"os"
"sync"
"time"
@@ -40,6 +42,7 @@ type ConnPool struct {
type connWithTimestamp struct {
bc *handshake.BufferedConn
lastActiveTime uint64
watchCh chan bool
}
var (
@@ -202,20 +205,27 @@ func (cp *ConnPool) dialAndHandshake() (*handshake.BufferedConn, error) {
}
func (cp *ConnPool) tryGetConn() (*handshake.BufferedConn, error) {
cp.mu.Lock()
defer cp.mu.Unlock()
for {
cp.mu.Lock()
if cp.isStopped {
cp.mu.Unlock()
return nil, fmt.Errorf("conn pool to %s cannot be used, since it is stopped", cp.d.addr)
}
if len(cp.conns) == 0 {
err := cp.lastDialError
cp.mu.Unlock()
return nil, err
}
c := cp.conns[len(cp.conns)-1]
cp.conns = cp.conns[:len(cp.conns)-1]
cp.mu.Unlock()
if cp.isStopped {
return nil, fmt.Errorf("conn pool to %s cannot be used, since it is stopped", cp.d.addr)
bc := c.stopWatcher()
if bc == nil {
continue
}
return bc, nil
}
if len(cp.conns) == 0 {
return nil, cp.lastDialError
}
c := cp.conns[len(cp.conns)-1]
bc := c.bc
c.bc = nil
cp.conns = cp.conns[:len(cp.conns)-1]
return bc, nil
}
// Put puts bc back to the pool.
@@ -228,6 +238,8 @@ func (cp *ConnPool) Put(bc *handshake.BufferedConn) {
_ = bc.Close()
return
}
watchCh := make(chan bool, 1)
go watchConn(bc, watchCh)
cp.mu.Lock()
if cp.isStopped {
_ = bc.Close()
@@ -235,6 +247,7 @@ func (cp *ConnPool) Put(bc *handshake.BufferedConn) {
cp.conns = append(cp.conns, connWithTimestamp{
bc: bc,
lastActiveTime: fasttime.UnixTimestamp(),
watchCh: watchCh,
})
}
cp.mu.Unlock()
@@ -325,3 +338,41 @@ func forEachConnPool(f func(cp *ConnPool)) {
wg.Wait()
connPoolsMu.Unlock()
}
// watchConn blocks on Read until the connection is closed or enters an error state.
// It sends true to watchCh if the connection is dead, false if it was stopped by a deadline.
func watchConn(bc *handshake.BufferedConn, watchCh chan<- bool) {
var buf [1]byte
_, err := bc.Conn.Read(buf[:])
watchCh <- !errors.Is(err, os.ErrDeadlineExceeded)
}
// stopWatcher stops the watchConn goroutine and returns the connection if it is still alive.
// Returns nil if the connection is dead.
func (c *connWithTimestamp) stopWatcher() *handshake.BufferedConn {
// Fast path: goroutine already reported the connection state.
select {
case dead := <-c.watchCh:
if dead {
_ = c.bc.Close()
return nil
}
default:
}
// Stop the goroutine by setting a deadline in the past, then wait for its response.
if err := c.bc.Conn.SetReadDeadline(time.Now()); err != nil {
_ = c.bc.Close()
return nil
}
dead := <-c.watchCh
if err := c.bc.Conn.SetReadDeadline(time.Time{}); err != nil {
_ = c.bc.Close()
return nil
}
if dead {
_ = c.bc.Close()
return nil
}
return c.bc
}

View File

@@ -14,14 +14,6 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
)
// DecodeMetricsOptions defines options for DecodeMetricsData
type DecodeMetricsOptions struct {
DisableScopeMetadata bool
DisableResourceAttributes bool
// ResourceAttributesList stored a list of resource attributes to ignore or promote based on the value of DisableResourceAttributes.
ResourceAttributesList map[string]struct{}
}
// MetricPusher must push the parsed samples and metric metadata to the underlying storage.
type MetricPusher interface {
// PushSample must store a sample with the given args.
@@ -81,8 +73,8 @@ func (r *MetricsData) marshalProtobuf(mm *easyproto.MessageMarshaler) {
}
}
// DecodeMetricsData decodes metricsData with given options from src and sends the decoded data to mp.
func DecodeMetricsData(src []byte, mp MetricPusher, options DecodeMetricsOptions) (err error) {
// DecodeMetricsData decodes metricsData from src and sends the decoded data to mp.
func DecodeMetricsData(src []byte, mp MetricPusher) (err error) {
// See https://github.com/open-telemetry/opentelemetry-proto/blob/049d4332834935792fd4dbd392ecd31904f99ba2/opentelemetry/proto/metrics/v1/metrics.proto#L56
//
// message MetricsData {
@@ -104,7 +96,7 @@ func DecodeMetricsData(src []byte, mp MetricPusher, options DecodeMetricsOptions
if !ok {
return fmt.Errorf("cannot read ResourceMetrics data")
}
if err := dctx.decodeResourceMetrics(data, options); err != nil {
if err := dctx.decodeResourceMetrics(data); err != nil {
return fmt.Errorf("cannot unmarshal ResourceMetrics: %w", err)
}
}
@@ -129,7 +121,7 @@ func (rm *ResourceMetrics) marshalProtobuf(mm *easyproto.MessageMarshaler) {
}
}
func (dctx *decoderContext) decodeResourceMetrics(src []byte, options DecodeMetricsOptions) error {
func (dctx *decoderContext) decodeResourceMetrics(src []byte) error {
// See https://github.com/open-telemetry/opentelemetry-proto/blob/049d4332834935792fd4dbd392ecd31904f99ba2/opentelemetry/proto/metrics/v1/metrics.proto#L66
//
// message ResourceMetrics {
@@ -145,7 +137,7 @@ func (dctx *decoderContext) decodeResourceMetrics(src []byte, options DecodeMetr
return fmt.Errorf("cannot read Resource data: %w", err)
}
if ok {
if err := dctx.decodeResource(resourceData, options.DisableResourceAttributes, options.ResourceAttributesList); err != nil {
if err := dctx.decodeResource(resourceData); err != nil {
return fmt.Errorf("cannot decode Resource: %w", err)
}
}
@@ -165,7 +157,7 @@ func (dctx *decoderContext) decodeResourceMetrics(src []byte, options DecodeMetr
return fmt.Errorf("cannot read ScopeMetrics data")
}
if err := dctx.decodeScopeMetrics(data, options.DisableScopeMetadata); err != nil {
if err := dctx.decodeScopeMetrics(data); err != nil {
return fmt.Errorf("cannot unmarshal ScopeMetrics: %w", err)
}
@@ -188,7 +180,7 @@ func (r *Resource) marshalProtobuf(mm *easyproto.MessageMarshaler) {
}
}
func (dctx *decoderContext) decodeResource(src []byte, disableResourceAttributes bool, attributeKeys map[string]struct{}) (err error) {
func (dctx *decoderContext) decodeResource(src []byte) (err error) {
// See https://github.com/open-telemetry/opentelemetry-proto/blob/049d4332834935792fd4dbd392ecd31904f99ba2/opentelemetry/proto/resource/v1/resource.proto#L28
//
// message Resource {
@@ -207,34 +199,7 @@ func (dctx *decoderContext) decodeResource(src []byte, disableResourceAttributes
if !ok {
return fmt.Errorf("cannot read Attributes")
}
keySuffix, ok, err := easyproto.GetString(data, 1)
if err != nil {
return fmt.Errorf("cannot find Key in KeyValue: %w", err)
}
if !ok {
// Key is missing, skip it.
// See https://github.com/VictoriaMetrics/VictoriaLogs/issues/869#issuecomment-3631307996
continue
}
if _, ok := attributeKeys[keySuffix]; ok != disableResourceAttributes {
// Skip the attribute if:
// 1. it is in the list of ignore attributes when disableResourceAttributes is false,
// 2. it isn't in the list of promote attributes when disableResourceAttributes is true.
continue
}
key := dctx.fb.formatSubFieldName("", keySuffix)
// Decode value
value, ok, err := easyproto.GetMessageData(data, 2)
if err != nil {
return fmt.Errorf("cannot find Value in KeyValue: %w", err)
}
if !ok {
// Value is null, skip it.
continue
}
if err := decodeAnyValue(value, &dctx.ls, &dctx.fb, key); err != nil {
if err := decodeKeyValue(data, &dctx.ls, &dctx.fb, ""); err != nil {
return fmt.Errorf("cannot unmarshal Attributes: %w", err)
}
}
@@ -292,6 +257,7 @@ func decodeKeyValue(src []byte, ls *promutil.Labels, fb *fmtBuffer, keyPrefix st
if err := decodeAnyValue(valueData, ls, fb, key); err != nil {
return fmt.Errorf("cannot decode AnyValue: %w", err)
}
return nil
}
@@ -486,7 +452,7 @@ func (sm *ScopeMetrics) marshalProtobuf(mm *easyproto.MessageMarshaler) {
}
}
func (dctx *decoderContext) decodeScopeMetrics(src []byte, disableScopeMetadata bool) error {
func (dctx *decoderContext) decodeScopeMetrics(src []byte) error {
// See https://github.com/open-telemetry/opentelemetry-proto/blob/049d4332834935792fd4dbd392ecd31904f99ba2/opentelemetry/proto/metrics/v1/metrics.proto#L86
//
// message ScopeMetrics {
@@ -494,22 +460,19 @@ func (dctx *decoderContext) decodeScopeMetrics(src []byte, disableScopeMetadata
// repeated Metric metrics = 2;
// }
if !disableScopeMetadata {
scopeData, ok, err := easyproto.GetMessageData(src, 1)
if err != nil {
return fmt.Errorf("cannot read InstrumentationScope: %w", err)
}
if ok {
if err := dctx.decodeInstrumentationScope(scopeData); err != nil {
return fmt.Errorf("cannot decode InstrumentationScope: %w", err)
}
scopeData, ok, err := easyproto.GetMessageData(src, 1)
if err != nil {
return fmt.Errorf("cannot read InstrumentationScope: %w", err)
}
if ok {
if err := dctx.decodeInstrumentationScope(scopeData); err != nil {
return fmt.Errorf("cannot decode InstrumentationScope: %w", err)
}
}
dctxSnapshot := dctx.getSnapshot()
var fc easyproto.FieldContext
var err error
for len(src) > 0 {
src, err = fc.NextField(src)
if err != nil {

View File

@@ -1,166 +0,0 @@
package pb
import (
"reflect"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
)
type testMetricPusher struct {
samples []testSample
metadata []MetricMetadata
}
type testSample struct {
mm MetricMetadata
suffix string
labels []prompb.Label
ts uint64
value float64
}
func (p *testMetricPusher) PushSample(mm *MetricMetadata, suffix string, ls *promutil.Labels, timestampNsecs uint64, value float64, _ uint32) {
labels := make([]prompb.Label, len(ls.Labels))
copy(labels, ls.Labels)
p.samples = append(p.samples, testSample{
mm: *mm,
suffix: suffix,
labels: labels,
ts: timestampNsecs,
value: value,
})
}
func (p *testMetricPusher) PushMetricMetadata(mm *MetricMetadata) {
p.metadata = append(p.metadata, *mm)
}
func TestDecodeScopeMetrics(t *testing.T) {
scopeName := "my-scope"
scopeVersion := "v1.0"
envVal := "prod"
intVal := int64(1)
md := &MetricsData{
ResourceMetrics: []*ResourceMetrics{
{
Resource: &Resource{
Attributes: []*KeyValue{
{Key: "job", Value: &AnyValue{StringValue: new("vm")}},
{Key: "region", Value: &AnyValue{StringValue: new("us-east-1")}},
},
},
ScopeMetrics: []*ScopeMetrics{
{
Scope: &InstrumentationScope{
Name: &scopeName,
Version: &scopeVersion,
Attributes: []*KeyValue{
{Key: "env", Value: &AnyValue{StringValue: &envVal}},
},
},
Metrics: []*Metric{
{
Name: "my-gauge",
Description: "a test gauge",
Gauge: &Gauge{
DataPoints: []*NumberDataPoint{
{
Attributes: []*KeyValue{{Key: "label1", Value: &AnyValue{StringValue: new("value1")}}},
IntValue: &intVal,
TimeUnixNano: 1000,
},
},
},
},
},
},
},
},
},
}
data := md.MarshalProtobuf(nil)
f := func(options DecodeMetricsOptions, wantLabels map[string]string) {
t.Helper()
mp := &testMetricPusher{}
if err := DecodeMetricsData(data, mp, options); err != nil {
t.Fatalf("DecodeMetricsData error: %v", err)
}
if len(mp.samples) != 1 {
t.Fatalf("expected 1 sample, got %d", len(mp.samples))
}
gotMap := make(map[string]string, len(mp.samples[0].labels))
for _, l := range mp.samples[0].labels {
gotMap[l.Name] = l.Value
}
if !reflect.DeepEqual(gotMap, wantLabels) {
t.Errorf("unexpected labels:\n got: %v\n want: %v", gotMap, wantLabels)
}
}
// PromoteScopeMetadata=true + PromoteAllResourceAttributes=true:
// got all scope labels and resource attrs
f(DecodeMetricsOptions{
DisableScopeMetadata: false,
DisableResourceAttributes: false,
}, map[string]string{
"job": "vm",
"region": "us-east-1",
"scope.name": "my-scope",
"scope.version": "v1.0",
"scope.attributes.env": "prod",
"label1": "value1",
})
// PromoteScopeMetadata=false + PromoteAllResourceAttributes=true:
// got all resource attrs, no scope labels
f(DecodeMetricsOptions{
DisableScopeMetadata: true,
DisableResourceAttributes: false,
}, map[string]string{
"job": "vm",
"region": "us-east-1",
"label1": "value1",
})
// PromoteScopeMetadata=true + PromoteAllResourceAttributes=false + ResourceAttributesList=[region]:
// got only the `region` attr from resource
f(DecodeMetricsOptions{
DisableScopeMetadata: false,
DisableResourceAttributes: true,
ResourceAttributesList: map[string]struct{}{"region": {}},
}, map[string]string{
"region": "us-east-1",
"scope.name": "my-scope",
"scope.version": "v1.0",
"scope.attributes.env": "prod",
"label1": "value1",
})
// PromoteScopeMetadata=true + PromoteAllResourceAttributes=true + ResourceAttributesList=[region]:
// got all resource attrs except `region` (ignore list)
f(DecodeMetricsOptions{
DisableScopeMetadata: false,
DisableResourceAttributes: false,
ResourceAttributesList: map[string]struct{}{"region": {}},
}, map[string]string{
"job": "vm",
"scope.name": "my-scope",
"scope.version": "v1.0",
"scope.attributes.env": "prod",
"label1": "value1",
})
// PromoteScopeMetadata=false + PromoteAllResourceAttributes=false + ResourceAttributesList=[job]:
// got only `job` attr
f(DecodeMetricsOptions{
DisableScopeMetadata: true,
DisableResourceAttributes: true,
ResourceAttributesList: map[string]struct{}{"job": {}},
}, map[string]string{
"job": "vm",
"label1": "value1",
})
}

View File

@@ -1,7 +1,6 @@
package stream
import (
"flag"
"fmt"
"io"
"sync"
@@ -11,43 +10,13 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/opentelemetry/pb"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/protoparserutil"
)
var (
maxRequestSize = flagutil.NewBytes("opentelemetry.maxRequestSize", 64*1024*1024, "The maximum size in bytes of a single OpenTelemetry request")
promoteScopeMetadata = flag.Bool("opentelemetry.promoteScopeMetadata", true, "Whether to promote OTel scope metadata (i.e. name, version, schema URL, and attributes) to metric labels.")
promoteAllResourceAttributes = flag.Bool("opentelemetry.promoteAllResourceAttributes", true, "Whether to promote all resource attributes to labels, except for the ones configured with 'opentelemetry.ignoreResourceAttributes'.")
promoteResourceAttributes = flagutil.NewArrayString("opentelemetry.promoteResourceAttributes", "Promote specific list of resource attributes to labels.")
ignoreResourceAttributes = flagutil.NewArrayString("opentelemetry.ignoreResourceAttributes", "Control which resource attributes to ignore, can only be set when 'opentelemetry.promoteAllResourceAttributes' is true.")
)
// InitDecodeOptions configures decoding settings for the parser
func InitDecodeOptions() {
if *promoteAllResourceAttributes && len(*promoteResourceAttributes) > 0 {
logger.Fatalf("cannot set both '-opentelemetry.promoteAllResourceAttributes' and '-opentelemetry.promoteResourceAttributes'")
}
if !*promoteAllResourceAttributes && len(*ignoreResourceAttributes) > 0 {
logger.Fatalf("'-opentelemetry.ignoreResourceAttributes' can only be set when '-opentelemetry.promoteAllResourceAttributes' is true.")
}
defaultDecodeMetricsOptions.DisableScopeMetadata = !*promoteScopeMetadata
defaultDecodeMetricsOptions.DisableResourceAttributes = !*promoteAllResourceAttributes
attributes := *ignoreResourceAttributes
if !*promoteAllResourceAttributes {
attributes = *promoteResourceAttributes
}
defaultDecodeMetricsOptions.ResourceAttributesList = make(map[string]struct{}, len(attributes))
for _, a := range attributes {
defaultDecodeMetricsOptions.ResourceAttributesList[a] = struct{}{}
}
}
var defaultDecodeMetricsOptions = pb.DecodeMetricsOptions{}
var maxRequestSize = flagutil.NewBytes("opentelemetry.maxRequestSize", 64*1024*1024, "The maximum size in bytes of a single OpenTelemetry request")
// ParseStream parses OpenTelemetry protobuf or json data from r and calls callback for the parsed rows.
//
@@ -78,7 +47,7 @@ func parseData(data []byte, callback func(tss []prompb.TimeSeries, mms []prompb.
// the flushFunc will be called multiple time if the request is big, to avoid over allocating memory for such request.
wctx.flushFunc = callback
if err := pb.DecodeMetricsData(data, wctx, defaultDecodeMetricsOptions); err != nil {
if err := pb.DecodeMetricsData(data, wctx); err != nil {
return fmt.Errorf("cannot unmarshal request from %d bytes: %w", len(data), err)
}

View File

@@ -76,6 +76,15 @@ type Server struct {
// Limits contains various limits for Server.
type Limits struct {
// MaxLabelNames is the maximum label names, which may be returned from labelNames request.
MaxLabelNames int
// MaxLabelValues is the maximum label values, which may be returned from labelValues request.
MaxLabelValues int
// MaxTagValueSuffixes is the maximum number of entries, which can be returned from tagValueSuffixes request.
MaxTagValueSuffixes int
// MaxConcurrentRequests is the maximum number of concurrent requests a server can process.
//
// The remaining requests wait for up to MaxQueueDuration for their execution.
@@ -125,7 +134,7 @@ func NewServer(addr string, api API, limits Limits, disableResponseCompression b
labelNamesRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="labelNames",addr=%q}`, addr)),
labelValuesRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="labelValues",addr=%q}`, addr)),
tagValueSuffixesRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="tagValueSuffixes",addr=%q}`, addr)),
seriesCountRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="seriesCount",addr=%q}`, addr)),
seriesCountRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="seriesSount",addr=%q}`, addr)),
tsdbStatusRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="tsdbStatus",addr=%q}`, addr)),
searchMetricNamesRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="searchMetricNames",addr=%q}`, addr)),
searchRequests: metrics.NewCounter(fmt.Sprintf(`vm_vmselect_rpc_requests_total{action="search",addr=%q}`, addr)),
@@ -674,6 +683,9 @@ func (s *Server) processLabelNames(ctx *vmselectRequestCtx) error {
if err != nil {
return fmt.Errorf("cannot read maxLabelNames: %w", err)
}
if maxLabelNames <= 0 || maxLabelNames > s.limits.MaxLabelNames {
maxLabelNames = s.limits.MaxLabelNames
}
if err := s.beginConcurrentRequest(ctx); err != nil {
return ctx.writeErrorMessage(err)
@@ -725,6 +737,9 @@ func (s *Server) processLabelValues(ctx *vmselectRequestCtx) error {
if err != nil {
return fmt.Errorf("cannot read maxLabelValues: %w", err)
}
if maxLabelValues <= 0 || maxLabelValues > s.limits.MaxLabelValues {
maxLabelValues = s.limits.MaxLabelValues
}
if err := s.beginConcurrentRequest(ctx); err != nil {
return ctx.writeErrorMessage(err)
@@ -785,7 +800,10 @@ func (s *Server) processTagValueSuffixes(ctx *vmselectRequestCtx) error {
}
maxSuffixes, err := ctx.readLimit()
if err != nil {
return fmt.Errorf("cannot read maxTagValueSuffixes: %w", err)
return fmt.Errorf("cannot read maxTagValueSuffixes: %d", err)
}
if maxSuffixes <= 0 || maxSuffixes > s.limits.MaxTagValueSuffixes {
maxSuffixes = s.limits.MaxTagValueSuffixes
}
if err := s.beginConcurrentRequest(ctx); err != nil {
@@ -799,6 +817,14 @@ func (s *Server) processTagValueSuffixes(ctx *vmselectRequestCtx) error {
return ctx.writeErrorMessage(err)
}
if len(suffixes) >= s.limits.MaxTagValueSuffixes {
err := fmt.Errorf("more than %d tag value suffixes found "+
"for tagKey=%q, tagValuePrefix=%q, delimiter=%c on time range %s; "+
"either narrow down the query or increase -search.max* command-line flag value; see https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#resource-usage-limits",
s.limits.MaxTagValueSuffixes, tagKey, tagValuePrefix, delimiter, tr.String())
return ctx.writeErrorMessage(err)
}
// Send an empty error message to vmselect.
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send empty error message: %w", err)