mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-06-05 18:13:06 +03:00
Compare commits
1 Commits
v1.121.0
...
stats-pip-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9b4a8da177 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -12,7 +12,6 @@
|
||||
/victoria-logs-data
|
||||
/victoria-metrics-data
|
||||
/vmagent-remotewrite-data
|
||||
/vlagent-remotewritewrite
|
||||
/vmstorage-data
|
||||
/vmselect-cache
|
||||
/package/temp-deb-*
|
||||
|
||||
29
Makefile
29
Makefile
@@ -11,8 +11,6 @@ ifeq ($(PKG_TAG),)
|
||||
PKG_TAG := $(BUILDINFO_TAG)
|
||||
endif
|
||||
|
||||
EXTRA_DOCKER_TAG_SUFFIX ?= EXTRA_DOCKER_TAG_SUFFIX
|
||||
|
||||
GO_BUILDINFO = -X '$(PKG_PREFIX)/lib/buildinfo.Version=$(APP_NAME)-$(DATEINFO_TAG)-$(BUILDINFO_TAG)'
|
||||
TAR_OWNERSHIP ?= --owner=1000 --group=1000
|
||||
|
||||
@@ -197,31 +195,6 @@ vmutils-crossbuild: \
|
||||
vmutils-openbsd-amd64 \
|
||||
vmutils-windows-amd64
|
||||
|
||||
# publish-final-images runs `publish-via-docker-from-rc` once per component
# for the release given in $(TAG), covering the plain, -cluster, -enterprise
# and -enterprise-cluster tag flavours, and finishes with `publish-latest`.
# The steps are chained with `&&`, so the first failing step aborts the rest.
publish-final-images:
	PKG_TAG=$(TAG) APP_NAME=victoria-metrics $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG) APP_NAME=vmagent $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG) APP_NAME=vmalert $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG) APP_NAME=vmalert-tool $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG) APP_NAME=vmauth $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG) APP_NAME=vmbackup $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG) APP_NAME=vmrestore $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG) APP_NAME=vmctl $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-cluster APP_NAME=vminsert $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-cluster APP_NAME=vmselect $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-cluster APP_NAME=vmstorage $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-enterprise APP_NAME=victoria-metrics $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-enterprise APP_NAME=vmagent $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-enterprise APP_NAME=vmalert $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-enterprise APP_NAME=vmauth $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-enterprise APP_NAME=vmbackup $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-enterprise APP_NAME=vmrestore $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-enterprise-cluster APP_NAME=vminsert $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-enterprise-cluster APP_NAME=vmselect $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-enterprise-cluster APP_NAME=vmstorage $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-enterprise APP_NAME=vmgateway $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG)-enterprise APP_NAME=vmbackupmanager $(MAKE) publish-via-docker-from-rc && \
	PKG_TAG=$(TAG) $(MAKE) publish-latest
|
||||
|
||||
publish-latest:
|
||||
PKG_TAG=$(TAG) APP_NAME=victoria-metrics $(MAKE) publish-via-docker-latest && \
|
||||
PKG_TAG=$(TAG) APP_NAME=vmagent $(MAKE) publish-via-docker-latest && \
|
||||
@@ -572,7 +545,7 @@ test-full:
|
||||
# test-full-386 runs the ./lib and ./app test suites with GOARCH=386 and the
# synctest experiment enabled, writing an atomic-mode coverage profile to
# coverage.txt.
test-full-386:
	GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
|
||||
integration-test: victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore victoria-logs vlagent
|
||||
integration-test: victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore victoria-logs
|
||||
go test ./apptest/... -skip="^TestCluster.*"
|
||||
|
||||
benchmark:
|
||||
|
||||
14
README.md
14
README.md
@@ -1,14 +1,14 @@
|
||||
# VictoriaMetrics
|
||||
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||

|
||||

|
||||
[](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml)
|
||||
[](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
|
||||

|
||||

|
||||

|
||||

|
||||

|
||||
[](https://x.com/VictoriaMetrics/)
|
||||
[](https://www.reddit.com/r/VictoriaMetrics/)
|
||||

|
||||

|
||||
|
||||
<picture>
|
||||
<source srcset="docs/victoriametrics/logo_white.webp" media="(prefers-color-scheme: dark)">
|
||||
|
||||
@@ -1,106 +0,0 @@
|
||||
# All these commands must run from repository root.
#
# Every rule below is a thin wrapper: it sets APP_NAME=vlagent (plus any
# platform variables) in the environment and re-invokes $(MAKE) on a generic
# target that is defined outside of this file.

# --- Local builds for the host platform -------------------------------------

vlagent:
	APP_NAME=vlagent $(MAKE) app-local

vlagent-race:
	APP_NAME=vlagent RACE=-race $(MAKE) app-local

# --- Builds performed inside Docker ------------------------------------------

vlagent-prod:
	APP_NAME=vlagent $(MAKE) app-via-docker

vlagent-pure-prod:
	APP_NAME=vlagent $(MAKE) app-via-docker-pure

vlagent-linux-amd64-prod:
	APP_NAME=vlagent $(MAKE) app-via-docker-linux-amd64

vlagent-linux-arm-prod:
	APP_NAME=vlagent $(MAKE) app-via-docker-linux-arm

vlagent-linux-arm64-prod:
	APP_NAME=vlagent $(MAKE) app-via-docker-linux-arm64

vlagent-linux-ppc64le-prod:
	APP_NAME=vlagent $(MAKE) app-via-docker-linux-ppc64le

vlagent-linux-386-prod:
	APP_NAME=vlagent $(MAKE) app-via-docker-linux-386

vlagent-darwin-amd64-prod:
	APP_NAME=vlagent $(MAKE) app-via-docker-darwin-amd64

vlagent-darwin-arm64-prod:
	APP_NAME=vlagent $(MAKE) app-via-docker-darwin-arm64

vlagent-freebsd-amd64-prod:
	APP_NAME=vlagent $(MAKE) app-via-docker-freebsd-amd64

vlagent-openbsd-amd64-prod:
	APP_NAME=vlagent $(MAKE) app-via-docker-openbsd-amd64

vlagent-windows-amd64-prod:
	APP_NAME=vlagent $(MAKE) app-via-docker-windows-amd64

# --- Docker image packaging and publishing -----------------------------------

package-vlagent:
	APP_NAME=vlagent $(MAKE) package-via-docker

package-vlagent-pure:
	APP_NAME=vlagent $(MAKE) package-via-docker-pure

package-vlagent-amd64:
	APP_NAME=vlagent $(MAKE) package-via-docker-amd64

package-vlagent-arm:
	APP_NAME=vlagent $(MAKE) package-via-docker-arm

package-vlagent-arm64:
	APP_NAME=vlagent $(MAKE) package-via-docker-arm64

package-vlagent-ppc64le:
	APP_NAME=vlagent $(MAKE) package-via-docker-ppc64le

package-vlagent-386:
	APP_NAME=vlagent $(MAKE) package-via-docker-386

publish-vlagent:
	APP_NAME=vlagent $(MAKE) publish-via-docker

# --- Local cross-compilation --------------------------------------------------
# Only linux/amd64 is built with CGO_ENABLED=1; every other GOOS/GOARCH pair
# listed here sets CGO_ENABLED=0.

vlagent-linux-amd64:
	APP_NAME=vlagent CGO_ENABLED=1 GOOS=linux GOARCH=amd64 $(MAKE) app-local-goos-goarch

vlagent-linux-arm:
	APP_NAME=vlagent CGO_ENABLED=0 GOOS=linux GOARCH=arm $(MAKE) app-local-goos-goarch

vlagent-linux-arm64:
	APP_NAME=vlagent CGO_ENABLED=0 GOOS=linux GOARCH=arm64 $(MAKE) app-local-goos-goarch

vlagent-linux-ppc64le:
	APP_NAME=vlagent CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch

vlagent-linux-s390x:
	APP_NAME=vlagent CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch

vlagent-linux-loong64:
	APP_NAME=vlagent CGO_ENABLED=0 GOOS=linux GOARCH=loong64 $(MAKE) app-local-goos-goarch

vlagent-linux-386:
	APP_NAME=vlagent CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch

vlagent-darwin-amd64:
	APP_NAME=vlagent CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 $(MAKE) app-local-goos-goarch

vlagent-darwin-arm64:
	APP_NAME=vlagent CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 $(MAKE) app-local-goos-goarch

vlagent-freebsd-amd64:
	APP_NAME=vlagent CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 $(MAKE) app-local-goos-goarch

vlagent-openbsd-amd64:
	APP_NAME=vlagent CGO_ENABLED=0 GOOS=openbsd GOARCH=amd64 $(MAKE) app-local-goos-goarch

# Windows goes through its own generic target instead of app-local-goos-goarch.
vlagent-windows-amd64:
	GOARCH=amd64 APP_NAME=vlagent $(MAKE) app-local-windows-goarch

vlagent-pure:
	APP_NAME=vlagent $(MAKE) app-local-pure
|
||||
@@ -1,3 +0,0 @@
|
||||
See vlagent docs [here](https://docs.victoriametrics.com/victorialogs/vlagent/).
|
||||
|
||||
vlagent docs can be edited at [docs/victorialogs/vlagent.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victorialogs/vlagent.md).
|
||||
@@ -1,8 +0,0 @@
|
||||
# base_image defaults to a value named "non-existing"; presumably the caller is
# expected to pass the real one via --build-arg.
ARG base_image=non-existing
FROM $base_image

# Port exposed by the image for the vlagent HTTP server.
EXPOSE 9429

ENTRYPOINT ["/vlagent-prod"]

# src_binary is the path of the binary to copy into the image as ./vlagent-prod;
# like base_image, its default is "non-existing".
ARG src_binary=non-existing
COPY $src_binary ./vlagent-prod
|
||||
@@ -1,97 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "TCP address to listen for incoming http requests. "+
|
||||
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vlagent instances on the same server. "+
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
flag.Usage = usage
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
remotewrite.InitSecretFlags()
|
||||
logger.Init()
|
||||
|
||||
remotewrite.Init()
|
||||
vlinsert.Init()
|
||||
|
||||
insertutil.SetLogRowsStorage(&remotewrite.Storage{})
|
||||
listenAddrs := *httpListenAddrs
|
||||
if len(listenAddrs) == 0 {
|
||||
listenAddrs = []string{":9429"}
|
||||
}
|
||||
logger.Infof("starting vlagent at %q...", listenAddrs)
|
||||
startTime := time.Now()
|
||||
go httpserver.Serve(listenAddrs, requestHandler, httpserver.ServeOptions{
|
||||
UseProxyProtocol: useProxyProtocol,
|
||||
})
|
||||
logger.Infof("started vlagent in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("received signal %s", sig)
|
||||
pushmetrics.Stop()
|
||||
|
||||
startTime = time.Now()
|
||||
logger.Infof("gracefully shutting down webservice at %q", listenAddrs)
|
||||
if err := httpserver.Stop(listenAddrs); err != nil {
|
||||
logger.Fatalf("cannot stop the webservice: %s", err)
|
||||
}
|
||||
vlinsert.Stop()
|
||||
remotewrite.Stop()
|
||||
logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
|
||||
logger.Infof("successfully stopped vlagent in %.3f seconds", time.Since(startTime).Seconds())
|
||||
}
|
||||
|
||||
// RequestHandler handles insert requests for VictoriaLogs
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
if r.Method != http.MethodGet {
|
||||
return false
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
fmt.Fprintf(w, "<h2>vlagent</h2>")
|
||||
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victorialogs/vlagent/'>https://docs.victoriametrics.com/victorialogs/vlagent/</a></br>")
|
||||
fmt.Fprintf(w, "Useful endpoints:</br>")
|
||||
httpserver.WriteAPIHelp(w, [][2]string{
|
||||
{"metrics", "available service metrics"},
|
||||
{"flags", "command-line flags"},
|
||||
})
|
||||
return true
|
||||
}
|
||||
return vlinsert.RequestHandler(w, r)
|
||||
}
|
||||
|
||||
func usage() {
|
||||
const s = `
|
||||
vlagent collects logs via popular data ingestion protocols and routes it to VictoriaLogs.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/victorialogs/vlagent/ .
|
||||
`
|
||||
flagutil.Usage(s)
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
#
# Both image names default to "non-existing"; presumably the caller is expected
# to pass the real ones via --build-arg.
ARG certs_image=non-existing
ARG root_image=non-existing

# Stage 1: install the ca-certificates package so the CA bundle can be copied out.
FROM $certs_image AS certs
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates

# Stage 2: the final image, which receives only the CA bundle and the binary.
FROM $root_image
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
EXPOSE 9429
ENTRYPOINT ["/vlagent-prod"]

# TARGETARCH selects which prebuilt vlagent-linux-<arch>-prod binary is copied.
ARG TARGETARCH
COPY vlagent-linux-${TARGETARCH}-prod ./vlagent-prod
|
||||
@@ -1,462 +0,0 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
)
|
||||
|
||||
var (
|
||||
rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", 0, "Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. "+
|
||||
"By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data ")
|
||||
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", time.Minute, "Timeout for sending a single block of data to the corresponding -remoteWrite.url")
|
||||
retryMinInterval = flagutil.NewArrayDuration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxTime")
|
||||
retryMaxTime = flagutil.NewArrayDuration("remoteWrite.retryMaxTime", time.Minute, "The max time spent on retry attempts to send a block of data to the corresponding -remoteWrite.url. Change this value if it is expected for -remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval")
|
||||
proxyURL = flagutil.NewArrayString("remoteWrite.proxyURL", "Optional proxy URL for writing data to the corresponding -remoteWrite.url. "+
|
||||
"Supported proxies: http, https, socks5. Example: -remoteWrite.proxyURL=socks5://proxy:1234")
|
||||
|
||||
tlsHandshakeTimeout = flagutil.NewArrayDuration("remoteWrite.tlsHandshakeTimeout", 20*time.Second, "The timeout for establishing tls connections to the corresponding -remoteWrite.url")
|
||||
tlsInsecureSkipVerify = flagutil.NewArrayBool("remoteWrite.tlsInsecureSkipVerify", "Whether to skip tls verification when connecting to the corresponding -remoteWrite.url")
|
||||
tlsCertFile = flagutil.NewArrayString("remoteWrite.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting "+
|
||||
"to the corresponding -remoteWrite.url")
|
||||
tlsKeyFile = flagutil.NewArrayString("remoteWrite.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to the corresponding -remoteWrite.url")
|
||||
tlsCAFile = flagutil.NewArrayString("remoteWrite.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. "+
|
||||
"By default, system CA is used")
|
||||
tlsServerName = flagutil.NewArrayString("remoteWrite.tlsServerName", "Optional TLS server name to use for connections to the corresponding -remoteWrite.url. "+
|
||||
"By default, the server name from -remoteWrite.url is used")
|
||||
|
||||
headers = flagutil.NewArrayString("remoteWrite.headers", "Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. "+
|
||||
"For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. "+
|
||||
"Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'")
|
||||
|
||||
basicAuthUsername = flagutil.NewArrayString("remoteWrite.basicAuth.username", "Optional basic auth username to use for the corresponding -remoteWrite.url")
|
||||
basicAuthPassword = flagutil.NewArrayString("remoteWrite.basicAuth.password", "Optional basic auth password to use for the corresponding -remoteWrite.url")
|
||||
basicAuthPasswordFile = flagutil.NewArrayString("remoteWrite.basicAuth.passwordFile", "Optional path to basic auth password to use for the corresponding -remoteWrite.url. "+
|
||||
"The file is re-read every second")
|
||||
bearerToken = flagutil.NewArrayString("remoteWrite.bearerToken", "Optional bearer auth token to use for the corresponding -remoteWrite.url")
|
||||
bearerTokenFile = flagutil.NewArrayString("remoteWrite.bearerTokenFile", "Optional path to bearer token file to use for the corresponding -remoteWrite.url. "+
|
||||
"The token is re-read from the file every second")
|
||||
|
||||
oauth2ClientID = flagutil.NewArrayString("remoteWrite.oauth2.clientID", "Optional OAuth2 clientID to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecret = flagutil.NewArrayString("remoteWrite.oauth2.clientSecret", "Optional OAuth2 clientSecret to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecretFile = flagutil.NewArrayString("remoteWrite.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for the corresponding -remoteWrite.url")
|
||||
oauth2EndpointParams = flagutil.NewArrayString("remoteWrite.oauth2.endpointParams", "Optional OAuth2 endpoint parameters to use for the corresponding -remoteWrite.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flagutil.NewArrayString("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
|
||||
)
|
||||
|
||||
type client struct {
|
||||
sanitizedURL string
|
||||
remoteWriteURL string
|
||||
|
||||
fq *persistentqueue.FastQueue
|
||||
hc *http.Client
|
||||
|
||||
retryMinInterval time.Duration
|
||||
retryMaxTime time.Duration
|
||||
|
||||
sendBlock func(block []byte) bool
|
||||
authCfg *promauth.Config
|
||||
|
||||
rl *ratelimiter.RateLimiter
|
||||
|
||||
bytesSent *metrics.Counter
|
||||
blocksSent *metrics.Counter
|
||||
requestDuration *metrics.Histogram
|
||||
requestsOKCount *metrics.Counter
|
||||
errorsCount *metrics.Counter
|
||||
packetsDropped *metrics.Counter
|
||||
rateLimit *metrics.Gauge
|
||||
retriesCount *metrics.Counter
|
||||
sendDuration *metrics.FloatCounter
|
||||
|
||||
wg sync.WaitGroup
|
||||
stopCh chan struct{}
|
||||
}
|
||||
|
||||
func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
|
||||
authCfg, err := getAuthConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize auth config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
|
||||
tr := httputil.NewTransport(false, "vlagent_remotewrite")
|
||||
tr.TLSHandshakeTimeout = tlsHandshakeTimeout.GetOptionalArg(argIdx)
|
||||
tr.MaxConnsPerHost = 2 * concurrency
|
||||
tr.MaxIdleConnsPerHost = 2 * concurrency
|
||||
tr.IdleConnTimeout = time.Minute
|
||||
tr.WriteBufferSize = 64 * 1024
|
||||
|
||||
pURL := proxyURL.GetOptionalArg(argIdx)
|
||||
if len(pURL) > 0 {
|
||||
if !strings.Contains(pURL, "://") {
|
||||
logger.Fatalf("cannot parse -remoteWrite.proxyURL=%q: it must start with `http://`, `https://` or `socks5://`", pURL)
|
||||
}
|
||||
pu, err := url.Parse(pURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot parse -remoteWrite.proxyURL=%q: %s", pURL, err)
|
||||
}
|
||||
tr.Proxy = http.ProxyURL(pu)
|
||||
}
|
||||
hc := &http.Client{
|
||||
Transport: authCfg.NewRoundTripper(tr),
|
||||
Timeout: sendTimeout.GetOptionalArg(argIdx),
|
||||
}
|
||||
c := &client{
|
||||
sanitizedURL: sanitizedURL,
|
||||
remoteWriteURL: remoteWriteURL,
|
||||
authCfg: authCfg,
|
||||
fq: fq,
|
||||
hc: hc,
|
||||
retryMinInterval: retryMinInterval.GetOptionalArg(argIdx),
|
||||
retryMaxTime: retryMaxTime.GetOptionalArg(argIdx),
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
c.sendBlock = c.sendBlockHTTP
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
|
||||
limitReached := metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_rate_limit_reached_total{url=%q}`, c.sanitizedURL))
|
||||
if bytesPerSec := rateLimit.GetOptionalArg(argIdx); bytesPerSec > 0 {
|
||||
logger.Infof("applying %d bytes per second rate limit for -remoteWrite.url=%q", bytesPerSec, sanitizedURL)
|
||||
c.rl = ratelimiter.New(int64(bytesPerSec), limitReached, c.stopCh)
|
||||
}
|
||||
c.bytesSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_bytes_sent_total{url=%q}`, c.sanitizedURL))
|
||||
c.blocksSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_blocks_sent_total{url=%q}`, c.sanitizedURL))
|
||||
c.rateLimit = metrics.GetOrCreateGauge(fmt.Sprintf(`vlagent_remotewrite_rate_limit{url=%q}`, c.sanitizedURL), func() float64 {
|
||||
return float64(rateLimit.GetOptionalArg(argIdx))
|
||||
})
|
||||
c.requestDuration = metrics.GetOrCreateHistogram(fmt.Sprintf(`vlagent_remotewrite_duration_seconds{url=%q}`, c.sanitizedURL))
|
||||
c.requestsOKCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_requests_total{url=%q, status_code="2XX"}`, c.sanitizedURL))
|
||||
c.errorsCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_errors_total{url=%q}`, c.sanitizedURL))
|
||||
c.packetsDropped = metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_packets_dropped_total{url=%q}`, c.sanitizedURL))
|
||||
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
|
||||
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vlagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
|
||||
metrics.GetOrCreateGauge(fmt.Sprintf(`vlagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
|
||||
return float64(*queues)
|
||||
})
|
||||
for i := 0; i < concurrency; i++ {
|
||||
c.wg.Add(1)
|
||||
go func() {
|
||||
defer c.wg.Done()
|
||||
c.runWorker()
|
||||
}()
|
||||
}
|
||||
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
|
||||
}
|
||||
|
||||
func (c *client) MustStop() {
|
||||
close(c.stopCh)
|
||||
c.wg.Wait()
|
||||
logger.Infof("stopped client for -remoteWrite.url=%q", c.sanitizedURL)
|
||||
}
|
||||
|
||||
func getAuthConfig(argIdx int) (*promauth.Config, error) {
|
||||
headersValue := headers.GetOptionalArg(argIdx)
|
||||
var hdrs []string
|
||||
if headersValue != "" {
|
||||
hdrs = strings.Split(headersValue, "^^")
|
||||
}
|
||||
username := basicAuthUsername.GetOptionalArg(argIdx)
|
||||
password := basicAuthPassword.GetOptionalArg(argIdx)
|
||||
passwordFile := basicAuthPasswordFile.GetOptionalArg(argIdx)
|
||||
var basicAuthCfg *promauth.BasicAuthConfig
|
||||
if username != "" || password != "" || passwordFile != "" {
|
||||
basicAuthCfg = &promauth.BasicAuthConfig{
|
||||
Username: username,
|
||||
Password: promauth.NewSecret(password),
|
||||
PasswordFile: passwordFile,
|
||||
}
|
||||
}
|
||||
|
||||
token := bearerToken.GetOptionalArg(argIdx)
|
||||
tokenFile := bearerTokenFile.GetOptionalArg(argIdx)
|
||||
|
||||
var oauth2Cfg *promauth.OAuth2Config
|
||||
clientSecret := oauth2ClientSecret.GetOptionalArg(argIdx)
|
||||
clientSecretFile := oauth2ClientSecretFile.GetOptionalArg(argIdx)
|
||||
if clientSecretFile != "" || clientSecret != "" {
|
||||
endpointParamsJSON := oauth2EndpointParams.GetOptionalArg(argIdx)
|
||||
endpointParams, err := flagutil.ParseJSONMap(endpointParamsJSON)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", endpointParamsJSON, err)
|
||||
}
|
||||
oauth2Cfg = &promauth.OAuth2Config{
|
||||
ClientID: oauth2ClientID.GetOptionalArg(argIdx),
|
||||
ClientSecret: promauth.NewSecret(clientSecret),
|
||||
ClientSecretFile: clientSecretFile,
|
||||
EndpointParams: endpointParams,
|
||||
TokenURL: oauth2TokenURL.GetOptionalArg(argIdx),
|
||||
Scopes: strings.Split(oauth2Scopes.GetOptionalArg(argIdx), ";"),
|
||||
}
|
||||
}
|
||||
|
||||
tlsCfg := &promauth.TLSConfig{
|
||||
CAFile: tlsCAFile.GetOptionalArg(argIdx),
|
||||
CertFile: tlsCertFile.GetOptionalArg(argIdx),
|
||||
KeyFile: tlsKeyFile.GetOptionalArg(argIdx),
|
||||
ServerName: tlsServerName.GetOptionalArg(argIdx),
|
||||
InsecureSkipVerify: tlsInsecureSkipVerify.GetOptionalArg(argIdx),
|
||||
}
|
||||
|
||||
opts := &promauth.Options{
|
||||
BasicAuth: basicAuthCfg,
|
||||
BearerToken: token,
|
||||
BearerTokenFile: tokenFile,
|
||||
OAuth2: oauth2Cfg,
|
||||
TLSConfig: tlsCfg,
|
||||
Headers: hdrs,
|
||||
}
|
||||
authCfg, err := opts.NewConfig()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot populate auth config for remoteWrite idx: %d, err: %w", argIdx, err)
|
||||
}
|
||||
return authCfg, nil
|
||||
}
|
||||
|
||||
func (c *client) runWorker() {
|
||||
var ok bool
|
||||
var block []byte
|
||||
ch := make(chan bool, 1)
|
||||
for {
|
||||
block, ok = c.fq.MustReadBlock(block[:0])
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if len(block) == 0 {
|
||||
// skip empty data blocks from sending
|
||||
continue
|
||||
}
|
||||
go func() {
|
||||
startTime := time.Now()
|
||||
ch <- c.sendBlock(block)
|
||||
c.sendDuration.Add(time.Since(startTime).Seconds())
|
||||
}()
|
||||
select {
|
||||
case ok := <-ch:
|
||||
if ok {
|
||||
// The block has been sent successfully
|
||||
continue
|
||||
}
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
return
|
||||
case <-c.stopCh:
|
||||
// c must be stopped. Wait for a while in the hope the block will be sent.
|
||||
graceDuration := 5 * time.Second
|
||||
select {
|
||||
case ok := <-ch:
|
||||
if !ok {
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
}
|
||||
case <-time.After(graceDuration):
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *client) doRequest(url string, body []byte) (*http.Response, error) {
|
||||
req, err := c.newRequest(url, body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := c.hc.Do(req)
|
||||
if err == nil {
|
||||
return resp, nil
|
||||
}
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
return nil, err
|
||||
}
|
||||
// It is likely connection became stale or timed out during the first request.
|
||||
// Make another attempt in hope request will succeed.
|
||||
// If not, the error should be handled by the caller as usual.
|
||||
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
|
||||
req, err = c.newRequest(url, body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
resp, err = c.hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
reqBody := bytes.NewBuffer(body)
|
||||
req, err := http.NewRequest(http.MethodPost, url, reqBody)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", url, err)
|
||||
}
|
||||
err = c.authCfg.SetHeaders(req, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
h := req.Header
|
||||
h.Set("User-Agent", "vlagent")
|
||||
h.Set("Content-Encoding", "zstd")
|
||||
h.Set("Content-Type", "application/octet-stream")
|
||||
|
||||
return req, nil
|
||||
}
|
||||
|
||||
// sendBlockHTTP sends the given block to c.remoteWriteURL.
|
||||
//
|
||||
// The function returns false only if c.stopCh is closed.
|
||||
// Otherwise, it tries sending the block to remote storage indefinitely.
|
||||
func (c *client) sendBlockHTTP(block []byte) bool {
|
||||
c.rl.Register(len(block))
|
||||
maxRetryDuration := timeutil.AddJitterToDuration(c.retryMaxTime)
|
||||
retryDuration := timeutil.AddJitterToDuration(c.retryMinInterval)
|
||||
retriesCount := 0
|
||||
|
||||
again:
|
||||
startTime := time.Now()
|
||||
resp, err := c.doRequest(c.remoteWriteURL, block)
|
||||
c.requestDuration.UpdateDuration(startTime)
|
||||
if err != nil {
|
||||
c.errorsCount.Inc()
|
||||
retryDuration *= 2
|
||||
if retryDuration > maxRetryDuration {
|
||||
retryDuration = maxRetryDuration
|
||||
}
|
||||
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
|
||||
len(block), c.sanitizedURL, err, retryDuration.Seconds())
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
timerpool.Put(t)
|
||||
return false
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
}
|
||||
|
||||
statusCode := resp.StatusCode
|
||||
if statusCode/100 == 2 {
|
||||
_ = resp.Body.Close()
|
||||
c.requestsOKCount.Inc()
|
||||
c.bytesSent.Add(len(block))
|
||||
c.blocksSent.Inc()
|
||||
return true
|
||||
}
|
||||
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_requests_total{url=%q, status_code="%d"}`, c.sanitizedURL, statusCode)).Inc()
|
||||
if statusCode == 400 || statusCode == 404 {
|
||||
logBlockRejected(block, c.sanitizedURL, resp)
|
||||
_ = resp.Body.Close()
|
||||
c.packetsDropped.Inc()
|
||||
return true
|
||||
}
|
||||
// Unexpected status code returned
|
||||
retriesCount++
|
||||
retryAfterHeader := parseRetryAfterHeader(resp.Header.Get("Retry-After"))
|
||||
retryDuration = getRetryDuration(retryAfterHeader, retryDuration, maxRetryDuration)
|
||||
|
||||
// Handle response
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
if err != nil {
|
||||
logger.Errorf("cannot read response body from %q during retry #%d: %s", c.sanitizedURL, retriesCount, err)
|
||||
} else {
|
||||
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+
|
||||
"re-sending the block in %.3f seconds", len(block), c.sanitizedURL, retriesCount, statusCode, body, retryDuration.Seconds())
|
||||
}
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
timerpool.Put(t)
|
||||
return false
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
}
|
||||
|
||||
var remoteWriteRejectedLogger = logger.WithThrottler("remoteWriteRejected", 5*time.Second)
|
||||
var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Second)
|
||||
|
||||
// getRetryDuration returns retry duration.
|
||||
// retryAfterDuration has the highest priority.
|
||||
// If retryAfterDuration is not specified, retryDuration gets doubled.
|
||||
// retryDuration can't exceed maxRetryDuration.
|
||||
//
|
||||
// Also see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6097
|
||||
func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.Duration) time.Duration {
|
||||
// retryAfterDuration has the highest priority duration
|
||||
if retryAfterDuration > 0 {
|
||||
return timeutil.AddJitterToDuration(retryAfterDuration)
|
||||
}
|
||||
|
||||
// default backoff retry policy
|
||||
retryDuration *= 2
|
||||
if retryDuration > maxRetryDuration {
|
||||
retryDuration = maxRetryDuration
|
||||
}
|
||||
|
||||
return retryDuration
|
||||
}
|
||||
|
||||
func logBlockRejected(block []byte, sanitizedURL string, resp *http.Response) {
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
remoteWriteRejectedLogger.Errorf("sending a block with size %d bytes to %q was rejected (skipping the block): status code %d; "+
|
||||
"failed to read response body: %s",
|
||||
len(block), sanitizedURL, resp.StatusCode, err)
|
||||
} else {
|
||||
remoteWriteRejectedLogger.Errorf("sending a block with size %d bytes to %q was rejected (skipping the block): status code %d; response body: %s",
|
||||
len(block), sanitizedURL, resp.StatusCode, string(body))
|
||||
}
|
||||
}
|
||||
|
||||
// parseRetryAfterHeader parses `Retry-After` value retrieved from HTTP response header.
|
||||
// retryAfterString should be in either HTTP-date or a number of seconds.
|
||||
// It will return time.Duration(0) if `retryAfterString` does not follow RFC 7231.
|
||||
func parseRetryAfterHeader(retryAfterString string) (retryAfterDuration time.Duration) {
|
||||
if retryAfterString == "" {
|
||||
return retryAfterDuration
|
||||
}
|
||||
|
||||
defer func() {
|
||||
v := retryAfterDuration.Seconds()
|
||||
logger.Infof("'Retry-After: %s' parsed into %.2f second(s)", retryAfterString, v)
|
||||
}()
|
||||
|
||||
// Retry-After could be in "Mon, 02 Jan 2006 15:04:05 GMT" format.
|
||||
if parsedTime, err := time.Parse(http.TimeFormat, retryAfterString); err == nil {
|
||||
return time.Duration(time.Until(parsedTime).Seconds()) * time.Second
|
||||
}
|
||||
// Retry-After could be in seconds.
|
||||
if seconds, err := strconv.Atoi(retryAfterString); err == nil {
|
||||
return time.Duration(seconds) * time.Second
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"math"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestCalculateRetryDuration(t *testing.T) {
|
||||
// `testFunc` call `calculateRetryDuration` for `n` times
|
||||
// and evaluate if the result of `calculateRetryDuration` is
|
||||
// 1. >= expectMinDuration
|
||||
// 2. <= expectMinDuration + 10% (see timeutil.AddJitterToDuration)
|
||||
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
|
||||
}
|
||||
|
||||
expectMaxDuration := helper(expectMinDuration)
|
||||
expectMinDuration = expectMinDuration - (1000 * time.Millisecond) // Avoid edge case when calculating time.Until(now)
|
||||
|
||||
if !(retryDuration >= expectMinDuration && retryDuration <= expectMaxDuration) {
|
||||
t.Fatalf(
|
||||
"incorrect retry duration, want (ms): [%d, %d], got (ms): %d",
|
||||
expectMinDuration.Milliseconds(), expectMaxDuration.Milliseconds(),
|
||||
retryDuration.Milliseconds(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Call calculateRetryDuration for 1 time.
|
||||
{
|
||||
// default backoff policy
|
||||
f(0, time.Second, 1, 2*time.Second)
|
||||
// default backoff policy exceed max limit"
|
||||
f(0, 10*time.Minute, 1, time.Minute)
|
||||
|
||||
// retry after > default backoff policy
|
||||
f(10*time.Second, 1*time.Second, 1, 10*time.Second)
|
||||
// retry after < default backoff policy
|
||||
f(1*time.Second, 10*time.Second, 1, 1*time.Second)
|
||||
// retry after invalid and < default backoff policy
|
||||
f(0, time.Second, 1, 2*time.Second)
|
||||
|
||||
}
|
||||
|
||||
// Call calculateRetryDuration for multiple times.
|
||||
{
|
||||
// default backoff policy 2 times
|
||||
f(0, time.Second, 2, 4*time.Second)
|
||||
// default backoff policy 3 times
|
||||
f(0, time.Second, 3, 8*time.Second)
|
||||
// default backoff policy N times exceed max limit
|
||||
f(0, time.Second, 10, time.Minute)
|
||||
|
||||
// retry after 120s 1 times
|
||||
f(120*time.Second, time.Second, 1, 120*time.Second)
|
||||
// retry after 120s 2 times
|
||||
f(120*time.Second, time.Second, 2, 120*time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRetryAfterHeader(t *testing.T) {
|
||||
f := func(retryAfterString string, expectResult time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
result := parseRetryAfterHeader(retryAfterString)
|
||||
// expect `expectResult == result` when retryAfterString is in seconds or invalid
|
||||
// expect the difference between result and expectResult to be lower than 10%
|
||||
if !(expectResult == result || math.Abs(float64(expectResult-result))/float64(expectResult) < 0.10) {
|
||||
t.Fatalf(
|
||||
"incorrect retry after duration, want (ms): %d, got (ms): %d",
|
||||
expectResult.Milliseconds(), result.Milliseconds(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// retry after header in seconds
|
||||
f("10", 10*time.Second)
|
||||
// retry after header in date time
|
||||
f(time.Now().Add(30*time.Second).UTC().Format(http.TimeFormat), 30*time.Second)
|
||||
// retry after header invalid
|
||||
f("invalid-retry-after", 0)
|
||||
// retry after header not in GMT
|
||||
f(time.Now().Add(10*time.Second).Format("Mon, 02 Jan 2006 15:04:05 FAKETZ"), 0)
|
||||
}
|
||||
|
||||
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
|
||||
func helper(d time.Duration) time.Duration {
|
||||
dv := d / 10
|
||||
if dv > 10*time.Second {
|
||||
dv = 10 * time.Second
|
||||
}
|
||||
|
||||
return d + dv
|
||||
}
|
||||
@@ -1,158 +0,0 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
)
|
||||
|
||||
var (
|
||||
maxUnpackedBlockSize = flagutil.NewBytes("remoteWrite.maxBlockSize", 8*1024*1024, "The maximum block size to send to remote storage. Bigger blocks may improve performance at the cost of the increased memory usage.")
|
||||
flushInterval = flag.Duration("remoteWrite.flushInterval", time.Second, "Interval for flushing the data to remote storage. "+
|
||||
"This option takes effect only when less than 2MB of data per second are pushed to -remoteWrite.url")
|
||||
)
|
||||
|
||||
type pendingLogs struct {
|
||||
lastFlushTime atomic.Uint64
|
||||
|
||||
// The queue to send blocks to.
|
||||
fq *persistentqueue.FastQueue
|
||||
|
||||
// mu protects wr
|
||||
mu sync.Mutex
|
||||
wr writeRequest
|
||||
|
||||
stopCh chan struct{}
|
||||
periodicFlusherWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func newPendingLogs(fq *persistentqueue.FastQueue) *pendingLogs {
|
||||
pl := &pendingLogs{
|
||||
fq: fq,
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
pl.periodicFlusherWG.Add(1)
|
||||
go func() {
|
||||
defer pl.periodicFlusherWG.Done()
|
||||
pl.periodicFlusher()
|
||||
}()
|
||||
|
||||
return pl
|
||||
}
|
||||
|
||||
func (pl *pendingLogs) add(lr *logstorage.LogRows) {
|
||||
lr.ForEachRow(func(_ uint64, r *logstorage.InsertRow) {
|
||||
pl.addLogRow(r)
|
||||
})
|
||||
}
|
||||
|
||||
func (pl *pendingLogs) addLogRow(r *logstorage.InsertRow) {
|
||||
bb := bbPool.Get()
|
||||
bb.B = r.Marshal(bb.B)
|
||||
|
||||
pl.mu.Lock()
|
||||
_, _ = pl.wr.pendingData.Write(bb.B)
|
||||
pl.wr.pendingLogRowsCount++
|
||||
if len(pl.wr.pendingData.B) > maxUnpackedBlockSize.IntN() {
|
||||
pl.mustFlushLocked()
|
||||
}
|
||||
pl.mu.Unlock()
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func (pl *pendingLogs) mustFlushLocked() {
|
||||
pl.lastFlushTime.Store(fasttime.UnixTimestamp())
|
||||
pl.wr.push(func(b []byte) {
|
||||
if !pl.fq.TryWriteBlock(b) {
|
||||
logger.Fatalf("BUG: TryWriteBlock cannot return false")
|
||||
}
|
||||
})
|
||||
pl.wr.reset()
|
||||
}
|
||||
|
||||
func (pl *pendingLogs) periodicFlusher() {
|
||||
flushSeconds := int64(flushInterval.Seconds())
|
||||
if flushSeconds <= 0 {
|
||||
flushSeconds = 1
|
||||
}
|
||||
d := timeutil.AddJitterToDuration(*flushInterval)
|
||||
ticker := time.NewTicker(d)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-pl.stopCh:
|
||||
pl.mu.Lock()
|
||||
pl.mustFlushOnStop()
|
||||
pl.mu.Unlock()
|
||||
return
|
||||
case <-ticker.C:
|
||||
if fasttime.UnixTimestamp()-pl.lastFlushTime.Load() < uint64(flushSeconds) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
pl.mu.Lock()
|
||||
pl.mustFlushLocked()
|
||||
pl.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// mustFlushOnStop force pushes wr data
|
||||
//
|
||||
// This is needed in order to properly save in-memory data to persistent queue on graceful shutdown.
|
||||
func (pl *pendingLogs) mustFlushOnStop() {
|
||||
pl.wr.push(pl.fq.MustWriteBlockIgnoreDisabledPQ)
|
||||
pl.wr.reset()
|
||||
}
|
||||
|
||||
func (pl *pendingLogs) mustStop() {
|
||||
close(pl.stopCh)
|
||||
pl.periodicFlusherWG.Wait()
|
||||
}
|
||||
|
||||
type writeRequest struct {
|
||||
pendingData bytesutil.ByteBuffer
|
||||
pendingLogRowsCount int64
|
||||
}
|
||||
|
||||
func (wr *writeRequest) push(pushBlock func([]byte)) {
|
||||
if len(wr.pendingData.B) == 0 {
|
||||
return
|
||||
}
|
||||
b := wr.pendingData.B
|
||||
|
||||
zb := compressBufPool.Get()
|
||||
zb.B = zstd.CompressLevel(zb.B[:0], b, 1)
|
||||
zbLen := len(zb.B)
|
||||
pushBlock(zb.B)
|
||||
compressBufPool.Put(zb)
|
||||
blockSizeBytes.Update(float64(zbLen))
|
||||
blockSizeLogRows.Update(float64(wr.pendingLogRowsCount))
|
||||
}
|
||||
|
||||
func (wr *writeRequest) reset() {
|
||||
wr.pendingData.Reset()
|
||||
wr.pendingLogRowsCount = 0
|
||||
}
|
||||
|
||||
var (
|
||||
blockSizeBytes = metrics.NewHistogram(`vlagent_remotewrite_block_size_bytes`)
|
||||
blockSizeLogRows = metrics.NewHistogram(`vlagent_remotewrite_block_size_rows`)
|
||||
)
|
||||
|
||||
var (
|
||||
compressBufPool bytesutil.ByteBufferPool
|
||||
bbPool bytesutil.ByteBufferPool
|
||||
)
|
||||
@@ -1,277 +0,0 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage/netinsert"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
)
|
||||
|
||||
var (
|
||||
remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support VictoriaLogs native protocol. "+
|
||||
"Example url: http://<victorialogs-host>:9428/internal/insert. "+
|
||||
"Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems.")
|
||||
maxPendingBytesPerURL = flagutil.NewArrayBytes("remoteWrite.maxDiskUsagePerURL", 0, "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
|
||||
"for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. "+
|
||||
"Buffered data is stored in ~500MB chunks. It is recommended to set the value for this flag to a multiple of the block size 500MB. "+
|
||||
"Disk usage is unlimited if the value is set to 0")
|
||||
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vlagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL")
|
||||
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
"isn't enough for sending high volume of collected data to remote storage. "+
|
||||
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
|
||||
|
||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
)
|
||||
|
||||
// rwctxsGlobal contains statically populated entries when -remoteWrite.url is specified.
|
||||
var rwctxsGlobal []*remoteWriteCtx
|
||||
|
||||
// Storage implements insertutil.LogRowsStorage interface
|
||||
type Storage struct{}
|
||||
|
||||
// MustAddRows implements insertutil.LogRowsStorage interface
|
||||
func (*Storage) MustAddRows(lr *logstorage.LogRows) {
|
||||
pushToRemoteStorages(lr)
|
||||
}
|
||||
|
||||
// CanWriteData implements insertutil.LogRowsStorage interface
|
||||
func (*Storage) CanWriteData() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// maxQueues limits the maximum value for `-remoteWrite.queues`. There is no sense in setting too high value,
|
||||
// since it may lead to high memory usage due to big number of buffers.
|
||||
var maxQueues = cgroup.AvailableCPUs() * 16
|
||||
|
||||
const persistentQueueDirname = "persistent-queue"
|
||||
|
||||
// InitSecretFlags must be called after flag.Parse and before any logging.
|
||||
func InitSecretFlags() {
|
||||
if !*showRemoteWriteURL {
|
||||
// remoteWrite.url can contain authentication codes, so hide it at `/metrics` output.
|
||||
flagutil.RegisterSecretFlag("remoteWrite.url")
|
||||
}
|
||||
}
|
||||
|
||||
// Init initializes remotewrite.
|
||||
//
|
||||
// It must be called after flag.Parse().
|
||||
//
|
||||
// Stop must be called for graceful shutdown.
|
||||
func Init() {
|
||||
if len(*remoteWriteURLs) == 0 {
|
||||
logger.Fatalf("at least one `-remoteWrite.url` command-line flag must be set")
|
||||
}
|
||||
if *queues > maxQueues {
|
||||
*queues = maxQueues
|
||||
}
|
||||
if *queues <= 0 {
|
||||
*queues = 1
|
||||
}
|
||||
initRemoteWriteCtxs(*remoteWriteURLs)
|
||||
dropDanglingQueues()
|
||||
}
|
||||
|
||||
// Stop stops remotewrite.
|
||||
//
|
||||
// It is expected that nobody calls TryPush during and after the call to this func.
|
||||
func Stop() {
|
||||
for _, rwctx := range rwctxsGlobal {
|
||||
rwctx.mustStop()
|
||||
}
|
||||
rwctxsGlobal = nil
|
||||
}
|
||||
|
||||
func dropDanglingQueues() {
|
||||
// Remove dangling persistent queues, if any.
|
||||
// This is required for the case when the number of queues has been changed or URL have been changed.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4014
|
||||
//
|
||||
// In case if there were many persistent queues with identical *remoteWriteURLs
|
||||
// the queue with the last index will be dropped.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6140
|
||||
existingQueues := make(map[string]struct{}, len(rwctxsGlobal))
|
||||
for _, rwctx := range rwctxsGlobal {
|
||||
existingQueues[rwctx.fq.Dirname()] = struct{}{}
|
||||
}
|
||||
|
||||
queuesDir := filepath.Join(*tmpDataPath, persistentQueueDirname)
|
||||
files := fs.MustReadDir(queuesDir)
|
||||
removed := 0
|
||||
for _, f := range files {
|
||||
dirname := f.Name()
|
||||
if _, ok := existingQueues[dirname]; !ok {
|
||||
logger.Infof("removing dangling queue %q", dirname)
|
||||
fullPath := filepath.Join(queuesDir, dirname)
|
||||
fs.MustRemoveAll(fullPath)
|
||||
removed++
|
||||
}
|
||||
}
|
||||
if removed > 0 {
|
||||
logger.Infof("removed %d dangling queues from %q, active queues: %d", removed, *tmpDataPath, len(rwctxsGlobal))
|
||||
}
|
||||
}
|
||||
|
||||
func initRemoteWriteCtxs(urls []string) {
|
||||
if len(urls) == 0 {
|
||||
logger.Panicf("BUG: urls must be non-empty")
|
||||
}
|
||||
|
||||
maxInmemoryBlocks := memory.Allowed() / len(urls) / 10000
|
||||
if maxInmemoryBlocks / *queues > 100 {
|
||||
// There is no much sense in keeping higher number of blocks in memory,
|
||||
// since this means that the producer outperforms consumer and the queue
|
||||
// will continue growing. It is better storing the queue to file.
|
||||
maxInmemoryBlocks = 100 * *queues
|
||||
}
|
||||
if maxInmemoryBlocks < 2 {
|
||||
maxInmemoryBlocks = 2
|
||||
}
|
||||
rwctxs := make([]*remoteWriteCtx, len(urls))
|
||||
rwctxIdx := make([]int, len(urls))
|
||||
for i, remoteWriteURLRaw := range urls {
|
||||
remoteWriteURL, err := url.Parse(remoteWriteURLRaw)
|
||||
if err != nil {
|
||||
logger.Fatalf("invalid -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
sanitizedURL := fmt.Sprintf("%d:secret-url", i+1)
|
||||
if *showRemoteWriteURL {
|
||||
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
|
||||
}
|
||||
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
|
||||
rwctxIdx[i] = i
|
||||
}
|
||||
|
||||
rwctxsGlobal = rwctxs
|
||||
}
|
||||
|
||||
func pushToRemoteStorages(lr *logstorage.LogRows) {
|
||||
rwctxs := rwctxsGlobal
|
||||
if len(rwctxs) == 1 {
|
||||
// fast path
|
||||
rwctxs[0].push(lr)
|
||||
return
|
||||
}
|
||||
// Push samples to remote storage systems in parallel in order to reduce
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
for _, rwctx := range rwctxs {
|
||||
wg.Add(1)
|
||||
go func(rwctx *remoteWriteCtx) {
|
||||
defer wg.Done()
|
||||
rwctx.push(lr)
|
||||
|
||||
}(rwctx)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
type remoteWriteCtx struct {
|
||||
idx int
|
||||
fq *persistentqueue.FastQueue
|
||||
c *client
|
||||
|
||||
pls []*pendingLogs
|
||||
pssNextIdx atomic.Uint64
|
||||
}
|
||||
|
||||
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
|
||||
// protocol version is required by victoria-logs
|
||||
q := remoteWriteURL.Query()
|
||||
q.Set("version", netinsert.ProtocolVersion)
|
||||
remoteWriteURL.RawQuery = q.Encode()
|
||||
|
||||
// strip query params, otherwise changing params resets pq
|
||||
pqURL := *remoteWriteURL
|
||||
pqURL.RawQuery = ""
|
||||
pqURL.Fragment = ""
|
||||
h := xxhash.Sum64([]byte(pqURL.String()))
|
||||
queuePath := filepath.Join(*tmpDataPath, persistentQueueDirname, fmt.Sprintf("%d_%016X", argIdx+1, h))
|
||||
maxPendingBytes := maxPendingBytesPerURL.GetOptionalArg(argIdx)
|
||||
if maxPendingBytes != 0 && maxPendingBytes < persistentqueue.DefaultChunkFileSize {
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4195
|
||||
logger.Warnf("rounding the -remoteWrite.maxDiskUsagePerURL=%d to the minimum supported value: %d", maxPendingBytes, persistentqueue.DefaultChunkFileSize)
|
||||
maxPendingBytes = persistentqueue.DefaultChunkFileSize
|
||||
}
|
||||
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, false)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vlagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetPendingBytes())
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vlagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetInmemoryQueueLen())
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vlagent_remotewrite_queue_blocked{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
if fq.IsWriteBlocked() {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
})
|
||||
|
||||
var c *client
|
||||
switch remoteWriteURL.Scheme {
|
||||
case "http", "https":
|
||||
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
|
||||
default:
|
||||
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
|
||||
}
|
||||
c.init(argIdx, *queues, sanitizedURL)
|
||||
|
||||
// Initialize pss
|
||||
plsLen := *queues
|
||||
if n := cgroup.AvailableCPUs(); plsLen > n {
|
||||
// There is no sense in running more than availableCPUs concurrent pendingLogs,
|
||||
// since every pendingLogs can saturate up to a single CPU.
|
||||
plsLen = n
|
||||
}
|
||||
pls := make([]*pendingLogs, plsLen)
|
||||
for i := range pls {
|
||||
pls[i] = newPendingLogs(fq)
|
||||
}
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
c: c,
|
||||
pls: pls,
|
||||
}
|
||||
|
||||
return rwctx
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) push(lr *logstorage.LogRows) {
|
||||
pls := rwctx.pls
|
||||
idx := rwctx.pssNextIdx.Add(1) % uint64(len(pls))
|
||||
pls[idx].add(lr)
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) mustStop() {
|
||||
for _, ps := range rwctx.pls {
|
||||
ps.mustStop()
|
||||
}
|
||||
rwctx.idx = 0
|
||||
rwctx.pls = nil
|
||||
rwctx.fq.UnblockAllReaders()
|
||||
rwctx.c.MustStop()
|
||||
rwctx.c = nil
|
||||
|
||||
rwctx.fq.MustClose()
|
||||
rwctx.fq = nil
|
||||
}
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -193,7 +192,6 @@ type logMessageProcessor struct {
|
||||
|
||||
rowsIngestedTotal *metrics.Counter
|
||||
bytesIngestedTotal *metrics.Counter
|
||||
flushDuration *metrics.Summary
|
||||
}
|
||||
|
||||
func (lmp *logMessageProcessor) initPeriodicFlush() {
|
||||
@@ -292,11 +290,9 @@ func (lmp *logMessageProcessor) AddInsertRow(r *logstorage.InsertRow) {
|
||||
|
||||
// flushLocked must be called under locked lmp.mu.
|
||||
func (lmp *logMessageProcessor) flushLocked() {
|
||||
start := time.Now()
|
||||
lmp.lastFlushTime = start
|
||||
lmp.lastFlushTime = time.Now()
|
||||
logRowsStorage.MustAddRows(lmp.lr)
|
||||
lmp.lr.ResetKeepSettings()
|
||||
lmp.flushDuration.UpdateDuration(start)
|
||||
}
|
||||
|
||||
// MustClose flushes the remaining data to the underlying storage and closes lmp.
|
||||
@@ -307,7 +303,6 @@ func (lmp *logMessageProcessor) MustClose() {
|
||||
lmp.flushLocked()
|
||||
logstorage.PutLogRows(lmp.lr)
|
||||
lmp.lr = nil
|
||||
messageProcessorCount.Add(-1)
|
||||
}
|
||||
|
||||
// NewLogMessageProcessor returns new LogMessageProcessor for the given cp.
|
||||
@@ -317,14 +312,12 @@ func (cp *CommonParams) NewLogMessageProcessor(protocolName string, isStreamMode
|
||||
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields, cp.DecolorizeFields, cp.ExtraFields, *defaultMsgValue)
|
||||
rowsIngestedTotal := metrics.GetOrCreateCounter(fmt.Sprintf("vl_rows_ingested_total{type=%q}", protocolName))
|
||||
bytesIngestedTotal := metrics.GetOrCreateCounter(fmt.Sprintf("vl_bytes_ingested_total{type=%q}", protocolName))
|
||||
flushDuration := metrics.GetOrCreateSummary(fmt.Sprintf("vl_insert_flush_duration_seconds{type=%q}", protocolName))
|
||||
lmp := &logMessageProcessor{
|
||||
cp: cp,
|
||||
lr: lr,
|
||||
|
||||
rowsIngestedTotal: rowsIngestedTotal,
|
||||
bytesIngestedTotal: bytesIngestedTotal,
|
||||
flushDuration: flushDuration,
|
||||
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
@@ -333,13 +326,10 @@ func (cp *CommonParams) NewLogMessageProcessor(protocolName string, isStreamMode
|
||||
lmp.initPeriodicFlush()
|
||||
}
|
||||
|
||||
messageProcessorCount.Add(1)
|
||||
return lmp
|
||||
}
|
||||
|
||||
var (
|
||||
rowsDroppedTotalDebug = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`)
|
||||
rowsDroppedTotalTooManyFields = metrics.NewCounter(`vl_rows_dropped_total{reason="too_many_fields"}`)
|
||||
_ = metrics.NewGauge(`vl_insert_processors_count`, func() float64 { return float64(messageProcessorCount.Load()) })
|
||||
messageProcessorCount atomic.Int64
|
||||
)
|
||||
|
||||
@@ -276,7 +276,7 @@ func readJournaldLogEntry(streamName string, lr *insertutil.LineReader, lmp inse
|
||||
}
|
||||
size := binary.LittleEndian.Uint64(fb.value[:8])
|
||||
|
||||
// Read the value until its length exceeds the given size - the last char in the read value will always be '\n'
|
||||
// Read the value until its lenth exceeds the given size - the last char in the read value will always be '\n'
|
||||
// because it is appended by appendNextLineToValue().
|
||||
for uint64(len(fb.value[8:])) <= size {
|
||||
if err := fb.appendNextLineToValue(lr); err != nil {
|
||||
|
||||
@@ -354,10 +354,6 @@ func writeStorageMetrics(w io.Writer, strg *logstorage.Storage) {
|
||||
var ss logstorage.StorageStats
|
||||
strg.UpdateStats(&ss)
|
||||
|
||||
if maxDiskSpaceUsageBytes.N > 0 {
|
||||
metrics.WriteGaugeUint64(w, fmt.Sprintf(`vl_max_disk_space_usage_bytes{path=%q}`, *storageDataPath), uint64(maxDiskSpaceUsageBytes.N))
|
||||
}
|
||||
|
||||
metrics.WriteGaugeUint64(w, fmt.Sprintf(`vl_free_disk_space_bytes{path=%q}`, *storageDataPath), fs.MustGetFreeSpace(*storageDataPath))
|
||||
|
||||
isReadOnly := uint64(0)
|
||||
|
||||
@@ -38,10 +38,8 @@ var (
|
||||
"By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data "+
|
||||
"is sent after temporary unavailability of the remote storage. See also -maxIngestionRate")
|
||||
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", time.Minute, "Timeout for sending a single block of data to the corresponding -remoteWrite.url")
|
||||
retryMinInterval = flagutil.NewArrayDuration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxInterval")
|
||||
// deprecated in the future. use -remoteWrite.retryMaxInterval instead
|
||||
retryMaxTime = flagutil.NewArrayDuration("remoteWrite.retryMaxTime", time.Minute, "The max time spent on retry attempts to send a block of data to the corresponding -remoteWrite.url. This flag is deprecated, use -remoteWrite.retryMaxInterval instead")
|
||||
retryMaxInterval = flagutil.NewArrayDuration("remoteWrite.retryMaxInterval", time.Minute, "The maximum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. The delay doubles with each retry until this maximum is reached, after which it remains constant. See also -remoteWrite.retryMinInterval")
|
||||
retryMinInterval = flagutil.NewArrayDuration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxTime")
|
||||
retryMaxTime = flagutil.NewArrayDuration("remoteWrite.retryMaxTime", time.Minute, "The max time spent on retry attempts to send a block of data to the corresponding -remoteWrite.url. Change this value if it is expected for -remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval")
|
||||
proxyURL = flagutil.NewArrayString("remoteWrite.proxyURL", "Optional proxy URL for writing data to the corresponding -remoteWrite.url. "+
|
||||
"Supported proxies: http, https, socks5. Example: -remoteWrite.proxyURL=socks5://proxy:1234")
|
||||
|
||||
@@ -99,7 +97,7 @@ type client struct {
|
||||
hc *http.Client
|
||||
|
||||
retryMinInterval time.Duration
|
||||
retryMaxInterval time.Duration
|
||||
retryMaxTime time.Duration
|
||||
|
||||
sendBlock func(block []byte) bool
|
||||
authCfg *promauth.Config
|
||||
@@ -153,10 +151,6 @@ func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persiste
|
||||
Transport: authCfg.NewRoundTripper(tr),
|
||||
Timeout: sendTimeout.GetOptionalArg(argIdx),
|
||||
}
|
||||
retryMaxIntervalFlag := retryMaxTime
|
||||
if retryMaxInterval.String() != "" {
|
||||
retryMaxIntervalFlag = retryMaxInterval
|
||||
}
|
||||
c := &client{
|
||||
sanitizedURL: sanitizedURL,
|
||||
remoteWriteURL: remoteWriteURL,
|
||||
@@ -165,7 +159,7 @@ func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persiste
|
||||
fq: fq,
|
||||
hc: hc,
|
||||
retryMinInterval: retryMinInterval.GetOptionalArg(argIdx),
|
||||
retryMaxInterval: retryMaxIntervalFlag.GetOptionalArg(argIdx),
|
||||
retryMaxTime: retryMaxTime.GetOptionalArg(argIdx),
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
c.sendBlock = c.sendBlockHTTP
|
||||
@@ -410,7 +404,7 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
// Otherwise, it tries sending the block to remote storage indefinitely.
|
||||
func (c *client) sendBlockHTTP(block []byte) bool {
|
||||
c.rl.Register(len(block))
|
||||
maxRetryDuration := timeutil.AddJitterToDuration(c.retryMaxInterval)
|
||||
maxRetryDuration := timeutil.AddJitterToDuration(c.retryMaxTime)
|
||||
retryDuration := timeutil.AddJitterToDuration(c.retryMinInterval)
|
||||
retriesCount := 0
|
||||
|
||||
|
||||
@@ -279,9 +279,6 @@ func initRemoteWriteCtxs(urls []string) {
|
||||
}
|
||||
rwctxs := make([]*remoteWriteCtx, len(urls))
|
||||
rwctxIdx := make([]int, len(urls))
|
||||
if retryMaxTime.String() != "" {
|
||||
logger.Warnf("-remoteWrite.retryMaxTime is deprecated; use -remoteWrite.retryMaxInterval instead")
|
||||
}
|
||||
for i, remoteWriteURLRaw := range urls {
|
||||
remoteWriteURL, err := url.Parse(remoteWriteURLRaw)
|
||||
if err != nil {
|
||||
|
||||
@@ -18,7 +18,9 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
)
|
||||
|
||||
var defaultRuleType = flag.String("rule.defaultRuleType", "prometheus", `Default type for rule expressions, can be overridden via "type" parameter on the group level, see https://docs.victoriametrics.com/victoriametrics/vmalert/#groups. Supported values: "graphite", "prometheus" and "vlogs".`)
|
||||
var (
|
||||
defaultRuleType = flag.String("rule.defaultRuleType", "prometheus", `Default type for rule expressions, can be overridden via "type" parameter on the group level, see https://docs.victoriametrics.com/victoriametrics/vmalert/#groups. Supported values: "graphite", "prometheus" and "vlogs".`)
|
||||
)
|
||||
|
||||
// Group contains list of Rules grouped into
|
||||
// entity with one name and evaluation interval
|
||||
@@ -291,6 +293,12 @@ func parse(files map[string][]byte, validateTplFn ValidateTplFn, validateExpress
|
||||
if err := errGroup.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
sort.SliceStable(groups, func(i, j int) bool {
|
||||
if groups[i].File != groups[j].File {
|
||||
return groups[i].File < groups[j].File
|
||||
}
|
||||
return groups[i].Name < groups[j].Name
|
||||
})
|
||||
return groups, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -408,21 +407,6 @@ func configsEqual(a, b []config.Group) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
|
||||
// sort both slices by file and name before comparing them
|
||||
sort.SliceStable(a, func(i, j int) bool {
|
||||
if a[i].File != a[j].File {
|
||||
return a[i].File < a[j].File
|
||||
}
|
||||
return a[i].Name < a[j].Name
|
||||
})
|
||||
sort.SliceStable(b, func(i, j int) bool {
|
||||
if b[i].File != b[j].File {
|
||||
return b[i].File < b[j].File
|
||||
}
|
||||
return b[i].Name < b[j].Name
|
||||
})
|
||||
|
||||
for i := range a {
|
||||
if a[i].Checksum != b[i].Checksum {
|
||||
return false
|
||||
|
||||
@@ -194,10 +194,9 @@ func mergeLabels(target string, metaLabels *promutil.Labels, cfg *Config) *promu
|
||||
alertsPath = address[n:]
|
||||
address = address[:n]
|
||||
}
|
||||
m.Add("__address__", address)
|
||||
m.Add("__scheme__", scheme)
|
||||
m.Add("__alerts_path__", alertsPath)
|
||||
m.AddFrom(metaLabels)
|
||||
// force labels
|
||||
m.Set("__address__", address)
|
||||
m.Set("__scheme__", scheme)
|
||||
m.Set("__alerts_path__", alertsPath)
|
||||
return m
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ package notifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -54,11 +53,8 @@ func (cw *configWatcher) notifiers() []Notifier {
|
||||
for _, n := range ns {
|
||||
notifiers = append(notifiers, n.Notifier)
|
||||
}
|
||||
|
||||
}
|
||||
// deterministically sort the output
|
||||
sort.Slice(notifiers, func(i, j int) bool {
|
||||
return notifiers[i].Addr() < notifiers[j].Addr()
|
||||
})
|
||||
return notifiers
|
||||
}
|
||||
|
||||
@@ -89,12 +85,12 @@ func (cw *configWatcher) reload(path string) error {
|
||||
}
|
||||
|
||||
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
|
||||
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
|
||||
targets, errors := targetsFromLabels(labelsFn, cw.cfg, cw.genFn)
|
||||
for _, err := range errors {
|
||||
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
}
|
||||
|
||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||
cw.setTargets(typeK, targets)
|
||||
|
||||
cw.wg.Add(1)
|
||||
go func() {
|
||||
@@ -109,22 +105,22 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn
|
||||
return
|
||||
case <-ticker.C:
|
||||
}
|
||||
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
|
||||
updateTargets, errors := targetsFromLabels(labelsFn, cw.cfg, cw.genFn)
|
||||
for _, err := range errors {
|
||||
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
}
|
||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||
cw.setTargets(typeK, updateTargets)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
func getTargetMetadata(labelsFn getLabels, cfg *Config) (map[string]*promutil.Labels, []error) {
|
||||
func targetsFromLabels(labelsFn getLabels, cfg *Config, genFn AlertURLGenerator) ([]Target, []error) {
|
||||
metaLabels, err := labelsFn()
|
||||
if err != nil {
|
||||
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
|
||||
}
|
||||
targetMetadata := make(map[string]*promutil.Labels, len(metaLabels))
|
||||
var targets []Target
|
||||
var errors []error
|
||||
duplicates := make(map[string]struct{})
|
||||
for _, labels := range metaLabels {
|
||||
@@ -147,9 +143,18 @@ func getTargetMetadata(labelsFn getLabels, cfg *Config) (map[string]*promutil.La
|
||||
continue
|
||||
}
|
||||
duplicates[u] = struct{}{}
|
||||
targetMetadata[u] = processedLabels
|
||||
|
||||
am, err := NewAlertManager(u, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
|
||||
if err != nil {
|
||||
errors = append(errors, err)
|
||||
continue
|
||||
}
|
||||
targets = append(targets, Target{
|
||||
Notifier: am,
|
||||
Labels: processedLabels,
|
||||
})
|
||||
}
|
||||
return targetMetadata, errors
|
||||
return targets, errors
|
||||
}
|
||||
|
||||
type getLabels func() ([]*promutil.Labels, error)
|
||||
@@ -236,40 +241,21 @@ func (cw *configWatcher) mustStop() {
|
||||
|
||||
func (cw *configWatcher) setTargets(key TargetType, targets []Target) {
|
||||
cw.targetsMu.Lock()
|
||||
newT := make(map[string]Target)
|
||||
for _, t := range targets {
|
||||
newT[t.Addr()] = t
|
||||
}
|
||||
oldT := cw.targets[key]
|
||||
|
||||
for _, ot := range oldT {
|
||||
if _, ok := newT[ot.Addr()]; !ok {
|
||||
ot.Notifier.Close()
|
||||
}
|
||||
}
|
||||
cw.targets[key] = targets
|
||||
cw.targetsMu.Unlock()
|
||||
}
|
||||
|
||||
func (cw *configWatcher) updateTargets(key TargetType, targetMetadata map[string]*promutil.Labels, cfg *Config, genFn AlertURLGenerator) {
|
||||
cw.targetsMu.Lock()
|
||||
defer cw.targetsMu.Unlock()
|
||||
oldTargets := cw.targets[key]
|
||||
var updatedTargets []Target
|
||||
for _, ot := range oldTargets {
|
||||
if _, ok := targetMetadata[ot.Addr()]; !ok {
|
||||
// if target not exists in currentTargets, close it
|
||||
ot.Notifier.Close()
|
||||
} else {
|
||||
updatedTargets = append(updatedTargets, ot)
|
||||
delete(targetMetadata, ot.Addr())
|
||||
}
|
||||
}
|
||||
// create new resources for the new targets
|
||||
for addr, labels := range targetMetadata {
|
||||
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
|
||||
if err != nil {
|
||||
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
|
||||
continue
|
||||
}
|
||||
updatedTargets = append(updatedTargets, Target{
|
||||
Notifier: am,
|
||||
Labels: labels,
|
||||
})
|
||||
}
|
||||
|
||||
cw.targets[key] = updatedTargets
|
||||
}
|
||||
|
||||
// mergeHTTPClientConfigs merges fields between child and parent params
|
||||
// by populating child from parent params if they're missing.
|
||||
func mergeHTTPClientConfigs(parent, child promauth.HTTPClientConfig) promauth.HTTPClientConfig {
|
||||
|
||||
@@ -8,10 +8,8 @@ import (
|
||||
"os"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
||||
)
|
||||
|
||||
func TestConfigWatcherReload(t *testing.T) {
|
||||
@@ -61,11 +59,6 @@ static_configs:
|
||||
}
|
||||
|
||||
func TestConfigWatcherStart(t *testing.T) {
|
||||
oldSDCheckInterval := consul.SDCheckInterval
|
||||
defer func() { consul.SDCheckInterval = oldSDCheckInterval }()
|
||||
consulCheckInterval := 100 * time.Millisecond
|
||||
consul.SDCheckInterval = &consulCheckInterval
|
||||
|
||||
consulSDServer := newFakeConsulServer()
|
||||
defer consulSDServer.Close()
|
||||
|
||||
@@ -104,11 +97,6 @@ consul_sd_configs:
|
||||
if n2.Addr() != expAddr2 {
|
||||
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
|
||||
}
|
||||
|
||||
f := func() bool { return len(cw.notifiers()) == 1 }
|
||||
if !waitFor(f, time.Second) {
|
||||
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
|
||||
}
|
||||
}
|
||||
|
||||
// TestConfigWatcherReloadConcurrent supposed to test concurrent
|
||||
@@ -205,7 +193,6 @@ const (
|
||||
)
|
||||
|
||||
func newFakeConsulServer() *httptest.Server {
|
||||
requestCount := 0
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
|
||||
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
|
||||
@@ -220,9 +207,8 @@ func newFakeConsulServer() *httptest.Server {
|
||||
}`))
|
||||
})
|
||||
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
|
||||
if requestCount == 0 {
|
||||
rw.Header().Set("X-Consul-Index", "1")
|
||||
rw.Write([]byte(`
|
||||
rw.Header().Set("X-Consul-Index", "1")
|
||||
rw.Write([]byte(`
|
||||
[
|
||||
{
|
||||
"Node": {
|
||||
@@ -311,56 +297,6 @@ func newFakeConsulServer() *httptest.Server {
|
||||
}
|
||||
}
|
||||
]`))
|
||||
} else {
|
||||
rw.Header().Set("X-Consul-Index", "2")
|
||||
rw.Write([]byte(`
|
||||
[
|
||||
{
|
||||
"Node": {
|
||||
"ID": "e8e3629a-3f50-9d6e-aaf8-f173b5b05c72",
|
||||
"Node": "machine",
|
||||
"Address": "127.0.0.1",
|
||||
"Datacenter": "dc1",
|
||||
"TaggedAddresses": {
|
||||
"lan": "127.0.0.1",
|
||||
"lan_ipv4": "127.0.0.1",
|
||||
"wan": "127.0.0.1",
|
||||
"wan_ipv4": "127.0.0.1"
|
||||
},
|
||||
"Meta": {
|
||||
"consul-network-segment": ""
|
||||
},
|
||||
"CreateIndex": 13,
|
||||
"ModifyIndex": 14
|
||||
},
|
||||
"Service": {
|
||||
"ID": "am3",
|
||||
"Service": "alertmanager",
|
||||
"Tags": [
|
||||
"alertmanager",
|
||||
"__scheme__=http"
|
||||
],
|
||||
"Address": "",
|
||||
"Meta": null,
|
||||
"Port": 9097,
|
||||
"Weights": {
|
||||
"Passing": 1,
|
||||
"Warning": 1
|
||||
},
|
||||
"EnableTagOverride": false,
|
||||
"Proxy": {
|
||||
"Mode": "",
|
||||
"MeshGateway": {},
|
||||
"Expose": {}
|
||||
},
|
||||
"Connect": {},
|
||||
"CreateIndex": 16,
|
||||
"ModifyIndex": 16
|
||||
}
|
||||
}
|
||||
]`))
|
||||
}
|
||||
requestCount++
|
||||
})
|
||||
|
||||
return httptest.NewServer(mux)
|
||||
@@ -421,13 +357,3 @@ func TestParseLabels_Success(t *testing.T) {
|
||||
PathPrefix: "test",
|
||||
}, "https://alertmanager:9093/api/v1/alerts")
|
||||
}
|
||||
|
||||
func waitFor(f func() bool, timeout time.Duration) bool {
|
||||
for start := time.Now(); time.Since(start) < timeout; {
|
||||
if f() == true {
|
||||
return true
|
||||
}
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -28,8 +28,6 @@ var (
|
||||
"Defines how many retries to make before giving up on rule if request for it returns an error.")
|
||||
disableProgressBar = flag.Bool("replay.disableProgressBar", false, "Whether to disable rendering progress bars during the replay. "+
|
||||
"Progress bar rendering might be verbose or break the logs parsing, so it is recommended to be disabled when not used in interactive mode.")
|
||||
ruleEvaluationConcurrency = flag.Int("replay.ruleEvaluationConcurrency", 1, "The maximum number of concurrent `/query_range` requests for a single rule. "+
|
||||
"Increasing this value when replaying for a long time and a single request range is limited by `-replay.maxDatapointsPerQuery`.")
|
||||
)
|
||||
|
||||
func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewrite.RWClient) (totalRows, droppedRows int, err error) {
|
||||
@@ -73,7 +71,7 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewri
|
||||
|
||||
for _, cfg := range groupsCfg {
|
||||
ng := rule.NewGroup(cfg, qb, *evaluationInterval, labels)
|
||||
totalRows += ng.Replay(tFrom, tTo, rw, *replayMaxDatapoints, *replayRuleRetryAttempts, *replayRulesDelay, *disableProgressBar, *ruleEvaluationConcurrency)
|
||||
totalRows += ng.Replay(tFrom, tTo, rw, *replayMaxDatapoints, *replayRuleRetryAttempts, *replayRulesDelay, *disableProgressBar)
|
||||
}
|
||||
logger.Infof("replay evaluation finished, generated %d samples", totalRows)
|
||||
if err := rw.Close(); err != nil {
|
||||
|
||||
@@ -46,7 +46,7 @@ func (fr *fakeReplayQuerier) QueryRange(_ context.Context, q string, from, to ti
|
||||
}
|
||||
|
||||
func TestReplay(t *testing.T) {
|
||||
f := func(from, to string, maxDP, ruleConcurrency int, ruleDelay time.Duration, cfg []config.Group, qb *fakeReplayQuerier, expectTotalRows int) {
|
||||
f := func(from, to string, maxDP int, ruleDelay time.Duration, cfg []config.Group, qb *fakeReplayQuerier, expectTotalRows int) {
|
||||
t.Helper()
|
||||
|
||||
fromOrig, toOrig, maxDatapointsOrig := *replayFrom, *replayTo, *replayMaxDatapoints
|
||||
@@ -62,7 +62,6 @@ func TestReplay(t *testing.T) {
|
||||
rwb := &fakeRWClient{}
|
||||
*replayFrom = from
|
||||
*replayTo = to
|
||||
*ruleEvaluationConcurrency = ruleConcurrency
|
||||
*replayMaxDatapoints = maxDP
|
||||
totalRows, _, err := replay(cfg, qb, rwb)
|
||||
if err != nil {
|
||||
@@ -74,7 +73,7 @@ func TestReplay(t *testing.T) {
|
||||
}
|
||||
|
||||
// one rule + one response
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:00.000Z", 10, 1, time.Millisecond, []config.Group{
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:00.000Z", 10, time.Millisecond, []config.Group{
|
||||
{Rules: []config.Rule{{Record: "foo", Expr: "sum(up)"}}},
|
||||
}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string][]datasource.Metric{
|
||||
@@ -88,7 +87,7 @@ func TestReplay(t *testing.T) {
|
||||
}, 1)
|
||||
|
||||
// one rule + multiple responses
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, 1, time.Millisecond, []config.Group{
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, time.Millisecond, []config.Group{
|
||||
{Rules: []config.Rule{{Record: "foo", Expr: "sum(up)"}}},
|
||||
}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string][]datasource.Metric{
|
||||
@@ -111,7 +110,7 @@ func TestReplay(t *testing.T) {
|
||||
}, 2)
|
||||
|
||||
// datapoints per step
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T15:02:30.000Z", 60, 1, time.Millisecond, []config.Group{
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T15:02:30.000Z", 60, time.Millisecond, []config.Group{
|
||||
{Interval: promutil.NewDuration(time.Minute), Rules: []config.Rule{{Record: "foo", Expr: "sum(up)"}}},
|
||||
}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string][]datasource.Metric{
|
||||
@@ -135,7 +134,7 @@ func TestReplay(t *testing.T) {
|
||||
}, 3)
|
||||
|
||||
// multiple recording rules + multiple responses
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, 1, time.Millisecond, []config.Group{
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, time.Millisecond, []config.Group{
|
||||
{Rules: []config.Rule{{Record: "foo", Expr: "sum(up)"}}},
|
||||
{Rules: []config.Rule{{Record: "bar", Expr: "max(up)"}}},
|
||||
}, &fakeReplayQuerier{
|
||||
@@ -165,7 +164,7 @@ func TestReplay(t *testing.T) {
|
||||
|
||||
// multiple alerting rules + multiple responses
|
||||
// alerting rule generates two series `ALERTS` and `ALERTS_FOR_STATE` when triggered
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, 1, time.Millisecond, []config.Group{
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, time.Millisecond, []config.Group{
|
||||
{Rules: []config.Rule{{Alert: "foo", Expr: "sum(up) > 1"}}},
|
||||
{Rules: []config.Rule{{Alert: "bar", Expr: "max(up) < 1"}}},
|
||||
}, &fakeReplayQuerier{
|
||||
@@ -194,7 +193,7 @@ func TestReplay(t *testing.T) {
|
||||
}, 6)
|
||||
|
||||
// multiple recording rules in one group+ multiple responses + concurrency
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, 1, 0, []config.Group{
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, 0, []config.Group{
|
||||
{Rules: []config.Rule{{Record: "foo", Expr: "sum(up) > 1"}, {Record: "bar", Expr: "max(up) < 1"}}, Concurrency: 2}}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string][]datasource.Metric{
|
||||
"sum(up) > 1": {
|
||||
@@ -227,43 +226,4 @@ func TestReplay(t *testing.T) {
|
||||
},
|
||||
},
|
||||
}, 4)
|
||||
|
||||
// single rule + rule concurrency
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, 3, time.Millisecond, []config.Group{
|
||||
{Rules: []config.Rule{{Record: "foo-concurrent", Expr: "sum(up)"}}},
|
||||
}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string][]datasource.Metric{
|
||||
"sum(up)": {
|
||||
"12:00:00+12:01:00": {},
|
||||
"12:01:00+12:02:00": {{
|
||||
Timestamps: []int64{1},
|
||||
Values: []float64{1},
|
||||
}},
|
||||
"12:02:00+12:02:30": {},
|
||||
},
|
||||
},
|
||||
}, 1)
|
||||
|
||||
// multiple rules + rule concurrency + group concurrency
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, 3, 0, []config.Group{
|
||||
{Rules: []config.Rule{{Alert: "foo-group-single-concurrent", Expr: "sum(up) > 1"}, {Alert: "bar-group-single-concurrent", Expr: "max(up) < 1"}}, Concurrency: 2}}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string][]datasource.Metric{
|
||||
"sum(up) > 1": {
|
||||
"12:00:00+12:01:00": {},
|
||||
"12:01:00+12:02:00": {{
|
||||
Timestamps: []int64{1},
|
||||
Values: []float64{1},
|
||||
}},
|
||||
"12:02:00+12:02:30": {},
|
||||
},
|
||||
"max(up) < 1": {
|
||||
"12:00:00+12:01:00": {},
|
||||
"12:01:00+12:02:00": {{
|
||||
Timestamps: []int64{1},
|
||||
Values: []float64{1},
|
||||
}},
|
||||
"12:02:00+12:02:30": {},
|
||||
},
|
||||
},
|
||||
}, 4)
|
||||
}
|
||||
|
||||
@@ -741,13 +741,6 @@ func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts ti
|
||||
}
|
||||
var labelsFilter string
|
||||
for k, v := range ar.Labels {
|
||||
if strings.Contains(v, "{{") && strings.Contains(v, "}}") {
|
||||
// do not append label to the filter when value contains template,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9305.
|
||||
// it's ok to do the simple check to skip some labels,
|
||||
// since we verify the results' hash afterward to ensure the alerts match.
|
||||
continue
|
||||
}
|
||||
labelsFilter += fmt.Sprintf(",%s=%q", k, v)
|
||||
}
|
||||
// use `default_rollup()` instead of `last_over_time()` here to accounts for possible staleness markers
|
||||
|
||||
@@ -815,6 +815,10 @@ func TestGroup_Restore(t *testing.T) {
|
||||
t.Helper()
|
||||
defer fqr.Reset()
|
||||
|
||||
for _, r := range rules {
|
||||
fqr.Set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
|
||||
}
|
||||
|
||||
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
||||
fg.Init()
|
||||
wg := sync.WaitGroup{}
|
||||
@@ -867,7 +871,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
}
|
||||
|
||||
// one active alert, no previous state
|
||||
fqr.Set("foo", metricWithValueAndLabels(t, 0, "__name__", "foo"))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutil.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
@@ -876,10 +879,10 @@ func TestGroup_Restore(t *testing.T) {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
fqr.Reset()
|
||||
|
||||
// one active alert with state restore
|
||||
ts := time.Now().Truncate(time.Hour)
|
||||
fqr.Set("foo", metricWithValueAndLabels(t, 0, "__name__", "foo"))
|
||||
fqr.Set(`default_rollup(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fn(
|
||||
@@ -891,33 +894,8 @@ func TestGroup_Restore(t *testing.T) {
|
||||
},
|
||||
})
|
||||
|
||||
// one rule, two active alerts, one with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set("foo",
|
||||
metricWithValueAndLabels(t, 0, "__name__", "foo", "env", "prod"),
|
||||
metricWithValueAndLabels(t, 0, "__name__", "foo", "env", "dev"))
|
||||
fqr.Set(`default_rollup(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
// only env=prod has state metric, so only it will have state restore
|
||||
stateMetric("foo", ts, "env", "prod"))
|
||||
fn(
|
||||
[]config.Rule{
|
||||
{Alert: "foo", Expr: "foo", For: promutil.NewDuration(time.Second)},
|
||||
},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "prod"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
})
|
||||
|
||||
// two rules, two active alerts, one with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set("foo", metricWithValueAndLabels(t, 0, "__name__", "foo"))
|
||||
fqr.Set("bar", metricWithValueAndLabels(t, 0, "__name__", "bar"))
|
||||
fqr.Set(`default_rollup(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
stateMetric("bar", ts))
|
||||
fn(
|
||||
@@ -938,8 +916,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
|
||||
// two rules, two active alerts, two with state restored
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set("foo", metricWithValueAndLabels(t, 0, "__name__", "foo"))
|
||||
fqr.Set("bar", metricWithValueAndLabels(t, 0, "__name__", "bar"))
|
||||
fqr.Set(`default_rollup(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
stateMetric("foo", ts))
|
||||
fqr.Set(`default_rollup(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
||||
@@ -962,7 +938,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
|
||||
// one active alert but wrong state restore
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set("foo", metricWithValueAndLabels(t, 0, "__name__", "foo"))
|
||||
fqr.Set(`default_rollup(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
|
||||
stateMetric("wrong alert", ts))
|
||||
fn(
|
||||
@@ -976,7 +951,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
|
||||
// one active alert with labels
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set("foo", metricWithValueAndLabels(t, 0, "__name__", "foo"))
|
||||
fqr.Set(`default_rollup(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||
stateMetric("foo", ts, "env", "dev"))
|
||||
fn(
|
||||
@@ -990,7 +964,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
|
||||
// one active alert with restore labels mismatch
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set("foo", metricWithValueAndLabels(t, 0, "__name__", "foo"))
|
||||
fqr.Set(`default_rollup(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
||||
stateMetric("foo", ts, "env", "dev", "team", "foo"))
|
||||
fn(
|
||||
@@ -1001,27 +974,6 @@ func TestGroup_Restore(t *testing.T) {
|
||||
ActiveAt: defaultTS,
|
||||
},
|
||||
})
|
||||
|
||||
// two active alerts with dynamic labels and restore
|
||||
ts = time.Now().Truncate(time.Hour)
|
||||
fqr.Set(`default_rollup(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
||||
stateMetric("foo", ts, "env", "dev"),
|
||||
stateMetric("foo", ts.Add(time.Second), "env", "prod"))
|
||||
fqr.Set("foo",
|
||||
metricWithValueAndLabels(t, 0, "__name__", "foo", "env", "dev"),
|
||||
metricWithValueAndLabels(t, 0, "__name__", "foo", "env", "prod"))
|
||||
fn(
|
||||
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "{{$labels.env}}"}, For: promutil.NewDuration(time.Second)}},
|
||||
map[uint64]*notifier.Alert{
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: ts,
|
||||
},
|
||||
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "prod"}): {
|
||||
Name: "foo",
|
||||
ActiveAt: ts.Add(time.Second),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func TestAlertingRule_Exec_Negative(t *testing.T) {
|
||||
|
||||
@@ -513,7 +513,7 @@ func (g *Group) infof(format string, args ...any) {
|
||||
}
|
||||
|
||||
// Replay performs group replay
|
||||
func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoint, replayRuleRetryAttempts int, replayDelay time.Duration, disableProgressBar bool, ruleEvaluationConcurrency int) int {
|
||||
func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoint, replayRuleRetryAttempts int, replayDelay time.Duration, disableProgressBar bool) int {
|
||||
var total int
|
||||
step := g.Interval * time.Duration(maxDataPoint)
|
||||
ri := rangeIterator{start: start, end: end, step: step}
|
||||
@@ -541,7 +541,8 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
|
||||
if !disableProgressBar {
|
||||
bar = pb.StartNew(iterations)
|
||||
}
|
||||
total += replayRuleRange(rule, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
|
||||
// pass ri as a copy, so it can be modified within the replayRuleRange
|
||||
total += replayRuleRange(rule, ri, bar, rw, replayRuleRetryAttempts)
|
||||
if bar != nil {
|
||||
bar.Finish()
|
||||
}
|
||||
@@ -564,7 +565,7 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
|
||||
wg.Add(1)
|
||||
go func(r Rule, ri rangeIterator) {
|
||||
// pass ri as a copy, so it can be modified within the replayRuleRange
|
||||
res <- replayRuleRange(r, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
|
||||
res <- replayRuleRange(r, ri, bar, rw, replayRuleRetryAttempts)
|
||||
<-sem
|
||||
wg.Done()
|
||||
}(r, ri)
|
||||
@@ -585,34 +586,17 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
|
||||
return total
|
||||
}
|
||||
|
||||
func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewrite.RWClient, replayRuleRetryAttempts, ruleEvaluationConcurrency int) int {
|
||||
func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewrite.RWClient, replayRuleRetryAttempts int) int {
|
||||
fmt.Printf("> Rule %q (ID: %d)\n", r, r.ID())
|
||||
sem := make(chan struct{}, ruleEvaluationConcurrency)
|
||||
wg := sync.WaitGroup{}
|
||||
res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1)
|
||||
for ri.next() {
|
||||
sem <- struct{}{}
|
||||
wg.Add(1)
|
||||
|
||||
go func(s, e time.Time) {
|
||||
n, err := replayRule(r, s, e, rw, replayRuleRetryAttempts)
|
||||
if err != nil {
|
||||
logger.Fatalf("rule %q: %s", r, err)
|
||||
}
|
||||
if bar != nil {
|
||||
bar.Increment()
|
||||
}
|
||||
res <- n
|
||||
<-sem
|
||||
wg.Done()
|
||||
}(ri.s, ri.e)
|
||||
}
|
||||
wg.Wait()
|
||||
close(res)
|
||||
close(sem)
|
||||
|
||||
total := 0
|
||||
for n := range res {
|
||||
for ri.next() {
|
||||
n, err := replayRule(r, ri.s, ri.e, rw, replayRuleRetryAttempts)
|
||||
if err != nil {
|
||||
logger.Fatalf("rule %q: %s", r, err)
|
||||
}
|
||||
if bar != nil {
|
||||
bar.Increment()
|
||||
}
|
||||
total += n
|
||||
}
|
||||
return total
|
||||
|
||||
@@ -99,15 +99,3 @@ textarea.curl-area {
|
||||
padding: 0;
|
||||
overflow: scroll;
|
||||
}
|
||||
|
||||
.w-10 {
|
||||
width: 10%;
|
||||
}
|
||||
|
||||
.w-20 {
|
||||
width: 20%;
|
||||
}
|
||||
|
||||
.w-60 {
|
||||
width: 60%;
|
||||
}
|
||||
|
||||
@@ -161,9 +161,9 @@
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" class="w-60">Rule</th>
|
||||
<th scope="col" class="w-20" class="text-center" title="How many series were produced by the rule">Series</th>
|
||||
<th scope="col" class="w-20" class="text-center" title="How many seconds ago rule was executed">Updated</th>
|
||||
<th scope="col" style="width: 60%">Rule</th>
|
||||
<th scope="col" style="width: 20%" class="text-center" title="How many series were produced by the rule">Series</th>
|
||||
<th scope="col" style="width: 20%" class="text-center" title="How many seconds ago rule was executed">Updated</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@@ -594,9 +594,9 @@
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" title="The time when event was created">Updated at</th>
|
||||
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
||||
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
|
||||
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
|
||||
<th scope="col" style="width: 10%" class="text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
||||
{% if seriesFetchedEnabled %}<th scope="col" style="width: 10%" class="text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
|
||||
<th scope="col" style="width: 10%" class="text-center" title="How many seconds request took">Duration</th>
|
||||
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
|
||||
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
|
||||
</tr>
|
||||
|
||||
@@ -523,9 +523,9 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*apiG
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" class="w-60">Rule</th>
|
||||
<th scope="col" class="w-20" class="text-center" title="How many series were produced by the rule">Series</th>
|
||||
<th scope="col" class="w-20" class="text-center" title="How many seconds ago rule was executed">Updated</th>
|
||||
<th scope="col" style="width: 60%">Rule</th>
|
||||
<th scope="col" style="width: 20%" class="text-center" title="How many series were produced by the rule">Series</th>
|
||||
<th scope="col" style="width: 20%" class="text-center" title="How many seconds ago rule was executed">Updated</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@@ -1697,17 +1697,17 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule apiRule)
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" title="The time when event was created">Updated at</th>
|
||||
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
||||
<th scope="col" style="width: 10%" class="text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:598
|
||||
if seriesFetchedEnabled {
|
||||
//line app/vmalert/web.qtpl:598
|
||||
qw422016.N().S(`<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>`)
|
||||
qw422016.N().S(`<th scope="col" style="width: 10%" class="text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>`)
|
||||
//line app/vmalert/web.qtpl:598
|
||||
}
|
||||
//line app/vmalert/web.qtpl:598
|
||||
qw422016.N().S(`
|
||||
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
|
||||
<th scope="col" style="width: 10%" class="text-center" title="How many seconds request took">Duration</th>
|
||||
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
|
||||
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
|
||||
</tr>
|
||||
|
||||
@@ -51,13 +51,6 @@ func main() {
|
||||
buildinfo.Init()
|
||||
logger.Init()
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
go func() {
|
||||
procutil.WaitForSigterm()
|
||||
logger.Infof("received stop signal, canceling backup operation")
|
||||
cancel()
|
||||
}()
|
||||
|
||||
// Storing snapshot delete function to be able to call it in case
|
||||
// of error since logger.Fatal will exit the program without
|
||||
// calling deferred functions.
|
||||
@@ -86,7 +79,7 @@ func main() {
|
||||
}
|
||||
logger.Infof("Snapshot delete url %s", deleteURL.Redacted())
|
||||
|
||||
name, err := snapshot.Create(ctx, createURL.String())
|
||||
name, err := snapshot.Create(createURL.String())
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot create snapshot: %s", err)
|
||||
}
|
||||
@@ -96,9 +89,7 @@ func main() {
|
||||
}
|
||||
|
||||
deleteSnapshot = func() {
|
||||
// Do not use ctx here as it may be canceled by the time deleteSnapshot is called
|
||||
// if process is interrupted.
|
||||
err := snapshot.Delete(context.Background(), deleteURL.String(), name)
|
||||
err := snapshot.Delete(deleteURL.String(), name)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot delete snapshot: %s", err)
|
||||
}
|
||||
@@ -108,6 +99,13 @@ func main() {
|
||||
listenAddrs := []string{*httpListenAddr}
|
||||
go httpserver.Serve(listenAddrs, nil, httpserver.ServeOptions{})
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
go func() {
|
||||
procutil.WaitForSigterm()
|
||||
logger.Infof("received stop signal, canceling backup operation")
|
||||
cancel()
|
||||
}()
|
||||
|
||||
pushmetrics.Init()
|
||||
err := makeBackup(ctx)
|
||||
deleteSnapshot()
|
||||
|
||||
@@ -140,15 +140,6 @@ func (p *vmNativeProcessor) runSingle(ctx context.Context, f native.Filter, srcU
|
||||
|
||||
written, err := io.Copy(w, reader)
|
||||
if err != nil {
|
||||
// io.Copy could fail if ImportPipe will fail before and close the pr
|
||||
// so we check if that's the case and to not ignore importErr if it exists.
|
||||
select {
|
||||
case importErr := <-importCh:
|
||||
if importErr != nil {
|
||||
return fmt.Errorf("failed to import %s: %w", p.dst.Addr, importErr)
|
||||
}
|
||||
default:
|
||||
}
|
||||
return fmt.Errorf("failed to write into %q: %s", p.dst.Addr, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@ package graphite
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"math"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"sort"
|
||||
@@ -203,7 +202,7 @@ func MetricsExpandHandler(startTime time.Time, w http.ResponseWriter, r *http.Re
|
||||
func MetricsIndexHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
deadline := searchutil.GetDeadlineForQuery(r, startTime)
|
||||
jsonp := r.FormValue("jsonp")
|
||||
sq := storage.NewSearchQuery(0, math.MaxInt, nil, 0)
|
||||
sq := storage.NewSearchQuery(0, 0, nil, 0)
|
||||
metricNames, err := netstorage.LabelValues(nil, "__name__", sq, 0, deadline)
|
||||
if err != nil {
|
||||
return fmt.Errorf(`cannot obtain metric names: %w`, err)
|
||||
|
||||
@@ -562,15 +562,6 @@ func handleStaticAndSimpleRequests(w http.ResponseWriter, r *http.Request, path
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprint(w, `{"status":"success","data":{"alerts":[]}}`)
|
||||
return true
|
||||
case "/api/v1/notifiers", "/notifiers":
|
||||
notifiersRequests.Inc()
|
||||
if len(*vmalertProxyURL) > 0 {
|
||||
proxyVMAlertRequests(w, r)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprint(w, `{"status":"success","data":{"notifiers":[]}}`)
|
||||
return true
|
||||
case "/api/v1/metadata":
|
||||
// Return dumb placeholder for https://prometheus.io/docs/prometheus/latest/querying/api/#querying-metric-metadata
|
||||
metadataRequests.Inc()
|
||||
@@ -700,10 +691,9 @@ var (
|
||||
expandWithExprsRequests = metrics.NewCounter(`vm_http_requests_total{path="/expand-with-exprs"}`)
|
||||
prettifyQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/prettify-query"}`)
|
||||
|
||||
vmalertRequests = metrics.NewCounter(`vm_http_requests_total{path="/vmalert"}`)
|
||||
rulesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/rules"}`)
|
||||
alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`)
|
||||
notifiersRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/notifiers"}`)
|
||||
vmalertRequests = metrics.NewCounter(`vm_http_requests_total{path="/vmalert"}`)
|
||||
rulesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/rules"}`)
|
||||
alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`)
|
||||
|
||||
metadataRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/metadata"}`)
|
||||
buildInfoRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/buildinfo"}`)
|
||||
|
||||
@@ -377,13 +377,11 @@ func getRollupConfigs(funcName string, rf rollupFunc, expr metricsql.Expr, start
|
||||
preFunc := func(_ []float64, _ []int64) {}
|
||||
funcName = strings.ToLower(funcName)
|
||||
|
||||
// window > lookbackDelta could result in negative delta.
|
||||
// See issue: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8342
|
||||
stalenessInterval := lookbackDelta
|
||||
if stalenessInterval != 0 {
|
||||
// If stalenessInterval was set, it should additionally account for [window] range to cover following cases:
|
||||
// * window > stalenessInterval, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8342
|
||||
// * window captures prevValue in doInternal while removeCounterResets does not,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8935#issuecomment-3000735468
|
||||
stalenessInterval += window
|
||||
if stalenessInterval != 0 && stalenessInterval < window {
|
||||
stalenessInterval = window
|
||||
}
|
||||
|
||||
if rollupFuncsRemoveCounterResets[funcName] {
|
||||
|
||||
@@ -746,7 +746,7 @@ See also [irate](#irate), [rollup_rate](#rollup_rate) and [rate_prometheus](#rat
|
||||
|
||||
#### rate_prometheus
|
||||
|
||||
`rate_prometheus(series_selector[d])` {{% available_from "v1.120.0" %}} is a [rollup function](#rollup-functions), which calculates the average per-second
|
||||
`rate_prometheus(series_selector[d])` {{% available_from "#" %}} is a [rollup function](#rollup-functions), which calculates the average per-second
|
||||
increase rate over the given lookbehind window `d` per each time series returned from the given [series_selector](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#filtering).
|
||||
The resulting calculation is equivalent to `increase_prometheus(series_selector[d]) / d`.
|
||||
|
||||
File diff suppressed because one or more lines are too long
209
app/vmselect/vmui/assets/index-BiQY-19a.js
Normal file
209
app/vmselect/vmui/assets/index-BiQY-19a.js
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
67
app/vmselect/vmui/assets/vendor-D8IJGiEn.js
Normal file
67
app/vmselect/vmui/assets/vendor-D8IJGiEn.js
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,5 +0,0 @@
|
||||
<svg width="48" height="48" fill="#e94600" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M24.5475 0C10.3246.0265251 1.11379 3.06365 4.40623 6.10077c0 0 12.32997 11.23333 16.58217 14.84083.8131.6896 2.1728 1.1936 3.5191 1.2201h.1199c1.3463-.0265 2.706-.5305 3.5191-1.2201 4.2522-3.5942 16.5422-14.84083 16.5422-14.84083C48.0478 3.06365 38.8636.0265251 24.6674 0"/>
|
||||
<path d="M28.1579 27.0159c-.8131.6896-2.1728 1.1936-3.5191 1.2201h-.12c-1.3463-.0265-2.7059-.5305-3.519-1.2201-2.9725-2.5067-13.35639-11.87-17.26201-15.3979v5.4112c0 .5968.22661 1.3793.6265 1.7506C7.00358 21.1936 17.2675 30.5437 20.9731 33.6737c.8132.6896 2.1728 1.1936 3.5191 1.2201h.12c1.3463-.0265 2.7059-.5305 3.519-1.2201 3.679-3.13 13.9429-12.4536 16.6089-14.8939.4132-.3713.6265-1.1538.6265-1.7506V11.618c-3.9323 3.5411-14.3162 12.931-17.2354 15.3979h.0267Z"/>
|
||||
<path d="M28.1579 39.748c-.8131.6897-2.1728 1.1937-3.5191 1.2202h-.12c-1.3463-.0265-2.7059-.5305-3.519-1.2202-2.9725-2.4933-13.35639-11.8567-17.26201-15.3978v5.4111c0 .5969.22661 1.3793.6265 1.7507C7.00358 33.9258 17.2675 43.2759 20.9731 46.4058c.8132.6897 2.1728 1.1937 3.5191 1.2202h.12c1.3463-.0265 2.7059-.5305 3.519-1.2202 3.679-3.1299 13.9429-12.4535 16.6089-14.8938.4132-.3714.6265-1.1538.6265-1.7507v-5.4111c-3.9323 3.5411-14.3162 12.931-17.2354 15.3978h.0267Z"/>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 1.3 KiB |
@@ -13,7 +13,7 @@
|
||||
manifest.json provides metadata used when your web app is installed on a
|
||||
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
|
||||
-->
|
||||
<link rel="manifest" href="./manifest.json" crossorigin="use-credentials"/>
|
||||
<link rel="manifest" href="./manifest.json"/>
|
||||
<!--
|
||||
Notice the use of in the tags above.
|
||||
It will be replaced with the URL of the `public` folder during the build.
|
||||
@@ -36,10 +36,10 @@
|
||||
<meta property="og:title" content="UI for VictoriaMetrics">
|
||||
<meta property="og:url" content="https://victoriametrics.com/">
|
||||
<meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data">
|
||||
<script type="module" crossorigin src="./assets/index-1uwNuj_1.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-V4vnRsM-.js">
|
||||
<script type="module" crossorigin src="./assets/index-BiQY-19a.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-D8IJGiEn.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-D1GxaB_c.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-C36SC0pJ.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-ojCMu5lE.css">
|
||||
</head>
|
||||
<body>
|
||||
<noscript>You need to enable JavaScript to run this app.</noscript>
|
||||
|
||||
@@ -644,15 +644,6 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
|
||||
metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheMisses)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="storage/regexps"}`, storage.RegexpCacheMisses())
|
||||
metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="storage/regexpPrefixes"}`, storage.RegexpPrefixesCacheMisses())
|
||||
metrics.WriteCounterUint64(w, `vm_cache_eviction_bytes_total{type="storage/tsid", reason="cache_size"}`, m.TSIDCacheSizeEvictionBytes)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_eviction_bytes_total{type="storage/tsid", reason="miss_percentage"}`, m.TSIDCacheMissEvictionBytes)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_eviction_bytes_total{type="storage/tsid", reason="expiration"}`, m.TSIDCacheExpireEvictionBytes)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_eviction_bytes_total{type="storage/metricName", reason="cache_size"}`, m.MetricNameCacheSizeEvictionBytes)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_eviction_bytes_total{type="storage/metricName", reason="miss_percentage"}`, m.MetricNameCacheMissEvictionBytes)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_eviction_bytes_total{type="storage/metricName", reason="expiration"}`, m.MetricNameCacheExpireEvictionBytes)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_eviction_bytes_total{type="storage/metricIDs", reason="cache_size"}`, m.MetricIDCacheSizeEvictionBytes)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_eviction_bytes_total{type="storage/metricIDs", reason="miss_percentage"}`, m.MetricIDCacheMissEvictionBytes)
|
||||
metrics.WriteCounterUint64(w, `vm_cache_eviction_bytes_total{type="storage/metricIDs", reason="expiration"}`, m.MetricIDCacheExpireEvictionBytes)
|
||||
|
||||
metrics.WriteCounterUint64(w, `vm_deleted_metrics_total{type="indexdb"}`, idbm.DeletedMetricsCount)
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
manifest.json provides metadata used when your web app is installed on a
|
||||
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
|
||||
-->
|
||||
<link rel="manifest" href="/manifest.json" crossorigin="use-credentials"/>
|
||||
<link rel="manifest" href="/manifest.json"/>
|
||||
<!--
|
||||
Notice the use of in the tags above.
|
||||
It will be replaced with the URL of the `public` folder during the build.
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
manifest.json provides metadata used when your web app is installed on a
|
||||
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
|
||||
-->
|
||||
<link rel="manifest" href="/manifest.json" crossorigin="use-credentials"/>
|
||||
<link rel="manifest" href="/manifest.json"/>
|
||||
<!--
|
||||
Notice the use of in the tags above.
|
||||
It will be replaced with the URL of the `public` folder during the build.
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
manifest.json provides metadata used when your web app is installed on a
|
||||
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
|
||||
-->
|
||||
<link rel="manifest" href="/manifest.json" crossorigin="use-credentials"/>
|
||||
<link rel="manifest" href="/manifest.json"/>
|
||||
<!--
|
||||
Notice the use of in the tags above.
|
||||
It will be replaced with the URL of the `public` folder during the build.
|
||||
|
||||
@@ -746,7 +746,7 @@ See also [irate](#irate), [rollup_rate](#rollup_rate) and [rate_prometheus](#rat
|
||||
|
||||
#### rate_prometheus
|
||||
|
||||
`rate_prometheus(series_selector[d])` {{% available_from "v1.120.0" %}} is a [rollup function](#rollup-functions), which calculates the average per-second
|
||||
`rate_prometheus(series_selector[d])` {{% available_from "#" %}} is a [rollup function](#rollup-functions), which calculates the average per-second
|
||||
increase rate over the given lookbehind window `d` per each time series returned from the given [series_selector](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#filtering).
|
||||
The resulting calculation is equivalent to `increase_prometheus(series_selector[d]) / d`.
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ const QueryEditor: FC<QueryEditorProps> = ({
|
||||
const [caretPositionInput, setCaretPositionInput] = useState<[number, number]>([0, 0]);
|
||||
const autocompleteAnchorEl = useRef<HTMLInputElement>(null);
|
||||
|
||||
const [showAutocomplete, setShowAutocomplete] = useState(false);
|
||||
const [showAutocomplete, setShowAutocomplete] = useState(!!AutocompleteEl);
|
||||
const debouncedSetShowAutocomplete = useRef(debounce(setShowAutocomplete, 500)).current;
|
||||
|
||||
const warning = [
|
||||
@@ -128,7 +128,7 @@ const QueryEditor: FC<QueryEditorProps> = ({
|
||||
|
||||
useEffect(() => {
|
||||
setShowAutocomplete(false);
|
||||
debouncedSetShowAutocomplete(caretPositionAutocomplete.every(Boolean));
|
||||
debouncedSetShowAutocomplete(true);
|
||||
}, [caretPositionAutocomplete]);
|
||||
|
||||
return (
|
||||
|
||||
@@ -25,11 +25,6 @@ type PrometheusQuerier interface {
|
||||
PrometheusAPIV1QueryRange(t *testing.T, query string, opts QueryOpts) *PrometheusAPIV1QueryResponse
|
||||
PrometheusAPIV1Series(t *testing.T, matchQuery string, opts QueryOpts) *PrometheusAPIV1SeriesResponse
|
||||
PrometheusAPIV1ExportNative(t *testing.T, query string, opts QueryOpts) []byte
|
||||
|
||||
// TODO(@rtm0): Prometheus does not provide this API. Either move it to a
|
||||
// separate interface or rename this interface to allow for multiple querier
|
||||
// types.
|
||||
GraphiteMetricsIndex(t *testing.T, opts QueryOpts) GraphiteMetricsIndexResponse
|
||||
}
|
||||
|
||||
// Writer contains methods for writing new data
|
||||
@@ -400,10 +395,6 @@ type TSDBStatusResponse struct {
|
||||
Data TSDBStatusResponseData
|
||||
}
|
||||
|
||||
// GraphiteMetricsIndexResponse is an in-memory representation of the json response
|
||||
// returned by the /graphite/metrics/index.json endpoint.
|
||||
type GraphiteMetricsIndexResponse = []string
|
||||
|
||||
// AdminTenantsResponse is an in-memory representation of the json response
|
||||
// returned by the /api/v1/admin/tenants endpoint.
|
||||
type AdminTenantsResponse struct {
|
||||
|
||||
@@ -433,24 +433,3 @@ func (tc *TestCase) MustStartVlsingle(instance string, flags []string) *Vlsingle
|
||||
tc.addApp(instance, app)
|
||||
return app
|
||||
}
|
||||
|
||||
// MustStartDefaultVlagent is a test helper function that starts an instance of
|
||||
// vlagent with defaults suitable for most tests.
|
||||
func (tc *TestCase) MustStartDefaultVlagent(remoteWriteURLs []string) *Vlagent {
|
||||
tc.t.Helper()
|
||||
|
||||
return tc.MustStartVlagent("vlagent", remoteWriteURLs, nil)
|
||||
}
|
||||
|
||||
// MustStartVlagent is a test helper function that starts an instance of
|
||||
// vlagent and fails the test if the app fails to start.
|
||||
func (tc *TestCase) MustStartVlagent(instance string, remoteWriteURLs []string, flags []string) *Vlagent {
|
||||
tc.t.Helper()
|
||||
|
||||
app, err := StartVlagent(instance, remoteWriteURLs, flags, tc.cli)
|
||||
if err != nil {
|
||||
tc.t.Fatalf("Could not start %s: %v", instance, err)
|
||||
}
|
||||
tc.addApp(instance, app)
|
||||
return app
|
||||
}
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
|
||||
at "github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
)
|
||||
|
||||
func testMetricsIndex(t *testing.T, sut at.PrometheusWriteQuerier) {
|
||||
// verify index is empty at the start
|
||||
expected := at.GraphiteMetricsIndexResponse{}
|
||||
tenant := "1:2"
|
||||
got := sut.GraphiteMetricsIndex(t, at.QueryOpts{Tenant: tenant})
|
||||
if diff := cmp.Diff(expected, got); diff != "" {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
|
||||
// Mon Feb 5 09:57:36 CET 2024
|
||||
const ingestTimestamp = ` 1707123456700`
|
||||
dataSet := []string{
|
||||
`metric_name_1{label="foo"} 10`,
|
||||
`metric_name_1{label="bar"} 10`,
|
||||
`metric_name_2{label="baz"} 20`,
|
||||
`metric_name_1{label="baz"} 10`,
|
||||
`metric_name_3{label="baz"} 30`,
|
||||
}
|
||||
|
||||
for idx := range dataSet {
|
||||
dataSet[idx] += ingestTimestamp
|
||||
}
|
||||
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, dataSet, at.QueryOpts{Tenant: tenant})
|
||||
sut.ForceFlush(t)
|
||||
|
||||
// verify ingested metrics correctly returned in index response
|
||||
expected = []string{"metric_name_1", "metric_name_2", "metric_name_3"}
|
||||
|
||||
got = sut.GraphiteMetricsIndex(t, at.QueryOpts{Tenant: tenant})
|
||||
if diff := cmp.Diff(expected, got); diff != "" {
|
||||
t.Errorf("unexpected response (-want, +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSingleMetricsIndex(t *testing.T) {
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
sut := tc.MustStartDefaultVmsingle()
|
||||
|
||||
testMetricsIndex(tc.T(), sut)
|
||||
}
|
||||
|
||||
func TestClusterMetricsIndex(t *testing.T) {
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
sut := tc.MustStartDefaultCluster()
|
||||
|
||||
testMetricsIndex(tc.T(), sut)
|
||||
}
|
||||
@@ -41,7 +41,6 @@ func testSpecialQueryRegression(tc *at.TestCase, sut at.PrometheusWriteQuerier)
|
||||
testDuplicateLabel(tc, sut)
|
||||
testTooBigLookbehindWindow(tc, sut)
|
||||
testMatchSeries(tc, sut)
|
||||
testNegativeIncrease(tc, sut)
|
||||
|
||||
// graphite
|
||||
testComparisonNotInfNotNan(tc, sut)
|
||||
@@ -235,53 +234,6 @@ func testMatchSeries(tc *at.TestCase, sut at.PrometheusWriteQuerier) {
|
||||
})
|
||||
}
|
||||
|
||||
func testNegativeIncrease(tc *at.TestCase, sut at.PrometheusWriteQuerier) {
|
||||
t := tc.T()
|
||||
|
||||
// negative increase when user overrides staleness interval
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8935#issuecomment-2978728661
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, []string{
|
||||
`foo 108 1750109243514`, // 2025-06-16 21:27:23:514
|
||||
`foo 108 1750109258514`, // 2025-06-16 21:27:38:514
|
||||
// gap 75s
|
||||
`foo 1 1750109333514`, // 2025-06-16 21:28:53:514
|
||||
`foo 1 1750109348514`, // 2025-06-16 21:29:08:514
|
||||
}, at.QueryOpts{})
|
||||
sut.ForceFlush(t)
|
||||
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "regression for https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8935#issuecomment-2978728661",
|
||||
DoNotRetry: true,
|
||||
Got: func() any {
|
||||
return sut.PrometheusAPIV1QueryRange(t, `increase(foo[1m])`, at.QueryOpts{
|
||||
Start: "2025-06-16T21:28:40.700Z",
|
||||
End: "2025-06-16T21:29:30.700Z",
|
||||
Step: "9s",
|
||||
MaxLookback: "65s",
|
||||
})
|
||||
},
|
||||
Want: &at.PrometheusAPIV1QueryResponse{
|
||||
Status: "success",
|
||||
Data: &at.QueryData{
|
||||
ResultType: "matrix",
|
||||
Result: []*at.QueryResult{
|
||||
{
|
||||
Metric: map[string]string{},
|
||||
Samples: []*at.Sample{
|
||||
at.NewSample(t, "2025-06-16T21:28:40.700Z", 0),
|
||||
at.NewSample(t, "2025-06-16T21:28:49.700Z", 0),
|
||||
at.NewSample(t, "2025-06-16T21:28:58.700Z", 1),
|
||||
at.NewSample(t, "2025-06-16T21:29:07.700Z", 1),
|
||||
at.NewSample(t, "2025-06-16T21:29:16.700Z", 0),
|
||||
at.NewSample(t, "2025-06-16T21:29:25.700Z", 0),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func testComparisonNotInfNotNan(tc *at.TestCase, sut at.PrometheusWriteQuerier) {
|
||||
t := tc.T()
|
||||
|
||||
|
||||
@@ -1,154 +0,0 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
at "github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
)
|
||||
|
||||
// TestSingleVlagentRemoteWrite performs tests for remote write data ingestion
|
||||
// by vlagent application
|
||||
func TestSingleVlagentRemoteWrite(t *testing.T) {
|
||||
os.RemoveAll(t.Name())
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
// test data ingestion into
|
||||
const instance = "vlsingle"
|
||||
const r1Port = "50425"
|
||||
sutFlags := []string{
|
||||
"-httpListenAddr=127.0.0.1:" + r1Port,
|
||||
"-storageDataPath=" + tc.Dir() + "/" + instance,
|
||||
"-retentionPeriod=100y",
|
||||
}
|
||||
|
||||
sut := tc.MustStartVlsingle(instance, sutFlags)
|
||||
remoteWriteURL := fmt.Sprintf("http://%s/internal/insert", sut.HTTPAddr())
|
||||
|
||||
vlagent := tc.MustStartDefaultVlagent([]string{remoteWriteURL})
|
||||
vlagent.JSONLineWrite(t, []string{
|
||||
`{"_msg":"ingest jsonline","_time": "2025-06-05T14:30:19.088007Z", "foo":"bar"}`,
|
||||
`{"_msg":"ingest jsonline","_time": "2025-06-05T14:30:19.088007Z", "bar":"foo"}`,
|
||||
}, at.QueryOptsLogs{})
|
||||
|
||||
sut.ForceFlush(t)
|
||||
got := sut.LogsQLQuery(t, "ingest jsonline", at.QueryOptsLogs{})
|
||||
wantLogLines := []string{
|
||||
`{"_msg":"ingest jsonline","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","bar":"foo"}`,
|
||||
`{"_msg":"ingest jsonline","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","foo":"bar"}`,
|
||||
}
|
||||
assertLogsQLResponseEqual(t, got, &at.LogsQLQueryResponse{LogLines: wantLogLines})
|
||||
|
||||
// stop log storage and check data buffering works correctly
|
||||
tc.StopApp(instance)
|
||||
|
||||
// ingest some data vlagent must hold it in memory
|
||||
vlagent.JSONLineWrite(t, []string{
|
||||
`{"_msg":"ingest jsonline2","_time": "2025-06-05T14:30:19.088007Z", "foo":"bar"}`,
|
||||
`{"_msg":"ingest jsonline2","_time": "2025-06-05T14:30:19.088007Z", "bar":"foo"}`,
|
||||
}, at.QueryOptsLogs{})
|
||||
|
||||
vlagent.WaitQueueEmptyAfter(t, func() {
|
||||
// start storage and check if buffered data correctly ingested
|
||||
sut = tc.MustStartVlsingle(instance, sutFlags)
|
||||
})
|
||||
|
||||
sut.ForceFlush(t)
|
||||
got = sut.LogsQLQuery(t, "ingest jsonline2", at.QueryOptsLogs{})
|
||||
wantLogLines = []string{
|
||||
`{"_msg":"ingest jsonline2","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","bar":"foo"}`,
|
||||
`{"_msg":"ingest jsonline2","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","foo":"bar"}`,
|
||||
}
|
||||
assertLogsQLResponseEqual(t, got, &at.LogsQLQueryResponse{LogLines: wantLogLines})
|
||||
}
|
||||
|
||||
func TestSingleVlagentRemoteWriteReplication(t *testing.T) {
|
||||
os.RemoveAll(t.Name())
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
const (
|
||||
instanceReplica0 = "vlsingle-0"
|
||||
vlsinglePortR0 = "53541"
|
||||
instanceReplica1 = "vlsingle-1"
|
||||
vlsinglePortR1 = "53124"
|
||||
vlagentInstance = "vlagent"
|
||||
)
|
||||
sutFlagsR0 := []string{
|
||||
"-httpListenAddr=127.0.0.1:" + vlsinglePortR0,
|
||||
"-storageDataPath=" + path.Join(tc.Dir(), instanceReplica0),
|
||||
"-retentionPeriod=100y",
|
||||
}
|
||||
sutFlagsR1 := []string{
|
||||
"-httpListenAddr=127.0.0.1:" + vlsinglePortR1,
|
||||
"-storageDataPath=" + path.Join(tc.Dir(), instanceReplica1),
|
||||
"-retentionPeriod=100y",
|
||||
}
|
||||
|
||||
sutR0 := tc.MustStartVlsingle(instanceReplica0, sutFlagsR0)
|
||||
sutR1 := tc.MustStartVlsingle(instanceReplica1, sutFlagsR1)
|
||||
|
||||
vlagentRemoteWriteURLs := []string{
|
||||
fmt.Sprintf("http://%s/internal/insert", sutR0.HTTPAddr()),
|
||||
fmt.Sprintf("http://%s/internal/insert", sutR1.HTTPAddr()),
|
||||
}
|
||||
vlagentFlags := []string{
|
||||
"-remoteWrite.tmpDataPath=" + fmt.Sprintf("%s/%s-%d", os.TempDir(), vlagentInstance, time.Now().UnixNano()),
|
||||
}
|
||||
vlagent := tc.MustStartVlagent(vlagentInstance, vlagentRemoteWriteURLs, vlagentFlags)
|
||||
|
||||
// ingest data and check if it properly replicated to the vlsingles
|
||||
vlagent.JSONLineWrite(t, []string{
|
||||
`{"_msg":"ingest jsonline","_time": "2025-06-05T14:30:19.088007Z", "foo":"bar"}`,
|
||||
`{"_msg":"ingest jsonline","_time": "2025-06-05T14:30:19.088007Z", "bar":"foo"}`,
|
||||
}, at.QueryOptsLogs{})
|
||||
|
||||
wantLogLines := []string{
|
||||
`{"_msg":"ingest jsonline","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","bar":"foo"}`,
|
||||
`{"_msg":"ingest jsonline","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","foo":"bar"}`,
|
||||
}
|
||||
|
||||
sutR0.ForceFlush(t)
|
||||
gotR0 := sutR0.LogsQLQuery(t, "ingest jsonline", at.QueryOptsLogs{})
|
||||
assertLogsQLResponseEqual(t, gotR0, &at.LogsQLQueryResponse{LogLines: wantLogLines})
|
||||
|
||||
sutR1.ForceFlush(t)
|
||||
gotR1 := sutR1.LogsQLQuery(t, "ingest jsonline", at.QueryOptsLogs{})
|
||||
assertLogsQLResponseEqual(t, gotR1, &at.LogsQLQueryResponse{LogLines: wantLogLines})
|
||||
|
||||
// stop log storage and check data buffering works correctly at vlagent
|
||||
tc.StopApp(instanceReplica0)
|
||||
|
||||
// ingest some data vlagent must hold it in memory
|
||||
vlagent.JSONLineWrite(t, []string{
|
||||
`{"_msg":"ingest jsonline2","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","bar":"foo"}`,
|
||||
`{"_msg":"ingest jsonline2","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","foo":"bar"}`,
|
||||
}, at.QueryOptsLogs{})
|
||||
|
||||
// check alive storage received data
|
||||
wantLogLines = []string{
|
||||
`{"_msg":"ingest jsonline2","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","bar":"foo"}`,
|
||||
`{"_msg":"ingest jsonline2","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","foo":"bar"}`,
|
||||
}
|
||||
|
||||
sutR1.ForceFlush(t)
|
||||
gotR1 = sutR1.LogsQLQuery(t, "ingest jsonline2", at.QueryOptsLogs{})
|
||||
assertLogsQLResponseEqual(t, gotR1, &at.LogsQLQueryResponse{LogLines: wantLogLines})
|
||||
|
||||
// stop vmagent, it must buffer data on-disk
|
||||
tc.StopApp(vlagentInstance)
|
||||
|
||||
vlagent = tc.MustStartVlagent(vlagentInstance, vlagentRemoteWriteURLs, vlagentFlags)
|
||||
vlagent.WaitQueueEmptyAfter(t, func() {
|
||||
// start storage and check if buffered data correctly ingested
|
||||
sutR0 = tc.MustStartVlsingle(instanceReplica0, sutFlagsR0)
|
||||
})
|
||||
|
||||
sutR0.ForceFlush(t)
|
||||
gotR0 = sutR0.LogsQLQuery(t, "ingest jsonline2", at.QueryOptsLogs{})
|
||||
assertLogsQLResponseEqual(t, gotR0, &at.LogsQLQueryResponse{LogLines: wantLogLines})
|
||||
}
|
||||
@@ -1,159 +0,0 @@
|
||||
package apptest
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Vlagent holds the state of a vlagent app and provides vlagent-specific functions
|
||||
type Vlagent struct {
|
||||
*app
|
||||
*ServesMetrics
|
||||
|
||||
remoteStoragesCount int
|
||||
httpListenAddr string
|
||||
}
|
||||
|
||||
// StartVlagent starts an instance of vlagent with the given flags.
|
||||
// It also sets the default flags and populates the app instance state with runtime
|
||||
// values extracted from the application log (such as httpListenAddr)
|
||||
func StartVlagent(instance string, remoteWriteURLs []string, flags []string, cli *Client) (*Vlagent, error) {
|
||||
extractREs := []*regexp.Regexp{
|
||||
httpListenAddrRE,
|
||||
}
|
||||
|
||||
app, stderrExtracts, err := startApp(instance, "../../bin/vlagent", flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-httpListenAddr": "127.0.0.1:0",
|
||||
"-remoteWrite.url": strings.Join(remoteWriteURLs, ","),
|
||||
"-remoteWrite.tmpDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
|
||||
"-remoteWrite.flushInterval": "10ms",
|
||||
"-remoteWrite.showURL": "true",
|
||||
},
|
||||
extractREs: extractREs,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Vlagent{
|
||||
app: app,
|
||||
remoteStoragesCount: len(remoteWriteURLs),
|
||||
ServesMetrics: &ServesMetrics{
|
||||
metricsURL: fmt.Sprintf("http://%s/metrics", stderrExtracts[0]),
|
||||
cli: cli,
|
||||
},
|
||||
httpListenAddr: stderrExtracts[0],
|
||||
}, nil
|
||||
}
|
||||
|
||||
// JSONLineWrite is a test helper function that inserts a
|
||||
// collection of records in json line format by sending a HTTP
|
||||
// POST request to /insert/jsonline vlagent endpoint.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victorialogs/data-ingestion/#json-stream-api
|
||||
func (app *Vlagent) JSONLineWrite(t *testing.T, records []string, opts QueryOptsLogs) {
|
||||
t.Helper()
|
||||
|
||||
data := []byte(strings.Join(records, "\n"))
|
||||
|
||||
url := fmt.Sprintf("http://%s/insert/jsonline", app.httpListenAddr)
|
||||
uv := opts.asURLValues()
|
||||
uvs := uv.Encode()
|
||||
if len(uvs) > 0 {
|
||||
url += "?" + uvs
|
||||
}
|
||||
app.sendBlocking(t, len(records), func() {
|
||||
_, statusCode := app.cli.Post(t, url, "text/plain", data)
|
||||
if statusCode != http.StatusOK {
|
||||
t.Fatalf("unexpected status code: got %d, want %d", statusCode, http.StatusOK)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// WaitQueueEmptyAfter checks that persistent queue is empty
|
||||
// after execution of provided callback
|
||||
func (app *Vlagent) WaitQueueEmptyAfter(t *testing.T, cb func()) {
|
||||
t.Helper()
|
||||
const (
|
||||
retries = 70
|
||||
period = 100 * time.Millisecond
|
||||
)
|
||||
// vlagent_remotewrite_blocks_sent_total
|
||||
// take in account data replication
|
||||
blocksSent := app.remoteWriteBlocksSent(t)
|
||||
cb()
|
||||
for range retries {
|
||||
if app.remoteWriteBlocksSent(t) > blocksSent && app.persistentQueueSize(t) == 0 {
|
||||
return
|
||||
}
|
||||
time.Sleep(period)
|
||||
}
|
||||
t.Fatalf("timed out while waiting for inserted logs to be flushed to remote storage")
|
||||
|
||||
}
|
||||
|
||||
// sendBlocking sends the data to remote write url by executing `send` function and
|
||||
// waits until the data is actually sent.
|
||||
//
|
||||
// vlagent does not send the data immediately. It first puts the data into a
|
||||
// buffer. Then a background goroutine takes the data from the buffer sends it
|
||||
// to the vmstorage. This happens every 1s by default.
|
||||
//
|
||||
// Waiting is implemented a retrieving the value of `vlagent_remotewrite_block_size_rows_sum`
|
||||
// metric and checking whether it is equal or greater than the wanted value.
|
||||
// If it is, then the data has been sent to remote storage.
|
||||
//
|
||||
// Unreliable if the records are inserted concurrently.
|
||||
func (app *Vlagent) sendBlocking(t *testing.T, numRecordsToSend int, send func()) {
|
||||
t.Helper()
|
||||
|
||||
send()
|
||||
|
||||
const (
|
||||
retries = 50
|
||||
period = 100 * time.Millisecond
|
||||
)
|
||||
// take in account data replication
|
||||
wantRowsSentCount := app.remoteWriteRowsPushed(t) + numRecordsToSend*app.remoteStoragesCount
|
||||
for range retries {
|
||||
if app.remoteWriteRowsPushed(t) >= wantRowsSentCount {
|
||||
return
|
||||
}
|
||||
time.Sleep(period)
|
||||
}
|
||||
t.Fatalf("timed out while waiting for inserted rows to be sent to remote storage")
|
||||
}
|
||||
|
||||
func (app *Vlagent) remoteWriteBlocksSent(t *testing.T) int {
|
||||
total := 0.0
|
||||
for _, v := range app.GetMetricsByPrefix(t, "vlagent_remotewrite_blocks_sent_total") {
|
||||
total += v
|
||||
}
|
||||
return int(total)
|
||||
}
|
||||
|
||||
func (app *Vlagent) remoteWriteRowsPushed(t *testing.T) int {
|
||||
total := 0.0
|
||||
// vlagent_remotewrite_blocks_sent_total
|
||||
for _, v := range app.GetMetricsByPrefix(t, "vlagent_remotewrite_block_size_rows_sum") {
|
||||
total += v
|
||||
}
|
||||
return int(total)
|
||||
}
|
||||
|
||||
func (app *Vlagent) persistentQueueSize(t *testing.T) int {
|
||||
total := 0.0
|
||||
for _, v := range app.GetMetricsByPrefix(t, "vlagent_remotewrite_pending_data_bytes") {
|
||||
total += v
|
||||
}
|
||||
for _, v := range app.GetMetricsByPrefix(t, "vlagent_remotewrite_pending_inmemory_blocks") {
|
||||
total += v
|
||||
}
|
||||
return int(total)
|
||||
}
|
||||
@@ -227,25 +227,6 @@ func (app *Vmselect) APIV1StatusTSDB(t *testing.T, matchQuery string, date strin
|
||||
return status
|
||||
}
|
||||
|
||||
// GraphiteMetricsIndex sends a query to a /graphite/metrics/index.json
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#metrics-api
|
||||
func (app *Vmselect) GraphiteMetricsIndex(t *testing.T, opts QueryOpts) GraphiteMetricsIndexResponse {
|
||||
t.Helper()
|
||||
|
||||
seriesURL := fmt.Sprintf("http://%s/select/%s/graphite/metrics/index.json", app.httpListenAddr, opts.getTenant())
|
||||
res, statusCode := app.cli.Get(t, seriesURL)
|
||||
if statusCode != http.StatusOK {
|
||||
t.Fatalf("unexpected status code: got %d, want %d, resp text=%q", statusCode, http.StatusOK, res)
|
||||
}
|
||||
|
||||
var index GraphiteMetricsIndexResponse
|
||||
if err := json.Unmarshal([]byte(res), &index); err != nil {
|
||||
t.Fatalf("could not unmarshal metrics index response data:\n%s\n err: %v", res, err)
|
||||
}
|
||||
return index
|
||||
}
|
||||
|
||||
// APIV1AdminTenants sends a query to a /admin/tenants endpoint
|
||||
func (app *Vmselect) APIV1AdminTenants(t *testing.T) *AdminTenantsResponse {
|
||||
t.Helper()
|
||||
|
||||
@@ -318,25 +318,6 @@ func (app *Vmsingle) PrometheusAPIV1Series(t *testing.T, matchQuery string, opts
|
||||
return NewPrometheusAPIV1SeriesResponse(t, res)
|
||||
}
|
||||
|
||||
// GraphiteMetricsIndex sends a query to a /metrics/index.json
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#metrics-api
|
||||
func (app *Vmsingle) GraphiteMetricsIndex(t *testing.T, _ QueryOpts) GraphiteMetricsIndexResponse {
|
||||
t.Helper()
|
||||
|
||||
seriesURL := fmt.Sprintf("http://%s/metrics/index.json", app.httpListenAddr)
|
||||
res, statusCode := app.cli.Get(t, seriesURL)
|
||||
if statusCode != http.StatusOK {
|
||||
t.Fatalf("unexpected status code: got %d, want %d, resp text=%q", statusCode, http.StatusOK, res)
|
||||
}
|
||||
|
||||
var index GraphiteMetricsIndexResponse
|
||||
if err := json.Unmarshal([]byte(res), &index); err != nil {
|
||||
t.Fatalf("could not unmarshal metrics index response data:\n%s\n err: %v", res, err)
|
||||
}
|
||||
return index
|
||||
}
|
||||
|
||||
// APIV1StatusMetricNamesStats sends a query to a /api/v1/status/metric_names_stats endpoint
|
||||
// and returns the statistics response for given params.
|
||||
//
|
||||
|
||||
@@ -5826,7 +5826,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 2387
|
||||
@@ -6110,7 +6110,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 2395
|
||||
@@ -7538,7 +7538,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the max number of concurrent selects across instances.\n* `max` limit can be configured via `search.maxConcurrentRequests` flag\n* `current` shows the current number of goroutines busy with processing requests\n\nWhen `current` hits `max` constantly, it means one or more vmselect nodes are overloaded with number of requests. If you observe that CPU for vmselects is saturated, consider adding more vmselect replicas or increase CPU resources. If CPU and Memory panels show a plenty of free resources - try increasing `-search.maxConcurrentRequests`. Please note, the higher is `-search.maxConcurrentRequests`, the higher could be [peak memory usage](https://docs.victoriametrics.com/victoriametrics/troubleshooting/#out-of-memory-errors).",
|
||||
"description": "Shows the max number of concurrent selects across instances.\n* `max` limit can be configured via `search.maxConcurrentRequests` flag\n* `current` shows the current number of goroutines busy with processing requests\n\nWhen `current` hits `max` constantly, it means one or more vmselect nodes are overloaded with number of requests. If you observe that CPU for vmselects is saturated, consider adding more vmselect replicas or increase CPU resources. If CPU panel shows a plenty of free resources - try increasing `search.maxConcurrentRequests`.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -10053,7 +10053,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 5014
|
||||
@@ -10159,7 +10159,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 5014
|
||||
@@ -10687,4 +10687,4 @@
|
||||
"uid": "oS7Bi_0Wz",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5827,7 +5827,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 2387
|
||||
@@ -6111,7 +6111,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 2395
|
||||
@@ -7539,7 +7539,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the max number of concurrent selects across instances.\n* `max` limit can be configured via `search.maxConcurrentRequests` flag\n* `current` shows the current number of goroutines busy with processing requests\n\nWhen `current` hits `max` constantly, it means one or more vmselect nodes are overloaded with number of requests. If you observe that CPU for vmselects is saturated, consider adding more vmselect replicas or increase CPU resources. If CPU and Memory panels show a plenty of free resources - try increasing `-search.maxConcurrentRequests`. Please note, the higher is `-search.maxConcurrentRequests`, the higher could be [peak memory usage](https://docs.victoriametrics.com/victoriametrics/troubleshooting/#out-of-memory-errors).",
|
||||
"description": "Shows the max number of concurrent selects across instances.\n* `max` limit can be configured via `search.maxConcurrentRequests` flag\n* `current` shows the current number of goroutines busy with processing requests\n\nWhen `current` hits `max` constantly, it means one or more vmselect nodes are overloaded with number of requests. If you observe that CPU for vmselects is saturated, consider adding more vmselect replicas or increase CPU resources. If CPU panel shows a plenty of free resources - try increasing `search.maxConcurrentRequests`.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -10054,7 +10054,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 5014
|
||||
@@ -10160,7 +10160,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 5014
|
||||
@@ -10688,4 +10688,4 @@
|
||||
"uid": "oS7Bi_0Wz_vm",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
}
|
||||
@@ -95,7 +95,7 @@ publish-via-docker:
|
||||
--label "org.opencontainers.image.version=$(PKG_TAG)" \
|
||||
--label "org.opencontainers.image.created=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ")" \
|
||||
$(foreach registry,$(DOCKER_REGISTRIES),\
|
||||
--tag $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE)$(EXTRA_DOCKER_TAG_SUFFIX) \
|
||||
--tag $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE)$(EXTRA_TAG_SUFFIX) \
|
||||
) \
|
||||
-o type=image \
|
||||
--provenance=false \
|
||||
@@ -115,7 +115,7 @@ publish-via-docker:
|
||||
--label "org.opencontainers.image.version=$(PKG_TAG)" \
|
||||
--label "org.opencontainers.image.created=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ")" \
|
||||
$(foreach registry,$(DOCKER_REGISTRIES),\
|
||||
--tag $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE)$(EXTRA_DOCKER_TAG_SUFFIX)-scratch \
|
||||
--tag $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(RACE)$(EXTRA_TAG_SUFFIX)-scratch \
|
||||
) \
|
||||
-o type=image \
|
||||
--provenance=false \
|
||||
@@ -129,14 +129,6 @@ publish-via-docker:
|
||||
$(APP_NAME)-linux-ppc64le-prod \
|
||||
$(APP_NAME)-linux-386-prod
|
||||
|
||||
# publish-via-docker-from-rc creates new image tags from existing ones.
# For every registry in DOCKER_REGISTRIES it runs `docker buildx imagetools create`
# to tag $(APP_NAME):$(PKG_TAG)$(EXTRA_DOCKER_TAG_SUFFIX) as $(APP_NAME):$(PKG_TAG),
# then does the same for the corresponding `-scratch` image tags.
# NOTE(review): commands produced by each foreach are joined with `;`, so presumably
# only the exit status of the last registry's command is checked - confirm this is intended.
publish-via-docker-from-rc:
|
||||
$(foreach registry,$(DOCKER_REGISTRIES),\
|
||||
docker buildx imagetools create --tag $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG) $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(EXTRA_DOCKER_TAG_SUFFIX); \
|
||||
)
|
||||
$(foreach registry,$(DOCKER_REGISTRIES),\
|
||||
docker buildx imagetools create --tag $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)-scratch $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG)$(EXTRA_DOCKER_TAG_SUFFIX)-scratch; \
|
||||
)
|
||||
|
||||
publish-via-docker-latest:
|
||||
$(foreach registry,$(DOCKER_REGISTRIES),\
|
||||
docker buildx imagetools create --tag $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):latest $(registry)/$(DOCKER_NAMESPACE)/$(APP_NAME):$(PKG_TAG); \
|
||||
|
||||
@@ -21,7 +21,6 @@ to the Internet.
|
||||
* [alertmanager](#alertmanager)
|
||||
* [Grafana](#grafana)
|
||||
* [Alerts](#alerts)
|
||||
* [Troubleshooting](#troubleshooting)
|
||||
|
||||
## VictoriaMetrics single server
|
||||
|
||||
@@ -57,8 +56,6 @@ To shutdown environment run:
|
||||
make docker-vm-single-down
|
||||
```
|
||||
|
||||
See [troubleshooting](#troubleshooting) in case of issues.
|
||||
|
||||
## VictoriaMetrics cluster
|
||||
|
||||
To spin-up environment with VictoriaMetrics cluster run the following command:
|
||||
@@ -95,8 +92,6 @@ To shutdown environment execute the following command:
|
||||
make docker-vm-cluster-down
|
||||
```
|
||||
|
||||
See [troubleshooting](#troubleshooting) in case of issues.
|
||||
|
||||
## vmagent
|
||||
|
||||
vmagent is used for scraping and pushing time series to VictoriaMetrics instance.
|
||||
@@ -140,8 +135,6 @@ To shutdown environment execute the following command:
|
||||
make docker-vl-single-down
|
||||
```
|
||||
|
||||
See [troubleshooting](#troubleshooting) in case of issues.
|
||||
|
||||
## VictoriaLogs cluster
|
||||
|
||||
To spin-up environment with VictoriaLogs cluster run the following command:
|
||||
@@ -183,8 +176,6 @@ To shutdown environment execute the following command:
|
||||
make docker-vl-cluster-down
|
||||
```
|
||||
|
||||
See [troubleshooting](#troubleshooting) in case of issues.
|
||||
|
||||
Please see more examples on integration of VictoriaLogs with other log shippers below:
|
||||
* [filebeat](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/victorialogs/filebeat)
|
||||
* [fluentbit](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/victorialogs/fluentbit)
|
||||
@@ -258,34 +249,3 @@ The list of alerting rules is the following:
|
||||
|
||||
Please, also see [how to monitor VictoriaMetrics installations](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#monitoring)
|
||||
and [how to monitor VictoriaLogs installations](https://docs.victoriametrics.com/victorialogs/#monitoring).
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
This environment has the following requirements:
|
||||
* installed [docker compose](https://docs.docker.com/compose/);
|
||||
* access to the Internet for downloading docker images;
|
||||
* **All commands should be executed from the root directory of [the VictoriaMetrics repo](https://github.com/VictoriaMetrics/VictoriaMetrics).**
|
||||
|
||||
The expected output of running a command like `make docker-vm-single-up` is the following:
|
||||
```sh
|
||||
make docker-vm-single-up :(
|
||||
docker compose -f deployment/docker/compose-vm-single.yml up -d
|
||||
[+] Running 9/9
|
||||
✔ Network docker_default Created 0.0s
|
||||
✔ Volume "docker_vmagentdata" Created 0.0s
|
||||
✔ Container docker-alertmanager-1 Started 0.3s
|
||||
✔ Container docker-victoriametrics-1 Started 0.3s
|
||||
...
|
||||
```
|
||||
|
||||
Containers are started in [--detach mode](https://docs.docker.com/reference/cli/docker/compose/up/), meaning they run in the background.
|
||||
As a result, you won't see their logs or exit status directly in the terminal.
|
||||
|
||||
If something isn’t working as expected, try the following troubleshooting steps:
|
||||
1. Run from the correct directory. Make sure you're running the command from the root of the [VictoriaMetrics repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
2. Check container status. Run `docker ps -a` to list all containers and their status. Healthy and running containers should have `STATUS` set to `Up`.
|
||||
3. View container logs. To inspect logs for a specific container, get its container ID from step 2 and run: `docker logs -f <containerID>`.
|
||||
4. Read the logs carefully and follow any suggested actions.
|
||||
5. Check for port conflicts. Some containers (e.g., Grafana) expose HTTP ports. If a port (like `:3000`) is already in use, the container may fail to start. Stop the conflicting process or change the exposed port in the Docker Compose file.
|
||||
6. Shut down the deployment. To tear down the environment, run: `make <environment>-down` (e.g. `make docker-vm-single-down`).
|
||||
Note that this command also removes all attached volumes, so all temporarily created data will be removed too (e.g. Grafana dashboards or collected metrics).
|
||||
@@ -1,17 +1,14 @@
|
||||
# balance load among vmselects
|
||||
# see https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing
|
||||
users:
|
||||
- username: "foo"
|
||||
password: "bar"
|
||||
url_map:
|
||||
- src_paths:
|
||||
- "/select/.*"
|
||||
- "/admin/.*"
|
||||
url_prefix:
|
||||
- http://vmselect-1:8481
|
||||
- http://vmselect-2:8481
|
||||
- src_paths:
|
||||
- "/insert/.*"
|
||||
url_prefix:
|
||||
- http://vminsert-1:8480
|
||||
- http://vminsert-2:8480
|
||||
unauthorized_user:
|
||||
url_map:
|
||||
- src_paths:
|
||||
- "/select/.*"
|
||||
url_prefix:
|
||||
- http://vmselect-1:8481
|
||||
- http://vmselect-2:8481
|
||||
- src_paths:
|
||||
- "/insert/.*"
|
||||
url_prefix:
|
||||
- http://vminsert-1:8480
|
||||
- http://vminsert-2:8480
|
||||
|
||||
@@ -44,18 +44,6 @@ services:
|
||||
deploy:
|
||||
replicas: 0
|
||||
|
||||
# vlagent is needed for HA setup and its replica count is set to 1 in compose-ha.yml file
|
||||
vlagent:
|
||||
image: victoriametrics/vlagent:v0.0.1
|
||||
volumes:
|
||||
- vlagent:/vlagent
|
||||
command:
|
||||
- '--remoteWrite.tmpDataPath=/vlagent'
|
||||
- '--remoteWrite.url=http://victorialogs:9428/internal/insert'
|
||||
- '--remoteWrite.url=http://victorialogs-2:9428/internal/insert'
|
||||
deploy:
|
||||
replicas: 0
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.112.0
|
||||
ports:
|
||||
@@ -75,4 +63,3 @@ volumes:
|
||||
victorialogs:
|
||||
victorialogs-2:
|
||||
victoriametrics:
|
||||
vlagent:
|
||||
|
||||
@@ -2,6 +2,3 @@ services:
|
||||
victorialogs-2:
|
||||
deploy:
|
||||
replicas: 1
|
||||
vlagent:
|
||||
deploy:
|
||||
replicas: 1
|
||||
|
||||
@@ -32,8 +32,20 @@
|
||||
[OUTPUT]
|
||||
Name http
|
||||
Match *
|
||||
host vlagent
|
||||
port 9429
|
||||
host victorialogs
|
||||
port 9428
|
||||
compress gzip
|
||||
uri /insert/jsonline?_stream_fields=stream,path&_msg_field=log&_time_field=date
|
||||
format json_lines
|
||||
json_date_format iso8601
|
||||
header AccountID 0
|
||||
header ProjectID 0
|
||||
|
||||
[OUTPUT]
|
||||
Name http
|
||||
Match *
|
||||
host victorialogs-2
|
||||
port 9428
|
||||
compress gzip
|
||||
uri /insert/jsonline?_stream_fields=stream,path&_msg_field=log&_time_field=date
|
||||
format json_lines
|
||||
|
||||
@@ -13,7 +13,12 @@ input {
|
||||
|
||||
output {
|
||||
http {
|
||||
url => "http://vlagent:9429/insert/jsonline?_stream_fields=host.name,stream&_msg_field=log&_time_field=time"
|
||||
url => "http://victorialogs:9428/insert/jsonline?_stream_fields=host.name,stream&_msg_field=log&_time_field=time"
|
||||
format => "json"
|
||||
http_method => "post"
|
||||
}
|
||||
http {
|
||||
url => "http://victorialogs-2:9428/insert/jsonline?_stream_fields=host.name,stream&_msg_field=log&_time_field=time"
|
||||
format => "json"
|
||||
http_method => "post"
|
||||
}
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
exporters:
|
||||
elasticsearch:
|
||||
endpoints:
|
||||
- http://vlagent:9429/insert/elasticsearch
|
||||
- http://victorialogs:9428/insert/elasticsearch
|
||||
- http://victorialogs-2:9428/insert/elasticsearch
|
||||
receivers:
|
||||
filelog:
|
||||
include: [/var/lib/docker/containers/**/*.log]
|
||||
|
||||
@@ -18,7 +18,26 @@ sinks:
|
||||
type: http
|
||||
inputs:
|
||||
- parser
|
||||
uri: http://vlagent:9429/insert/jsonline
|
||||
uri: http://victorialogs:9428/insert/jsonline
|
||||
encoding:
|
||||
codec: json
|
||||
framing:
|
||||
method: newline_delimited
|
||||
compression: gzip
|
||||
healthcheck:
|
||||
enabled: false
|
||||
request:
|
||||
headers:
|
||||
AccountID: '0'
|
||||
ProjectID: '0'
|
||||
VL-Stream-Fields: source_type,host,container_name,label.com.docker.compose.service
|
||||
VL-Msg-Field: message.msg
|
||||
VL-Time-Field: timestamp
|
||||
vlogs-2:
|
||||
type: http
|
||||
inputs:
|
||||
- parser
|
||||
uri: http://victorialogs-2:9428/insert/jsonline
|
||||
encoding:
|
||||
codec: json
|
||||
framing:
|
||||
|
||||
@@ -126,7 +126,7 @@ Metrics to save the output (in metric names or labels). Must have `__name__` key
|
||||
</td>
|
||||
<td>
|
||||
|
||||
`/api/v1/import`{{% deprecated_from "v1.19.2" anomaly %}}
|
||||
`/api/v1/import`
|
||||
</td>
|
||||
<td>
|
||||
|
||||
|
||||
@@ -75,3 +75,4 @@ Additional context
|
||||
### What more can we do?
|
||||
|
||||
Setup vmagents in Ground Control regions. That allows it to accept data close to storage and add more reliability if storage is temporarily offline.
|
||||
g
|
||||
|
||||
@@ -18,9 +18,6 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
|
||||
|
||||
## tip
|
||||
|
||||
* BUGFIX: [`rate_sum` stats function](https://docs.victoriametrics.com/victorialogs/logsql/#rate_sum-stats): fix inconsistent per-second rate calculation when time filters are specified via HTTP query parameters instead of LogsQL expression. This affects recording rule results. See [#9303](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9303).
|
||||
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): disabled opening of autocomplete popup on initial page load.
|
||||
|
||||
## [v1.24.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.24.0-victorialogs)
|
||||
|
||||
Released at 2025-06-20
|
||||
@@ -62,7 +59,6 @@ Released at 2025-06-20
|
||||
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): fix issue with hits chart ignoring selected AccountID and ProjectID. See [#9157](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9157).
|
||||
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): fix missing field values in auto-complete. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8749)
|
||||
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): remove the compact mode of the table tab and add field sorting capabilities to the JSON tab. See [#7047](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7047).
|
||||
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): fix errors in console about loading of `manifest.json` when accessing UI through vmauth with Basic Auth enabled.
|
||||
* BUGFIX: [Journald data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/): properly read log timestamp from `__REALTIME_TIMESTAMP` field according to [the docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/#time-field). See [#9144](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9144). The bug has been introduced in [v1.22.0-victorialogs](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.22.0-victorialogs).
|
||||
* BUGFIX: [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/): support `-` as a timestamp value, as described in [RFC5424](https://datatracker.ietf.org/doc/html/rfc5424#section-6.2.3).
|
||||
* BUGFIX: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): properly handle quotes inside quoted strings such as `"\""`. Previously this could lead to panics. See [#9219](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9219).
|
||||
|
||||
@@ -34,18 +34,17 @@ VictoriaLogs automatically sets the `level` log field according to the [`PRIORIT
|
||||
|
||||
VictoriaLogs uses `(_MACHINE_ID, _HOSTNAME, _SYSTEMD_UNIT)` as [stream fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
|
||||
for logs ingested via journald protocol. The list of log stream fields can be changed via `-journald.streamFields` command-line flag if needed,
|
||||
by providing comma-separated list of journald fields from [this list](https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html).
|
||||
by providing comma-separated list of journald fields from [this list](https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html).
|
||||
|
||||
Please make sure that the log stream fields passed to `-journald.streamFields` do not contain fields with high number or unbound number of unique values,
|
||||
Please make sure that the log stream fields passed to `-journald.streamFields` do not contain fields with high number or unbound number of unique values,
|
||||
since this may lead to [high cardinality issues](https://docs.victoriametrics.com/victorialogs/keyconcepts/#high-cardinality).
|
||||
This can happen with `_SYSTEMD_UNIT` if you have templated units with non-static instances
|
||||
such as `systemd-coredump@.service` or if you have a `.socket` unit with `Accept=yes`.
|
||||
|
||||
The following Journald fields are also good candidates for stream fields:
|
||||
|
||||
- `_TRANSPORT` (to separate out kernel and audit logs which are not associated with a `_SYSTEMD_UNIT`)
|
||||
- `_TRANSPORT`
|
||||
- `_SYSTEMD_USER_UNIT`
|
||||
|
||||
|
||||
## Dropping fields
|
||||
|
||||
VictoriaLogs can be configured for skipping the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
|
||||
@@ -11,7 +11,6 @@ tags:
|
||||
- logs
|
||||
aliases:
|
||||
- /victorialogs/keyConcepts.html
|
||||
- /victorialogs/keyConcepts/
|
||||
---
|
||||
## Data model
|
||||
|
||||
|
||||
@@ -1,508 +0,0 @@
|
||||
---
|
||||
weight: 3
|
||||
menu:
|
||||
docs:
|
||||
parent: victorialogs
|
||||
weight: 3
|
||||
title: vlagent
|
||||
tags:
|
||||
- logs
|
||||
aliases:
|
||||
- /vlagent.html
|
||||
- /vlagent/index.html
|
||||
- /vlagent/
|
||||
---
|
||||
|
||||
`vlagent` is a tiny agent which helps you collect logs from various sources
|
||||
and store them in [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/).
|
||||
See [Quick Start](#quick-start) for details.
|
||||
|
||||
|
||||
## Motivation
|
||||
|
||||
While VictoriaLogs provides an efficient solution to store and observe logs, it lacks replication out of the box.
|
||||
The previous solution was to configure clients to replicate log streams into multiple VictoriaLogs installations.
|
||||
`vlagent` is the missing piece for log stream replication.
|
||||
|
||||
## Features
|
||||
|
||||
* Can accept logs from popular log collectors. See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
|
||||
* Can replicate collected logs simultaneously to multiple VictoriaLogs instances - see [these docs](#replication-and-high-availability).
|
||||
* Works smoothly in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected logs
|
||||
are buffered at `-remoteWrite.tmpDataPath`. The buffered logs are sent to remote storage as soon as the connection
|
||||
to the remote storage is repaired. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
||||
## Quick Start
|
||||
|
||||
Please download `vlagent` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) (
|
||||
`vlagent` is also available as docker images at [Docker Hub](https://hub.docker.com/r/victoriametrics/vlagent/tags) and [Quay](https://quay.io/repository/victoriametrics/vlagent?tab=tags)),
|
||||
unpack it and pass the following flags to the `vlagent` binary in order to start sending the data to the VictoriaLogs remote storage:
|
||||
|
||||
* `-remoteWrite.url` with VictoriaLogs native protocol compatible remote storage endpoint, where to send the data to.
|
||||
The `-remoteWrite.url` may refer to [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) address. See [these docs](#srv-urls) for details.
|
||||
|
||||
Example command for writing the data received via [supported push-based protocols](#how-to-push-data-to-vlagent)
|
||||
to [single-node VictoriaLogs](https://docs.victoriametrics.com/victorialogs) located at `victoria-logs-host:9428`:
|
||||
|
||||
```sh
|
||||
/path/to/vlagent -remoteWrite.url=https://victoria-logs-host:9428/internal/insert
|
||||
```
|
||||
|
||||
Pass `-help` to `vlagent` in order to see [the full list of supported command-line flags with their descriptions](#advanced-usage).
|
||||
|
||||
### Replication and high availability
|
||||
|
||||
`vlagent` replicates the collected logs among multiple remote storage instances configured via `-remoteWrite.url` args.
|
||||
If a single remote storage instance is temporarily out of service, then the collected data remains available in another remote storage instance.
|
||||
`vlagent` buffers the collected data in files at `-remoteWrite.tmpDataPath` until the remote storage becomes available again,
|
||||
and then it sends the buffered data to the remote storage in order to prevent data gaps.
|
||||
|
||||
## Monitoring
|
||||
|
||||
`vlagent` exports various metrics in Prometheus exposition format at `http://vlagent-host:9429/metrics` page.
|
||||
We recommend setting up regular scraping of this page either through `vmagent` or by Prometheus-compatible scraper,
|
||||
so that the exported metrics may be analyzed later.
|
||||
|
||||
Use official [Grafana dashboard](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/vlagent.json) for `vlagent` state overview.
|
||||
Graphs on this dashboard contain useful hints - hover the `i` icon at the top left corner of each graph in order to read it.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add a review to the dashboard.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* It is recommended to [set up the official Grafana dashboard](#monitoring) in order to monitor the state of `vlagent`.
|
||||
|
||||
* It is recommended to increase `-remoteWrite.queues` if `vlagent_remotewrite_pending_data_bytes` [metric](#monitoring)
|
||||
grows constantly. It is also recommended to increase the `-remoteWrite.maxBlockSize` command-line flag in this case.
|
||||
This can improve data ingestion performance to the configured remote storage systems at the cost of higher memory usage.
|
||||
|
||||
* If you see gaps in the data pushed by `vlagent` to remote storage when `-remoteWrite.maxDiskUsagePerURL` is set,
|
||||
try increasing `-remoteWrite.queues`. Such gaps may appear because `vlagent` cannot keep up with sending the collected data to remote storage.
|
||||
Therefore, it starts dropping the buffered data if the on-disk buffer size exceeds `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
||||
* `vlagent` drops data blocks if remote storage replies with `400 Bad Request` or `404 Not Found` HTTP responses.
|
||||
The number of dropped blocks can be monitored via `vlagent_remotewrite_packets_dropped_total` metric exported at [/metrics page](#monitoring).
|
||||
|
||||
* `vlagent` buffers collected data at the `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
|
||||
The directory can grow large when remote storage is unavailable for extended periods of time and if the maximum directory size isn't limited
|
||||
with `-remoteWrite.maxDiskUsagePerURL` command-line flag.
|
||||
If you don't want to send all the buffered data from the directory to remote storage then simply stop `vlagent` and delete the directory.
|
||||
|
||||
* By default `vlagent` masks `-remoteWrite.url` with `secret-url` values in logs and at `/metrics` page because
|
||||
the url may contain sensitive information such as auth tokens or passwords.
|
||||
Pass `-remoteWrite.showURL` command-line flag when starting `vlagent` in order to see all the valid urls.
|
||||
|
||||
See also:
|
||||
|
||||
- [General Troubleshooting](https://docs.victoriametrics.com/victoriametrics/troubleshooting/)
|
||||
|
||||
|
||||
## Profiling
|
||||
|
||||
`vlagent` provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
|
||||
|
||||
* Memory profile can be collected with the following command (replace `0.0.0.0` with hostname if needed):
|
||||
|
||||
|
||||
```sh
|
||||
curl http://0.0.0.0:9429/debug/pprof/heap > mem.pprof
|
||||
```
|
||||
|
||||
|
||||
* CPU profile can be collected with the following command (replace `0.0.0.0` with hostname if needed):
|
||||
|
||||
|
||||
```sh
|
||||
curl http://0.0.0.0:9429/debug/pprof/profile > cpu.pprof
|
||||
```
|
||||
|
||||
|
||||
The command for collecting CPU profile waits for 30 seconds before returning.
|
||||
|
||||
The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
|
||||
|
||||
It is safe to share the collected profiles from a security point of view, since they do not contain sensitive information.
|
||||
|
||||
## Advanced usage
|
||||
|
||||
`vlagent` can be fine-tuned with various command-line flags. Run `./vlagent -help` in order to see the full list of these flags with their descriptions and default values:
|
||||
|
||||
```bash
|
||||
vlagent collects logs via popular data ingestion protocols and routes it to VictoriaLogs.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/victorialogs/vlagent/ .
|
||||
|
||||
-blockcache.missesBeforeCaching int
|
||||
The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
|
||||
-datadog.ignoreFields array
|
||||
Comma-separated list of fields to ignore for logs ingested via DataDog protocol. See https://docs.victoriametrics.com/victorialogs/data-ingestion/datadog-agent/#dropping-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-datadog.maxRequestSize size
|
||||
The maximum size in bytes of a single DataDog request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-datadog.streamFields array
|
||||
Comma-separated list of fields to use as log stream fields for logs ingested via DataDog protocol. See https://docs.victoriametrics.com/victorialogs/data-ingestion/datadog-agent/#stream-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-defaultMsgValue string
|
||||
Default value for _msg field if the ingested log entry doesn't contain it; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field (default "missing _msg field; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field")
|
||||
-elasticsearch.version string
|
||||
Elasticsearch version to report to client (default "8.9.0")
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-filestream.disableFadvise
|
||||
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
|
||||
-flagsAuthKey value
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-http.connTimeout duration
|
||||
Incoming connections to -httpListenAddr are closed after the configured timeout. This may help evenly spreading load among a cluster of services behind TCP-level load balancer. Zero value disables closing of incoming connections (default 2m0s)
|
||||
-http.disableCORS
|
||||
Disable CORS for all origins (*)
|
||||
-http.disableKeepAlive
|
||||
Whether to disable HTTP keep-alive for incoming connections at -httpListenAddr
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header, recommended: "default-src 'self'"
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header, recommended: 'max-age=31536000; includeSubDomains'
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password value
|
||||
Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
Flag value can be read from the given file when using -httpAuth.password=file:///abs/path/to/file or -httpAuth.password=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -httpAuth.password=http://host/path or -httpAuth.password=https://host/path
|
||||
-httpAuth.username string
|
||||
Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr array
|
||||
TCP address to listen for incoming http requests. Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vlagent instances on the same server. Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -tls and -httpListenAddr.useProxyProtocol
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-httpListenAddr.useProxyProtocol array
|
||||
Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-insert.disable
|
||||
Whether to disable /insert/* HTTP endpoints
|
||||
-insert.maxFieldsPerLine int
|
||||
The maximum number of log fields per line, which can be read by /insert/* handlers; see https://docs.victoriametrics.com/victorialogs/faq/#how-many-fields-a-single-log-entry-may-contain (default 1000)
|
||||
-insert.maxLineSizeBytes size
|
||||
The maximum size of a single line, which can be read by /insert/* handlers; see https://docs.victoriametrics.com/victorialogs/faq/#what-length-a-log-record-is-expected-to-have
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 262144)
|
||||
-insert.maxQueueDuration duration
|
||||
The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s)
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
|
||||
-internalinsert.disable
|
||||
Whether to disable /internal/insert HTTP endpoint
|
||||
-internalinsert.maxRequestSize size
|
||||
The maximum size in bytes of a single request, which can be accepted at /internal/insert HTTP endpoint
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-journald.ignoreFields array
|
||||
Comma-separated list of fields to ignore for logs ingested over journald protocol. See https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/#dropping-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-journald.includeEntryMetadata
|
||||
Include Journald fields with double underscore prefixes
|
||||
-journald.streamFields array
|
||||
Comma-separated list of fields to use as log stream fields for logs ingested over journald protocol. See https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/#stream-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-journald.tenantID string
|
||||
TenantID for logs ingested via the Journald endpoint. See https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/#multitenancy (default "0:0")
|
||||
-journald.timeField string
|
||||
Field to use as a log timestamp for logs ingested via journald protocol. See https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/#time-field (default "__REALTIME_TIMESTAMP")
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerMaxArgLen int
|
||||
The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' is prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 5000)
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-loki.disableMessageParsing
|
||||
Whether to disable automatic parsing of JSON-encoded log fields inside Loki log message into distinct log fields
|
||||
-loki.maxRequestSize size
|
||||
The maximum size in bytes of a single Loki request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent insert requests. Set higher value when clients send data over slow networks. Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage. See also -insert.maxQueueDuration (default 20)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metrics.exposeMetadata
|
||||
Whether to expose TYPE and HELP metadata at the /metrics page, which is exposed at -httpListenAddr . The metadata may be needed when the /metrics page is consumed by systems, which require this information. For example, Managed Prometheus in Google Cloud - https://cloud.google.com/stackdriver/docs/managed-prometheus/troubleshooting#missing-metric-type
|
||||
-metricsAuthKey value
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -metricsAuthKey=file:///abs/path/to/file or -metricsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -metricsAuthKey=http://host/path or -metricsAuthKey=https://host/path
|
||||
-opentelemetry.maxRequestSize size
|
||||
The maximum size in bytes of a single OpenTelemetry request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-pprofAuthKey value
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -pprofAuthKey=file:///abs/path/to/file or -pprofAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -pprofAuthKey=http://host/path or -pprofAuthKey=https://host/path
|
||||
-pushmetrics.disableCompression
|
||||
Whether to disable request body compression when pushing metrics to every -pushmetrics.url
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to every -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to every -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-pushmetrics.header array
|
||||
Optional HTTP request header to send to every -pushmetrics.url . For example, -pushmetrics.header='Authorization: Basic foobar' adds 'Authorization: Basic foobar' header to every request to every -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to every -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.basicAuth.password array
|
||||
Optional basic auth password to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.basicAuth.passwordFile array
|
||||
Optional path to basic auth password to use for the corresponding -remoteWrite.url. The file is re-read every second
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.basicAuth.username array
|
||||
Optional basic auth username to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.bearerToken array
|
||||
Optional bearer auth token to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.bearerTokenFile array
|
||||
Optional path to bearer token file to use for the corresponding -remoteWrite.url. The token is re-read from the file every second
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.flushInterval duration
|
||||
Interval for flushing the data to remote storage. This option takes effect only when less than 2MB of data per second are pushed to -remoteWrite.url (default 1s)
|
||||
-remoteWrite.headers array
|
||||
Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.maxBlockSize size
|
||||
The maximum block size to send to remote storage. Bigger blocks may improve performance at the cost of the increased memory usage.
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 8388608)
|
||||
-remoteWrite.maxDiskUsagePerURL array
|
||||
The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. Buffered data is stored in ~500MB chunks. It is recommended to set the value for this flag to a multiple of the block size 500MB. Disk usage is unlimited if the value is set to 0
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB. (default 0)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.oauth2.clientID array
|
||||
Optional OAuth2 clientID to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.oauth2.clientSecret array
|
||||
Optional OAuth2 clientSecret to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.oauth2.clientSecretFile array
|
||||
Optional OAuth2 clientSecretFile to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.oauth2.endpointParams array
|
||||
Optional OAuth2 endpoint parameters to use for the corresponding -remoteWrite.url . The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.oauth2.scopes array
|
||||
Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.oauth2.tokenUrl array
|
||||
Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.proxyURL array
|
||||
Optional proxy URL for writing data to the corresponding -remoteWrite.url. Supported proxies: http, https, socks5. Example: -remoteWrite.proxyURL=socks5://proxy:1234
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.queues int
|
||||
The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues isn't enough for sending high volume of collected data to remote storage. Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage (default 20)
|
||||
-remoteWrite.rateLimit array
|
||||
Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data (default 0)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.retryMaxTime array
|
||||
The max time spent on retry attempts to send a block of data to the corresponding -remoteWrite.url. Change this value if it is expected for -remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval (default 1m0s)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.retryMinInterval array
|
||||
The minimum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxTime (default 1s)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.sendTimeout array
|
||||
Timeout for sending a single block of data to the corresponding -remoteWrite.url (default 1m0s)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.showURL
|
||||
Whether to show -remoteWrite.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
|
||||
-remoteWrite.tlsCAFile array
|
||||
Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. By default, system CA is used
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.tlsCertFile array
|
||||
Optional path to client-side TLS certificate file to use when connecting to the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.tlsHandshakeTimeout array
|
||||
The timeout for establishing tls connections to the corresponding -remoteWrite.url (default 20s)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.tlsInsecureSkipVerify array
|
||||
Whether to skip tls verification when connecting to the corresponding -remoteWrite.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-remoteWrite.tlsKeyFile array
|
||||
Optional path to client-side TLS certificate key to use when connecting to the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.tlsServerName array
|
||||
Optional TLS server name to use for connections to the corresponding -remoteWrite.url. By default, the server name from -remoteWrite.url is used
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.tmpDataPath string
|
||||
Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . See also -remoteWrite.maxDiskUsagePerURL (default "vlagent-remotewrite-data")
|
||||
-remoteWrite.url array
|
||||
Remote storage URL to write data to. It must support VictoriaLogs native protocol. Example url: http://<victorialogs-host>:9428/internal/insert. Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.compressMethod.tcp array
|
||||
Compression method for syslog messages received at the corresponding -syslog.listenAddr.tcp. Supported values: none, gzip, deflate. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#compression
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.compressMethod.udp array
|
||||
Compression method for syslog messages received at the corresponding -syslog.listenAddr.udp. Supported values: none, gzip, deflate. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#compression
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.decolorizeFields.tcp array
|
||||
Fields to remove ANSI color codes across logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#decolorizing-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.decolorizeFields.udp array
|
||||
Fields to remove ANSI color codes across logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#decolorizing-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.extraFields.tcp array
|
||||
Fields to add to logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.extraFields.udp array
|
||||
Fields to add to logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.ignoreFields.tcp array
|
||||
Fields to ignore in logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.ignoreFields.udp array
|
||||
Fields to ignore in logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.listenAddr.tcp array
|
||||
Comma-separated list of TCP addresses to listen to for Syslog messages. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.listenAddr.udp array
|
||||
Comma-separated list of UDP addresses to listen to for Syslog messages. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.streamFields.tcp array
|
||||
Fields to use as log stream labels for logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.streamFields.udp array
|
||||
Fields to use as log stream labels for logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.tenantID.tcp array
|
||||
TenantID for logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.tenantID.udp array
|
||||
TenantID for logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.timezone string
|
||||
Timezone to use when parsing timestamps in RFC3164 syslog messages. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 . See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/ (default "Local")
|
||||
-syslog.tls array
|
||||
Whether to enable TLS for receiving syslog messages at the corresponding -syslog.listenAddr.tcp. The corresponding -syslog.tlsCertFile and -syslog.tlsKeyFile must be set if -syslog.tls is set. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-syslog.tlsCertFile array
|
||||
Path to file with TLS certificate for the corresponding -syslog.listenAddr.tcp if the corresponding -syslog.tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.tlsCipherSuites array
|
||||
Optional list of TLS cipher suites for -syslog.listenAddr.tcp if -syslog.tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants . See also https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.tlsKeyFile array
|
||||
Path to file with TLS key for the corresponding -syslog.listenAddr.tcp if the corresponding -syslog.tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.tlsMinVersion string
|
||||
The minimum TLS version to use for -syslog.listenAddr.tcp if -syslog.tls is set. Supported values: TLS10, TLS11, TLS12, TLS13. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security (default "TLS13")
|
||||
-syslog.useLocalTimestamp.tcp array
|
||||
Whether to use local timestamp instead of the original timestamp for the ingested syslog messages at the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#log-timestamps
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-syslog.useLocalTimestamp.udp array
|
||||
Whether to use local timestamp instead of the original timestamp for the ingested syslog messages at the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#log-timestamps
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-tls array
|
||||
Whether to enable TLS for incoming HTTP requests at the given -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set. See also -mtls
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-tlsCertFile array
|
||||
Path to file with TLS certificate for the corresponding -httpListenAddr if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated. See also -tlsAutocertHosts
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsCipherSuites array
|
||||
Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsKeyFile array
|
||||
Path to file with TLS key for the corresponding -httpListenAddr if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated. See also -tlsAutocertHosts
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsMinVersion array
|
||||
Optional minimum TLS version to use for the corresponding -httpListenAddr if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
@@ -1608,10 +1608,6 @@ Below is the output for `/path/to/vmselect -help`:
|
||||
Log queries with execution time exceeding this value. Zero disables slow query logging. See also -search.logQueryMemoryUsage (default 5s)
|
||||
-search.logSlowQueryStats duration
|
||||
Log query statistics if execution time exceeds this value - see https://docs.victoriametrics.com/victoriametrics/query-stats . Zero disables slow query statistics logging. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
-search.logSlowQueryStatsHeaders array
|
||||
White list of header keys to log for queries exceeding -search.logSlowQueryStats. By default, no headers are logged. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-search.maxBinaryOpPushdownLabelValues instance
|
||||
The maximum number of values for a label in the first expression that can be extracted as a common label filter and pushed down to the second expression in a binary operation. A larger value makes the pushed-down filter more complex but fewer time series will be returned. This flag is useful when selective label contains numerous values, for example instance, and storage resources are abundant. (default 100)
|
||||
-search.maxConcurrentRequests int
|
||||
|
||||
@@ -15,15 +15,15 @@ aliases:
|
||||
---
|
||||
## What is the main purpose of VictoriaMetrics?
|
||||
|
||||
To be the best tool for monitoring and observability.
|
||||
To provide the best observability solution.
|
||||
|
||||
## Who uses VictoriaMetrics?
|
||||
|
||||
See [case studies](https://docs.victoriametrics.com/victoriametrics/casestudies/) and [articles](https://docs.victoriametrics.com/victoriametrics/articles).
|
||||
See [case studies](https://docs.victoriametrics.com/victoriametrics/casestudies/).
|
||||
|
||||
## Which features does VictoriaMetrics have?
|
||||
|
||||
See the full list of [Prominent Features](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prominent-features).
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prominent-features).
|
||||
|
||||
## Are there performance comparisons with other solutions?
|
||||
|
||||
@@ -31,39 +31,23 @@ Yes. See [these benchmarks](https://docs.victoriametrics.com/victoriametrics/art
|
||||
|
||||
## How to start using VictoriaMetrics?
|
||||
|
||||
Follow the [Quick Start guide](https://docs.victoriametrics.com/victoriametrics/quick-start/).
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/quick-start/).
|
||||
|
||||
## How to contribute to VictoriaMetrics?
|
||||
|
||||
See the [Contributing](https://docs.victoriametrics.com/victoriametrics/contributing/) guide.
|
||||
|
||||
## Does VictoriaMetrics support high availability?
|
||||
|
||||
Yes. Learn more in the High Availability docs for both [single-node](https://docs.victoriametrics.com/victoriametrics/#high-availability)
|
||||
and [cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#high-availability) setups.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/contributing/).
|
||||
|
||||
## Does VictoriaMetrics support replication?
|
||||
|
||||
Yes. See [Replication and data safety](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#replication-and-data-safety) for details.
|
||||
|
||||
## What are scalability limits of VictoriaMetrics?
|
||||
|
||||
The single-node version scales vertically. It can handle up to 100 million [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series)
|
||||
and 2 million samples per second (based on real usage).
|
||||
|
||||
The cluster version scales both vertically and horizontally. It can handle billions of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series)
|
||||
and hundreds of millions of samples per second (based on real usage).
|
||||
|
||||
See [performance comparison with other solutions](https://docs.victoriametrics.com/victoriametrics/faq/#are-there-performance-comparisons-with-other-solutions).
|
||||
Yes. See [these docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#replication-and-data-safety) for details.
|
||||
|
||||
## Can I use VictoriaMetrics instead of Prometheus?
|
||||
|
||||
Yes, in most cases. VictoriaMetrics can substitute for Prometheus in the following aspects:
|
||||
|
||||
* Prometheus-compatible service discovery and scraping via [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and single-node VictoriaMetrics.
|
||||
See [How to scrape Prometheus exporters such as node-exporter](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
* Prometheus-compatible alerting and recording rules via [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/).
|
||||
* Prometheus-compatible querying in Grafana. See [integrations/Grafana](https://docs.victoriametrics.com/victoriametrics/integrations/grafana/).
|
||||
* Prometheus-compatible service discovery and target scraping can be done with [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and with single-node VictoriaMetrics. See [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-scrape-prometheus-exporters-such-as-node-exporter).
|
||||
* Prometheus-compatible alerting rules and recording rules can be processed with [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/).
|
||||
* Prometheus-compatible querying in Grafana is supported by VictoriaMetrics. See [these docs](https://docs.victoriametrics.com/victoriametrics/integrations/grafana/).
|
||||
|
||||
## What is the difference between vmagent and Prometheus?
|
||||
|
||||
@@ -71,15 +55,15 @@ While both [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/)
|
||||
read Prometheus-compatible [scrape configs](https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs)
|
||||
and send data to multiple remote storage systems, vmagent has the following additional features:
|
||||
|
||||
* vmagent usually requires less CPU, RAM and disk IO compared to Prometheus when scraping an enormous number of targets (more than 1000)
|
||||
* vmagent usually requires lower amounts of CPU, RAM and disk IO compared to Prometheus when scraping an enormous number of targets (more than 1000)
|
||||
or targets with a great number of exposed metrics.
|
||||
* vmagent provides independent [disk-backed buffers](https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue) for each configured remote storage (see `-remoteWrite.url`).
|
||||
This means that slow or temporarily unavailable storage doesn't prevent it from sending data to healthy storage in parallel.
|
||||
Prometheus uses a single shared buffer for all the configured remote storage systems (see `remote_write->url`) with a hardcoded retention of 2 hours.
|
||||
* vmagent can accept, relabel, filter and aggregate data obtained via multiple data ingestion protocols in addition to data scraped from Prometheus targets.
|
||||
That means it supports both [pull](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#pull-model) and [push](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#push-model) protocols for data ingestion.
|
||||
* vmagent provides independent [disk-backed buffers](https://docs.victoriametrics.com/victoriametrics/vmagent/#calculating-disk-space-for-persistence-queue) for each configured remote storage (see `-remoteWrite.url`). This means that slow or temporarily unavailable storage
|
||||
doesn't prevent it from sending data to healthy storage in parallel. Prometheus uses a single shared buffer for all the configured remote storage systems (see `remote_write->url`)
|
||||
with a hardcoded retention of 2 hours.
|
||||
* vmagent may accept, relabel and filter data obtained via multiple data ingestion protocols in addition to data scraped from Prometheus targets.
|
||||
That means it supports both `pull` and `push` protocols for data ingestion.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/vmagent/#features) for details.
|
||||
* vmagent can be used in different [use cases](https://docs.victoriametrics.com/victoriametrics/vmagent/#use-cases):
|
||||
* vmagent may be used in different [use cases](https://docs.victoriametrics.com/victoriametrics/vmagent/#use-cases):
|
||||
* [IoT and edge monitoring](https://docs.victoriametrics.com/victoriametrics/vmagent/#iot-and-edge-monitoring)
|
||||
* [Drop-in replacement for Prometheus](https://docs.victoriametrics.com/victoriametrics/vmagent/#drop-in-replacement-for-prometheus)
|
||||
* [Statsd alternative](https://docs.victoriametrics.com/victoriametrics/vmagent/#statsd-alternative)
|
||||
@@ -93,19 +77,16 @@ and send data to multiple remote storage systems, vmagent has the following addi
|
||||
|
||||
## What is the difference between vmagent and Prometheus agent?
|
||||
|
||||
Both [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [Prometheus agent](https://prometheus.io/blog/2021/11/16/agent/)
|
||||
serve the same purpose – to efficiently scrape Prometheus-compatible targets at the edge. They have the following differences:
|
||||
Both [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [Prometheus agent](https://prometheus.io/blog/2021/11/16/agent/) serve the same purpose – to efficiently scrape Prometheus-compatible targets at the edge. They have the following differences:
|
||||
|
||||
* vmagent usually requires less CPU, RAM and disk IO compared to the Prometheus agent. See [comparison of metrics collection agents](https://victoriametrics.com/blog/opentelemetry-prometheus-and-more/).
|
||||
* vmagent supports both [pull](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#pull-model) and [push](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#push-model)
|
||||
data collection – it can accept data via many popular data ingestion protocols such as InfluxDB line protocol, Graphite protocol, OpenTSDB protocol, DataDog protocol, Prometheus protocol, OpenTelemetry metrics protocol, CSV and JSON – see [these docs](https://docs.victoriametrics.com/victoriametrics/vmagent/#features).
|
||||
* vmagent usually requires lower amounts of CPU, RAM and disk IO compared to the Prometheus agent.
|
||||
* vmagent supports both `pull` and `push` data collection – it can accept data via many popular data ingestion protocols such as InfluxDB line protocol, Graphite protocol, OpenTSDB protocol, DataDog protocol, Prometheus protocol, OpenTelemetry metrics protocol, CSV and JSON – see [these docs](https://docs.victoriametrics.com/victoriametrics/vmagent/#features).
|
||||
* vmagent doesn't have limitations on backfilling of historical data.
|
||||
* vmagent can easily scale horizontally to multiple instances for scraping a big number of targets – see [Scraping big number of targets](https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets) docs.
|
||||
* vmagent can easily scale horizontally to multiple instances for scraping a big number of targets – see [these docs](https://docs.victoriametrics.com/victoriametrics/vmagent/#scraping-big-number-of-targets).
|
||||
* vmagent supports [improved relabeling](https://docs.victoriametrics.com/victoriametrics/relabeling/).
|
||||
* vmagent can limit the number of scraped metrics per target – see [these docs](https://docs.victoriametrics.com/victoriametrics/vmagent/#cardinality-limiter).
|
||||
* vmagent supports loading scrape configs from multiple files – see [these docs](https://docs.victoriametrics.com/victoriametrics/vmagent/#loading-scrape-configs-from-multiple-files).
|
||||
* vmagent supports data reading and data writing from/to Kafka – see [these docs](https://docs.victoriametrics.com/victoriametrics/vmagent/#kafka-integration).
|
||||
* vmagent has better remote write compression to reduce transferred traffic – see [these docs](https://docs.victoriametrics.com/victoriametrics/vmagent/#victoriametrics-remote-write-protocol).
|
||||
* vmagent can read and update scrape configs from http and https URLs, while the Prometheus agent can only read them from the local file system.
|
||||
* vmagent supports [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/) for performing aggregates on collected or received samples before sending them to remote storage.
|
||||
|
||||
@@ -174,8 +155,8 @@ The main differences between Cortex and VictoriaMetrics:
|
||||
to [VictoriaMetrics' architecture](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#architecture-overview).
|
||||
* VictoriaMetrics provides [production-ready single-node solution](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/),
|
||||
which is much easier to set up and operate than a Cortex cluster.
|
||||
* Cortex can lose up to 12 hours of recent data on Ingestor failure – see [the corresponding docs](https://github.com/cortexproject/cortex/blob/fe56f1420099aa1bf1ce09316c186e05bddee879/docs/architecture.md#ingesters-failure-and-data-loss).
|
||||
VictoriaMetrics may lose only a few seconds of recent data, which hasn't been synced to persistent storage yet.
|
||||
* Cortex may lose up to 12 hours of recent data on Ingestor failure – see [the corresponding docs](https://github.com/cortexproject/cortex/blob/fe56f1420099aa1bf1ce09316c186e05bddee879/docs/architecture.md#ingesters-failure-and-data-loss).
|
||||
VictoriaMetrics may lose only a few seconds of recent data, which isn't synced to persistent storage yet.
|
||||
See [this article for details](https://medium.com/@valyala/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704).
|
||||
* Cortex is usually slower and requires more CPU and RAM than VictoriaMetrics. See [this talk from adidas at PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/) and [other case studies](https://docs.victoriametrics.com/victoriametrics/casestudies/).
|
||||
* VictoriaMetrics accepts data in multiple popular data ingestion protocols in addition to Prometheus remote_write protocol – InfluxDB, OpenTSDB, Graphite, CSV, JSON, native binary.
|
||||
@@ -192,12 +173,12 @@ The main differences between Cortex and VictoriaMetrics:
|
||||
* Thanos stores data in object storage (Amazon S3 or Google GCS), while VictoriaMetrics stores data in block storage
|
||||
([GCP persistent disks](https://cloud.google.com/compute/docs/disks#pdspecs), Amazon EBS or bare metal HDD).
|
||||
While object storage is usually less expensive, block storage provides much lower latencies and higher throughput.
|
||||
VictoriaMetrics works perfectly with HDD-based block storage – which eliminates the need for using more expensive SSD or NVMe disks in most cases.
|
||||
* Thanos can lose up to 2 hours of recent data, which hasn't been uploaded to object storage yet. VictoriaMetrics may lose only a few seconds of recent data,
|
||||
VictoriaMetrics works perfectly with HDD-based block storage – there is no need for using more expensive SSD or NVMe disks in most cases.
|
||||
* Thanos may lose up to 2 hours of recent data, which wasn't uploaded yet to object storage. VictoriaMetrics may lose only a few seconds of recent data,
|
||||
which hasn't been synced to persistent storage yet. See [this article for details](https://medium.com/@valyala/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704).
|
||||
* VictoriaMetrics provides a [production-ready single-node solution](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/),
|
||||
which is much easier to set up and operate than Thanos components.
|
||||
* Compared to VictoriaMetrics, Thanos has more moving parts and relies more heavily on stable network connections, which can make it trickier to set up and operate.
|
||||
* Thanos may be harder to set up and operate compared to VictoriaMetrics, since it has more moving parts, which can be connected with less reliable networks.
|
||||
See [this article for details](https://medium.com/faun/comparing-thanos-to-victoriametrics-cluster-b193bea1683).
|
||||
* Thanos is usually slower and requires more CPU and RAM than VictoriaMetrics. See [this talk from adidas at PromCon 2019](https://promcon.io/2019-munich/talks/remote-write-storage-wars/).
|
||||
* VictoriaMetrics accepts data via multiple popular data ingestion protocols in addition to the Prometheus remote_write protocol – InfluxDB, OpenTSDB, Graphite, CSV, JSON, native binary.
|
||||
@@ -208,18 +189,16 @@ The main differences between Cortex and VictoriaMetrics:
|
||||
## How does VictoriaMetrics compare to [InfluxDB](https://www.influxdata.com/time-series-platform/influxdb/)?
|
||||
|
||||
* VictoriaMetrics requires [10x less RAM](https://medium.com/@valyala/insert-benchmarks-with-inch-influxdb-vs-victoriametrics-e31a41ae2893) and it [works faster](https://medium.com/@valyala/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
|
||||
* VictoriaMetrics uses less storage space than InfluxDB for production data.
|
||||
* VictoriaMetrics doesn't support InfluxQL or Flux, but provides a better query language – [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/). See [this tutorial](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085) for details.
|
||||
* VictoriaMetrics needs lower amounts of storage space than InfluxDB for production data.
|
||||
* VictoriaMetrics doesn't support InfluxQL or Flux but provides a better query language – [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/). See [this tutorial](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085) for details.
|
||||
* VictoriaMetrics accepts data in multiple popular data ingestion protocols in addition to InfluxDB – Prometheus remote_write, OpenTSDB, Graphite, CSV, JSON, native binary.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-time-series-data) for details.
|
||||
* VictoriaMetrics can be queried via [Graphite's API](https://docs.victoriametrics.com/victoriametrics/integrations/graphite/#graphite-api-usage).
|
||||
|
||||
See [How to migrate from InfluxDB to VictoriaMetrics](https://docs.victoriametrics.com/guides/migrate-from-influx/).
|
||||
|
||||
## How does VictoriaMetrics compare to [TimescaleDB](https://www.timescale.com/)?
|
||||
|
||||
* TimescaleDB insists on using SQL as a query language. While SQL is more powerful than PromQL, this power is rarely required during typical usages of a TSDB. Real-world queries usually [look clearer and simpler when written in PromQL than in SQL](https://medium.com/@valyala/promql-tutorial-for-beginners-9ab455142085).
|
||||
* VictoriaMetrics requires [up to 70x less storage space compared to TimescaleDB](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) for storing the same amount of time series data. The gap in storage space usage can be decreased from 70x to 3x if [compression in TimescaleDB is properly configured](https://docs.timescale.com/use-timescale/latest/compression/) (it isn't an easy task in general :)).
|
||||
* VictoriaMetrics requires [up to 70x less storage space compared to TimescaleDB](https://medium.com/@valyala/when-size-matters-benchmarking-victoriametrics-vs-timescale-and-influxdb-6035811952d4) for storing the same amount of time series data. The gap in storage space usage can be lowered from 70x to 3x if [compression in TimescaleDB is properly configured](https://docs.timescale.com/use-timescale/latest/compression/) (it isn't an easy task in general :)).
|
||||
* VictoriaMetrics requires up to 10x less CPU and RAM resources than TimescaleDB for processing production data. See [this article](https://abiosgaming.com/press/high-cardinality-aggregations/) for details.
|
||||
* TimescaleDB is [harder to set up, configure and operate](https://docs.timescale.com/timescaledb/latest/how-to-guides/install-timescaledb/self-hosted/ubuntu/installation-apt-ubuntu/) than VictoriaMetrics (see [how to run VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-start-victoriametrics)).
|
||||
* VictoriaMetrics accepts data in multiple popular data ingestion protocols – InfluxDB, OpenTSDB, Graphite, CSV – while TimescaleDB supports only SQL inserts.
|
||||
@@ -237,9 +216,17 @@ The following versions are open source and free:
|
||||
* [Single-node version](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/).
|
||||
* [Cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster).
|
||||
|
||||
We provide commercial support for both versions. [Contact us](https://victoriametrics.com/contact-us/) for pricing.
|
||||
We provide commercial support for both versions. [Contact us](mailto:info@victoriametrics.com) for the pricing.
|
||||
|
||||
[VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_faq) – the most cost-efficient hosted monitoring platform, operated by VictoriaMetrics core team.
|
||||
The following commercial versions of VictoriaMetrics are available:
|
||||
|
||||
* [VictoriaMetrics Cloud](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_vm_faq) – the most cost-efficient hosted monitoring platform, operated by VictoriaMetrics core team.
|
||||
|
||||
The following commercial versions of VictoriaMetrics are planned:
|
||||
|
||||
* Cloud monitoring solution based on VictoriaMetrics.
|
||||
|
||||
[Contact us](mailto:info@victoriametrics.com) for more information on our plans.
|
||||
|
||||
## Why doesn't VictoriaMetrics support the [Prometheus remote read API](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#%3Cremote_read%3E)?
|
||||
|
||||
@@ -248,8 +235,7 @@ if a query covers 1000 metrics with 10K values each, then the remote read API ha
|
||||
This is slow and expensive.
|
||||
Prometheus' remote read API isn't intended for querying foreign data – aka `global query view`. See [this issue](https://github.com/prometheus/prometheus/issues/4456) for details.
|
||||
|
||||
Instead, query VictoriaMetrics directly via [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui),
|
||||
the [Prometheus Querying API](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-usage)
|
||||
So just query VictoriaMetrics directly via [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui), the [Prometheus Querying API](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-usage)
|
||||
or via [Prometheus datasource in Grafana](https://docs.victoriametrics.com/victoriametrics/integrations/grafana/).
|
||||
|
||||
## Does VictoriaMetrics deduplicate data from Prometheus instances scraping the same targets (aka `HA pairs`)?
|
||||
@@ -258,7 +244,7 @@ Yes. See [these docs](https://docs.victoriametrics.com/victoriametrics/single-se
|
||||
|
||||
## Where is the source code of VictoriaMetrics?
|
||||
|
||||
Source code for VictoriaMetrics can be found in the following locations:
|
||||
Source code for the following versions is available in the following places:
|
||||
|
||||
* [Single-node version](https://github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
* [Cluster version](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/cluster)
|
||||
@@ -272,24 +258,25 @@ and scales horizontally to multiple nodes.
|
||||
|
||||
## What is the difference between single-node and cluster versions of VictoriaMetrics?
|
||||
|
||||
Both the [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and
|
||||
[cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) versions of VictoriaMetrics are built
|
||||
on the same core code, so they share many features. That said, here are the key differences between them:
|
||||
Both [single-node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and
|
||||
[cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) versions of VictoriaMetrics
|
||||
share the core source code, so they have many common features. They have the following differences though:
|
||||
|
||||
* The [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) runs on a single host,
|
||||
* [Single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) runs on a single host,
|
||||
while [cluster version of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) can scale to many hosts.
|
||||
The single-node VictoriaMetrics can scale vertically, i.e. its capacity and performance scale almost linearly when increasing
|
||||
Single-node VictoriaMetrics scales vertically though, i.e. its capacity and performance scale almost linearly when increasing
|
||||
available CPU, RAM, disk IO and disk space. See [an article about vertical scalability of a single-node VictoriaMetrics](https://valyala.medium.com/measuring-vertical-scalability-for-time-series-databases-in-google-cloud-92550d78d8ae).
|
||||
|
||||
* The cluster version of VictoriaMetrics supports [multitenancy](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy),
|
||||
but single-node VictoriaMetrics does not.
|
||||
* Cluster version of VictoriaMetrics supports [multitenancy](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy),
|
||||
while single-node VictoriaMetrics doesn't support it.
|
||||
|
||||
* The cluster version of VictoriaMetrics supports data replication, while single-node VictoriaMetrics relies on the durability
|
||||
of the persistent storage pointed by the `-storageDataPath` command-line flag.
|
||||
* Cluster version of VictoriaMetrics supports data replication, while single-node VictoriaMetrics relies on the durability
|
||||
of the persistent storage pointed by `-storageDataPath` command-line flag.
|
||||
See [these docs](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#replication-and-data-safety) for details.
|
||||
|
||||
* The single-node version of VictoriaMetrics delivers higher capacity and performance than the cluster version when
|
||||
running on the same hardware with equal CPU and RAM, as it avoids the overhead of network data transfers between cluster components.
|
||||
* Single-node VictoriaMetrics provides higher capacity and performance compared to the cluster version of VictoriaMetrics
|
||||
when running on the same hardware with the same amounts of CPU and RAM, since it has no overhead on data transfer
|
||||
between cluster components over the network.
|
||||
|
||||
See also [which type of VictoriaMetrics is recommended to use](#which-victoriametrics-type-is-recommended-for-use-in-production---single-node-or-cluster).
|
||||
|
||||
@@ -312,12 +299,7 @@ See [these docs](https://docs.victoriametrics.com/victoriametrics/cluster-victor
|
||||
|
||||
## How to set a memory limit for VictoriaMetrics components?
|
||||
|
||||
All VictoriaMetrics components provide command-line flags to control the size of internal buffers and caches:
|
||||
`-memory.allowedPercent` and `-memory.allowedBytes` (pass `-help` to any VictoriaMetrics component in order to see the description for these flags).
|
||||
These limits don't take into account additional memory, which may be needed for processing incoming queries.
|
||||
Hard limits may be enforced only by the OS via [cgroups](https://en.wikipedia.org/wiki/Cgroups),
|
||||
Docker (see [these docs](https://docs.docker.com/config/containers/resource_constraints)) or
|
||||
Kubernetes (see [these docs](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers)).
|
||||
All the VictoriaMetrics components provide command-line flags to control the size of internal buffers and caches: `-memory.allowedPercent` and `-memory.allowedBytes` (pass `-help` to any VictoriaMetrics component in order to see the description for these flags). These limits don't take into account additional memory, which may be needed for processing incoming queries. Hard limits may be enforced only by the OS via [cgroups](https://en.wikipedia.org/wiki/Cgroups), Docker (see [these docs](https://docs.docker.com/config/containers/resource_constraints)) or Kubernetes (see [these docs](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers)).
|
||||
|
||||
Memory usage for VictoriaMetrics components can be tuned according to the following docs:
|
||||
|
||||
@@ -395,8 +377,7 @@ The solution is to add more memory or to reduce the number of [active time serie
|
||||
|
||||
See [this article](https://valyala.medium.com/how-to-optimize-promql-and-metricsql-queries-85a1b75bf986).
|
||||
|
||||
VictoriaMetrics also provides [query tracer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#query-tracing)
|
||||
and [cardinality explorer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer),
|
||||
VictoriaMetrics also provides [query tracer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#query-tracing) and [cardinality explorer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer),
|
||||
which can help during query optimization.
|
||||
|
||||
See also [troubleshooting slow queries](https://docs.victoriametrics.com/victoriametrics/troubleshooting/#slow-queries).
|
||||
@@ -406,7 +387,9 @@ See also [troubleshooting slow queries](https://docs.victoriametrics.com/victori
|
||||
Both [single-node VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and
|
||||
[VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) are production-ready.
|
||||
|
||||
See [Scalability limits of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/faq/#what-are-scalability-limits-of-victoriametrics).
|
||||
Single-node VictoriaMetrics is able to handle quite big workloads in production
|
||||
with tens of millions of [active time series](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-an-active-time-series)
|
||||
at the ingestion rate of millions of samples per second. See [this case study](https://docs.victoriametrics.com/victoriametrics/casestudies/#wixcom).
|
||||
|
||||
Single-node VictoriaMetrics requires lower amounts of CPU and RAM for handling the same workload compared
|
||||
to cluster version of VictoriaMetrics, since it doesn't need to pass the encoded data over the network
|
||||
@@ -430,13 +413,11 @@ Cluster version of VictoriaMetrics may be preferred over single-node VictoriaMet
|
||||
at ingestion rates exceeding a million samples per second, then it is better to use cluster version of VictoriaMetrics,
|
||||
since its capacity can [scale horizontally with the number of nodes in the cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#cluster-resizing-and-scalability).
|
||||
|
||||
[Don't choose cluster unless you have to](https://victoriametrics.com/blog/dont-default-to-microservices-you-will-thank-us-later/).
|
||||
|
||||
## How to migrate data from single-node VictoriaMetrics to cluster version?
|
||||
|
||||
The single-node version of VictoriaMetrics stores data on disk in slightly different format compared to the cluster version of VictoriaMetrics.
|
||||
This makes it impossible to just copy the on-disk data from `-storageDataPath` directory from single-node VictoriaMetrics to a `vmstorage` node in VictoriaMetrics cluster.
|
||||
If you need to migrate data from a single-node VictoriaMetrics to the cluster version, then [follow these instructions](https://docs.victoriametrics.com/victoriametrics/vmctl/victoriametrics/).
|
||||
Single-node VictoriaMetrics stores data on disk in a slightly different format compared to the cluster version of VictoriaMetrics.
|
||||
So it is impossible to just copy the on-disk data from `-storageDataPath` directory from single-node VictoriaMetrics to a `vmstorage` node in VictoriaMetrics cluster.
|
||||
If you need to migrate data from single-node VictoriaMetrics to the cluster version, then [follow these instructions](https://docs.victoriametrics.com/victoriametrics/vmctl/victoriametrics/).
|
||||
|
||||
## Why isn't MetricsQL 100% compatible with PromQL?
|
||||
|
||||
@@ -477,14 +458,14 @@ Single-node VictoriaMetrics cannot be restarted / upgraded or downgraded without
|
||||
## Why VictoriaMetrics misses automatic data re-balancing between vmstorage nodes?
|
||||
|
||||
VictoriaMetrics doesn't rebalance data between `vmstorage` nodes when new `vmstorage` nodes are added to the cluster.
|
||||
This means that newly added `vmstorage` nodes will have less data at `-storageDataPath` compared to the older `vmstorage` nodes
|
||||
This means that newly added `vmstorage` nodes will have less data at `-storageDataPath` compared to the old `vmstorage` nodes
|
||||
until the historical data is removed from the old `vmstorage` nodes when it goes outside the configured [retention](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#retention).
|
||||
|
||||
The automatic re-balancing is the process of moving data between `vmstorage` nodes, so every node eventually contains the same amount of data.
|
||||
The automatic rebalancing is the process of moving data between `vmstorage` nodes, so every node has the same amounts of data eventually.
|
||||
It is disabled by default because it may consume additional CPU, network bandwidth and disk IO at `vmstorage` nodes for long periods of time,
|
||||
which, in turn, can negatively impact VictoriaMetrics cluster availability.
|
||||
|
||||
Additionally, it is unclear how to handle the automatic re-balancing if cluster configuration changes while the re-balancing is in progress.
|
||||
Additionally, it is unclear how to handle the automatic re-balancing if cluster configuration changes when the re-balancing is in progress.
|
||||
|
||||
The amount of data stored in `vmstorage` becomes equal among old `vmstorage` nodes and new `vmstorage` nodes
|
||||
after historical data is removed from the old `vmstorage` nodes because it goes outside of configured [retention](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#retention).
|
||||
@@ -498,15 +479,15 @@ The query load becomes even between old `vmstorage` nodes and new `vmstorage` no
|
||||
over time ranges with data covered by new `vmstorage` nodes. Usually most queries are received
|
||||
from [alerting and recording rules](https://docs.victoriametrics.com/victoriametrics/vmalert/), which query data on limited time ranges
|
||||
such as a few hours or a few days at most. This means that the query load between old `vmstorage` nodes and new `vmstorage` nodes
|
||||
should become even within few hours / days after adding new `vmstorage` nodes.
|
||||
should become even in a few hours / days after adding new `vmstorage` nodes.
|
||||
|
||||
## Why VictoriaMetrics misses automatic recovery of replication factor?
|
||||
|
||||
VictoriaMetrics doesn't restore [replication factor](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#replication-and-data-safety)
|
||||
when some of `vmstorage` nodes are removed from the cluster because of the following reasons:
|
||||
|
||||
- Automatic replication factor recovery needs to copy non-trivial amounts of data between the remaining `vmstorage` nodes.
|
||||
This additional copying requires additional CPU, disk IO and network bandwidth at `vmstorage` nodes. This may negatively impact
|
||||
- Automatic replication factor recovery needs copying non-trivial amounts of data between the remaining `vmstorage` nodes.
|
||||
This copying takes additional CPU, disk IO and network bandwidth at `vmstorage` nodes. This may negatively impact
|
||||
VictoriaMetrics cluster availability during extended periods of time.
|
||||
|
||||
- It is unclear when the automatic replication factor recovery must be started. How to distinguish the expected temporary
|
||||
|
||||
@@ -2053,7 +2053,7 @@ and [cardinality explorer docs](#cardinality-explorer).
|
||||
* VictoriaMetrics limits the number of labels per each series, label name length and label value length
|
||||
via `-maxLabelsPerTimeseries`, `-maxLabelNameLen` and `-maxLabelValueLen` command-line flags respectively.
|
||||
Series that exceed the limits are ignored on ingestion. This prevents the ingestion of malformed series.
|
||||
It is recommended [monitoring](#monitoring) `vm_rows_ignored_total` metric and VictoriaMetrics logs in order
|
||||
It is recommended [monitoring](#monitoring) `vm_rows_ignored_total` metric and VictoriaMetrics logs in order
|
||||
to determine whether limits must be adjusted for your workload.
|
||||
Alternatively, you can use [relabeling](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#relabeling) to change metric target labels.
|
||||
|
||||
@@ -2778,10 +2778,6 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
|
||||
Log queries with execution time exceeding this value. Zero disables slow query logging. See also -search.logQueryMemoryUsage (default 5s)
|
||||
-search.logSlowQueryStats duration
|
||||
Log query statistics if execution time exceeding this value - see https://docs.victoriametrics.com/victoriametrics/query-stats . Zero disables slow query statistics logging. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
-search.logSlowQueryStatsHeaders array
|
||||
White list of header keys to log for queries exceeding -search.logSlowQueryStats. By default, no headers are logged. This flag is available only in VictoriaMetrics enterprise. See https://docs.victoriametrics.com/victoriametrics/enterprise/
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-search.maxBinaryOpPushdownLabelValues instance
|
||||
The maximum number of values for a label in the first expression that can be extracted as a common label filter and pushed down to the second expression in a binary operation. A larger value makes the pushed-down filter more complex but fewer time series will be returned. This flag is useful when selective label contains numerous values, for example instance, and storage resources are abundant. (default 100)
|
||||
-search.maxConcurrentRequests int
|
||||
|
||||
@@ -61,13 +61,12 @@ Bumping the limits may significantly improve build speed.
|
||||
* `git tag -s v1.xx.y-cluster` in `cluster` branch
|
||||
* `git tag -s v1.xx.y-enterprise` in `enterprise-single-node` branch
|
||||
* `git tag -s v1.xx.y-enterprise-cluster` in `enterprise-cluster` branch
|
||||
1. Run `TAG=v1.xx.y EXTRA_DOCKER_TAG_SUFFIX=-rcY make publish-release`. This command performs the following tasks:
|
||||
1. Run `TAG=v1.xx.y make publish-release`. This command performs the following tasks:
|
||||
|
||||
- a) Build and package binaries in `*.tar.gz` release archives with the corresponding `_checksums.txt` files inside `bin` directory.
|
||||
This step can be run manually with the command `make release` from the needed git tag.
|
||||
- b) Build and publish [multi-platform Docker images](https://docs.docker.com/build/buildx/multiplatform-images/)
|
||||
for the given `TAG`, `TAG-cluster`, `TAG-enterprise` and `TAG-enterprise-cluster`.
|
||||
The resulting docker images will have special release candidate suffix for the given `EXTRA_DOCKER_TAG_SUFFIX`.
|
||||
The multi-platform Docker image is built for the following platforms:
|
||||
* linux/amd64
|
||||
* linux/arm64
|
||||
@@ -116,10 +115,10 @@ Bumping the limits may significantly improve build speed.
|
||||
|
||||
**Important note:** do not push enterprise tags to public GitHub repository - they must be pushed only to private repository.
|
||||
|
||||
1. Run `TAG=v1.xx.y EXTRA_DOCKER_TAG_SUFFIX=-rc1 make publish-final-images`. This command publishes the final release images from release candidate image for given `EXTRA_DOCKER_TAG_SUFFIX` and updates `latest` Docker image tag for the given `TAG`.
|
||||
1. Publish release by pressing "Publish release" green button in GitHub's UI.
|
||||
1. Run `TAG=v1.xx.y make publish-latest`. This command publishes the `latest` Docker image tag for the given `TAG`.
|
||||
This command must be run only for the latest officially published release. It must be skipped when publishing other releases such as
|
||||
[LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-releases/) or some test releases.
|
||||
1. Publish release by pressing "Publish release" green button in GitHub's UI.
|
||||
1. Update GitHub tickets related to the new release. Usually, such tickets have label [waiting for release](https://github.com/VictoriaMetrics/VictoriaMetrics/issues?q=is%3Aopen+is%3Aissue+label%3A%22waiting+for+release%22). Close such tickets by mentioning which release they were included into, and remove the label. See example [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6637#issuecomment-2390729511).
|
||||
1. Bump VictoriaMetrics version at `deployment/docker/*.yml`. For example:
|
||||
|
||||
|
||||
@@ -18,38 +18,11 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
|
||||
## tip
|
||||
|
||||
## [v1.121.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.121.0)
|
||||
|
||||
Released at 2025-07-04
|
||||
|
||||
**Update Note 1:** The `-retryMaxTime` flag has been deprecated. Please use `-retryMaxInterval` flag instead. For more details, see [#9169](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9169).
|
||||
|
||||
* FEATURE: all the [VictoriaMetrics Enterprise](https://docs.victoriametrics.com/enterprise.html) components: improve error message when an empty license is provided via the `-license` or `-licenseFile` command-line flags. See [#9337](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9337) for the details.
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add `concurrency` option to kafka remoteWrite producer. See [9249](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9249) issue for details.
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/) and [Single-node VictoriaMetrics](https://docs.victoriametrics.com/): support `label_limit` scrape parameter. It sets labels limit for each scraped sample from a target. If limit is exceeded, the scrape will be rejected and target marked with corresponding scrape error. The `label_limit` can be defined on global level, per job or during relabeling via `__label_limit__` label. See [scrape configs doc](https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs) for details. See [#7660](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7660) and [#3233](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3233) issues.
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): introduce a new flag `-retryMaxInterval` as a replacement for the deprecated `-retryMaxTime` flag. The new flag more accurately reflects the behavior it controls. See [#9169](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9169) for more details. Thanks to the @leiwingqueen.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): remove duplicate kubernetes targets from [service-discovery-debug](https://docs.victoriametrics.com/victoriametrics/relabeling/#relabel-debugging) page. See [8626](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8626) issue for details.
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): add `/api/v1/notifiers` API endpoint for returning list of configured or discovered notifiers.
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): add `datasource_type` query argument for `/api/v1/rules` and `/api/v1/alerts` endpoints to filter response by rule's datasource [type](https://docs.victoriametrics.com/victoriametrics/vmalert/#groups). See [#8537](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8537).
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): respect group order defined in the rule file during [replay mode](https://docs.victoriametrics.com/victoriametrics/vmalert/#rules-backfilling) to allow chained group if needed. See [#9334](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9334).
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert/): add `-replay.ruleEvaluationConcurrency` to allow concurrent `/query_range` requests for a single rule. Increasing this value is useful when replaying over a long time range, where a single request range is limited by `-replay.maxDatapointsPerQuery`. See [#7387](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7387) for details. Thanks to the @BenNF.
|
||||
* FEATURE: [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): improve error messages when `vmbackupmanager` fails to create snapshot. See [#9340](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9340) for the details.
|
||||
* FEATURE: [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): support client-side TLS configuration for creating and deleting snapshots via `-snapshot.tls*` cmd-line flags.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): remove duplicate kubernetes targets from [service-discovery-debug](https://docs.victoriametrics.com/victoriametrics/relabeling/#relabel-debugging) page. See [8626](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8626) issue for details.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add command-line flag `-search.logSlowQueryStatsHeaders` for [query execution stats](https://docs.victoriametrics.com/victoriametrics/query-stats/). The new flag allows specifying the list of headers to log together with slow queries if user's request contains them. This flag is available only in VictoriaMetrics [enterprise](https://docs.victoriametrics.com/victoriametrics/enterprise/). See [Query Stats](https://docs.victoriametrics.com/victoriametrics/query-stats/) for details.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [vmselect](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add ability to proxy `/api/v1/notifiers` to vmalert when `-vmalert.proxyURL` is set. See [9267](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9267) PR for details.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): add `vm_cache_eviction_bytes_total` counter metrics to reflect cache evictions due to expiration, misses and cache size. See [9293](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9293) PR for details. Thanks to the @BenNF
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): enhance `MustReadAt` panic message to include filename for easier debugging of out-of-range reads. See [#9106](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9106).
|
||||
|
||||
* BUGFIX: [dashboards/vmagent](https://grafana.com/grafana/dashboards/12683) and [dashboards/vmalert](https://grafana.com/grafana/dashboards/14950): fix ad-hoc filters auto-complete and filtering on panels that use MetricsQL specific expressions. See [#8657](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8657).
|
||||
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/): fix negative increase result when `-search.maxLookback` or `-search.maxStalenessInterval` are set and data contains gap. See [#8935 (comment)](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8935#issuecomment-2978728661).
|
||||
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): properly clean quantiles output state during flush. See [#9350](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9350).
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): fix exposition of duplicated metrics for dynamically discovered notifiers via Consul and DNS. See [#9260](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9260).
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): remove inline styles from UI to align with `--http.header.csp=default-src 'self'` setting. See [#9236](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9236).
|
||||
* BUGFIX: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): fix alerts state restoration for alerting rules that are using [templating](https://docs.victoriametrics.com/victoriametrics/vmalert/#templating) in the labels. See [#9305](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9305).
|
||||
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): automatically retry requests failing with `Expired Token` errors. This helps to avoid failed backups when using [EKS Pod Identity](https://docs.aws.amazon.com/eks/latest/userguide/pod-id-how-it-works.html) for authentication. See [#9280](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9280).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly return results for search requests with `.+|^$` regex filter expression. See [9290](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9290) issue for details.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix errors in console about loading of `manifest.json` when accessing UI through vmauth with Basic Auth enabled.
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): disabled opening of autocomplete popup on initial page load.
|
||||
* BUGFIX: [dashboards/vmagent](https://grafana.com/grafana/dashboards/12683) and [dashboards/vmalert](https://grafana.com/grafana/dashboards/14950): fix ad-hoc filters auto-complete and filtering on panels that use MetricsQL specific expressions. See [#8657](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8657).
|
||||
|
||||
## [v1.120.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.120.0)
|
||||
|
||||
@@ -72,7 +45,6 @@ Released at 2025-06-20
|
||||
* BUGFIX: [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): increase startup healthcheck delay for storage reachability from 30 seconds to 3 minutes. This is required to avoid vmbackupmanager restarts when storage node startup takes more than 30 seconds (e.g. when storage nodes store more than 5TB of data).
|
||||
* BUGFIX: [VictoriaMetrics Enterprise](https://docs.victoriametrics.com/enterprise.html) cluster: properly include FIPS binaries in release artifacts for platforms other than windows. Previously, FIPS binaries were only included in windows release artifacts. See [#9188](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9188).
|
||||
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): properly calculate `rate_sum` and `rate_avg` aggregations if aggregation `interval` is smaller than distance between samples timestamps. See [#9017](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9017).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): fixed static AWS credentials precedence, which could previously be overridden by IRSA credentials. See [#9168](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9168) for details.
|
||||
|
||||
## [v1.119.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.119.0)
|
||||
|
||||
@@ -86,7 +58,6 @@ Released at 2025-06-06
|
||||
* FEATURE: [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): add support for user-defined timezone for backup scheduling. It is now possible to use `-backupScheduleTimezone=Europe/Paris` to take backups at midnight in `Europe/Paris` timezone. See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3950) and [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6707) issues for details.
|
||||
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/vmbackupmanager/): add an ability to set objects metadata (and tags for S3-compatible storage) when uploading backups by using `-objectMetadata` and `-s3ObjectTags` command-line flags. See [#8010](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8010).
|
||||
* FEATURE: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): allow disabling tenant cache for [multitenant read queries](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) by using `-search.disableCache` or `-search.tenantCacheExpireDuration=0` command-line flags, or by adding `nocache=1` query parameter. It can be useful for debugging purposes and in cases of frequent tenants creation.
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): log issues while reading persisted cache from the filesystem. This can be useful for debugging missing cache issues. See [#8934](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8934).
|
||||
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): fixed a regression in downsampling logic introduced in [#7440](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7440) and released in [v1.106.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.106.0), where downsampling rules with filters `filter:offset:interval` could be incorrectly skipped in favor of unfiltered rules `offset:interval`. See [#8969](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8969).
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): properly apply `retentionFilter` on flag value changes. Previously, it ignored any `filter` value changes for historical data. See [#8885](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8885) for details.
|
||||
|
||||
@@ -71,7 +71,7 @@ On top of this, Enterprise package of VictoriaMetrics includes the following fea
|
||||
- [Multitenant support in vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/#multitenancy).
|
||||
- [Ability to read alerting and recording rules from Object Storage](https://docs.victoriametrics.com/victoriametrics/vmalert/#reading-rules-from-object-storage).
|
||||
- [Ability to filter incoming requests by IP at vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/#ip-filters).
|
||||
- [FIPS 140-3 compatible builds](https://docs.victoriametrics.com/victoriametrics/enterprise/#fips-compatibility).
|
||||
- [FIPS compliant mode for all the components](https://docs.victoriametrics.com/victoriametrics/enterprise/#fips-compliance).
|
||||
|
||||
Contact us via [this page](https://victoriametrics.com/products/enterprise/) if you are interested in VictoriaMetrics Enterprise.
|
||||
|
||||
@@ -280,25 +280,14 @@ kubectl create secret generic vm-license --from-literal=license={BASE64_ENCODED_
|
||||
Note that the license key provided via a secret is mounted as a file. This allows updating the license without restarting the pod.
|
||||
See full list of CRD specifications [here](https://docs.victoriametrics.com/operator/api.html).
|
||||
|
||||
### FIPS compliance
|
||||
|
||||
### FIPS Compatibility
|
||||
VictoriaMetrics Enterprise components can be run in FIPS compliant mode {{% available_from "v1.118.0" %}}. Binary releases and Docker images
|
||||
of VictoriaMetrics Enterprise components have `fips` suffix in their names. For example, `victoria-metrics-linux-amd64-v1.120.0-enterprise.tar.gz`
|
||||
archive includes `victoria-metrics-prod` and `victoria-metrics-fips` binaries. The latter binary is FIPS compliant.
|
||||
|
||||
VictoriaMetrics Enterprise components support FIPS 140-3 compatible mode {{% available_from "v1.118.0" %}} using the BoringCrypto module (FIPS 140-3 validated by Google). Binaries and Docker images with the `-fips` suffix are built with BoringCrypto for all cryptographic operations.
|
||||
|
||||
Builds are available for amd64 and arm64.
|
||||
|
||||
Example archive:
|
||||
|
||||
`victoria-metrics-linux-amd64-v1.120.0-enterprise.tar.gz`
|
||||
|
||||
Includes:
|
||||
|
||||
* `victoria-metrics-prod` (standard)
|
||||
* `victoria-metrics-fips` (FIPS-compatible via BoringCrypto)
|
||||
|
||||
Example Docker image:
|
||||
|
||||
`victoriametrics/victoria-metrics:v1.120.0-enterprise-fips` – uses the FIPS-compatible binary and is based on the `scratch` image.
|
||||
Docker images of VictoriaMetrics Enterprise components have `fips` suffix in their names. For example, `victoriametrics/victoria-metrics:v1.120.0-enterprise-fips`
|
||||
image uses FIPS compliant binary version.
|
||||
|
||||
## Monitoring license expiration
|
||||
|
||||
|
||||
@@ -46,8 +46,6 @@ Each log entry contains the following fields:
|
||||
* `samples_fetched`: number of [data samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples) fetched;
|
||||
* `bytes`: number of bytes transferred from storage to process the query;
|
||||
* `memory_estimated_bytes`: estimated memory needed to run the query. See `-search.maxMemoryPerQuery` cmd-line flag.
|
||||
* `headers.*`: header key-value pairs associated with request {{% available_from "tip" %}}. Only headers listed in `-search.logSlowQueryStatsHeaders`
|
||||
are logged.
|
||||
|
||||
## Analysis
|
||||
|
||||
|
||||
@@ -2005,17 +2005,7 @@ scrape_configs:
|
||||
# label during target relabeling phase.
|
||||
# See https://docs.victoriametrics.com/victoriametrics/relabeling/
|
||||
#
|
||||
# series_limit: <int>
|
||||
|
||||
# label_limit is an optional limit on the number of labels per each sample
|
||||
# exposed by a target. It can be set globally for a whole scrape configuration and for each scrape job
|
||||
#
|
||||
# By default, the limit is disabled.
|
||||
# The label_limit can be set on a per-target basis by specifying `__label_limit__`
|
||||
# label during target relabeling phase. Available starting from v1.121.0.
|
||||
# See https://docs.victoriametrics.com/victoriametrics/relabeling/
|
||||
#
|
||||
# label_limit: <int>
|
||||
# series_limit: ...
|
||||
|
||||
# no_stale_markers allows disabling staleness tracking.
|
||||
# By default, staleness tracking is enabled for all the discovered scrape targets.
|
||||
|
||||
@@ -533,11 +533,6 @@ and attaches `instance`, `job` and other target-specific labels to these metrics
|
||||
scrape_samples_post_metric_relabeling > 10000
|
||||
```
|
||||
|
||||
* `scrape_labels_limit` - the configured limit on the number of [labels](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#labels) the given target can expose
|
||||
per [sample](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples).
|
||||
The limit can be set via `label_limit` option at [scrape_configs](https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs).
|
||||
This metric is exposed only if the `label_limit` is set.
|
||||
|
||||
* `scrape_series_added` - **an approximate** number of new [series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) the given target generates during the current scrape.
|
||||
This metric allows detecting targets (identified by `instance` label),
|
||||
which lead to [high churn rate](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-high-churn-rate).
|
||||
@@ -1289,9 +1284,6 @@ To switch to [the VictoriaMetrics remote write protocol](https://docs.victoriame
|
||||
simply set the `-remoteWrite.forceVMProto=true` flag. It is also possible to adjust the compression level for the VictoriaMetrics remote write protocol using the `-remoteWrite.vmProtoCompressLevel`
|
||||
command-line flag.
|
||||
|
||||
By default, `vmagent` uses a single producer per topic. This behaviour can be changed by setting the `concurrency` query arg in the URL, e.g. `kafka://localhost:9092/?concurrency=<int>`, which adds additional workers. This can improve throughput in networks with high latency.
|
||||
It can also help if Kafka brokers are located in a different region or availability zone.
|
||||
|
||||
#### Estimating message size and rate
|
||||
|
||||
If you are migrating from remote write to Kafka, the request rate and request body size of remote write can roughly correspond to the message rate and size of Kafka.
|
||||
|
||||
@@ -818,7 +818,7 @@ max range per request: 8h20m0s
|
||||
2021-06-07T09:59:12.098Z info app/vmalert/replay.go:68 replay finished! Imported 511734 samples
|
||||
```
|
||||
|
||||
> In replay mode, groups are executed sequentially in the defined order. Within each group, rules are also executed sequentially,
|
||||
> In replay mode, groups are executed one after another in sequence. Within each group, rules are also executed sequentially,
|
||||
regardless of the `concurrency` setting. This ensures that any potential chaining between rules is preserved (see `-replay.rulesDelay`).
|
||||
If you want rules to run concurrently based on the `concurrency` setting, set `-replay.rulesDelay=0`.
|
||||
|
||||
@@ -861,9 +861,6 @@ There are following non-required `replay` flags:
|
||||
* `-replay.disableProgressBar` - whether to disable progress bar which shows progress work.
|
||||
Progress bar may generate a lot of log records, which are not formatted like standard VictoriaMetrics logger output.
|
||||
This could break log parsing by external systems and generate additional load on them.
|
||||
* `-replay.ruleEvaluationConcurrency` - The maximum number of concurrent `/query_range` requests for a single rule.
|
||||
Increase this value when replaying over a long time range and a single request range is limited by `-replay.maxDatapointsPerQuery`.
|
||||
The default value is `1`.
|
||||
|
||||
See full description for these flags in `./vmalert -help`.
|
||||
|
||||
|
||||
@@ -176,7 +176,6 @@ unauthorized_user:
|
||||
- "http://vminsert-3:8480/"
|
||||
- src_paths:
|
||||
- "/select/.*"
|
||||
- "/admin/.*"
|
||||
url_prefix:
|
||||
- "http://vmselect-1:8481/"
|
||||
- "http://vmselect-2:8481/"
|
||||
|
||||
@@ -481,15 +481,15 @@ Run `vmbackup -help` in order to see all the available options:
|
||||
-snapshot.deleteURL string
|
||||
VictoriaMetrics delete snapshot url. Optional. Will be generated from -snapshot.createURL if not provided. All created snapshots will be automatically deleted. Example: http://victoriametrics:8428/snapshot/delete
|
||||
-snapshot.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -snapshot.createURL. By default, system CA is used
|
||||
Optional path to TLS CA file to use for verifying connections to -snapshotCreateURL. By default, system CA is used
|
||||
-snapshot.tlsCertFile string
|
||||
Optional path to client-side TLS certificate file to use when connecting to -snapshot.createURL
|
||||
Optional path to client-side TLS certificate file to use when connecting to -snapshotCreateURL
|
||||
-snapshot.tlsInsecureSkipVerify
|
||||
Whether to skip tls verification when connecting to -snapshot.createURL
|
||||
Whether to skip tls verification when connecting to -snapshotCreateURL
|
||||
-snapshot.tlsKeyFile string
|
||||
Optional path to client-side TLS certificate key to use when connecting to -snapshot.createURL
|
||||
Optional path to client-side TLS certificate key to use when connecting to -snapshotCreateURL
|
||||
-snapshot.tlsServerName string
|
||||
Optional TLS server name to use for connections to -snapshot.createURL. By default, the server name from -snapshot.createURL is used
|
||||
Optional TLS server name to use for connections to -snapshotCreateURL. By default, the server name from -snapshotCreateURL is used
|
||||
-snapshotName string
|
||||
Name for the snapshot to backup. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-work-with-snapshots. There is no need in setting -snapshotName if -snapshot.createURL is set
|
||||
-storageDataPath string
|
||||
|
||||
@@ -616,16 +616,6 @@ command-line flags:
|
||||
VictoriaMetrics create snapshot url. When this is given a snapshot will automatically be created during backup. Example: http://victoriametrics:8428/snapshot/create
|
||||
-snapshot.deleteURL string
|
||||
VictoriaMetrics delete snapshot url. Optional. Will be generated from snapshot.createURL if not provided. All created snapshots will be automatically deleted. Example: http://victoriametrics:8428/snapshot/delete
|
||||
-snapshot.tlsCAFile string
|
||||
Optional path to TLS CA file to use for verifying connections to -snapshot.createURL. By default, system CA is used
|
||||
-snapshot.tlsCertFile string
|
||||
Optional path to client-side TLS certificate file to use when connecting to -snapshot.createURL
|
||||
-snapshot.tlsInsecureSkipVerify
|
||||
Whether to skip tls verification when connecting to -snapshot.createURL
|
||||
-snapshot.tlsKeyFile string
|
||||
Optional path to client-side TLS certificate key to use when connecting to -snapshot.createURL
|
||||
-snapshot.tlsServerName string
|
||||
Optional TLS server name to use for connections to -snapshot.createURL. By default, the server name from -snapshot.createURL is used
|
||||
-storageDataPath string
|
||||
Path to VictoriaMetrics data. Must match -storageDataPath from VictoriaMetrics or vmstorage (default "victoria-metrics-data")
|
||||
-tls array
|
||||
|
||||
2
go.mod
2
go.mod
@@ -100,7 +100,7 @@ require (
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/go-logr/logr v1.4.3 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/go-viper/mapstructure/v2 v2.3.0 // indirect
|
||||
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
|
||||
github.com/gobwas/glob v0.2.3 // indirect
|
||||
github.com/golang-jwt/jwt/v5 v5.2.2 // indirect
|
||||
github.com/google/s2a-go v0.1.9 // indirect
|
||||
|
||||
6
go.sum
6
go.sum
@@ -42,6 +42,8 @@ github.com/VictoriaMetrics/fastcache v1.12.5 h1:966OX9JjqYmDAFdp3wEXLwzukiHIm+GV
|
||||
github.com/VictoriaMetrics/fastcache v1.12.5/go.mod h1:K+JGPBn0sueFlLjZ8rcVM0cKkWKNElKyQXmw57QOoYI=
|
||||
github.com/VictoriaMetrics/metrics v1.37.0 h1:u5Yr+HFofQyn7kgmmkufgkX0nEA6G1oEyK2eaKsVaUM=
|
||||
github.com/VictoriaMetrics/metrics v1.37.0/go.mod h1:r7hveu6xMdUACXvB8TYdAj8WEsKzWB0EkpJN+RDtOf8=
|
||||
github.com/VictoriaMetrics/metricsql v0.84.5 h1:3JeIKpEh9yCNBVoeKJovICRvNea6h6m50h/RGW36P2g=
|
||||
github.com/VictoriaMetrics/metricsql v0.84.5/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ=
|
||||
github.com/VictoriaMetrics/metricsql v0.84.6 h1:r1rl05prim/r+Me4BUULaZQYXn2eZa3dnrtk+hY3X90=
|
||||
github.com/VictoriaMetrics/metricsql v0.84.6/go.mod h1:d4EisFO6ONP/HIGDYTAtwrejJBBeKGQYiRl095bS4QQ=
|
||||
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
|
||||
@@ -158,8 +160,8 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr
|
||||
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
|
||||
github.com/go-resty/resty/v2 v2.16.3 h1:zacNT7lt4b8M/io2Ahj6yPypL7bqx9n1iprfQuodV+E=
|
||||
github.com/go-resty/resty/v2 v2.16.3/go.mod h1:hkJtXbA2iKHzJheXYvQ8snQES5ZLGKMwQ07xAwp/fiA=
|
||||
github.com/go-viper/mapstructure/v2 v2.3.0 h1:27XbWsHIqhbdR5TIC911OfYvgSaW93HM+dX7970Q7jk=
|
||||
github.com/go-viper/mapstructure/v2 v2.3.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
|
||||
github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss=
|
||||
github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
|
||||
github.com/go-zookeeper/zk v1.0.4 h1:DPzxraQx7OrPyXq2phlGlNSIyWEsAox0RJmjTseMV6I=
|
||||
github.com/go-zookeeper/zk v1.0.4/go.mod h1:nOB03cncLtlp4t+UAkGSV+9beXP/akpekBwL+UX1Qcw=
|
||||
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
|
||||
|
||||
@@ -74,6 +74,9 @@ func NewConfig(ec2Endpoint, stsEndpoint, region, roleARN, accessKey, secretKey,
|
||||
}
|
||||
cfg.ec2Endpoint = buildAPIEndpoint(ec2Endpoint, cfg.region, "ec2")
|
||||
cfg.stsEndpoint = buildAPIEndpoint(stsEndpoint, cfg.region, "sts")
|
||||
if cfg.roleARN == "" {
|
||||
cfg.roleARN = os.Getenv("AWS_ROLE_ARN")
|
||||
}
|
||||
cfg.webTokenPath = os.Getenv("AWS_WEB_IDENTITY_TOKEN_FILE")
|
||||
if cfg.webTokenPath != "" && cfg.irsaRoleARN == "" {
|
||||
return nil, fmt.Errorf("roleARN is missing for AWS_WEB_IDENTITY_TOKEN_FILE=%q; set it via env var AWS_ROLE_ARN", cfg.webTokenPath)
|
||||
@@ -203,19 +206,18 @@ func (cfg *Config) getAPICredentials() (*credentials, error) {
|
||||
AccessKeyID: cfg.defaultAccessKey,
|
||||
SecretAccessKey: cfg.defaultSecretKey,
|
||||
}
|
||||
fullURI := os.Getenv("AWS_CONTAINER_CREDENTIALS_FULL_URI")
|
||||
if relativeURI := os.Getenv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"); len(relativeURI) > 0 {
|
||||
fullURI = "http://169.254.170.2" + relativeURI
|
||||
}
|
||||
switch {
|
||||
case len(acNew.AccessKeyID) > 0 && len(acNew.SecretAccessKey) > 0:
|
||||
case len(cfg.webTokenPath) > 0:
|
||||
if len(cfg.webTokenPath) > 0 {
|
||||
token, err := os.ReadFile(cfg.webTokenPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read webToken from path: %q, err: %w", cfg.webTokenPath, err)
|
||||
}
|
||||
return cfg.getRoleWebIdentityCredentials(string(token), cfg.irsaRoleARN)
|
||||
case len(fullURI) > 0:
|
||||
}
|
||||
fullURI := os.Getenv("AWS_CONTAINER_CREDENTIALS_FULL_URI")
|
||||
if relativeURI := os.Getenv("AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"); len(relativeURI) > 0 {
|
||||
fullURI = "http://169.254.170.2" + relativeURI
|
||||
}
|
||||
if len(fullURI) > 0 {
|
||||
token := os.Getenv("AWS_CONTAINER_AUTHORIZATION_TOKEN")
|
||||
if len(token) == 0 && len(cfg.containerTokenPath) > 0 {
|
||||
t, err := os.ReadFile(cfg.containerTokenPath)
|
||||
@@ -229,14 +231,17 @@ func (cfg *Config) getAPICredentials() (*credentials, error) {
|
||||
return nil, err
|
||||
}
|
||||
acNew = ac
|
||||
default:
|
||||
// we need instance credentials if we do not have access keys
|
||||
}
|
||||
|
||||
// we need instance credentials if we do not have access keys
|
||||
if len(acNew.AccessKeyID) == 0 && len(acNew.SecretAccessKey) == 0 {
|
||||
ac, err := getInstanceRoleCredentials(cfg.client)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot obtain instance role credentials: %w", err)
|
||||
}
|
||||
acNew = ac
|
||||
}
|
||||
|
||||
// read credentials from sts api, if role_arn is defined
|
||||
if len(cfg.roleARN) > 0 {
|
||||
ac, err := cfg.getRoleARNCredentials(acNew, cfg.roleARN)
|
||||
@@ -368,7 +373,7 @@ func (cfg *Config) getRoleWebIdentityCredentials(token, roleARN string) (*creden
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(cfg.roleARN) > 0 {
|
||||
if roleARN != cfg.roleARN {
|
||||
// need to assume a different role
|
||||
assumeCreds, err := cfg.getRoleARNCredentials(creds, cfg.roleARN)
|
||||
if err != nil {
|
||||
|
||||
@@ -2,10 +2,6 @@ package awsapi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -67,155 +63,6 @@ func TestParseARNCredentialsFailure(t *testing.T) {
|
||||
f("foobar")
|
||||
}
|
||||
|
||||
type fakeRoundTripper struct {
|
||||
responses map[string]*http.Response
|
||||
}
|
||||
|
||||
func (m *fakeRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
queryParams := req.URL.Query()
|
||||
action := queryParams.Get("Action")
|
||||
resp, ok := m.responses[action]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unexpected action: %q", action)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func TestGetAPICredentials(t *testing.T) {
|
||||
responses := map[string]string{
|
||||
"AssumeRole": `
|
||||
<AssumeRoleResponse xmlns="https://sts.amazonaws.com/doc/2011-06-15/">
|
||||
<AssumeRoleResult>
|
||||
<AssumedRoleUser>
|
||||
<Arn>arn:aws:sts::123456789012:assumed-role/demo/TestAR</Arn>
|
||||
<AssumedRoleId>ARO123EXAMPLE123:TestAR</AssumedRoleId>
|
||||
</AssumedRoleUser>
|
||||
<Credentials>
|
||||
<AccessKeyId>ROLEACCESSKEYID</AccessKeyId>
|
||||
<SecretAccessKey>ROLESECRETACCESSKEY</SecretAccessKey>
|
||||
<SessionToken>ROLETOKEN</SessionToken>
|
||||
<Expiration>2019-11-09T13:34:41Z</Expiration>
|
||||
</Credentials>
|
||||
<PackedPolicySize>6</PackedPolicySize>
|
||||
</AssumeRoleResult>
|
||||
<ResponseMetadata>
|
||||
<RequestId>c6104cbe-af31-11e0-8154-cbc7ccf896c7</RequestId>
|
||||
</ResponseMetadata>
|
||||
</AssumeRoleResponse>
|
||||
`,
|
||||
"AssumeRoleWithWebIdentity": `
|
||||
<AssumeRoleWithWebIdentityResponse xmlns="https://sts.amazonaws.com/doc/2011-06-15/">
|
||||
<AssumeRoleWithWebIdentityResult>
|
||||
<Audience>sts.amazonaws.com</Audience>
|
||||
<AssumedRoleUser>
|
||||
<AssumedRoleId>AROA2X6NOXN27E3OGMK3T:vmagent-ec2-discovery</AssumedRoleId>
|
||||
<Arn>arn:aws:sts::111111111:assumed-role/eks-role-9N0EFKEDJ1X/vmagent-ec2-discovery</Arn>
|
||||
</AssumedRoleUser>
|
||||
<Provider>arn:aws:iam::111111111:oidc-provider/oidc.eks.eu-west-1.amazonaws.com/id/111111111</Provider>
|
||||
<Credentials>
|
||||
<AccessKeyId>IRSAACCESSKEYID</AccessKeyId>
|
||||
<SecretAccessKey>IRSASECRETACCESSKEY</SecretAccessKey>
|
||||
<SessionToken>IRSATOKEN</SessionToken>
|
||||
<Expiration>2021-03-01T13:38:15Z</Expiration>
|
||||
</Credentials>
|
||||
<SubjectFromWebIdentityToken>system:serviceaccount:default:vmagent</SubjectFromWebIdentityToken>
|
||||
</AssumeRoleWithWebIdentityResult>
|
||||
<ResponseMetadata>
|
||||
<RequestId>1214124-7bb0-4673-ad6d-af9e67fc1141</RequestId>
|
||||
</ResponseMetadata>
|
||||
</AssumeRoleWithWebIdentityResponse>
|
||||
`,
|
||||
}
|
||||
f := func(c *Config, credsExpected *credentials) {
|
||||
t.Helper()
|
||||
if len(c.webTokenPath) > 0 {
|
||||
tempDir := t.TempDir()
|
||||
c.webTokenPath = filepath.Join(tempDir, c.webTokenPath)
|
||||
err := os.WriteFile(c.webTokenPath, []byte("webtoken"), 0644)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create webtoken file: %v", err)
|
||||
}
|
||||
}
|
||||
rt := &fakeRoundTripper{
|
||||
responses: make(map[string]*http.Response),
|
||||
}
|
||||
for action, value := range responses {
|
||||
recorder := httptest.NewRecorder()
|
||||
recorder.WriteHeader(http.StatusOK)
|
||||
_, _ = recorder.WriteString(value)
|
||||
fakeResponse := recorder.Result()
|
||||
rt.responses[action] = fakeResponse
|
||||
}
|
||||
c.client = &http.Client{
|
||||
Transport: rt,
|
||||
}
|
||||
creds, err := c.getAPICredentials()
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
if !reflect.DeepEqual(creds, credsExpected) {
|
||||
t.Fatalf("unexpected creds;\ngot\n%+v\nwant\n%+v", creds, credsExpected)
|
||||
}
|
||||
}
|
||||
|
||||
// static credentials
|
||||
f(&Config{
|
||||
defaultAccessKey: "staticAccessKey",
|
||||
defaultSecretKey: "staticSecretKey",
|
||||
}, &credentials{
|
||||
AccessKeyID: "staticAccessKey",
|
||||
SecretAccessKey: "staticSecretKey",
|
||||
})
|
||||
|
||||
// static credentials with webtoken defined
|
||||
f(&Config{
|
||||
defaultAccessKey: "staticAccessKey",
|
||||
defaultSecretKey: "staticSecretKey",
|
||||
irsaRoleARN: "irsarole",
|
||||
webTokenPath: "somepath",
|
||||
}, &credentials{
|
||||
AccessKeyID: "staticAccessKey",
|
||||
SecretAccessKey: "staticSecretKey",
|
||||
})
|
||||
|
||||
// static credentials with role assume
|
||||
f(&Config{
|
||||
roleARN: "somerole",
|
||||
defaultAccessKey: "staticAccessKey",
|
||||
defaultSecretKey: "staticSecretKey",
|
||||
}, &credentials{
|
||||
AccessKeyID: "ROLEACCESSKEYID",
|
||||
SecretAccessKey: "ROLESECRETACCESSKEY",
|
||||
Expiration: mustParseRFC3339("2019-11-09T13:34:41Z"),
|
||||
Token: "ROLETOKEN",
|
||||
})
|
||||
|
||||
// webtoken credentials
|
||||
f(&Config{
|
||||
stsEndpoint: "http://stsendpoint",
|
||||
irsaRoleARN: "irsarole",
|
||||
webTokenPath: "tokenpath",
|
||||
}, &credentials{
|
||||
AccessKeyID: "IRSAACCESSKEYID",
|
||||
SecretAccessKey: "IRSASECRETACCESSKEY",
|
||||
Expiration: mustParseRFC3339("2021-03-01T13:38:15Z"),
|
||||
Token: "IRSATOKEN",
|
||||
})
|
||||
|
||||
// webtoken credentials with assume role
|
||||
f(&Config{
|
||||
roleARN: "somerole",
|
||||
stsEndpoint: "http://stsendpoint",
|
||||
irsaRoleARN: "irsarole",
|
||||
webTokenPath: "tokenpath",
|
||||
}, &credentials{
|
||||
AccessKeyID: "ROLEACCESSKEYID",
|
||||
SecretAccessKey: "ROLESECRETACCESSKEY",
|
||||
Expiration: mustParseRFC3339("2019-11-09T13:34:41Z"),
|
||||
Token: "ROLETOKEN",
|
||||
})
|
||||
}
|
||||
|
||||
func TestParseARNCredentialsSuccess(t *testing.T) {
|
||||
f := func(data, role string, credsExpected *credentials) {
|
||||
t.Helper()
|
||||
|
||||
@@ -122,10 +122,6 @@ func (fs *FS) Init(ctx context.Context) error {
|
||||
o.Retryables = append(retry.DefaultRetryables, retry.RetryableErrorCode{
|
||||
Codes: map[string]struct{}{
|
||||
"IncompleteBody": {},
|
||||
// Tolerate token expiration as it might be handled by token rotation automatically
|
||||
// when using EKS Pod Identity or similar.
|
||||
// See: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9280
|
||||
"ExpiredToken": {},
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
|
||||
// at windows only files could be synced
|
||||
// Sync for directories is not supported.
|
||||
func mustSyncPath(_ string) {
|
||||
func mustSyncPath(path string) {
|
||||
}
|
||||
|
||||
func mustRemoveDirAtomic(dir string) {
|
||||
@@ -66,7 +66,7 @@ var (
|
||||
mmapByAddr = map[uintptr]windows.Handle{}
|
||||
)
|
||||
|
||||
func mmap(fd, length int) ([]byte, error) {
|
||||
func mmap(fd int, length int) ([]byte, error) {
|
||||
flProtect := uint32(windows.PAGE_READONLY)
|
||||
dwDesiredAccess := uint32(windows.FILE_MAP_READ)
|
||||
// https://learn.microsoft.com/en-us/windows/win32/memory/creating-a-file-mapping-object#file-mapping-size
|
||||
@@ -81,11 +81,7 @@ func mmap(fd, length int) ([]byte, error) {
|
||||
windows.CloseHandle(h)
|
||||
return nil, os.NewSyscallError("MapViewOfFile", errno)
|
||||
}
|
||||
|
||||
// mitigate go vet false positive
|
||||
// https://github.com/golang/go/issues/58625
|
||||
addrPtr := *(*unsafe.Pointer)(unsafe.Pointer(&addr))
|
||||
data := unsafe.Slice((*byte)(addrPtr), length)
|
||||
data := unsafe.Slice((*byte)(unsafe.Pointer(addr)), length)
|
||||
|
||||
mmapByAddrLock.Lock()
|
||||
mmapByAddr[addr] = h
|
||||
@@ -125,7 +121,7 @@ func mustGetFreeSpace(path string) uint64 {
|
||||
}
|
||||
|
||||
// stub
|
||||
func fadviseSequentialRead(_ *os.File, _ bool) error {
|
||||
func fadviseSequentialRead(f *os.File, prefetch bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -73,7 +73,7 @@ func (r *ReaderAt) MustReadAt(p []byte, off int64) {
|
||||
}
|
||||
} else {
|
||||
if off > int64(len(mr.mmapData)-len(p)) {
|
||||
logger.Panicf("BUG: off=%d is out of allowed range [0...%d] for len(p)=%d in file %q", off, len(mr.mmapData)-len(p), len(p), r.path)
|
||||
logger.Panicf("BUG: off=%d is out of allowed range [0...%d] for len(p)=%d", off, len(mr.mmapData)-len(p), len(p))
|
||||
}
|
||||
src := mr.mmapData[off:]
|
||||
// The copy() below may result in thread block as described at https://valyala.medium.com/mmap-in-go-considered-harmful-d92a25cb161d .
|
||||
|
||||
@@ -266,11 +266,6 @@ func (lr *LogRows) Reset() {
|
||||
lr.defaultMsgValue = ""
|
||||
}
|
||||
|
||||
// RowsCount returns current log rows count
|
||||
func (lr *LogRows) RowsCount() int {
|
||||
return len(lr.rows)
|
||||
}
|
||||
|
||||
// ResetKeepSettings resets rows stored in lr, while keeping its settings passed to GetLogRows().
|
||||
func (lr *LogRows) ResetKeepSettings() {
|
||||
lr.a.reset()
|
||||
|
||||
@@ -537,11 +537,6 @@ func (q *Query) AddTimeFilter(start, end int64) {
|
||||
q.visitSubqueries(func(q *Query) {
|
||||
q.addTimeFilterNoSubqueries(ft)
|
||||
})
|
||||
|
||||
// Initialize rate functions with the step calculated from HTTP time filter
|
||||
// This fixes the bug where rate_sum() doesn't divide by stepSeconds when
|
||||
// time filter is specified via HTTP params instead of LogsQL expression
|
||||
q.initStatsRateFuncsFromTimeFilter()
|
||||
}
|
||||
|
||||
func (q *Query) addTimeFilterNoSubqueries(ft *filterTime) {
|
||||
@@ -1210,17 +1205,14 @@ func ParseQueryAtTimestamp(s string, timestamp int64) (*Query, error) {
|
||||
return nil, fmt.Errorf("unexpected unparsed tail after [%s]; context: [%s]; tail: [%s]", q, lex.context(), lex.s)
|
||||
}
|
||||
q.optimize()
|
||||
q.initStatsRateFuncsFromTimeFilter()
|
||||
|
||||
return q, nil
|
||||
}
|
||||
|
||||
func (q *Query) initStatsRateFuncsFromTimeFilter() {
|
||||
start, end := q.GetFilterTimeRange()
|
||||
if start != math.MinInt64 && end != math.MaxInt64 {
|
||||
step := end - start + 1 // 1 is needed in order to include [start ... end] in the step.
|
||||
q.initStatsRateFuncs(step)
|
||||
}
|
||||
|
||||
return q, nil
|
||||
}
|
||||
|
||||
func (q *Query) initStatsRateFuncs(step int64) {
|
||||
|
||||
@@ -54,6 +54,26 @@ func TestStatsSum(t *testing.T) {
|
||||
},
|
||||
})
|
||||
|
||||
f("stats by(b) sum(a)", [][]Field{
|
||||
{
|
||||
{"a", "0.000609"},
|
||||
{"b", "b1"},
|
||||
},
|
||||
{
|
||||
{"a", "0.000731"},
|
||||
{"b", "b2"},
|
||||
},
|
||||
}, [][]Field{
|
||||
{
|
||||
{"b", "b2"},
|
||||
{"sum(a)", "0.000731"},
|
||||
},
|
||||
{
|
||||
{"b", "b1"},
|
||||
{"sum(a)", "0.000609"},
|
||||
},
|
||||
})
|
||||
|
||||
f("stats sum(a) as x", [][]Field{
|
||||
{
|
||||
{"_msg", `abc`},
|
||||
|
||||
@@ -28,22 +28,16 @@ type Item struct {
|
||||
//
|
||||
// The returned bytes representation belongs to data.
|
||||
func (it Item) Bytes(data []byte) []byte {
|
||||
n := it.End - it.Start
|
||||
if n == 0 {
|
||||
return nil
|
||||
}
|
||||
return unsafe.Slice((*byte)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(data)), it.Start)), n)
|
||||
n := int(it.End - it.Start)
|
||||
return unsafe.Slice((*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(unsafe.SliceData(data)))+uintptr(it.Start))), n)
|
||||
}
|
||||
|
||||
// String returns string representation of it obtained from data.
|
||||
//
|
||||
// The returned string representation belongs to data.
|
||||
func (it Item) String(data []byte) string {
|
||||
n := it.End - it.Start
|
||||
if n == 0 {
|
||||
return ""
|
||||
}
|
||||
return unsafe.String((*byte)(unsafe.Add(unsafe.Pointer(unsafe.SliceData(data)), it.Start)), n)
|
||||
n := int(it.End - it.Start)
|
||||
return unsafe.String((*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(unsafe.SliceData(data)))+uintptr(it.Start))), n)
|
||||
}
|
||||
|
||||
func (ib *inmemoryBlock) Len() int {
|
||||
|
||||
@@ -265,7 +265,6 @@ func (cfg *Config) getJobNames() []string {
|
||||
//
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/
|
||||
type GlobalConfig struct {
|
||||
LabelLimit int `yaml:"label_limit,omitempty"`
|
||||
ScrapeInterval *promutil.Duration `yaml:"scrape_interval,omitempty"`
|
||||
ScrapeTimeout *promutil.Duration `yaml:"scrape_timeout,omitempty"`
|
||||
ExternalLabels *promutil.Labels `yaml:"external_labels,omitempty"`
|
||||
@@ -296,7 +295,6 @@ type ScrapeConfig struct {
|
||||
RelabelConfigs []promrelabel.RelabelConfig `yaml:"relabel_configs,omitempty"`
|
||||
MetricRelabelConfigs []promrelabel.RelabelConfig `yaml:"metric_relabel_configs,omitempty"`
|
||||
SampleLimit int `yaml:"sample_limit,omitempty"`
|
||||
LabelLimit int `yaml:"label_limit,omitempty"`
|
||||
|
||||
// This silly option is needed for compatibility with Prometheus.
|
||||
// vmagent was supporting disable_compression option since the beginning, while Prometheus developers
|
||||
@@ -880,13 +878,6 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
|
||||
if jobName == "" {
|
||||
return nil, fmt.Errorf("missing `job_name` field in `scrape_config`")
|
||||
}
|
||||
labelLimit := sc.LabelLimit
|
||||
if labelLimit <= 0 {
|
||||
labelLimit = globalCfg.LabelLimit
|
||||
if labelLimit <= 0 {
|
||||
labelLimit = defaultLabelLimit
|
||||
}
|
||||
}
|
||||
scrapeInterval := sc.ScrapeInterval.Duration()
|
||||
if scrapeInterval <= 0 {
|
||||
scrapeInterval = globalCfg.ScrapeInterval.Duration()
|
||||
@@ -996,7 +987,6 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
|
||||
relabelConfigs: relabelConfigs,
|
||||
metricRelabelConfigs: metricRelabelConfigs,
|
||||
sampleLimit: sc.SampleLimit,
|
||||
labelLimit: labelLimit,
|
||||
disableCompression: disableCompression,
|
||||
disableKeepAlive: sc.DisableKeepAlive,
|
||||
streamParse: sc.StreamParse,
|
||||
@@ -1028,7 +1018,6 @@ type scrapeWorkConfig struct {
|
||||
relabelConfigs *promrelabel.ParsedConfigs
|
||||
metricRelabelConfigs *promrelabel.ParsedConfigs
|
||||
sampleLimit int
|
||||
labelLimit int
|
||||
disableCompression bool
|
||||
disableKeepAlive bool
|
||||
streamParse bool
|
||||
@@ -1266,16 +1255,6 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel
|
||||
}
|
||||
sampleLimit = n
|
||||
}
|
||||
// Read label_limit option from __label_limit__ label.
|
||||
// See https://docs.victoriametrics.com/victoriametrics/vmagent/#automatically-generated-metrics
|
||||
labelLimit := swc.labelLimit
|
||||
if s := labels.Get("__label_limit__"); len(s) > 0 {
|
||||
n, err := strconv.Atoi(s)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse __label_limit__=%q: %w", s, err)
|
||||
}
|
||||
labelLimit = n
|
||||
}
|
||||
// Read stream_parse option from __stream_parse__ label.
|
||||
// See https://docs.victoriametrics.com/victoriametrics/vmagent/#stream-parsing-mode
|
||||
streamParse := swc.streamParse
|
||||
@@ -1321,14 +1300,13 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel
|
||||
AuthConfig: swc.authConfig,
|
||||
RelabelConfigs: swc.relabelConfigs,
|
||||
MetricRelabelConfigs: swc.metricRelabelConfigs,
|
||||
SampleLimit: sampleLimit,
|
||||
DisableCompression: swc.disableCompression,
|
||||
DisableKeepAlive: swc.disableKeepAlive,
|
||||
StreamParse: streamParse,
|
||||
ScrapeAlignInterval: swc.scrapeAlignInterval,
|
||||
ScrapeOffset: swc.scrapeOffset,
|
||||
SampleLimit: sampleLimit,
|
||||
SeriesLimit: seriesLimit,
|
||||
LabelLimit: labelLimit,
|
||||
NoStaleMarkers: swc.noStaleMarkers,
|
||||
AuthToken: at,
|
||||
|
||||
@@ -1374,5 +1352,4 @@ func mergeLabels(dst *promutil.Labels, swc *scrapeWorkConfig, target string, ext
|
||||
const (
|
||||
defaultScrapeInterval = time.Minute
|
||||
defaultScrapeTimeout = 10 * time.Second
|
||||
defaultLabelLimit = 0
|
||||
)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user