Compare commits
52 Commits
docs/trace
...
alert_best
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
afd8ac87f4 | ||
|
|
469b337707 | ||
|
|
2b131c13b5 | ||
|
|
aa342cea3b | ||
|
|
26a2f4c25c | ||
|
|
8a3a5e6867 | ||
|
|
9f9a1e1996 | ||
|
|
2d27404639 | ||
|
|
91a05697fd | ||
|
|
e9571576b8 | ||
|
|
314fed78cc | ||
|
|
6751e43975 | ||
|
|
aaefe469c1 | ||
|
|
fc33411d87 | ||
|
|
07574bdf7f | ||
|
|
e553a41fa0 | ||
|
|
f9aa74c367 | ||
|
|
21878f0760 | ||
|
|
aca321f235 | ||
|
|
0b2e976706 | ||
|
|
dc08b590c4 | ||
|
|
71eef98b0e | ||
|
|
6bc48ce669 | ||
|
|
482ae8135f | ||
|
|
393398996a | ||
|
|
7703a95709 | ||
|
|
114d657670 | ||
|
|
a9b641531b | ||
|
|
536df52682 | ||
|
|
5143ad7674 | ||
|
|
6e7df578c4 | ||
|
|
4a7b4ae852 | ||
|
|
65087f08c4 | ||
|
|
ca0479fff3 | ||
|
|
644c7a97c8 | ||
|
|
098cba5b73 | ||
|
|
5d0e8c0d1b | ||
|
|
442bfa6c35 | ||
|
|
ee031b21a7 | ||
|
|
deec361a64 | ||
|
|
fd4dce81ce | ||
|
|
5431696d83 | ||
|
|
1b71184bfb | ||
|
|
29c06b9543 | ||
|
|
369f3f0da1 | ||
|
|
6f93f0e1a7 | ||
|
|
96b773198f | ||
|
|
006381266b | ||
|
|
ae1dffe5d3 | ||
|
|
80e508eac7 | ||
|
|
86d8095417 | ||
|
|
df847e62c0 |
1
.gitignore
vendored
@@ -12,6 +12,7 @@
|
||||
/victoria-logs-data
|
||||
/victoria-metrics-data
|
||||
/vmagent-remotewrite-data
|
||||
/vlagent-remotewritewrite
|
||||
/vmstorage-data
|
||||
/vmselect-cache
|
||||
/package/temp-deb-*
|
||||
|
||||
2
Makefile
@@ -545,7 +545,7 @@ test-full:
|
||||
test-full-386:
|
||||
GOEXPERIMENT=synctest GOARCH=386 go test -coverprofile=coverage.txt -covermode=atomic ./lib/... ./app/...
|
||||
|
||||
integration-test: victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore victoria-logs
|
||||
integration-test: victoria-metrics vmagent vmalert vmauth vmctl vmbackup vmrestore victoria-logs vlagent
|
||||
go test ./apptest/... -skip="^TestCluster.*"
|
||||
|
||||
benchmark:
|
||||
|
||||
@@ -3,12 +3,12 @@
|
||||

|
||||

|
||||

|
||||

|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/main.yml)
|
||||

|
||||

|
||||

|
||||

|
||||

|
||||
[](https://x.com/VictoriaMetrics/)
|
||||
[](https://www.reddit.com/r/VictoriaMetrics/)
|
||||
|
||||
<picture>
|
||||
<source srcset="docs/victoriametrics/logo_white.webp" media="(prefers-color-scheme: dark)">
|
||||
|
||||
106
app/vlagent/Makefile
Normal file
@@ -0,0 +1,106 @@
|
||||
# All these commands must run from repository root.
|
||||
|
||||
vlagent:
|
||||
APP_NAME=vlagent $(MAKE) app-local
|
||||
|
||||
vlagent-race:
|
||||
APP_NAME=vlagent RACE=-race $(MAKE) app-local
|
||||
|
||||
vlagent-prod:
|
||||
APP_NAME=vlagent $(MAKE) app-via-docker
|
||||
|
||||
vlagent-pure-prod:
|
||||
APP_NAME=vlagent $(MAKE) app-via-docker-pure
|
||||
|
||||
vlagent-linux-amd64-prod:
|
||||
APP_NAME=vlagent $(MAKE) app-via-docker-linux-amd64
|
||||
|
||||
vlagent-linux-arm-prod:
|
||||
APP_NAME=vlagent $(MAKE) app-via-docker-linux-arm
|
||||
|
||||
vlagent-linux-arm64-prod:
|
||||
APP_NAME=vlagent $(MAKE) app-via-docker-linux-arm64
|
||||
|
||||
vlagent-linux-ppc64le-prod:
|
||||
APP_NAME=vlagent $(MAKE) app-via-docker-linux-ppc64le
|
||||
|
||||
vlagent-linux-386-prod:
|
||||
APP_NAME=vlagent $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vlagent-darwin-amd64-prod:
|
||||
APP_NAME=vlagent $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
vlagent-darwin-arm64-prod:
|
||||
APP_NAME=vlagent $(MAKE) app-via-docker-darwin-arm64
|
||||
|
||||
vlagent-freebsd-amd64-prod:
|
||||
APP_NAME=vlagent $(MAKE) app-via-docker-freebsd-amd64
|
||||
|
||||
vlagent-openbsd-amd64-prod:
|
||||
APP_NAME=vlagent $(MAKE) app-via-docker-openbsd-amd64
|
||||
|
||||
vlagent-windows-amd64-prod:
|
||||
APP_NAME=vlagent $(MAKE) app-via-docker-windows-amd64
|
||||
|
||||
package-vlagent:
|
||||
APP_NAME=vlagent $(MAKE) package-via-docker
|
||||
|
||||
package-vlagent-pure:
|
||||
APP_NAME=vlagent $(MAKE) package-via-docker-pure
|
||||
|
||||
package-vlagent-amd64:
|
||||
APP_NAME=vlagent $(MAKE) package-via-docker-amd64
|
||||
|
||||
package-vlagent-arm:
|
||||
APP_NAME=vlagent $(MAKE) package-via-docker-arm
|
||||
|
||||
package-vlagent-arm64:
|
||||
APP_NAME=vlagent $(MAKE) package-via-docker-arm64
|
||||
|
||||
package-vlagent-ppc64le:
|
||||
APP_NAME=vlagent $(MAKE) package-via-docker-ppc64le
|
||||
|
||||
package-vlagent-386:
|
||||
APP_NAME=vlagent $(MAKE) package-via-docker-386
|
||||
|
||||
publish-vlagent:
|
||||
APP_NAME=vlagent $(MAKE) publish-via-docker
|
||||
|
||||
vlagent-linux-amd64:
|
||||
APP_NAME=vlagent CGO_ENABLED=1 GOOS=linux GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vlagent-linux-arm:
|
||||
APP_NAME=vlagent CGO_ENABLED=0 GOOS=linux GOARCH=arm $(MAKE) app-local-goos-goarch
|
||||
|
||||
vlagent-linux-arm64:
|
||||
APP_NAME=vlagent CGO_ENABLED=0 GOOS=linux GOARCH=arm64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vlagent-linux-ppc64le:
|
||||
APP_NAME=vlagent CGO_ENABLED=0 GOOS=linux GOARCH=ppc64le $(MAKE) app-local-goos-goarch
|
||||
|
||||
vlagent-linux-s390x:
|
||||
APP_NAME=vlagent CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-local-goos-goarch
|
||||
|
||||
vlagent-linux-loong64:
|
||||
APP_NAME=vlagent CGO_ENABLED=0 GOOS=linux GOARCH=loong64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vlagent-linux-386:
|
||||
APP_NAME=vlagent CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vlagent-darwin-amd64:
|
||||
APP_NAME=vlagent CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vlagent-darwin-arm64:
|
||||
APP_NAME=vlagent CGO_ENABLED=0 GOOS=darwin GOARCH=arm64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vlagent-freebsd-amd64:
|
||||
APP_NAME=vlagent CGO_ENABLED=0 GOOS=freebsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vlagent-openbsd-amd64:
|
||||
APP_NAME=vlagent CGO_ENABLED=0 GOOS=openbsd GOARCH=amd64 $(MAKE) app-local-goos-goarch
|
||||
|
||||
vlagent-windows-amd64:
|
||||
GOARCH=amd64 APP_NAME=vlagent $(MAKE) app-local-windows-goarch
|
||||
|
||||
vlagent-pure:
|
||||
APP_NAME=vlagent $(MAKE) app-local-pure
|
||||
3
app/vlagent/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
See vlagent docs [here](https://docs.victoriametrics.com/victorialogs/vlagent/).
|
||||
|
||||
vlagent docs can be edited at [docs/vlagent.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victorialogs/vlagent.md).
|
||||
8
app/vlagent/deployment/Dockerfile
Normal file
@@ -0,0 +1,8 @@
|
||||
ARG base_image=non-existing
|
||||
FROM $base_image
|
||||
|
||||
EXPOSE 9429
|
||||
|
||||
ENTRYPOINT ["/vlagent-prod"]
|
||||
ARG src_binary=non-existing
|
||||
COPY $src_binary ./vlagent-prod
|
||||
97
app/vlagent/main.go
Normal file
@@ -0,0 +1,97 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlagent/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
|
||||
)
|
||||
|
||||
var (
|
||||
httpListenAddrs = flagutil.NewArrayString("httpListenAddr", "TCP address to listen for incoming http requests. "+
|
||||
"Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vlagent instances on the same server. "+
|
||||
"Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -tls and -httpListenAddr.useProxyProtocol")
|
||||
useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . "+
|
||||
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Write flags and help message to stdout, since it is easier to grep or pipe.
|
||||
flag.CommandLine.SetOutput(os.Stdout)
|
||||
flag.Usage = usage
|
||||
envflag.Parse()
|
||||
buildinfo.Init()
|
||||
remotewrite.InitSecretFlags()
|
||||
logger.Init()
|
||||
|
||||
remotewrite.Init()
|
||||
vlinsert.Init()
|
||||
|
||||
insertutil.SetLogRowsStorage(&remotewrite.Storage{})
|
||||
listenAddrs := *httpListenAddrs
|
||||
if len(listenAddrs) == 0 {
|
||||
listenAddrs = []string{":9429"}
|
||||
}
|
||||
logger.Infof("starting vlagent at %q...", listenAddrs)
|
||||
startTime := time.Now()
|
||||
go httpserver.Serve(listenAddrs, requestHandler, httpserver.ServeOptions{
|
||||
UseProxyProtocol: useProxyProtocol,
|
||||
})
|
||||
logger.Infof("started vlagent in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
pushmetrics.Init()
|
||||
sig := procutil.WaitForSigterm()
|
||||
logger.Infof("received signal %s", sig)
|
||||
pushmetrics.Stop()
|
||||
|
||||
startTime = time.Now()
|
||||
logger.Infof("gracefully shutting down webservice at %q", listenAddrs)
|
||||
if err := httpserver.Stop(listenAddrs); err != nil {
|
||||
logger.Fatalf("cannot stop the webservice: %s", err)
|
||||
}
|
||||
vlinsert.Stop()
|
||||
remotewrite.Stop()
|
||||
logger.Infof("successfully shut down the webservice in %.3f seconds", time.Since(startTime).Seconds())
|
||||
logger.Infof("successfully stopped vlagent in %.3f seconds", time.Since(startTime).Seconds())
|
||||
}
|
||||
|
||||
// RequestHandler handles insert requests for VictoriaLogs
|
||||
func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if r.URL.Path == "/" {
|
||||
if r.Method != http.MethodGet {
|
||||
return false
|
||||
}
|
||||
w.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
fmt.Fprintf(w, "<h2>vlagent</h2>")
|
||||
fmt.Fprintf(w, "See docs at <a href='https://docs.victoriametrics.com/victorialogs/vlagent/'>https://docs.victoriametrics.com/victorialogs/vlagent/</a></br>")
|
||||
fmt.Fprintf(w, "Useful endpoints:</br>")
|
||||
httpserver.WriteAPIHelp(w, [][2]string{
|
||||
{"metrics", "available service metrics"},
|
||||
{"flags", "command-line flags"},
|
||||
})
|
||||
return true
|
||||
}
|
||||
return vlinsert.RequestHandler(w, r)
|
||||
}
|
||||
|
||||
func usage() {
|
||||
const s = `
|
||||
vlagent collects logs via popular data ingestion protocols and routes it to VictoriaLogs.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/victorialogs/vlagent/ .
|
||||
`
|
||||
flagutil.Usage(s)
|
||||
}
|
||||
12
app/vlagent/multiarch/Dockerfile
Normal file
@@ -0,0 +1,12 @@
|
||||
# See https://medium.com/on-docker/use-multi-stage-builds-to-inject-ca-certs-ad1e8f01de1b
|
||||
ARG certs_image=non-existing
|
||||
ARG root_image=non-existing
|
||||
FROM $certs_image AS certs
|
||||
RUN apk update && apk upgrade && apk --update --no-cache add ca-certificates
|
||||
|
||||
FROM $root_image
|
||||
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
|
||||
EXPOSE 9429
|
||||
ENTRYPOINT ["/vlagent-prod"]
|
||||
ARG TARGETARCH
|
||||
COPY vlagent-linux-${TARGETARCH}-prod ./vlagent-prod
|
||||
462
app/vlagent/remotewrite/client.go
Normal file
@@ -0,0 +1,462 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
)
|
||||
|
||||
var (
|
||||
rateLimit = flagutil.NewArrayInt("remoteWrite.rateLimit", 0, "Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. "+
|
||||
"By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data ")
|
||||
sendTimeout = flagutil.NewArrayDuration("remoteWrite.sendTimeout", time.Minute, "Timeout for sending a single block of data to the corresponding -remoteWrite.url")
|
||||
retryMinInterval = flagutil.NewArrayDuration("remoteWrite.retryMinInterval", time.Second, "The minimum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxTime")
|
||||
retryMaxTime = flagutil.NewArrayDuration("remoteWrite.retryMaxTime", time.Minute, "The max time spent on retry attempts to send a block of data to the corresponding -remoteWrite.url. Change this value if it is expected for -remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval")
|
||||
proxyURL = flagutil.NewArrayString("remoteWrite.proxyURL", "Optional proxy URL for writing data to the corresponding -remoteWrite.url. "+
|
||||
"Supported proxies: http, https, socks5. Example: -remoteWrite.proxyURL=socks5://proxy:1234")
|
||||
|
||||
tlsHandshakeTimeout = flagutil.NewArrayDuration("remoteWrite.tlsHandshakeTimeout", 20*time.Second, "The timeout for establishing tls connections to the corresponding -remoteWrite.url")
|
||||
tlsInsecureSkipVerify = flagutil.NewArrayBool("remoteWrite.tlsInsecureSkipVerify", "Whether to skip tls verification when connecting to the corresponding -remoteWrite.url")
|
||||
tlsCertFile = flagutil.NewArrayString("remoteWrite.tlsCertFile", "Optional path to client-side TLS certificate file to use when connecting "+
|
||||
"to the corresponding -remoteWrite.url")
|
||||
tlsKeyFile = flagutil.NewArrayString("remoteWrite.tlsKeyFile", "Optional path to client-side TLS certificate key to use when connecting to the corresponding -remoteWrite.url")
|
||||
tlsCAFile = flagutil.NewArrayString("remoteWrite.tlsCAFile", "Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. "+
|
||||
"By default, system CA is used")
|
||||
tlsServerName = flagutil.NewArrayString("remoteWrite.tlsServerName", "Optional TLS server name to use for connections to the corresponding -remoteWrite.url. "+
|
||||
"By default, the server name from -remoteWrite.url is used")
|
||||
|
||||
headers = flagutil.NewArrayString("remoteWrite.headers", "Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. "+
|
||||
"For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. "+
|
||||
"Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'")
|
||||
|
||||
basicAuthUsername = flagutil.NewArrayString("remoteWrite.basicAuth.username", "Optional basic auth username to use for the corresponding -remoteWrite.url")
|
||||
basicAuthPassword = flagutil.NewArrayString("remoteWrite.basicAuth.password", "Optional basic auth password to use for the corresponding -remoteWrite.url")
|
||||
basicAuthPasswordFile = flagutil.NewArrayString("remoteWrite.basicAuth.passwordFile", "Optional path to basic auth password to use for the corresponding -remoteWrite.url. "+
|
||||
"The file is re-read every second")
|
||||
bearerToken = flagutil.NewArrayString("remoteWrite.bearerToken", "Optional bearer auth token to use for the corresponding -remoteWrite.url")
|
||||
bearerTokenFile = flagutil.NewArrayString("remoteWrite.bearerTokenFile", "Optional path to bearer token file to use for the corresponding -remoteWrite.url. "+
|
||||
"The token is re-read from the file every second")
|
||||
|
||||
oauth2ClientID = flagutil.NewArrayString("remoteWrite.oauth2.clientID", "Optional OAuth2 clientID to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecret = flagutil.NewArrayString("remoteWrite.oauth2.clientSecret", "Optional OAuth2 clientSecret to use for the corresponding -remoteWrite.url")
|
||||
oauth2ClientSecretFile = flagutil.NewArrayString("remoteWrite.oauth2.clientSecretFile", "Optional OAuth2 clientSecretFile to use for the corresponding -remoteWrite.url")
|
||||
oauth2EndpointParams = flagutil.NewArrayString("remoteWrite.oauth2.endpointParams", "Optional OAuth2 endpoint parameters to use for the corresponding -remoteWrite.url . "+
|
||||
`The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}`)
|
||||
oauth2TokenURL = flagutil.NewArrayString("remoteWrite.oauth2.tokenUrl", "Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url")
|
||||
oauth2Scopes = flagutil.NewArrayString("remoteWrite.oauth2.scopes", "Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'")
|
||||
)
|
||||
|
||||
type client struct {
|
||||
sanitizedURL string
|
||||
remoteWriteURL string
|
||||
|
||||
fq *persistentqueue.FastQueue
|
||||
hc *http.Client
|
||||
|
||||
retryMinInterval time.Duration
|
||||
retryMaxTime time.Duration
|
||||
|
||||
sendBlock func(block []byte) bool
|
||||
authCfg *promauth.Config
|
||||
|
||||
rl *ratelimiter.RateLimiter
|
||||
|
||||
bytesSent *metrics.Counter
|
||||
blocksSent *metrics.Counter
|
||||
requestDuration *metrics.Histogram
|
||||
requestsOKCount *metrics.Counter
|
||||
errorsCount *metrics.Counter
|
||||
packetsDropped *metrics.Counter
|
||||
rateLimit *metrics.Gauge
|
||||
retriesCount *metrics.Counter
|
||||
sendDuration *metrics.FloatCounter
|
||||
|
||||
wg sync.WaitGroup
|
||||
stopCh chan struct{}
|
||||
}
|
||||
|
||||
func newHTTPClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqueue.FastQueue, concurrency int) *client {
|
||||
authCfg, err := getAuthConfig(argIdx)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot initialize auth config for -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
|
||||
tr := httputil.NewTransport(false, "vlagent_remotewrite")
|
||||
tr.TLSHandshakeTimeout = tlsHandshakeTimeout.GetOptionalArg(argIdx)
|
||||
tr.MaxConnsPerHost = 2 * concurrency
|
||||
tr.MaxIdleConnsPerHost = 2 * concurrency
|
||||
tr.IdleConnTimeout = time.Minute
|
||||
tr.WriteBufferSize = 64 * 1024
|
||||
|
||||
pURL := proxyURL.GetOptionalArg(argIdx)
|
||||
if len(pURL) > 0 {
|
||||
if !strings.Contains(pURL, "://") {
|
||||
logger.Fatalf("cannot parse -remoteWrite.proxyURL=%q: it must start with `http://`, `https://` or `socks5://`", pURL)
|
||||
}
|
||||
pu, err := url.Parse(pURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("cannot parse -remoteWrite.proxyURL=%q: %s", pURL, err)
|
||||
}
|
||||
tr.Proxy = http.ProxyURL(pu)
|
||||
}
|
||||
hc := &http.Client{
|
||||
Transport: authCfg.NewRoundTripper(tr),
|
||||
Timeout: sendTimeout.GetOptionalArg(argIdx),
|
||||
}
|
||||
c := &client{
|
||||
sanitizedURL: sanitizedURL,
|
||||
remoteWriteURL: remoteWriteURL,
|
||||
authCfg: authCfg,
|
||||
fq: fq,
|
||||
hc: hc,
|
||||
retryMinInterval: retryMinInterval.GetOptionalArg(argIdx),
|
||||
retryMaxTime: retryMaxTime.GetOptionalArg(argIdx),
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
c.sendBlock = c.sendBlockHTTP
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
|
||||
limitReached := metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_rate_limit_reached_total{url=%q}`, c.sanitizedURL))
|
||||
if bytesPerSec := rateLimit.GetOptionalArg(argIdx); bytesPerSec > 0 {
|
||||
logger.Infof("applying %d bytes per second rate limit for -remoteWrite.url=%q", bytesPerSec, sanitizedURL)
|
||||
c.rl = ratelimiter.New(int64(bytesPerSec), limitReached, c.stopCh)
|
||||
}
|
||||
c.bytesSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_bytes_sent_total{url=%q}`, c.sanitizedURL))
|
||||
c.blocksSent = metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_blocks_sent_total{url=%q}`, c.sanitizedURL))
|
||||
c.rateLimit = metrics.GetOrCreateGauge(fmt.Sprintf(`vlagent_remotewrite_rate_limit{url=%q}`, c.sanitizedURL), func() float64 {
|
||||
return float64(rateLimit.GetOptionalArg(argIdx))
|
||||
})
|
||||
c.requestDuration = metrics.GetOrCreateHistogram(fmt.Sprintf(`vlagent_remotewrite_duration_seconds{url=%q}`, c.sanitizedURL))
|
||||
c.requestsOKCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_requests_total{url=%q, status_code="2XX"}`, c.sanitizedURL))
|
||||
c.errorsCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_errors_total{url=%q}`, c.sanitizedURL))
|
||||
c.packetsDropped = metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_packets_dropped_total{url=%q}`, c.sanitizedURL))
|
||||
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
|
||||
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vlagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
|
||||
metrics.GetOrCreateGauge(fmt.Sprintf(`vlagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
|
||||
return float64(*queues)
|
||||
})
|
||||
for i := 0; i < concurrency; i++ {
|
||||
c.wg.Add(1)
|
||||
go func() {
|
||||
defer c.wg.Done()
|
||||
c.runWorker()
|
||||
}()
|
||||
}
|
||||
logger.Infof("initialized client for -remoteWrite.url=%q", c.sanitizedURL)
|
||||
}
|
||||
|
||||
func (c *client) MustStop() {
|
||||
close(c.stopCh)
|
||||
c.wg.Wait()
|
||||
logger.Infof("stopped client for -remoteWrite.url=%q", c.sanitizedURL)
|
||||
}
|
||||
|
||||
func getAuthConfig(argIdx int) (*promauth.Config, error) {
|
||||
headersValue := headers.GetOptionalArg(argIdx)
|
||||
var hdrs []string
|
||||
if headersValue != "" {
|
||||
hdrs = strings.Split(headersValue, "^^")
|
||||
}
|
||||
username := basicAuthUsername.GetOptionalArg(argIdx)
|
||||
password := basicAuthPassword.GetOptionalArg(argIdx)
|
||||
passwordFile := basicAuthPasswordFile.GetOptionalArg(argIdx)
|
||||
var basicAuthCfg *promauth.BasicAuthConfig
|
||||
if username != "" || password != "" || passwordFile != "" {
|
||||
basicAuthCfg = &promauth.BasicAuthConfig{
|
||||
Username: username,
|
||||
Password: promauth.NewSecret(password),
|
||||
PasswordFile: passwordFile,
|
||||
}
|
||||
}
|
||||
|
||||
token := bearerToken.GetOptionalArg(argIdx)
|
||||
tokenFile := bearerTokenFile.GetOptionalArg(argIdx)
|
||||
|
||||
var oauth2Cfg *promauth.OAuth2Config
|
||||
clientSecret := oauth2ClientSecret.GetOptionalArg(argIdx)
|
||||
clientSecretFile := oauth2ClientSecretFile.GetOptionalArg(argIdx)
|
||||
if clientSecretFile != "" || clientSecret != "" {
|
||||
endpointParamsJSON := oauth2EndpointParams.GetOptionalArg(argIdx)
|
||||
endpointParams, err := flagutil.ParseJSONMap(endpointParamsJSON)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse JSON for -remoteWrite.oauth2.endpointParams=%s: %w", endpointParamsJSON, err)
|
||||
}
|
||||
oauth2Cfg = &promauth.OAuth2Config{
|
||||
ClientID: oauth2ClientID.GetOptionalArg(argIdx),
|
||||
ClientSecret: promauth.NewSecret(clientSecret),
|
||||
ClientSecretFile: clientSecretFile,
|
||||
EndpointParams: endpointParams,
|
||||
TokenURL: oauth2TokenURL.GetOptionalArg(argIdx),
|
||||
Scopes: strings.Split(oauth2Scopes.GetOptionalArg(argIdx), ";"),
|
||||
}
|
||||
}
|
||||
|
||||
tlsCfg := &promauth.TLSConfig{
|
||||
CAFile: tlsCAFile.GetOptionalArg(argIdx),
|
||||
CertFile: tlsCertFile.GetOptionalArg(argIdx),
|
||||
KeyFile: tlsKeyFile.GetOptionalArg(argIdx),
|
||||
ServerName: tlsServerName.GetOptionalArg(argIdx),
|
||||
InsecureSkipVerify: tlsInsecureSkipVerify.GetOptionalArg(argIdx),
|
||||
}
|
||||
|
||||
opts := &promauth.Options{
|
||||
BasicAuth: basicAuthCfg,
|
||||
BearerToken: token,
|
||||
BearerTokenFile: tokenFile,
|
||||
OAuth2: oauth2Cfg,
|
||||
TLSConfig: tlsCfg,
|
||||
Headers: hdrs,
|
||||
}
|
||||
authCfg, err := opts.NewConfig()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot populate auth config for remoteWrite idx: %d, err: %w", argIdx, err)
|
||||
}
|
||||
return authCfg, nil
|
||||
}
|
||||
|
||||
func (c *client) runWorker() {
|
||||
var ok bool
|
||||
var block []byte
|
||||
ch := make(chan bool, 1)
|
||||
for {
|
||||
block, ok = c.fq.MustReadBlock(block[:0])
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if len(block) == 0 {
|
||||
// skip empty data blocks from sending
|
||||
continue
|
||||
}
|
||||
go func() {
|
||||
startTime := time.Now()
|
||||
ch <- c.sendBlock(block)
|
||||
c.sendDuration.Add(time.Since(startTime).Seconds())
|
||||
}()
|
||||
select {
|
||||
case ok := <-ch:
|
||||
if ok {
|
||||
// The block has been sent successfully
|
||||
continue
|
||||
}
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
return
|
||||
case <-c.stopCh:
|
||||
// c must be stopped. Wait for a while in the hope the block will be sent.
|
||||
graceDuration := 5 * time.Second
|
||||
select {
|
||||
case ok := <-ch:
|
||||
if !ok {
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
}
|
||||
case <-time.After(graceDuration):
|
||||
// Return unsent block to the queue.
|
||||
c.fq.MustWriteBlockIgnoreDisabledPQ(block)
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (c *client) doRequest(url string, body []byte) (*http.Response, error) {
|
||||
req, err := c.newRequest(url, body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := c.hc.Do(req)
|
||||
if err == nil {
|
||||
return resp, nil
|
||||
}
|
||||
if !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
|
||||
return nil, err
|
||||
}
|
||||
// It is likely connection became stale or timed out during the first request.
|
||||
// Make another attempt in hope request will succeed.
|
||||
// If not, the error should be handled by the caller as usual.
|
||||
// This should help with https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4139
|
||||
req, err = c.newRequest(url, body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
resp, err = c.hc.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("second attempt: %w", err)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
|
||||
reqBody := bytes.NewBuffer(body)
|
||||
req, err := http.NewRequest(http.MethodPost, url, reqBody)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error from http.NewRequest(%q): %s", url, err)
|
||||
}
|
||||
err = c.authCfg.SetHeaders(req, true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
h := req.Header
|
||||
h.Set("User-Agent", "vlagent")
|
||||
h.Set("Content-Encoding", "zstd")
|
||||
h.Set("Content-Type", "application/octet-stream")
|
||||
|
||||
return req, nil
|
||||
}
|
||||
|
||||
// sendBlockHTTP sends the given block to c.remoteWriteURL.
|
||||
//
|
||||
// The function returns false only if c.stopCh is closed.
|
||||
// Otherwise, it tries sending the block to remote storage indefinitely.
|
||||
func (c *client) sendBlockHTTP(block []byte) bool {
|
||||
c.rl.Register(len(block))
|
||||
maxRetryDuration := timeutil.AddJitterToDuration(c.retryMaxTime)
|
||||
retryDuration := timeutil.AddJitterToDuration(c.retryMinInterval)
|
||||
retriesCount := 0
|
||||
|
||||
again:
|
||||
startTime := time.Now()
|
||||
resp, err := c.doRequest(c.remoteWriteURL, block)
|
||||
c.requestDuration.UpdateDuration(startTime)
|
||||
if err != nil {
|
||||
c.errorsCount.Inc()
|
||||
retryDuration *= 2
|
||||
if retryDuration > maxRetryDuration {
|
||||
retryDuration = maxRetryDuration
|
||||
}
|
||||
remoteWriteRetryLogger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
|
||||
len(block), c.sanitizedURL, err, retryDuration.Seconds())
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
timerpool.Put(t)
|
||||
return false
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
}
|
||||
|
||||
statusCode := resp.StatusCode
|
||||
if statusCode/100 == 2 {
|
||||
_ = resp.Body.Close()
|
||||
c.requestsOKCount.Inc()
|
||||
c.bytesSent.Add(len(block))
|
||||
c.blocksSent.Inc()
|
||||
return true
|
||||
}
|
||||
|
||||
metrics.GetOrCreateCounter(fmt.Sprintf(`vlagent_remotewrite_requests_total{url=%q, status_code="%d"}`, c.sanitizedURL, statusCode)).Inc()
|
||||
if statusCode == 400 || statusCode == 404 {
|
||||
logBlockRejected(block, c.sanitizedURL, resp)
|
||||
_ = resp.Body.Close()
|
||||
c.packetsDropped.Inc()
|
||||
return true
|
||||
}
|
||||
// Unexpected status code returned
|
||||
retriesCount++
|
||||
retryAfterHeader := parseRetryAfterHeader(resp.Header.Get("Retry-After"))
|
||||
retryDuration = getRetryDuration(retryAfterHeader, retryDuration, maxRetryDuration)
|
||||
|
||||
// Handle response
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
if err != nil {
|
||||
logger.Errorf("cannot read response body from %q during retry #%d: %s", c.sanitizedURL, retriesCount, err)
|
||||
} else {
|
||||
logger.Errorf("unexpected status code received after sending a block with size %d bytes to %q during retry #%d: %d; response body=%q; "+
|
||||
"re-sending the block in %.3f seconds", len(block), c.sanitizedURL, retriesCount, statusCode, body, retryDuration.Seconds())
|
||||
}
|
||||
t := timerpool.Get(retryDuration)
|
||||
select {
|
||||
case <-c.stopCh:
|
||||
timerpool.Put(t)
|
||||
return false
|
||||
case <-t.C:
|
||||
timerpool.Put(t)
|
||||
}
|
||||
c.retriesCount.Inc()
|
||||
goto again
|
||||
}
|
||||
|
||||
var remoteWriteRejectedLogger = logger.WithThrottler("remoteWriteRejected", 5*time.Second)
|
||||
var remoteWriteRetryLogger = logger.WithThrottler("remoteWriteRetry", 5*time.Second)
|
||||
|
||||
// getRetryDuration returns retry duration.
|
||||
// retryAfterDuration has the highest priority.
|
||||
// If retryAfterDuration is not specified, retryDuration gets doubled.
|
||||
// retryDuration can't exceed maxRetryDuration.
|
||||
//
|
||||
// Also see: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6097
|
||||
func getRetryDuration(retryAfterDuration, retryDuration, maxRetryDuration time.Duration) time.Duration {
|
||||
// retryAfterDuration has the highest priority duration
|
||||
if retryAfterDuration > 0 {
|
||||
return timeutil.AddJitterToDuration(retryAfterDuration)
|
||||
}
|
||||
|
||||
// default backoff retry policy
|
||||
retryDuration *= 2
|
||||
if retryDuration > maxRetryDuration {
|
||||
retryDuration = maxRetryDuration
|
||||
}
|
||||
|
||||
return retryDuration
|
||||
}
|
||||
|
||||
func logBlockRejected(block []byte, sanitizedURL string, resp *http.Response) {
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
remoteWriteRejectedLogger.Errorf("sending a block with size %d bytes to %q was rejected (skipping the block): status code %d; "+
|
||||
"failed to read response body: %s",
|
||||
len(block), sanitizedURL, resp.StatusCode, err)
|
||||
} else {
|
||||
remoteWriteRejectedLogger.Errorf("sending a block with size %d bytes to %q was rejected (skipping the block): status code %d; response body: %s",
|
||||
len(block), sanitizedURL, resp.StatusCode, string(body))
|
||||
}
|
||||
}
|
||||
|
||||
// parseRetryAfterHeader parses `Retry-After` value retrieved from HTTP response header.
|
||||
// retryAfterString should be in either HTTP-date or a number of seconds.
|
||||
// It will return time.Duration(0) if `retryAfterString` does not follow RFC 7231.
|
||||
func parseRetryAfterHeader(retryAfterString string) (retryAfterDuration time.Duration) {
|
||||
if retryAfterString == "" {
|
||||
return retryAfterDuration
|
||||
}
|
||||
|
||||
defer func() {
|
||||
v := retryAfterDuration.Seconds()
|
||||
logger.Infof("'Retry-After: %s' parsed into %.2f second(s)", retryAfterString, v)
|
||||
}()
|
||||
|
||||
// Retry-After could be in "Mon, 02 Jan 2006 15:04:05 GMT" format.
|
||||
if parsedTime, err := time.Parse(http.TimeFormat, retryAfterString); err == nil {
|
||||
return time.Duration(time.Until(parsedTime).Seconds()) * time.Second
|
||||
}
|
||||
// Retry-After could be in seconds.
|
||||
if seconds, err := strconv.Atoi(retryAfterString); err == nil {
|
||||
return time.Duration(seconds) * time.Second
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
99
app/vlagent/remotewrite/client_test.go
Normal file
@@ -0,0 +1,99 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"math"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestCalculateRetryDuration(t *testing.T) {
|
||||
// `testFunc` call `calculateRetryDuration` for `n` times
|
||||
// and evaluate if the result of `calculateRetryDuration` is
|
||||
// 1. >= expectMinDuration
|
||||
// 2. <= expectMinDuration + 10% (see timeutil.AddJitterToDuration)
|
||||
f := func(retryAfterDuration, retryDuration time.Duration, n int, expectMinDuration time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
retryDuration = getRetryDuration(retryAfterDuration, retryDuration, time.Minute)
|
||||
}
|
||||
|
||||
expectMaxDuration := helper(expectMinDuration)
|
||||
expectMinDuration = expectMinDuration - (1000 * time.Millisecond) // Avoid edge case when calculating time.Until(now)
|
||||
|
||||
if !(retryDuration >= expectMinDuration && retryDuration <= expectMaxDuration) {
|
||||
t.Fatalf(
|
||||
"incorrect retry duration, want (ms): [%d, %d], got (ms): %d",
|
||||
expectMinDuration.Milliseconds(), expectMaxDuration.Milliseconds(),
|
||||
retryDuration.Milliseconds(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Call calculateRetryDuration for 1 time.
|
||||
{
|
||||
// default backoff policy
|
||||
f(0, time.Second, 1, 2*time.Second)
|
||||
// default backoff policy exceed max limit"
|
||||
f(0, 10*time.Minute, 1, time.Minute)
|
||||
|
||||
// retry after > default backoff policy
|
||||
f(10*time.Second, 1*time.Second, 1, 10*time.Second)
|
||||
// retry after < default backoff policy
|
||||
f(1*time.Second, 10*time.Second, 1, 1*time.Second)
|
||||
// retry after invalid and < default backoff policy
|
||||
f(0, time.Second, 1, 2*time.Second)
|
||||
|
||||
}
|
||||
|
||||
// Call calculateRetryDuration for multiple times.
|
||||
{
|
||||
// default backoff policy 2 times
|
||||
f(0, time.Second, 2, 4*time.Second)
|
||||
// default backoff policy 3 times
|
||||
f(0, time.Second, 3, 8*time.Second)
|
||||
// default backoff policy N times exceed max limit
|
||||
f(0, time.Second, 10, time.Minute)
|
||||
|
||||
// retry after 120s 1 times
|
||||
f(120*time.Second, time.Second, 1, 120*time.Second)
|
||||
// retry after 120s 2 times
|
||||
f(120*time.Second, time.Second, 2, 120*time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRetryAfterHeader(t *testing.T) {
|
||||
f := func(retryAfterString string, expectResult time.Duration) {
|
||||
t.Helper()
|
||||
|
||||
result := parseRetryAfterHeader(retryAfterString)
|
||||
// expect `expectResult == result` when retryAfterString is in seconds or invalid
|
||||
// expect the difference between result and expectResult to be lower than 10%
|
||||
if !(expectResult == result || math.Abs(float64(expectResult-result))/float64(expectResult) < 0.10) {
|
||||
t.Fatalf(
|
||||
"incorrect retry after duration, want (ms): %d, got (ms): %d",
|
||||
expectResult.Milliseconds(), result.Milliseconds(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// retry after header in seconds
|
||||
f("10", 10*time.Second)
|
||||
// retry after header in date time
|
||||
f(time.Now().Add(30*time.Second).UTC().Format(http.TimeFormat), 30*time.Second)
|
||||
// retry after header invalid
|
||||
f("invalid-retry-after", 0)
|
||||
// retry after header not in GMT
|
||||
f(time.Now().Add(10*time.Second).Format("Mon, 02 Jan 2006 15:04:05 FAKETZ"), 0)
|
||||
}
|
||||
|
||||
// helper calculate the max possible time duration calculated by timeutil.AddJitterToDuration.
|
||||
func helper(d time.Duration) time.Duration {
|
||||
dv := d / 10
|
||||
if dv > 10*time.Second {
|
||||
dv = 10 * time.Second
|
||||
}
|
||||
|
||||
return d + dv
|
||||
}
|
||||
158
app/vlagent/remotewrite/pendinglogrows.go
Normal file
@@ -0,0 +1,158 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding/zstd"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
||||
)
|
||||
|
||||
var (
|
||||
maxUnpackedBlockSize = flagutil.NewBytes("remoteWrite.maxBlockSize", 8*1024*1024, "The maximum block size to send to remote storage. Bigger blocks may improve performance at the cost of the increased memory usage.")
|
||||
flushInterval = flag.Duration("remoteWrite.flushInterval", time.Second, "Interval for flushing the data to remote storage. "+
|
||||
"This option takes effect only when less than 2MB of data per second are pushed to -remoteWrite.url")
|
||||
)
|
||||
|
||||
type pendingLogs struct {
|
||||
lastFlushTime atomic.Uint64
|
||||
|
||||
// The queue to send blocks to.
|
||||
fq *persistentqueue.FastQueue
|
||||
|
||||
// mu protects wr
|
||||
mu sync.Mutex
|
||||
wr writeRequest
|
||||
|
||||
stopCh chan struct{}
|
||||
periodicFlusherWG sync.WaitGroup
|
||||
}
|
||||
|
||||
func newPendingLogs(fq *persistentqueue.FastQueue) *pendingLogs {
|
||||
pl := &pendingLogs{
|
||||
fq: fq,
|
||||
stopCh: make(chan struct{}),
|
||||
}
|
||||
|
||||
pl.periodicFlusherWG.Add(1)
|
||||
go func() {
|
||||
defer pl.periodicFlusherWG.Done()
|
||||
pl.periodicFlusher()
|
||||
}()
|
||||
|
||||
return pl
|
||||
}
|
||||
|
||||
func (pl *pendingLogs) add(lr *logstorage.LogRows) {
|
||||
lr.ForEachRow(func(_ uint64, r *logstorage.InsertRow) {
|
||||
pl.addLogRow(r)
|
||||
})
|
||||
}
|
||||
|
||||
func (pl *pendingLogs) addLogRow(r *logstorage.InsertRow) {
|
||||
bb := bbPool.Get()
|
||||
bb.B = r.Marshal(bb.B)
|
||||
|
||||
pl.mu.Lock()
|
||||
_, _ = pl.wr.pendingData.Write(bb.B)
|
||||
pl.wr.pendingLogRowsCount++
|
||||
if len(pl.wr.pendingData.B) > maxUnpackedBlockSize.IntN() {
|
||||
pl.mustFlushLocked()
|
||||
}
|
||||
pl.mu.Unlock()
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func (pl *pendingLogs) mustFlushLocked() {
|
||||
pl.lastFlushTime.Store(fasttime.UnixTimestamp())
|
||||
pl.wr.push(func(b []byte) {
|
||||
if !pl.fq.TryWriteBlock(b) {
|
||||
logger.Fatalf("BUG: TryWriteBlock cannot return false")
|
||||
}
|
||||
})
|
||||
pl.wr.reset()
|
||||
}
|
||||
|
||||
func (pl *pendingLogs) periodicFlusher() {
|
||||
flushSeconds := int64(flushInterval.Seconds())
|
||||
if flushSeconds <= 0 {
|
||||
flushSeconds = 1
|
||||
}
|
||||
d := timeutil.AddJitterToDuration(*flushInterval)
|
||||
ticker := time.NewTicker(d)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-pl.stopCh:
|
||||
pl.mu.Lock()
|
||||
pl.mustFlushOnStop()
|
||||
pl.mu.Unlock()
|
||||
return
|
||||
case <-ticker.C:
|
||||
if fasttime.UnixTimestamp()-pl.lastFlushTime.Load() < uint64(flushSeconds) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
pl.mu.Lock()
|
||||
pl.mustFlushLocked()
|
||||
pl.mu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// mustFlushOnStop force pushes wr data
|
||||
//
|
||||
// This is needed in order to properly save in-memory data to persistent queue on graceful shutdown.
|
||||
func (pl *pendingLogs) mustFlushOnStop() {
|
||||
pl.wr.push(pl.fq.MustWriteBlockIgnoreDisabledPQ)
|
||||
pl.wr.reset()
|
||||
}
|
||||
|
||||
func (pl *pendingLogs) mustStop() {
|
||||
close(pl.stopCh)
|
||||
pl.periodicFlusherWG.Wait()
|
||||
}
|
||||
|
||||
type writeRequest struct {
|
||||
pendingData bytesutil.ByteBuffer
|
||||
pendingLogRowsCount int64
|
||||
}
|
||||
|
||||
func (wr *writeRequest) push(pushBlock func([]byte)) {
|
||||
if len(wr.pendingData.B) == 0 {
|
||||
return
|
||||
}
|
||||
b := wr.pendingData.B
|
||||
|
||||
zb := compressBufPool.Get()
|
||||
zb.B = zstd.CompressLevel(zb.B[:0], b, 1)
|
||||
zbLen := len(zb.B)
|
||||
pushBlock(zb.B)
|
||||
compressBufPool.Put(zb)
|
||||
blockSizeBytes.Update(float64(zbLen))
|
||||
blockSizeLogRows.Update(float64(wr.pendingLogRowsCount))
|
||||
}
|
||||
|
||||
func (wr *writeRequest) reset() {
|
||||
wr.pendingData.Reset()
|
||||
wr.pendingLogRowsCount = 0
|
||||
}
|
||||
|
||||
var (
|
||||
blockSizeBytes = metrics.NewHistogram(`vlagent_remotewrite_block_size_bytes`)
|
||||
blockSizeLogRows = metrics.NewHistogram(`vlagent_remotewrite_block_size_rows`)
|
||||
)
|
||||
|
||||
var (
|
||||
compressBufPool bytesutil.ByteBufferPool
|
||||
bbPool bytesutil.ByteBufferPool
|
||||
)
|
||||
277
app/vlagent/remotewrite/remotewrite.go
Normal file
@@ -0,0 +1,277 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
"github.com/cespare/xxhash/v2"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage/netinsert"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
)
|
||||
|
||||
var (
|
||||
remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support VictoriaLogs native protocol. "+
|
||||
"Example url: http://<victorialogs-host>:9428/internal/insert. "+
|
||||
"Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems.")
|
||||
maxPendingBytesPerURL = flagutil.NewArrayBytes("remoteWrite.maxDiskUsagePerURL", 0, "The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath "+
|
||||
"for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. "+
|
||||
"Buffered data is stored in ~500MB chunks. It is recommended to set the value for this flag to a multiple of the block size 500MB. "+
|
||||
"Disk usage is unlimited if the value is set to 0")
|
||||
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vlagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL")
|
||||
queues = flag.Int("remoteWrite.queues", cgroup.AvailableCPUs()*2, "The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues "+
|
||||
"isn't enough for sending high volume of collected data to remote storage. "+
|
||||
"Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage")
|
||||
|
||||
showRemoteWriteURL = flag.Bool("remoteWrite.showURL", false, "Whether to show -remoteWrite.url in the exported metrics. "+
|
||||
"It is hidden by default, since it can contain sensitive info such as auth key")
|
||||
)
|
||||
|
||||
// rwctxsGlobal contains statically populated entries when -remoteWrite.url is specified.
|
||||
var rwctxsGlobal []*remoteWriteCtx
|
||||
|
||||
// Storage implements insertutil.LogRowsStorage interface
|
||||
type Storage struct{}
|
||||
|
||||
// MustAddRows implements insertutil.LogRowsStorage interface
|
||||
func (*Storage) MustAddRows(lr *logstorage.LogRows) {
|
||||
pushToRemoteStorages(lr)
|
||||
}
|
||||
|
||||
// CanWriteData implements insertutil.LogRowsStorage interface
|
||||
func (*Storage) CanWriteData() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// maxQueues limits the maximum value for `-remoteWrite.queues`. There is no sense in setting too high value,
|
||||
// since it may lead to high memory usage due to big number of buffers.
|
||||
var maxQueues = cgroup.AvailableCPUs() * 16
|
||||
|
||||
const persistentQueueDirname = "persistent-queue"
|
||||
|
||||
// InitSecretFlags must be called after flag.Parse and before any logging.
|
||||
func InitSecretFlags() {
|
||||
if !*showRemoteWriteURL {
|
||||
// remoteWrite.url can contain authentication codes, so hide it at `/metrics` output.
|
||||
flagutil.RegisterSecretFlag("remoteWrite.url")
|
||||
}
|
||||
}
|
||||
|
||||
// Init initializes remotewrite.
|
||||
//
|
||||
// It must be called after flag.Parse().
|
||||
//
|
||||
// Stop must be called for graceful shutdown.
|
||||
func Init() {
|
||||
if len(*remoteWriteURLs) == 0 {
|
||||
logger.Fatalf("at least one `-remoteWrite.url` command-line flag must be set")
|
||||
}
|
||||
if *queues > maxQueues {
|
||||
*queues = maxQueues
|
||||
}
|
||||
if *queues <= 0 {
|
||||
*queues = 1
|
||||
}
|
||||
initRemoteWriteCtxs(*remoteWriteURLs)
|
||||
dropDanglingQueues()
|
||||
}
|
||||
|
||||
// Stop stops remotewrite.
|
||||
//
|
||||
// It is expected that nobody calls TryPush during and after the call to this func.
|
||||
func Stop() {
|
||||
for _, rwctx := range rwctxsGlobal {
|
||||
rwctx.mustStop()
|
||||
}
|
||||
rwctxsGlobal = nil
|
||||
}
|
||||
|
||||
func dropDanglingQueues() {
|
||||
// Remove dangling persistent queues, if any.
|
||||
// This is required for the case when the number of queues has been changed or URL have been changed.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4014
|
||||
//
|
||||
// In case if there were many persistent queues with identical *remoteWriteURLs
|
||||
// the queue with the last index will be dropped.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6140
|
||||
existingQueues := make(map[string]struct{}, len(rwctxsGlobal))
|
||||
for _, rwctx := range rwctxsGlobal {
|
||||
existingQueues[rwctx.fq.Dirname()] = struct{}{}
|
||||
}
|
||||
|
||||
queuesDir := filepath.Join(*tmpDataPath, persistentQueueDirname)
|
||||
files := fs.MustReadDir(queuesDir)
|
||||
removed := 0
|
||||
for _, f := range files {
|
||||
dirname := f.Name()
|
||||
if _, ok := existingQueues[dirname]; !ok {
|
||||
logger.Infof("removing dangling queue %q", dirname)
|
||||
fullPath := filepath.Join(queuesDir, dirname)
|
||||
fs.MustRemoveAll(fullPath)
|
||||
removed++
|
||||
}
|
||||
}
|
||||
if removed > 0 {
|
||||
logger.Infof("removed %d dangling queues from %q, active queues: %d", removed, *tmpDataPath, len(rwctxsGlobal))
|
||||
}
|
||||
}
|
||||
|
||||
func initRemoteWriteCtxs(urls []string) {
|
||||
if len(urls) == 0 {
|
||||
logger.Panicf("BUG: urls must be non-empty")
|
||||
}
|
||||
|
||||
maxInmemoryBlocks := memory.Allowed() / len(urls) / 10000
|
||||
if maxInmemoryBlocks / *queues > 100 {
|
||||
// There is no much sense in keeping higher number of blocks in memory,
|
||||
// since this means that the producer outperforms consumer and the queue
|
||||
// will continue growing. It is better storing the queue to file.
|
||||
maxInmemoryBlocks = 100 * *queues
|
||||
}
|
||||
if maxInmemoryBlocks < 2 {
|
||||
maxInmemoryBlocks = 2
|
||||
}
|
||||
rwctxs := make([]*remoteWriteCtx, len(urls))
|
||||
rwctxIdx := make([]int, len(urls))
|
||||
for i, remoteWriteURLRaw := range urls {
|
||||
remoteWriteURL, err := url.Parse(remoteWriteURLRaw)
|
||||
if err != nil {
|
||||
logger.Fatalf("invalid -remoteWrite.url=%q: %s", remoteWriteURL, err)
|
||||
}
|
||||
sanitizedURL := fmt.Sprintf("%d:secret-url", i+1)
|
||||
if *showRemoteWriteURL {
|
||||
sanitizedURL = fmt.Sprintf("%d:%s", i+1, remoteWriteURL)
|
||||
}
|
||||
rwctxs[i] = newRemoteWriteCtx(i, remoteWriteURL, maxInmemoryBlocks, sanitizedURL)
|
||||
rwctxIdx[i] = i
|
||||
}
|
||||
|
||||
rwctxsGlobal = rwctxs
|
||||
}
|
||||
|
||||
func pushToRemoteStorages(lr *logstorage.LogRows) {
|
||||
rwctxs := rwctxsGlobal
|
||||
if len(rwctxs) == 1 {
|
||||
// fast path
|
||||
rwctxs[0].push(lr)
|
||||
return
|
||||
}
|
||||
// Push samples to remote storage systems in parallel in order to reduce
|
||||
// the time needed for sending the data to multiple remote storage systems.
|
||||
var wg sync.WaitGroup
|
||||
for _, rwctx := range rwctxs {
|
||||
wg.Add(1)
|
||||
go func(rwctx *remoteWriteCtx) {
|
||||
defer wg.Done()
|
||||
rwctx.push(lr)
|
||||
|
||||
}(rwctx)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
type remoteWriteCtx struct {
|
||||
idx int
|
||||
fq *persistentqueue.FastQueue
|
||||
c *client
|
||||
|
||||
pls []*pendingLogs
|
||||
pssNextIdx atomic.Uint64
|
||||
}
|
||||
|
||||
func newRemoteWriteCtx(argIdx int, remoteWriteURL *url.URL, maxInmemoryBlocks int, sanitizedURL string) *remoteWriteCtx {
|
||||
// protocol version is required by victoria-logs
|
||||
q := remoteWriteURL.Query()
|
||||
q.Set("version", netinsert.ProtocolVersion)
|
||||
remoteWriteURL.RawQuery = q.Encode()
|
||||
|
||||
// strip query params, otherwise changing params resets pq
|
||||
pqURL := *remoteWriteURL
|
||||
pqURL.RawQuery = ""
|
||||
pqURL.Fragment = ""
|
||||
h := xxhash.Sum64([]byte(pqURL.String()))
|
||||
queuePath := filepath.Join(*tmpDataPath, persistentQueueDirname, fmt.Sprintf("%d_%016X", argIdx+1, h))
|
||||
maxPendingBytes := maxPendingBytesPerURL.GetOptionalArg(argIdx)
|
||||
if maxPendingBytes != 0 && maxPendingBytes < persistentqueue.DefaultChunkFileSize {
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4195
|
||||
logger.Warnf("rounding the -remoteWrite.maxDiskUsagePerURL=%d to the minimum supported value: %d", maxPendingBytes, persistentqueue.DefaultChunkFileSize)
|
||||
maxPendingBytes = persistentqueue.DefaultChunkFileSize
|
||||
}
|
||||
|
||||
fq := persistentqueue.MustOpenFastQueue(queuePath, sanitizedURL, maxInmemoryBlocks, maxPendingBytes, false)
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vlagent_remotewrite_pending_data_bytes{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetPendingBytes())
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vlagent_remotewrite_pending_inmemory_blocks{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
return float64(fq.GetInmemoryQueueLen())
|
||||
})
|
||||
_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vlagent_remotewrite_queue_blocked{path=%q, url=%q}`, queuePath, sanitizedURL), func() float64 {
|
||||
if fq.IsWriteBlocked() {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
})
|
||||
|
||||
var c *client
|
||||
switch remoteWriteURL.Scheme {
|
||||
case "http", "https":
|
||||
c = newHTTPClient(argIdx, remoteWriteURL.String(), sanitizedURL, fq, *queues)
|
||||
default:
|
||||
logger.Fatalf("unsupported scheme: %s for remoteWriteURL: %s, want `http`, `https`", remoteWriteURL.Scheme, sanitizedURL)
|
||||
}
|
||||
c.init(argIdx, *queues, sanitizedURL)
|
||||
|
||||
// Initialize pss
|
||||
plsLen := *queues
|
||||
if n := cgroup.AvailableCPUs(); plsLen > n {
|
||||
// There is no sense in running more than availableCPUs concurrent pendingLogs,
|
||||
// since every pendingLogs can saturate up to a single CPU.
|
||||
plsLen = n
|
||||
}
|
||||
pls := make([]*pendingLogs, plsLen)
|
||||
for i := range pls {
|
||||
pls[i] = newPendingLogs(fq)
|
||||
}
|
||||
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: argIdx,
|
||||
fq: fq,
|
||||
c: c,
|
||||
pls: pls,
|
||||
}
|
||||
|
||||
return rwctx
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) push(lr *logstorage.LogRows) {
|
||||
pls := rwctx.pls
|
||||
idx := rwctx.pssNextIdx.Add(1) % uint64(len(pls))
|
||||
pls[idx].add(lr)
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) mustStop() {
|
||||
for _, ps := range rwctx.pls {
|
||||
ps.mustStop()
|
||||
}
|
||||
rwctx.idx = 0
|
||||
rwctx.pls = nil
|
||||
rwctx.fq.UnblockAllReaders()
|
||||
rwctx.c.MustStop()
|
||||
rwctx.c = nil
|
||||
|
||||
rwctx.fq.MustClose()
|
||||
rwctx.fq = nil
|
||||
}
|
||||
@@ -276,7 +276,7 @@ func readJournaldLogEntry(streamName string, lr *insertutil.LineReader, lmp inse
|
||||
}
|
||||
size := binary.LittleEndian.Uint64(fb.value[:8])
|
||||
|
||||
// Read the value until its lenth exceeds the given size - the last char in the read value will always be '\n'
|
||||
// Read the value until its length exceeds the given size - the last char in the read value will always be '\n'
|
||||
// because it is appended by appendNextLineToValue().
|
||||
for uint64(len(fb.value[8:])) <= size {
|
||||
if err := fb.appendNextLineToValue(lr); err != nil {
|
||||
|
||||
@@ -89,15 +89,18 @@ func (t *Type) ValidateExpr(expr string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// SupportedType is true if given datasource type is supported
|
||||
func SupportedType(dsType string) bool {
|
||||
return dsType == "graphite" || dsType == "prometheus" || dsType == "vlogs"
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
func (t *Type) UnmarshalYAML(unmarshal func(any) error) error {
|
||||
var s string
|
||||
if err := unmarshal(&s); err != nil {
|
||||
return err
|
||||
}
|
||||
switch s {
|
||||
case "graphite", "prometheus", "vlogs":
|
||||
default:
|
||||
if !SupportedType(s) {
|
||||
return fmt.Errorf("unknown datasource type=%q, want prometheus, graphite or vlogs", s)
|
||||
}
|
||||
t.Name = s
|
||||
|
||||
@@ -148,9 +148,13 @@ func main() {
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to init datasource: %s", err)
|
||||
}
|
||||
if err := replay(groupsCfg, q, rw); err != nil {
|
||||
totalRows, droppedRows, err := replay(groupsCfg, q, rw)
|
||||
if err != nil {
|
||||
logger.Fatalf("replay failed: %s", err)
|
||||
}
|
||||
if droppedRows > 0 {
|
||||
logger.Fatalf("failed to push all generated samples to remote write url, dropped %d samples out of %d", droppedRows, totalRows)
|
||||
}
|
||||
logger.Infof("replay succeed!")
|
||||
return
|
||||
}
|
||||
|
||||
@@ -194,9 +194,10 @@ func mergeLabels(target string, metaLabels *promutil.Labels, cfg *Config) *promu
|
||||
alertsPath = address[n:]
|
||||
address = address[:n]
|
||||
}
|
||||
m.Add("__address__", address)
|
||||
m.Add("__scheme__", scheme)
|
||||
m.Add("__alerts_path__", alertsPath)
|
||||
m.AddFrom(metaLabels)
|
||||
// force labels
|
||||
m.Set("__address__", address)
|
||||
m.Set("__scheme__", scheme)
|
||||
m.Set("__alerts_path__", alertsPath)
|
||||
return m
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package notifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -53,8 +54,11 @@ func (cw *configWatcher) notifiers() []Notifier {
|
||||
for _, n := range ns {
|
||||
notifiers = append(notifiers, n.Notifier)
|
||||
}
|
||||
|
||||
}
|
||||
// deterministically sort the output
|
||||
sort.Slice(notifiers, func(i, j int) bool {
|
||||
return notifiers[i].Addr() < notifiers[j].Addr()
|
||||
})
|
||||
return notifiers
|
||||
}
|
||||
|
||||
@@ -85,12 +89,12 @@ func (cw *configWatcher) reload(path string) error {
|
||||
}
|
||||
|
||||
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
|
||||
targets, errors := targetsFromLabels(labelsFn, cw.cfg, cw.genFn)
|
||||
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
|
||||
for _, err := range errors {
|
||||
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
}
|
||||
|
||||
cw.setTargets(typeK, targets)
|
||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||
|
||||
cw.wg.Add(1)
|
||||
go func() {
|
||||
@@ -105,22 +109,22 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn
|
||||
return
|
||||
case <-ticker.C:
|
||||
}
|
||||
updateTargets, errors := targetsFromLabels(labelsFn, cw.cfg, cw.genFn)
|
||||
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
|
||||
for _, err := range errors {
|
||||
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
}
|
||||
cw.setTargets(typeK, updateTargets)
|
||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
func targetsFromLabels(labelsFn getLabels, cfg *Config, genFn AlertURLGenerator) ([]Target, []error) {
|
||||
func getTargetMetadata(labelsFn getLabels, cfg *Config) (map[string]*promutil.Labels, []error) {
|
||||
metaLabels, err := labelsFn()
|
||||
if err != nil {
|
||||
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
|
||||
}
|
||||
var targets []Target
|
||||
targetMetadata := make(map[string]*promutil.Labels, len(metaLabels))
|
||||
var errors []error
|
||||
duplicates := make(map[string]struct{})
|
||||
for _, labels := range metaLabels {
|
||||
@@ -143,18 +147,9 @@ func targetsFromLabels(labelsFn getLabels, cfg *Config, genFn AlertURLGenerator)
|
||||
continue
|
||||
}
|
||||
duplicates[u] = struct{}{}
|
||||
|
||||
am, err := NewAlertManager(u, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
|
||||
if err != nil {
|
||||
errors = append(errors, err)
|
||||
continue
|
||||
}
|
||||
targets = append(targets, Target{
|
||||
Notifier: am,
|
||||
Labels: processedLabels,
|
||||
})
|
||||
targetMetadata[u] = processedLabels
|
||||
}
|
||||
return targets, errors
|
||||
return targetMetadata, errors
|
||||
}
|
||||
|
||||
type getLabels func() ([]*promutil.Labels, error)
|
||||
@@ -241,21 +236,40 @@ func (cw *configWatcher) mustStop() {
|
||||
|
||||
func (cw *configWatcher) setTargets(key TargetType, targets []Target) {
|
||||
cw.targetsMu.Lock()
|
||||
newT := make(map[string]Target)
|
||||
for _, t := range targets {
|
||||
newT[t.Addr()] = t
|
||||
}
|
||||
oldT := cw.targets[key]
|
||||
|
||||
for _, ot := range oldT {
|
||||
if _, ok := newT[ot.Addr()]; !ok {
|
||||
ot.Notifier.Close()
|
||||
}
|
||||
}
|
||||
cw.targets[key] = targets
|
||||
cw.targetsMu.Unlock()
|
||||
}
|
||||
|
||||
func (cw *configWatcher) updateTargets(key TargetType, targetMetadata map[string]*promutil.Labels, cfg *Config, genFn AlertURLGenerator) {
|
||||
cw.targetsMu.Lock()
|
||||
defer cw.targetsMu.Unlock()
|
||||
oldTargets := cw.targets[key]
|
||||
var updatedTargets []Target
|
||||
for _, ot := range oldTargets {
|
||||
if _, ok := targetMetadata[ot.Addr()]; !ok {
|
||||
// if target not exists in currentTargets, close it
|
||||
ot.Notifier.Close()
|
||||
} else {
|
||||
updatedTargets = append(updatedTargets, ot)
|
||||
delete(targetMetadata, ot.Addr())
|
||||
}
|
||||
}
|
||||
// create new resources for the new targets
|
||||
for addr, labels := range targetMetadata {
|
||||
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
|
||||
if err != nil {
|
||||
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
|
||||
continue
|
||||
}
|
||||
updatedTargets = append(updatedTargets, Target{
|
||||
Notifier: am,
|
||||
Labels: labels,
|
||||
})
|
||||
}
|
||||
|
||||
cw.targets[key] = updatedTargets
|
||||
}
|
||||
|
||||
// mergeHTTPClientConfigs merges fields between child and parent params
|
||||
// by populating child from parent params if they're missing.
|
||||
func mergeHTTPClientConfigs(parent, child promauth.HTTPClientConfig) promauth.HTTPClientConfig {
|
||||
|
||||
@@ -8,8 +8,10 @@ import (
|
||||
"os"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
||||
)
|
||||
|
||||
func TestConfigWatcherReload(t *testing.T) {
|
||||
@@ -59,6 +61,11 @@ static_configs:
|
||||
}
|
||||
|
||||
func TestConfigWatcherStart(t *testing.T) {
|
||||
oldSDCheckInterval := consul.SDCheckInterval
|
||||
defer func() { consul.SDCheckInterval = oldSDCheckInterval }()
|
||||
consulCheckInterval := 100 * time.Millisecond
|
||||
consul.SDCheckInterval = &consulCheckInterval
|
||||
|
||||
consulSDServer := newFakeConsulServer()
|
||||
defer consulSDServer.Close()
|
||||
|
||||
@@ -97,6 +104,11 @@ consul_sd_configs:
|
||||
if n2.Addr() != expAddr2 {
|
||||
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
|
||||
}
|
||||
|
||||
f := func() bool { return len(cw.notifiers()) == 1 }
|
||||
if !waitFor(f, time.Second) {
|
||||
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
|
||||
}
|
||||
}
|
||||
|
||||
// TestConfigWatcherReloadConcurrent supposed to test concurrent
|
||||
@@ -193,6 +205,7 @@ const (
|
||||
)
|
||||
|
||||
func newFakeConsulServer() *httptest.Server {
|
||||
requestCount := 0
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
|
||||
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
|
||||
@@ -207,8 +220,9 @@ func newFakeConsulServer() *httptest.Server {
|
||||
}`))
|
||||
})
|
||||
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
|
||||
rw.Header().Set("X-Consul-Index", "1")
|
||||
rw.Write([]byte(`
|
||||
if requestCount == 0 {
|
||||
rw.Header().Set("X-Consul-Index", "1")
|
||||
rw.Write([]byte(`
|
||||
[
|
||||
{
|
||||
"Node": {
|
||||
@@ -297,6 +311,56 @@ func newFakeConsulServer() *httptest.Server {
|
||||
}
|
||||
}
|
||||
]`))
|
||||
} else {
|
||||
rw.Header().Set("X-Consul-Index", "2")
|
||||
rw.Write([]byte(`
|
||||
[
|
||||
{
|
||||
"Node": {
|
||||
"ID": "e8e3629a-3f50-9d6e-aaf8-f173b5b05c72",
|
||||
"Node": "machine",
|
||||
"Address": "127.0.0.1",
|
||||
"Datacenter": "dc1",
|
||||
"TaggedAddresses": {
|
||||
"lan": "127.0.0.1",
|
||||
"lan_ipv4": "127.0.0.1",
|
||||
"wan": "127.0.0.1",
|
||||
"wan_ipv4": "127.0.0.1"
|
||||
},
|
||||
"Meta": {
|
||||
"consul-network-segment": ""
|
||||
},
|
||||
"CreateIndex": 13,
|
||||
"ModifyIndex": 14
|
||||
},
|
||||
"Service": {
|
||||
"ID": "am3",
|
||||
"Service": "alertmanager",
|
||||
"Tags": [
|
||||
"alertmanager",
|
||||
"__scheme__=http"
|
||||
],
|
||||
"Address": "",
|
||||
"Meta": null,
|
||||
"Port": 9097,
|
||||
"Weights": {
|
||||
"Passing": 1,
|
||||
"Warning": 1
|
||||
},
|
||||
"EnableTagOverride": false,
|
||||
"Proxy": {
|
||||
"Mode": "",
|
||||
"MeshGateway": {},
|
||||
"Expose": {}
|
||||
},
|
||||
"Connect": {},
|
||||
"CreateIndex": 16,
|
||||
"ModifyIndex": 16
|
||||
}
|
||||
}
|
||||
]`))
|
||||
}
|
||||
requestCount++
|
||||
})
|
||||
|
||||
return httptest.NewServer(mux)
|
||||
@@ -357,3 +421,13 @@ func TestParseLabels_Success(t *testing.T) {
|
||||
PathPrefix: "test",
|
||||
}, "https://alertmanager:9093/api/v1/alerts")
|
||||
}
|
||||
|
||||
func waitFor(f func() bool, timeout time.Duration) bool {
|
||||
for start := time.Now(); time.Since(start) < timeout; {
|
||||
if f() == true {
|
||||
return true
|
||||
}
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -216,7 +216,7 @@ var (
|
||||
)
|
||||
|
||||
// GetDroppedRows returns value of droppedRows metric
|
||||
func GetDroppedRows() int64 { return int64(droppedRows.Get()) }
|
||||
func GetDroppedRows() int { return int(droppedRows.Get()) }
|
||||
|
||||
// flush is a blocking function that marshals WriteRequest and sends
|
||||
// it to remote-write endpoint. Flush performs limited amount of retries
|
||||
|
||||
@@ -30,13 +30,13 @@ var (
|
||||
"Progress bar rendering might be verbose or break the logs parsing, so it is recommended to be disabled when not used in interactive mode.")
|
||||
)
|
||||
|
||||
func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewrite.RWClient) error {
|
||||
func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewrite.RWClient) (totalRows, droppedRows int, err error) {
|
||||
if *replayMaxDatapoints < 1 {
|
||||
return fmt.Errorf("replay.maxDatapointsPerQuery can't be lower than 1")
|
||||
return 0, 0, fmt.Errorf("replay.maxDatapointsPerQuery can't be lower than 1")
|
||||
}
|
||||
tFrom, err := time.Parse(time.RFC3339, *replayFrom)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse replay.timeFrom=%q: %w", *replayFrom, err)
|
||||
return 0, 0, fmt.Errorf("failed to parse replay.timeFrom=%q: %w", *replayFrom, err)
|
||||
}
|
||||
|
||||
// use tFrom location for default value, otherwise filters could have different locations
|
||||
@@ -44,12 +44,12 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewri
|
||||
if *replayTo != "" {
|
||||
tTo, err = time.Parse(time.RFC3339, *replayTo)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse replay.timeTo=%q: %w", *replayTo, err)
|
||||
return 0, 0, fmt.Errorf("failed to parse replay.timeTo=%q: %w", *replayTo, err)
|
||||
}
|
||||
}
|
||||
|
||||
if !tTo.After(tFrom) {
|
||||
return fmt.Errorf("replay.timeTo=%v must be bigger than replay.timeFrom=%v", tTo, tFrom)
|
||||
return 0, 0, fmt.Errorf("replay.timeTo=%v must be bigger than replay.timeFrom=%v", tTo, tFrom)
|
||||
}
|
||||
labels := make(map[string]string)
|
||||
for _, s := range *externalLabels {
|
||||
@@ -58,7 +58,7 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewri
|
||||
}
|
||||
n := strings.IndexByte(s, '=')
|
||||
if n < 0 {
|
||||
return fmt.Errorf("missing '=' in `-label`. It must contain label in the form `name=value`; got %q", s)
|
||||
return 0, 0, fmt.Errorf("missing '=' in `-label`. It must contain label in the form `name=value`; got %q", s)
|
||||
}
|
||||
labels[s[:n]] = s[n+1:]
|
||||
}
|
||||
@@ -69,18 +69,14 @@ func replay(groupsCfg []config.Group, qb datasource.QuerierBuilder, rw remotewri
|
||||
"\nmax data points per request: %d\n",
|
||||
tFrom, tTo, *replayMaxDatapoints)
|
||||
|
||||
var total int
|
||||
for _, cfg := range groupsCfg {
|
||||
ng := rule.NewGroup(cfg, qb, *evaluationInterval, labels)
|
||||
total += ng.Replay(tFrom, tTo, rw, *replayMaxDatapoints, *replayRuleRetryAttempts, *replayRulesDelay, *disableProgressBar)
|
||||
totalRows += ng.Replay(tFrom, tTo, rw, *replayMaxDatapoints, *replayRuleRetryAttempts, *replayRulesDelay, *disableProgressBar)
|
||||
}
|
||||
logger.Infof("replay evaluation finished, generated %d samples", total)
|
||||
logger.Infof("replay evaluation finished, generated %d samples", totalRows)
|
||||
if err := rw.Close(); err != nil {
|
||||
return err
|
||||
return 0, 0, err
|
||||
}
|
||||
droppedRows := remotewrite.GetDroppedRows()
|
||||
if droppedRows > 0 {
|
||||
return fmt.Errorf("failed to push all generated samples to remote write url, dropped %d samples out of %d", droppedRows, total)
|
||||
}
|
||||
return nil
|
||||
droppedRows = remotewrite.GetDroppedRows()
|
||||
return totalRows, droppedRows, nil
|
||||
}
|
||||
|
||||
@@ -8,38 +8,45 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
)
|
||||
|
||||
type fakeReplayQuerier struct {
|
||||
datasource.FakeQuerier
|
||||
registry map[string]map[string]struct{}
|
||||
registry map[string]map[string][]datasource.Metric
|
||||
}
|
||||
|
||||
func (fr *fakeReplayQuerier) BuildWithParams(_ datasource.QuerierParams) datasource.Querier {
|
||||
return fr
|
||||
}
|
||||
|
||||
type fakeRWClient struct{}
|
||||
|
||||
func (fc *fakeRWClient) Push(_ prompbmarshal.TimeSeries) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (fc *fakeRWClient) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (fr *fakeReplayQuerier) QueryRange(_ context.Context, q string, from, to time.Time) (res datasource.Result, err error) {
|
||||
key := fmt.Sprintf("%s+%s", from.Format("15:04:05"), to.Format("15:04:05"))
|
||||
dps, ok := fr.registry[q]
|
||||
if !ok {
|
||||
return res, fmt.Errorf("unexpected query received: %q", q)
|
||||
}
|
||||
_, ok = dps[key]
|
||||
metrics, ok := dps[key]
|
||||
if !ok {
|
||||
return res, fmt.Errorf("unexpected time range received: %q", key)
|
||||
}
|
||||
delete(dps, key)
|
||||
if len(fr.registry[q]) < 1 {
|
||||
delete(fr.registry, q)
|
||||
}
|
||||
res.Data = metrics
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func TestReplay(t *testing.T) {
|
||||
f := func(from, to string, maxDP int, ruleDelay time.Duration, cfg []config.Group, qb *fakeReplayQuerier) {
|
||||
f := func(from, to string, maxDP int, ruleDelay time.Duration, cfg []config.Group, qb *fakeReplayQuerier, expectTotalRows int) {
|
||||
t.Helper()
|
||||
|
||||
fromOrig, toOrig, maxDatapointsOrig := *replayFrom, *replayTo, *replayMaxDatapoints
|
||||
@@ -52,15 +59,16 @@ func TestReplay(t *testing.T) {
|
||||
|
||||
*replayRuleRetryAttempts = 1
|
||||
*replayRulesDelay = ruleDelay
|
||||
rwb := &remotewrite.DebugClient{}
|
||||
rwb := &fakeRWClient{}
|
||||
*replayFrom = from
|
||||
*replayTo = to
|
||||
*replayMaxDatapoints = maxDP
|
||||
if err := replay(cfg, qb, rwb); err != nil {
|
||||
totalRows, _, err := replay(cfg, qb, rwb)
|
||||
if err != nil {
|
||||
t.Fatalf("replay failed: %s", err)
|
||||
}
|
||||
if len(qb.registry) > 0 {
|
||||
t.Fatalf("not all requests were sent: %#v", qb.registry)
|
||||
if totalRows != expectTotalRows {
|
||||
t.Fatalf("unexpected total rows count: got %d, want %d", totalRows, expectTotalRows)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,90 +76,154 @@ func TestReplay(t *testing.T) {
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:00.000Z", 10, time.Millisecond, []config.Group{
|
||||
{Rules: []config.Rule{{Record: "foo", Expr: "sum(up)"}}},
|
||||
}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string]struct{}{
|
||||
"sum(up)": {"12:00:00+12:02:00": {}},
|
||||
registry: map[string]map[string][]datasource.Metric{
|
||||
"sum(up)": {"12:00:00+12:02:00": {
|
||||
{
|
||||
Timestamps: []int64{1},
|
||||
Values: []float64{1},
|
||||
},
|
||||
}},
|
||||
},
|
||||
})
|
||||
}, 1)
|
||||
|
||||
// one rule + multiple responses
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, time.Millisecond, []config.Group{
|
||||
{Rules: []config.Rule{{Record: "foo", Expr: "sum(up)"}}},
|
||||
}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string]struct{}{
|
||||
registry: map[string]map[string][]datasource.Metric{
|
||||
"sum(up)": {
|
||||
"12:00:00+12:01:00": {},
|
||||
"12:00:00+12:01:00": {
|
||||
{
|
||||
Timestamps: []int64{1},
|
||||
Values: []float64{1},
|
||||
},
|
||||
},
|
||||
"12:01:00+12:02:00": {},
|
||||
"12:02:00+12:02:30": {},
|
||||
"12:02:00+12:02:30": {
|
||||
{
|
||||
Timestamps: []int64{1},
|
||||
Values: []float64{1},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}, 2)
|
||||
|
||||
// datapoints per step
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T15:02:30.000Z", 60, time.Millisecond, []config.Group{
|
||||
{Interval: promutil.NewDuration(time.Minute), Rules: []config.Rule{{Record: "foo", Expr: "sum(up)"}}},
|
||||
}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string]struct{}{
|
||||
registry: map[string]map[string][]datasource.Metric{
|
||||
"sum(up)": {
|
||||
"12:00:00+13:00:00": {},
|
||||
"13:00:00+14:00:00": {},
|
||||
"12:00:00+13:00:00": {
|
||||
{
|
||||
Timestamps: []int64{1, 2},
|
||||
Values: []float64{1, 2},
|
||||
},
|
||||
},
|
||||
"13:00:00+14:00:00": {
|
||||
{
|
||||
Timestamps: []int64{1},
|
||||
Values: []float64{1},
|
||||
},
|
||||
},
|
||||
"14:00:00+15:00:00": {},
|
||||
"15:00:00+15:02:30": {},
|
||||
},
|
||||
},
|
||||
})
|
||||
}, 3)
|
||||
|
||||
// multiple recording rules + multiple responses
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, time.Millisecond, []config.Group{
|
||||
{Rules: []config.Rule{{Record: "foo", Expr: "sum(up)"}}},
|
||||
{Rules: []config.Rule{{Record: "bar", Expr: "max(up)"}}},
|
||||
}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string]struct{}{
|
||||
registry: map[string]map[string][]datasource.Metric{
|
||||
"sum(up)": {
|
||||
"12:00:00+12:01:00": {},
|
||||
"12:00:00+12:01:00": {
|
||||
{
|
||||
Timestamps: []int64{1, 2},
|
||||
Values: []float64{1, 2},
|
||||
},
|
||||
},
|
||||
"12:01:00+12:02:00": {},
|
||||
"12:02:00+12:02:30": {},
|
||||
},
|
||||
"max(up)": {
|
||||
"12:00:00+12:01:00": {},
|
||||
"12:01:00+12:02:00": {},
|
||||
"12:01:00+12:02:00": {
|
||||
{
|
||||
Timestamps: []int64{1, 2},
|
||||
Values: []float64{1, 2},
|
||||
},
|
||||
},
|
||||
"12:02:00+12:02:30": {},
|
||||
},
|
||||
},
|
||||
})
|
||||
}, 4)
|
||||
|
||||
// multiple alerting rules + multiple responses
|
||||
// alerting rule generates two series `ALERTS` and `ALERTS_FOR_STATE` when triggered
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, time.Millisecond, []config.Group{
|
||||
{Rules: []config.Rule{{Alert: "foo", Expr: "sum(up) > 1"}}},
|
||||
{Rules: []config.Rule{{Alert: "bar", Expr: "max(up) < 1"}}},
|
||||
}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string]struct{}{
|
||||
registry: map[string]map[string][]datasource.Metric{
|
||||
"sum(up) > 1": {
|
||||
"12:00:00+12:01:00": {},
|
||||
"12:00:00+12:01:00": {
|
||||
{
|
||||
Timestamps: []int64{1, 2},
|
||||
Values: []float64{1, 2},
|
||||
},
|
||||
},
|
||||
"12:01:00+12:02:00": {},
|
||||
"12:02:00+12:02:30": {},
|
||||
},
|
||||
"max(up) < 1": {
|
||||
"12:00:00+12:01:00": {},
|
||||
"12:00:00+12:01:00": {
|
||||
{
|
||||
Timestamps: []int64{1},
|
||||
Values: []float64{1},
|
||||
},
|
||||
},
|
||||
"12:01:00+12:02:00": {},
|
||||
"12:02:00+12:02:30": {},
|
||||
},
|
||||
},
|
||||
})
|
||||
}, 6)
|
||||
|
||||
// multiple alerting rules in one group+ multiple responses + concurrency
|
||||
// multiple recording rules in one group+ multiple responses + concurrency
|
||||
f("2021-01-01T12:00:00.000Z", "2021-01-01T12:02:30.000Z", 1, 0, []config.Group{
|
||||
{Rules: []config.Rule{{Alert: "foo", Expr: "sum(up) > 1"}, {Alert: "bar", Expr: "max(up) < 1"}}, Concurrency: 2}}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string]struct{}{
|
||||
{Rules: []config.Rule{{Record: "foo", Expr: "sum(up) > 1"}, {Record: "bar", Expr: "max(up) < 1"}}, Concurrency: 2}}, &fakeReplayQuerier{
|
||||
registry: map[string]map[string][]datasource.Metric{
|
||||
"sum(up) > 1": {
|
||||
"12:00:00+12:01:00": {},
|
||||
"12:01:00+12:02:00": {},
|
||||
"12:02:00+12:02:30": {},
|
||||
"12:00:00+12:01:00": {
|
||||
{
|
||||
Timestamps: []int64{1},
|
||||
Values: []float64{1},
|
||||
},
|
||||
},
|
||||
"12:01:00+12:02:00": {
|
||||
{
|
||||
Timestamps: []int64{1},
|
||||
Values: []float64{1},
|
||||
},
|
||||
},
|
||||
"12:02:00+12:02:30": {
|
||||
{
|
||||
Timestamps: []int64{1},
|
||||
Values: []float64{1},
|
||||
},
|
||||
},
|
||||
},
|
||||
"max(up) < 1": {
|
||||
"12:00:00+12:01:00": {},
|
||||
"12:01:00+12:02:00": {},
|
||||
"12:01:00+12:02:00": {{
|
||||
Timestamps: []int64{1},
|
||||
Values: []float64{1},
|
||||
}},
|
||||
"12:02:00+12:02:30": {},
|
||||
},
|
||||
},
|
||||
})
|
||||
}, 4)
|
||||
}
|
||||
|
||||
@@ -99,3 +99,15 @@ textarea.curl-area {
|
||||
padding: 0;
|
||||
overflow: scroll;
|
||||
}
|
||||
|
||||
.w-10 {
|
||||
width: 10%;
|
||||
}
|
||||
|
||||
.w-20 {
|
||||
width: 20%;
|
||||
}
|
||||
|
||||
.w-60 {
|
||||
width: 60%;
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/tpl"
|
||||
@@ -27,6 +28,7 @@ var (
|
||||
// such as Grafana, and proxied via vmselect.
|
||||
{"api/v1/rules", "list all loaded groups and rules"},
|
||||
{"api/v1/alerts", "list all active alerts"},
|
||||
{"api/v1/notifiers", "list all notifiers"},
|
||||
{fmt.Sprintf("api/v1/alert?%s=<int>&%s=<int>", paramGroupID, paramAlertID), "get alert status by group and alert ID"},
|
||||
}
|
||||
systemLinks = [][2]string{
|
||||
@@ -42,6 +44,10 @@ var (
|
||||
{Name: "Notifiers", URL: "notifiers"},
|
||||
{Name: "Docs", URL: "https://docs.victoriametrics.com/victoriametrics/vmalert/"},
|
||||
}
|
||||
ruleTypeMap = map[string]string{
|
||||
"alert": ruleTypeAlerting,
|
||||
"record": ruleTypeRecording,
|
||||
}
|
||||
)
|
||||
|
||||
type requestHandler struct {
|
||||
@@ -89,10 +95,13 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
WriteRuleDetails(w, r, rule)
|
||||
return true
|
||||
case "/vmalert/groups":
|
||||
filter := r.URL.Query().Get("filter")
|
||||
rf := extractRulesFilter(r, filter)
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data := rh.groups(rf)
|
||||
WriteListGroups(w, r, data, filter)
|
||||
WriteListGroups(w, r, data, rf.filter)
|
||||
return true
|
||||
case "/vmalert/notifiers":
|
||||
WriteListTargets(w, r, notifier.GetTargets())
|
||||
@@ -102,23 +111,35 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
// served without `vmalert` prefix:
|
||||
case "/rules":
|
||||
// Grafana makes an extra request to `/rules`
|
||||
// handler in addition to `/api/v1/rules` calls in alerts UI,
|
||||
var data []apiGroup
|
||||
filter := r.URL.Query().Get("filter")
|
||||
rf := extractRulesFilter(r, filter)
|
||||
// handler in addition to `/api/v1/rules` calls in alerts UI
|
||||
var data []*apiGroup
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data = rh.groups(rf)
|
||||
WriteListGroups(w, r, data, filter)
|
||||
WriteListGroups(w, r, data, rf.filter)
|
||||
return true
|
||||
|
||||
case "/vmalert/api/v1/notifiers", "/api/v1/notifiers":
|
||||
data, err := rh.listNotifiers()
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
w.Write(data)
|
||||
return true
|
||||
case "/vmalert/api/v1/rules", "/api/v1/rules":
|
||||
// path used by Grafana for ng alerting
|
||||
var data []byte
|
||||
var err error
|
||||
|
||||
filter := r.URL.Query().Get("filter")
|
||||
rf := extractRulesFilter(r, filter)
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data, err = rh.listGroups(rf)
|
||||
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
@@ -129,7 +150,12 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
|
||||
case "/vmalert/api/v1/alerts", "/api/v1/alerts":
|
||||
// path used by Grafana for ng alerting
|
||||
data, err := rh.listAlerts()
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data, err := rh.listAlerts(rf)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
@@ -218,7 +244,7 @@ func (rh *requestHandler) getAlert(r *http.Request) (*apiAlert, error) {
|
||||
type listGroupsResponse struct {
|
||||
Status string `json:"status"`
|
||||
Data struct {
|
||||
Groups []apiGroup `json:"groups"`
|
||||
Groups []*apiGroup `json:"groups"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
@@ -229,82 +255,102 @@ type rulesFilter struct {
|
||||
ruleNames []string
|
||||
ruleType string
|
||||
excludeAlerts bool
|
||||
onlyUnhealthy bool
|
||||
onlyNoMatch bool
|
||||
filter string
|
||||
dsType config.Type
|
||||
}
|
||||
|
||||
func extractRulesFilter(r *http.Request, filter string) rulesFilter {
|
||||
rf := rulesFilter{}
|
||||
func newRulesFilter(r *http.Request) (*rulesFilter, error) {
|
||||
rf := &rulesFilter{}
|
||||
query := r.URL.Query()
|
||||
|
||||
var ruleType string
|
||||
ruleTypeParam := r.URL.Query().Get("type")
|
||||
// for some reason, `type` in filter doesn't match `type` in response,
|
||||
// so we use this matching here
|
||||
if ruleTypeParam == "alert" {
|
||||
ruleType = ruleTypeAlerting
|
||||
} else if ruleTypeParam == "record" {
|
||||
ruleType = ruleTypeRecording
|
||||
ruleTypeParam := query.Get("type")
|
||||
if len(ruleTypeParam) > 0 {
|
||||
if ruleType, ok := ruleTypeMap[ruleTypeParam]; ok {
|
||||
rf.ruleType = ruleType
|
||||
} else {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "type": not supported value %q`, ruleTypeParam), http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
|
||||
dsType := query.Get("datasource_type")
|
||||
if len(dsType) > 0 {
|
||||
if config.SupportedType(dsType) {
|
||||
rf.dsType = config.NewRawType(dsType)
|
||||
} else {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
|
||||
filter := strings.ToLower(query.Get("filter"))
|
||||
if len(filter) > 0 {
|
||||
if filter == "nomatch" || filter == "unhealthy" {
|
||||
rf.filter = filter
|
||||
} else {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "filter": not supported value %q`, filter), http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
rf.ruleType = ruleType
|
||||
|
||||
rf.excludeAlerts = httputil.GetBool(r, "exclude_alerts")
|
||||
rf.ruleNames = append([]string{}, r.Form["rule_name[]"]...)
|
||||
rf.groupNames = append([]string{}, r.Form["rule_group[]"]...)
|
||||
rf.files = append([]string{}, r.Form["file[]"]...)
|
||||
switch filter {
|
||||
case "unhealthy":
|
||||
rf.onlyUnhealthy = true
|
||||
case "noMatch":
|
||||
rf.onlyNoMatch = true
|
||||
}
|
||||
return rf
|
||||
return rf, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) groups(rf rulesFilter) []apiGroup {
|
||||
func (rf *rulesFilter) matchesGroup(group *rule.Group) bool {
|
||||
if len(rf.groupNames) > 0 && !slices.Contains(rf.groupNames, group.Name) {
|
||||
return false
|
||||
}
|
||||
if len(rf.files) > 0 && !slices.Contains(rf.files, group.File) {
|
||||
return false
|
||||
}
|
||||
if len(rf.dsType.Name) > 0 && rf.dsType.String() != group.Type.String() {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (rh *requestHandler) groups(rf *rulesFilter) []*apiGroup {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
groups := make([]apiGroup, 0)
|
||||
groups := make([]*apiGroup, 0)
|
||||
for _, group := range rh.m.groups {
|
||||
if len(rf.groupNames) > 0 && !slices.Contains(rf.groupNames, group.Name) {
|
||||
if !rf.matchesGroup(group) {
|
||||
continue
|
||||
}
|
||||
if len(rf.files) > 0 && !slices.Contains(rf.files, group.File) {
|
||||
continue
|
||||
}
|
||||
|
||||
g := groupToAPI(group)
|
||||
// the returned list should always be non-nil
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
|
||||
filteredRules := make([]apiRule, 0)
|
||||
for _, r := range g.Rules {
|
||||
if rf.ruleType != "" && rf.ruleType != r.Type {
|
||||
for _, rule := range g.Rules {
|
||||
if rf.ruleType != "" && rf.ruleType != rule.Type {
|
||||
continue
|
||||
}
|
||||
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, r.Name) {
|
||||
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, rule.Name) {
|
||||
continue
|
||||
}
|
||||
if (rule.LastError == "" && rf.filter == "unhealthy") || (!isNoMatch(rule) && rf.filter == "nomatch") {
|
||||
continue
|
||||
}
|
||||
if rf.excludeAlerts {
|
||||
r.Alerts = nil
|
||||
rule.Alerts = nil
|
||||
}
|
||||
if (r.LastError == "" && rf.onlyUnhealthy) || (!isNoMatch(r) && rf.onlyNoMatch) {
|
||||
continue
|
||||
}
|
||||
if r.LastError != "" {
|
||||
if rule.LastError != "" {
|
||||
g.Unhealthy++
|
||||
} else {
|
||||
g.Healthy++
|
||||
}
|
||||
if isNoMatch(r) {
|
||||
if isNoMatch(rule) {
|
||||
g.NoMatch++
|
||||
}
|
||||
filteredRules = append(filteredRules, r)
|
||||
filteredRules = append(filteredRules, rule)
|
||||
}
|
||||
g.Rules = filteredRules
|
||||
groups = append(groups, g)
|
||||
}
|
||||
// sort list of groups for deterministic output
|
||||
slices.SortFunc(groups, func(a, b apiGroup) int {
|
||||
slices.SortFunc(groups, func(a, b *apiGroup) int {
|
||||
if a.Name != b.Name {
|
||||
return strings.Compare(a.Name, b.Name)
|
||||
}
|
||||
@@ -313,7 +359,7 @@ func (rh *requestHandler) groups(rf rulesFilter) []apiGroup {
|
||||
return groups
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listGroups(rf rulesFilter) ([]byte, error) {
|
||||
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, error) {
|
||||
lr := listGroupsResponse{Status: "success"}
|
||||
lr.Data.Groups = rh.groups(rf)
|
||||
b, err := json.Marshal(lr)
|
||||
@@ -360,14 +406,17 @@ func (rh *requestHandler) groupAlerts() []groupAlerts {
|
||||
return gAlerts
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listAlerts() ([]byte, error) {
|
||||
func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
lr := listAlertsResponse{Status: "success"}
|
||||
lr.Data.Alerts = make([]*apiAlert, 0)
|
||||
for _, g := range rh.m.groups {
|
||||
for _, r := range g.Rules {
|
||||
for _, group := range rh.m.groups {
|
||||
if !rf.matchesGroup(group) {
|
||||
continue
|
||||
}
|
||||
for _, r := range group.Rules {
|
||||
a, ok := r.(*rule.AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
@@ -391,6 +440,42 @@ func (rh *requestHandler) listAlerts() ([]byte, error) {
|
||||
return b, nil
|
||||
}
|
||||
|
||||
type listNotifiersResponse struct {
|
||||
Status string `json:"status"`
|
||||
Data struct {
|
||||
Notifiers []*apiNotifier `json:"notifiers"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listNotifiers() ([]byte, error) {
|
||||
targets := notifier.GetTargets()
|
||||
|
||||
lr := listNotifiersResponse{Status: "success"}
|
||||
lr.Data.Notifiers = make([]*apiNotifier, 0)
|
||||
for protoName, protoTargets := range targets {
|
||||
notifier := &apiNotifier{
|
||||
Kind: string(protoName),
|
||||
Targets: make([]*apiTarget, 0, len(protoTargets)),
|
||||
}
|
||||
for _, target := range protoTargets {
|
||||
notifier.Targets = append(notifier.Targets, &apiTarget{
|
||||
Address: target.Notifier.Addr(),
|
||||
Labels: target.Labels.ToMap(),
|
||||
})
|
||||
}
|
||||
lr.Data.Notifiers = append(lr.Data.Notifiers, notifier)
|
||||
}
|
||||
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of notifiers: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func errResponse(err error, sc int) *httpserver.ErrorWithStatusCode {
|
||||
return &httpserver.ErrorWithStatusCode{
|
||||
Err: err,
|
||||
|
||||
@@ -93,18 +93,18 @@
|
||||
{%= tpl.Footer(r) %}
|
||||
{% endfunc %}
|
||||
|
||||
{% func ListGroups(r *http.Request, groups []apiGroup, filter string) %}
|
||||
{% func ListGroups(r *http.Request, groups []*apiGroup, filter string) %}
|
||||
{%code
|
||||
prefix := vmalertutil.Prefix(r.URL.Path)
|
||||
filters := map[string]string{
|
||||
"": "All",
|
||||
"unhealthy": "Unhealthy",
|
||||
"noMatch": "No Match",
|
||||
"nomatch": "No Match",
|
||||
}
|
||||
icons := map[string]string{
|
||||
"": "all",
|
||||
"unhealthy": "unhealthy",
|
||||
"noMatch": "nomatch",
|
||||
"nomatch": "nomatch",
|
||||
}
|
||||
currentText := filters[filter]
|
||||
currentIcon := icons[filter]
|
||||
@@ -161,9 +161,9 @@
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" style="width: 60%">Rule</th>
|
||||
<th scope="col" style="width: 20%" class="text-center" title="How many series were produced by the rule">Series</th>
|
||||
<th scope="col" style="width: 20%" class="text-center" title="How many seconds ago rule was executed">Updated</th>
|
||||
<th scope="col" class="w-60">Rule</th>
|
||||
<th scope="col" class="w-20" class="text-center" title="How many series were produced by the rule">Series</th>
|
||||
<th scope="col" class="w-20" class="text-center" title="How many seconds ago rule was executed">Updated</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@@ -594,9 +594,9 @@
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" title="The time when event was created">Updated at</th>
|
||||
<th scope="col" style="width: 10%" class="text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
||||
{% if seriesFetchedEnabled %}<th scope="col" style="width: 10%" class="text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
|
||||
<th scope="col" style="width: 10%" class="text-center" title="How many seconds request took">Duration</th>
|
||||
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
||||
{% if seriesFetchedEnabled %}<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>{% endif %}
|
||||
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
|
||||
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
|
||||
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
|
||||
</tr>
|
||||
|
||||
@@ -316,7 +316,7 @@ func Welcome(r *http.Request) string {
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:96
|
||||
func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []apiGroup, filter string) {
|
||||
func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*apiGroup, filter string) {
|
||||
//line app/vmalert/web.qtpl:96
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
@@ -325,12 +325,12 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []apiGr
|
||||
filters := map[string]string{
|
||||
"": "All",
|
||||
"unhealthy": "Unhealthy",
|
||||
"noMatch": "No Match",
|
||||
"nomatch": "No Match",
|
||||
}
|
||||
icons := map[string]string{
|
||||
"": "all",
|
||||
"unhealthy": "unhealthy",
|
||||
"noMatch": "nomatch",
|
||||
"nomatch": "nomatch",
|
||||
}
|
||||
currentText := filters[filter]
|
||||
currentIcon := icons[filter]
|
||||
@@ -523,9 +523,9 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []apiGr
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" style="width: 60%">Rule</th>
|
||||
<th scope="col" style="width: 20%" class="text-center" title="How many series were produced by the rule">Series</th>
|
||||
<th scope="col" style="width: 20%" class="text-center" title="How many seconds ago rule was executed">Updated</th>
|
||||
<th scope="col" class="w-60">Rule</th>
|
||||
<th scope="col" class="w-20" class="text-center" title="How many series were produced by the rule">Series</th>
|
||||
<th scope="col" class="w-20" class="text-center" title="How many seconds ago rule was executed">Updated</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@@ -722,7 +722,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []apiGr
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:222
|
||||
func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, groups []apiGroup, filter string) {
|
||||
func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, groups []*apiGroup, filter string) {
|
||||
//line app/vmalert/web.qtpl:222
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/web.qtpl:222
|
||||
@@ -733,7 +733,7 @@ func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, groups []apiGr
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:222
|
||||
func ListGroups(r *http.Request, groups []apiGroup, filter string) string {
|
||||
func ListGroups(r *http.Request, groups []*apiGroup, filter string) string {
|
||||
//line app/vmalert/web.qtpl:222
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/web.qtpl:222
|
||||
@@ -1697,17 +1697,17 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule apiRule)
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col" title="The time when event was created">Updated at</th>
|
||||
<th scope="col" style="width: 10%" class="text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
||||
<th scope="col" class="w-10 text-center" title="How many series expression returns. Each series will represent an alert.">Series returned</th>
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:598
|
||||
if seriesFetchedEnabled {
|
||||
//line app/vmalert/web.qtpl:598
|
||||
qw422016.N().S(`<th scope="col" style="width: 10%" class="text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>`)
|
||||
qw422016.N().S(`<th scope="col" class="w-10 text-center" title="How many series were scanned by datasource during the evaluation">Series fetched</th>`)
|
||||
//line app/vmalert/web.qtpl:598
|
||||
}
|
||||
//line app/vmalert/web.qtpl:598
|
||||
qw422016.N().S(`
|
||||
<th scope="col" style="width: 10%" class="text-center" title="How many seconds request took">Duration</th>
|
||||
<th scope="col" class="w-10 text-center" title="How many seconds request took">Duration</th>
|
||||
<th scope="col" class="text-center" title="Time used for rule execution">Executed at</th>
|
||||
<th scope="col" class="text-center" title="cURL command with request example">cURL</th>
|
||||
</tr>
|
||||
|
||||
@@ -19,25 +19,34 @@ import (
|
||||
func TestHandler(t *testing.T) {
|
||||
fq := &datasource.FakeQuerier{}
|
||||
fq.Add(datasource.Metric{
|
||||
Values: []float64{1}, Timestamps: []int64{0},
|
||||
Values: []float64{1},
|
||||
Timestamps: []int64{0},
|
||||
})
|
||||
g := rule.NewGroup(config.Group{
|
||||
Name: "group",
|
||||
File: "rules.yaml",
|
||||
Concurrency: 1,
|
||||
Rules: []config.Rule{
|
||||
{ID: 0, Alert: "alert"},
|
||||
{ID: 1, Record: "record"},
|
||||
},
|
||||
}, fq, 1*time.Minute, nil)
|
||||
ar := g.Rules[0].(*rule.AlertingRule)
|
||||
rr := g.Rules[1].(*rule.RecordingRule)
|
||||
|
||||
g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, nil, time.Time{})
|
||||
|
||||
m := &manager{groups: map[uint64]*rule.Group{
|
||||
g.CreateID(): g,
|
||||
}}
|
||||
m := &manager{groups: map[uint64]*rule.Group{}}
|
||||
var ar *rule.AlertingRule
|
||||
var rr *rule.RecordingRule
|
||||
for _, dsType := range []string{"prometheus", "", "graphite"} {
|
||||
g := rule.NewGroup(config.Group{
|
||||
Name: "group",
|
||||
File: "rules.yaml",
|
||||
Type: config.NewRawType(dsType),
|
||||
Concurrency: 1,
|
||||
Rules: []config.Rule{
|
||||
{
|
||||
ID: 0,
|
||||
Alert: "alert",
|
||||
},
|
||||
{
|
||||
ID: 1,
|
||||
Record: "record",
|
||||
},
|
||||
},
|
||||
}, fq, 1*time.Minute, nil)
|
||||
ar = g.Rules[0].(*rule.AlertingRule)
|
||||
rr = g.Rules[1].(*rule.RecordingRule)
|
||||
g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, nil, time.Time{})
|
||||
m.groups[g.CreateID()] = g
|
||||
}
|
||||
rh := &requestHandler{m: m}
|
||||
|
||||
getResp := func(t *testing.T, url string, to any, code int) {
|
||||
@@ -54,7 +63,7 @@ func TestHandler(t *testing.T) {
|
||||
t.Fatalf("err closing body %s", err)
|
||||
}
|
||||
}()
|
||||
if to != nil {
|
||||
if to != nil && code < 300 {
|
||||
if err = json.NewDecoder(resp.Body).Decode(to); err != nil {
|
||||
t.Fatalf("unexpected err %s", err)
|
||||
}
|
||||
@@ -95,14 +104,23 @@ func TestHandler(t *testing.T) {
|
||||
t.Run("/api/v1/alerts", func(t *testing.T) {
|
||||
lr := listAlertsResponse{}
|
||||
getResp(t, ts.URL+"/api/v1/alerts", &lr, 200)
|
||||
if length := len(lr.Data.Alerts); length != 1 {
|
||||
t.Fatalf("expected 1 alert got %d", length)
|
||||
if length := len(lr.Data.Alerts); length != 3 {
|
||||
t.Fatalf("expected 3 alert got %d", length)
|
||||
}
|
||||
|
||||
lr = listAlertsResponse{}
|
||||
getResp(t, ts.URL+"/vmalert/api/v1/alerts", &lr, 200)
|
||||
if length := len(lr.Data.Alerts); length != 1 {
|
||||
t.Fatalf("expected 1 alert got %d", length)
|
||||
if length := len(lr.Data.Alerts); length != 3 {
|
||||
t.Fatalf("expected 3 alert got %d", length)
|
||||
}
|
||||
|
||||
lr = listAlertsResponse{}
|
||||
getResp(t, ts.URL+"/api/v1/alerts?datasource_type=test", &lr, 400)
|
||||
|
||||
lr = listAlertsResponse{}
|
||||
getResp(t, ts.URL+"/api/v1/alerts?datasource_type=prometheus", &lr, 200)
|
||||
if length := len(lr.Data.Alerts); length != 2 {
|
||||
t.Fatalf("expected 2 alert got %d", length)
|
||||
}
|
||||
})
|
||||
t.Run("/api/v1/alert?alertID&groupID", func(t *testing.T) {
|
||||
@@ -138,14 +156,14 @@ func TestHandler(t *testing.T) {
|
||||
t.Run("/api/v1/rules", func(t *testing.T) {
|
||||
lr := listGroupsResponse{}
|
||||
getResp(t, ts.URL+"/api/v1/rules", &lr, 200)
|
||||
if length := len(lr.Data.Groups); length != 1 {
|
||||
t.Fatalf("expected 1 group got %d", length)
|
||||
if length := len(lr.Data.Groups); length != 3 {
|
||||
t.Fatalf("expected 3 group got %d", length)
|
||||
}
|
||||
|
||||
lr = listGroupsResponse{}
|
||||
getResp(t, ts.URL+"/vmalert/api/v1/rules", &lr, 200)
|
||||
if length := len(lr.Data.Groups); length != 1 {
|
||||
t.Fatalf("expected 1 group got %d", length)
|
||||
if length := len(lr.Data.Groups); length != 3 {
|
||||
t.Fatalf("expected 3 group got %d", length)
|
||||
}
|
||||
})
|
||||
t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
|
||||
@@ -172,10 +190,10 @@ func TestHandler(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("/api/v1/rules&filters", func(t *testing.T) {
|
||||
check := func(url string, expGroups, expRules int) {
|
||||
check := func(url string, statusCode, expGroups, expRules int) {
|
||||
t.Helper()
|
||||
lr := listGroupsResponse{}
|
||||
getResp(t, ts.URL+url, &lr, 200)
|
||||
getResp(t, ts.URL+url, &lr, statusCode)
|
||||
if length := len(lr.Data.Groups); length != expGroups {
|
||||
t.Fatalf("expected %d groups got %d", expGroups, length)
|
||||
}
|
||||
@@ -191,25 +209,31 @@ func TestHandler(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
check("/api/v1/rules?type=alert", 1, 1)
|
||||
check("/api/v1/rules?type=record", 1, 1)
|
||||
check("/api/v1/rules?type=alert", 200, 3, 3)
|
||||
check("/api/v1/rules?type=record", 200, 3, 3)
|
||||
check("/api/v1/rules?type=records", 400, 0, 0)
|
||||
|
||||
check("/vmalert/api/v1/rules?type=alert", 1, 1)
|
||||
check("/vmalert/api/v1/rules?type=record", 1, 1)
|
||||
check("/vmalert/api/v1/rules?type=alert", 200, 3, 3)
|
||||
check("/vmalert/api/v1/rules?type=record", 200, 3, 3)
|
||||
check("/vmalert/api/v1/rules?type=recording", 400, 0, 0)
|
||||
|
||||
check("/vmalert/api/v1/rules?datasource_type=prometheus", 200, 2, 4)
|
||||
check("/vmalert/api/v1/rules?datasource_type=graphite", 200, 1, 2)
|
||||
check("/vmalert/api/v1/rules?datasource_type=graphiti", 400, 0, 0)
|
||||
|
||||
// no filtering expected due to bad params
|
||||
check("/api/v1/rules?type=badParam", 1, 2)
|
||||
check("/api/v1/rules?foo=bar", 1, 2)
|
||||
check("/api/v1/rules?type=badParam", 400, 0, 0)
|
||||
check("/api/v1/rules?foo=bar", 200, 3, 6)
|
||||
|
||||
check("/api/v1/rules?rule_group[]=foo&rule_group[]=bar", 0, 0)
|
||||
check("/api/v1/rules?rule_group[]=foo&rule_group[]=group&rule_group[]=bar", 1, 2)
|
||||
check("/api/v1/rules?rule_group[]=foo&rule_group[]=bar", 200, 0, 0)
|
||||
check("/api/v1/rules?rule_group[]=foo&rule_group[]=group&rule_group[]=bar", 200, 3, 6)
|
||||
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=foo", 0, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml", 1, 2)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=foo", 200, 0, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml", 200, 3, 6)
|
||||
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 1, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert", 1, 1)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert&rule_name[]=record", 1, 2)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 3, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert", 200, 3, 3)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert&rule_name[]=record", 200, 3, 6)
|
||||
})
|
||||
t.Run("/api/v1/rules&exclude_alerts=true", func(t *testing.T) {
|
||||
// check if response returns active alerts by default
|
||||
@@ -259,7 +283,7 @@ func TestEmptyResponse(t *testing.T) {
|
||||
t.Fatalf("err closing body %s", err)
|
||||
}
|
||||
}()
|
||||
if to != nil {
|
||||
if to != nil && code < 300 {
|
||||
if err = json.NewDecoder(resp.Body).Decode(to); err != nil {
|
||||
t.Fatalf("unexpected err %s", err)
|
||||
}
|
||||
|
||||
@@ -20,6 +20,16 @@ const (
|
||||
paramRuleID = "rule_id"
|
||||
)
|
||||
|
||||
type apiNotifier struct {
|
||||
Kind string `json:"kind"`
|
||||
Targets []*apiTarget `json:"targets"`
|
||||
}
|
||||
|
||||
type apiTarget struct {
|
||||
Address string `json:"address"`
|
||||
Labels map[string]string `json:"labels"`
|
||||
}
|
||||
|
||||
// apiAlert represents a notifier.AlertingRule state
|
||||
// for WEB view
|
||||
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||
@@ -108,7 +118,7 @@ type apiGroup struct {
|
||||
|
||||
// groupAlerts represents a group of alerts for WEB view
|
||||
type groupAlerts struct {
|
||||
Group apiGroup
|
||||
Group *apiGroup
|
||||
Alerts []*apiAlert
|
||||
}
|
||||
|
||||
@@ -327,7 +337,7 @@ func newAlertAPI(ar *rule.AlertingRule, a *notifier.Alert) *apiAlert {
|
||||
return aa
|
||||
}
|
||||
|
||||
func groupToAPI(g *rule.Group) apiGroup {
|
||||
func groupToAPI(g *rule.Group) *apiGroup {
|
||||
g = g.DeepCopy()
|
||||
ag := apiGroup{
|
||||
// encode as string to avoid rounding
|
||||
@@ -353,7 +363,7 @@ func groupToAPI(g *rule.Group) apiGroup {
|
||||
for _, r := range g.Rules {
|
||||
ag.Rules = append(ag.Rules, ruleToAPI(r))
|
||||
}
|
||||
return ag
|
||||
return &ag
|
||||
}
|
||||
|
||||
func urlValuesToStrings(values url.Values) []string {
|
||||
|
||||
@@ -70,7 +70,7 @@ var (
|
||||
Usage: "VictoriaMetrics address to perform import requests. \n" +
|
||||
"Should be the same as --httpListenAddr value for single-node version or vminsert component. \n" +
|
||||
"When importing into the clustered version do not forget to set additionally --vm-account-id flag. \n" +
|
||||
"Please note, that `vmctl` performs initial readiness check for the given address by checking `/health` endpoint.",
|
||||
"Please note, that vmctl performs initial readiness check for the given address by checking /health endpoint.",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmUser,
|
||||
@@ -514,27 +514,27 @@ var (
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeSrcBearerToken,
|
||||
Usage: "Optional bearer auth token to use for the corresponding `--vm-native-src-addr`",
|
||||
Usage: "Optional bearer auth token to use for the corresponding --vm-native-src-addr",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeSrcCertFile,
|
||||
Usage: "Optional path to client-side TLS certificate file to use when connecting to `--vm-native-src-addr`",
|
||||
Usage: "Optional path to client-side TLS certificate file to use when connecting to --vm-native-src-addr",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeSrcKeyFile,
|
||||
Usage: "Optional path to client-side TLS key to use when connecting to `--vm-native-src-addr`",
|
||||
Usage: "Optional path to client-side TLS key to use when connecting to --vm-native-src-addr",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeSrcCAFile,
|
||||
Usage: "Optional path to TLS CA file to use for verifying connections to `--vm-native-src-addr`. By default, system CA is used",
|
||||
Usage: "Optional path to TLS CA file to use for verifying connections to --vm-native-src-addr. By default, system CA is used",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeSrcServerName,
|
||||
Usage: "Optional TLS server name to use for connections to `--vm-native-src-addr`. By default, the server name from `--vm-native-src-addr` is used",
|
||||
Usage: "Optional TLS server name to use for connections to --vm-native-src-addr. By default, the server name from --vm-native-src-addr is used",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: vmNativeSrcInsecureSkipVerify,
|
||||
Usage: "Whether to skip TLS certificate verification when connecting to `--vm-native-src-addr`",
|
||||
Usage: "Whether to skip TLS certificate verification when connecting to --vm-native-src-addr",
|
||||
Value: false,
|
||||
},
|
||||
|
||||
@@ -563,27 +563,27 @@ var (
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeDstBearerToken,
|
||||
Usage: "Optional bearer auth token to use for the corresponding `--vm-native-dst-addr`",
|
||||
Usage: "Optional bearer auth token to use for the corresponding --vm-native-dst-addr",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeDstCertFile,
|
||||
Usage: "Optional path to client-side TLS certificate file to use when connecting to `--vm-native-dst-addr`",
|
||||
Usage: "Optional path to client-side TLS certificate file to use when connecting to --vm-native-dst-addr",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeDstKeyFile,
|
||||
Usage: "Optional path to client-side TLS key to use when connecting to `--vm-native-dst-addr`",
|
||||
Usage: "Optional path to client-side TLS key to use when connecting to --vm-native-dst-addr",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeDstCAFile,
|
||||
Usage: "Optional path to TLS CA file to use for verifying connections to `--vm-native-dst-addr`. By default, system CA is used",
|
||||
Usage: "Optional path to TLS CA file to use for verifying connections to --vm-native-dst-addr. By default, system CA is used",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: vmNativeDstServerName,
|
||||
Usage: "Optional TLS server name to use for connections to `--vm-native-dst-addr`. By default, the server name from `--vm-native-dst-addr` is used",
|
||||
Usage: "Optional TLS server name to use for connections to --vm-native-dst-addr. By default, the server name from --vm-native-dst-addr is used",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: vmNativeDstInsecureSkipVerify,
|
||||
Usage: "Whether to skip TLS certificate verification when connecting to `--vm-native-dst-addr`",
|
||||
Usage: "Whether to skip TLS certificate verification when connecting to --vm-native-dst-addr",
|
||||
Value: false,
|
||||
},
|
||||
|
||||
@@ -597,7 +597,7 @@ var (
|
||||
Name: vmRateLimit,
|
||||
Usage: "Optional data transfer rate limit in bytes per second.\n" +
|
||||
"By default, the rate limit is disabled. It can be useful for limiting load on source or destination databases. \n" +
|
||||
"Rate limit is applied per worker, see `--vm-concurrency`.",
|
||||
"Rate limit is applied per worker, see --vm-concurrency.",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: vmInterCluster,
|
||||
|
||||
@@ -140,6 +140,15 @@ func (p *vmNativeProcessor) runSingle(ctx context.Context, f native.Filter, srcU
|
||||
|
||||
written, err := io.Copy(w, reader)
|
||||
if err != nil {
|
||||
// io.Copy could fail if ImportPipe will fail before and close the pr
|
||||
// so we check if that's the case and to not ignore importErr if it exists.
|
||||
select {
|
||||
case importErr := <-importCh:
|
||||
if importErr != nil {
|
||||
return fmt.Errorf("failed to import %s: %w", p.dst.Addr, importErr)
|
||||
}
|
||||
default:
|
||||
}
|
||||
return fmt.Errorf("failed to write into %q: %s", p.dst.Addr, err)
|
||||
}
|
||||
|
||||
|
||||
@@ -562,6 +562,15 @@ func handleStaticAndSimpleRequests(w http.ResponseWriter, r *http.Request, path
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprint(w, `{"status":"success","data":{"alerts":[]}}`)
|
||||
return true
|
||||
case "/api/v1/notifiers", "/notifiers":
|
||||
notifiersRequests.Inc()
|
||||
if len(*vmalertProxyURL) > 0 {
|
||||
proxyVMAlertRequests(w, r)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprint(w, `{"status":"success","data":{"notifiers":[]}}`)
|
||||
return true
|
||||
case "/api/v1/metadata":
|
||||
// Return dumb placeholder for https://prometheus.io/docs/prometheus/latest/querying/api/#querying-metric-metadata
|
||||
metadataRequests.Inc()
|
||||
@@ -691,9 +700,10 @@ var (
|
||||
expandWithExprsRequests = metrics.NewCounter(`vm_http_requests_total{path="/expand-with-exprs"}`)
|
||||
prettifyQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/prettify-query"}`)
|
||||
|
||||
vmalertRequests = metrics.NewCounter(`vm_http_requests_total{path="/vmalert"}`)
|
||||
rulesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/rules"}`)
|
||||
alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`)
|
||||
vmalertRequests = metrics.NewCounter(`vm_http_requests_total{path="/vmalert"}`)
|
||||
rulesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/rules"}`)
|
||||
alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`)
|
||||
notifiersRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/notifiers"}`)
|
||||
|
||||
metadataRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/metadata"}`)
|
||||
buildInfoRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/buildinfo"}`)
|
||||
|
||||
@@ -377,11 +377,13 @@ func getRollupConfigs(funcName string, rf rollupFunc, expr metricsql.Expr, start
|
||||
preFunc := func(_ []float64, _ []int64) {}
|
||||
funcName = strings.ToLower(funcName)
|
||||
|
||||
// window > lookbackDelta could result in negative delta.
|
||||
// See issue: https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8342
|
||||
stalenessInterval := lookbackDelta
|
||||
if stalenessInterval != 0 && stalenessInterval < window {
|
||||
stalenessInterval = window
|
||||
if stalenessInterval != 0 {
|
||||
// If stalenessInterval was set, it should additionally account for [window] range to cover following cases:
|
||||
// * window > stalenessInterval, see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8342
|
||||
// * window captures prevValue in doInternal while removeCounterResets does not,
|
||||
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8935#issuecomment-3000735468
|
||||
stalenessInterval += window
|
||||
}
|
||||
|
||||
if rollupFuncsRemoveCounterResets[funcName] {
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
manifest.json provides metadata used when your web app is installed on a
|
||||
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
|
||||
-->
|
||||
<link rel="manifest" href="/manifest.json"/>
|
||||
<link rel="manifest" href="/manifest.json" crossorigin="use-credentials"/>
|
||||
<!--
|
||||
Notice the use of in the tags above.
|
||||
It will be replaced with the URL of the `public` folder during the build.
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8"/>
|
||||
<link rel="icon" href="/favicon.svg" />
|
||||
<link rel="apple-touch-icon" href="/favicon.svg" />
|
||||
<link rel="mask-icon" href="/favicon.svg" color="#000000">
|
||||
<link rel="icon" href="/favicon.victorialogs.svg" />
|
||||
<link rel="apple-touch-icon" href="/favicon.victorialogs.svg" />
|
||||
<link rel="mask-icon" href="/favicon.victorialogs.svg" color="#000000">
|
||||
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=5"/>
|
||||
<meta name="theme-color" content="#000000"/>
|
||||
@@ -13,7 +13,7 @@
|
||||
manifest.json provides metadata used when your web app is installed on a
|
||||
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
|
||||
-->
|
||||
<link rel="manifest" href="/manifest.json"/>
|
||||
<link rel="manifest" href="/manifest.json" crossorigin="use-credentials"/>
|
||||
<!--
|
||||
Notice the use of in the tags above.
|
||||
It will be replaced with the URL of the `public` folder during the build.
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
manifest.json provides metadata used when your web app is installed on a
|
||||
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
|
||||
-->
|
||||
<link rel="manifest" href="/manifest.json"/>
|
||||
<link rel="manifest" href="/manifest.json" crossorigin="use-credentials"/>
|
||||
<!--
|
||||
Notice the use of in the tags above.
|
||||
It will be replaced with the URL of the `public` folder during the build.
|
||||
|
||||
5
app/vmui/packages/vmui/public/favicon.victorialogs.svg
Normal file
@@ -0,0 +1,5 @@
|
||||
<svg width="48" height="48" fill="#e94600" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M24.5475 0C10.3246.0265251 1.11379 3.06365 4.40623 6.10077c0 0 12.32997 11.23333 16.58217 14.84083.8131.6896 2.1728 1.1936 3.5191 1.2201h.1199c1.3463-.0265 2.706-.5305 3.5191-1.2201 4.2522-3.5942 16.5422-14.84083 16.5422-14.84083C48.0478 3.06365 38.8636.0265251 24.6674 0"/>
|
||||
<path d="M28.1579 27.0159c-.8131.6896-2.1728 1.1936-3.5191 1.2201h-.12c-1.3463-.0265-2.7059-.5305-3.519-1.2201-2.9725-2.5067-13.35639-11.87-17.26201-15.3979v5.4112c0 .5968.22661 1.3793.6265 1.7506C7.00358 21.1936 17.2675 30.5437 20.9731 33.6737c.8132.6896 2.1728 1.1936 3.5191 1.2201h.12c1.3463-.0265 2.7059-.5305 3.519-1.2201 3.679-3.13 13.9429-12.4536 16.6089-14.8939.4132-.3713.6265-1.1538.6265-1.7506V11.618c-3.9323 3.5411-14.3162 12.931-17.2354 15.3979h.0267Z"/>
|
||||
<path d="M28.1579 39.748c-.8131.6897-2.1728 1.1937-3.5191 1.2202h-.12c-1.3463-.0265-2.7059-.5305-3.519-1.2202-2.9725-2.4933-13.35639-11.8567-17.26201-15.3978v5.4111c0 .5969.22661 1.3793.6265 1.7507C7.00358 33.9258 17.2675 43.2759 20.9731 46.4058c.8132.6897 2.1728 1.1937 3.5191 1.2202h.12c1.3463-.0265 2.7059-.5305 3.519-1.2202 3.679-3.1299 13.9429-12.4535 16.6089-14.8938.4132-.3714.6265-1.1538.6265-1.7507v-5.4111c-3.9323 3.5411-14.3162 12.931-17.2354 15.3978h.0267Z"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.3 KiB |
@@ -433,3 +433,24 @@ func (tc *TestCase) MustStartVlsingle(instance string, flags []string) *Vlsingle
|
||||
tc.addApp(instance, app)
|
||||
return app
|
||||
}
|
||||
|
||||
// MustStartDefaultVlagent is a test helper function that starts an instance of
|
||||
// vlagent with defaults suitable for most tests.
|
||||
func (tc *TestCase) MustStartDefaultVlagent(remoteWriteURLs []string) *Vlagent {
|
||||
tc.t.Helper()
|
||||
|
||||
return tc.MustStartVlagent("vlagent", remoteWriteURLs, nil)
|
||||
}
|
||||
|
||||
// MustStartVlagent is a test helper function that starts an instance of
|
||||
// vlagent and fails the test if the app fails to start.
|
||||
func (tc *TestCase) MustStartVlagent(instance string, remoteWriteURLs []string, flags []string) *Vlagent {
|
||||
tc.t.Helper()
|
||||
|
||||
app, err := StartVlagent(instance, remoteWriteURLs, flags, tc.cli)
|
||||
if err != nil {
|
||||
tc.t.Fatalf("Could not start %s: %v", instance, err)
|
||||
}
|
||||
tc.addApp(instance, app)
|
||||
return app
|
||||
}
|
||||
|
||||
@@ -41,6 +41,7 @@ func testSpecialQueryRegression(tc *at.TestCase, sut at.PrometheusWriteQuerier)
|
||||
testDuplicateLabel(tc, sut)
|
||||
testTooBigLookbehindWindow(tc, sut)
|
||||
testMatchSeries(tc, sut)
|
||||
testNegativeIncrease(tc, sut)
|
||||
|
||||
// graphite
|
||||
testComparisonNotInfNotNan(tc, sut)
|
||||
@@ -234,6 +235,53 @@ func testMatchSeries(tc *at.TestCase, sut at.PrometheusWriteQuerier) {
|
||||
})
|
||||
}
|
||||
|
||||
func testNegativeIncrease(tc *at.TestCase, sut at.PrometheusWriteQuerier) {
|
||||
t := tc.T()
|
||||
|
||||
// negative increase when user overrides staleness interval
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8935#issuecomment-2978728661
|
||||
sut.PrometheusAPIV1ImportPrometheus(t, []string{
|
||||
`foo 108 1750109243514`, // 2025-06-16 21:27:23:514
|
||||
`foo 108 1750109258514`, // 2025-06-16 21:27:38:514
|
||||
// gap 75s
|
||||
`foo 1 1750109333514`, // 2025-06-16 21:28:53:514
|
||||
`foo 1 1750109348514`, // 2025-06-16 21:29:08:514
|
||||
}, at.QueryOpts{})
|
||||
sut.ForceFlush(t)
|
||||
|
||||
tc.Assert(&at.AssertOptions{
|
||||
Msg: "regression for https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8935#issuecomment-2978728661",
|
||||
DoNotRetry: true,
|
||||
Got: func() any {
|
||||
return sut.PrometheusAPIV1QueryRange(t, `increase(foo[1m])`, at.QueryOpts{
|
||||
Start: "2025-06-16T21:28:40.700Z",
|
||||
End: "2025-06-16T21:29:30.700Z",
|
||||
Step: "9s",
|
||||
MaxLookback: "65s",
|
||||
})
|
||||
},
|
||||
Want: &at.PrometheusAPIV1QueryResponse{
|
||||
Status: "success",
|
||||
Data: &at.QueryData{
|
||||
ResultType: "matrix",
|
||||
Result: []*at.QueryResult{
|
||||
{
|
||||
Metric: map[string]string{},
|
||||
Samples: []*at.Sample{
|
||||
at.NewSample(t, "2025-06-16T21:28:40.700Z", 0),
|
||||
at.NewSample(t, "2025-06-16T21:28:49.700Z", 0),
|
||||
at.NewSample(t, "2025-06-16T21:28:58.700Z", 1),
|
||||
at.NewSample(t, "2025-06-16T21:29:07.700Z", 1),
|
||||
at.NewSample(t, "2025-06-16T21:29:16.700Z", 0),
|
||||
at.NewSample(t, "2025-06-16T21:29:25.700Z", 0),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func testComparisonNotInfNotNan(tc *at.TestCase, sut at.PrometheusWriteQuerier) {
|
||||
t := tc.T()
|
||||
|
||||
|
||||
154
apptest/tests/vlagent_remotewrite_test.go
Normal file
@@ -0,0 +1,154 @@
|
||||
package tests
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
at "github.com/VictoriaMetrics/VictoriaMetrics/apptest"
|
||||
)
|
||||
|
||||
// TestSingleVlagentRemoteWrite performs tests for remote write data ingestion
|
||||
// by vlagent application
|
||||
func TestSingleVlagentRemoteWrite(t *testing.T) {
|
||||
os.RemoveAll(t.Name())
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
// test data ingestion into
|
||||
const instance = "vlsingle"
|
||||
const r1Port = "50425"
|
||||
sutFlags := []string{
|
||||
"-httpListenAddr=127.0.0.1:" + r1Port,
|
||||
"-storageDataPath=" + tc.Dir() + "/" + instance,
|
||||
"-retentionPeriod=100y",
|
||||
}
|
||||
|
||||
sut := tc.MustStartVlsingle(instance, sutFlags)
|
||||
remoteWriteURL := fmt.Sprintf("http://%s/internal/insert", sut.HTTPAddr())
|
||||
|
||||
vlagent := tc.MustStartDefaultVlagent([]string{remoteWriteURL})
|
||||
vlagent.JSONLineWrite(t, []string{
|
||||
`{"_msg":"ingest jsonline","_time": "2025-06-05T14:30:19.088007Z", "foo":"bar"}`,
|
||||
`{"_msg":"ingest jsonline","_time": "2025-06-05T14:30:19.088007Z", "bar":"foo"}`,
|
||||
}, at.QueryOptsLogs{})
|
||||
|
||||
sut.ForceFlush(t)
|
||||
got := sut.LogsQLQuery(t, "ingest jsonline", at.QueryOptsLogs{})
|
||||
wantLogLines := []string{
|
||||
`{"_msg":"ingest jsonline","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","bar":"foo"}`,
|
||||
`{"_msg":"ingest jsonline","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","foo":"bar"}`,
|
||||
}
|
||||
assertLogsQLResponseEqual(t, got, &at.LogsQLQueryResponse{LogLines: wantLogLines})
|
||||
|
||||
// stop log storage and check data buffering works correctly
|
||||
tc.StopApp(instance)
|
||||
|
||||
// ingest some data vlagent must hold it in memory
|
||||
vlagent.JSONLineWrite(t, []string{
|
||||
`{"_msg":"ingest jsonline2","_time": "2025-06-05T14:30:19.088007Z", "foo":"bar"}`,
|
||||
`{"_msg":"ingest jsonline2","_time": "2025-06-05T14:30:19.088007Z", "bar":"foo"}`,
|
||||
}, at.QueryOptsLogs{})
|
||||
|
||||
vlagent.WaitQueueEmptyAfter(t, func() {
|
||||
// start storage and check if buffered data correctly ingested
|
||||
sut = tc.MustStartVlsingle(instance, sutFlags)
|
||||
})
|
||||
|
||||
sut.ForceFlush(t)
|
||||
got = sut.LogsQLQuery(t, "ingest jsonline2", at.QueryOptsLogs{})
|
||||
wantLogLines = []string{
|
||||
`{"_msg":"ingest jsonline2","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","bar":"foo"}`,
|
||||
`{"_msg":"ingest jsonline2","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","foo":"bar"}`,
|
||||
}
|
||||
assertLogsQLResponseEqual(t, got, &at.LogsQLQueryResponse{LogLines: wantLogLines})
|
||||
}
|
||||
|
||||
func TestSingleVlagentRemoteWriteReplication(t *testing.T) {
|
||||
os.RemoveAll(t.Name())
|
||||
tc := at.NewTestCase(t)
|
||||
defer tc.Stop()
|
||||
|
||||
const (
|
||||
instanceReplica0 = "vlsingle-0"
|
||||
vlsinglePortR0 = "53541"
|
||||
instanceReplica1 = "vlsingle-1"
|
||||
vlsinglePortR1 = "53124"
|
||||
vlagentInstance = "vlagent"
|
||||
)
|
||||
sutFlagsR0 := []string{
|
||||
"-httpListenAddr=127.0.0.1:" + vlsinglePortR0,
|
||||
"-storageDataPath=" + path.Join(tc.Dir(), instanceReplica0),
|
||||
"-retentionPeriod=100y",
|
||||
}
|
||||
sutFlagsR1 := []string{
|
||||
"-httpListenAddr=127.0.0.1:" + vlsinglePortR1,
|
||||
"-storageDataPath=" + path.Join(tc.Dir(), instanceReplica1),
|
||||
"-retentionPeriod=100y",
|
||||
}
|
||||
|
||||
sutR0 := tc.MustStartVlsingle(instanceReplica0, sutFlagsR0)
|
||||
sutR1 := tc.MustStartVlsingle(instanceReplica1, sutFlagsR1)
|
||||
|
||||
vlagentRemoteWriteURLs := []string{
|
||||
fmt.Sprintf("http://%s/internal/insert", sutR0.HTTPAddr()),
|
||||
fmt.Sprintf("http://%s/internal/insert", sutR1.HTTPAddr()),
|
||||
}
|
||||
vlagentFlags := []string{
|
||||
"-remoteWrite.tmpDataPath=" + fmt.Sprintf("%s/%s-%d", os.TempDir(), vlagentInstance, time.Now().UnixNano()),
|
||||
}
|
||||
vlagent := tc.MustStartVlagent(vlagentInstance, vlagentRemoteWriteURLs, vlagentFlags)
|
||||
|
||||
// ingest data and check if it properly replicated to the vlsingles
|
||||
vlagent.JSONLineWrite(t, []string{
|
||||
`{"_msg":"ingest jsonline","_time": "2025-06-05T14:30:19.088007Z", "foo":"bar"}`,
|
||||
`{"_msg":"ingest jsonline","_time": "2025-06-05T14:30:19.088007Z", "bar":"foo"}`,
|
||||
}, at.QueryOptsLogs{})
|
||||
|
||||
wantLogLines := []string{
|
||||
`{"_msg":"ingest jsonline","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","bar":"foo"}`,
|
||||
`{"_msg":"ingest jsonline","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","foo":"bar"}`,
|
||||
}
|
||||
|
||||
sutR0.ForceFlush(t)
|
||||
gotR0 := sutR0.LogsQLQuery(t, "ingest jsonline", at.QueryOptsLogs{})
|
||||
assertLogsQLResponseEqual(t, gotR0, &at.LogsQLQueryResponse{LogLines: wantLogLines})
|
||||
|
||||
sutR1.ForceFlush(t)
|
||||
gotR1 := sutR1.LogsQLQuery(t, "ingest jsonline", at.QueryOptsLogs{})
|
||||
assertLogsQLResponseEqual(t, gotR1, &at.LogsQLQueryResponse{LogLines: wantLogLines})
|
||||
|
||||
// stop log storage and check data buffering works correctly at vlagent
|
||||
tc.StopApp(instanceReplica0)
|
||||
|
||||
// ingest some data vlagent must hold it in memory
|
||||
vlagent.JSONLineWrite(t, []string{
|
||||
`{"_msg":"ingest jsonline2","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","bar":"foo"}`,
|
||||
`{"_msg":"ingest jsonline2","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","foo":"bar"}`,
|
||||
}, at.QueryOptsLogs{})
|
||||
|
||||
// check alive storage received data
|
||||
wantLogLines = []string{
|
||||
`{"_msg":"ingest jsonline2","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","bar":"foo"}`,
|
||||
`{"_msg":"ingest jsonline2","_stream":"{}","_time":"2025-06-05T14:30:19.088007Z","foo":"bar"}`,
|
||||
}
|
||||
|
||||
sutR1.ForceFlush(t)
|
||||
gotR1 = sutR1.LogsQLQuery(t, "ingest jsonline2", at.QueryOptsLogs{})
|
||||
assertLogsQLResponseEqual(t, gotR1, &at.LogsQLQueryResponse{LogLines: wantLogLines})
|
||||
|
||||
// stop vmagent, it must buffer data on-disk
|
||||
tc.StopApp(vlagentInstance)
|
||||
|
||||
vlagent = tc.MustStartVlagent(vlagentInstance, vlagentRemoteWriteURLs, vlagentFlags)
|
||||
vlagent.WaitQueueEmptyAfter(t, func() {
|
||||
// start storage and check if buffered data correctly ingested
|
||||
sutR0 = tc.MustStartVlsingle(instanceReplica0, sutFlagsR0)
|
||||
})
|
||||
|
||||
sutR0.ForceFlush(t)
|
||||
gotR0 = sutR0.LogsQLQuery(t, "ingest jsonline2", at.QueryOptsLogs{})
|
||||
assertLogsQLResponseEqual(t, gotR0, &at.LogsQLQueryResponse{LogLines: wantLogLines})
|
||||
}
|
||||
159
apptest/vlagent.go
Normal file
@@ -0,0 +1,159 @@
|
||||
package apptest
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Vlagent holds the state of a vlagent app and provides vlagent-specific functions
|
||||
type Vlagent struct {
|
||||
*app
|
||||
*ServesMetrics
|
||||
|
||||
remoteStoragesCount int
|
||||
httpListenAddr string
|
||||
}
|
||||
|
||||
// StartVlagent starts an instance of vlagent with the given flags.
|
||||
// It also sets the default flags and populates the app instance state with runtime
|
||||
// values extracted from the application log (such as httpListenAddr)
|
||||
func StartVlagent(instance string, remoteWriteURLs []string, flags []string, cli *Client) (*Vlagent, error) {
|
||||
extractREs := []*regexp.Regexp{
|
||||
httpListenAddrRE,
|
||||
}
|
||||
|
||||
app, stderrExtracts, err := startApp(instance, "../../bin/vlagent", flags, &appOptions{
|
||||
defaultFlags: map[string]string{
|
||||
"-httpListenAddr": "127.0.0.1:0",
|
||||
"-remoteWrite.url": strings.Join(remoteWriteURLs, ","),
|
||||
"-remoteWrite.tmpDataPath": fmt.Sprintf("%s/%s-%d", os.TempDir(), instance, time.Now().UnixNano()),
|
||||
"-remoteWrite.flushInterval": "10ms",
|
||||
"-remoteWrite.showURL": "true",
|
||||
},
|
||||
extractREs: extractREs,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Vlagent{
|
||||
app: app,
|
||||
remoteStoragesCount: len(remoteWriteURLs),
|
||||
ServesMetrics: &ServesMetrics{
|
||||
metricsURL: fmt.Sprintf("http://%s/metrics", stderrExtracts[0]),
|
||||
cli: cli,
|
||||
},
|
||||
httpListenAddr: stderrExtracts[0],
|
||||
}, nil
|
||||
}
|
||||
|
||||
// JSONLineWrite is a test helper function that inserts a
|
||||
// collection of records in json line format by sending a HTTP
|
||||
// POST request to /insert/jsonline vlagent endpoint.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victorialogs/data-ingestion/#json-stream-api
|
||||
func (app *Vlagent) JSONLineWrite(t *testing.T, records []string, opts QueryOptsLogs) {
|
||||
t.Helper()
|
||||
|
||||
data := []byte(strings.Join(records, "\n"))
|
||||
|
||||
url := fmt.Sprintf("http://%s/insert/jsonline", app.httpListenAddr)
|
||||
uv := opts.asURLValues()
|
||||
uvs := uv.Encode()
|
||||
if len(uvs) > 0 {
|
||||
url += "?" + uvs
|
||||
}
|
||||
app.sendBlocking(t, len(records), func() {
|
||||
_, statusCode := app.cli.Post(t, url, "text/plain", data)
|
||||
if statusCode != http.StatusOK {
|
||||
t.Fatalf("unexpected status code: got %d, want %d", statusCode, http.StatusOK)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// WaitQueueEmptyAfter checks that persistent queue is empty
|
||||
// after execution of provided callback
|
||||
func (app *Vlagent) WaitQueueEmptyAfter(t *testing.T, cb func()) {
|
||||
t.Helper()
|
||||
const (
|
||||
retries = 70
|
||||
period = 100 * time.Millisecond
|
||||
)
|
||||
// vlagent_remotewrite_blocks_sent_total
|
||||
// take in account data replication
|
||||
blocksSent := app.remoteWriteBlocksSent(t)
|
||||
cb()
|
||||
for range retries {
|
||||
if app.remoteWriteBlocksSent(t) > blocksSent && app.persistentQueueSize(t) == 0 {
|
||||
return
|
||||
}
|
||||
time.Sleep(period)
|
||||
}
|
||||
t.Fatalf("timed out while waiting for inserted logs to be flushed to remote storage")
|
||||
|
||||
}
|
||||
|
||||
// sendBlocking sends the data to remote write url by executing `send` function and
|
||||
// waits until the data is actually sent.
|
||||
//
|
||||
// vlagent does not send the data immediately. It first puts the data into a
|
||||
// buffer. Then a background goroutine takes the data from the buffer sends it
|
||||
// to the vmstorage. This happens every 1s by default.
|
||||
//
|
||||
// Waiting is implemented a retrieving the value of `vlagent_remotewrite_block_size_rows_sum`
|
||||
// metric and checking whether it is equal or greater than the wanted value.
|
||||
// If it is, then the data has been sent to remote storage.
|
||||
//
|
||||
// Unreliable if the records are inserted concurrently.
|
||||
func (app *Vlagent) sendBlocking(t *testing.T, numRecordsToSend int, send func()) {
|
||||
t.Helper()
|
||||
|
||||
send()
|
||||
|
||||
const (
|
||||
retries = 50
|
||||
period = 100 * time.Millisecond
|
||||
)
|
||||
// take in account data replication
|
||||
wantRowsSentCount := app.remoteWriteRowsPushed(t) + numRecordsToSend*app.remoteStoragesCount
|
||||
for range retries {
|
||||
if app.remoteWriteRowsPushed(t) >= wantRowsSentCount {
|
||||
return
|
||||
}
|
||||
time.Sleep(period)
|
||||
}
|
||||
t.Fatalf("timed out while waiting for inserted rows to be sent to remote storage")
|
||||
}
|
||||
|
||||
func (app *Vlagent) remoteWriteBlocksSent(t *testing.T) int {
|
||||
total := 0.0
|
||||
for _, v := range app.GetMetricsByPrefix(t, "vlagent_remotewrite_blocks_sent_total") {
|
||||
total += v
|
||||
}
|
||||
return int(total)
|
||||
}
|
||||
|
||||
func (app *Vlagent) remoteWriteRowsPushed(t *testing.T) int {
|
||||
total := 0.0
|
||||
// vlagent_remotewrite_blocks_sent_total
|
||||
for _, v := range app.GetMetricsByPrefix(t, "vlagent_remotewrite_block_size_rows_sum") {
|
||||
total += v
|
||||
}
|
||||
return int(total)
|
||||
}
|
||||
|
||||
func (app *Vlagent) persistentQueueSize(t *testing.T) int {
|
||||
total := 0.0
|
||||
for _, v := range app.GetMetricsByPrefix(t, "vlagent_remotewrite_pending_data_bytes") {
|
||||
total += v
|
||||
}
|
||||
for _, v := range app.GetMetricsByPrefix(t, "vlagent_remotewrite_pending_inmemory_blocks") {
|
||||
total += v
|
||||
}
|
||||
return int(total)
|
||||
}
|
||||
@@ -5826,7 +5826,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 2387
|
||||
@@ -6110,7 +6110,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 2395
|
||||
@@ -7538,7 +7538,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the max number of concurrent selects across instances.\n* `max` limit can be configured via `search.maxConcurrentRequests` flag\n* `current` shows the current number of goroutines busy with processing requests\n\nWhen `current` hits `max` constantly, it means one or more vmselect nodes are overloaded with number of requests. If you observe that CPU for vmselects is saturated, consider adding more vmselect replicas or increase CPU resources. If CPU panel shows a plenty of free resources - try increasing `search.maxConcurrentRequests`.",
|
||||
"description": "Shows the max number of concurrent selects across instances.\n* `max` limit can be configured via `search.maxConcurrentRequests` flag\n* `current` shows the current number of goroutines busy with processing requests\n\nWhen `current` hits `max` constantly, it means one or more vmselect nodes are overloaded with number of requests. If you observe that CPU for vmselects is saturated, consider adding more vmselect replicas or increase CPU resources. If CPU and Memory panels show a plenty of free resources - try increasing `-search.maxConcurrentRequests`. Please note, the higher is `-search.maxConcurrentRequests`, the higher could be [peak memory usage](https://docs.victoriametrics.com/victoriametrics/troubleshooting/#out-of-memory-errors).",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -10053,7 +10053,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 5014
|
||||
@@ -10159,7 +10159,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 5014
|
||||
@@ -10687,4 +10687,4 @@
|
||||
"uid": "oS7Bi_0Wz",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
}
|
||||
|
||||
5138
dashboards/vlagent.json
Normal file
@@ -5827,7 +5827,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 2387
|
||||
@@ -6111,7 +6111,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 2395
|
||||
@@ -7539,7 +7539,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the max number of concurrent selects across instances.\n* `max` limit can be configured via `search.maxConcurrentRequests` flag\n* `current` shows the current number of goroutines busy with processing requests\n\nWhen `current` hits `max` constantly, it means one or more vmselect nodes are overloaded with number of requests. If you observe that CPU for vmselects is saturated, consider adding more vmselect replicas or increase CPU resources. If CPU panel shows a plenty of free resources - try increasing `search.maxConcurrentRequests`.",
|
||||
"description": "Shows the max number of concurrent selects across instances.\n* `max` limit can be configured via `search.maxConcurrentRequests` flag\n* `current` shows the current number of goroutines busy with processing requests\n\nWhen `current` hits `max` constantly, it means one or more vmselect nodes are overloaded with number of requests. If you observe that CPU for vmselects is saturated, consider adding more vmselect replicas or increase CPU resources. If CPU and Memory panels show a plenty of free resources - try increasing `-search.maxConcurrentRequests`. Please note, the higher is `-search.maxConcurrentRequests`, the higher could be [peak memory usage](https://docs.victoriametrics.com/victoriametrics/troubleshooting/#out-of-memory-errors).",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -10054,7 +10054,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 5014
|
||||
@@ -10160,7 +10160,7 @@
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 5014
|
||||
@@ -10688,4 +10688,4 @@
|
||||
"uid": "oS7Bi_0Wz_vm",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3409,7 +3409,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk_max(10, sum(sum_over_time(scrape_series_added[5m])) by (job)) > 0",
|
||||
"expr": "topk(10, sum(sum_over_time(scrape_series_added[5m])) by (job)) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "{{ job }}",
|
||||
"range": true,
|
||||
|
||||
@@ -115,13 +115,14 @@
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"color": "green",
|
||||
"index": 0,
|
||||
"text": "Ok"
|
||||
}
|
||||
},
|
||||
"type": "value"
|
||||
"type": "special"
|
||||
},
|
||||
{
|
||||
"options": {
|
||||
@@ -140,8 +141,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -173,17 +173,19 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "count(vmalert_config_last_reload_successful{job=~\"$job\", instance=~\"$instance\"} < 1 ) or 0",
|
||||
"expr": "count(vmalert_config_last_reload_successful{job=~\"$job\", instance=~\"$instance\"} < 1 )",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@@ -204,8 +206,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -237,7 +238,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -268,8 +269,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -301,7 +301,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -332,8 +332,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -369,17 +368,19 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "(sum(increase(vmalert_alerting_rules_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])) or vector(0)) + \n(sum(increase(vmalert_recording_rules_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])) or vector(0))",
|
||||
"expr": "(sum(increase(vmalert_alerting_rules_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval]))) + \n(sum(increase(vmalert_recording_rules_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])))",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@@ -394,14 +395,24 @@
|
||||
"description": "Shows number of Recording Rules which produce no data.\n\n Usually it means that such rules are misconfigured, since they give no output during the evaluation.\nPlease check if rule's expression is correct and it is working as expected.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"index": 1,
|
||||
"text": "0"
|
||||
}
|
||||
},
|
||||
"type": "special"
|
||||
}
|
||||
],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -437,7 +448,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -446,7 +457,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "count(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\"} < 1) or 0",
|
||||
"expr": "count(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\"} < 1)",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
@@ -479,8 +490,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -535,7 +545,7 @@
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -605,8 +615,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -642,7 +651,7 @@
|
||||
"sort": "asc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -724,8 +733,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -763,7 +771,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -787,7 +795,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Top $topk groups by evaluation duration. Shows groups that take the most of time during the evaluation across all instances.\n\nThe panel uses MetricsQL functions and may not work with VictoriaMetrics.",
|
||||
"description": "Top $topk groups by evaluation duration. Shows groups that take the most of time during the evaluation across all instances.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -831,8 +839,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -870,7 +877,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -879,7 +886,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk_max($topk, max(sum(\n rate(vmalert_iteration_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])\n/\n rate(vmalert_iteration_duration_seconds_count{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])\n) by(job, instance, group, file)) \nby(job, group, file))",
|
||||
"expr": "topk($topk, max(sum(\n rate(vmalert_iteration_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])\n/\n rate(vmalert_iteration_duration_seconds_count{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])\n) by(job, instance, group, file)) \nby(job, group, file))",
|
||||
"interval": "",
|
||||
"legendFormat": "({{job}}) {{group}}({{file}})",
|
||||
"range": true,
|
||||
@@ -938,8 +945,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -975,7 +981,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1043,8 +1049,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1080,7 +1085,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1160,8 +1165,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1276,8 +1280,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1394,8 +1397,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1503,8 +1505,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1637,8 +1638,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1754,8 +1754,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -1876,8 +1875,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -1997,8 +1995,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -2106,8 +2103,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -2216,8 +2212,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -2325,8 +2320,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -2434,8 +2428,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -2856,7 +2849,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows top $topk current active (firing) alerting rules.\n\nThe panel uses MetricsQL functions and may not work with VictoriaMetrics.",
|
||||
"description": "Shows top $topk current active (firing) alerting rules.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2943,7 +2936,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk_max($topk, sum(vmalert_alerts_firing{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}) by(job, group, file, alertname) > 0)",
|
||||
"expr": "topk($topk, sum(vmalert_alerts_firing{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}) by(job, group, file, alertname) > 0)",
|
||||
"interval": "",
|
||||
"legendFormat": "({{job}}) {{group}}.{{alertname}}({{file}})",
|
||||
"range": true,
|
||||
@@ -3376,7 +3369,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the top $topk recording rules which generate the most of [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples). Each generated sample is basically a time series which then ingested into configured remote storage. Rules with high numbers may cause the most pressure on the remote database and become a source of too high cardinality.\n\nThe panel uses MetricsQL functions and may not work with VictoriaMetrics.",
|
||||
"description": "Shows the top $topk recording rules which generate the most of [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples). Each generated sample is basically a time series which then ingested into configured remote storage. Rules with high numbers may cause the most pressure on the remote database and become a source of too high cardinality.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -3463,7 +3456,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk_max($topk, \n max(\n sum(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}) by(job, instance, group, file, recording) > 0\n ) by(job, group, file, recording)\n)",
|
||||
"expr": "topk($topk, \n max(\n sum(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}) by(job, instance, group, file, recording) > 0\n ) by(job, group, file, recording)\n)",
|
||||
"interval": "",
|
||||
"legendFormat": "({{job}}) {{group}}.{{recording}}({{file}})",
|
||||
"range": true,
|
||||
@@ -4084,7 +4077,7 @@
|
||||
],
|
||||
"preload": false,
|
||||
"refresh": "",
|
||||
"schemaVersion": 40,
|
||||
"schemaVersion": 41,
|
||||
"tags": [
|
||||
"victoriametrics"
|
||||
],
|
||||
@@ -4228,10 +4221,10 @@
|
||||
"baseFilters": [],
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"filters": [],
|
||||
"name": "adhoc",
|
||||
"name": "filter",
|
||||
"type": "adhoc"
|
||||
}
|
||||
]
|
||||
@@ -4244,6 +4237,5 @@
|
||||
"timezone": "",
|
||||
"title": "VictoriaMetrics - vmalert (VM)",
|
||||
"uid": "LzldHAVnz_vm",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
"version": 1
|
||||
}
|
||||
@@ -3408,7 +3408,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk_max(10, sum(sum_over_time(scrape_series_added[5m])) by (job)) > 0",
|
||||
"expr": "topk(10, sum(sum_over_time(scrape_series_added[5m])) by (job)) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "{{ job }}",
|
||||
"range": true,
|
||||
|
||||
@@ -114,13 +114,14 @@
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"color": "green",
|
||||
"index": 0,
|
||||
"text": "Ok"
|
||||
}
|
||||
},
|
||||
"type": "value"
|
||||
"type": "special"
|
||||
},
|
||||
{
|
||||
"options": {
|
||||
@@ -139,8 +140,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -172,17 +172,19 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "count(vmalert_config_last_reload_successful{job=~\"$job\", instance=~\"$instance\"} < 1 ) or 0",
|
||||
"expr": "count(vmalert_config_last_reload_successful{job=~\"$job\", instance=~\"$instance\"} < 1 )",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@@ -203,8 +205,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -236,7 +237,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -267,8 +268,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -300,7 +300,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -331,8 +331,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -368,17 +367,19 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "(sum(increase(vmalert_alerting_rules_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])) or vector(0)) + \n(sum(increase(vmalert_recording_rules_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])) or vector(0))",
|
||||
"expr": "(sum(increase(vmalert_alerting_rules_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval]))) + \n(sum(increase(vmalert_recording_rules_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])))",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@@ -393,14 +394,24 @@
|
||||
"description": "Shows number of Recording Rules which produce no data.\n\n Usually it means that such rules are misconfigured, since they give no output during the evaluation.\nPlease check if rule's expression is correct and it is working as expected.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"mappings": [],
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"match": "null",
|
||||
"result": {
|
||||
"index": 1,
|
||||
"text": "0"
|
||||
}
|
||||
},
|
||||
"type": "special"
|
||||
}
|
||||
],
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -436,7 +447,7 @@
|
||||
"textMode": "auto",
|
||||
"wideLayout": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -445,7 +456,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "count(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\"} < 1) or 0",
|
||||
"expr": "count(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\"} < 1)",
|
||||
"interval": "",
|
||||
"legendFormat": "",
|
||||
"range": true,
|
||||
@@ -478,8 +489,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -534,7 +544,7 @@
|
||||
},
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -604,8 +614,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -641,7 +650,7 @@
|
||||
"sort": "asc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -723,8 +732,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -762,7 +770,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -786,7 +794,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Top $topk groups by evaluation duration. Shows groups that take the most of time during the evaluation across all instances.\n\nThe panel uses MetricsQL functions and may not work with Prometheus.",
|
||||
"description": "Top $topk groups by evaluation duration. Shows groups that take the most of time during the evaluation across all instances.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -830,8 +838,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -869,7 +876,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -878,7 +885,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk_max($topk, max(sum(\n rate(vmalert_iteration_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])\n/\n rate(vmalert_iteration_duration_seconds_count{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])\n) by(job, instance, group, file)) \nby(job, group, file))",
|
||||
"expr": "topk($topk, max(sum(\n rate(vmalert_iteration_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])\n/\n rate(vmalert_iteration_duration_seconds_count{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}[$__rate_interval])\n) by(job, instance, group, file)) \nby(job, group, file))",
|
||||
"interval": "",
|
||||
"legendFormat": "({{job}}) {{group}}({{file}})",
|
||||
"range": true,
|
||||
@@ -937,8 +944,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -974,7 +980,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1042,8 +1048,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1079,7 +1084,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.0.2",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -1159,8 +1164,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1275,8 +1279,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1393,8 +1396,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1502,8 +1504,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1636,8 +1637,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -1753,8 +1753,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -1875,8 +1874,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -1996,8 +1994,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -2105,8 +2102,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -2215,8 +2211,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -2324,8 +2319,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -2433,8 +2427,7 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
"color": "green"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -2855,7 +2848,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows top $topk current active (firing) alerting rules.\n\nThe panel uses MetricsQL functions and may not work with Prometheus.",
|
||||
"description": "Shows top $topk current active (firing) alerting rules.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -2942,7 +2935,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk_max($topk, sum(vmalert_alerts_firing{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}) by(job, group, file, alertname) > 0)",
|
||||
"expr": "topk($topk, sum(vmalert_alerts_firing{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}) by(job, group, file, alertname) > 0)",
|
||||
"interval": "",
|
||||
"legendFormat": "({{job}}) {{group}}.{{alertname}}({{file}})",
|
||||
"range": true,
|
||||
@@ -3375,7 +3368,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the top $topk recording rules which generate the most of [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples). Each generated sample is basically a time series which then ingested into configured remote storage. Rules with high numbers may cause the most pressure on the remote database and become a source of too high cardinality.\n\nThe panel uses MetricsQL functions and may not work with Prometheus.",
|
||||
"description": "Shows the top $topk recording rules which generate the most of [samples](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#raw-samples). Each generated sample is basically a time series which then ingested into configured remote storage. Rules with high numbers may cause the most pressure on the remote database and become a source of too high cardinality.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -3462,7 +3455,7 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk_max($topk, \n max(\n sum(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}) by(job, instance, group, file, recording) > 0\n ) by(job, group, file, recording)\n)",
|
||||
"expr": "topk($topk, \n max(\n sum(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\", group=~\"$group\", file=~\"$file\"}) by(job, instance, group, file, recording) > 0\n ) by(job, group, file, recording)\n)",
|
||||
"interval": "",
|
||||
"legendFormat": "({{job}}) {{group}}.{{recording}}({{file}})",
|
||||
"range": true,
|
||||
@@ -4083,7 +4076,7 @@
|
||||
],
|
||||
"preload": false,
|
||||
"refresh": "",
|
||||
"schemaVersion": 40,
|
||||
"schemaVersion": 41,
|
||||
"tags": [
|
||||
"victoriametrics"
|
||||
],
|
||||
@@ -4227,10 +4220,10 @@
|
||||
"baseFilters": [],
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
"uid": "${ds}"
|
||||
},
|
||||
"filters": [],
|
||||
"name": "adhoc",
|
||||
"name": "filter",
|
||||
"type": "adhoc"
|
||||
}
|
||||
]
|
||||
@@ -4243,6 +4236,5 @@
|
||||
"timezone": "",
|
||||
"title": "VictoriaMetrics - vmalert",
|
||||
"uid": "LzldHAVnz",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
"version": 1
|
||||
}
|
||||
@@ -21,6 +21,7 @@ to the Internet.
|
||||
* [alertmanager](#alertmanager)
|
||||
* [Grafana](#grafana)
|
||||
* [Alerts](#alerts)
|
||||
* [Troubleshooting](#troubleshooting)
|
||||
|
||||
## VictoriaMetrics single server
|
||||
|
||||
@@ -56,6 +57,8 @@ To shutdown environment run:
|
||||
make docker-vm-single-down
|
||||
```
|
||||
|
||||
See [troubleshooting](#troubleshooting) in case if issues.
|
||||
|
||||
## VictoriaMetrics cluster
|
||||
|
||||
To spin-up environment with VictoriaMetrics cluster run the following command:
|
||||
@@ -92,6 +95,8 @@ To shutdown environment execute the following command:
|
||||
make docker-vm-cluster-down
|
||||
```
|
||||
|
||||
See [troubleshooting](#troubleshooting) in case if issues.
|
||||
|
||||
## vmagent
|
||||
|
||||
vmagent is used for scraping and pushing time series to VictoriaMetrics instance.
|
||||
@@ -135,6 +140,8 @@ To shutdown environment execute the following command:
|
||||
make docker-vl-single-down
|
||||
```
|
||||
|
||||
See [troubleshooting](#troubleshooting) in case if issues.
|
||||
|
||||
## VictoriaLogs cluster
|
||||
|
||||
To spin-up environment with VictoriaLogs cluster run the following command:
|
||||
@@ -176,6 +183,8 @@ To shutdown environment execute the following command:
|
||||
make docker-vl-cluster-down
|
||||
```
|
||||
|
||||
See [troubleshooting](#troubleshooting) in case if issues.
|
||||
|
||||
Please see more examples on integration of VictoriaLogs with other log shippers below:
|
||||
* [filebeat](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/victorialogs/filebeat)
|
||||
* [fluentbit](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/victorialogs/fluentbit)
|
||||
@@ -249,3 +258,34 @@ The list of alerting rules is the following:
|
||||
|
||||
Please, also see [how to monitor VictoriaMetrics installations](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#monitoring)
|
||||
and [how to monitor VictoriaLogs installations](https://docs.victoriametrics.com/victorialogs/#monitoring).
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
This environment has the following requirements:
|
||||
* installed [docker compose](https://docs.docker.com/compose/);
|
||||
* access to the Internet for downloading docker images;
|
||||
* **All commands should be executed from the root directory of [the VictoriaMetrics repo](https://github.com/VictoriaMetrics/VictoriaMetrics).**
|
||||
|
||||
The expected output of running a command like `make docker-vm-single-up` is the following:
|
||||
```sh
|
||||
make docker-vm-single-up :(
|
||||
docker compose -f deployment/docker/compose-vm-single.yml up -d
|
||||
[+] Running 9/9
|
||||
✔ Network docker_default Created 0.0s
|
||||
✔ Volume "docker_vmagentdata" Created 0.0s
|
||||
✔ Container docker-alertmanager-1 Started 0.3s
|
||||
✔ Container docker-victoriametrics-1 Started 0.3s
|
||||
...
|
||||
```
|
||||
|
||||
Containers are started in [--detach mode](https://docs.docker.com/reference/cli/docker/compose/up/), meaning they run in the background.
|
||||
As a result, you won't see their logs or exit status directly in the terminal.
|
||||
|
||||
If something isn’t working as expected, try the following troubleshooting steps:
|
||||
1. Run from the correct directory. Make sure you're running the command from the root of the [VictoriaMetrics repository](https://github.com/VictoriaMetrics/VictoriaMetrics).
|
||||
2. Check container status. Run `docker ps -a` to list all containers and their status. Healthy and running containers should have `STATUS` set to `Up`.
|
||||
3. View container logs. To inspect logs for a specific container, get its container ID from step p2 and run: `docker logs -f <containerID>`.
|
||||
4. Read the logs carefully and follow any suggested actions.
|
||||
5. Check for port conflicts. Some containers (e.g., Grafana) expose HTTP ports. If a port (like `:3000`) is already in use, the container may fail to start. Stop the conflicting process or change the exposed port in the Docker Compose file.
|
||||
6. Shut down the deployment. To tear down the environment, run: `make <environment>-down` (i.e. `make docker-vm-single-down`).
|
||||
Note, this command also removes all attached volumes, so all the temporary created data will be removed too (i.e. Grafana dashboards or collected metrics).
|
||||
@@ -1,14 +1,17 @@
|
||||
# balance load among vmselects
|
||||
# see https://docs.victoriametrics.com/victoriametrics/vmauth/#load-balancing
|
||||
unauthorized_user:
|
||||
url_map:
|
||||
- src_paths:
|
||||
- "/select/.*"
|
||||
url_prefix:
|
||||
- http://vmselect-1:8481
|
||||
- http://vmselect-2:8481
|
||||
- src_paths:
|
||||
- "/insert/.*"
|
||||
url_prefix:
|
||||
- http://vminsert-1:8480
|
||||
- http://vminsert-2:8480
|
||||
users:
|
||||
- username: "foo"
|
||||
password: "bar"
|
||||
url_map:
|
||||
- src_paths:
|
||||
- "/select/.*"
|
||||
- "/admin/.*"
|
||||
url_prefix:
|
||||
- http://vmselect-1:8481
|
||||
- http://vmselect-2:8481
|
||||
- src_paths:
|
||||
- "/insert/.*"
|
||||
url_prefix:
|
||||
- http://vminsert-1:8480
|
||||
- http://vminsert-2:8480
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
services:
|
||||
# Grafana instance configured with VictoriaLogs as datasource
|
||||
grafana:
|
||||
image: grafana/grafana:11.5.0
|
||||
image: grafana/grafana:12.0.2
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "vmauth"
|
||||
@@ -68,7 +68,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# scraping, storing metrics and serve read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.119.0
|
||||
image: victoriametrics/victoria-metrics:v1.120.0
|
||||
volumes:
|
||||
- vmdata:/storage
|
||||
- ./prometheus-vl-cluster.yml:/etc/prometheus/prometheus.yml
|
||||
@@ -81,7 +81,7 @@ services:
|
||||
# It proxies query requests from vmalert to either VictoriaMetrics or VictoriaLogs,
|
||||
# depending on the requested path.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.119.0
|
||||
image: victoriametrics/vmauth:v1.120.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "vlselect-1"
|
||||
@@ -97,7 +97,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules according to given rule type.
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.119.0
|
||||
image: victoriametrics/vmalert:v1.120.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
services:
|
||||
# Grafana instance configured with VictoriaLogs as datasource
|
||||
grafana:
|
||||
image: grafana/grafana:11.5.0
|
||||
image: grafana/grafana:12.0.2
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "victorialogs"
|
||||
@@ -49,7 +49,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# scraping, storing metrics and serve read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.119.0
|
||||
image: victoriametrics/victoria-metrics:v1.120.0
|
||||
ports:
|
||||
- "8428:8428"
|
||||
volumes:
|
||||
@@ -64,7 +64,7 @@ services:
|
||||
# It proxies query requests from vmalert to either VictoriaMetrics or VictoriaLogs,
|
||||
# depending on the requested path.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.119.0
|
||||
image: victoriametrics/vmauth:v1.120.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "victorialogs"
|
||||
@@ -78,7 +78,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules according to the given rule type.
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.119.0
|
||||
image: victoriametrics/vmalert:v1.120.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forward them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.119.0
|
||||
image: victoriametrics/vmagent:v1.120.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -17,7 +17,7 @@ services:
|
||||
restart: always
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:11.5.0
|
||||
image: grafana/grafana:12.0.2
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -35,14 +35,14 @@ services:
|
||||
# vmstorage shards. Each shard receives 1/N of all metrics sent to vminserts,
|
||||
# where N is number of vmstorages (2 in this case).
|
||||
vmstorage-1:
|
||||
image: victoriametrics/vmstorage:v1.119.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.120.0-cluster
|
||||
volumes:
|
||||
- strgdata-1:/storage
|
||||
command:
|
||||
- "--storageDataPath=/storage"
|
||||
restart: always
|
||||
vmstorage-2:
|
||||
image: victoriametrics/vmstorage:v1.119.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.120.0-cluster
|
||||
volumes:
|
||||
- strgdata-2:/storage
|
||||
command:
|
||||
@@ -52,7 +52,7 @@ services:
|
||||
# vminsert is ingestion frontend. It receives metrics pushed by vmagent,
|
||||
# pre-process them and distributes across configured vmstorage shards.
|
||||
vminsert-1:
|
||||
image: victoriametrics/vminsert:v1.119.0-cluster
|
||||
image: victoriametrics/vminsert:v1.120.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -61,7 +61,7 @@ services:
|
||||
- "--storageNode=vmstorage-2:8400"
|
||||
restart: always
|
||||
vminsert-2:
|
||||
image: victoriametrics/vminsert:v1.119.0-cluster
|
||||
image: victoriametrics/vminsert:v1.120.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -73,7 +73,7 @@ services:
|
||||
# vmselect is a query fronted. It serves read queries in MetricsQL or PromQL.
|
||||
# vmselect collects results from configured `--storageNode` shards.
|
||||
vmselect-1:
|
||||
image: victoriametrics/vmselect:v1.119.0-cluster
|
||||
image: victoriametrics/vmselect:v1.120.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -83,7 +83,7 @@ services:
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
restart: always
|
||||
vmselect-2:
|
||||
image: victoriametrics/vmselect:v1.119.0-cluster
|
||||
image: victoriametrics/vmselect:v1.120.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -98,7 +98,7 @@ services:
|
||||
# read requests from Grafana, vmui, vmalert among vmselects.
|
||||
# It can be used as an authentication proxy.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.119.0
|
||||
image: victoriametrics/vmauth:v1.120.0
|
||||
depends_on:
|
||||
- "vmselect-1"
|
||||
- "vmselect-2"
|
||||
@@ -112,7 +112,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.119.0
|
||||
image: victoriametrics/vmalert:v1.120.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forward them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.119.0
|
||||
image: victoriametrics/vmagent:v1.120.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# storing metrics and serve read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.119.0
|
||||
image: victoriametrics/victoria-metrics:v1.120.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
- 8089:8089
|
||||
@@ -38,7 +38,7 @@ services:
|
||||
restart: always
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:11.5.0
|
||||
image: grafana/grafana:12.0.2
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -54,7 +54,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.119.0
|
||||
image: victoriametrics/vmalert:v1.120.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -19,7 +19,7 @@ services:
|
||||
retries: 10
|
||||
|
||||
dd-proxy:
|
||||
image: docker.io/victoriametrics/vmauth:v1.119.0
|
||||
image: docker.io/victoriametrics/vmauth:v1.120.0
|
||||
restart: on-failure
|
||||
volumes:
|
||||
- ./:/etc/vmauth
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.119.0
|
||||
image: victoriametrics/vmagent:v1.120.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -14,7 +14,7 @@ services:
|
||||
restart: always
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.119.0
|
||||
image: victoriametrics/victoria-metrics:v1.120.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -27,7 +27,7 @@ services:
|
||||
restart: always
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana-oss:10.2.1
|
||||
image: grafana/grafana:12.0.2
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
restart: always
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.119.0
|
||||
image: victoriametrics/vmalert:v1.120.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -58,7 +58,7 @@ services:
|
||||
- ./vmsingle/promscrape.yml:/promscrape.yml
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:11.5.0
|
||||
image: grafana/grafana:12.0.2
|
||||
depends_on: [vmsingle]
|
||||
ports:
|
||||
- 3000:3000
|
||||
|
||||
@@ -126,7 +126,7 @@ Metrics to save the output (in metric names or labels). Must have `__name__` key
|
||||
</td>
|
||||
<td>
|
||||
|
||||
`/api/v1/import`
|
||||
`/api/v1/import`{{% deprecated_from "v1.19.2" anomaly %}}
|
||||
</td>
|
||||
<td>
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
|
||||
- To use *vmanomaly*, part of the enterprise package, a license key is required. Obtain your key [here](https://victoriametrics.com/products/enterprise/trial/) for this tutorial or for enterprise use.
|
||||
- In the tutorial, we'll be using the following VictoriaMetrics components:
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.119.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.119.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.119.0)
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.120.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.120.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.120.0)
|
||||
- [Grafana](https://grafana.com/) (v.10.2.1)
|
||||
- [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/)
|
||||
- [Node exporter](https://github.com/prometheus/node_exporter#node-exporter) (v1.7.0) and [Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) (v0.27.0)
|
||||
@@ -315,7 +315,7 @@ Let's wrap it all up together into the `docker-compose.yml` file.
|
||||
services:
|
||||
vmagent:
|
||||
container_name: vmagent
|
||||
image: victoriametrics/vmagent:v1.119.0
|
||||
image: victoriametrics/vmagent:v1.120.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -332,7 +332,7 @@ services:
|
||||
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.119.0
|
||||
image: victoriametrics/victoria-metrics:v1.120.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -365,7 +365,7 @@ services:
|
||||
|
||||
vmalert:
|
||||
container_name: vmalert
|
||||
image: victoriametrics/vmalert:v1.119.0
|
||||
image: victoriametrics/vmalert:v1.120.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -241,27 +241,27 @@ services:
|
||||
- grafana_data:/var/lib/grafana/
|
||||
|
||||
vmsingle:
|
||||
image: victoriametrics/victoria-metrics:v1.119.0
|
||||
image: victoriametrics/victoria-metrics:v1.120.0
|
||||
command:
|
||||
- -httpListenAddr=0.0.0.0:8429
|
||||
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.119.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.120.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.119.0-cluster
|
||||
image: victoriametrics/vminsert:v1.120.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
- -httpListenAddr=0.0.0.0:8480
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.119.0-cluster
|
||||
image: victoriametrics/vmselect:v1.120.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
- -httpListenAddr=0.0.0.0:8481
|
||||
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.119.0
|
||||
image: victoriametrics/vmagent:v1.120.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
@@ -270,7 +270,7 @@ services:
|
||||
- -remoteWrite.url=http://vmsingle:8429/api/v1/write
|
||||
|
||||
vmgateway-cluster:
|
||||
image: victoriametrics/vmgateway:v1.119.0-enterprise
|
||||
image: victoriametrics/vmgateway:v1.120.0-enterprise
|
||||
ports:
|
||||
- 8431:8431
|
||||
volumes:
|
||||
@@ -286,7 +286,7 @@ services:
|
||||
- -auth.oidcDiscoveryEndpoints=http://keycloak:8080/realms/master/.well-known/openid-configuration
|
||||
|
||||
vmgateway-single:
|
||||
image: victoriametrics/vmgateway:v1.119.0-enterprise
|
||||
image: victoriametrics/vmgateway:v1.120.0-enterprise
|
||||
ports:
|
||||
- 8432:8431
|
||||
volumes:
|
||||
@@ -397,7 +397,7 @@ Once iDP configuration is done, vmagent configuration needs to be updated to use
|
||||
|
||||
```yaml
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.119.0
|
||||
image: victoriametrics/vmagent:v1.120.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
- ./vmagent-client-secret:/etc/vmagent/oauth2-client-secret
|
||||
|
||||
@@ -209,8 +209,8 @@ Migrating data from other databases to VictoriaMetrics is as simple as importing
|
||||
[supported ingestion formats](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#push-model).
|
||||
|
||||
But migration from InfluxDB might get easier with [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/). See more about
|
||||
migrating [from InfluxDB v1.x versions](https://docs.victoriametrics.com/victoriametrics/vmctl/#migrating-data-from-influxdb-1x).
|
||||
Migrating data from InfluxDB v2.x is not supported. But there is a useful [3rd party solution](https://docs.victoriametrics.com/victoriametrics/vmctl/#migrating-data-from-influxdb-2x)
|
||||
migrating [from InfluxDB v1.x versions](https://docs.victoriametrics.com/victoriametrics/vmctl/influxdb/).
|
||||
Migrating data from InfluxDB v2.x is not supported. But there is a useful [3rd party solution](https://docs.victoriametrics.com/victoriametrics/vmctl/influxdb/#influxdb-v2)
|
||||
for this.
|
||||
|
||||
Please note, data migration is a backfilling process, so read about [backfilling tips](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#backfilling).
|
||||
|
||||
@@ -75,4 +75,3 @@ Additional context
|
||||
### What more can we do?
|
||||
|
||||
Setup vmagents in Ground Control regions. That allows it to accept data close to storage and add more reliability if storage is temporarily offline.
|
||||
g
|
||||
|
||||
@@ -59,6 +59,7 @@ Released at 2025-06-20
|
||||
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): fix issue with hits chart ignoring selected AccountID and ProjectID. See [#9157](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9157).
|
||||
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): fix missing field values in auto-complete. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/8749)
|
||||
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): remove the compact mode of the table tab and add field sorting capabilities to the JSON tab. See [#7047](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7047).
|
||||
* BUGFIX: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): fix errors in console about loading of `manifest.json` when accessing UI through vmauth with Basic Auth enabled.
|
||||
* BUGFIX: [Journald data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/): properly read log timestamp from `__REALTIME_TIMESTAMP` field according to [the docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/#time-field). See [#9144](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9144). The bug has been introduced in [v1.22.0-victorialogs](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.22.0-victorialogs).
|
||||
* BUGFIX: [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/): support `-` as a timestamp value, as described in [RFC5424](https://datatracker.ietf.org/doc/html/rfc5424#section-6.2.3).
|
||||
* BUGFIX: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/): properly handle quotes inside quoted strings such as `"\""`. Previously this could lead to panics. See [#9219](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9219).
|
||||
@@ -570,7 +571,7 @@ Released at 2024-09-30
|
||||
Released at 2024-09-29
|
||||
|
||||
* FEATURE: [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/): accept Unix timestamps in seconds in the ingested logs. This simplifies integration with systems, which prefer Unix timestamps over text-based representation of time.
|
||||
* FEATURE: [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe): allow using `order` alias instead of `sort`. For example, `_time:5s | order by (_time)` query works the same as `_time:5s | sort by (_time)`. This simplifies the to [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) transition from SQL-like query languages.
|
||||
* FEATURE: [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe): allow using `order` alias instead of `sort`. For example, `_time:5s | order by (_time)` query works the same as `_time:5s | sort by (_time)`. This simplifies [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) transition from SQL-like query languages.
|
||||
* FEATURE: [`stats` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe): allow using multiple identical [stats functions](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe-functions) with distinct [filters](https://docs.victoriametrics.com/victorialogs/logsql/#stats-with-additional-filters) and automatically generated result names. For example, `_time:5m | count(), count() if (error)` query works as expected now, e.g. it returns two results over the last 5 minutes: the total number of logs and the number of logs with `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word). Previously this query couldn't be executed because the `if (...)` condition wasn't included in the automatically generate result name, so both results had the same name - `count(*)`.
|
||||
|
||||
* BUGFIX: properly calculate [`uniq`](https://docs.victoriametrics.com/victorialogs/logsql/#uniq-pipe) and [`top`](https://docs.victoriametrics.com/victorialogs/logsql/#top-pipe) pipes. Previously they could return invalid results in some cases.
|
||||
@@ -650,7 +651,7 @@ Released at 2024-07-05
|
||||
|
||||
Released at 2024-07-02
|
||||
|
||||
* FEATURE: add `-syslog.useLocalTimestamp.tcp` and `-syslog.useLocalTimestamp.udp` command-line flags, which could be used for using the local timestamp as [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) for the logs ingested via the corresponding `-syslog.listenAddr.tcp` / `-syslog.listenAddr.udp`. By default the timestamp from the syslog message is used as [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field). See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/).
|
||||
* FEATURE: add `-syslog.useLocalTimestamp.tcp` and `-syslog.useLocalTimestamp.udp` command-line flags, which could be used for using the local timestamp as [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) for the logs ingested via the corresponding `-syslog.listenAddr.tcp` / `-syslog.listenAddr.udp`. By default, the timestamp from the syslog message is used as [`_time` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field). See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/).
|
||||
|
||||
* BUGFIX: make slowly ingested logs visible for search as soon as they are ingested into VictoriaLogs. Previously slowly ingested logs could remain invisible for search for long time.
|
||||
|
||||
|
||||
@@ -207,7 +207,7 @@ via `-insert.maxLineSizeBytes` command-line flag.
|
||||
VictoriaLogs limits [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) name length to 128 bytes -
|
||||
Log entries with longer field names are ignored during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
|
||||
|
||||
The maximum length of a field name is hardcoded and is unikely to increase, since this may increase RAM and CPU usage.
|
||||
The maximum length of a field name is hardcoded and is unlikely to increase, since this may increase RAM and CPU usage.
|
||||
|
||||
## How many fields a single log entry may contain
|
||||
|
||||
@@ -243,7 +243,7 @@ is returned in the `total_bytes` field.
|
||||
|
||||
If you use [VictoriaLogs web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui)
|
||||
or [Grafana plugin for VictoriaLogs](https://docs.victoriametrics.com/victorialogs/victorialogs-datasource/),
|
||||
then make sure the selected time range covers the last day. Otherwise the query above returns
|
||||
then make sure the selected time range covers the last day. Otherwise, the query above returns
|
||||
results on the intersection of the last day and the selected time range.
|
||||
|
||||
See [why the log field occupies a lot of disk space](#why-the-log-field-occupies-a-lot-of-disk-space).
|
||||
@@ -334,7 +334,7 @@ The query works in the following way:
|
||||
|
||||
The needed storage space depends on the following factors:
|
||||
|
||||
- Data compressibility. VictoraLogs compresses the ingested logs before storing them to disk. The compression ratio depends on the "randomness" of the ingested logs.
|
||||
- Data compressibility. VictoriaLogs compresses the ingested logs before storing them to disk. The compression ratio depends on the "randomness" of the ingested logs.
|
||||
Less "random" logs with many repeated field values and small differences between log messages compress the best (up to 100x and more).
|
||||
More "random" logs with many unique field values may have very low compression rate.
|
||||
|
||||
|
||||
@@ -1319,7 +1319,7 @@ This query matches the following log messages, since their length is in the requ
|
||||
This query doesn't match the following log messages:
|
||||
|
||||
- `foo`, since it is too short
|
||||
- `foo bar baz abc`, sinc it is too long
|
||||
- `foo bar baz abc`, since it is too long
|
||||
|
||||
It is possible to use `inf` as the upper bound. For example, the following query matches [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
|
||||
with the length bigger or equal to 5 chars:
|
||||
@@ -3362,7 +3362,7 @@ It understands the following Syslog formats:
|
||||
|
||||
The following fields are unpacked:
|
||||
|
||||
- `level` - optained from `PRI`.
|
||||
- `level` - obtained from `PRI`.
|
||||
- `priority` - obtained from `PRI`.
|
||||
- `facility` - calculated as `PRI / 8`.
|
||||
- `facility_keyword` - string representation of the `facility` field according to [these docs](https://en.wikipedia.org/wiki/Syslog#Facility).
|
||||
@@ -3497,7 +3497,7 @@ See also:
|
||||
|
||||
#### Conditional unroll
|
||||
|
||||
If the [`unroll` pipe](#unroll-pipe) must be applied only to some [log enties](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model),
|
||||
If the [`unroll` pipe](#unroll-pipe) must be applied only to some [log entries](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model),
|
||||
then add `if (<filters>)` after `unroll`.
|
||||
The `<filters>` can contain arbitrary [filters](#filters). For example, the following query unrolls `value` field only if `value_type` field equals to `json_array`:
|
||||
|
||||
@@ -3534,7 +3534,7 @@ LogsQL supports the following functions for [`stats` pipe](#stats-pipe):
|
||||
|
||||
`avg(field1, ..., fieldN)` [stats pipe function](#stats-pipe-functions) calculates the average value across
|
||||
all the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||
Non-numeric values are ignored.
|
||||
Non-numeric values are ignored. If all the values are non-numeric, then `NaN` is returned.
|
||||
|
||||
For example, the following query returns the average value for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
over logs for the last 5 minutes:
|
||||
@@ -3972,6 +3972,7 @@ See also:
|
||||
|
||||
`sum(field1, ..., fieldN)` [stats pipe function](#stats-pipe-functions) calculates the sum of numeric values across
|
||||
all the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||
Non-numeric values are skipped. If all the values across `field1`, ..., `fieldN` are non-numeric, then `NaN` is returned.
|
||||
|
||||
For example, the following query returns the sum of numeric values for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
over logs for the last 5 minutes:
|
||||
@@ -4127,7 +4128,7 @@ LogsQL supports the following string literals:
|
||||
|
||||
- `"double quoted"`. Double quote and backslash inside such a string must be escaped with `\`: `"escape\"doublequote and \\ backslash"`.
|
||||
Double-quoted strings may contain special sequences such as `\n`, `\t`, `\f`, `\x8c`, etc. They are decoded according to [these docs](https://go.dev/ref/spec#String_literals).
|
||||
- `'single quoted'`. Single quote and backsliash inside such a string must be escaped with `\`: `'escape\'singlequote and \\ backslash'`.
|
||||
- `'single quoted'`. Single quote and backslash inside such a string must be escaped with `\`: `'escape\'singlequote and \\ backslash'`.
|
||||
- ``` `backtick quoted` ```. Strings with backslashes, double quotes and single quotes shouldn't be escaped inside backtick-quoted strings.
|
||||
|
||||
## Comments
|
||||
@@ -4136,10 +4137,10 @@ LogsQL query may contain comments at any place. The comment starts with `#` and
|
||||
Example query with comments:
|
||||
|
||||
```logsql
|
||||
error # find logs with `error` word
|
||||
| stats by (_stream) logs # then count the number of logs per `_stream` label
|
||||
| sort by (logs) desc # then sort by the found logs in descending order
|
||||
| limit 5 # and show top 5 streams with the biggest number of logs
|
||||
error # find logs with `error` word
|
||||
| stats by (_stream) count() logs # then count the number of logs per `_stream` label
|
||||
| sort by (logs) desc # then sort by the found logs in descending order
|
||||
| limit 5 # and show top 5 streams with the biggest number of logs
|
||||
```
|
||||
|
||||
## Numeric values
|
||||
|
||||
@@ -31,41 +31,126 @@ See [quick start guide](#quick-start) on how to start working with VictoriaLogs
|
||||
|
||||
## Architecture
|
||||
|
||||
VictoriaLogs in cluster mode consists of `vlinsert`, `vlselect` and `vlstorage` components:
|
||||
VictoriaLogs in cluster mode is composed of three main components: `vlinsert`, `vlselect`, and `vlstorage`.
|
||||
|
||||
- `vlinsert` accepts the ingested logs via [all the supported data ingestion protocols](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
|
||||
and spreads them evenly among the `vlstorage` nodes listed via the `-storageNode` command-line flag.
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant LS as Log Sources
|
||||
participant VI as vlinsert
|
||||
participant VS1 as vlstorage-1
|
||||
participant VS2 as vlstorage-2
|
||||
participant VL as vlselect
|
||||
participant QC as Query Client
|
||||
|
||||
Note over LS,VS2: Log Ingestion Flow
|
||||
LS->>VI: Send logs via supported protocols
|
||||
VI->>VS1: POST /internal/insert (HTTP)
|
||||
VI->>VS2: POST /internal/insert (HTTP)
|
||||
Note right of VI: Distributes logs evenly<br/>across vlstorage nodes
|
||||
|
||||
Note over VS1,QC: Query Flow
|
||||
QC->>VL: Query via HTTP endpoints
|
||||
VL->>VS1: GET /internal/select/* (HTTP)
|
||||
VL->>VS2: GET /internal/select/* (HTTP)
|
||||
VS1-->>VL: Return local results
|
||||
VS2-->>VL: Return local results
|
||||
VL->>QC: Processed & aggregated results
|
||||
```
|
||||
|
||||
- `vlselect` accepts incoming queries via [all the supported HTTP querying endpoints](https://docs.victoriametrics.com/victorialogs/querying/),
|
||||
requests the needed data from `vlstorage` nodes listed via the `-storageNode` command-line flag, processes the queries and returns the corresponding responses.
|
||||
- `vlinsert` handles log ingestion via [all supported protocols](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
|
||||
It distributes incoming logs evenly across `vlstorage` nodes, as specified by the `-storageNode` command-line flag.
|
||||
|
||||
- `vlstorage` is responsible for two tasks:
|
||||
- `vlselect` receives queries through [all supported HTTP query endpoints](https://docs.victoriametrics.com/victorialogs/querying/).
|
||||
It fetches the required data from the configured `vlstorage` nodes, processes the queries, and returns the results.
|
||||
|
||||
- It accepts logs from `vlinsert` nodes and stores them at the directory specified via `-storageDataPath` command-line flag.
|
||||
See [these docs](https://docs.victoriametrics.com/victorialogs/#storage) for details about this flag.
|
||||
- `vlstorage` performs two key roles:
|
||||
- It stores logs received from `vlinsert` at the directory defined by the `-storageDataPath` flag.
|
||||
See [storage configuration docs](https://docs.victoriametrics.com/victorialogs/#storage) for details.
|
||||
- It handles queries from `vlselect` by retrieving and transforming the requested data locally before returning results.
|
||||
|
||||
- It processes requests from `vlselect` nodes. It selects the requested logs and performs all data transformations and calculations,
|
||||
which can be executed locally, before sending the results to `vlselect`.
|
||||
Each `vlstorage` node operates as a self-contained VictoriaLogs instance.
|
||||
Refer to the [single-node and cluster mode duality](#single-node-and-cluster-mode-duality) documentation for more information.
|
||||
This design allows you to reuse existing single-node VictoriaLogs instances by listing them in the `-storageNode` flag for `vlselect`, enabling unified querying across all nodes.
|
||||
|
||||
`vlstorage` is basically a single-node version of VictoriaLogs. See [these docs](#single-node-and-cluster-mode-duality) for details.
|
||||
This means that the existing single-node VictoriaLogs instances can be added to the list of `vlstorage` nodes via `-storageNode` command-line flag at `vlselect`
|
||||
in order to get global querying view over all the logs across all the single-node VictoriaLogs instances.
|
||||
All VictoriaLogs components are horizontally scalable and can be deployed on hardware best suited to their respective workloads.
|
||||
`vlinsert` and `vlselect` can be run on the same node, which allows the minimal cluster to consist of just one `vlstorage` node and one node acting as both `vlinsert` and `vlselect`.
|
||||
However, for production environments, it is recommended to separate `vlinsert` and `vlselect` roles to avoid resource contention — for example, to prevent heavy queries from interfering with log ingestion.
|
||||
|
||||
Every component of the VictoriaLogs cluster can scale from a single node to arbitrary number of nodes and can run on the most suitable hardware for the given workload.
|
||||
`vlinsert` nodes can be used as `vlselect` nodes, so the minimum VictoriaLogs cluster must contain a `vlstorage` node plus a node, which plays both `vlinsert` and `vlselect` roles.
|
||||
It isn't recommended sharing `vlinsert` and `vlselect` responsibilities in a single node, since this increases chances that heavy queries can negatively affect data ingestion
|
||||
and vice versa.
|
||||
Communication between `vlinsert` / `vlselect` and `vlstorage` is done via HTTP over the port specified by the `-httpListenAddr` flag:
|
||||
|
||||
`vlselect` and `vlinsert` communicate with `vlstorage` via HTTP at the TCP port specified via `-httpListenAddr` command-line flag:
|
||||
- `vlinsert` sends data to the `/internal/insert` endpoint on `vlstorage`.
|
||||
- `vlselect` sends queries to endpoints under `/internal/select/` on `vlstorage`.
|
||||
|
||||
- `vlinsert` sends requests to `/internal/insert` HTTP endpoint at `vlstorage`.
|
||||
- `vlselect` sends requests to HTTP endpoints at `vlstorage` starting with `/internal/select/`.
|
||||
This HTTP-based communication model allows you to use reverse proxies for authorization, routing, and encryption between components.
|
||||
Use of [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/) is recommended for managing access control.
|
||||
|
||||
This allows using various http proxies for authorization, routing and encryption of requests between these components.
|
||||
It is recommended to use [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/).
|
||||
For advanced setups, refer to the [multi-level cluster setup](#multi-level-cluster-setup) documentation.
|
||||
|
||||
See also [multi-level cluster setup](#multi-level-cluster-setup).
|
||||
## High availability
|
||||
|
||||
In the cluster setup, the following rules apply:
|
||||
|
||||
- The `vlselect` component requires **all relevant vlstorage nodes to be available** in order to return complete and correct query results.
|
||||
|
||||
- If even one of the vlstorage nodes is temporarily unavailable, `vlselect` cannot safely return a full response, since some of the required data may reside on the missing node. Rather than risk delivering partial or misleading query results, which can cause confusion, trigger false alerts, or produce incorrect metrics, VictoriaLogs chooses to return an error instead.
|
||||
|
||||
- The `vlinsert` component continues to function normally when some vlstorage nodes are unavailable. It automatically routes new logs to the remaining available nodes to ensure that data ingestion remains uninterrupted and newly received logs are not lost.
|
||||
|
||||
> [!NOTE] Insight
|
||||
> In most real-world cases, `vlstorage` nodes become unavailable during planned maintenance such as upgrades, config changes, or rolling restarts. These are typically infrequent (weekly or monthly) and brief (a few minutes).
|
||||
> A short period of query downtime during such events is acceptable and fits well within most SLAs. For example, 60 minutes of downtime per month still provides around 99.86% availability, which often outperforms complex HA setups that rely on opaque auto-recovery and may fail unpredictably.
|
||||
|
||||
VictoriaLogs itself does not handle replication at the storage level. Instead, it relies on an external log shipper, such as [vector](https://docs.victoriametrics.com/victorialogs/data-ingestion/vector/) or [vlagent](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/9034), to send the same log stream to multiple independent VictoriaLogs instances:
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph "HA Solution"
|
||||
subgraph "Ingestion Layer"
|
||||
LS["Log Sources<br/>(Applications)"]
|
||||
VECTOR["Log Collector<br/>• Buffering<br/>• Replication<br/>• Delivery Guarantees"]
|
||||
end
|
||||
|
||||
subgraph "Storage Layer"
|
||||
subgraph "Zone A"
|
||||
VLA["VictoriaLogs Cluster A"]
|
||||
end
|
||||
|
||||
subgraph "Zone B"
|
||||
VLB["VictoriaLogs Cluster B"]
|
||||
end
|
||||
end
|
||||
|
||||
subgraph "Query Layer"
|
||||
LB["Load Balancer<br/>(vmauth)<br/>• Health Checks<br/>• Failover<br/>• Query Distribution"]
|
||||
QC["Query Clients<br/>(Grafana, API)"]
|
||||
end
|
||||
|
||||
LS --> VECTOR
|
||||
VECTOR -->|"Replicate logs to<br/>Zone A cluster"| VLA
|
||||
VECTOR -->|"Replicate logs to<br/>Zone B cluster"| VLB
|
||||
|
||||
VLA -->|"Serve queries from<br/>Zone A cluster"| LB
|
||||
VLB -->|"Serve queries from<br/>Zone B cluster"| LB
|
||||
LB --> QC
|
||||
|
||||
style VECTOR fill:#e8f5e8
|
||||
style VLA fill:#d5e8d4
|
||||
style VLB fill:#d5e8d4
|
||||
style LB fill:#e1f5fe
|
||||
style QC fill:#fff2cc
|
||||
style LS fill:#fff2cc
|
||||
end
|
||||
```
|
||||
|
||||
In this HA solution:
|
||||
|
||||
- A log shipper at the top receives logs and replicates them in parallel to two VictoriaLogs clusters.
|
||||
- If one cluster fails completely (i.e., **all** of its storage nodes become unavailable), the log shipper continues to send logs to the remaining healthy cluster and buffers any logs that cannot be delivered. When the failed cluster becomes available again, the log shipper resumes sending both buffered and new logs to it.
|
||||
- On the read path, a load balancer (e.g., vmauth) sits in front of the VictoriaLogs clusters and routes query requests to any healthy cluster.
|
||||
- If one cluster fails (i.e., **at least one** of its storage nodes is unavailable), the load balancer detects this and automatically redirects all query traffic to the remaining healthy cluster.
|
||||
|
||||
There's no hidden coordination logic or consensus algorithm. You can scale it horizontally and operate it safely, even in bare-metal Kubernetes clusters using local PVs, as long as the log shipper handles reliable replication and buffering.
|
||||
|
||||
## Single-node and cluster mode duality
|
||||
|
||||
Every `vlstorage` node can be used as a single-node VictoriaLogs instance:
|
||||
@@ -189,7 +274,7 @@ Start `vlselect` node, which [accepts incoming queries](https://docs.victoriamet
|
||||
|
||||
Note that all the VictoriaLogs cluster components - `vlstorage`, `vlinsert` and `vlselect` - share the same executable - `victoria-logs-prod`.
|
||||
Their roles depend on whether the `-storageNode` command-line flag is set - if this flag is set, then the executable runs in `vlinsert` and `vlselect` modes.
|
||||
Otherwise it runs in `vlstorage` mode, which is identical to a [single-node VictoriaLogs mode](https://docs.victoriametrics.com/victorialogs/).
|
||||
Otherwise, it runs in `vlstorage` mode, which is identical to a [single-node VictoriaLogs mode](https://docs.victoriametrics.com/victorialogs/).
|
||||
|
||||
Let's ingest some logs (aka [wide events](https://jeremymorrell.dev/blog/a-practitioners-guide-to-wide-events/))
|
||||
from [GitHub archive](https://www.gharchive.org/) into the VictoriaLogs cluster with the following command:
|
||||
|
||||
@@ -28,23 +28,24 @@ See [the list of supported Journald fields](https://www.freedesktop.org/software
|
||||
|
||||
## Level field
|
||||
|
||||
VictoriaLogs atomatically sets the `level` log field according to the [`PRIORITY` field falue](https://wiki.archlinux.org/title/Systemd/Journal).
|
||||
VictoriaLogs automatically sets the `level` log field according to the [`PRIORITY` field value](https://wiki.archlinux.org/title/Systemd/Journal).
|
||||
|
||||
## Stream fields
|
||||
|
||||
VictoriaLogs uses `(_MACHINE_ID, _HOSTNAME, _SYSTEMD_UNIT)` as [stream fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#stream-fields)
|
||||
for logs ingested via jorunald protocol. The list of log stream fields can be changed via `-journald.streamFields` command-line flag if needed,
|
||||
by providing comma-separated list of journald fields form [this list](https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html).
|
||||
for logs ingested via journald protocol. The list of log stream fields can be changed via `-journald.streamFields` command-line flag if needed,
|
||||
by providing comma-separated list of journald fields from [this list](https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html).
|
||||
|
||||
Please make sure that the log stream fields passed to `-journlad.streamFields` do not contain fields with high number or unbound number of unique values,
|
||||
Please make sure that the log stream fields passed to `-journald.streamFields` do not contain fields with high number or unbound number of unique values,
|
||||
since this may lead to [high cardinality issues](https://docs.victoriametrics.com/victorialogs/keyconcepts/#high-cardinality).
|
||||
This can happen with `_SYSTEMD_UNIT` if you have templated units with non-static instances
|
||||
such as `systemd-coredump@.service` or if you have a `.socket` unit with `Accept=yes`.
|
||||
|
||||
The following Journald fields are also good candidates for stream fields:
|
||||
|
||||
- `_TRANSPORT`
|
||||
- `_TRANSPORT` (to separate out kernel and audit logs which are not associated with a `_SYSTEMD_UNIT`)
|
||||
- `_SYSTEMD_USER_UNIT`
|
||||
|
||||
|
||||
## Dropping fields
|
||||
|
||||
VictoriaLogs can be configured for skipping the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
|
||||
@@ -11,6 +11,7 @@ tags:
|
||||
- logs
|
||||
aliases:
|
||||
- /victorialogs/keyConcepts.html
|
||||
- /victorialogs/keyConcepts/
|
||||
---
|
||||
## Data model
|
||||
|
||||
|
||||
@@ -94,7 +94,7 @@ Loki allows applying filters to log labels with `{...} | label op value` syntax:
|
||||
according to [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#logical-filter).
|
||||
|
||||
* `{...} | label > value`, `{...} label >= value`, `{...} label < value` and `{...} label <= value`.
|
||||
This is eqvalent to `{...} label:>value`, `{...} label:>=value`, `{...} label:<value` and `{...} label:<=value`
|
||||
This is equivalent to `{...} label:>value`, `{...} label:>=value`, `{...} label:<value` and `{...} label:<=value`
|
||||
in VictoriaLogs. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#range-comparison-filter).
|
||||
|
||||
* `{...} | label ~= value`. This is equivalent to `{...} label:~value` in VictoriaLogs.
|
||||
@@ -192,7 +192,7 @@ Loki provides the ability to drop log labels with the `{...} | drop label1, ...,
|
||||
according to [these docs](https://grafana.com/docs/loki/latest/query/log_queries/#drop-labels-expression).
|
||||
The similar syntax is also supported by VictoriaLogs. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#delete-pipe).
|
||||
|
||||
Lokis supports conditional dropping of labels with the `{...} | drop label="value"` syntax.
|
||||
Loki supports conditional dropping of labels with the `{...} | drop label="value"` syntax.
|
||||
This can be replaced with [conditional format](https://docs.victoriametrics.com/victorialogs/logsql/#conditional-format) at VictoriaLogs:
|
||||
|
||||
```logsql
|
||||
@@ -266,7 +266,7 @@ can be substituted with the simple `{...} | count()` query at VictoriaLogs.
|
||||
|
||||
### Unwrapped range aggregations
|
||||
|
||||
Loki allows calculating metrics from label values by using the `func_name({...} | unwrap label_name)` syntax. There is no need in unrapping any labels in VictoriaLogs -
|
||||
Loki allows calculating metrics from label values by using the `func_name({...} | unwrap label_name)` syntax. There is no need in unwrapping any labels in VictoriaLogs -
|
||||
just pass the needed label names into the needed [`stats` pipe function](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe-functions).
|
||||
|
||||
VictoriaLogs aggregates all the selected logs by default, while Loki groups stats by log stream. Use `... | stats by (_stream) ...`
|
||||
|
||||
@@ -128,7 +128,7 @@ See also:
|
||||
|
||||
## How to select logs with all the given words in log message?
|
||||
|
||||
Just enumerate the needed [words](https://docs.victoriametrics.com/victorialogs/logsql/#word) in the query, by deliming them with whitespace.
|
||||
Just enumerate the needed [words](https://docs.victoriametrics.com/victorialogs/logsql/#word) in the query, by delimiting them with whitespace.
|
||||
For example, the following query selects logs containing both `error` and `kubernetes` [words](https://docs.victoriametrics.com/victorialogs/logsql/#word)
|
||||
in the [log message](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ FROM <table>
|
||||
```
|
||||
|
||||
The `<filters>` part selects the needed logs (rows) according to the provided [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters).
|
||||
Then the provided [pipes](https://docs.victoriametrics.com/victorialogs/logsql/#pipes) are executed sequentlially.
|
||||
Then the provided [pipes](https://docs.victoriametrics.com/victorialogs/logsql/#pipes) are executed sequentially.
|
||||
Every such pipe receives all the rows from the previous stage, performs some calculations and/or transformations,
|
||||
and then pushes the resulting rows to the next stage. This simplifies reading and understanding the query - just read it from the beginning
|
||||
to the end in order to understand what does it do at every stage.
|
||||
|
||||
508
docs/victorialogs/vlagent.md
Normal file
@@ -0,0 +1,508 @@
|
||||
---
|
||||
weight: 3
|
||||
menu:
|
||||
docs:
|
||||
parent: victorialogs
|
||||
weight: 3
|
||||
title: vlagent
|
||||
tags:
|
||||
- logs
|
||||
aliases:
|
||||
- /vlagent.html
|
||||
- /vlagent/index.html
|
||||
- /vlagent/
|
||||
---
|
||||
|
||||
`vlagent` is a tiny agent which helps you collect logs from various sources
|
||||
and store them in [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/).
|
||||
See [Quick Start](#quick-start) for details.
|
||||
|
||||
|
||||
## Motivation
|
||||
|
||||
While VictoriaLogs provides an efficient solution to store and observe logs, it lacks of replication out of box.
|
||||
Previous solution was to configure clients to replicate log streams into multiple VictoriaLogs installations.
|
||||
`vlagent` is a missing piece of log streams replication.
|
||||
|
||||
## Features
|
||||
|
||||
- It can accept logs from popular log collectors. See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
|
||||
* Can replicate collected logs simultaneously to multiple VictoriaLogs instances - see [these docs](#replication-and-high-availability).
|
||||
* Works smoothly in environments with unstable connections to remote storage. If the remote storage is unavailable, the collected logs
|
||||
are buffered at `-remoteWrite.tmpDataPath`. The buffered logs are sent to remote storage as soon as the connection
|
||||
to the remote storage is repaired. The maximum disk usage for the buffer can be limited with `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
||||
## Quick Start
|
||||
|
||||
Please download `vlagent` archive from [releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest) (
|
||||
`vlagent` is also available in docker images [Docker Hub](https://hub.docker.com/r/victoriametrics/vlagent/tags) and [Quay](https://quay.io/repository/victoriametrics/vlagent?tab=tags)),
|
||||
unpack it and pass the following flags to the `vlagent` binary in order to start sending the data to the VictoriaLogs remote storage:
|
||||
|
||||
* `-remoteWrite.url` with VictoriaLogs native protocol compatible remote storage endpoint, where to send the data to.
|
||||
The `-remoteWrite.url` may refer to [DNS SRV](https://en.wikipedia.org/wiki/SRV_record) address. See [these docs](#srv-urls) for details.
|
||||
|
||||
Example command for writing the data received via [supported push-based protocols](#how-to-push-data-to-vlagent)
|
||||
to [single-node VictoriaLogs](https://docs.victoriametrics.com/victorialogs) located at `victoria-logs-host:9428`:
|
||||
|
||||
```sh
|
||||
/path/to/vlagent -remoteWrite.url=https://victoria-logs-host:9428/internal/insert
|
||||
```
|
||||
|
||||
Pass `-help` to `vlagent` in order to see [the full list of supported command-line flags with their descriptions](#advanced-usage).
|
||||
|
||||
### Replication and high availability
|
||||
|
||||
`vlagent` replicates the collected logs among multiple remote storage instances configured via `-remoteWrite.url` args.
|
||||
If a single remote storage instance temporarily is out of service, then the collected data remains available in another remote storage instance.
|
||||
`vlagent` buffers the collected data in files at `-remoteWrite.tmpDataPath` until the remote storage becomes available again,
|
||||
and then it sends the buffered data to the remote storage in order to prevent data gaps.
|
||||
|
||||
## Monitoring
|
||||
|
||||
`vlagent` exports various metrics in Prometheus exposition format at `http://vmalent-host:9429/metrics` page.
|
||||
We recommend setting up regular scraping of this page either through `vmagent` or by Prometheus-compatible scraper,
|
||||
so that the exported metrics may be analyzed later.
|
||||
|
||||
Use official [Grafana dashboard](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/vlagent.json) for `vlagent` state overview.
|
||||
Graphs on this dashboard contain useful hints - hover the `i` icon at the top left corner of each graph in order to read it.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add a review to the dashboard.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* It is recommended [setting up the official Grafana dashboard](#monitoring) in order to monitor the state of `vlagent`.
|
||||
|
||||
* It is recommended increasing `-remoteWrite.queues` if `vlagent_remotewrite_pending_data_bytes` [metric](#monitoring)
|
||||
grows constantly. It is also recommended increasing `-remoteWrite.maxBlockSize` command-line flags in this case.
|
||||
This can improve data ingestion performance to the configured remote storage systems at the cost of higher memory usage.
|
||||
|
||||
* If you see gaps in the data pushed by `vlagent` to remote storage when `-remoteWrite.maxDiskUsagePerURL` is set,
|
||||
try increasing `-remoteWrite.queues`. Such gaps may appear because `vlagent` cannot keep up with sending the collected data to remote storage.
|
||||
Therefore, it starts dropping the buffered data if the on-disk buffer size exceeds `-remoteWrite.maxDiskUsagePerURL`.
|
||||
|
||||
* `vlagent` drops data blocks if remote storage replies with `400 Bad Request` and `404 Not Found` HTTP responses.
|
||||
The number of dropped blocks can be monitored via `vlagent_remotewrite_packets_dropped_total` metric exported at [/metrics page](#monitoring).
|
||||
|
||||
* `vlagent` buffers scraped data at the `-remoteWrite.tmpDataPath` directory until it is sent to `-remoteWrite.url`.
|
||||
The directory can grow large when remote storage is unavailable for extended periods of time and if the maximum directory size isn't limited
|
||||
with `-remoteWrite.maxDiskUsagePerURL` command-line flag.
|
||||
If you don't want to send all the buffered data from the directory to remote storage then simply stop `vlagent` and delete the directory.
|
||||
|
||||
* By default `vlagent` masks `-remoteWrite.url` with `secret-url` values in logs and at `/metrics` page because
|
||||
the url may contain sensitive information such as auth tokens or passwords.
|
||||
Pass `-remoteWrite.showURL` command-line flag when starting `vlagent` in order to see all the valid urls.
|
||||
|
||||
See also:
|
||||
|
||||
- [General Troubleshooting](https://docs.victoriametrics.com/victoriametrics/troubleshooting/)
|
||||
|
||||
|
||||
## Profiling
|
||||
|
||||
`vlagent` provides handlers for collecting the following [Go profiles](https://blog.golang.org/profiling-go-programs):
|
||||
|
||||
* Memory profile can be collected with the following command (replace `0.0.0.0` with hostname if needed):
|
||||
|
||||
|
||||
```sh
|
||||
curl http://0.0.0.0:9429/debug/pprof/heap > mem.pprof
|
||||
```
|
||||
|
||||
|
||||
* CPU profile can be collected with the following command (replace `0.0.0.0` with hostname if needed):
|
||||
|
||||
|
||||
```sh
|
||||
curl http://0.0.0.0:9429/debug/pprof/profile > cpu.pprof
|
||||
```
|
||||
|
||||
|
||||
The command for collecting CPU profile waits for 30 seconds before returning.
|
||||
|
||||
The collected profiles may be analyzed with [go tool pprof](https://github.com/google/pprof).
|
||||
|
||||
It is safe sharing the collected profiles from security point of view, since they do not contain sensitive information.
|
||||
|
||||
## Advanced usage
|
||||
|
||||
`vlagent` can be fine-tuned with various command-line flags. Run `./vlagent -help` in order to see the full list of these flags with their descriptions and default values:
|
||||
|
||||
```bash
|
||||
vlagent collects logs via popular data ingestion protocols and routes it to VictoriaLogs.
|
||||
|
||||
See the docs at https://docs.victoriametrics.com/victorialogs/vlagent/ .
|
||||
|
||||
-blockcache.missesBeforeCaching int
|
||||
The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
|
||||
-datadog.ignoreFields array
|
||||
Comma-separated list of fields to ignore for logs ingested via DataDog protocol. See https://docs.victoriametrics.com/victorialogs/data-ingestion/datadog-agent/#dropping-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-datadog.maxRequestSize size
|
||||
The maximum size in bytes of a single DataDog request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-datadog.streamFields array
|
||||
Comma-separated list of fields to use as log stream fields for logs ingested via DataDog protocol. See https://docs.victoriametrics.com/victorialogs/data-ingestion/datadog-agent/#stream-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-defaultMsgValue string
|
||||
Default value for _msg field if the ingested log entry doesn't contain it; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field (default "missing _msg field; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field")
|
||||
-elasticsearch.version string
|
||||
Elasticsearch version to report to client (default "8.9.0")
|
||||
-enableTCP6
|
||||
Whether to enable IPv6 for listening and dialing. By default, only IPv4 TCP and UDP are used
|
||||
-envflag.enable
|
||||
Whether to enable reading flags from environment variables in addition to the command line. Command line flag values have priority over values from environment vars. Flags are read only from the command line if this flag isn't set. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#environment-variables for more details
|
||||
-envflag.prefix string
|
||||
Prefix for environment variables if -envflag.enable is set
|
||||
-filestream.disableFadvise
|
||||
Whether to disable fadvise() syscall when reading large data files. The fadvise() syscall prevents from eviction of recently accessed data from OS page cache during background merges and backups. In some rare cases it is better to disable the syscall if it uses too much CPU
|
||||
-flagsAuthKey value
|
||||
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path
|
||||
-fs.disableMmap
|
||||
Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
|
||||
-http.connTimeout duration
|
||||
Incoming connections to -httpListenAddr are closed after the configured timeout. This may help evenly spreading load among a cluster of services behind TCP-level load balancer. Zero value disables closing of incoming connections (default 2m0s)
|
||||
-http.disableCORS
|
||||
Disable CORS for all origins (*)
|
||||
-http.disableKeepAlive
|
||||
Whether to disable HTTP keep-alive for incoming connections at -httpListenAddr
|
||||
-http.disableResponseCompression
|
||||
Disable compression of HTTP responses to save CPU resources. By default, compression is enabled to save network bandwidth
|
||||
-http.header.csp string
|
||||
Value for 'Content-Security-Policy' header, recommended: "default-src 'self'"
|
||||
-http.header.frameOptions string
|
||||
Value for 'X-Frame-Options' header
|
||||
-http.header.hsts string
|
||||
Value for 'Strict-Transport-Security' header, recommended: 'max-age=31536000; includeSubDomains'
|
||||
-http.idleConnTimeout duration
|
||||
Timeout for incoming idle http connections (default 1m0s)
|
||||
-http.maxGracefulShutdownDuration duration
|
||||
The maximum duration for a graceful shutdown of the HTTP server. A highly loaded server may require increased value for a graceful shutdown (default 7s)
|
||||
-http.pathPrefix string
|
||||
An optional prefix to add to all the paths handled by http server. For example, if '-http.pathPrefix=/foo/bar' is set, then all the http requests will be handled on '/foo/bar/*' paths. This may be useful for proxied requests. See https://www.robustperception.io/using-external-urls-and-proxies-with-prometheus
|
||||
-http.shutdownDelay duration
|
||||
Optional delay before http server shutdown. During this delay, the server returns non-OK responses from /health page, so load balancers can route new requests to other servers
|
||||
-httpAuth.password value
|
||||
Password for HTTP server's Basic Auth. The authentication is disabled if -httpAuth.username is empty
|
||||
Flag value can be read from the given file when using -httpAuth.password=file:///abs/path/to/file or -httpAuth.password=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -httpAuth.password=http://host/path or -httpAuth.password=https://host/path
|
||||
-httpAuth.username string
|
||||
Username for HTTP server's Basic Auth. The authentication is disabled if empty. See also -httpAuth.password
|
||||
-httpListenAddr array
|
||||
TCP address to listen for incoming http requests. Set this flag to empty value in order to disable listening on any port. This mode may be useful for running multiple vlagent instances on the same server. Note that /targets and /metrics pages aren't available if -httpListenAddr=''. See also -tls and -httpListenAddr.useProxyProtocol
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-httpListenAddr.useProxyProtocol array
|
||||
Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-insert.disable
|
||||
Whether to disable /insert/* HTTP endpoints
|
||||
-insert.maxFieldsPerLine int
|
||||
The maximum number of log fields per line, which can be read by /insert/* handlers; see https://docs.victoriametrics.com/victorialogs/faq/#how-many-fields-a-single-log-entry-may-contain (default 1000)
|
||||
-insert.maxLineSizeBytes size
|
||||
The maximum size of a single line, which can be read by /insert/* handlers; see https://docs.victoriametrics.com/victorialogs/faq/#what-length-a-log-record-is-expected-to-have
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 262144)
|
||||
-insert.maxQueueDuration duration
|
||||
The maximum duration to wait in the queue when -maxConcurrentInserts concurrent insert requests are executed (default 1m0s)
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
|
||||
-internStringMaxLen int
|
||||
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
|
||||
-internalinsert.disable
|
||||
Whether to disable /internal/insert HTTP endpoint
|
||||
-internalinsert.maxRequestSize size
|
||||
The maximum size in bytes of a single request, which can be accepted at /internal/insert HTTP endpoint
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-journald.ignoreFields array
|
||||
Comma-separated list of fields to ignore for logs ingested over journald protocol. See https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/#dropping-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-journald.includeEntryMetadata
|
||||
Include Journald fields with double underscore prefixes
|
||||
-journald.streamFields array
|
||||
Comma-separated list of fields to use as log stream fields for logs ingested over journald protocol. See https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/#stream-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-journald.tenantID string
|
||||
TenantID for logs ingested via the Journald endpoint. See https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/#multitenancy (default "0:0")
|
||||
-journald.timeField string
|
||||
Field to use as a log timestamp for logs ingested via journald protocol. See https://docs.victoriametrics.com/victorialogs/data-ingestion/journald/#time-field (default "__REALTIME_TIMESTAMP")
|
||||
-loggerDisableTimestamps
|
||||
Whether to disable writing timestamps in logs
|
||||
-loggerErrorsPerSecondLimit int
|
||||
Per-second limit on the number of ERROR messages. If more than the given number of errors are emitted per second, the remaining errors are suppressed. Zero values disable the rate limit
|
||||
-loggerFormat string
|
||||
Format for logs. Possible values: default, json (default "default")
|
||||
-loggerJSONFields string
|
||||
Allows renaming fields in JSON formatted logs. Example: "ts:timestamp,msg:message" renames "ts" to "timestamp" and "msg" to "message". Supported fields: ts, level, caller, msg
|
||||
-loggerLevel string
|
||||
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
|
||||
-loggerMaxArgLen int
|
||||
The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' is prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 5000)
|
||||
-loggerOutput string
|
||||
Output for the logs. Supported values: stderr, stdout (default "stderr")
|
||||
-loggerTimezone string
|
||||
Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
|
||||
-loggerWarnsPerSecondLimit int
|
||||
Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
|
||||
-loki.disableMessageParsing
|
||||
Whether to disable automatic parsing of JSON-encoded log fields inside Loki log message into distinct log fields
|
||||
-loki.maxRequestSize size
|
||||
The maximum size in bytes of a single Loki request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-maxConcurrentInserts int
|
||||
The maximum number of concurrent insert requests. Set higher value when clients send data over slow networks. Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage. See also -insert.maxQueueDuration (default 20)
|
||||
-memory.allowedBytes size
|
||||
Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)
|
||||
-memory.allowedPercent float
|
||||
Allowed percent of system memory VictoriaMetrics caches may occupy. See also -memory.allowedBytes. Too low a value may increase cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache which will result in higher disk IO usage (default 60)
|
||||
-metrics.exposeMetadata
|
||||
Whether to expose TYPE and HELP metadata at the /metrics page, which is exposed at -httpListenAddr . The metadata may be needed when the /metrics page is consumed by systems, which require this information. For example, Managed Prometheus in Google Cloud - https://cloud.google.com/stackdriver/docs/managed-prometheus/troubleshooting#missing-metric-type
|
||||
-metricsAuthKey value
|
||||
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -metricsAuthKey=file:///abs/path/to/file or -metricsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -metricsAuthKey=http://host/path or -metricsAuthKey=https://host/path
|
||||
-opentelemetry.maxRequestSize size
|
||||
The maximum size in bytes of a single OpenTelemetry request
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 67108864)
|
||||
-pprofAuthKey value
|
||||
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides -httpAuth.*
|
||||
Flag value can be read from the given file when using -pprofAuthKey=file:///abs/path/to/file or -pprofAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -pprofAuthKey=http://host/path or -pprofAuthKey=https://host/path
|
||||
-pushmetrics.disableCompression
|
||||
Whether to disable request body compression when pushing metrics to every -pushmetrics.url
|
||||
-pushmetrics.extraLabel array
|
||||
Optional labels to add to metrics pushed to every -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to every -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-pushmetrics.header array
|
||||
Optional HTTP request header to send to every -pushmetrics.url . For example, -pushmetrics.header='Authorization: Basic foobar' adds 'Authorization: Basic foobar' header to every request to every -pushmetrics.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-pushmetrics.interval duration
|
||||
Interval for pushing metrics to every -pushmetrics.url (default 10s)
|
||||
-pushmetrics.url array
|
||||
Optional URL to push metrics exposed at /metrics page. See https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#push-metrics . By default, metrics exposed at /metrics page aren't pushed to any remote storage
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.basicAuth.password array
|
||||
Optional basic auth password to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.basicAuth.passwordFile array
|
||||
Optional path to basic auth password to use for the corresponding -remoteWrite.url. The file is re-read every second
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.basicAuth.username array
|
||||
Optional basic auth username to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.bearerToken array
|
||||
Optional bearer auth token to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.bearerTokenFile array
|
||||
Optional path to bearer token file to use for the corresponding -remoteWrite.url. The token is re-read from the file every second
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.flushInterval duration
|
||||
Interval for flushing the data to remote storage. This option takes effect only when less than 2MB of data per second are pushed to -remoteWrite.url (default 1s)
|
||||
-remoteWrite.headers array
|
||||
Optional HTTP headers to send with each request to the corresponding -remoteWrite.url. For example, -remoteWrite.headers='My-Auth:foobar' would send 'My-Auth: foobar' HTTP header with every request to the corresponding -remoteWrite.url. Multiple headers must be delimited by '^^': -remoteWrite.headers='header1:value1^^header2:value2'
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.maxBlockSize size
|
||||
The maximum block size to send to remote storage. Bigger blocks may improve performance at the cost of the increased memory usage.
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 8388608)
|
||||
-remoteWrite.maxDiskUsagePerURL array
|
||||
The maximum file-based buffer size in bytes at -remoteWrite.tmpDataPath for each -remoteWrite.url. When buffer size reaches the configured maximum, then old data is dropped when adding new data to the buffer. Buffered data is stored in ~500MB chunks. It is recommended to set the value for this flag to a multiple of the block size 500MB. Disk usage is unlimited if the value is set to 0
|
||||
Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB. (default 0)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.oauth2.clientID array
|
||||
Optional OAuth2 clientID to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.oauth2.clientSecret array
|
||||
Optional OAuth2 clientSecret to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.oauth2.clientSecretFile array
|
||||
Optional OAuth2 clientSecretFile to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.oauth2.endpointParams array
|
||||
Optional OAuth2 endpoint parameters to use for the corresponding -remoteWrite.url . The endpoint parameters must be set in JSON format: {"param1":"value1",...,"paramN":"valueN"}
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.oauth2.scopes array
|
||||
Optional OAuth2 scopes to use for the corresponding -remoteWrite.url. Scopes must be delimited by ';'
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.oauth2.tokenUrl array
|
||||
Optional OAuth2 tokenURL to use for the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.proxyURL array
|
||||
Optional proxy URL for writing data to the corresponding -remoteWrite.url. Supported proxies: http, https, socks5. Example: -remoteWrite.proxyURL=socks5://proxy:1234
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.queues int
|
||||
The number of concurrent queues to each -remoteWrite.url. Set more queues if default number of queues isn't enough for sending high volume of collected data to remote storage. Default value depends on the number of available CPU cores. It should work fine in most cases since it minimizes resource usage (default 20)
|
||||
-remoteWrite.rateLimit array
|
||||
Optional rate limit in bytes per second for data sent to the corresponding -remoteWrite.url. By default, the rate limit is disabled. It can be useful for limiting load on remote storage when big amounts of buffered data (default 0)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.retryMaxTime array
|
||||
The max time spent on retry attempts to send a block of data to the corresponding -remoteWrite.url. Change this value if it is expected for -remoteWrite.url to be unreachable for more than -remoteWrite.retryMaxTime. See also -remoteWrite.retryMinInterval (default 1m0s)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.retryMinInterval array
|
||||
The minimum delay between retry attempts to send a block of data to the corresponding -remoteWrite.url. Every next retry attempt will double the delay to prevent hammering of remote database. See also -remoteWrite.retryMaxTime (default 1s)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.sendTimeout array
|
||||
Timeout for sending a single block of data to the corresponding -remoteWrite.url (default 1m0s)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.showURL
|
||||
Whether to show -remoteWrite.url in the exported metrics. It is hidden by default, since it can contain sensitive info such as auth key
|
||||
-remoteWrite.tlsCAFile array
|
||||
Optional path to TLS CA file to use for verifying connections to the corresponding -remoteWrite.url. By default, system CA is used
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.tlsCertFile array
|
||||
Optional path to client-side TLS certificate file to use when connecting to the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.tlsHandshakeTimeout array
|
||||
The timeout for establishing tls connections to the corresponding -remoteWrite.url (default 20s)
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.tlsInsecureSkipVerify array
|
||||
Whether to skip tls verification when connecting to the corresponding -remoteWrite.url
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-remoteWrite.tlsKeyFile array
|
||||
Optional path to client-side TLS certificate key to use when connecting to the corresponding -remoteWrite.url
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.tlsServerName array
|
||||
Optional TLS server name to use for connections to the corresponding -remoteWrite.url. By default, the server name from -remoteWrite.url is used
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.tmpDataPath string
|
||||
Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . See also -remoteWrite.maxDiskUsagePerURL (default "vlagent-remotewrite-data")
|
||||
-remoteWrite.url array
|
||||
Remote storage URL to write data to. It must support VictoriaLogs native protocol. Example url: http://<victorialogs-host>:9428/internal/insert. Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems.
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.compressMethod.tcp array
|
||||
Compression method for syslog messages received at the corresponding -syslog.listenAddr.tcp. Supported values: none, gzip, deflate. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#compression
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.compressMethod.udp array
|
||||
Compression method for syslog messages received at the corresponding -syslog.listenAddr.udp. Supported values: none, gzip, deflate. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#compression
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.decolorizeFields.tcp array
|
||||
Fields to remove ANSI color codes across logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#decolorizing-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.decolorizeFields.udp array
|
||||
Fields to remove ANSI color codes across logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#decolorizing-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.extraFields.tcp array
|
||||
Fields to add to logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.extraFields.udp array
|
||||
Fields to add to logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#adding-extra-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.ignoreFields.tcp array
|
||||
Fields to ignore at logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.ignoreFields.udp array
|
||||
Fields to ignore at logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#dropping-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.listenAddr.tcp array
|
||||
Comma-separated list of TCP addresses to listen to for Syslog messages. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.listenAddr.udp array
|
||||
Comma-separated list of UDP address to listen to for Syslog messages. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.streamFields.tcp array
|
||||
Fields to use as log stream labels for logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.streamFields.udp array
|
||||
Fields to use as log stream labels for logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#stream-fields
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.tenantID.tcp array
|
||||
TenantID for logs ingested via the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.tenantID.udp array
|
||||
TenantID for logs ingested via the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#multitenancy
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.timezone string
|
||||
Timezone to use when parsing timestamps in RFC3164 syslog messages. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 . See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/ (default "Local")
|
||||
-syslog.tls array
|
||||
Whether to enable TLS for receiving syslog messages at the corresponding -syslog.listenAddr.tcp. The corresponding -syslog.tlsCertFile and -syslog.tlsKeyFile must be set if -syslog.tls is set. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-syslog.tlsCertFile array
|
||||
Path to file with TLS certificate for the corresponding -syslog.listenAddr.tcp if the corresponding -syslog.tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.tlsCipherSuites array
|
||||
Optional list of TLS cipher suites for -syslog.listenAddr.tcp if -syslog.tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants . See also https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.tlsKeyFile array
|
||||
Path to file with TLS key for the corresponding -syslog.listenAddr.tcp if the corresponding -syslog.tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-syslog.tlsMinVersion string
|
||||
The minimum TLS version to use for -syslog.listenAddr.tcp if -syslog.tls is set. Supported values: TLS10, TLS11, TLS12, TLS13. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#security (default "TLS13")
|
||||
-syslog.useLocalTimestamp.tcp array
|
||||
Whether to use local timestamp instead of the original timestamp for the ingested syslog messages at the corresponding -syslog.listenAddr.tcp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#log-timestamps
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-syslog.useLocalTimestamp.udp array
|
||||
Whether to use local timestamp instead of the original timestamp for the ingested syslog messages at the corresponding -syslog.listenAddr.udp. See https://docs.victoriametrics.com/victorialogs/data-ingestion/syslog/#log-timestamps
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-tls array
|
||||
Whether to enable TLS for incoming HTTP requests at the given -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set. See also -mtls
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-tlsCertFile array
|
||||
Path to file with TLS certificate for the corresponding -httpListenAddr if -tls is set. Prefer ECDSA certs instead of RSA certs as RSA certs are slower. The provided certificate file is automatically re-read every second, so it can be dynamically updated. See also -tlsAutocertHosts
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsCipherSuites array
|
||||
Optional list of TLS cipher suites for incoming requests over HTTPS if -tls is set. See the list of supported cipher suites at https://pkg.go.dev/crypto/tls#pkg-constants
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsKeyFile array
|
||||
Path to file with TLS key for the corresponding -httpListenAddr if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated. See also -tlsAutocertHosts
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-tlsMinVersion array
|
||||
Optional minimum TLS version to use for the corresponding -httpListenAddr if -tls is set. Supported values: TLS10, TLS11, TLS12, TLS13
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-version
|
||||
Show VictoriaMetrics version
|
||||
```
|
||||
@@ -1,29 +0,0 @@
|
||||
VictoriaMetrics Cloud is a managed, easy to use monitoring solution that integrates seamlessly with
|
||||
other tools and frameworks in the Observability ecosystem such as OpenTelemetry, Grafana, Prometheus, Graphite,
|
||||
InfluxDB, OpenTSDB and DataDog - see [these docs](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#how-to-import-time-series-data)
|
||||
for further details about importing time series data into VictoriaMetrics.
|
||||
|
||||
<br>
|
||||
|
||||

|
||||
<br>
|
||||
|
||||
## Get Started
|
||||
* [Quick Start](https://docs.victoriametrics.com/victoriametrics-cloud/get-started/quickstart/) documentation.
|
||||
* [Try it now](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_overview) with a free trial.
|
||||
|
||||
|
||||
## Use cases
|
||||
VictoriaMetrics Cloud is designed for teams and organizations that handle any volume of metrics. The most common use cases for VictoriaMetrics Cloud are:
|
||||
* Long-term remote storage for Prometheus, OpenTelemetry and any other standardized metrics.
|
||||
* Reliable and efficient drop-in replacement for Prometheus and Graphite.
|
||||
* Easy and cost-saving enterprise managed alternative solution for Prometheus, Thanos, Mimir or Cortex.
|
||||
* Efficient replacement for InfluxDB and OpenTSDB by consuming lower amounts of RAM, CPU and disk.
|
||||
* Cost-efficient alternative for other Observability services like DataDog or Grafana Cloud.
|
||||
|
||||
Discover VictoriaMetrics Cloud Features and Benefits [here](https://docs.victoriametrics.com/victoriametrics-cloud/get-started/features/).
|
||||
|
||||
## Learn more
|
||||
* [VictoriaMetrics Cloud announcement](https://victoriametrics.com/blog/introduction-to-managed-monitoring/).
|
||||
* [Pricing comparison for Managed Prometheus](https://victoriametrics.com/blog/managed-prometheus-pricing/).
|
||||
* [Monitoring Proxmox VE via VictoriaMetrics Cloud and vmagent](https://victoriametrics.com/blog/proxmox-monitoring-with-dbaas/).
|
||||
@@ -1,17 +0,0 @@
|
||||
---
|
||||
title: VictoriaMetrics Cloud
|
||||
weight: 40
|
||||
menu:
|
||||
docs:
|
||||
weight: 40
|
||||
identifier: cloud
|
||||
pageRef: /victoriametrics-cloud/
|
||||
tags:
|
||||
- metrics
|
||||
- cloud
|
||||
- enterprise
|
||||
aliases:
|
||||
- /victoriametrics-cloud/index.html
|
||||
- /managed-victoriametrics/index.html
|
||||
---
|
||||
{{% content "README.md" %}}
|
||||
@@ -1,24 +0,0 @@
|
||||
---
|
||||
title: Account Management
|
||||
weight:
|
||||
disableToc: true
|
||||
menu:
|
||||
docs:
|
||||
weight: 2
|
||||
parent: cloud
|
||||
identifier: account-management
|
||||
pageRef: /victoriametrics-cloud/account-management/
|
||||
tags:
|
||||
- metrics
|
||||
- cloud
|
||||
- enterprise
|
||||
---
|
||||
This section contains all the information related to users and accounts management. Remember that you
|
||||
can always get started rapidly following the short guide under the [Quick Start](https://docs.victoriametrics.com/victoriametrics-cloud/get-started/quickstart/) section.
|
||||
|
||||
Here, the following information is presented:
|
||||
|
||||
* [Registration and Trial](https://docs.victoriametrics.com/victoriametrics-cloud/account-management/registration-and-trial/), including authentication methods, trial period information and how to continue using VictoriaMetrics Cloud after the trial period expires.
|
||||
* [Account and Profile](https://docs.victoriametrics.com/victoriametrics-cloud/account-management/account-and-profile/), including how to change passwords.
|
||||
* [Roles and permissions](https://docs.victoriametrics.com/victoriametrics-cloud/account-management/roles-and-permissions/), to know more about user privilege and actions.
|
||||
* [User Management](https://docs.victoriametrics.com/victoriametrics-cloud/account-management/user-management/), to learn how to add, edit and remove users.
|
||||
@@ -1,21 +0,0 @@
|
||||
---
|
||||
weight: 2
|
||||
title: Account and Profile
|
||||
disableToc: true
|
||||
menu:
|
||||
docs:
|
||||
parent: account-management
|
||||
weight: 2
|
||||
tags:
|
||||
- metrics
|
||||
- cloud
|
||||
- enterprise
|
||||
---
|
||||
|
||||
Upon registration, every user is assigned to a profile and a [role](https://docs.victoriametrics.com/victoriametrics-cloud/account-management/roles-and-permissions/).
|
||||
|
||||
If forgot your password, it can always be restored by clicking the `Forgot password?` link on the
|
||||
[Sign In](https://console.victoriametrics.cloud/signIn?utm_source=website&utm_campaign=docs_quickstart) page.
|
||||
|
||||
If you need to change your authentication method, or need assistance, don't hesitate to contact our
|
||||
support team at support-cloud@victoriametrics.com.
|
||||
@@ -1,81 +0,0 @@
|
||||
---
|
||||
weight: 5
|
||||
title: Organizations in VictoriaMetrics Cloud
|
||||
menu:
|
||||
docs:
|
||||
parent: account-management
|
||||
weight: 5
|
||||
name: Organizations
|
||||
---
|
||||
|
||||
Organizations in VictoriaMetrics Cloud are designed to streamline team collaboration, improve
|
||||
access control, and simplify scaling observability across multiple teams and environments.
|
||||
|
||||
By using Organizations, users can invite collaborators, assign specific roles and permissions,
|
||||
and organize deployments under a structured access model. This provides a more secure and
|
||||
efficient way to manage access, scale operations, and maintain governance.
|
||||
|
||||
## Getting started with Organizations
|
||||
New users of VictoriaMetrics Cloud registered via the [SignUp](https://console.victoriametrics.cloud/signUp)
|
||||
page are automatically enrolled into a new Organization, where they can invite other new or
|
||||
existing users.
|
||||
|
||||
|
||||
## Working with Organizations
|
||||
|
||||
The left navigation menu of the VictoriaMetrics Cloud console is divided into Services (top) and Organizations (bottom).
|
||||
All features described here are easily accessible through that menu.
|
||||
|
||||
{{% collapse name="User Management" %}}
|
||||
|
||||
The `User Management` page inside the Organizations menu allows to:
|
||||
- Invite new users to collaborate
|
||||
- Check activity, creation time and authentication methods used by users part of the Organization
|
||||
- Manage other users
|
||||
|
||||
Organization `Admins` can perform the following `Actions` on other existing users:
|
||||
- Manage their [`roles`](https://docs.victoriametrics.com/victoriametrics-cloud/account-management/roles-and-permissions/)
|
||||
- `Deactivate` or `Activate` them: Deactivated users are still part of VictoriaMetrics Cloud but cannot perform actions inside the Organization
|
||||
- `Delete` them from the Organization
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
|
||||
{{% collapse name="API Keys" %}}
|
||||
|
||||
[API Keys](https://docs.victoriametrics.com/victoriametrics-cloud/api/) are needed to enforce
|
||||
authentication in programmatic actions (for example, in scripts) to interact with VictoriaMetrics Cloud.
|
||||
The API itself is documented in the [api-docs](https://console.victoriametrics.cloud/api-docs) page.
|
||||
|
||||
In the [API Keys](https://console.victoriametrics.cloud/api_keys) page, Organization Admins can:
|
||||
* Create API Keys, giving them an easily identifiable `Name`, set the `Lifetime`, `Permissions` (Read, Write or both), and grant permissions to all or specific VictoriaMetrics Cloud deployments.
|
||||
* Check existing API Keys relevant information
|
||||
* Revoke previously generated API Keys
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
{{% collapse name="Billing" %}}
|
||||
|
||||
Centralized billing information can be accessed through the [Billing Page](https://console.victoriametrics.cloud/billing).
|
||||
Here Organization `Admins` can check usage, manage Payment Methods, download invoices and check ongoing spends.
|
||||
|
||||
For more billing related information, read the [Billing documentation](https://docs.victoriametrics.com/victoriametrics-cloud/billing/) page.
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
{{% collapse name="Audit Logs" %}}
|
||||
|
||||
VictoriaMetrics Cloud provides centralized access to [Audit Logs](https://console.victoriametrics.cloud/audit) for Organizations.
|
||||
Here, `Admins` can check events performed by other Organization users within VictoriaMetrics Cloud.
|
||||
Audit logs can be filtered by Action, Email or Date.
|
||||
|
||||
VictoriaMetrics Cloud also enables Exporting Audit Logs as CSV.
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
{{% collapse name="Details" %}}
|
||||
|
||||
Organization `Admins` can change their Organization name or leave an Organization in the `Details` [page](https://console.victoriametrics.cloud/organization).
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
---
|
||||
weight: 1
|
||||
title: Registration and Trial
|
||||
disableToc: true
|
||||
menu:
|
||||
docs:
|
||||
parent: account-management
|
||||
weight: 1
|
||||
tags:
|
||||
- metrics
|
||||
- cloud
|
||||
- enterprise
|
||||
---
|
||||
|
||||
Start your registration process by visiting the [Sign Up](https://console.victoriametrics.cloud/signUp?utm_source=website&utm_campaign=docs_registration_and_trial) page.
|
||||
|
||||
## Authentication methods
|
||||
|
||||
VictoriaMetrics Cloud supports user registration and authentication via the following mechanisms:
|
||||
|
||||
1. Sign up with Google.
|
||||
2. Email and password.
|
||||
|
||||
|
||||
## Trial period and credits
|
||||
|
||||
After registration, every new user is granted with $`200` in credits to spend during the trial period.
|
||||
The trial period starts once the first deployment is created and lasts for `30` days.
|
||||
|
||||
In general, VictoriaMetrics Cloud billing is based on the time a deployment consumes resources (stopped deployments consume storage).
|
||||
This means that, during the trial period, you are welcome to start, test and delete as many VictoriaMetrics
|
||||
Cloud deployments as you wish.
|
||||
|
||||
Adding a payment method is not required to register or make use of deployments during the trial period. Once the credits are expired,
|
||||
existing trial deployments will be automatically deleted. If you add a [payment method](https://docs.victoriametrics.com/victoriametrics-cloud/billing/#payment-methods),
|
||||
the service won't be disrupted and you will be charged on that one once the credits are exhausted.
|
||||
|
||||
## After trial version expires
|
||||
When the trial period ends, adding a [payment method](https://docs.victoriametrics.com/victoriametrics-cloud/billing/#payment-methods) will let you continue
|
||||
using VictoriaMetrics Cloud. After the trial period expires, deployments will be stopped and deleted after 7 days if no payment methods are found for your account. If you need assistance or have any questions, don't hesitate to contact our support team at support-cloud@victoriametrics.com.
|
||||
@@ -1,111 +0,0 @@
|
||||
---
|
||||
weight: 3
|
||||
title: Roles and Permissions
|
||||
disableToc: true
|
||||
menu:
|
||||
docs:
|
||||
parent: account-management
|
||||
weight: 3
|
||||
tags:
|
||||
- metrics
|
||||
- cloud
|
||||
- enterprise
|
||||
---
|
||||
|
||||
## User roles
|
||||
|
||||
VictoriaMetrics Cloud provides different levels of user access based on role definitions.
|
||||
Roles determine the information that users can access and edit inside VictoriaMetrics Cloud in
|
||||
different `Categories`, such as Deployments, Billing or Notifications, for example. The full list of roles
|
||||
definitions can be found in the [table](#roles-and-permissions) below.
|
||||
|
||||
Organization Administrators can assign and change other users roles both during the user creation procedure or afterwards. See the [User Management](https://docs.victoriametrics.com/victoriametrics-cloud/account-management/user-management/)
|
||||
section for more information.
|
||||
|
||||
### Roles and permissions
|
||||
|
||||
<table class="params">
|
||||
<tr>
|
||||
<td><strong>User Role</strong></td>
|
||||
<td><strong>Categories</strong></td>
|
||||
<td><strong>Permissions</strong></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td rowspan="7" ><strong>Admin</strong></td>
|
||||
<td>Deployments</td>
|
||||
<td>
|
||||
Access to all deployments tabs and information
|
||||
<p>Create, update and delete deployment</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Integrations</td>
|
||||
<td>Access to different integration configurations</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Billing</td>
|
||||
<td>Check billing information</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Notifications</td>
|
||||
<td>Create and update notifications</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Audit Logs</td>
|
||||
<td>Can check all information in audit logs</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>User Management</td>
|
||||
<td>Add, edit and delete users</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>API Keys</td>
|
||||
<td>Add, edit and delete API Keys</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td rowspan="3"><strong>Editor</strong></td>
|
||||
<td>Deployments</td>
|
||||
<td>
|
||||
Access to all deployments tabs and information
|
||||
<p>Create, update and delete deployment</p>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Notifications</td>
|
||||
<td>Create and update notifications</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Audit Logs</td>
|
||||
<td>Can check all information in audit logs</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Viewer</strong></td>
|
||||
<td>Deployments</td>
|
||||
<td>Access to Overview, Monitoring, Explore and Alerts deployments tabs and information</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
### Profile status
|
||||
|
||||
Profile lifecycle comprises different statuses depending on where they are in their registration process.
|
||||
If you think your profile is in a wrong status or need assistance, don't hesitate to contact our
|
||||
support team at support-cloud@victoriametrics.com.
|
||||
|
||||
<table class="params">
|
||||
<tr>
|
||||
<td><strong>Active</strong></td>
|
||||
<td>The user can log in and use VictoriaMetrics Cloud. The user role defines the access level.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Pending Invitation</strong></td>
|
||||
<td>An invitation was sent. The user must accept this.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Expired Invitation</strong></td>
|
||||
<td>An invitation was expired. The admin should resend invitation to the user.</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><strong>Inactive</strong></td>
|
||||
<td>The user is registered in the VictoriaMetrics Cloud but has no access to perform any actions. Admin can activate or completely delete the user.</td>
|
||||
</tr>
|
||||
</table>
|
||||
@@ -1,59 +0,0 @@
|
||||
---
|
||||
weight: 4
|
||||
title: User Management in VictoriaMetrics Cloud
|
||||
menu:
|
||||
docs:
|
||||
parent: account-management
|
||||
weight: 4
|
||||
name: User Management
|
||||
tags:
|
||||
- metrics
|
||||
- cloud
|
||||
- enterprise
|
||||
aliases:
|
||||
- /victoriametrics-cloud/user-managment/index.html
|
||||
- /victoriametrics-cloud/user-management/index.html
|
||||
- /managed-victoriametrics/user-management/index.html
|
||||
---
|
||||
The User Management system enables VictoriaMetrics Cloud Administrators to control user access and
|
||||
onboard or offboard users to their Organization. It categorizes users according to their needs and role.
|
||||
|
||||
Administrators can manage users in the [User Management section](https://cloud.victoriametrics.com/users), which provides a
|
||||
user list where actions can be applied:
|
||||
|
||||
|
||||
| **User Management field** | **Description** |
|
||||
|------------------------------------|-----------------------------------|
|
||||
| **`Email`** | Registration user email. |
|
||||
| **`Status`** | User profile [status](https://docs.victoriametrics.com/victoriametrics-cloud/account-management/roles-and-permissions#profile-status). |
|
||||
| **`User Role`** | Admin, Editor or Viewer. See description [here](https://docs.victoriametrics.com/victoriametrics-cloud/account-management/roles-and-permissions#roles-and-permissions). |
|
||||
| **`Created At`** | Date on which this user was created. |
|
||||
| **`Last Active`** | User's last login date and time. |
|
||||
| **`Auth method`** | User's [authentication method](https://docs.victoriametrics.com/victoriametrics-cloud/account-management/registration-and-trial/#authentication-methods). |
|
||||
| **`Actions`** | Click here to manage the user. |
|
||||
|
||||
## Adding Users
|
||||
|
||||
Users can be added to VictoriaMetrics Cloud by sending an invitation. Invitations can be sent by
|
||||
clicking on `Invite User` in the [User Management section](https://cloud.victoriametrics.com/users).
|
||||
|
||||
After filling out the form, click on the `Invite` button.
|
||||
The user will be saved, and an invitation email to the provided email address will be sent. As a confirmation, you will see the success message.
|
||||
|
||||
> The invitation link is only active for 24 hours.
|
||||
|
||||
The user will remain at the `Pending Invitation` [status](https://docs.victoriametrics.com/victoriametrics-cloud/account-management/roles-and-permissions#profile-status)
|
||||
until the invitation is accepted. At his point the user is all set and transitions to the `Active` status.
|
||||
|
||||
## Updating Users
|
||||
|
||||
Users can be activated, deactivated or modified, including their role, under the `Actions` menu and selecting `Manage`.
|
||||
|
||||
## Deleting Users
|
||||
|
||||
Users can also be deleted from an Organization. Simply navigate to the [User Management section](https://cloud.victoriametrics.com/users),
|
||||
and select `Delete user` under the `Actions` menu.
|
||||
|
||||
## Resending invitations
|
||||
|
||||
If an invitation is expired, you can always to resend the invite to the user, by clicking on the `Resend invitation` button.
|
||||
@@ -1,301 +0,0 @@
|
||||
---
|
||||
weight: 16
|
||||
title: Alerting with vmalert and VictoriaMetrics Cloud
|
||||
menu:
|
||||
docs:
|
||||
parent: "cloud"
|
||||
weight: 16
|
||||
tags:
|
||||
- metrics
|
||||
- cloud
|
||||
- enterprise
|
||||
- guide
|
||||
aliases:
|
||||
- /victoriametrics-cloud/alerting-vmalert-victoria-metrics-cloud/index.html
|
||||
- /managed-victoriametrics/alerting-vmalert-victoria-metrics-cloud/index.html
|
||||
---
|
||||
|
||||
This guide explains the different ways in which you can use vmalert in conjunction with VictoriaMetrics Cloud
|
||||
|
||||

|
||||
|
||||
## Preconditions
|
||||
|
||||
* [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) is installed. You can obtain it by building it from [source](https://docs.victoriametrics.com/victoriametrics/vmalert/#quickstart), downloading it from the [GitHub releases page](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/latest), or using the docker image [Docker Hub](https://hub.docker.com/r/victoriametrics/vmalert) or [Quay](https://quay.io/repository/victoriametrics/vmalert?tab=tags) for the container ecosystem (such as docker, k8s, etc.).
|
||||
* [Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) is installed.
|
||||
* You have a [single or cluster](https://docs.victoriametrics.com/victoriametrics-cloud/quickstart/#creating-deployment) deployment in [VictoriaMetrics Cloud](https://docs.victoriametrics.com/victoriametrics-cloud/overview/).
|
||||
* If you are using helm, add the [VictoriaMetrics helm chart](https://docs.victoriametrics.com/helm/victoriametrics-alert#how-to-install) repository to your helm repositories. This step is optional.
|
||||
* If you are using [vmoperator](https://docs.victoriametrics.com/operator/quick-start/#quick-start), make sure that it and its CRDs are installed. This step is also optional.
|
||||
|
||||
## Setup
|
||||
|
||||
### Alerting and recording rules file(s)
|
||||
|
||||
You need to prepare file(s) with alerting or recording rules.
|
||||
|
||||
An example file with one alerting rule.
|
||||
|
||||
alerts.yml
|
||||
|
||||
```yaml
|
||||
groups:
|
||||
- name: common
|
||||
rules:
|
||||
- alert: instanceIsDown
|
||||
for: 1m
|
||||
expr: up == 0
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "{{ $labels.job }} instance: {{$labels.instance }} is not up"
|
||||
description: "Job {{ $labels.job }} instance: {{$labels.instance }} is not up for the last 1 minute"
|
||||
```
|
||||
|
||||
### VictoriaMetrics Cloud access token and deployment endpoint
|
||||
|
||||
To use vmalert with VictoriaMetrics Cloud, you must create a read/write token, or use an existing one. The token must have write access to ingest recording rules, ALERTS and ALERTS_FOR_STATE metrics, and read access for rules evaluation.
|
||||
|
||||
For instructions on how to create tokens, please refer to this section of the [documentation](https://docs.victoriametrics.com/victoriametrics-cloud/quickstart/#deployment-access).
|
||||
|
||||
#### Single-Node
|
||||
|
||||

|
||||

|
||||
|
||||
#### Cluster
|
||||
|
||||

|
||||

|
||||

|
||||
|
||||
### vmalert configuration
|
||||
|
||||
#### Single-Node
|
||||
|
||||
##### Binary
|
||||
|
||||
```sh
|
||||
export TOKEN=81e8226e-****-****-****-************
|
||||
export MANAGED_VM_URL=https://gw-c15-1c.cloud.victoriametrics.com
|
||||
export ALERTMANAGER_URL=http://localhost:9093
|
||||
./vmalert -rule=alerts.yml -datasource.url=$MANAGED_VM_URL -datasource.bearerToken=$TOKEN -notifier.url=$ALERTMANAGER_URL -remoteWrite.url=$MANAGED_VM_URL -remoteWrite.bearerToken=$TOKEN -remoteRead.url=$MANAGED_VM_URL -remoteRead.bearerToken=$TOKEN
|
||||
```
|
||||
|
||||
##### Docker
|
||||
|
||||
```sh
|
||||
export TOKEN=81e8226e-****-****-****-************
|
||||
export MANAGED_VM_URL=https://gw-c15-1c.cloud.victoriametrics.com
|
||||
export ALERTMANAGER_URL=http://alertmanager:9093
|
||||
docker run -it -p 8080:8080 -v $(pwd)/alerts.yml:/etc/alerts/alerts.yml victoriametrics/vmalert:v1.87.1 -datasource.url=$MANAGED_VM_URL -datasource.bearerToken=$TOKEN -remoteRead.url=$MANAGED_VM_URL -remoteRead.bearerToken=$TOKEN -remoteWrite.url=$MANAGED_VM_URL -remoteWrite.bearerToken=$TOKEN -notifier.url=$ALERTMANAGER_URL -rule="/etc/alerts/*.yml"
|
||||
```
|
||||
|
||||
##### Helm Chart
|
||||
|
||||
```sh
|
||||
export TOKEN=81e8226e-****-****-****-************
|
||||
export MANAGED_VM_URL=https://gw-c15-1c.cloud.victoriametrics.com
|
||||
export ALERTMANAGER=http://alertmanager:9093
|
||||
cat <<EOF | helm install vmalert vm/victoria-metrics-alert -f -
|
||||
server:
|
||||
datasource:
|
||||
url: $MANAGED_VM_URL
|
||||
bearer:
|
||||
token: $TOKEN
|
||||
remote:
|
||||
write:
|
||||
url: $MANAGED_VM_URL
|
||||
bearer:
|
||||
token: $TOKEN
|
||||
read:
|
||||
url: $MANAGED_VM_URL
|
||||
bearer:
|
||||
token: $TOKEN
|
||||
notifier:
|
||||
alertmanager:
|
||||
url: $ALERTMANAGER
|
||||
config:
|
||||
alerts:
|
||||
groups:
|
||||
- name: common
|
||||
rules:
|
||||
- alert: instanceIsDown
|
||||
for: 1m
|
||||
expr: up == 0
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "{{ $labels.job }} instance: {{$labels.instance }} is not up"
|
||||
description: "Job {{ $labels.job }} instance: {{$labels.instance }} is not up for the last 1 minute"
|
||||
EOF
|
||||
```
|
||||
|
||||
##### VMalert CRD for vmoperator
|
||||
|
||||
```sh
|
||||
export TOKEN=81e8226e-****-****-****-************
|
||||
export MANAGED_VM_URL=https://gw-c15-1c.cloud.victoriametrics.com
|
||||
export ALERTMANAGER=http://alertmanager:9093
|
||||
cat << EOF | kubectl apply -f -
|
||||
apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMAlert
|
||||
metadata:
|
||||
name: vmalert-managed-vm
|
||||
spec:
|
||||
replicaCount: 1
|
||||
datasource:
|
||||
url: $MANAGED_VM_URL
|
||||
bearerTokenSecret:
|
||||
name: managed-token
|
||||
key: token
|
||||
remoteWrite:
|
||||
url: $MANAGED_VM_URL
|
||||
bearerTokenSecret:
|
||||
name: managed-token
|
||||
key: token
|
||||
remoteRead:
|
||||
url: $MANAGED_VM_URL
|
||||
bearerTokenSecret:
|
||||
name: managed-token
|
||||
key: token
|
||||
notifier:
|
||||
url: $ALERTMANAGER
|
||||
ruleSelector:
|
||||
matchLabels:
|
||||
type: managed
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: managed-token
|
||||
stringData:
|
||||
token: $TOKEN
|
||||
EOF
|
||||
```
|
||||
|
||||
##### Testing
|
||||
|
||||
You can ingest metric that will raise an alert
|
||||
|
||||
```sh
|
||||
export TOKEN=81e8226e-****-****-****-*************
|
||||
export MANAGED_VM_URL=https://gw-c15-1c.cloud.victoriametrics.com/
|
||||
curl -H "Authorization: Bearer $TOKEN" -X POST "$MANAGED_VM_URLapi/v1/import/prometheus" -d 'up{job="vmalert-test", instance="localhost"} 0'
|
||||
```
|
||||
|
||||
#### Cluster
|
||||
|
||||
##### Binary
|
||||
|
||||
```sh
|
||||
export TOKEN=76bc5470-****-****-****-************
|
||||
export MANAGED_VM_READ_URL=https://gw-c15-1a.cloud.victoriametrics.com/select/0/prometheus/
|
||||
export MANAGED_VM_WRITE_URL=https://gw-c15-1a.cloud.victoriametrics.com/insert/0/prometheus/
|
||||
export ALERTMANAGER_URL=http://localhost:9093
|
||||
./vmalert -rule=alerts.yml -datasource.url=$MANAGED_VM_READ_URL -datasource.bearerToken=$TOKEN -notifier.url=$ALERTMANAGER_URL -remoteWrite.url=$MANAGED_VM_WRITE_URL -remoteWrite.bearerToken=$TOKEN -remoteRead.url=$MANAGED_VM_READ_URL -remoteRead.bearerToken=$TOKEN
|
||||
```
|
||||
|
||||
##### Docker
|
||||
|
||||
```sh
|
||||
export TOKEN=76bc5470-****-****-****-************
|
||||
export MANAGED_VM_READ_URL=https://gw-c15-1a.cloud.victoriametrics.com/select/0/prometheus/
|
||||
export MANAGED_VM_WRITE_URL=https://gw-c15-1a.cloud.victoriametrics.com/insert/0/prometheus/
|
||||
export ALERTMANAGER_URL=http://alertmanager:9093
|
||||
docker run -it -p 8080:8080 -v $(pwd)/alerts.yml:/etc/alerts/alerts.yml victoriametrics/vmalert:v1.87.1 -datasource.url=$MANAGED_VM_READ_URL -datasource.bearerToken=$TOKEN -remoteRead.url=$MANAGED_VM_READ_URL -remoteRead.bearerToken=$TOKEN -remoteWrite.url=$MANAGED_VM_WRITE_URL -remoteWrite.bearerToken=$TOKEN -notifier.url=$ALERTMANAGER_URL -rule="/etc/alerts/*.yml"
|
||||
```
|
||||
|
||||
##### Helm Chart
|
||||
|
||||
```sh
|
||||
export TOKEN=76bc5470-****-****-****-************
|
||||
export MANAGED_VM_READ_URL=https://gw-c15-1a.cloud.victoriametrics.com/select/0/prometheus/
|
||||
export MANAGED_VM_WRITE_URL=https://gw-c15-1a.cloud.victoriametrics.com/insert/0/prometheus/
|
||||
export ALERTMANAGER=http://alertmanager:9093
|
||||
cat <<EOF | helm install vmalert vm/victoria-metrics-alert -f -
|
||||
server:
|
||||
datasource:
|
||||
url: $MANAGED_VM_READ_URL
|
||||
bearer:
|
||||
token: $TOKEN
|
||||
remote:
|
||||
write:
|
||||
url: $MANAGED_VM_WRITE_URL
|
||||
bearer:
|
||||
token: $TOKEN
|
||||
read:
|
||||
url: $MANAGED_VM_READ_URL
|
||||
bearer:
|
||||
token: $TOKEN
|
||||
notifier:
|
||||
alertmanager:
|
||||
url: $ALERTMANAGER
|
||||
config:
|
||||
alerts:
|
||||
groups:
|
||||
- name: common
|
||||
rules:
|
||||
- alert: instanceIsDown
|
||||
for: 1m
|
||||
expr: up == 0
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "{{ $labels.job }} instance: {{$labels.instance }} is not up"
|
||||
description: "Job {{ $labels.job }} instance: {{$labels.instance }} is not up for the last 1 minute"
|
||||
EOF
|
||||
```
|
||||
|
||||
##### VMalert CRD for vmoperator
|
||||
|
||||
```sh
|
||||
export TOKEN=76bc5470-****-****-****-************
|
||||
export MANAGED_VM_READ_URL=https://gw-c15-1a.cloud.victoriametrics.com/select/0/prometheus/
|
||||
export MANAGED_VM_WRITE_URL=https://gw-c15-1a.cloud.victoriametrics.com/insert/0/prometheus/
|
||||
export ALERTMANAGER=http://alertmanager:9093
|
||||
cat << EOF | kubectl apply -f -
|
||||
apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMAlert
|
||||
metadata:
|
||||
name: vmalert-managed-vm
|
||||
spec:
|
||||
replicaCount: 1
|
||||
datasource:
|
||||
url: $MANAGED_VM_READ_URL
|
||||
bearerTokenSecret:
|
||||
name: managed-token
|
||||
key: token
|
||||
remoteWrite:
|
||||
url: $MANAGED_VM_WRITE_URL
|
||||
bearerTokenSecret:
|
||||
name: managed-token
|
||||
key: token
|
||||
remoteRead:
|
||||
url: $MANAGED_VM_READ_URL
|
||||
bearerTokenSecret:
|
||||
name: managed-token
|
||||
key: token
|
||||
notifier:
|
||||
url: $ALERTMANAGER
|
||||
ruleSelector:
|
||||
matchLabels:
|
||||
type: managed
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: managed-token
|
||||
stringData:
|
||||
token: $TOKEN
|
||||
EOF
|
||||
```
|
||||
|
||||
##### Testing
|
||||
|
||||
You can ingest metric that will raise an alert
|
||||
|
||||
```sh
|
||||
export TOKEN=76bc5470-****-****-****-************
|
||||
export MANAGED_VM_WRITE_URL=https://gw-c15-1a.cloud.victoriametrics.com/insert/0/prometheus/
|
||||
curl -H "Authorization: Bearer $TOKEN" -X POST "$MANAGED_VM_WRITE_URLapi/v1/import/prometheus" -d 'up{job="vmalert-test", instance="localhost"} 0'
|
||||
```
|
||||
|
Before Width: | Height: | Size: 201 KiB |
|
Before Width: | Height: | Size: 209 KiB |
|
Before Width: | Height: | Size: 246 KiB |
|
Before Width: | Height: | Size: 43 KiB |
|
Before Width: | Height: | Size: 188 KiB |
|
Before Width: | Height: | Size: 185 KiB |
@@ -1,218 +0,0 @@
|
||||
---
|
||||
weight: 15
|
||||
title: Setup Alertmanager & VMAlert for VictoriaMetrics Cloud
|
||||
menu:
|
||||
docs:
|
||||
parent: "cloud"
|
||||
weight: 15
|
||||
tags:
|
||||
- metrics
|
||||
- cloud
|
||||
- enterprise
|
||||
- guide
|
||||
aliases:
|
||||
- /victoriametrics-cloud/alertmanager-setup-for-deployment/index.html
|
||||
- /managed-victoriametrics/alertmanager-setup-for-deployment/index.html
|
||||
---
|
||||
VictoriaMetrics Cloud supports configuring alerting rules, powered by vmalert, and sending notifications with hosted Alertmanager.
|
||||
|
||||
## Configure Alertmanager
|
||||
|
||||
You have two options to configure Cloud Alertmanager:
|
||||
|
||||
1. From integrations section: Menu **"Integrations" `->` "Cloud Alertmanager" `->` "New configuration"**:
|
||||

|
||||
2. From deployment page: **"Deployment page" `->` "Rules" tab `->` "Settings" `->` "Connect notifier" `/` "New notifier"**:
|
||||

|
||||
|
||||
For creating a new configuration, you need to provide the following parameters:
|
||||
|
||||
- **Name of the configuration** (it only affects the display in the user interface)
|
||||
- **Configuration file** in [specified format](#alertmanager-config-specification)
|
||||
|
||||
Before saving the configuration, you can validate it by clicking the "Test configuration" button.
|
||||
|
||||
After creating the configuration, you can connect it to one or multiple deployments.
|
||||
In order to do this you need to go to the "Deployment page" `->` "Rules" tab `->` "Settings" `,
|
||||
select the created notifier and confirm the action:
|
||||
|
||||

|
||||
|
||||
Alertmanager is now set up for your deployment, and you be able to get notifications from it.
|
||||
|
||||
### Alertmanager config specification
|
||||
|
||||
VictoriaMetrics Cloud supports Alertmanager with standard [configuration specification](https://prometheus.io/docs/alerting/latest/configuration/).
|
||||
|
||||
But with respect to the specification, there are the following limitations:
|
||||
|
||||
1. Only allowed receivers:
|
||||
* `discord_configs`
|
||||
* `pagerduty_configs`
|
||||
* `slack_configs`
|
||||
* `webhook_configs`
|
||||
* `opsgenie_configs`
|
||||
* `wechat_configs`
|
||||
* `pushover_configs`
|
||||
* `victorops_configs`
|
||||
* `telegram_configs`
|
||||
* `webex_configs`
|
||||
* `msteams_configs`
|
||||
2. All configuration params with `_file` suffix are not allowed for security reasons.
|
||||
3. The maximum file size is 20mb.
|
||||
|
||||
### Configuration example
|
||||
|
||||
Here is an example of Alertmanager configuration:
|
||||
|
||||
```yaml
|
||||
route:
|
||||
receiver: slack-infra
|
||||
repeat_interval: 1m
|
||||
group_interval: 30s
|
||||
routes:
|
||||
- matchers:
|
||||
- team = team-1
|
||||
receiver: dev-team-1
|
||||
continue: true
|
||||
- matchers:
|
||||
- team = team-2
|
||||
receiver: dev-team-2
|
||||
continue: true
|
||||
receivers:
|
||||
- name: slack-infra
|
||||
slack_configs:
|
||||
- api_url: https://hooks.slack.com/services/valid-url
|
||||
channel: infra
|
||||
title: |-
|
||||
[{{ .Status | toUpper -}}
|
||||
{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{- end -}}
|
||||
]
|
||||
{{ if ne .Status "firing" -}}
|
||||
:lgtm:
|
||||
{{- else if eq .CommonLabels.severity "critical" -}}
|
||||
:fire:
|
||||
{{- else if eq .CommonLabels.severity "warning" -}}
|
||||
:warning:
|
||||
{{- else if eq .CommonLabels.severity "info" -}}
|
||||
:information_source:
|
||||
{{- else -}}
|
||||
:question:
|
||||
{{- end }}
|
||||
text: |
|
||||
{{ range .Alerts }}
|
||||
{{- if .Annotations.summary }}
|
||||
Summary: {{ .Annotations.summary }}
|
||||
{{- end }}
|
||||
{{- if .Annotations.description }}
|
||||
Description: {{ .Annotations.description }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
actions:
|
||||
- type: button
|
||||
text: 'Query :mag:'
|
||||
url: '{{ (index .Alerts 0).GeneratorURL }}'
|
||||
- type: button
|
||||
text: 'Silence :no_bell:'
|
||||
url: '{{ template "__silenceURL" . }}'
|
||||
- name: dev-team-1
|
||||
slack_configs:
|
||||
- api_url: https://hooks.slack.com/services/valid-url
|
||||
channel: dev-alerts
|
||||
- name: dev-team-2
|
||||
slack_configs:
|
||||
- api_url: https://hooks.slack.com/services/valid-url
|
||||
channel: dev-alerts
|
||||
```
|
||||
|
||||
### Custom Alertmanager
|
||||
|
||||
If for some reason Cloud Alertmanager is not suitable for you, you can use VictoriaMetrics Cloud with any external Alertmanager hosted in your infrastructure.
|
||||
|
||||
For that select Custom Alertmanager instead of Cloud Alertmanager when [creating the Alertmanager](#configure-alertmanager):
|
||||
|
||||

|
||||
|
||||
Limitations for the Custom Alertmanager:
|
||||
|
||||
- Your custom Alertmanager should be available from the Internet via **HTTPS** with **Basic authentication** or **Bearer token authentication**.
|
||||
- You will not be able to use "Alerts" tab on the deployment page.
|
||||
|
||||
You can test the connection to your custom Alertmanager by clicking the "Test connection" button.
|
||||
`/api/v2/status` endpoint is used to verify that connection to Alertmanager is working.
|
||||
|
||||
## Configure alerting and recording rules
|
||||
|
||||
Alerting and recording rules could be uploaded on **"Deployment page" `->` "Rules" tab `->` "Settings"**:
|
||||
|
||||

|
||||
|
||||
You can click on the upload area or drag and drop the files with rules there.
|
||||
|
||||
Files should be in the [Prometheus alerting rules definition format](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/)
|
||||
or [Prometheus recording rules definition format](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/).
|
||||
|
||||
There are limitations for the rules files:
|
||||
|
||||
1. All files may contain no more than 100 rules in total. If you need to upload more rules contact us via [support-cloud@victoriametrics.com](mailto:support-cloud@victoriametrics.com).
|
||||
2. The maximum file size is 20mb.
|
||||
3. The names of the groups in the files should be unique.
|
||||
|
||||
You can also use API for uploading rules. Switch to **"Upload with API"** on the page and follow the instructions:
|
||||
|
||||
- Choose the API key for uploading rules
|
||||
- After that you can copy the curl command for uploading rules and execute it in your terminal
|
||||
|
||||

|
||||
|
||||
You can use the following API endpoints for the automation with rules:
|
||||
|
||||
* POST: `/api/v1/deployments/{deploymentId}/rule-sets/files/{fileName}` - create/update rules file
|
||||
* DELETE `/api/v1/deployments/{deploymentId}/rule-sets/files/{fileName}` - delete rules file
|
||||
|
||||
For more details, please check [OpenAPI Reference](https://console.victoriametrics.cloud/api-docs).
|
||||
|
||||
### Example of alerting rules
|
||||
|
||||
Here is an example of alerting rules in the Prometheus alerting rules format:
|
||||
|
||||
```yaml
|
||||
groups:
|
||||
- name: examples
|
||||
concurrency: 2
|
||||
interval: 10s
|
||||
rules:
|
||||
- alert: never-firing
|
||||
expr: foobar > 0
|
||||
for: 30s
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: empty result rule
|
||||
- alert: always-firing
|
||||
expr: vector(1) > 0
|
||||
for: 30s
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "rule must be always at firing state"
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Rules execution state
|
||||
|
||||
The state of created rules is located in the `Rules` section of your deployment:
|
||||
|
||||

|
||||
|
||||
### Debug
|
||||
|
||||
It's possible to debug the alerting stack with logs for vmalert and Alertmanager, which are accessible in the `Logs` section of the deployment.
|
||||
|
||||

|
||||
|
||||
### Monitoring
|
||||
|
||||
Alertmanager and vmalert errors are tracked by a built-in monitoring system.
|
||||
Deployment's `Alerts` section has information about active incidents and incident history log.
|
||||
|
Before Width: | Height: | Size: 140 KiB |
|
Before Width: | Height: | Size: 146 KiB |
|
Before Width: | Height: | Size: 138 KiB |
|
Before Width: | Height: | Size: 15 KiB |
|
Before Width: | Height: | Size: 145 KiB |
|
Before Width: | Height: | Size: 29 KiB |
|
Before Width: | Height: | Size: 137 KiB |
|
Before Width: | Height: | Size: 177 KiB |
@@ -1,78 +0,0 @@
|
||||
---
|
||||
weight: 8
|
||||
title: VictoriaMetrics Cloud API Documentation
|
||||
menu:
|
||||
docs:
|
||||
parent: "cloud"
|
||||
weight: 8
|
||||
name: API
|
||||
tags:
|
||||
- metrics
|
||||
- cloud
|
||||
- enterprise
|
||||
---
|
||||
VictoriaMetrics Cloud provides programmatic access for managing cloud resources and is useful for automation with tools like Terraform, OpenTofu, Infrastructure as a Code, GitOps framework, etc.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
* **API Keys**: Used to manage VictoriaMetrics Cloud resources via API.
|
||||
|
||||
**Note: [Access Tokens](https://docs.victoriametrics.com/victoriametrics-cloud/deployments/access-tokens/)** are used for reading and writing data to deployments. They are separate from API Keys and should not be confused. API Keys are specifically for managing resources via the API, while [Access Tokens](https://docs.victoriametrics.com/victoriametrics-cloud/deployments/access-tokens/) handle data access for deployments.
|
||||
|
||||
## API Swagger/OpenAPI Reference: [https://console.victoriametrics.cloud/api-docs](https://console.victoriametrics.cloud/api-docs)
|
||||
|
||||
## API Client
|
||||
|
||||
You can use [victoriametrics-cloud-api-go](https://github.com/VictoriaMetrics/victoriametrics-cloud-api-go) library to integrate your golang projects with VictoriaMetrics Cloud API.
|
||||
This library provides a convenient way to interact with the API, making it easier to manage deployments, access tokens, and other resources programmatically.
|
||||
|
||||
## API Key Properties:
|
||||
|
||||
* **Name**: Human-readable, for team context.
|
||||
* **Lifetime**: Key expiration date (no expiration is an option).
|
||||
* **Permissions**: Read-only or Read/Write access.
|
||||
* **Deployment Access**: Limit access to single, multiple, or all deployments. ***Note**: selecting all deployments in the list and the “All” option are not the same thing, the “All" option will also apply to future deployments that are created.*
|
||||
* **Key** or **Key Value**: Programmatically generated identifier. It's sensitive data used for Authentication. Any operation with API keys (including viewing/revealing Key Value), will be recorded in the [Audit Log](https://docs.victoriametrics.com/victoriametrics-cloud/audit-logs/).
|
||||
|
||||

|
||||
|
||||
## Authentication:
|
||||
|
||||
* **API Key Creation**: Required for using the VictoriaMetrics Cloud API. You need to issue the key manually [here](https://console.victoriametrics.cloud/api_keys).
|
||||
* **HTTP Header**:
|
||||
* **Header Name**: `X-VM-Cloud-Access`
|
||||
* **Header Value**: `<Key-Value>`
|
||||
|
||||
## General information API:
|
||||
|
||||
* **List Cloud Providers**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
* **List Regions**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
* **List Deployment Tiers**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
|
||||
## Deployments API:
|
||||
|
||||
* **List Deployments**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
* **Get Deployment Details**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
* **Create New Deployment**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
* **Update Deployment Parameters**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
* **Delete Deployment**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
|
||||
## Access Tokens API:
|
||||
|
||||
* **List Access Tokens**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
* **Create New Access Token**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
* **Reveal Access Token Secret**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
* **Revoke Access Token**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
|
||||
## Alerting & Recording Rules API:
|
||||
|
||||
* **List Files**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
* **View File**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
* **Upload File**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
* **Delete File**: [API reference](https://console.victoriametrics.cloud/api-docs)
|
||||
|
||||
For detailed setup instructions, check the [VictoriaMetrics Cloud - AlertManager Setup Guide](https://docs.victoriametrics.com/victoriametrics-cloud/alertmanager-setup-for-deployment/).
|
||||
|
||||
## Future API Features:
|
||||
|
||||
* **AlertManager**: Get Config, Upsert Config.
|
||||
|
Before Width: | Height: | Size: 171 KiB |
@@ -1,40 +0,0 @@
|
||||
---
|
||||
weight: 9
|
||||
title: VictoriaMetrics Cloud Audit Logs
|
||||
menu:
|
||||
docs:
|
||||
parent: "cloud"
|
||||
weight: 9
|
||||
name: Audit Logs
|
||||
tags:
|
||||
- metrics
|
||||
- cloud
|
||||
- enterprise
|
||||
---
|
||||
An [**audit log**](https://console.victoriametrics.cloud/audit) is a record of user and system activities within an organization. It captures details of who performed an action, what was done, and when it occurred. Audit logs are essential for security, compliance, and troubleshooting processes.
|
||||
|
||||
## Cloud Audit Log Scopes
|
||||
|
||||
VictoriaMetrics Cloud provides two scopes for audit logs:
|
||||
|
||||
1. **Organization-Level Audit Logs**
|
||||
These logs record all activities at the organization level, such as user logins, token reveals, updates to payment information, and deployments being created or destroyed.
|
||||
2. **Deployment-Level Audit Logs**
|
||||
These logs record activities related to a specific deployment only, such as changes to deployment parameters, creating or deleting access tokens, and modifying alerting or recording rules.
|
||||
|
||||
## Example Log Entry
|
||||
|
||||
* **Time**: 2024-10-0515:40 UTC
|
||||
* **Email**: cloud-admin@victoriametrics.com
|
||||
* **Action**: cluster updated: production-platform, changed properties: vmstorage settings changed: disk size changed from 50.0TB to 80.0TB,
|
||||
|
||||
## Filtering
|
||||
|
||||
The audit log page offers filtering options, allowing you to filter logs by time range, actor, or perform a full-text search by action.
|
||||
|
||||
## Export to CSV
|
||||
|
||||
The Export to CSV button on the audit log page allows you to export the entire audit log as a CSV file.
|
||||
|
||||
Filtering does not affect the export; you will always receive the entire audit log in the exported file.
|
||||
|
||||
@@ -1,124 +0,0 @@
|
||||
---
|
||||
weight: 10
|
||||
title: VictoriaMetrics Cloud Billing
|
||||
menu:
|
||||
docs:
|
||||
parent: "cloud"
|
||||
weight: 10
|
||||
name: Billing
|
||||
tags:
|
||||
- metrics
|
||||
- cloud
|
||||
- enterprise
|
||||
---
|
||||
|
||||
VictoriaMetrics Cloud charges for three key components:
|
||||
|
||||
- **Compute**: The cost of deployment installation.
|
||||
- **Storage**: The storage used by the deployment.
|
||||
- **Network**: External (egress) network usage.
|
||||
|
||||
This breakdown will help you to better understand and manage your costs. Usage data is sent hourly to the payment provider (AWS or Stripe). Detailed billing information is available via the [Billing Page](https://console.victoriametrics.cloud/billing) of your VictoriaMetrics Cloud account.
|
||||
|
||||
Each deployment operates with predefined configurations and limits, protecting you from unexpected overages caused by factors such as:
|
||||
|
||||
* Data ingestion spikes.
|
||||
* Cardinality explosions.
|
||||
* Accidental heavy queries.
|
||||
|
||||
This ensures predictable costs and proactive alerts for workload anomalies.
|
||||
|
||||
__Note__: VictoriaMetrics Cloud does not store or process your payment information. We rely on trusted API providers (Stripe, AWS) for secure payment processing.
|
||||
|
||||
## Pricing
|
||||
|
||||
Pricing begins at **$190/month** for the Starter Tier. To view other tiers and their costs, navigate to the [Create New Deployment](https://console.victoriametrics.cloud/deployments/create) section in the VictoriaMetrics Cloud application.
|
||||
|
||||
Our aim is to make pricing information easy to access and understand. If you have any questions or feedback on our pricing, please contact us.
|
||||
|
||||
|
||||
## Usage Reports
|
||||
|
||||
The [Usage Reports](https://console.victoriametrics.cloud/billing/usage) section in the billing area provides a breakdown of:
|
||||
|
||||
* Storage Costs
|
||||
* Compute Costs
|
||||
* Networking Costs
|
||||
* Applied Credits
|
||||
|
||||
Your Final Monthly Cost is calculated as `usage - credits` and reflects the amount billed by your payment provider.
|
||||
|
||||
A graph is also available to display the daily cost breakdown for the selected month.
|
||||
|
||||
|
||||
## Payment Methods
|
||||
|
||||
VictoriaMetrics Cloud supports the following payment options:
|
||||
|
||||
- Credit Card
|
||||
- AWS Marketplace
|
||||
- ACH Transfers
|
||||
|
||||
You can add multiple payment methods and set one as the primary. Backup payment methods are used if the primary fails. More details are available via the [Payment Methods](https://console.victoriametrics.cloud/billing) tab of the Billing Page.
|
||||
|
||||
### Credit Card
|
||||
|
||||
Credit cards can be added through [Stripe](https://stripe.com/) integration.
|
||||
|
||||
### AWS Marketplace
|
||||
|
||||
Payments made via [AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-atfvt3b73m2z4?sr=0-1&ref_=beagle&applicationId=AWSMPContessa) include billing details in the AWS portal. AWS finalizes monthly bills at the start of the next month, typically charging between the 3rd and 5th business day. Visit the [AWS Knowledge Center](https://aws.amazon.com/premiumsupport/knowledge-center/) for more information.
|
||||
|
||||
### ACH Transfers
|
||||
|
||||
ACH payments are supported. Contact [VictoriaMetrics Cloud Support](https://docs.victoriametrics.com/victoriametrics-cloud/support/) for setup assistance.
|
||||
|
||||
|
||||
|
||||
## Invoices
|
||||
|
||||
[Invoices](https://console.victoriametrics.cloud/billing/invoices) are emailed monthly to users who pay via Credit Card or ACH Transfers. Notification email addresses can be updated in the [VictoriaMetrics Cloud Notifications](https://docs.victoriametrics.com/victoriametrics-cloud/setup-notifications/) section.
|
||||
|
||||
Invoices are also accessible on the Invoices Page, which provides:
|
||||
|
||||
* Invoice Period
|
||||
* Invoice Status
|
||||
* Downloadable PDF Links
|
||||
|
||||
For AWS Marketplace billing, check the AWS Portal for invoice information.
|
||||
|
||||
---
|
||||
|
||||
## FAQ
|
||||
|
||||
### What billing options does VictoriaMetrics Cloud support?
|
||||
|
||||
* Monthly Billing: Pay-as-you-go.
|
||||
* Annual/Multi-Year Contracts: Available via AWS or ACH transfers.
|
||||
|
||||
For more information, contact sales@victoriametrics.com.
|
||||
|
||||
### How is deployment usage metered?
|
||||
|
||||
Usage is metered hourly.
|
||||
|
||||
### Do you charge for backups?
|
||||
|
||||
No, backups are provided at no additional cost.
|
||||
|
||||
### How long is the billing cycle?
|
||||
|
||||
Although usage is metered hourly, billing is conducted monthly. The billing date corresponds to the registration date. For example, if you registered on December 5, you will be billed on the 5th of each subsequent month.
|
||||
|
||||
### Can you help reduce my costs?
|
||||
|
||||
We recommend using Enterprise features such as [downsampling](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#downsampling) and [retention filters](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#retention-filters) for cost optimization. Contact [VictoriaMetrics Cloud Support](https://docs.victoriametrics.com/victoriametrics-cloud/support/) for assistance.
|
||||
|
||||
### I want to extend my trial or get more credits. What should I do?
|
||||
|
||||
Contact [VictoriaMetrics Cloud Support](https://docs.victoriametrics.com/victoriametrics-cloud/support/) , and we’ll help extend your trial or provide additional credits.
|
||||
|
||||
### How do you charge for spikes in load?
|
||||
|
||||
We don’t charge for spikes. Each deployment has predefined configurations and limits. If a deployment cannot handle a spike, you will receive an alert, allowing you to take proactive measures.
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
---
|
||||
weight: 13
|
||||
title: FAQ about VictoriaMetrics Cloud
|
||||
disableToc: true
|
||||
menu:
|
||||
docs:
|
||||
parent: "cloud"
|
||||
weight: 13
|
||||
name: FAQ VictoriaMetrics Cloud
|
||||
tags:
|
||||
- metrics
|
||||
- cloud
|
||||
- enterprise
|
||||
---
|
||||
|
||||
## What authentication and authorization mechanisms does VictoriaMetrics Cloud support?
|
||||
|
||||
* Console (UI) login options can be found in the [Registration and trial](https://docs.victoriametrics.com/victoriametrics-cloud/account-management/registration-and-trial/) section.
|
||||
* To interact programmatically with VictoriaMetrics Cloud deployments (sending or querying data), [bearer tokens](https://docs.victoriametrics.com/victoriametrics-cloud/deployments/access-tokens/) are used. See an example in [Quick start](https://docs.victoriametrics.com/victoriametrics-cloud/get-started/quickstart/#vmagent) or tailored examples under the [Integrations](https://cloud.victoriametrics.com/integrations) section.
|
||||
* To perform console API operations (automated actions with deployments, [access tokens](https://docs.victoriametrics.com/victoriametrics-cloud/deployments/access-tokens/), alerting/recording rules), [API Keys](https://docs.victoriametrics.com/victoriametrics-cloud/api/) are used.
|
||||
|
||||
Our roadmap is always evolving, so feel free to let us know any requirements you may have at support-cloud@victoriametrics.com.
|
||||
|
||||
## What permissions does VictoriaMetrics Cloud require on my AWS resources?
|
||||
VictoriaMetrics Cloud doesn’t require any permissions. Victoria Metrics Cloud instances are not deployed in your environment, but in a separate one. Interactions are made via https.
|
||||
|
||||
## How does VictoriaMetrics Cloud handle data encryption?
|
||||
Information exchange is secured with TLS.
|
||||
|
||||
## Does VictoriaMetrics Cloud require public internet access to AWS services, or can it integrate via AWS Private Link or VPC peering?
|
||||
Victoria Metrics Cloud doesn’t require access to AWS services, quite the opposite: your services need access to VictoriaMetrics Cloud for writing and querying metrics or logs. The only scenario where a call from the platform to your services can occur is when sending notifications about alerts (if you configure the notifications service to be running in your environment).
|
||||
|
||||
It's not mandatory to use public internet access, the option to use AWS Private Link is available, at an extra cost derived from direct AWS cost of the service.
|
||||
|
||||
## What networking requirements does VictoriaMetrics Cloud have (IP whitelisting, VPN, Direct Connect, etc.)?
|
||||
None.
|
||||
|
||||
## Does VictoriaMetrics Cloud support VPC endpoints for secure communication?
|
||||
A [VPC endpoint](https://docs.aws.amazon.com/whitepapers/latest/aws-privatelink/what-are-vpc-endpoints.html)
|
||||
enables users to privately connect to supported AWS services and VPC endpoint services powered by AWS PrivateLink.
|
||||
In summary, PrivateLink can be set up manually on an individual basis, upon request. It also implies extra cost, because there is a cost associated with it in AWS, and it’s not included in the VictoriaMetrics Cloud offering.
|
||||
|
||||
In any case, it’s important to note that connecting via public access is always secured via TLS with all endpoints.
|
||||
|
||||
## How does VictoriaMetrics Cloud ensure data integrity and consistency?
|
||||
We use the VictoriaMetrics Open Source project. To learn more, visit the [Open Source documentation](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#architecture-overview).
|
||||
|
||||
## How does VictoriaMetrics Cloud handle scalability within AWS resources?
|
||||
VictoriaMetrics Cloud deployments run in isolated environments, so scaling can be done freely. We have processes that ensure zero-downtime in cluster setups and very low downtime for single setups.
|
||||
|
||||
## Are there latency or performance considerations when integrating?
|
||||
* VictoriaMetrics Cloud deployments run in isolated environments, so there’s no interference between users and deployments are expected to run at high performance when compared, for example, with heavy-loaded on-prem setups.
|
||||
* Users can choose between different AWS regions, and selecting one that is closer helps.
|
||||
* The latency induced from running in the cloud is not noticeable for querying in dashboards or ingestion operation.
|
||||
|
||||
Should you need a different region, contact us at support-cloud@victoriametrics.com.
|
||||
|
||||
## What SLAs does VictoriaMetrics Cloud offer for availability and performance?
|
||||
SLA are available on our web site: https://victoriametrics.com/legal/cloud/terms-of-service/#service-levels
|
||||
|
||||
## Does the VictoriaMetrics Cloud provide logging and monitoring capabilities?
|
||||
Yes, logs and some of the metrics for your instances are available in the Victoria Metrics Console, we also provide alert notifications about issues with your instances.
|
||||
|
||||
## Can VictoriaMetrics Cloud integrate with AWS monitoring services like CloudWatch, X-Ray, or AWS Config?
|
||||
We have integration with CloudWatch, you can find it in Console -> Integrations: https://console.victoriametrics.cloud/integrations/cloudwatch
|
||||
Let us know if you need more integrations at support-cloud@victoriametrics.com.
|
||||
|
||||
## What troubleshooting mechanisms are in place for debugging issues?
|
||||
In case of deployment issues, users are notified with alerts, which have recommendations for possible fixes. Instance logs are also available under the Logs tab (log messages also usually contains recommendations) and for instance metrics available in the Monitoring tab of each deployment.
|
||||
|
||||
Apart from that, there are other mechanisms:
|
||||
* Cardinality explorer: https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer
|
||||
* Query tracing: https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#query-tracing
|
||||
* Top queries: https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#top-queries
|
||||
* Active queries: https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#active-queries
|
||||
* And other tools (https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui) like Metric relabel debugger, Downsampling filters debugger, Retention filters debugger, Raw query view, etc…
|
||||
|
||||
Also, in case of problems, support is always available to help you at support-cloud@victoriametrics.com.
|
||||
|
||||
## What are the pricing models for VictoriaMetrics Cloud (subscription, usage-based, etc.)?
|
||||
VictoriaMetrics Cloud pricing is based in tiers. Tiers are configured based on a handful of parameters. See [Tier Parameters](https://docs.victoriametrics.com/victoriametrics-cloud/tiers-parameters/) for more information.
|
||||
|
||||
Detailed and updated tier pricing can be checked in the console when [creating deployments](https://cloud.victoriametrics.com/deployments/create).
|
||||
|
||||
## Are there data transfer costs associated with VictoriaMetrics Cloud integrations?
|
||||
Yes. We charge $0.09 per GB for external traffic, which matches AWS’ rate. Estimated traffic costs typically range from $1 to $30 per month, depending on deployment size and regular usage (such as data visualization, evaluation recording, and alerting rules and other integrations).
|
||||
|
||||
## Are there additional costs for API calls or storage?
|
||||
VictoriaMetrics Cloud does not charge extra for API calls.
|
||||
Regarding storage, the price is $1.46 for 10 Gb per Month. Since VictoriaMetrics Cloud is easy to
|
||||
scale, we recommend users to expand storage resources with consumption, instead of allocating all storage space from the beginning.
|
||||
We also offer deduplication and [cardinality explorer](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#cardinality-explorer) mechanisms
|
||||
to help reducing costs.
|
||||
|
||||
## Can VictoriaMetrics Cloud expenses be consolidated into my AWS bill?
|
||||
Yes. You can subscribe via AWS marketplace (see payment methods [documentation](https://docs.victoriametrics.com/victoriametrics-cloud/billing/#aws-marketplace)).
|
||||
|
||||
## How does billing work?
|
||||
See details in the [billing documentation and dedicated FAQ](https://docs.victoriametrics.com/victoriametrics-cloud/billing/).
|
||||
|
||||
## Where can I check the status of VictoriaMetrics Cloud?
|
||||
We expose the status of the VictoriaMetrics Cloud service in https://status.victoriametrics.com/
|
||||
|
||||
## What's the Privacy Policy of VictoriaMetrics Cloud?
|
||||
VictoriaMetrics Cloud Privacy Policy is available [here](https://cloud.victoriametrics.com/static/pdf/privacy_policy.pdf).
|
||||
|
||||
## Which are VictoriaMetrics Cloud Terms of Service?
|
||||
VictoriaMetrics Cloud Terms of Service are publicly available [here](https://victoriametrics.com/legal/cloud/terms-of-service/), including [SLAs](https://victoriametrics.com/legal/cloud/terms-of-service/#service-levels).
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
## Deployments
|
||||
|
||||
VictoriaMetrics Cloud is a DBaaS (Database as a Service) product for VictoriaMetrics.
|
||||
This means that you need to [create a deployment](https://docs.victoriametrics.com/victoriametrics-cloud/get-started/quickstart/#creating-deployments) before sending data to it.
|
||||
It is a fully managed service that allows you to deploy and manage your own instance of VictoriaMetrics
|
||||
in the cloud. Regular use cases of VictoriaMetrics Cloud are monitoring applications, infrastructure, or services,
|
||||
running in different setups like on-prem, Private, Public or Hybrid Cloud, Edge devices or IoT.
|
||||
|
||||
Here you can find more information about deployments:
|
||||
|
||||
- [How to create a deployment](https://docs.victoriametrics.com/victoriametrics-cloud/get-started/quickstart/#creating-deployments)
|
||||
- [Tiers and Deployment Types](https://docs.victoriametrics.com/victoriametrics-cloud/deployments/tiers-and-types/)
|
||||
- [How to use Access tokens to write and read data](https://docs.victoriametrics.com/victoriametrics-cloud/deployments/access-tokens/)
|
||||