mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-29 23:00:51 +03:00
Compare commits
67 Commits
debug-dock
...
v1.122.8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
780cb1bf05 | ||
|
|
a34d0d6056 | ||
|
|
5e98e0cff5 | ||
|
|
51b44afd34 | ||
|
|
5a8d7984ca | ||
|
|
57752ca2c0 | ||
|
|
171cdf0614 | ||
|
|
7d19ec2e4d | ||
|
|
a9b5033d50 | ||
|
|
a783b2048f | ||
|
|
fd48c72a83 | ||
|
|
05e52fa05a | ||
|
|
b49e8d032f | ||
|
|
73b10d7621 | ||
|
|
18268c3d13 | ||
|
|
bfb49c55af | ||
|
|
bd7fed9b41 | ||
|
|
a85c5830c1 | ||
|
|
009ddb9ce1 | ||
|
|
91bce8d3b4 | ||
|
|
a7d69cc51e | ||
|
|
2e6f42bff8 | ||
|
|
7ae1fd9614 | ||
|
|
51d1c16230 | ||
|
|
874f8b31a3 | ||
|
|
d3ac2473c0 | ||
|
|
3f45690342 | ||
|
|
3abd442742 | ||
|
|
5cec04d8e2 | ||
|
|
60f777620f | ||
|
|
8a9a40dbdd | ||
|
|
fde4b4013a | ||
|
|
bf69b0d686 | ||
|
|
a866474918 | ||
|
|
22f6cb6339 | ||
|
|
0d7b7649bf | ||
|
|
74611ce6f2 | ||
|
|
a5dd0324a9 | ||
|
|
45c0d40127 | ||
|
|
fc978c95af | ||
|
|
8e99efe0fa | ||
|
|
3e0aa46fdb | ||
|
|
ea41fea453 | ||
|
|
0165108a8f | ||
|
|
2652a7c762 | ||
|
|
82aacc5b75 | ||
|
|
0544bb12e0 | ||
|
|
70c293467a | ||
|
|
90b4c84ad5 | ||
|
|
0a194d067a | ||
|
|
9ffe965063 | ||
|
|
775ee71fad | ||
|
|
75b597e727 | ||
|
|
f3b0f4292d | ||
|
|
5fa87af6be | ||
|
|
b9a3369254 | ||
|
|
159f990c8e | ||
|
|
b5c3e93f7e | ||
|
|
7a6139416e | ||
|
|
d6bbfaf164 | ||
|
|
11f488d8ff | ||
|
|
d0f8773f4b | ||
|
|
7ec6f28a7c | ||
|
|
46ef5460a9 | ||
|
|
1a68d4ac8a | ||
|
|
be7039429d | ||
|
|
76e5cd2cd4 |
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
@@ -47,6 +47,8 @@ jobs:
|
||||
arch: arm
|
||||
- os: linux
|
||||
arch: ppc64le
|
||||
- os: linux
|
||||
arch: s390x
|
||||
- os: darwin
|
||||
arch: amd64
|
||||
- os: darwin
|
||||
|
||||
17
Makefile
17
Makefile
@@ -125,6 +125,15 @@ vmutils-linux-ppc64le: \
|
||||
vmrestore-linux-ppc64le \
|
||||
vmctl-linux-ppc64le
|
||||
|
||||
vmutils-linux-s390x: \
|
||||
vmagent-linux-s390x \
|
||||
vmalert-linux-s390x \
|
||||
vmalert-tool-linux-s390x \
|
||||
vmauth-linux-s390x \
|
||||
vmbackup-linux-s390x \
|
||||
vmrestore-linux-s390x \
|
||||
vmctl-linux-s390x
|
||||
|
||||
vmutils-darwin-amd64: \
|
||||
vmagent-darwin-amd64 \
|
||||
vmalert-darwin-amd64 \
|
||||
@@ -257,6 +266,7 @@ release-victoria-metrics: \
|
||||
release-victoria-metrics-linux-amd64 \
|
||||
release-victoria-metrics-linux-arm \
|
||||
release-victoria-metrics-linux-arm64 \
|
||||
release-victoria-metrics-linux-s390x \
|
||||
release-victoria-metrics-darwin-amd64 \
|
||||
release-victoria-metrics-darwin-arm64 \
|
||||
release-victoria-metrics-freebsd-amd64 \
|
||||
@@ -275,6 +285,9 @@ release-victoria-metrics-linux-arm:
|
||||
release-victoria-metrics-linux-arm64:
|
||||
GOOS=linux GOARCH=arm64 $(MAKE) release-victoria-metrics-goos-goarch
|
||||
|
||||
release-victoria-metrics-linux-s390x:
|
||||
GOOS=linux GOARCH=s390x $(MAKE) release-victoria-metrics-goos-goarch
|
||||
|
||||
release-victoria-metrics-darwin-amd64:
|
||||
GOOS=darwin GOARCH=amd64 $(MAKE) release-victoria-metrics-goos-goarch
|
||||
|
||||
@@ -314,6 +327,7 @@ release-vmutils: \
|
||||
release-vmutils-linux-amd64 \
|
||||
release-vmutils-linux-arm64 \
|
||||
release-vmutils-linux-arm \
|
||||
release-vmutils-linux-s390x \
|
||||
release-vmutils-darwin-amd64 \
|
||||
release-vmutils-darwin-arm64 \
|
||||
release-vmutils-freebsd-amd64 \
|
||||
@@ -332,6 +346,9 @@ release-vmutils-linux-arm64:
|
||||
release-vmutils-linux-arm:
|
||||
GOOS=linux GOARCH=arm $(MAKE) release-vmutils-goos-goarch
|
||||
|
||||
release-vmutils-linux-s390x:
|
||||
GOOS=linux GOARCH=s390x $(MAKE) release-vmutils-goos-goarch
|
||||
|
||||
release-vmutils-darwin-amd64:
|
||||
GOOS=darwin GOARCH=amd64 $(MAKE) release-vmutils-goos-goarch
|
||||
|
||||
|
||||
@@ -27,6 +27,9 @@ victoria-metrics-linux-ppc64le-prod:
|
||||
victoria-metrics-linux-386-prod:
|
||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-386
|
||||
|
||||
victoria-metrics-linux-s390x-prod:
|
||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
victoria-metrics-darwin-amd64-prod:
|
||||
APP_NAME=victoria-metrics $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -27,6 +27,9 @@ vmagent-linux-ppc64le-prod:
|
||||
vmagent-linux-386-prod:
|
||||
APP_NAME=vmagent $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmagent-linux-s390x-prod:
|
||||
APP_NAME=vmagent $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmagent-darwin-amd64-prod:
|
||||
APP_NAME=vmagent $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -74,7 +74,7 @@ var (
|
||||
"See also -opentsdbHTTPListenAddr.useProxyProtocol")
|
||||
opentsdbHTTPUseProxyProtocol = flag.Bool("opentsdbHTTPListenAddr.useProxyProtocol", false, "Whether to use proxy protocol for connections accepted "+
|
||||
"at -opentsdbHTTPListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt")
|
||||
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config page. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
configAuthKey = flagutil.NewPassword("configAuthKey", "Authorization key for accessing /config and /remotewrite-.*-config pages. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
reloadAuthKey = flagutil.NewPassword("reloadAuthKey", "Auth key for /-/reload http endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*")
|
||||
dryRun = flag.Bool("dryRun", false, "Whether to check config files without running vmagent. The following files are checked: "+
|
||||
"-promscrape.config, -remoteWrite.relabelConfig, -remoteWrite.urlRelabelConfig, -remoteWrite.streamAggr.config . "+
|
||||
@@ -253,6 +253,8 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
{"metric-relabel-debug", "debug metric relabeling"},
|
||||
{"api/v1/targets", "advanced information about discovered targets in JSON format"},
|
||||
{"config", "-promscrape.config contents"},
|
||||
{"remotewrite-relabel-config", "-remoteWrite.relabelConfig contents"},
|
||||
{"remotewrite-url-relabel-config", "-remoteWrite.urlRelabelConfig contents"},
|
||||
{"metrics", "available service metrics"},
|
||||
{"flags", "command-line flags"},
|
||||
{"-/reload", "reload configuration"},
|
||||
@@ -478,6 +480,42 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
promscrape.WriteConfigData(&bb)
|
||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
||||
return true
|
||||
case "/remotewrite-relabel-config":
|
||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
||||
return true
|
||||
}
|
||||
remoteWriteRelabelConfigRequests.Inc()
|
||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
remotewrite.WriteRelabelConfigData(w)
|
||||
return true
|
||||
case "/api/v1/status/remotewrite-relabel-config":
|
||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
||||
return true
|
||||
}
|
||||
remoteWriteStatusRelabelConfigRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
var bb bytesutil.ByteBuffer
|
||||
remotewrite.WriteRelabelConfigData(&bb)
|
||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
||||
return true
|
||||
case "/remotewrite-url-relabel-config":
|
||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
||||
return true
|
||||
}
|
||||
remoteWriteURLRelabelConfigRequests.Inc()
|
||||
w.Header().Set("Content-Type", "text/plain; charset=utf-8")
|
||||
remotewrite.WriteURLRelabelConfigData(w)
|
||||
return true
|
||||
case "/api/v1/status/remotewrite-url-relabel-config":
|
||||
if !httpserver.CheckAuthFlag(w, r, configAuthKey) {
|
||||
return true
|
||||
}
|
||||
remoteWriteStatusURLRelabelConfigRequests.Inc()
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
var bb bytesutil.ByteBuffer
|
||||
remotewrite.WriteURLRelabelConfigData(&bb)
|
||||
fmt.Fprintf(w, `{"status":"success","data":{"yaml":%s}}`, stringsutil.JSONString(string(bb.B)))
|
||||
return true
|
||||
case "/prometheus/-/reload", "/-/reload":
|
||||
if !httpserver.CheckAuthFlag(w, r, reloadAuthKey) {
|
||||
return true
|
||||
@@ -748,6 +786,12 @@ var (
|
||||
promscrapeConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/config"}`)
|
||||
promscrapeStatusConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/config"}`)
|
||||
|
||||
remoteWriteRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-relabel-config"}`)
|
||||
remoteWriteStatusRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-relabel-config"}`)
|
||||
|
||||
remoteWriteURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/remotewrite-url-relabel-config"}`)
|
||||
remoteWriteStatusURLRelabelConfigRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/status/remotewrite-url-relabel-config"}`)
|
||||
|
||||
promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`)
|
||||
)
|
||||
|
||||
|
||||
@@ -3,15 +3,18 @@ package remotewrite
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"go.yaml.in/yaml/v3"
|
||||
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
@@ -32,9 +35,12 @@ var (
|
||||
"See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels")
|
||||
)
|
||||
|
||||
var labelsGlobal []prompb.Label
|
||||
|
||||
var (
|
||||
labelsGlobal []prompb.Label
|
||||
|
||||
remoteWriteRelabelConfigData atomic.Pointer[[]byte]
|
||||
remoteWriteURLRelabelConfigData atomic.Pointer[[]interface{}]
|
||||
|
||||
relabelConfigReloads *metrics.Counter
|
||||
relabelConfigReloadErrors *metrics.Counter
|
||||
relabelConfigSuccess *metrics.Gauge
|
||||
@@ -67,6 +73,42 @@ func initRelabelConfigs() {
|
||||
}
|
||||
}
|
||||
|
||||
// WriteRelabelConfigData writes -remoteWrite.relabelConfig contents to w
|
||||
func WriteRelabelConfigData(w io.Writer) {
|
||||
p := remoteWriteRelabelConfigData.Load()
|
||||
if p == nil {
|
||||
// Nothing to write to w
|
||||
return
|
||||
}
|
||||
_, _ = w.Write(*p)
|
||||
}
|
||||
|
||||
// WriteURLRelabelConfigData writes -remoteWrite.urlRelabelConfig contents to w
|
||||
func WriteURLRelabelConfigData(w io.Writer) {
|
||||
p := remoteWriteURLRelabelConfigData.Load()
|
||||
if p == nil {
|
||||
// Nothing to write to w
|
||||
return
|
||||
}
|
||||
type urlRelabelCfg struct {
|
||||
Url string `yaml:"url"`
|
||||
RelabelConfig interface{} `yaml:"relabel_config"`
|
||||
}
|
||||
var cs []urlRelabelCfg
|
||||
for i, url := range *remoteWriteURLs {
|
||||
cfgData := (*p)[i]
|
||||
if !*showRemoteWriteURL {
|
||||
url = fmt.Sprintf("%d:secret-url", i+1)
|
||||
}
|
||||
cs = append(cs, urlRelabelCfg{
|
||||
Url: url,
|
||||
RelabelConfig: cfgData,
|
||||
})
|
||||
}
|
||||
d, _ := yaml.Marshal(cs)
|
||||
_, _ = w.Write(d)
|
||||
}
|
||||
|
||||
func reloadRelabelConfigs() {
|
||||
rcs := allRelabelConfigs.Load()
|
||||
if !rcs.isSet() {
|
||||
@@ -90,28 +132,42 @@ func reloadRelabelConfigs() {
|
||||
func loadRelabelConfigs() (*relabelConfigs, error) {
|
||||
var rcs relabelConfigs
|
||||
if *relabelConfigPathGlobal != "" {
|
||||
global, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
|
||||
global, rawCfg, err := promrelabel.LoadRelabelConfigs(*relabelConfigPathGlobal)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load -remoteWrite.relabelConfig=%q: %w", *relabelConfigPathGlobal, err)
|
||||
}
|
||||
remoteWriteRelabelConfigData.Store(&rawCfg)
|
||||
rcs.global = global
|
||||
}
|
||||
if len(*relabelConfigPaths) > len(*remoteWriteURLs) {
|
||||
return nil, fmt.Errorf("too many -remoteWrite.urlRelabelConfig args: %d; it mustn't exceed the number of -remoteWrite.url args: %d",
|
||||
len(*relabelConfigPaths), (len(*remoteWriteURLs)))
|
||||
}
|
||||
|
||||
var urlRelabelCfgs []interface{}
|
||||
rcs.perURL = make([]*promrelabel.ParsedConfigs, len(*remoteWriteURLs))
|
||||
for i, path := range *relabelConfigPaths {
|
||||
if len(path) == 0 {
|
||||
// Skip empty relabel config.
|
||||
urlRelabelCfgs = append(urlRelabelCfgs, nil)
|
||||
continue
|
||||
}
|
||||
prc, err := promrelabel.LoadRelabelConfigs(path)
|
||||
prc, rawCfg, err := promrelabel.LoadRelabelConfigs(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot load relabel configs from -remoteWrite.urlRelabelConfig=%q: %w", path, err)
|
||||
}
|
||||
rcs.perURL[i] = prc
|
||||
|
||||
var parsedCfg interface{}
|
||||
_ = yaml.Unmarshal(rawCfg, &parsedCfg)
|
||||
urlRelabelCfgs = append(urlRelabelCfgs, parsedCfg)
|
||||
}
|
||||
if len(*remoteWriteURLs) > len(*relabelConfigPaths) {
|
||||
// fill the urlRelabelCfgs with empty relabel configs if not set
|
||||
for i := len(*relabelConfigPaths); i < len(*remoteWriteURLs); i++ {
|
||||
urlRelabelCfgs = append(urlRelabelCfgs, nil)
|
||||
}
|
||||
}
|
||||
remoteWriteURLRelabelConfigData.Store(&urlRelabelCfgs)
|
||||
return &rcs, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ratelimiter"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeserieslimits"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -485,6 +486,9 @@ func tryPush(at *auth.Token, wr *prompb.WriteRequest, forceDropSamplesOnFailure
|
||||
matchIdxs.B = sas.Push(tssBlock, matchIdxs.B)
|
||||
if !*streamAggrGlobalKeepInput {
|
||||
tssBlock = dropAggregatedSeries(tssBlock, matchIdxs.B, *streamAggrGlobalDropInput)
|
||||
} else if *streamAggrGlobalDropInput {
|
||||
// if both keep_input and drop_input are true, we keep only the aggregated series
|
||||
tssBlock = dropUnaggregatedSeries(tssBlock, matchIdxs.B)
|
||||
}
|
||||
matchIdxsPool.Put(matchIdxs)
|
||||
}
|
||||
@@ -988,7 +992,17 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
||||
tss = append(*v, tss...)
|
||||
}
|
||||
tss = dropAggregatedSeries(tss, matchIdxs.B, rwctx.streamAggrDropInput)
|
||||
} else if rwctx.streamAggrDropInput {
|
||||
// if both keep_input and drop_input are true, we keep only the aggregated series
|
||||
if rctx == nil {
|
||||
rctx = getRelabelCtx()
|
||||
// Make a copy of tss before dropping aggregated series
|
||||
v = tssPool.Get().(*[]prompb.TimeSeries)
|
||||
tss = append(*v, tss...)
|
||||
}
|
||||
tss = dropUnaggregatedSeries(tss, matchIdxs.B)
|
||||
}
|
||||
|
||||
matchIdxsPool.Put(matchIdxs)
|
||||
}
|
||||
if rwctx.deduplicator != nil {
|
||||
@@ -1011,9 +1025,10 @@ func (rwctx *remoteWriteCtx) TryPushTimeSeries(tss []prompb.TimeSeries, forceDro
|
||||
return false
|
||||
}
|
||||
|
||||
var matchIdxsPool bytesutil.ByteBufferPool
|
||||
var matchIdxsPool slicesutil.BufferPool[uint32]
|
||||
|
||||
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []byte, dropInput bool) []prompb.TimeSeries {
|
||||
// dropAggregatedSeries drops matched series, also the unmatched if dropInput is true.
|
||||
func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32, dropInput bool) []prompb.TimeSeries {
|
||||
dst := src[:0]
|
||||
if !dropInput {
|
||||
for i, match := range matchIdxs {
|
||||
@@ -1028,6 +1043,20 @@ func dropAggregatedSeries(src []prompb.TimeSeries, matchIdxs []byte, dropInput b
|
||||
return dst
|
||||
}
|
||||
|
||||
// dropUnaggregatedSeries drops unmatched series.
|
||||
func dropUnaggregatedSeries(src []prompb.TimeSeries, matchIdxs []uint32) []prompb.TimeSeries {
|
||||
dst := src[:0]
|
||||
for i, match := range matchIdxs {
|
||||
if match == 0 {
|
||||
continue
|
||||
}
|
||||
dst = append(dst, src[i])
|
||||
}
|
||||
tail := src[len(dst):]
|
||||
clear(tail)
|
||||
return dst
|
||||
}
|
||||
|
||||
func (rwctx *remoteWriteCtx) pushInternalTrackDropped(tss []prompb.TimeSeries) {
|
||||
if rwctx.tryPushTimeSeriesInternal(tss) {
|
||||
return
|
||||
|
||||
@@ -10,6 +10,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/consistenthash"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
|
||||
@@ -57,8 +59,8 @@ func TestGetLabelsHash_Distribution(t *testing.T) {
|
||||
f(10)
|
||||
}
|
||||
|
||||
func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
|
||||
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string) {
|
||||
func TestRemoteWriteContext_TryPushTimeSeries(t *testing.T) {
|
||||
f := func(streamAggrConfig, relabelConfig string, enableWindows bool, dedupInterval time.Duration, keepInput, dropInput bool, input string, expectedRowsPushedAfterRelabel, expectedPushedSample int) {
|
||||
t.Helper()
|
||||
perURLRelabel, err := promrelabel.ParseRelabelConfigsData([]byte(relabelConfig))
|
||||
if err != nil {
|
||||
@@ -71,10 +73,16 @@ func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
|
||||
}
|
||||
allRelabelConfigs.Store(rcs)
|
||||
|
||||
path := "fast-queue-write-test"
|
||||
fs.MustRemoveDir(path)
|
||||
fq := persistentqueue.MustOpenFastQueue(path, "test", 100, 0, false)
|
||||
defer fs.MustRemoveDir(path)
|
||||
defer fq.MustClose()
|
||||
|
||||
pss := make([]*pendingSeries, 1)
|
||||
isVMProto := &atomic.Bool{}
|
||||
isVMProto.Store(true)
|
||||
pss[0] = newPendingSeries(nil, isVMProto, 0, 100)
|
||||
pss[0] = newPendingSeries(fq, isVMProto, 0, 100)
|
||||
rwctx := &remoteWriteCtx{
|
||||
idx: 0,
|
||||
streamAggrKeepInput: keepInput,
|
||||
@@ -83,6 +91,8 @@ func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
|
||||
rowsPushedAfterRelabel: metrics.GetOrCreateCounter(`foo`),
|
||||
rowsDroppedByRelabel: metrics.GetOrCreateCounter(`bar`),
|
||||
}
|
||||
defer metrics.UnregisterAllMetrics()
|
||||
|
||||
if dedupInterval > 0 {
|
||||
rwctx.deduplicator = streamaggr.NewDeduplicator(nil, enableWindows, dedupInterval, nil, "dedup-global")
|
||||
}
|
||||
@@ -104,23 +114,27 @@ func TestRemoteWriteContext_TryPush_ImmutableTimeseries(t *testing.T) {
|
||||
inputTss := prometheus.MustParsePromMetrics(input, offsetMsecs)
|
||||
expectedTss := make([]prompb.TimeSeries, len(inputTss))
|
||||
|
||||
// copy inputTss to make sure it is not mutated during TryPush call
|
||||
// check inputTss is not modified after TryPushTimeSeries
|
||||
copy(expectedTss, inputTss)
|
||||
if !rwctx.TryPushTimeSeries(inputTss, false) {
|
||||
t.Fatalf("cannot push samples to rwctx")
|
||||
}
|
||||
|
||||
if int(rwctx.rowsPushedAfterRelabel.Get()) != expectedRowsPushedAfterRelabel {
|
||||
t.Fatalf("unexpected number of rows after relabel; got %d; want %d", rwctx.rowsPushedAfterRelabel.Get(), expectedRowsPushedAfterRelabel)
|
||||
}
|
||||
|
||||
if len(pss[0].wr.tss) != expectedPushedSample {
|
||||
t.Fatalf("unexpected number of pushed samples; got %d; want %d", len(pss[0].wr.tss), expectedPushedSample)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(expectedTss, inputTss) {
|
||||
t.Fatalf("unexpected samples;\ngot\n%v\nwant\n%v", inputTss, expectedTss)
|
||||
}
|
||||
}
|
||||
|
||||
f(`
|
||||
- interval: 1m
|
||||
outputs: [sum_samples]
|
||||
- interval: 2m
|
||||
outputs: [count_series]
|
||||
`, `
|
||||
// relabeling
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
@@ -129,53 +143,66 @@ metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`)
|
||||
`, 2, 2)
|
||||
|
||||
// relabeling + aggregation
|
||||
f(`
|
||||
- match: '{env="dev"}'
|
||||
interval: 1m
|
||||
outputs: [sum_samples]
|
||||
`, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: ".*"
|
||||
`, false, 0, false, false, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`, 4, 2)
|
||||
|
||||
// aggregation + keepInput
|
||||
f(`
|
||||
- match: '{env="dev"}'
|
||||
interval: 1m
|
||||
outputs: [sum_samples]
|
||||
`, ``, false, 0, true, false, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`, 4, 4)
|
||||
|
||||
// aggregation + dropInput
|
||||
f(`
|
||||
- match: '{env="dev"}'
|
||||
interval: 1m
|
||||
outputs: [sum_samples]
|
||||
`, ``, false, 0, false, true, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`, 4, 0)
|
||||
|
||||
// aggregation + keepInput + dropInput
|
||||
f(`
|
||||
- match: '{env="dev"}'
|
||||
interval: 1m
|
||||
outputs: [sum_samples]
|
||||
`, ``, false, 0, true, true, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="bar"} 25
|
||||
`, 3, 1)
|
||||
|
||||
// aggregation + deduplication
|
||||
f(``, ``, true, time.Hour, false, false, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="foo"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="foo"} 25
|
||||
`)
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, true, time.Hour, false, false, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="bar"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`)
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, true, time.Hour, true, false, `
|
||||
metric{env="test"} 10
|
||||
metric{env="dev"} 20
|
||||
metric{env="foo"} 15
|
||||
metric{env="dev"} 25
|
||||
`)
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, true, time.Hour, false, true, `
|
||||
metric{env="foo"} 10
|
||||
metric{env="dev"} 20
|
||||
metric{env="foo"} 15
|
||||
metric{env="dev"} 25
|
||||
`)
|
||||
f(``, `
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dev"
|
||||
`, true, time.Hour, true, true, `
|
||||
metric{env="dev"} 10
|
||||
metric{env="test"} 20
|
||||
metric{env="dev"} 15
|
||||
metric{env="bar"} 25
|
||||
`)
|
||||
`, 4, 0)
|
||||
}
|
||||
|
||||
func TestShardAmountRemoteWriteCtx(t *testing.T) {
|
||||
|
||||
@@ -18,12 +18,12 @@ var (
|
||||
streamAggrGlobalConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
||||
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
|
||||
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation "+
|
||||
"with -streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||
"are written to remote storages write. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation "+
|
||||
"with -remoteWrite.streamAggr.config. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||
"are written to remote storages write. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrGlobalKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples that match any rule in "+
|
||||
"-streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
|
||||
"are written to the remote storage. See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrGlobalDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop input samples that not matching any rule in "+
|
||||
"-streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
|
||||
"are written to the remote storage. See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrGlobalDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval on "+
|
||||
"aggregator before optional aggregation with -streamAggr.config . "+
|
||||
"See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
||||
@@ -43,11 +43,11 @@ var (
|
||||
streamAggrConfig = flagutil.NewArrayString("remoteWrite.streamAggr.config", "Optional path to file with stream aggregation config for the corresponding -remoteWrite.url. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
||||
"See also -remoteWrite.streamAggr.keepInput, -remoteWrite.streamAggr.dropInput and -remoteWrite.streamAggr.dedupInterval")
|
||||
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop all the input samples after the aggregation "+
|
||||
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||
streamAggrDropInput = flagutil.NewArrayBool("remoteWrite.streamAggr.dropInput", "Whether to drop input samples that not matching any rule in "+
|
||||
"the corresponding -remoteWrite.streamAggr.config. By default, only matched raw samples are dropped, while unmatched samples "+
|
||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep all the input samples after the aggregation "+
|
||||
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. By default, only aggregates samples are dropped, while the remaining samples "+
|
||||
streamAggrKeepInput = flagutil.NewArrayBool("remoteWrite.streamAggr.keepInput", "Whether to keep input samples that match any rule in "+
|
||||
"the corresponding -remoteWrite.streamAggr.config. By default, matched raw samples are aggregated and dropped, while unmatched samples "+
|
||||
"are written to the corresponding -remoteWrite.url . See also -remoteWrite.streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrDedupInterval = flagutil.NewArrayDuration("remoteWrite.streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation "+
|
||||
"with -remoteWrite.streamAggr.config at the corresponding -remoteWrite.url. See also -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
||||
|
||||
@@ -27,6 +27,9 @@ vmalert-tool-linux-ppc64le-prod:
|
||||
vmalert-tool-linux-386-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmalert-tool-linux-s390x-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmalert-tool-darwin-amd64-prod:
|
||||
APP_NAME=vmalert-tool $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -132,7 +132,7 @@ func UnitTest(files []string, disableGroupLabel bool, externalLabels []string, e
|
||||
}
|
||||
labels[s[:n]] = s[n+1:]
|
||||
}
|
||||
_, err = notifier.Init(labels, externalURL)
|
||||
err = notifier.Init(labels, externalURL)
|
||||
if err != nil {
|
||||
logger.Fatalf("failed to init notifier: %v", err)
|
||||
}
|
||||
@@ -379,7 +379,7 @@ func (tg *testGroup) test(evalInterval time.Duration, groupOrderMap map[string]i
|
||||
if len(g.Rules) == 0 {
|
||||
continue
|
||||
}
|
||||
errs := g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, rw, ts)
|
||||
errs := g.ExecOnce(context.Background(), rw, ts)
|
||||
for err := range errs {
|
||||
if err != nil {
|
||||
checkErrs = append(checkErrs, fmt.Errorf("\nfailed to exec group: %q, time: %s, err: %w", g.Name,
|
||||
|
||||
@@ -27,6 +27,9 @@ vmalert-linux-ppc64le-prod:
|
||||
vmalert-linux-386-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmalert-linux-s390x-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmalert-darwin-amd64-prod:
|
||||
APP_NAME=vmalert $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -179,11 +179,11 @@ func (c *Client) Query(ctx context.Context, query string, ts time.Time) (Result,
|
||||
var parseFn func(resp *http.Response) (Result, error)
|
||||
switch c.dataSourceType {
|
||||
case datasourcePrometheus:
|
||||
parseFn = parsePrometheusResponse
|
||||
parseFn = parsePrometheusInstantResponse
|
||||
case datasourceGraphite:
|
||||
parseFn = parseGraphiteResponse
|
||||
case datasourceVLogs:
|
||||
parseFn = parseVLogsResponse
|
||||
parseFn = parseVLogsInstantResponse
|
||||
default:
|
||||
logger.Panicf("BUG: unsupported datasource type %q to parse query response", c.dataSourceType)
|
||||
}
|
||||
@@ -239,9 +239,9 @@ func (c *Client) QueryRange(ctx context.Context, query string, start, end time.T
|
||||
var parseFn func(resp *http.Response) (Result, error)
|
||||
switch c.dataSourceType {
|
||||
case datasourcePrometheus:
|
||||
parseFn = parsePrometheusResponse
|
||||
parseFn = parsePrometheusRangeResponse
|
||||
case datasourceVLogs:
|
||||
parseFn = parseVLogsResponse
|
||||
parseFn = parseVLogsRangeResponse
|
||||
default:
|
||||
logger.Panicf("BUG: unsupported datasource type %q to parse query range response", c.dataSourceType)
|
||||
}
|
||||
|
||||
@@ -172,17 +172,26 @@ const (
|
||||
rtVector, rtMatrix, rScalar = "vector", "matrix", "scalar"
|
||||
)
|
||||
|
||||
func parsePrometheusResponse(resp *http.Response) (res Result, err error) {
|
||||
func parsePromResponse(resp *http.Response) (*promResponse, error) {
|
||||
r := &promResponse{}
|
||||
if err = json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return res, fmt.Errorf("failed to decode response: %w", err)
|
||||
if err := json.NewDecoder(resp.Body).Decode(r); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
if r.Status == statusError {
|
||||
return res, fmt.Errorf("response error %q: %s", r.ErrorType, r.Error)
|
||||
return nil, fmt.Errorf("response error %q: %s", r.ErrorType, r.Error)
|
||||
}
|
||||
if r.Status != statusSuccess {
|
||||
return res, fmt.Errorf("unknown response status %q", r.Status)
|
||||
return nil, fmt.Errorf("unknown response status %q", r.Status)
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
func parsePrometheusInstantResponse(resp *http.Response) (res Result, err error) {
|
||||
r, err := parsePromResponse(resp)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
|
||||
var parseFn func() ([]Metric, error)
|
||||
switch r.Data.ResultType {
|
||||
case rtVector:
|
||||
@@ -191,12 +200,6 @@ func parsePrometheusResponse(resp *http.Response) (res Result, err error) {
|
||||
return res, fmt.Errorf("unmarshal err %w; \n %#v", err, string(r.Data.Result))
|
||||
}
|
||||
parseFn = pi.metrics
|
||||
case rtMatrix:
|
||||
var pr promRange
|
||||
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
|
||||
return res, err
|
||||
}
|
||||
parseFn = pr.metrics
|
||||
case rScalar:
|
||||
var ps promScalar
|
||||
if err := json.Unmarshal(r.Data.Result, &ps); err != nil {
|
||||
@@ -206,7 +209,6 @@ func parsePrometheusResponse(resp *http.Response) (res Result, err error) {
|
||||
default:
|
||||
return res, fmt.Errorf("unknown result type %q", r.Data.ResultType)
|
||||
}
|
||||
|
||||
ms, err := parseFn()
|
||||
if err != nil {
|
||||
return res, err
|
||||
@@ -222,6 +224,34 @@ func parsePrometheusResponse(resp *http.Response) (res Result, err error) {
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func parsePrometheusRangeResponse(resp *http.Response) (res Result, err error) {
|
||||
r, err := parsePromResponse(resp)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("failed to parse response: %w", err)
|
||||
}
|
||||
if r.Data.ResultType != rtMatrix {
|
||||
return res, fmt.Errorf("unexpected result type %q; expected result type %q", r.Data.ResultType, rtMatrix)
|
||||
}
|
||||
|
||||
var pr promRange
|
||||
if err := json.Unmarshal(r.Data.Result, &pr.Result); err != nil {
|
||||
return res, err
|
||||
}
|
||||
ms, err := pr.metrics()
|
||||
if err != nil {
|
||||
return res, err
|
||||
}
|
||||
res = Result{Data: ms, IsPartial: r.IsPartial}
|
||||
if r.Stats.SeriesFetched != nil {
|
||||
intV, err := strconv.Atoi(*r.Stats.SeriesFetched)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("failed to convert stats.seriesFetched to int: %w", err)
|
||||
}
|
||||
res.SeriesFetched = &intV
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *Client) setPrometheusInstantReqParams(r *http.Request, query string, timestamp time.Time) {
|
||||
if c.appendTypePrefix {
|
||||
r.URL.Path += "/prometheus"
|
||||
|
||||
@@ -65,21 +65,23 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
case 3:
|
||||
w.Write([]byte(`{"status":"unknown"}`))
|
||||
case 4:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix"}}`))
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector"}}`))
|
||||
case 5:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
|
||||
case 6:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"vm_rows","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"vm_requests","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||
case 7:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||
case 8:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"scalar","result":[1583786142, "1"]},"stats":{"seriesFetched": "42"}}`))
|
||||
case 9:
|
||||
w.Write([]byte(`{"status":"success", "isPartial":true, "data":{"resultType":"scalar","result":[1583786142, "1"]}}`))
|
||||
}
|
||||
})
|
||||
mux.HandleFunc("/render", func(w http.ResponseWriter, _ *http.Request) {
|
||||
c++
|
||||
switch c {
|
||||
case 9:
|
||||
case 10:
|
||||
w.Write([]byte(`[{"target":"constantLine(10)","tags":{"name":"constantLine(10)"},"datapoints":[[10,1611758343],[10,1611758373],[10,1611758403]]}]`))
|
||||
}
|
||||
})
|
||||
@@ -102,9 +104,9 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
t.Fatalf("failed to parse 'time' query param %q: %s", timeParam, err)
|
||||
}
|
||||
switch c {
|
||||
case 10:
|
||||
w.Write([]byte("[]"))
|
||||
case 11:
|
||||
w.Write([]byte("[]"))
|
||||
case 12:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"total","foo":"bar"},"value":[1583786142,"13763"]},{"metric":{"__name__":"total","foo":"baz"},"value":[1583786140,"2000"]}]}}`))
|
||||
}
|
||||
})
|
||||
@@ -123,6 +125,7 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
ts := time.Now()
|
||||
|
||||
expErr := func(query, err string) {
|
||||
t.Helper()
|
||||
_, _, gotErr := pq.Query(ctx, query, ts)
|
||||
if gotErr == nil {
|
||||
t.Fatalf("expected %q got nil", err)
|
||||
@@ -137,8 +140,9 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
expErr(vmQuery, "response error") // 2
|
||||
expErr(vmQuery, "unknown response status") // 3
|
||||
expErr(vmQuery, "unexpected end of JSON input") // 4
|
||||
expErr(vmQuery, "unknown result type") // 5
|
||||
|
||||
res, _, err := pq.Query(ctx, vmQuery, ts) // 5 - vector
|
||||
res, _, err := pq.Query(ctx, vmQuery, ts) // 6 - vector
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -159,7 +163,7 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
}
|
||||
metricsEqual(t, res.Data, expected)
|
||||
|
||||
res, req, err := pq.Query(ctx, vmQuery, ts) // 6 - scalar
|
||||
res, req, err := pq.Query(ctx, vmQuery, ts) // 7 - scalar
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -184,7 +188,7 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
res.SeriesFetched)
|
||||
}
|
||||
|
||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 7 - scalar with stats
|
||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 8 - scalar with stats
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -205,7 +209,7 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
*res.SeriesFetched)
|
||||
}
|
||||
|
||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 8
|
||||
res, _, err = pq.Query(ctx, vmQuery, ts) // 9
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -216,7 +220,7 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
// test graphite
|
||||
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
||||
|
||||
res, _, err = gq.Query(ctx, queryRender, ts) // 9 - graphite
|
||||
res, _, err = gq.Query(ctx, queryRender, ts) // 10 - graphite
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -236,9 +240,9 @@ func TestVMInstantQuery(t *testing.T) {
|
||||
vlogs := datasourceVLogs
|
||||
pq = s.BuildWithParams(QuerierParams{DataSourceType: string(vlogs), EvaluationInterval: 15 * time.Second})
|
||||
|
||||
expErr(vlogsQuery, "error parsing response") // 10
|
||||
expErr(vlogsQuery, "error parsing response") // 11
|
||||
|
||||
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 11
|
||||
res, _, err = pq.Query(ctx, vlogsQuery, ts) // 12
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
@@ -390,6 +394,8 @@ func TestVMRangeQuery(t *testing.T) {
|
||||
switch c {
|
||||
case 0:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"vm_rows"},"values":[[1583786142,"13763"]]}]}}`))
|
||||
case 1:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"vector","result":[1583786142, "1"]}}`))
|
||||
}
|
||||
})
|
||||
mux.HandleFunc("/select/logsql/stats_query_range", func(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -422,7 +428,7 @@ func TestVMRangeQuery(t *testing.T) {
|
||||
t.Fatalf("expected 'step' query param to be 60s; got %q instead", step)
|
||||
}
|
||||
switch c {
|
||||
case 1:
|
||||
case 2:
|
||||
w.Write([]byte(`{"status":"success","data":{"resultType":"matrix","result":[{"metric":{"__name__":"total"},"values":[[1583786142,"10"]]}]}}`))
|
||||
}
|
||||
})
|
||||
@@ -446,13 +452,13 @@ func TestVMRangeQuery(t *testing.T) {
|
||||
|
||||
start, end := time.Now().Add(-time.Minute), time.Now()
|
||||
|
||||
res, err := pq.QueryRange(ctx, vmQuery, start, end)
|
||||
res, err := pq.QueryRange(ctx, vmQuery, start, end) // case 0
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected %s", err)
|
||||
}
|
||||
m := res.Data
|
||||
if len(m) != 1 {
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
t.Fatalf("expected 1 metric got %d in %+v", len(m), m)
|
||||
}
|
||||
expected := Metric{
|
||||
Labels: []prompb.Label{{Value: "vm_rows", Name: "__name__"}},
|
||||
@@ -463,6 +469,9 @@ func TestVMRangeQuery(t *testing.T) {
|
||||
t.Fatalf("unexpected metric %+v want %+v", m[0], expected)
|
||||
}
|
||||
|
||||
_, err = pq.QueryRange(ctx, vmQuery, start, end) // case 1
|
||||
expectError(t, err, "unexpected result type")
|
||||
|
||||
// test unsupported graphite
|
||||
gq := s.BuildWithParams(QuerierParams{DataSourceType: string(datasourceGraphite)})
|
||||
|
||||
|
||||
@@ -40,8 +40,28 @@ func (c *Client) setVLogsRangeReqParams(r *http.Request, query string, start, en
|
||||
c.setReqParams(r, query)
|
||||
}
|
||||
|
||||
func parseVLogsResponse(resp *http.Response) (res Result, err error) {
|
||||
res, err = parsePrometheusResponse(resp)
|
||||
func parseVLogsInstantResponse(resp *http.Response) (res Result, err error) {
|
||||
res, err = parsePrometheusInstantResponse(resp)
|
||||
if err != nil {
|
||||
return Result{}, err
|
||||
}
|
||||
for i := range res.Data {
|
||||
m := &res.Data[i]
|
||||
for j := range m.Labels {
|
||||
// reserve the stats func result name with a new label `stats_result` instead of dropping it,
|
||||
// since there could be multiple stats results in a single query, for instance:
|
||||
// _time:5m | stats quantile(0.5, request_duration_seconds) p50, quantile(0.9, request_duration_seconds) p90
|
||||
if m.Labels[j].Name == "__name__" {
|
||||
m.Labels[j].Name = "stats_result"
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func parseVLogsRangeResponse(resp *http.Response) (res Result, err error) {
|
||||
res, err = parsePrometheusRangeResponse(resp)
|
||||
if err != nil {
|
||||
return Result{}, err
|
||||
}
|
||||
|
||||
@@ -227,14 +227,13 @@ func newManager(ctx context.Context) (*manager, error) {
|
||||
labels[s[:n]] = s[n+1:]
|
||||
}
|
||||
|
||||
nts, err := notifier.Init(labels, *externalURL)
|
||||
err = notifier.Init(labels, *externalURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init notifier: %w", err)
|
||||
}
|
||||
manager := &manager{
|
||||
groups: make(map[uint64]*rule.Group),
|
||||
querierBuilder: q,
|
||||
notifiers: nts,
|
||||
labels: labels,
|
||||
}
|
||||
rw, err := remotewrite.Init(ctx)
|
||||
|
||||
@@ -96,9 +96,10 @@ groups:
|
||||
querierBuilder: &datasource.FakeQuerier{},
|
||||
groups: make(map[uint64]*rule.Group),
|
||||
labels: map[string]string{},
|
||||
notifiers: func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} },
|
||||
rw: &remotewrite.Client{},
|
||||
}
|
||||
_, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
|
||||
syncCh := make(chan struct{})
|
||||
sighupCh := procutil.NewSighupChan()
|
||||
|
||||
@@ -16,7 +16,6 @@ import (
|
||||
// manager controls group states
|
||||
type manager struct {
|
||||
querierBuilder datasource.QuerierBuilder
|
||||
notifiers func() []notifier.Notifier
|
||||
|
||||
rw remotewrite.RWClient
|
||||
// remote read builder.
|
||||
@@ -94,17 +93,16 @@ func (m *manager) close() {
|
||||
}
|
||||
|
||||
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
|
||||
m.wg.Add(1)
|
||||
id := g.GetID()
|
||||
g.Init()
|
||||
go func() {
|
||||
defer m.wg.Done()
|
||||
m.wg.Go(func() {
|
||||
if restore {
|
||||
g.Start(ctx, m.notifiers, m.rw, m.rr)
|
||||
g.Start(ctx, m.rw, m.rr)
|
||||
} else {
|
||||
g.Start(ctx, m.notifiers, m.rw, nil)
|
||||
g.Start(ctx, m.rw, nil)
|
||||
}
|
||||
}()
|
||||
})
|
||||
|
||||
m.groups[id] = g
|
||||
return nil
|
||||
}
|
||||
@@ -131,7 +129,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
if rrPresent && m.rw == nil {
|
||||
return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set")
|
||||
}
|
||||
if arPresent && m.notifiers == nil {
|
||||
if arPresent && notifier.GetTargets() == nil {
|
||||
return fmt.Errorf("config contains alerting rules but neither `-notifier.url` nor `-notifier.config` nor `-notifier.blackhole` aren't set")
|
||||
}
|
||||
|
||||
@@ -168,15 +166,15 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
if len(toUpdate) > 0 {
|
||||
var wg sync.WaitGroup
|
||||
for _, item := range toUpdate {
|
||||
wg.Add(1)
|
||||
// cancel evaluation so the Update will be applied as fast as possible.
|
||||
// it is important to call InterruptEval before the update, because cancel fn
|
||||
// can be re-assigned during the update.
|
||||
item.old.InterruptEval()
|
||||
go func(oldGroup *rule.Group, newGroup *rule.Group) {
|
||||
oldGroup.UpdateWith(newGroup)
|
||||
wg.Done()
|
||||
}(item.old, item.new)
|
||||
oldG := item.old
|
||||
newG := item.new
|
||||
wg.Go(func() {
|
||||
// cancel evaluation so the Update will be applied as fast as possible.
|
||||
// it is important to call InterruptEval before the update, because cancel fn
|
||||
// can be re-assigned during the update.
|
||||
oldG.InterruptEval()
|
||||
oldG.UpdateWith(newG)
|
||||
})
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
@@ -40,10 +40,11 @@ func TestManagerEmptyRulesDir(t *testing.T) {
|
||||
// execution of configuration update.
|
||||
// Should be executed with -race flag
|
||||
func TestManagerUpdateConcurrent(t *testing.T) {
|
||||
_, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
m := &manager{
|
||||
groups: make(map[uint64]*rule.Group),
|
||||
querierBuilder: &datasource.FakeQuerier{},
|
||||
notifiers: func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} },
|
||||
}
|
||||
paths := []string{
|
||||
"config/testdata/dir/rules0-good.rules",
|
||||
@@ -127,8 +128,9 @@ func TestManagerUpdate_Success(t *testing.T) {
|
||||
m := &manager{
|
||||
groups: make(map[uint64]*rule.Group),
|
||||
querierBuilder: &datasource.FakeQuerier{},
|
||||
notifiers: func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} },
|
||||
}
|
||||
_, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
|
||||
cfgInit := loadCfg(t, []string{initPath}, true, true)
|
||||
if err := m.update(ctx, cfgInit, false); err != nil {
|
||||
@@ -277,7 +279,8 @@ func TestManagerUpdate_Failure(t *testing.T) {
|
||||
rw: rw,
|
||||
}
|
||||
if notifiers != nil {
|
||||
m.notifiers = func() []notifier.Notifier { return notifiers }
|
||||
_, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
}
|
||||
err := m.update(context.Background(), []config.Group{cfg}, false)
|
||||
if err == nil {
|
||||
|
||||
@@ -20,7 +20,7 @@ func TestAlertExecTemplate(t *testing.T) {
|
||||
)
|
||||
extLabels["cluster"] = extCluster
|
||||
extLabels["dc"] = extDC
|
||||
_, err := Init(extLabels, extURL)
|
||||
err := Init(extLabels, extURL)
|
||||
checkErr(t, err)
|
||||
|
||||
f := func(alert *Alert, annotations map[string]string, tplExpected map[string]string) {
|
||||
|
||||
@@ -77,10 +77,13 @@ func (am *AlertManager) LastError() string {
|
||||
}
|
||||
|
||||
// Send an alert or resolve message
|
||||
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[string]string) error {
|
||||
func (am *AlertManager) Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
|
||||
if len(alerts) != len(alertLabels) {
|
||||
return fmt.Errorf("mismatched number of alerts and label sets after global alert relabeling")
|
||||
}
|
||||
am.metrics.alertsSent.Add(len(alerts))
|
||||
startTime := time.Now()
|
||||
err := am.send(ctx, alerts, headers)
|
||||
err := am.send(ctx, alerts, alertLabels, headers)
|
||||
am.metrics.alertsSendDuration.UpdateDuration(startTime)
|
||||
if err != nil {
|
||||
am.metrics.alertsSendErrors.Add(len(alerts))
|
||||
@@ -91,12 +94,15 @@ func (am *AlertManager) Send(ctx context.Context, alerts []Alert, headers map[st
|
||||
return err
|
||||
}
|
||||
|
||||
func (am *AlertManager) send(ctx context.Context, alerts []Alert, headers map[string]string) error {
|
||||
func (am *AlertManager) send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, headers map[string]string) error {
|
||||
b := &bytes.Buffer{}
|
||||
alertsToSend := make([]Alert, 0, len(alerts))
|
||||
lblss := make([][]prompb.Label, 0, len(alerts))
|
||||
for _, a := range alerts {
|
||||
lbls := a.applyRelabelingIfNeeded(am.relabelConfigs)
|
||||
for i, a := range alerts {
|
||||
lbls := alertLabels[i]
|
||||
if am.relabelConfigs != nil {
|
||||
lbls = am.relabelConfigs.Apply(lbls, 0)
|
||||
}
|
||||
if len(lbls) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
)
|
||||
|
||||
@@ -145,11 +146,11 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
|
||||
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
|
||||
t.Fatalf("expected connection error got nil")
|
||||
}
|
||||
|
||||
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, nil); err == nil {
|
||||
if err := am.Send(context.Background(), []Alert{{Labels: map[string]string{"a": "b"}}}, [][]prompb.Label{{{Name: "a", Value: "b"}}}, nil); err == nil {
|
||||
t.Fatalf("expected wrong http code error got nil")
|
||||
}
|
||||
|
||||
@@ -160,7 +161,7 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
End: time.Now().UTC(),
|
||||
Labels: map[string]string{"alertname": "alert0"},
|
||||
Annotations: map[string]string{"a": "b", "c": "d"},
|
||||
}}, map[string]string{headerKey: "bar"}); err != nil {
|
||||
}}, [][]prompb.Label{{{Name: "alertname", Value: "alert0"}}}, map[string]string{headerKey: "bar"}); err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
|
||||
@@ -174,7 +175,7 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
Name: "alert2",
|
||||
Labels: map[string]string{"rule": "test", "tenant": "1"},
|
||||
},
|
||||
}, map[string]string{headerKey: "bar"}); err != nil {
|
||||
}, [][]prompb.Label{{{Name: "rule", Value: "test"}, {Name: "tenant", Value: "0"}}, {{Name: "rule", Value: "test"}, {Name: "tenant", Value: "1"}}}, map[string]string{headerKey: "bar"}); err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
|
||||
@@ -187,7 +188,7 @@ func TestAlertManager_Send(t *testing.T) {
|
||||
Name: "alert2",
|
||||
Labels: map[string]string{},
|
||||
},
|
||||
}, map[string]string{}); err != nil {
|
||||
}, [][]prompb.Label{{{Name: "rule", Value: "test"}}, {{}}}, map[string]string{}); err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
|
||||
|
||||
@@ -27,15 +27,9 @@ type Config struct {
|
||||
// PathPrefix is added to URL path before adding alertManagerPath value
|
||||
PathPrefix string `yaml:"path_prefix,omitempty"`
|
||||
|
||||
// ConsulSDConfigs contains list of settings for service discovery via Consul
|
||||
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
|
||||
ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"`
|
||||
// DNSSDConfigs contains list of settings for service discovery via DNS.
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
|
||||
DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"`
|
||||
|
||||
// StaticConfigs contains list of static targets
|
||||
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
|
||||
ConsulSDConfigs []ConsulSDConfigs `yaml:"consul_sd_configs,omitempty"`
|
||||
DNSSDConfigs []DNSSDConfigs `yaml:"dns_sd_configs,omitempty"`
|
||||
StaticConfigs []StaticConfig `yaml:"static_configs,omitempty"`
|
||||
|
||||
// HTTPClientConfig contains HTTP configuration for Notifier clients
|
||||
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
||||
@@ -62,14 +56,29 @@ type Config struct {
|
||||
parsedAlertRelabelConfigs *promrelabel.ParsedConfigs
|
||||
}
|
||||
|
||||
// StaticConfig contains list of static targets in the following form:
|
||||
// staticConfig contains list of static targets in the following form:
|
||||
//
|
||||
// targets:
|
||||
// [ - '<host>' ]
|
||||
type StaticConfig struct {
|
||||
Targets []string `yaml:"targets"`
|
||||
// HTTPClientConfig contains HTTP configuration for the Targets
|
||||
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
||||
HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"`
|
||||
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
||||
}
|
||||
|
||||
// ConsulSDConfigs contains list of settings for service discovery via Consul,
|
||||
// see https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config
|
||||
type ConsulSDConfigs struct {
|
||||
consul.SDConfig `yaml:",inline"`
|
||||
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
||||
}
|
||||
|
||||
// DNSSDConfigs contains list of settings for service discovery via DNS,
|
||||
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config
|
||||
type DNSSDConfigs struct {
|
||||
dns.SDConfig `yaml:",inline"`
|
||||
AlertRelabelConfigs []promrelabel.RelabelConfig `yaml:"alert_relabel_configs,omitempty"`
|
||||
}
|
||||
|
||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||
@@ -95,6 +104,31 @@ func (cfg *Config) UnmarshalYAML(unmarshal func(any) error) error {
|
||||
}
|
||||
cfg.parsedAlertRelabelConfigs = arCfg
|
||||
|
||||
for _, s := range cfg.StaticConfigs {
|
||||
if len(s.AlertRelabelConfigs) > 0 {
|
||||
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse alert_relabel_configs in static_config: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, s := range cfg.ConsulSDConfigs {
|
||||
if len(s.AlertRelabelConfigs) > 0 {
|
||||
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse alert_relabel_configs in consul_sd_config: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, s := range cfg.DNSSDConfigs {
|
||||
if len(s.AlertRelabelConfigs) > 0 {
|
||||
_, err := promrelabel.ParseRelabelConfigs(s.AlertRelabelConfigs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse alert_relabel_configs in dns_sd_config: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
b, err := yaml.Marshal(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal configuration for checksum: %w", err)
|
||||
|
||||
@@ -35,4 +35,6 @@ func TestParseConfig_Failure(t *testing.T) {
|
||||
|
||||
f("testdata/unknownFields.bad.yaml", "unknown field")
|
||||
f("non-existing-file", "error reading")
|
||||
f("testdata/consul.bad.yaml", "failed to parse alert_relabel_configs in consul_sd_config")
|
||||
f("testdata/dns.bad.yaml", "failed to parse alert relabeling config")
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
@@ -28,11 +29,7 @@ type configWatcher struct {
|
||||
targets map[TargetType][]Target
|
||||
}
|
||||
|
||||
func newWatcher(path string, gen AlertURLGenerator) (*configWatcher, error) {
|
||||
cfg, err := parseConfig(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
func newWatcher(cfg *Config, gen AlertURLGenerator) (*configWatcher, error) {
|
||||
cw := &configWatcher{
|
||||
cfg: cfg,
|
||||
wg: sync.WaitGroup{},
|
||||
@@ -88,18 +85,15 @@ func (cw *configWatcher) reload(path string) error {
|
||||
return cw.start()
|
||||
}
|
||||
|
||||
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn getLabels) error {
|
||||
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
|
||||
func (cw *configWatcher) add(typeK TargetType, interval time.Duration, targetsFn getTargets) error {
|
||||
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
|
||||
for _, err := range errors {
|
||||
return fmt.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
}
|
||||
|
||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||
|
||||
cw.wg.Add(1)
|
||||
go func() {
|
||||
defer cw.wg.Done()
|
||||
|
||||
cw.wg.Go(func() {
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
|
||||
@@ -109,62 +103,77 @@ func (cw *configWatcher) add(typeK TargetType, interval time.Duration, labelsFn
|
||||
return
|
||||
case <-ticker.C:
|
||||
}
|
||||
targetMetadata, errors := getTargetMetadata(labelsFn, cw.cfg)
|
||||
targetMetadata, errors := getTargetMetadata(targetsFn, cw.cfg)
|
||||
for _, err := range errors {
|
||||
logger.Errorf("failed to init notifier for %q: %w", typeK, err)
|
||||
}
|
||||
cw.updateTargets(typeK, targetMetadata, cw.cfg, cw.genFn)
|
||||
}
|
||||
}()
|
||||
})
|
||||
return nil
|
||||
}
|
||||
|
||||
func getTargetMetadata(labelsFn getLabels, cfg *Config) (map[string]*promutil.Labels, []error) {
|
||||
metaLabels, err := labelsFn()
|
||||
type targetMetadata struct {
|
||||
*promutil.Labels
|
||||
alertRelabelConfigs *promrelabel.ParsedConfigs
|
||||
}
|
||||
|
||||
func getTargetMetadata(targetsFn getTargets, cfg *Config) (map[string]targetMetadata, []error) {
|
||||
metaLabelsList, alertRelabelCfgs, err := targetsFn()
|
||||
if err != nil {
|
||||
return nil, []error{fmt.Errorf("failed to get labels: %w", err)}
|
||||
}
|
||||
targetMetadata := make(map[string]*promutil.Labels, len(metaLabels))
|
||||
targetMts := make(map[string]targetMetadata, len(metaLabelsList))
|
||||
var errors []error
|
||||
duplicates := make(map[string]struct{})
|
||||
for _, labels := range metaLabels {
|
||||
target := labels.Get("__address__")
|
||||
u, processedLabels, err := parseLabels(target, labels, cfg)
|
||||
if err != nil {
|
||||
errors = append(errors, err)
|
||||
continue
|
||||
}
|
||||
if len(u) == 0 {
|
||||
continue
|
||||
}
|
||||
if _, ok := duplicates[u]; ok { // check for duplicates
|
||||
if !*suppressDuplicateTargetErrors {
|
||||
logger.Errorf("skipping duplicate target with identical address %q; "+
|
||||
"make sure service discovery and relabeling is set up properly; "+
|
||||
"original labels: %s; resulting labels: %s",
|
||||
u, labels, processedLabels)
|
||||
for i := range metaLabelsList {
|
||||
metaLabels := metaLabelsList[i]
|
||||
alertRelabelCfg := alertRelabelCfgs[i]
|
||||
for _, labels := range metaLabels {
|
||||
target := labels.Get("__address__")
|
||||
u, processedLabels, err := parseLabels(target, labels, cfg)
|
||||
if err != nil {
|
||||
errors = append(errors, err)
|
||||
continue
|
||||
}
|
||||
if len(u) == 0 {
|
||||
continue
|
||||
}
|
||||
// check for duplicated targets
|
||||
// targets with same address but different alert_relabel_configs are still considered duplicates since it's mostly due to misconfiguration and could cause duplicated notifications.
|
||||
if _, ok := duplicates[u]; ok {
|
||||
if !*suppressDuplicateTargetErrors {
|
||||
logger.Errorf("skipping duplicate target with identical address %q; "+
|
||||
"make sure service discovery and relabeling is set up properly; "+
|
||||
"original labels: %s; resulting labels: %s",
|
||||
u, labels, processedLabels)
|
||||
}
|
||||
continue
|
||||
}
|
||||
duplicates[u] = struct{}{}
|
||||
targetMts[u] = targetMetadata{
|
||||
Labels: processedLabels,
|
||||
alertRelabelConfigs: alertRelabelCfg,
|
||||
}
|
||||
continue
|
||||
}
|
||||
duplicates[u] = struct{}{}
|
||||
targetMetadata[u] = processedLabels
|
||||
}
|
||||
return targetMetadata, errors
|
||||
return targetMts, errors
|
||||
}
|
||||
|
||||
type getLabels func() ([]*promutil.Labels, error)
|
||||
type getTargets func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error)
|
||||
|
||||
func (cw *configWatcher) start() error {
|
||||
if len(cw.cfg.StaticConfigs) > 0 {
|
||||
var targets []Target
|
||||
for _, cfg := range cw.cfg.StaticConfigs {
|
||||
for i, cfg := range cw.cfg.StaticConfigs {
|
||||
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.StaticConfigs[i].AlertRelabelConfigs)
|
||||
httpCfg := mergeHTTPClientConfigs(cw.cfg.HTTPClientConfig, cfg.HTTPClientConfig)
|
||||
for _, target := range cfg.Targets {
|
||||
address, labels, err := parseLabels(target, nil, cw.cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse labels for target %q: %w", target, err)
|
||||
}
|
||||
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, cw.cfg.parsedAlertRelabelConfigs, cw.cfg.Timeout.Duration())
|
||||
notifier, err := NewAlertManager(address, cw.genFn, httpCfg, alertRelabelConfig, cw.cfg.Timeout.Duration())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to init alertmanager for addr %q: %w", address, err)
|
||||
}
|
||||
@@ -178,17 +187,20 @@ func (cw *configWatcher) start() error {
|
||||
}
|
||||
|
||||
if len(cw.cfg.ConsulSDConfigs) > 0 {
|
||||
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([]*promutil.Labels, error) {
|
||||
var labels []*promutil.Labels
|
||||
err := cw.add(TargetConsul, *consul.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
|
||||
var labels [][]*promutil.Labels
|
||||
var alertRelabelConfigs []*promrelabel.ParsedConfigs
|
||||
for i := range cw.cfg.ConsulSDConfigs {
|
||||
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.ConsulSDConfigs[i].AlertRelabelConfigs)
|
||||
sdc := &cw.cfg.ConsulSDConfigs[i]
|
||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("got labels err: %w", err)
|
||||
return nil, nil, fmt.Errorf("got labels err: %w", err)
|
||||
}
|
||||
labels = append(labels, targetLabels...)
|
||||
labels = append(labels, targetLabels)
|
||||
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
|
||||
}
|
||||
return labels, nil
|
||||
return labels, alertRelabelConfigs, nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start consulSD discovery: %w", err)
|
||||
@@ -196,17 +208,21 @@ func (cw *configWatcher) start() error {
|
||||
}
|
||||
|
||||
if len(cw.cfg.DNSSDConfigs) > 0 {
|
||||
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([]*promutil.Labels, error) {
|
||||
var labels []*promutil.Labels
|
||||
err := cw.add(TargetDNS, *dns.SDCheckInterval, func() ([][]*promutil.Labels, []*promrelabel.ParsedConfigs, error) {
|
||||
var labels [][]*promutil.Labels
|
||||
var alertRelabelConfigs []*promrelabel.ParsedConfigs
|
||||
for i := range cw.cfg.DNSSDConfigs {
|
||||
alertRelabelConfig, _ := promrelabel.ParseRelabelConfigs(cw.cfg.DNSSDConfigs[i].AlertRelabelConfigs)
|
||||
sdc := &cw.cfg.DNSSDConfigs[i]
|
||||
targetLabels, err := sdc.GetLabels(cw.cfg.baseDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("got labels err: %w", err)
|
||||
return nil, nil, fmt.Errorf("got labels err: %w", err)
|
||||
}
|
||||
labels = append(labels, targetLabels...)
|
||||
labels = append(labels, targetLabels)
|
||||
alertRelabelConfigs = append(alertRelabelConfigs, alertRelabelConfig)
|
||||
|
||||
}
|
||||
return labels, nil
|
||||
return labels, alertRelabelConfigs, nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to start DNSSD discovery: %w", err)
|
||||
@@ -240,30 +256,30 @@ func (cw *configWatcher) setTargets(key TargetType, targets []Target) {
|
||||
cw.targetsMu.Unlock()
|
||||
}
|
||||
|
||||
func (cw *configWatcher) updateTargets(key TargetType, targetMetadata map[string]*promutil.Labels, cfg *Config, genFn AlertURLGenerator) {
|
||||
func (cw *configWatcher) updateTargets(key TargetType, targetMts map[string]targetMetadata, cfg *Config, genFn AlertURLGenerator) {
|
||||
cw.targetsMu.Lock()
|
||||
defer cw.targetsMu.Unlock()
|
||||
oldTargets := cw.targets[key]
|
||||
var updatedTargets []Target
|
||||
for _, ot := range oldTargets {
|
||||
if _, ok := targetMetadata[ot.Addr()]; !ok {
|
||||
if _, ok := targetMts[ot.Addr()]; !ok {
|
||||
// if the target is no longer present among the discovered targets, close it
|
||||
ot.Close()
|
||||
} else {
|
||||
updatedTargets = append(updatedTargets, ot)
|
||||
delete(targetMetadata, ot.Addr())
|
||||
delete(targetMts, ot.Addr())
|
||||
}
|
||||
}
|
||||
// create new resources for the new targets
|
||||
for addr, labels := range targetMetadata {
|
||||
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, cfg.parsedAlertRelabelConfigs, cfg.Timeout.Duration())
|
||||
for addr, metadata := range targetMts {
|
||||
am, err := NewAlertManager(addr, genFn, cfg.HTTPClientConfig, metadata.alertRelabelConfigs, cfg.Timeout.Duration())
|
||||
if err != nil {
|
||||
logger.Errorf("failed to init %s notifier with addr %q: %w", key, addr, err)
|
||||
continue
|
||||
}
|
||||
updatedTargets = append(updatedTargets, Target{
|
||||
Notifier: am,
|
||||
Labels: labels,
|
||||
Labels: metadata.Labels,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -28,7 +29,11 @@ static_configs:
|
||||
- localhost:9093
|
||||
- localhost:9094
|
||||
`)
|
||||
cw, err := newWatcher(f.Name(), nil)
|
||||
cfg, err := parseConfig(f.Name())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse config: %s", err)
|
||||
}
|
||||
cw, err := newWatcher(cfg, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to start config watcher: %s", err)
|
||||
}
|
||||
@@ -83,33 +88,64 @@ consul_sd_configs:
|
||||
- server: %s
|
||||
services:
|
||||
- alertmanager
|
||||
`, consulSDServer.URL))
|
||||
- server: %s
|
||||
services:
|
||||
- alertmanager
|
||||
alert_relabel_configs:
|
||||
- target_label: "foo"
|
||||
replacement: "tar"
|
||||
`, consulSDServer.URL, consulSDServer.URL))
|
||||
|
||||
cw, err := newWatcher(consulSDFile.Name(), nil)
|
||||
cfg, err := parseConfig(consulSDFile.Name())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse config: %s", err)
|
||||
}
|
||||
cw, err := newWatcher(cfg, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to start config watcher: %s", err)
|
||||
}
|
||||
defer cw.mustStop()
|
||||
|
||||
if len(cw.notifiers()) != 2 {
|
||||
t.Fatalf("expected to get 2 notifiers; got %d", len(cw.notifiers()))
|
||||
if len(cw.notifiers()) != 3 {
|
||||
t.Fatalf("expected to get 3 notifiers; got %d", len(cw.notifiers()))
|
||||
}
|
||||
|
||||
expAddr1 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService1)
|
||||
expAddr2 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService2)
|
||||
expAddr3 := fmt.Sprintf("https://%s/proxy/api/v2/alerts", fakeConsulService3)
|
||||
|
||||
n1, n2 := cw.notifiers()[0], cw.notifiers()[1]
|
||||
n1, n2, n3 := cw.notifiers()[0], cw.notifiers()[1], cw.notifiers()[2]
|
||||
if n1.Addr() != expAddr1 {
|
||||
t.Fatalf("exp address %q; got %q", expAddr1, n1.Addr())
|
||||
}
|
||||
if n2.Addr() != expAddr2 {
|
||||
t.Fatalf("exp address %q; got %q", expAddr2, n2.Addr())
|
||||
}
|
||||
if n3.Addr() != expAddr3 {
|
||||
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
|
||||
}
|
||||
|
||||
if n1.(*AlertManager).relabelConfigs.String() != "" {
|
||||
t.Fatalf("unexpected relabel configs: %q", n1.(*AlertManager).relabelConfigs.String())
|
||||
}
|
||||
if n2.(*AlertManager).relabelConfigs.String() != "" {
|
||||
t.Fatalf("unexpected relabel configs: %q", n2.(*AlertManager).relabelConfigs.String())
|
||||
}
|
||||
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
|
||||
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
|
||||
}
|
||||
|
||||
f := func() bool { return len(cw.notifiers()) == 1 }
|
||||
if !waitFor(f, time.Second) {
|
||||
t.Fatalf("expected to get 1 notifiers; got %d", len(cw.notifiers()))
|
||||
}
|
||||
n3 = cw.notifiers()[0]
|
||||
if n3.Addr() != expAddr3 {
|
||||
t.Fatalf("exp address %q; got %q", expAddr3, n3.Addr())
|
||||
}
|
||||
if n3.(*AlertManager).relabelConfigs.String() != "- target_label: foo\n replacement: tar\n" {
|
||||
t.Fatalf("unexpected relabel configs: %q", n3.(*AlertManager).relabelConfigs.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestConfigWatcherReloadConcurrent is supposed to test concurrent
|
||||
@@ -164,7 +200,11 @@ consul_sd_configs:
|
||||
"unknownFields.bad.yaml",
|
||||
}
|
||||
|
||||
cw, err := newWatcher(paths[0], nil)
|
||||
cfg, err := parseConfig(paths[0])
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse config: %s", err)
|
||||
}
|
||||
cw, err := newWatcher(cfg, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to start config watcher: %s", err)
|
||||
}
|
||||
@@ -202,10 +242,11 @@ func checkErr(t *testing.T, err error) {
|
||||
const (
|
||||
fakeConsulService1 = "127.0.0.1:9093"
|
||||
fakeConsulService2 = "127.0.0.1:9095"
|
||||
fakeConsulService3 = "127.0.0.1:9097"
|
||||
)
|
||||
|
||||
func newFakeConsulServer() *httptest.Server {
|
||||
requestCount := 0
|
||||
var requestCount atomic.Int32
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/v1/agent/self", func(rw http.ResponseWriter, _ *http.Request) {
|
||||
rw.Write([]byte(`{"Config": {"Datacenter": "dc1"}}`))
|
||||
@@ -220,7 +261,7 @@ func newFakeConsulServer() *httptest.Server {
|
||||
}`))
|
||||
})
|
||||
mux.HandleFunc("/v1/health/service/alertmanager", func(rw http.ResponseWriter, _ *http.Request) {
|
||||
if requestCount == 0 {
|
||||
if requestCount.Load() == 0 {
|
||||
rw.Header().Set("X-Consul-Index", "1")
|
||||
rw.Write([]byte(`
|
||||
[
|
||||
@@ -360,7 +401,7 @@ func newFakeConsulServer() *httptest.Server {
|
||||
}
|
||||
]`))
|
||||
}
|
||||
requestCount++
|
||||
requestCount.Add(1)
|
||||
})
|
||||
|
||||
return httptest.NewServer(mux)
|
||||
|
||||
@@ -5,6 +5,8 @@ import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
// FakeNotifier is a mock notifier
|
||||
@@ -15,6 +17,19 @@ type FakeNotifier struct {
|
||||
counter int
|
||||
}
|
||||
|
||||
// InitFakeNotifier initializes global notifier to FakeNotifier,
|
||||
// and returns a cleanup function to restore the original getActiveNotifiers.
|
||||
func InitFakeNotifier() (*FakeNotifier, func()) {
|
||||
originalGetActiveNotifiers := getActiveNotifiers
|
||||
fn := &FakeNotifier{}
|
||||
getActiveNotifiers = func() []Notifier {
|
||||
return []Notifier{fn}
|
||||
}
|
||||
return fn, func() {
|
||||
getActiveNotifiers = originalGetActiveNotifiers
|
||||
}
|
||||
}
|
||||
|
||||
// Close does nothing
|
||||
func (*FakeNotifier) Close() {}
|
||||
|
||||
@@ -27,7 +42,7 @@ func (*FakeNotifier) LastError() string {
|
||||
func (*FakeNotifier) Addr() string { return "" }
|
||||
|
||||
// Send sets alerts and increases counter
|
||||
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error {
|
||||
func (fn *FakeNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error {
|
||||
fn.Lock()
|
||||
defer fn.Unlock()
|
||||
fn.counter += len(alerts)
|
||||
|
||||
@@ -1,17 +1,22 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutil"
|
||||
)
|
||||
|
||||
@@ -96,11 +101,25 @@ func InitAlertURLGeneratorFn(externalURL *url.URL, externalAlertSource string, v
|
||||
return nil
|
||||
}
|
||||
|
||||
// cw holds a configWatcher for configPath configuration file
|
||||
// configWatcher provides a list of Notifier objects discovered
|
||||
// from static config or via service discovery.
|
||||
// cw is not nil only if configPath is provided.
|
||||
var cw *configWatcher
|
||||
var (
|
||||
// getActiveNotifiers returns the current list of Notifier objects.
|
||||
getActiveNotifiers func() []Notifier
|
||||
// globalRelabelCfg stores the parsed alert relabeling config from the config file, if any.
|
||||
globalRelabelCfg *promrelabel.ParsedConfigs
|
||||
|
||||
// cw holds a configWatcher for configPath configuration file
|
||||
// configWatcher provides a list of Notifier objects discovered
|
||||
// from static config or via service discovery.
|
||||
// cw is not nil only if configPath is provided.
|
||||
cw *configWatcher
|
||||
|
||||
// externalLabels is a global variable for holding external labels configured via flags
|
||||
// It is supposed to be inited via Init function only.
|
||||
externalLabels map[string]string
|
||||
// externalURL is a global variable for holding external URL value configured via flag
|
||||
// It is supposed to be inited via Init function only.
|
||||
externalURL string
|
||||
)
|
||||
|
||||
// Reload checks the changes in configPath configuration file
|
||||
// and applies changes if any.
|
||||
@@ -111,66 +130,62 @@ func Reload() error {
|
||||
return cw.reload(*configPath)
|
||||
}
|
||||
|
||||
var staticNotifiersFn func() []Notifier
|
||||
|
||||
var (
|
||||
// externalLabels is a global variable for holding external labels configured via flags
|
||||
// It is supposed to be inited via Init function only.
|
||||
externalLabels map[string]string
|
||||
// externalURL is a global variable for holding external URL value configured via flag
|
||||
// It is supposed to be inited via Init function only.
|
||||
externalURL string
|
||||
)
|
||||
|
||||
// Init configures the package-level list of active Notifier objects.
|
||||
// Init works in two modes:
|
||||
// - configuration via flags (for backward compatibility). Is always static
|
||||
// and doesn't support live reloads.
|
||||
// - configuration via file. Supports live reloads and service discovery.
|
||||
//
|
||||
// Init returns an error if both modes are used.
|
||||
func Init(extLabels map[string]string, extURL string) (func() []Notifier, error) {
|
||||
func Init(extLabels map[string]string, extURL string) error {
|
||||
externalURL = extURL
|
||||
externalLabels = extLabels
|
||||
_, err := url.Parse(externalURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse external URL: %w", err)
|
||||
return fmt.Errorf("failed to parse external URL: %w", err)
|
||||
}
|
||||
|
||||
if *blackHole {
|
||||
if len(*addrs) > 0 || *configPath != "" {
|
||||
return nil, fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
|
||||
return fmt.Errorf("only one of -notifier.blackhole, -notifier.url and -notifier.config flags must be specified")
|
||||
}
|
||||
notifier := newBlackHoleNotifier()
|
||||
staticNotifiersFn = func() []Notifier {
|
||||
getActiveNotifiers = func() []Notifier {
|
||||
return []Notifier{notifier}
|
||||
}
|
||||
return staticNotifiersFn, nil
|
||||
return nil
|
||||
}
|
||||
|
||||
if *configPath == "" && len(*addrs) == 0 {
|
||||
return nil, nil
|
||||
return nil
|
||||
}
|
||||
if *configPath != "" && len(*addrs) > 0 {
|
||||
return nil, fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
|
||||
return fmt.Errorf("only one of -notifier.config or -notifier.url flags must be specified")
|
||||
}
|
||||
|
||||
if len(*addrs) > 0 {
|
||||
notifiers, err := notifiersFromFlags(AlertURLGeneratorFn)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create notifier from flag values: %w", err)
|
||||
return fmt.Errorf("failed to create notifier from flag values: %w", err)
|
||||
}
|
||||
staticNotifiersFn = func() []Notifier {
|
||||
getActiveNotifiers = func() []Notifier {
|
||||
return notifiers
|
||||
}
|
||||
return staticNotifiersFn, nil
|
||||
return nil
|
||||
}
|
||||
|
||||
cw, err = newWatcher(*configPath, AlertURLGeneratorFn)
|
||||
cfg, err := parseConfig(*configPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init config watcher: %w", err)
|
||||
return err
|
||||
}
|
||||
return cw.notifiers, nil
|
||||
if cfg.AlertRelabelConfigs != nil {
|
||||
globalRelabelCfg = cfg.parsedAlertRelabelConfigs
|
||||
}
|
||||
cw, err = newWatcher(cfg, AlertURLGeneratorFn)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to init config watcher: %w", err)
|
||||
}
|
||||
getActiveNotifiers = cw.notifiers
|
||||
return nil
|
||||
}
|
||||
|
||||
// InitSecretFlags must be called after flag.Parse and before any logging
|
||||
@@ -245,23 +260,57 @@ const (
|
||||
|
||||
// GetTargets returns list of static or discovered targets
|
||||
// via notifier configuration.
|
||||
//
|
||||
// Must be called after Init.
|
||||
func GetTargets() map[TargetType][]Target {
|
||||
var targets = make(map[TargetType][]Target)
|
||||
|
||||
if staticNotifiersFn != nil {
|
||||
for _, ns := range staticNotifiersFn() {
|
||||
targets[TargetStatic] = append(targets[TargetStatic], Target{
|
||||
Notifier: ns,
|
||||
})
|
||||
}
|
||||
if getActiveNotifiers == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
var targets = make(map[TargetType][]Target)
|
||||
// use cached targets from configWatcher instead of getActiveNotifiers for the extra target labels
|
||||
if cw != nil {
|
||||
cw.targetsMu.RLock()
|
||||
for key, ns := range cw.targets {
|
||||
targets[key] = append(targets[key], ns...)
|
||||
}
|
||||
cw.targetsMu.RUnlock()
|
||||
return targets
|
||||
}
|
||||
|
||||
// static notifiers don't have labels
|
||||
for _, ns := range getActiveNotifiers() {
|
||||
targets[TargetStatic] = append(targets[TargetStatic], Target{
|
||||
Notifier: ns,
|
||||
})
|
||||
}
|
||||
return targets
|
||||
}
|
||||
|
||||
// Send sends alerts to all active notifiers
|
||||
func Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) *vmalertutil.ErrGroup {
|
||||
alertsToSend := make([]Alert, 0, len(alerts))
|
||||
lblss := make([][]prompb.Label, 0, len(alerts))
|
||||
// apply global relabel config first without modifying original alerts in alerts
|
||||
for _, a := range alerts {
|
||||
lbls := a.applyRelabelingIfNeeded(globalRelabelCfg)
|
||||
if len(lbls) == 0 {
|
||||
continue
|
||||
}
|
||||
alertsToSend = append(alertsToSend, a)
|
||||
lblss = append(lblss, lbls)
|
||||
}
|
||||
|
||||
errGr := new(vmalertutil.ErrGroup)
|
||||
wg := sync.WaitGroup{}
|
||||
activeNotifiers := getActiveNotifiers()
|
||||
for i := range activeNotifiers {
|
||||
nt := activeNotifiers[i]
|
||||
wg.Go(func() {
|
||||
if err := nt.Send(ctx, alertsToSend, lblss, notifierHeaders); err != nil {
|
||||
errGr.Add(fmt.Errorf("failed to send alerts to addr %q: %w", nt.Addr(), err))
|
||||
}
|
||||
})
|
||||
}
|
||||
wg.Wait()
|
||||
return errGr
|
||||
}
|
||||
|
||||
@@ -1,11 +1,17 @@
|
||||
package notifier
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
||||
)
|
||||
|
||||
func TestInit(t *testing.T) {
|
||||
@@ -14,14 +20,13 @@ func TestInit(t *testing.T) {
|
||||
|
||||
*addrs = flagutil.ArrayString{"127.0.0.1", "127.0.0.2"}
|
||||
|
||||
fn, err := Init(nil, "")
|
||||
err := Init(nil, "")
|
||||
if err != nil {
|
||||
t.Fatalf("%s", err)
|
||||
}
|
||||
|
||||
nfs := fn()
|
||||
if len(nfs) != 2 {
|
||||
t.Fatalf("expected to get 2 notifiers; got %d", len(nfs))
|
||||
if len(getActiveNotifiers()) != 2 {
|
||||
t.Fatalf("expected to get 2 notifiers; got %d", len(getActiveNotifiers()))
|
||||
}
|
||||
|
||||
targets := GetTargets()
|
||||
@@ -54,7 +59,7 @@ func TestInitNegative(t *testing.T) {
|
||||
*configPath = path
|
||||
*addrs = flagutil.ArrayString{addr}
|
||||
*blackHole = bh
|
||||
if _, err := Init(nil, ""); err == nil {
|
||||
if err := Init(nil, ""); err == nil {
|
||||
t.Fatalf("expected to get error; got nil instead")
|
||||
}
|
||||
}
|
||||
@@ -71,14 +76,13 @@ func TestBlackHole(t *testing.T) {
|
||||
|
||||
*blackHole = true
|
||||
|
||||
fn, err := Init(nil, "")
|
||||
err := Init(nil, "")
|
||||
if err != nil {
|
||||
t.Fatalf("%s", err)
|
||||
}
|
||||
|
||||
nfs := fn()
|
||||
if len(nfs) != 1 {
|
||||
t.Fatalf("expected to get 1 notifier; got %d", len(nfs))
|
||||
if len(getActiveNotifiers()) != 1 {
|
||||
t.Fatalf("expected to get 1 notifier; got %d", len(getActiveNotifiers()))
|
||||
}
|
||||
|
||||
targets := GetTargets()
|
||||
@@ -120,3 +124,85 @@ func TestGetAlertURLGenerator(t *testing.T) {
|
||||
t.Fatalf("unexpected url want %s, got %s", exp, AlertURLGeneratorFn(testAlert))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSendAlerts(t *testing.T) {
|
||||
oldAlertURLGeneratorFn := AlertURLGeneratorFn
|
||||
defer func() { AlertURLGeneratorFn = oldAlertURLGeneratorFn }()
|
||||
AlertURLGeneratorFn = func(alert Alert) string {
|
||||
return ""
|
||||
}
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/", func(_ http.ResponseWriter, _ *http.Request) {
|
||||
t.Fatalf("should not be called")
|
||||
})
|
||||
mux.HandleFunc(alertManagerPath, func(w http.ResponseWriter, r *http.Request) {
|
||||
var a []struct {
|
||||
Labels map[string]string `json:"labels"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&a); err != nil {
|
||||
t.Fatalf("can not unmarshal data into alert %s", err)
|
||||
}
|
||||
if len(a) != 2 {
|
||||
t.Fatalf("expected 2 alert in array got %d", len(a))
|
||||
}
|
||||
if len(a[0].Labels) != 4 {
|
||||
t.Fatalf("expected 4 labels got %d", len(a[0].Labels))
|
||||
}
|
||||
if a[0].Labels["env"] != "prod" {
|
||||
t.Fatalf("expected env label to be prod during relabeling, got %s", a[0].Labels["env"])
|
||||
}
|
||||
if a[0].Labels["c"] != "baz" {
|
||||
t.Fatalf("expected c label to be baz during relabeling, got %s", a[0].Labels["c"])
|
||||
}
|
||||
if len(a[1].Labels) != 1 {
|
||||
t.Fatalf("expected 1 labels got %d", len(a[1].Labels))
|
||||
}
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
f, err := os.CreateTemp("", "")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer fs.MustRemovePath(f.Name())
|
||||
|
||||
rawConfig := `
|
||||
static_configs:
|
||||
- targets:
|
||||
- %s
|
||||
alert_relabel_configs:
|
||||
- source_labels: [b]
|
||||
target_label: "c"
|
||||
alert_relabel_configs:
|
||||
- source_labels: [a]
|
||||
target_label: "b"
|
||||
- target_label: "env"
|
||||
replacement: "prod"
|
||||
`
|
||||
config := fmt.Sprintf(rawConfig, srv.URL+alertManagerPath)
|
||||
writeToFile(f.Name(), config)
|
||||
|
||||
oldConfigPath := configPath
|
||||
defer func() { configPath = oldConfigPath }()
|
||||
*configPath = f.Name()
|
||||
err = Init(nil, "")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when parse notifier config: %s", err)
|
||||
}
|
||||
|
||||
firingAlerts := []Alert{
|
||||
{
|
||||
Name: "alert1",
|
||||
Labels: map[string]string{"a": "baz"},
|
||||
},
|
||||
{
|
||||
Name: "alert2",
|
||||
Labels: map[string]string{},
|
||||
},
|
||||
}
|
||||
errG := Send(context.Background(), firingAlerts, nil)
|
||||
if errG.Err() != nil {
|
||||
t.Fatalf("unexpected error when sending alerts: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
package notifier
|
||||
|
||||
import "context"
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
// Notifier is a common interface for alert manager provider
|
||||
type Notifier interface {
|
||||
// Send sends the given list of alerts.
|
||||
// Returns an error if fails to send the alerts.
|
||||
// Must unblock if the given ctx is cancelled.
|
||||
Send(ctx context.Context, alerts []Alert, notifierHeaders map[string]string) error
|
||||
Send(ctx context.Context, alerts []Alert, alertLabels [][]prompb.Label, notifierHeaders map[string]string) error
|
||||
// Addr returns address where alerts are sent.
|
||||
Addr() string
|
||||
// LastError returns the error that occurred during the last attempt to send data
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
package notifier
|
||||
|
||||
import "context"
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
|
||||
// blackHoleNotifier is a Notifier stub, used when no notifications need
|
||||
// to be sent.
|
||||
@@ -10,7 +14,7 @@ type blackHoleNotifier struct {
|
||||
}
|
||||
|
||||
// Send will send no notifications, but increase the metric.
|
||||
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ map[string]string) error { //nolint:revive
|
||||
func (bh *blackHoleNotifier) Send(_ context.Context, alerts []Alert, _ [][]prompb.Label, _ map[string]string) error { //nolint:revive
|
||||
bh.metrics.alertsSent.Add(len(alerts))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
metricset "github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
@@ -16,7 +17,7 @@ func TestBlackHoleNotifier_Send(t *testing.T) {
|
||||
Start: time.Now().UTC(),
|
||||
End: time.Now().UTC(),
|
||||
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
||||
}}, nil); err != nil {
|
||||
}}, [][]prompb.Label{{}}, nil); err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
|
||||
@@ -34,7 +35,7 @@ func TestBlackHoleNotifier_Close(t *testing.T) {
|
||||
Start: time.Now().UTC(),
|
||||
End: time.Now().UTC(),
|
||||
Annotations: map[string]string{"a": "b", "c": "d", "e": "f"},
|
||||
}}, nil); err != nil {
|
||||
}}, [][]prompb.Label{{}}, nil); err != nil {
|
||||
t.Fatalf("unexpected error %s", err)
|
||||
}
|
||||
|
||||
|
||||
19
app/vmalert/notifier/testdata/consul.bad.yaml
vendored
Normal file
19
app/vmalert/notifier/testdata/consul.bad.yaml
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
consul_sd_configs:
|
||||
- server: localhost:8500
|
||||
scheme: http
|
||||
services:
|
||||
- alertmanager
|
||||
alert_relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "prod"
|
||||
- server: localhost:8500
|
||||
services:
|
||||
- consul
|
||||
alert_relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "(abc"
|
||||
alert_relabel_configs:
|
||||
- target_label: "foo"
|
||||
replacement: "aaa"
|
||||
13
app/vmalert/notifier/testdata/dns.bad.yaml
vendored
Normal file
13
app/vmalert/notifier/testdata/dns.bad.yaml
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
dns_sd_configs:
|
||||
- names:
|
||||
- cloudflare.com
|
||||
type: 'A'
|
||||
port: 9093
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_dns_name]
|
||||
replacement: '${1}'
|
||||
target_label: dns_name
|
||||
alert_relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "(abc"
|
||||
15
app/vmalert/notifier/testdata/mixed.good.yaml
vendored
15
app/vmalert/notifier/testdata/mixed.good.yaml
vendored
@@ -2,12 +2,19 @@ static_configs:
|
||||
- targets:
|
||||
- localhost:9093
|
||||
- localhost:9095
|
||||
|
||||
alert_relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "static"
|
||||
consul_sd_configs:
|
||||
- server: localhost:8500
|
||||
scheme: http
|
||||
services:
|
||||
- alertmanager
|
||||
alert_relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "consul"
|
||||
- server: localhost:8500
|
||||
services:
|
||||
- consul
|
||||
@@ -17,6 +24,10 @@ dns_sd_configs:
|
||||
- cloudflare.com
|
||||
type: 'A'
|
||||
port: 9093
|
||||
alert_relabel_configs:
|
||||
- action: keep
|
||||
source_labels: [env]
|
||||
regex: "dns"
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_consul_tags]
|
||||
@@ -25,4 +36,4 @@ relabel_configs:
|
||||
target_label: __scheme__
|
||||
- source_labels: [__meta_dns_name]
|
||||
replacement: '${1}'
|
||||
target_label: dns_name
|
||||
target_label: dns_name
|
||||
|
||||
26
app/vmalert/notifier/testdata/static.good.yaml
vendored
26
app/vmalert/notifier/testdata/static.good.yaml
vendored
@@ -1,22 +1,14 @@
|
||||
headers:
|
||||
- 'CustomHeader: foo'
|
||||
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:9093
|
||||
- localhost:9095
|
||||
- https://localhost:9093/test/api/v2/alerts
|
||||
basic_auth:
|
||||
username: foo
|
||||
password: bar
|
||||
- http://192.168.0.101:9093
|
||||
alert_relabel_configs:
|
||||
- target_label: "foo"
|
||||
replacement: "aaa"
|
||||
|
||||
- targets:
|
||||
- localhost:9096
|
||||
- localhost:9097
|
||||
basic_auth:
|
||||
username: foo
|
||||
password: baz
|
||||
- http://192.168.0.101:9093
|
||||
alert_relabel_configs:
|
||||
- target_label: "foo"
|
||||
replacement: "ccc"
|
||||
|
||||
|
||||
alert_relabel_configs:
|
||||
- target_label: "foo"
|
||||
replacement: "aaa"
|
||||
|
||||
@@ -173,9 +173,8 @@ func (c *Client) run(ctx context.Context) {
|
||||
|
||||
cancel()
|
||||
}
|
||||
c.wg.Add(1)
|
||||
go func() {
|
||||
defer c.wg.Done()
|
||||
|
||||
c.wg.Go(func() {
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
@@ -197,7 +196,7 @@ func (c *Client) run(ctx context.Context) {
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
})
|
||||
}
|
||||
|
||||
var (
|
||||
|
||||
@@ -827,12 +827,9 @@ func TestGroup_Restore(t *testing.T) {
|
||||
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
||||
fg.Init()
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
nts := func() []notifier.Notifier { return []notifier.Notifier{&notifier.FakeNotifier{}} }
|
||||
fg.Start(context.Background(), nts, nil, fqr)
|
||||
wg.Done()
|
||||
}()
|
||||
wg.Go(func() {
|
||||
fg.Start(context.Background(), nil, fqr)
|
||||
})
|
||||
fg.Close()
|
||||
wg.Wait()
|
||||
|
||||
|
||||
@@ -18,7 +18,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/vmalertutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
)
|
||||
@@ -39,6 +38,8 @@ var (
|
||||
disableAlertGroupLabel = flag.Bool("disableAlertgroupLabel", false, "Whether to disable adding group's Name as label to generated alerts and time series.")
|
||||
remoteReadLookBack = flag.Duration("remoteRead.lookback", time.Hour, "Lookback defines how far to look into past for alerts timeseries. "+
|
||||
"For example, if lookback=1h then range from now() to now()-1h will be scanned.")
|
||||
maxStartDelay = flag.Duration("group.maxStartDelay", 5*time.Minute, "Defines the max delay before starting the group evaluation. Group's start is artificially delayed for random duration on interval"+
|
||||
" [0..min(--group.maxStartDelay, group.interval)]. This helps smoothing out the load on the configured datasource, so evaluations aren't executed too close to each other.")
|
||||
)
|
||||
|
||||
// Group is an entity for grouping rules
|
||||
@@ -330,13 +331,13 @@ func (g *Group) Init() {
|
||||
}
|
||||
|
||||
// Start starts group's evaluation
|
||||
func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
|
||||
func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
|
||||
defer func() { close(g.finishedCh) }()
|
||||
evalTS := time.Now()
|
||||
// sleep random duration to spread group rules evaluation
|
||||
// over time to reduce the load on datasource.
|
||||
// over maxStartDelay to reduce the load on datasource.
|
||||
if !SkipRandSleepOnGroupStart {
|
||||
sleepBeforeStart := delayBeforeStart(evalTS, g.GetID(), g.Interval, g.EvalOffset)
|
||||
sleepBeforeStart := g.delayBeforeStart(evalTS, *maxStartDelay)
|
||||
g.infof("will start in %v", sleepBeforeStart)
|
||||
|
||||
sleepTimer := time.NewTimer(sleepBeforeStart)
|
||||
@@ -368,7 +369,6 @@ func (g *Group) Start(ctx context.Context, nts func() []notifier.Notifier, rw re
|
||||
|
||||
e := &executor{
|
||||
Rw: rw,
|
||||
Notifiers: nts,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
}
|
||||
|
||||
@@ -475,20 +475,31 @@ func (g *Group) UpdateWith(newGroup *Group) {
|
||||
g.updateCh <- newGroup
|
||||
}
|
||||
|
||||
// if offset is specified, delayBeforeStart returns a duration to help aligning timestamp with offset;
|
||||
// otherwise, it returns a random duration between [0..interval] based on group key.
|
||||
func delayBeforeStart(ts time.Time, key uint64, interval time.Duration, offset *time.Duration) time.Duration {
|
||||
if offset != nil {
|
||||
currentOffsetPoint := ts.Truncate(interval).Add(*offset)
|
||||
// delayBeforeStart returns duration for delaying the evaluation start
|
||||
// based on given ts and Group settings. The delay can't exceed maxDelay.
|
||||
// maxDelay is ignored if g.EvalOffset != nil.
|
||||
//
|
||||
// Delaying is important to smooth out the load on the datasource when all groups start at the same time.
|
||||
// delayBeforeStart calculates delay based on Group ID, so all groups will start at different moments of time.
|
||||
func (g *Group) delayBeforeStart(ts time.Time, maxDelay time.Duration) time.Duration {
|
||||
if g.EvalOffset != nil {
|
||||
// if offset is specified, ignore the maxDelay and return a duration aligned with offset
|
||||
currentOffsetPoint := ts.Truncate(g.Interval).Add(*g.EvalOffset)
|
||||
if currentOffsetPoint.Before(ts) {
|
||||
// wait until the next offset point
|
||||
return currentOffsetPoint.Add(interval).Sub(ts)
|
||||
return currentOffsetPoint.Add(g.Interval).Sub(ts)
|
||||
}
|
||||
return currentOffsetPoint.Sub(ts)
|
||||
}
|
||||
|
||||
// otherwise, return a random duration between [0..min(interval, maxDelay)] based on group ID
|
||||
interval := g.Interval
|
||||
if interval > maxDelay {
|
||||
// artificially limit interval, so groups with big intervals could start sooner.
|
||||
interval = maxDelay
|
||||
}
|
||||
var randSleep time.Duration
|
||||
randSleep = time.Duration(float64(interval) * (float64(key) / (1 << 64)))
|
||||
randSleep = time.Duration(float64(interval) * (float64(g.GetID()) / (1 << 64)))
|
||||
sleepOffset := time.Duration(ts.UnixNano() % interval.Nanoseconds())
|
||||
if randSleep < sleepOffset {
|
||||
randSleep += interval
|
||||
@@ -550,15 +561,13 @@ func (g *Group) Replay(start, end time.Time, rw remotewrite.RWClient, maxDataPoi
|
||||
if !disableProgressBar {
|
||||
bar = pb.StartNew(iterations * len(g.Rules))
|
||||
}
|
||||
for _, r := range g.Rules {
|
||||
for i := range g.Rules {
|
||||
rule := g.Rules[i]
|
||||
sem <- struct{}{}
|
||||
wg.Add(1)
|
||||
go func(r Rule, ri rangeIterator) {
|
||||
// pass ri as a copy, so it can be modified within the replayRuleRange
|
||||
res <- replayRuleRange(r, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
|
||||
wg.Go(func() {
|
||||
res <- replayRuleRange(rule, ri, bar, rw, replayRuleRetryAttempts, ruleEvaluationConcurrency)
|
||||
<-sem
|
||||
wg.Done()
|
||||
}(r, ri)
|
||||
})
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
@@ -588,10 +597,10 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
|
||||
res := make(chan int, int(ri.end.Sub(ri.start)/ri.step)+1)
|
||||
for ri.next() {
|
||||
sem <- struct{}{}
|
||||
wg.Add(1)
|
||||
|
||||
go func(s, e time.Time) {
|
||||
n, err := replayRule(r, s, e, rw, replayRuleRetryAttempts)
|
||||
start := ri.s
|
||||
end := ri.e
|
||||
wg.Go(func() {
|
||||
n, err := replayRule(r, start, end, rw, replayRuleRetryAttempts)
|
||||
if err != nil {
|
||||
logger.Fatalf("rule %q: %s", r, err)
|
||||
}
|
||||
@@ -600,8 +609,7 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
|
||||
}
|
||||
res <- n
|
||||
<-sem
|
||||
wg.Done()
|
||||
}(ri.s, ri.e)
|
||||
})
|
||||
}
|
||||
wg.Wait()
|
||||
close(res)
|
||||
@@ -615,10 +623,9 @@ func replayRuleRange(r Rule, ri rangeIterator, bar *pb.ProgressBar, rw remotewri
|
||||
}
|
||||
|
||||
// ExecOnce evaluates all the rules in the group once at the given timestamp.
|
||||
func (g *Group) ExecOnce(ctx context.Context, nts func() []notifier.Notifier, rw remotewrite.RWClient, evalTS time.Time) chan error {
|
||||
func (g *Group) ExecOnce(ctx context.Context, rw remotewrite.RWClient, evalTS time.Time) chan error {
|
||||
e := &executor{
|
||||
Rw: rw,
|
||||
Notifiers: nts,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
}
|
||||
if len(g.Rules) < 1 {
|
||||
@@ -693,7 +700,6 @@ func (g *Group) getEvalDelay() time.Duration {
|
||||
|
||||
// executor contains group's notify and rw configs
|
||||
type executor struct {
|
||||
Notifiers func() []notifier.Notifier
|
||||
notifierHeaders map[string]string
|
||||
|
||||
Rw remotewrite.RWClient
|
||||
@@ -714,14 +720,13 @@ func (e *executor) execConcurrently(ctx context.Context, rules []Rule, ts time.T
|
||||
sem := make(chan struct{}, concurrency)
|
||||
go func() {
|
||||
wg := sync.WaitGroup{}
|
||||
for _, r := range rules {
|
||||
for i := range rules {
|
||||
rule := rules[i]
|
||||
sem <- struct{}{}
|
||||
wg.Add(1)
|
||||
go func(r Rule) {
|
||||
res <- e.exec(ctx, r, ts, resolveDuration, limit)
|
||||
wg.Go(func() {
|
||||
res <- e.exec(ctx, rule, ts, resolveDuration, limit)
|
||||
<-sem
|
||||
wg.Done()
|
||||
}(r)
|
||||
})
|
||||
}
|
||||
wg.Wait()
|
||||
close(res)
|
||||
@@ -775,17 +780,6 @@ func (e *executor) exec(ctx context.Context, r Rule, ts time.Time, resolveDurati
|
||||
return nil
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
errGr := new(vmalertutil.ErrGroup)
|
||||
for _, nt := range e.Notifiers() {
|
||||
wg.Add(1)
|
||||
go func(nt notifier.Notifier) {
|
||||
if err := nt.Send(ctx, alerts, e.notifierHeaders); err != nil {
|
||||
errGr.Add(fmt.Errorf("rule %q: failed to send alerts to addr %q: %w", r, nt.Addr(), err))
|
||||
}
|
||||
wg.Done()
|
||||
}(nt)
|
||||
}
|
||||
wg.Wait()
|
||||
errGr := notifier.Send(ctx, alerts, e.notifierHeaders)
|
||||
return errGr.Err()
|
||||
}
|
||||
|
||||
@@ -262,7 +262,7 @@ func TestUpdateDuringRandSleep(t *testing.T) {
|
||||
updateCh: make(chan *Group),
|
||||
}
|
||||
g.Init()
|
||||
go g.Start(context.Background(), nil, nil, nil)
|
||||
go g.Start(context.Background(), nil, nil)
|
||||
|
||||
rule1 := AlertingRule{
|
||||
Name: "jobDown",
|
||||
@@ -346,7 +346,8 @@ func TestGroupStart(t *testing.T) {
|
||||
}
|
||||
|
||||
fs := &datasource.FakeQuerier{}
|
||||
fn := &notifier.FakeNotifier{}
|
||||
fn, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
|
||||
const evalInterval = time.Millisecond
|
||||
g := NewGroup(groups[0], fs, evalInterval, map[string]string{"cluster": "east-1"})
|
||||
@@ -395,7 +396,7 @@ func TestGroupStart(t *testing.T) {
|
||||
fs.Add(m2)
|
||||
g.Init()
|
||||
go func() {
|
||||
g.Start(context.Background(), func() []notifier.Notifier { return []notifier.Notifier{fn} }, nil, fs)
|
||||
g.Start(context.Background(), nil, fs)
|
||||
close(finished)
|
||||
}()
|
||||
|
||||
@@ -472,15 +473,10 @@ func TestFaultyNotifier(t *testing.T) {
|
||||
r := newTestAlertingRule("instant", 0)
|
||||
r.q = fq
|
||||
|
||||
fn := &notifier.FakeNotifier{}
|
||||
e := &executor{
|
||||
Notifiers: func() []notifier.Notifier {
|
||||
return []notifier.Notifier{
|
||||
&notifier.FaultyNotifier{},
|
||||
fn,
|
||||
}
|
||||
},
|
||||
}
|
||||
fn, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
|
||||
e := &executor{}
|
||||
delay := 5 * time.Second
|
||||
ctx, cancel := context.WithTimeout(context.Background(), delay)
|
||||
defer cancel()
|
||||
@@ -553,7 +549,7 @@ func TestCloseWithEvalInterruption(t *testing.T) {
|
||||
g := NewGroup(groups[0], fq, evalInterval, nil)
|
||||
g.Init()
|
||||
|
||||
go g.Start(context.Background(), nil, nil, nil)
|
||||
go g.Start(context.Background(), nil, nil)
|
||||
|
||||
time.Sleep(evalInterval * 20)
|
||||
|
||||
@@ -571,9 +567,10 @@ func TestCloseWithEvalInterruption(t *testing.T) {
|
||||
|
||||
func TestGroupStartDelay(t *testing.T) {
|
||||
g := &Group{}
|
||||
g.id = uint64(math.MaxUint64 / 10)
|
||||
// interval of 5min and key generate a static delay of 30s
|
||||
g.Interval = time.Minute * 5
|
||||
key := uint64(math.MaxUint64 / 10)
|
||||
maxDelay := time.Minute * 5
|
||||
|
||||
f := func(atS, expS string) {
|
||||
t.Helper()
|
||||
@@ -585,7 +582,7 @@ func TestGroupStartDelay(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
delay := delayBeforeStart(at, key, g.Interval, g.EvalOffset)
|
||||
delay := g.delayBeforeStart(at, maxDelay)
|
||||
gotStart := at.Add(delay)
|
||||
if expTS != gotStart {
|
||||
t.Fatalf("expected to get %v; got %v instead", expTS, gotStart)
|
||||
@@ -606,6 +603,15 @@ func TestGroupStartDelay(t *testing.T) {
|
||||
f("2023-01-01T00:01:00.000+00:00", "2023-01-01T00:03:00.000+00:00")
|
||||
f("2023-01-01T00:03:30.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||
f("2023-01-01T00:08:00.000+00:00", "2023-01-01T00:08:00.000+00:00")
|
||||
|
||||
maxDelay = time.Minute * 1
|
||||
g.EvalOffset = nil
|
||||
|
||||
// test group with maxDelay, and offset disabled
|
||||
f("2023-01-01T00:00:00.000+00:00", "2023-01-01T00:00:06.000+00:00")
|
||||
f("2023-01-01T00:00:01.000+00:00", "2023-01-01T00:00:06.000+00:00")
|
||||
f("2023-01-01T00:00:06.100+00:00", "2023-01-01T00:01:06.000+00:00")
|
||||
f("2023-01-01T00:00:11.000+00:00", "2023-01-01T00:01:06.000+00:00")
|
||||
}
|
||||
|
||||
func TestGetPrometheusReqTimestamp(t *testing.T) {
|
||||
|
||||
@@ -34,11 +34,12 @@ body {
|
||||
padding-top: 4.5rem;
|
||||
}
|
||||
|
||||
.group-items {
|
||||
.vm-group {
|
||||
cursor: pointer;
|
||||
padding: 5px;
|
||||
margin-top: 5px;
|
||||
position: relative;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.btn svg, .dropdown-item svg {
|
||||
@@ -55,14 +56,22 @@ body {
|
||||
height: 38px;
|
||||
}
|
||||
|
||||
.group-items:not(:has(.sub-item:not(.d-none))) {
|
||||
display: none !important;
|
||||
.vm-item:not(.vm-found) {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.group-items:hover {
|
||||
.vm-group:has(.vm-item:is(.vm-found)), .vm-group:is(.vm-found) {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.vm-group:hover {
|
||||
background-color: #f8f9fa!important;
|
||||
}
|
||||
|
||||
.vm-group:is(.vm-found) .vm-item {
|
||||
display: table-row;
|
||||
}
|
||||
|
||||
.table {
|
||||
table-layout: fixed;
|
||||
}
|
||||
@@ -111,3 +120,9 @@ textarea.curl-area {
|
||||
.w-60 {
|
||||
width: 60%;
|
||||
}
|
||||
|
||||
.annotations {
|
||||
white-space: pre-wrap;
|
||||
color: gray;
|
||||
word-wrap: break-word;
|
||||
}
|
||||
|
||||
@@ -65,32 +65,34 @@ function getParamURL(key) {
|
||||
return url.searchParams.get(key)
|
||||
}
|
||||
|
||||
function matchText(search, item) {
|
||||
const text = item.innerText.toLowerCase();
|
||||
return text.indexOf(search) >= 0;
|
||||
}
|
||||
|
||||
function filterRules(searchPhrase) {
|
||||
document.querySelectorAll('.sub-items').forEach((rules) => {
|
||||
let found = false;
|
||||
rules.querySelectorAll('.sub-item').forEach((rule) => {
|
||||
if (searchPhrase) {
|
||||
const ruleName = rule.innerText.toLowerCase();
|
||||
const matches = []
|
||||
const hasValue = ruleName.indexOf(searchPhrase) >= 0;
|
||||
rule.querySelectorAll('.label').forEach((label) => {
|
||||
const text = label.innerText.toLowerCase();
|
||||
if (text.indexOf(searchPhrase) >= 0) {
|
||||
matches.push(text);
|
||||
}
|
||||
});
|
||||
if (!matches.length && !hasValue) {
|
||||
rule.classList.add('d-none');
|
||||
return;
|
||||
}
|
||||
document.querySelectorAll('.vm-group').forEach((group) => {
|
||||
if (!searchPhrase) {
|
||||
group.classList.add('vm-found');
|
||||
return;
|
||||
}
|
||||
for (const item of group.querySelectorAll('.vm-group-search')) {
|
||||
if (matchText(searchPhrase, item)) {
|
||||
group.classList.add('vm-found');
|
||||
return;
|
||||
}
|
||||
rule.classList.remove('d-none');
|
||||
found = true;
|
||||
});
|
||||
if (found && searchPhrase || !searchPhrase) {
|
||||
rules.classList.remove('d-none');
|
||||
} else {
|
||||
rules.classList.add('d-none');
|
||||
}
|
||||
group.classList.remove('vm-found');
|
||||
for (const item of group.querySelectorAll('.vm-item')) {
|
||||
if (matchText(searchPhrase, item)) {
|
||||
item.classList.add('vm-found');
|
||||
continue;
|
||||
}
|
||||
if (Array.from(item.querySelectorAll('.label')).find(l => matchText(searchPhrase, l))) {
|
||||
item.classList.add('vm-found');
|
||||
continue;
|
||||
}
|
||||
item.classList.remove('vm-found');
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -485,6 +485,12 @@ func templateFuncs() textTpl.FuncMap {
|
||||
|
||||
/* Helpers */
|
||||
|
||||
// now returns the Unix timestamp in seconds at the time of the template evaluation.
|
||||
// For example: {{ (now | toTime).Sub $activeAt }} will return the duration the alert has been active.
|
||||
"now": func() float64 {
|
||||
return float64(time.Now().Unix())
|
||||
},
|
||||
|
||||
// Converts a list of objects to a map with keys arg0, arg1 etc.
|
||||
// This is intended to allow multiple arguments to be passed to templates.
|
||||
"args": func(args ...any) map[string]any {
|
||||
|
||||
@@ -114,14 +114,17 @@
|
||||
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
|
||||
{% if len(groups) > 0 %}
|
||||
{% for _, g := range groups %}
|
||||
<div id="group-{%s g.ID %}" class="d-flex w-100 border-0 flex-column group-items{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
|
||||
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
|
||||
<span class="d-flex justify-content-between">
|
||||
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
|
||||
<a
|
||||
class="vm-group-search"
|
||||
href="#group-{%s g.ID %}"
|
||||
>{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %} (every {%f.0 g.Interval %}s) #</a>
|
||||
<span
|
||||
class="flex-grow-1 d-flex justify-content-end"
|
||||
role="button"
|
||||
data-bs-toggle="collapse"
|
||||
data-bs-target="#sub-{%s g.ID %}"
|
||||
data-bs-target="#item-{%s g.ID %}"
|
||||
>
|
||||
<span class="d-flex gap-2">
|
||||
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
|
||||
@@ -134,9 +137,9 @@
|
||||
class="d-flex flex-column row-gap-2 mb-2"
|
||||
role="button"
|
||||
data-bs-toggle="collapse"
|
||||
data-bs-target="#sub-{%s g.ID %}"
|
||||
data-bs-target="#item-{%s g.ID %}"
|
||||
>
|
||||
<span class="fs-6 text-start w-100 fw-lighter">{%s g.File %}</span>
|
||||
<span class="fs-6 text-start vm-group-search w-100 fw-lighter">{%s g.File %}</span>
|
||||
{% if len(g.Params) > 0 %}
|
||||
<span class="fs-6 text-start w-100 d-flex justify-content-between fw-lighter">
|
||||
<span>Extra params</span>
|
||||
@@ -158,7 +161,7 @@
|
||||
</span>
|
||||
{% endif %}
|
||||
</span>
|
||||
<div class="collapse sub-items" id="sub-{%s g.ID %}">
|
||||
<div class="collapse" id="item-{%s g.ID %}">
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
@@ -169,7 +172,7 @@
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for _, r := range g.Rules %}
|
||||
<tr class="sub-item{% if r.LastError != "" %} alert-danger{% endif %}">
|
||||
<tr class="vm-item{% if r.LastError != "" %} alert-danger{% endif %}">
|
||||
<td>
|
||||
<div class="row">
|
||||
<div class="col-12 mb-2">
|
||||
@@ -206,7 +209,12 @@
|
||||
</div>
|
||||
</td>
|
||||
<td class="text-center">{%d r.LastSamples %}</td>
|
||||
<td class="text-center">{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago</td>
|
||||
<td class="text-center">{% if r.LastEvaluation.IsZero() %}
|
||||
Never
|
||||
{% else %}
|
||||
{%f.3 time.Since(r.LastEvaluation).Seconds() %}s ago
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
@@ -241,14 +249,14 @@
|
||||
}
|
||||
sort.Strings(keys)
|
||||
%}
|
||||
<div class="d-flex w-100 flex-column group-items alert-danger">
|
||||
<div class="w-100 flex-column vm-group alert-danger">
|
||||
<span id="group-{%s g.ID %}" class="d-flex justify-content-between">
|
||||
<a href="#group-{%s g.ID %}">{%s g.Name %}{% if g.Type != "prometheus" %} ({%s g.Type %}){% endif %}</a>
|
||||
<span
|
||||
class="flex-grow-1 d-flex justify-content-end"
|
||||
role="button"
|
||||
data-bs-toggle="collapse"
|
||||
data-bs-target="#sub-{%s g.ID %}"
|
||||
data-bs-target="#item-{%s g.ID %}"
|
||||
>
|
||||
<span class="badge bg-danger" title="Number of active alerts">{%d len(ga.Alerts) %}</span>
|
||||
</span>
|
||||
@@ -258,10 +266,10 @@
|
||||
class="fs-6 text-start w-100 fw-lighter"
|
||||
role="button"
|
||||
data-bs-toggle="collapse"
|
||||
data-bs-target="#sub-{%s g.ID %}"
|
||||
data-bs-target="#item-{%s g.ID %}"
|
||||
>{%s g.File %}</span>
|
||||
</span>
|
||||
<div class="collapse sub-items" id="sub-{%s g.ID %}">
|
||||
<div class="collapse" id="item-{%s g.ID %}">
|
||||
{% for _, ruleID := range keys %}
|
||||
{%code
|
||||
defaultAR := alertsByRule[ruleID][0]
|
||||
@@ -272,7 +280,7 @@
|
||||
sort.Strings(labelKeys)
|
||||
%}
|
||||
<br>
|
||||
<div class="sub-item">
|
||||
<div class="vm-item">
|
||||
<b>alert:</b> {%s defaultAR.Name %} ({%d len(alertsByRule[ruleID]) %})
|
||||
| <span><a target="_blank" href="{%s defaultAR.SourceLink %}">Source</a></span>
|
||||
<br>
|
||||
@@ -337,20 +345,20 @@
|
||||
typeK, ns := keys[i], targets[notifier.TargetType(keys[i])]
|
||||
count := len(ns)
|
||||
%}
|
||||
<div class="d-flex w-100 flex-column group-items">
|
||||
<div class="w-100 flex-column vm-group">
|
||||
<span class="d-flex justify-content-between" id="group-{%s typeK %}">
|
||||
<a href="#group-{%s typeK %}">{%s typeK %} ({%d count %})</a>
|
||||
<span
|
||||
class="flex-grow-1"
|
||||
role="button"
|
||||
data-bs-toggle="collapse"
|
||||
data-bs-target="#sub-{%s typeK %}"
|
||||
data-bs-target="#item-{%s typeK %}"
|
||||
></span>
|
||||
</span>
|
||||
<div id="sub-{%s typeK %}" class="collapse show sub-items">
|
||||
<div id="item-{%s typeK %}" class="collapse show">
|
||||
<table class="table table-striped table-hover table-sm">
|
||||
<thead>
|
||||
<tr class="sub-item">
|
||||
<tr class="vm-item">
|
||||
<th scope="col">Labels</th>
|
||||
<th scope="col">Address</th>
|
||||
</tr>
|
||||
@@ -435,7 +443,7 @@
|
||||
<div class="col">
|
||||
{% for _, k := range annotationKeys %}
|
||||
<b>{%s k %}:</b><br>
|
||||
<p>{%s alert.Annotations[k] %}</p>
|
||||
<p class="annotations">{%s alert.Annotations[k] %}</p>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
@@ -549,7 +557,7 @@
|
||||
<div class="col">
|
||||
{% for _, k := range annotationKeys %}
|
||||
<b>{%s k %}:</b><br>
|
||||
<p>{%s rule.Annotations[k] %}</p>
|
||||
<p class="annotations">{%s rule.Annotations[k] %}</p>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -23,6 +23,9 @@ func TestHandler(t *testing.T) {
|
||||
Timestamps: []int64{0},
|
||||
})
|
||||
m := &manager{groups: map[uint64]*rule.Group{}}
|
||||
_, cleanup := notifier.InitFakeNotifier()
|
||||
defer cleanup()
|
||||
|
||||
var ar *rule.AlertingRule
|
||||
var rr *rule.RecordingRule
|
||||
var groupIDs []uint64
|
||||
@@ -45,7 +48,7 @@ func TestHandler(t *testing.T) {
|
||||
}, fq, 1*time.Minute, nil)
|
||||
ar = g.Rules[0].(*rule.AlertingRule)
|
||||
rr = g.Rules[1].(*rule.RecordingRule)
|
||||
g.ExecOnce(context.Background(), func() []notifier.Notifier { return nil }, nil, time.Time{})
|
||||
g.ExecOnce(context.Background(), nil, time.Time{})
|
||||
id := g.CreateID()
|
||||
m.groups[id] = g
|
||||
groupIDs = append(groupIDs, id)
|
||||
|
||||
@@ -27,6 +27,9 @@ vmauth-linux-ppc64le-prod:
|
||||
vmauth-linux-386-prod:
|
||||
APP_NAME=vmauth $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmauth-linux-s390x-prod:
|
||||
APP_NAME=vmauth $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmauth-darwin-amd64-prod:
|
||||
APP_NAME=vmauth $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -482,27 +482,34 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
|
||||
if bu.isBroken() {
|
||||
continue
|
||||
}
|
||||
if bu.concurrentRequests.Load() == 0 {
|
||||
// Fast path - return the backend with zero concurrently executed requests.
|
||||
// Do not use CompareAndSwap() instead of Load(), since it is much slower on systems with many CPU cores.
|
||||
bu.concurrentRequests.Add(1)
|
||||
|
||||
// The Load() in front of CompareAndSwap() avoids CAS overhead for items with values bigger than 0.
|
||||
if bu.concurrentRequests.Load() == 0 && bu.concurrentRequests.CompareAndSwap(0, 1) {
|
||||
atomicCounter.CompareAndSwap(n+1, idx+1)
|
||||
// There is no need to call bu.get(), because bu.concurrentRequests has already been incremented above.
|
||||
return bu
|
||||
}
|
||||
}
|
||||
|
||||
// Slow path - return the backend with the minimum number of concurrently executed requests.
|
||||
buMin := bus[n%uint32(len(bus))]
|
||||
minRequests := buMin.concurrentRequests.Load()
|
||||
for _, bu := range bus {
|
||||
buMinIdx := n % uint32(len(bus))
|
||||
minRequests := bus[buMinIdx].concurrentRequests.Load()
|
||||
for i := uint32(0); i < uint32(len(bus)); i++ {
|
||||
idx := (n + i) % uint32(len(bus))
|
||||
bu := bus[idx]
|
||||
if bu.isBroken() {
|
||||
continue
|
||||
}
|
||||
if n := bu.concurrentRequests.Load(); n < minRequests || buMin.isBroken() {
|
||||
buMin = bu
|
||||
minRequests = n
|
||||
|
||||
reqs := bu.concurrentRequests.Load()
|
||||
if reqs < minRequests || bus[buMinIdx].isBroken() {
|
||||
buMinIdx = idx
|
||||
minRequests = reqs
|
||||
}
|
||||
}
|
||||
buMin := bus[buMinIdx]
|
||||
buMin.get()
|
||||
atomicCounter.CompareAndSwap(n+1, buMinIdx+1)
|
||||
return buMin
|
||||
}
|
||||
|
||||
|
||||
@@ -752,10 +752,12 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
|
||||
})
|
||||
up.loadBalancingPolicy = "least_loaded"
|
||||
|
||||
pbus := up.bus.Load()
|
||||
bus := *pbus
|
||||
|
||||
fn := func(ns ...int) {
|
||||
t.Helper()
|
||||
pbus := up.bus.Load()
|
||||
bus := *pbus
|
||||
|
||||
for i, b := range bus {
|
||||
got := int(b.concurrentRequests.Load())
|
||||
exp := ns[i]
|
||||
@@ -767,45 +769,52 @@ func TestGetLeastLoadedBackendURL(t *testing.T) {
|
||||
|
||||
up.getBackendURL()
|
||||
fn(1, 0, 0)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(1, 1, 0)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(1, 1, 1)
|
||||
|
||||
up.getBackendURL()
|
||||
up.getBackendURL()
|
||||
fn(2, 2, 1)
|
||||
|
||||
bus := up.bus.Load()
|
||||
pbus := *bus
|
||||
pbus[0].concurrentRequests.Add(2)
|
||||
pbus[2].concurrentRequests.Add(5)
|
||||
fn(4, 2, 6)
|
||||
bus[1].put()
|
||||
bus[2].put()
|
||||
fn(1, 0, 0)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(4, 3, 6)
|
||||
fn(1, 1, 0)
|
||||
|
||||
bus[1].put()
|
||||
up.getBackendURL()
|
||||
fn(4, 4, 6)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(4, 5, 6)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(5, 5, 6)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(6, 5, 6)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(6, 6, 6)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(6, 6, 7)
|
||||
fn(1, 0, 1)
|
||||
|
||||
up.getBackendURL()
|
||||
up.getBackendURL()
|
||||
fn(7, 7, 7)
|
||||
fn(1, 1, 2)
|
||||
|
||||
bus[0].concurrentRequests.Add(2)
|
||||
bus[2].concurrentRequests.Add(2)
|
||||
fn(3, 1, 4)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(3, 2, 4)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(3, 3, 4)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(4, 3, 4)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(4, 4, 4)
|
||||
|
||||
bus[0].put()
|
||||
bus[2].put()
|
||||
|
||||
up.getBackendURL()
|
||||
fn(3, 4, 4)
|
||||
|
||||
up.getBackendURL()
|
||||
fn(4, 4, 4)
|
||||
}
|
||||
|
||||
func TestBrokenBackend(t *testing.T) {
|
||||
|
||||
@@ -310,14 +310,21 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
|
||||
rtb, rtbOK := req.Body.(*readTrackingBody)
|
||||
res, err := ui.rt.RoundTrip(req)
|
||||
|
||||
if ctxErr := r.Context().Err(); ctxErr != nil {
|
||||
// Override the error returned by RoundTrip with the context error if the context error is non-nil.
|
||||
// This ensures proper logging for canceled and timed out requests - log the real cause of the error
|
||||
// instead of the random error, which could be returned from RoundTrip because of canceled or timed out request.
|
||||
err = ctxErr
|
||||
}
|
||||
if err != nil {
|
||||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
// Do not retry canceled or timed out requests
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
requestURI := httpserver.GetRequestURI(r)
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; error when proxying response body from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
// Timed out requests must be counted as errors, since this usually means that the backend is slow.
|
||||
logger.Warnf("remoteAddr: %s; requestURI: %s; timeout while proxying the response from %s: %s", remoteAddr, requestURI, targetURL, err)
|
||||
ui.backendErrors.Inc()
|
||||
}
|
||||
return false, false
|
||||
|
||||
@@ -31,6 +31,9 @@ vmbackup-linux-ppc64le-prod:
|
||||
vmbackup-linux-386-prod:
|
||||
APP_NAME=vmbackup EXTRA_GO_BUILD_TAGS=$(VMBACKUP_GO_BUILD_TAGS) $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmbackup-linux-s390x-prod:
|
||||
APP_NAME=vmbackup EXTRA_GO_BUILD_TAGS=$(VMBACKUP_GO_BUILD_TAGS) $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmbackup-darwin-amd64-prod:
|
||||
APP_NAME=vmbackup EXTRA_GO_BUILD_TAGS=$(VMBACKUP_GO_BUILD_TAGS) $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -27,6 +27,9 @@ vmctl-linux-ppc64le-prod:
|
||||
vmctl-linux-386-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmctl-linux-s390x-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmctl-darwin-amd64-prod:
|
||||
APP_NAME=vmctl $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
@@ -689,15 +689,15 @@ var (
|
||||
Usage: "The time filter in RFC3339 format to select timeseries with timestamp equal or lower than provided value. E.g. '2020-01-01T20:07:00Z'",
|
||||
Layout: time.RFC3339,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: remoteReadFilterLabel,
|
||||
Usage: "Prometheus label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
|
||||
Value: "__name__",
|
||||
&cli.StringSliceFlag{
|
||||
Name: remoteReadFilterLabel,
|
||||
Usage: "Prometheus label name to filter timeseries by. E.g. '__name__' will filter timeseries by name.",
|
||||
DefaultText: "__name__",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: remoteReadFilterLabelValue,
|
||||
Usage: fmt.Sprintf("Prometheus regular expression to filter label from %q flag.", remoteReadFilterLabelValue),
|
||||
Value: ".*",
|
||||
&cli.StringSliceFlag{
|
||||
Name: remoteReadFilterLabelValue,
|
||||
Usage: fmt.Sprintf("Prometheus regular expression to filter label from %q flag.", remoteReadFilterLabelValue),
|
||||
DefaultText: ".*",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: remoteRead,
|
||||
|
||||
@@ -192,6 +192,14 @@ func main() {
|
||||
return fmt.Errorf("failed to create transport for -%s=%q: %s", remoteReadSrcAddr, addr, err)
|
||||
}
|
||||
|
||||
// Backwards compatible default values if none provided by user
|
||||
rrLabelNames := c.StringSlice(remoteReadFilterLabel)
|
||||
rrLabelValues := c.StringSlice(remoteReadFilterLabelValue)
|
||||
if len(rrLabelNames) == 0 && len(rrLabelValues) == 0 {
|
||||
rrLabelNames = []string{"__name__"}
|
||||
rrLabelValues = []string{".*"}
|
||||
}
|
||||
|
||||
rr, err := remoteread.NewClient(remoteread.Config{
|
||||
Addr: addr,
|
||||
Transport: tr,
|
||||
@@ -200,8 +208,8 @@ func main() {
|
||||
Timeout: c.Duration(remoteReadHTTPTimeout),
|
||||
UseStream: c.Bool(remoteReadUseStream),
|
||||
Headers: c.String(remoteReadHeaders),
|
||||
LabelName: c.String(remoteReadFilterLabel),
|
||||
LabelValue: c.String(remoteReadFilterLabelValue),
|
||||
LabelNames: rrLabelNames,
|
||||
LabelValues: rrLabelValues,
|
||||
DisablePathAppend: c.Bool(remoteReadDisablePathAppend),
|
||||
})
|
||||
if err != nil {
|
||||
|
||||
@@ -11,14 +11,15 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/gogo/protobuf/proto"
|
||||
"github.com/golang/snappy"
|
||||
"github.com/prometheus/prometheus/config"
|
||||
"github.com/prometheus/prometheus/prompb"
|
||||
"github.com/prometheus/prometheus/storage/remote"
|
||||
"github.com/prometheus/prometheus/tsdb/chunkenc"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmctl/vm"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -63,9 +64,9 @@ type Config struct {
|
||||
UseStream bool
|
||||
// Headers optional HTTP headers to send with each request to the corresponding remote storage
|
||||
Headers string
|
||||
// LabelName, LabelValue stands for label=~value pair used for read requests.
|
||||
// LabelNames, LabelValues stand for the label=~value pairs used for read requests.
|
||||
// Is optional.
|
||||
LabelName, LabelValue string
|
||||
LabelNames, LabelValues []string
|
||||
}
|
||||
|
||||
// Filter defines a list of filters applied to requested data
|
||||
@@ -94,12 +95,22 @@ func NewClient(cfg Config) (*Client, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var m *prompb.LabelMatcher
|
||||
if cfg.LabelName != "" && cfg.LabelValue != "" {
|
||||
m = &prompb.LabelMatcher{
|
||||
Type: prompb.LabelMatcher_RE,
|
||||
Name: cfg.LabelName,
|
||||
Value: cfg.LabelValue,
|
||||
var matchers []*prompb.LabelMatcher
|
||||
if len(cfg.LabelNames) > 0 || len(cfg.LabelValues) > 0 {
|
||||
if len(cfg.LabelNames) != len(cfg.LabelValues) {
|
||||
return nil, fmt.Errorf("the number of label names and label values must be the same")
|
||||
}
|
||||
|
||||
for i := range cfg.LabelNames {
|
||||
if cfg.LabelNames[i] == "" {
|
||||
return nil, fmt.Errorf("label name cannot be empty")
|
||||
}
|
||||
matcher := &prompb.LabelMatcher{
|
||||
Type: prompb.LabelMatcher_RE,
|
||||
Name: cfg.LabelNames[i],
|
||||
Value: cfg.LabelValues[i],
|
||||
}
|
||||
matchers = append(matchers, matcher)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -116,7 +127,7 @@ func NewClient(cfg Config) (*Client, error) {
|
||||
password: cfg.Password,
|
||||
useStream: cfg.UseStream,
|
||||
headers: headers,
|
||||
matchers: []*prompb.LabelMatcher{m},
|
||||
matchers: matchers,
|
||||
}
|
||||
|
||||
return c, nil
|
||||
|
||||
@@ -221,7 +221,7 @@ func (ctx *InsertCtx) FlushBufs() error {
|
||||
}
|
||||
}
|
||||
|
||||
func (ctx *InsertCtx) dropAggregatedRows(matchIdxs []byte) {
|
||||
func (ctx *InsertCtx) dropAggregatedRows(matchIdxs []uint32) {
|
||||
dst := ctx.mrs[:0]
|
||||
src := ctx.mrs
|
||||
if !*streamAggrDropInput {
|
||||
@@ -239,4 +239,4 @@ func (ctx *InsertCtx) dropAggregatedRows(matchIdxs []byte) {
|
||||
ctx.mrs = dst
|
||||
}
|
||||
|
||||
var matchIdxsPool bytesutil.ByteBufferPool
|
||||
var matchIdxsPool slicesutil.BufferPool[uint32]
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompb"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/streamaggr"
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
@@ -22,11 +23,11 @@ var (
|
||||
streamAggrConfig = flag.String("streamAggr.config", "", "Optional path to file with stream aggregation config. "+
|
||||
"See https://docs.victoriametrics.com/victoriametrics/stream-aggregation/ . "+
|
||||
"See also -streamAggr.keepInput, -streamAggr.dropInput and -streamAggr.dedupInterval")
|
||||
streamAggrKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep all the input samples after the aggregation with -streamAggr.config. "+
|
||||
"By default, only aggregated samples are dropped, while the remaining samples are stored in the database. "+
|
||||
streamAggrKeepInput = flag.Bool("streamAggr.keepInput", false, "Whether to keep input samples that match any rule in -streamAggr.config. "+
|
||||
"By default, matched raw samples are aggregated and dropped, while unmatched samples are written to the remote storage. "+
|
||||
"See also -streamAggr.dropInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop all the input samples after the aggregation with -streamAggr.config. "+
|
||||
"By default, only aggregated samples are dropped, while the remaining samples are stored in the database. "+
|
||||
streamAggrDropInput = flag.Bool("streamAggr.dropInput", false, "Whether to drop input samples that do not match any rule in -streamAggr.config. "+
|
||||
"By default, only matched raw samples are dropped, while unmatched samples are written to the remote storage. "+
|
||||
"See also -streamAggr.keepInput and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/")
|
||||
streamAggrDedupInterval = flag.Duration("streamAggr.dedupInterval", 0, "Input samples are de-duplicated with this interval before optional aggregation with -streamAggr.config . "+
|
||||
"See also -streamAggr.dropInputLabels and -dedup.minScrapeInterval and https://docs.victoriametrics.com/victoriametrics/stream-aggregation/#deduplication")
|
||||
@@ -189,7 +190,7 @@ func (ctx *streamAggrCtx) Reset() {
|
||||
ctx.buf = ctx.buf[:0]
|
||||
}
|
||||
|
||||
func (ctx *streamAggrCtx) push(mrs []storage.MetricRow, matchIdxs []byte) []byte {
|
||||
func (ctx *streamAggrCtx) push(mrs []storage.MetricRow, matchIdxs []uint32) []uint32 {
|
||||
mn := &ctx.mn
|
||||
tss := ctx.tss
|
||||
labels := ctx.labels
|
||||
@@ -248,7 +249,7 @@ func (ctx *streamAggrCtx) push(mrs []storage.MetricRow, matchIdxs []byte) []byte
|
||||
if sas.IsEnabled() {
|
||||
matchIdxs = sas.Push(tss, matchIdxs)
|
||||
} else if deduplicator != nil {
|
||||
matchIdxs = bytesutil.ResizeNoCopyMayOverallocate(matchIdxs, len(tss))
|
||||
matchIdxs = slicesutil.SetLength(matchIdxs, len(tss))
|
||||
for i := range matchIdxs {
|
||||
matchIdxs[i] = 1
|
||||
}
|
||||
|
||||
@@ -86,7 +86,7 @@ func loadRelabelConfig() (*promrelabel.ParsedConfigs, error) {
|
||||
if len(*relabelConfig) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
pcs, err := promrelabel.LoadRelabelConfigs(*relabelConfig)
|
||||
pcs, _, err := promrelabel.LoadRelabelConfigs(*relabelConfig)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error when reading -relabelConfig=%q: %w", *relabelConfig, err)
|
||||
}
|
||||
|
||||
@@ -31,6 +31,9 @@ vmrestore-linux-ppc64le-prod:
|
||||
vmrestore-linux-386-prod:
|
||||
APP_NAME=vmrestore EXTRA_GO_BUILD_TAGS=$(VMRESTORE_GO_BUILD_TAGS) $(MAKE) app-via-docker-linux-386
|
||||
|
||||
vmrestore-linux-s390x-prod:
|
||||
APP_NAME=vmrestore EXTRA_GO_BUILD_TAGS=$(VMRESTORE_GO_BUILD_TAGS) $(MAKE) app-via-docker-linux-s390x
|
||||
|
||||
vmrestore-darwin-amd64-prod:
|
||||
APP_NAME=vmrestore EXTRA_GO_BUILD_TAGS=$(VMRESTORE_GO_BUILD_TAGS) $(MAKE) app-via-docker-darwin-amd64
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -37,10 +37,10 @@
|
||||
<meta property="og:title" content="UI for VictoriaMetrics">
|
||||
<meta property="og:url" content="https://victoriametrics.com/">
|
||||
<meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data">
|
||||
<script type="module" crossorigin src="./assets/index-D13qGB62.js"></script>
|
||||
<script type="module" crossorigin src="./assets/index-zpalCSif.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-DY9kCvzk.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-D1GxaB_c.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-I8MVeF75.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-CBxdwuZH.css">
|
||||
</head>
|
||||
<body>
|
||||
<noscript>You need to enable JavaScript to run this app.</noscript>
|
||||
|
||||
8
app/vmui/packages/vmui/package-lock.json
generated
8
app/vmui/packages/vmui/package-lock.json
generated
@@ -17,7 +17,7 @@
|
||||
"react-input-mask": "^2.0.4",
|
||||
"react-router-dom": "^7.6.3",
|
||||
"uplot": "^1.6.32",
|
||||
"vite": "^7.1.5",
|
||||
"vite": "^7.1.11",
|
||||
"web-vitals": "^5.0.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
@@ -7660,9 +7660,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "7.1.5",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-7.1.5.tgz",
|
||||
"integrity": "sha512-4cKBO9wR75r0BeIWWWId9XK9Lj6La5X846Zw9dFfzMRw38IlTk2iCcUt6hsyiDRcPidc55ZParFYDXi0nXOeLQ==",
|
||||
"version": "7.1.11",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-7.1.11.tgz",
|
||||
"integrity": "sha512-uzcxnSDVjAopEUjljkWh8EIrg6tlzrjFUfMcR1EVsRDGwf/ccef0qQPRyOrROwhrTDaApueq+ja+KLPlzR/zdg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"esbuild": "^0.25.0",
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
"react-input-mask": "^2.0.4",
|
||||
"react-router-dom": "^7.6.3",
|
||||
"uplot": "^1.6.32",
|
||||
"vite": "^7.1.5",
|
||||
"vite": "^7.1.11",
|
||||
"web-vitals": "^5.0.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
@@ -4,10 +4,10 @@ import { Alert as APIAlert } from "../../../types";
|
||||
import { createSearchParams } from "react-router-dom";
|
||||
import Button from "../../Main/Button/Button";
|
||||
import Badges, { BadgeColor } from "../Badges";
|
||||
import { formatEventTime } from "../helpers";
|
||||
import {
|
||||
SearchIcon,
|
||||
} from "../../Main/Icons";
|
||||
import dayjs from "dayjs";
|
||||
import CodeExample from "../../Main/CodeExample/CodeExample";
|
||||
|
||||
interface BaseAlertProps {
|
||||
@@ -66,7 +66,7 @@ const BaseAlert = ({ item }: BaseAlertProps) => {
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Active at</td>
|
||||
<td>{dayjs(item.activeAt).format("DD MMM YYYY HH:mm:ss")}</td>
|
||||
<td>{formatEventTime(item.activeAt)}</td>
|
||||
</tr>
|
||||
{!!Object.keys(alertLabels).length && (
|
||||
<tr>
|
||||
@@ -82,7 +82,7 @@ const BaseAlert = ({ item }: BaseAlertProps) => {
|
||||
</table>
|
||||
{!!Object.keys(item.annotations || {}).length && (
|
||||
<>
|
||||
<span className="title">Annotations</span>
|
||||
<span className="vm-alerts-title">Annotations</span>
|
||||
<table>
|
||||
<colgroup>
|
||||
<col className="vm-col-md"/>
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import { useMemo } from "preact/compat";
|
||||
import "./style.scss";
|
||||
import { Group as APIGroup } from "../../../types";
|
||||
import dayjs from "dayjs";
|
||||
import { formatDuration } from "../helpers";
|
||||
import { formatDuration, formatEventTime } from "../helpers";
|
||||
import Badges, { BadgeColor } from "../Badges";
|
||||
|
||||
interface BaseGroupProps {
|
||||
@@ -48,12 +47,10 @@ const BaseGroup = ({ group }: BaseGroupProps) => {
|
||||
<td>{formatDuration(group.interval)}</td>
|
||||
</tr>
|
||||
)}
|
||||
{!!group.lastEvaluation && (
|
||||
<tr>
|
||||
<td className="vm-col-md">Last evaluation</td>
|
||||
<td>{dayjs(group.lastEvaluation).format("DD MMM YYYY HH:mm:ss")}</td>
|
||||
</tr>
|
||||
)}
|
||||
<tr>
|
||||
<td className="vm-col-md">Last evaluation</td>
|
||||
<td>{formatEventTime(group.lastEvaluation)}</td>
|
||||
</tr>
|
||||
{!!group.eval_offset && (
|
||||
<tr>
|
||||
<td className="vm-col-md">Eval offset</td>
|
||||
|
||||
@@ -6,8 +6,7 @@ import { SearchIcon, DetailsIcon } from "../../Main/Icons";
|
||||
import Button from "../../Main/Button/Button";
|
||||
import Alert from "../../Main/Alert/Alert";
|
||||
import Badges, { BadgeColor } from "../Badges";
|
||||
import dayjs from "dayjs";
|
||||
import { formatDuration } from "../helpers";
|
||||
import { formatDuration, formatEventTime } from "../helpers";
|
||||
import CodeExample from "../../Main/CodeExample/CodeExample";
|
||||
|
||||
interface BaseRuleProps {
|
||||
@@ -80,12 +79,10 @@ const BaseRule = ({ item }: BaseRuleProps) => {
|
||||
<td>{formatDuration(item.duration)}</td>
|
||||
</tr>
|
||||
)}
|
||||
{!!item.lastEvaluation && (
|
||||
<tr>
|
||||
<td>Last evaluation</td>
|
||||
<td>{dayjs(item.lastEvaluation).format("DD MMM YYYY HH:mm:ss")}</td>
|
||||
</tr>
|
||||
)}
|
||||
<tr>
|
||||
<td>Last evaluation</td>
|
||||
<td>{formatEventTime(item.lastEvaluation)}</td>
|
||||
</tr>
|
||||
{!!item.lastError && item.health !== "ok" && (
|
||||
<tr>
|
||||
<td>Last error</td>
|
||||
@@ -108,7 +105,7 @@ const BaseRule = ({ item }: BaseRuleProps) => {
|
||||
</table>
|
||||
{!!Object.keys(item?.annotations || {}).length && (
|
||||
<>
|
||||
<span className="title">Annotations</span>
|
||||
<span className="vm-alerts-title">Annotations</span>
|
||||
<table>
|
||||
<colgroup>
|
||||
<col className="vm-col-md"/>
|
||||
@@ -127,7 +124,7 @@ const BaseRule = ({ item }: BaseRuleProps) => {
|
||||
)}
|
||||
{!!item?.updates?.length && (
|
||||
<>
|
||||
<span className="title">{`Last updates ${item.updates.length}/${item.max_updates_entries}`}</span>
|
||||
<span className="vm-alerts-title">{`Last updates ${item.updates.length}/${item.max_updates_entries}`}</span>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
@@ -143,11 +140,11 @@ const BaseRule = ({ item }: BaseRuleProps) => {
|
||||
<tr
|
||||
key={update.at}
|
||||
>
|
||||
<td>{dayjs(update.time).format("DD MMM YYYY HH:mm:ss")}</td>
|
||||
<td>{formatEventTime(update.time)}</td>
|
||||
<td>{update.samples}</td>
|
||||
<td>{update.series_fetched}</td>
|
||||
<td>{formatDuration(update.duration / 1e9)}</td>
|
||||
<td>{dayjs(update.at).format("DD MMM YYYY HH:mm:ss")}</td>
|
||||
<td>{formatEventTime(update.at)}</td>
|
||||
</tr>
|
||||
))}
|
||||
</tbody>
|
||||
@@ -156,7 +153,7 @@ const BaseRule = ({ item }: BaseRuleProps) => {
|
||||
)}
|
||||
{!!item?.alerts?.length && (
|
||||
<>
|
||||
<span className="title">Alerts</span>
|
||||
<span className="vm-alerts-title">Alerts</span>
|
||||
<table>
|
||||
<colgroup>
|
||||
<col className="vm-col-sm"/>
|
||||
@@ -170,7 +167,7 @@ const BaseRule = ({ item }: BaseRuleProps) => {
|
||||
<th>Active since</th>
|
||||
<th>State</th>
|
||||
<th>Value</th>
|
||||
<th className="title">Labels</th>
|
||||
<th className="vm-alerts-title">Labels</th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
@@ -180,9 +177,7 @@ const BaseRule = ({ item }: BaseRuleProps) => {
|
||||
id={`alert-${alert.id}`}
|
||||
key={alert.id}
|
||||
>
|
||||
<td>
|
||||
{dayjs(alert.activeAt).format("DD MMM YYYY HH:mm:ss")}
|
||||
</td>
|
||||
<td>{formatEventTime(alert.activeAt)}</td>
|
||||
<td>
|
||||
<Badges
|
||||
items={{ [alert.state]: { color: alert.state as BadgeColor } }}
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
|
||||
.title {
|
||||
.vm-alerts-title {
|
||||
font-weight: bold;
|
||||
text-align: center;
|
||||
}
|
||||
@@ -48,11 +48,13 @@
|
||||
line-height: 30px;
|
||||
padding: 4px $padding-small;
|
||||
vertical-align: middle;
|
||||
white-space: nowrap;
|
||||
text-align: left;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}
|
||||
th {
|
||||
white-space: nowrap;
|
||||
}
|
||||
td.align-center {
|
||||
text-align: center
|
||||
}
|
||||
|
||||
@@ -13,3 +13,8 @@ export const formatDuration = (raw: number) => {
|
||||
}
|
||||
return duration.format(fmt.join(" "));
|
||||
};
|
||||
|
||||
export const formatEventTime = (raw: string) => {
|
||||
const t = dayjs(raw);
|
||||
return t.year() <= 1 ? "Never" : t.format("DD MMM YYYY HH:mm:ss");
|
||||
}
|
||||
|
||||
@@ -192,7 +192,7 @@ export interface Group {
|
||||
rules: Rule[];
|
||||
interval: number;
|
||||
limit: number;
|
||||
lastEvaluation: number;
|
||||
lastEvaluation: string;
|
||||
evaluationTime: number;
|
||||
type: string;
|
||||
id: string;
|
||||
@@ -216,7 +216,7 @@ export interface Rule {
|
||||
annotations: Record<string, string>;
|
||||
alerts: Alert[];
|
||||
health: string;
|
||||
lastEvaluation: number;
|
||||
lastEvaluation: string;
|
||||
lastError: string;
|
||||
evaluationTime: number;
|
||||
type: string;
|
||||
@@ -247,7 +247,7 @@ export interface Alert {
|
||||
expression: string;
|
||||
labels: Record<string, string>;
|
||||
annotations: Record<string, string>;
|
||||
activeAt: number;
|
||||
activeAt: string;
|
||||
id: string;
|
||||
source: string;
|
||||
restored: boolean;
|
||||
|
||||
@@ -2,8 +2,10 @@ package tests
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -34,6 +36,29 @@ func TestSingleVMAgentReloadConfigs(t *testing.T) {
|
||||
fmt.Sprintf(`-remoteWrite.urlRelabelConfig=%s`, relabelFilePath),
|
||||
}, ``)
|
||||
|
||||
checkResponse := func(query, expResponse string) {
|
||||
t.Helper()
|
||||
resp, err := http.Get(query)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot get response from %s: %s", query, err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
t.Fatalf("unexpected response from %s: %s", query, resp.Status)
|
||||
}
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot read response from %s: %s", query, err)
|
||||
}
|
||||
if !strings.Contains(string(body), expResponse) {
|
||||
t.Fatalf("expected to get\n%s\nbut got\n%s", expResponse, string(body))
|
||||
}
|
||||
}
|
||||
|
||||
vmagentAddr := fmt.Sprintf("http://%s", vmagent.HTTPAddr())
|
||||
checkResponse(vmagentAddr+"/remotewrite-url-relabel-config", "replacement: value1")
|
||||
checkResponse(vmagentAddr+"/api/v1/status/remotewrite-url-relabel-config", "replacement: value1")
|
||||
|
||||
vmagent.APIV1ImportPrometheus(t, []string{
|
||||
"foo_bar 1 1652169600000", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
@@ -63,6 +88,9 @@ func TestSingleVMAgentReloadConfigs(t *testing.T) {
|
||||
|
||||
vmagent.ReloadRelabelConfigs(t)
|
||||
|
||||
checkResponse(vmagentAddr+"/remotewrite-url-relabel-config", "replacement: value2")
|
||||
checkResponse(vmagentAddr+"/api/v1/status/remotewrite-url-relabel-config", "replacement: value2")
|
||||
|
||||
vmagent.APIV1ImportPrometheus(t, []string{
|
||||
"bar_foo 1 1652169600001", // 2022-05-10T08:00:00Z
|
||||
}, apptest.QueryOpts{})
|
||||
|
||||
@@ -156,6 +156,12 @@ func (app *Vmagent) ReloadRelabelConfigs(t *testing.T) {
|
||||
t.Fatalf("relabel configs were not reloaded after SIGHUP signal; previous total: %f, current total: %f", prevTotal, currTotal)
|
||||
}
|
||||
|
||||
// HTTPAddr returns the address at which the vmagent process is listening
|
||||
// for http connections.
|
||||
func (app *Vmagent) HTTPAddr() string {
|
||||
return app.httpListenAddr
|
||||
}
|
||||
|
||||
// sendBlocking sends the data to vmstorage by executing `send` function and
|
||||
// waits until the data is actually sent.
|
||||
//
|
||||
|
||||
@@ -4609,6 +4609,7 @@
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"filterable": true,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
@@ -4670,6 +4671,8 @@
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"showSearch": true,
|
||||
"filterable": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
|
||||
@@ -4680,6 +4680,7 @@
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"filterable": true,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
@@ -4752,7 +4753,9 @@
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
"showHeader": true,
|
||||
"showSearch": true,
|
||||
"filterable": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"targets": [
|
||||
|
||||
@@ -1994,7 +1994,7 @@
|
||||
"baseFilters": [],
|
||||
"datasource": {
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
"uid": "PE8D8DB4BEE4E4B22"
|
||||
},
|
||||
"filters": [],
|
||||
"name": "adhoc",
|
||||
|
||||
@@ -1169,7 +1169,7 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99,sum(rate(controller_runtime_reconcile_time_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by(le,controller) )",
|
||||
"expr": "histogram_quantile(0.99, sum(rate(controller_runtime_reconcile_time_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le, controller) )",
|
||||
"legendFormat": "q.99 {{controller}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
@@ -1266,7 +1266,7 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(rest_client_requests_total{job=~\"$job\"}[$__interval])) by (method,code)",
|
||||
"expr": "sum(rate(rest_client_requests_total{job=~\"$job\"}[$__interval])) by (method, code)",
|
||||
"instant": false,
|
||||
"legendFormat": "{{method}} {{code}}",
|
||||
"range": true,
|
||||
@@ -1490,7 +1490,7 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "max(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (job, instance, le))) by(job)",
|
||||
"expr": "max(histogram_quantile(0.99, sum(rate(go_sched_latencies_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (job, instance, le))) by (job)",
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
@@ -1589,7 +1589,7 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99,sum(rate(rest_client_request_duration_seconds_bucket{job=~\"$job\"})) by(le,method,api) )",
|
||||
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{job=~\"$job\"}[$__rate_interval])) by (le, method, api))",
|
||||
"instant": false,
|
||||
"legendFormat": "{{method}} {{api}}",
|
||||
"range": true,
|
||||
|
||||
@@ -4609,6 +4609,7 @@
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"filterable": true,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
@@ -4670,6 +4671,8 @@
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"showSearch": true,
|
||||
"filterable": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
@@ -5637,7 +5640,7 @@
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-datasource",
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows the approx time needed to reach 100% of allowed disk capacity for at least one vmstorage node based on the following params:\n* free disk space (after -storage.minFreeDiskSpaceBytes);\n* row ingestion rate;\n* compression.",
|
||||
@@ -5735,7 +5738,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-datasource",
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
@@ -10294,7 +10297,7 @@
|
||||
"uid": "$ds"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "(vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"}-vm_free_disk_space_limit_bytes{job=~\"$job_storage\", instance=~\"$instance\"}) \n/ \nignoring(path) (\n (rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d]) - \n sum(rate(vm_deduplicated_samples_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])) without(type)) * \n (\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"}) without(type) /\n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"}) without(type)\n )\n +\n rate(vm_new_timeseries_created_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d]) * \n (\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type=\"indexdb/file\"}) without(type) /\n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type=\"indexdb/file\"}) without(type) \n )\n) > 0",
|
||||
"expr": "(vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"}-vm_free_disk_space_limit_bytes{job=~\"$job_storage\", instance=~\"$instance\"}) \n/ \nignoring(path) (\n (rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d]) - \n sum(rate(vm_deduplicated_samples_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])) without(type)) * \n (\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"}) without(type) /\n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"}) without(type)\n )\n +\n rate(vm_new_timeseries_created_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d]) * \n (\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type=\"indexdb/file\"}) without(type) /\n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type=\"indexdb/file\"}) without(type)\n )\n) > 0",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
@@ -10646,8 +10649,8 @@
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "victoriametrics-metrics-datasource",
|
||||
"value": "ceuqoq3dxttkwb"
|
||||
"text": "VictoriaMetrics - cluster",
|
||||
"value": "PAF93674D0B4E9963"
|
||||
},
|
||||
"includeAll": false,
|
||||
"name": "ds",
|
||||
|
||||
@@ -4681,6 +4681,7 @@
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"filterable": true,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
@@ -4753,7 +4754,9 @@
|
||||
],
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true
|
||||
"showHeader": true,
|
||||
"showSearch": true,
|
||||
"filterable": true
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"targets": [
|
||||
|
||||
@@ -4192,6 +4192,7 @@
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"filterable": true,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
@@ -4253,6 +4254,8 @@
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"showSearch": true,
|
||||
"filterable": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
|
||||
@@ -2509,6 +2509,7 @@
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"filterable": true,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
@@ -2570,6 +2571,8 @@
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"showSearch": true,
|
||||
"filterable": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
@@ -4238,4 +4241,4 @@
|
||||
"title": "VictoriaMetrics - vmalert (VM)",
|
||||
"uid": "LzldHAVnz_vm",
|
||||
"version": 1
|
||||
}
|
||||
}
|
||||
@@ -2238,6 +2238,7 @@
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"filterable": true,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
@@ -2300,6 +2301,8 @@
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"showSearch": true,
|
||||
"filterable": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
@@ -2652,7 +2655,7 @@
|
||||
{
|
||||
"datasource": {
|
||||
"type": "victoriametrics-datasource",
|
||||
"uid": "$ds"
|
||||
"uid": "P38648FE0F8C5BEA2"
|
||||
},
|
||||
"filters": [],
|
||||
"hide": 0,
|
||||
|
||||
@@ -4191,6 +4191,7 @@
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"filterable": true,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
@@ -4252,6 +4253,8 @@
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"showSearch": true,
|
||||
"filterable": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
|
||||
@@ -2508,6 +2508,7 @@
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"filterable": true,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
@@ -2569,6 +2570,8 @@
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"showSearch": true,
|
||||
"filterable": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
|
||||
@@ -2237,6 +2237,7 @@
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"filterable": true,
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
@@ -2299,6 +2300,8 @@
|
||||
"show": false
|
||||
},
|
||||
"showHeader": true,
|
||||
"showSearch": true,
|
||||
"filterable": true,
|
||||
"sortBy": [
|
||||
{
|
||||
"desc": true,
|
||||
|
||||
@@ -176,6 +176,9 @@ app-via-docker-linux-ppc64le:
|
||||
app-via-docker-linux-386:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=386 $(MAKE) app-via-docker-goos-goarch
|
||||
|
||||
app-via-docker-linux-s390x:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=s390x $(MAKE) app-via-docker-goos-goarch
|
||||
|
||||
app-via-docker-darwin-amd64:
|
||||
CGO_ENABLED=0 GOOS=darwin GOARCH=amd64 $(MAKE) app-via-docker-goos-goarch
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forward them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.127.0
|
||||
image: victoriametrics/vmagent:v1.129.1
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -37,14 +37,14 @@ services:
|
||||
# vmstorage shards. Each shard receives 1/N of all metrics sent to vminserts,
|
||||
# where N is number of vmstorages (2 in this case).
|
||||
vmstorage-1:
|
||||
image: victoriametrics/vmstorage:v1.127.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.129.1-cluster
|
||||
volumes:
|
||||
- strgdata-1:/storage
|
||||
command:
|
||||
- "--storageDataPath=/storage"
|
||||
restart: always
|
||||
vmstorage-2:
|
||||
image: victoriametrics/vmstorage:v1.127.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.129.1-cluster
|
||||
volumes:
|
||||
- strgdata-2:/storage
|
||||
command:
|
||||
@@ -54,7 +54,7 @@ services:
|
||||
# vminsert is ingestion frontend. It receives metrics pushed by vmagent,
|
||||
# pre-process them and distributes across configured vmstorage shards.
|
||||
vminsert-1:
|
||||
image: victoriametrics/vminsert:v1.127.0-cluster
|
||||
image: victoriametrics/vminsert:v1.129.1-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -63,7 +63,7 @@ services:
|
||||
- "--storageNode=vmstorage-2:8400"
|
||||
restart: always
|
||||
vminsert-2:
|
||||
image: victoriametrics/vminsert:v1.127.0-cluster
|
||||
image: victoriametrics/vminsert:v1.129.1-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -75,7 +75,7 @@ services:
|
||||
# vmselect is a query frontend. It serves read queries in MetricsQL or PromQL.
|
||||
# vmselect collects results from configured `--storageNode` shards.
|
||||
vmselect-1:
|
||||
image: victoriametrics/vmselect:v1.127.0-cluster
|
||||
image: victoriametrics/vmselect:v1.129.1-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -85,7 +85,7 @@ services:
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
restart: always
|
||||
vmselect-2:
|
||||
image: victoriametrics/vmselect:v1.127.0-cluster
|
||||
image: victoriametrics/vmselect:v1.129.1-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -100,7 +100,7 @@ services:
|
||||
# read requests from Grafana, vmui, vmalert among vmselects.
|
||||
# It can be used as an authentication proxy.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.127.0
|
||||
image: victoriametrics/vmauth:v1.129.1
|
||||
depends_on:
|
||||
- "vmselect-1"
|
||||
- "vmselect-2"
|
||||
@@ -114,7 +114,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.127.0
|
||||
image: victoriametrics/vmalert:v1.129.1
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forward them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.127.0
|
||||
image: victoriametrics/vmagent:v1.129.1
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# storing metrics and serve read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.127.0
|
||||
image: victoriametrics/victoria-metrics:v1.129.1
|
||||
ports:
|
||||
- 8428:8428
|
||||
- 8089:8089
|
||||
@@ -54,7 +54,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.127.0
|
||||
image: victoriametrics/vmalert:v1.129.1
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.127.0
|
||||
image: victoriametrics/vmagent:v1.129.1
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -14,7 +14,7 @@ services:
|
||||
restart: always
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.127.0
|
||||
image: victoriametrics/victoria-metrics:v1.129.1
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
restart: always
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.127.0
|
||||
image: victoriametrics/vmalert:v1.129.1
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
- '--external.alert.source=explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr": },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]'
|
||||
restart: always
|
||||
vmanomaly:
|
||||
image: victoriametrics/vmanomaly:v1.26.2
|
||||
image: victoriametrics/vmanomaly:v1.27.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
---
|
||||
weight: 6
|
||||
weight: 7
|
||||
title: CHANGELOG
|
||||
menu:
|
||||
docs:
|
||||
identifier: "vmanomaly-changelog"
|
||||
parent: "anomaly-detection"
|
||||
weight: 6
|
||||
weight: 7
|
||||
tags:
|
||||
- metrics
|
||||
- enterprise
|
||||
@@ -14,6 +14,24 @@ aliases:
|
||||
---
|
||||
Please find the changelog for VictoriaMetrics Anomaly Detection below.
|
||||
|
||||
## v1.27.0
|
||||
Released: 2025-10-31
|
||||
|
||||
- FEATURE: Added runtime state compatibility guard for [stateful](https://docs.victoriametrics.com/anomaly-detection/components/settings/#restore-state) deployments. The service now persists normalized versions, evaluates an [upgrade/downgrade compatibility matrix](https://docs.victoriametrics.com/anomaly-detection/migration/#compatibility-matrix), and selectively drops or reuses DB records and on-disk artifacts to keep migrations safe and automatic. Please refer to the [migration page](https://docs.victoriametrics.com/anomaly-detection/migration/) for more details.
|
||||
|
||||
- IMPROVEMENT: Parallelization now honours container cgroup CPU/RAM limits, so `settings.n_workers` in the [settings section](https://docs.victoriametrics.com/anomaly-detection/components/settings/#parallelization), internal routines and the `vmanomaly_available_memory_bytes`/`vmanomaly_cpu_cores_available` [startup metrics](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#startup-metrics) report or use container resources instead of host totals, keeping the [self-monitoring dashboard](https://docs.victoriametrics.com/anomaly-detection/self-monitoring/#grafana-dashboard) accurate.
|
||||
|
||||
- IMPROVEMENT: optimized data reading and storage in [on-disk mode](https://docs.victoriametrics.com/anomaly-detection/faq/#on-disk-mode) for both [VmReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader) and [VlogsReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#victorialogs-reader), resulting in drop of peak RAM usage during fit/infer calls (up to 2x reduction in peak RAM, depending on the configuration complexity).
|
||||
|
||||
- IMPROVEMENT: `--dryRun` [CLI argument](https://docs.victoriametrics.com/anomaly-detection/quickstart/#command-line-arguments) now also provides insights about the migration of the existing state database and on-disk artifacts, allowing to preview the changes that will be applied during the first run after upgrade/downgrade to a specific version. See the [migration page](https://docs.victoriametrics.com/anomaly-detection/migration/#dry-run) for more details.
|
||||
|
||||
- UI: Updated [vmanomaly UI](https://docs.victoriametrics.com/anomaly-detection/ui/) from [v1.0.0](https://docs.victoriametrics.com/anomaly-detection/ui/#v100) to [v1.1.0](https://docs.victoriametrics.com/anomaly-detection/ui/#v110). Please refer to the [UI changelog](https://docs.victoriametrics.com/anomaly-detection/ui/#changelog) for more details.
|
||||
|
||||
- BUGFIX: Fixed the bug that lead to unexpected model files drops in [on-disk mode](https://docs.victoriametrics.com/anomaly-detection/faq/#on-disk-mode) and skipping of the following inference calls due to "model instance not found" warnings. **Updating is suggested for all on-disk deployments that use cluster version of VictoriaMetrics as datasource, from versions [v1.24.0+](#v1240)**.
|
||||
|
||||
- BUGFIX: Fixed `TypeError: cannot pickle '_thread.lock' object` failures when [backtesting](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#backtesting-scheduler) schedulers run with `n_jobs>1`, resulted in stable multiprocessing behaviour.
|
||||
|
||||
|
||||
## v1.26.2
|
||||
Released: 2025-10-09
|
||||
|
||||
@@ -484,9 +502,6 @@ Released: 2024-06-11
|
||||
## v1.12.0
|
||||
Released: 2024-03-31
|
||||
- FEATURE: Introduction of `AutoTunedModel` model class to optimize any [built-in model](https://docs.victoriametrics.com/anomaly-detection/components/models/#built-in-models) on data during `fit` phase. Specify as little as `anomaly_percentage` param from `(0, 0.5)` interval and `tuned_model_class` (i.e. [`model.zscore.ZscoreModel`](https://docs.victoriametrics.com/anomaly-detection/components/models/#z-score)) to get it working with best settings that match your data. See details [here](https://docs.victoriametrics.com/anomaly-detection/components/models/#autotuned).
|
||||
<!--
|
||||
- FEATURE: Preset support enablement. From now users will be able to specify only a few parameters (like `datasource_url`) + a new (backward-compatible) `preset: preset_name` field in a config file and get a service run with **predefined queries, scheduling and models**. Also, now preset assets (guide, configs, dashboards) will be available at `:8490/presets` endpoint.
|
||||
-->
|
||||
- IMPROVEMENT: Better logging of model lifecycle (fit/infer stages).
|
||||
- IMPROVEMENT: Introduce `provide_series` arg to all the [built-in models](https://docs.victoriametrics.com/anomaly-detection/components/models/#built-in-models) to define what output fields to generate for writing (i.e. `provide_series: ['anomaly_score']` means only scores are being produced)
|
||||
- BUGFIX: [Self-monitoring metrics](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#models-behaviour-metrics) are now aggregated to `queries` aliases level (not to label sets of individual timeseries) and aligned with [reader, writer and model sections](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#metrics-generated-by-vmanomaly) description, so `/metrics` endpoint holds only necessary information for scraping.
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
---
|
||||
weight: 5
|
||||
weight: 6
|
||||
title: FAQ
|
||||
menu:
|
||||
docs:
|
||||
identifier: "vmanomaly-faq"
|
||||
parent: "anomaly-detection"
|
||||
weight: 5
|
||||
weight: 6
|
||||
aliases:
|
||||
- /anomaly-detection/FAQ.html
|
||||
---
|
||||
@@ -133,6 +133,10 @@ Please refer to the [state restoration section](https://docs.victoriametrics.com
|
||||
|
||||
`vmanomaly` can be deployed in various environments, including Docker, Kubernetes, and VM Operator. For detailed deployment instructions, refer to the [QuickStart section](https://docs.victoriametrics.com/anomaly-detection/quickstart/#how-to-install-and-run-vmanomaly).
|
||||
|
||||
## Migration
|
||||
|
||||
For information on migrating between different versions of `vmanomaly`, please refer to the [Migration section](https://docs.victoriametrics.com/anomaly-detection/migration/) for compatibility considerations and steps for a smooth transition.
|
||||
|
||||
## Choosing the right model for vmanomaly
|
||||
Selecting the best model for `vmanomaly` depends on the data's nature and the [types of anomalies](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-2/#categories-of-anomalies) to detect. For instance, [Z-score](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-z-score) is suitable for data without trends or seasonality, while more complex patterns might require models like [Prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet).
|
||||
|
||||
@@ -397,7 +401,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.26.2
|
||||
image: victoriametrics/vmanomaly:v1.27.0
|
||||
# ...
|
||||
ports:
|
||||
- "8490:8490"
|
||||
@@ -612,7 +616,7 @@ options:
|
||||
Here’s an example of using the config splitter to divide configurations based on the `extra_filters` argument from the reader section:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.26.2 && docker image tag victoriametrics/vmanomaly:v1.26.2 vmanomaly
|
||||
docker pull victoriametrics/vmanomaly:v1.27.0 && docker image tag victoriametrics/vmanomaly:v1.27.0 vmanomaly
|
||||
```
|
||||
|
||||
```sh
|
||||
|
||||
82
docs/anomaly-detection/Migration.md
Normal file
82
docs/anomaly-detection/Migration.md
Normal file
@@ -0,0 +1,82 @@
|
||||
---
|
||||
weight: 5
|
||||
title: Migration
|
||||
menu:
|
||||
docs:
|
||||
identifier: "vmanomaly-migration"
|
||||
parent: "anomaly-detection"
|
||||
weight: 5
|
||||
tags:
|
||||
- metrics
|
||||
- enterprise
|
||||
- migration
|
||||
aliases:
|
||||
- /anomaly-detection/migration/
|
||||
- /anomaly-detection/migration/index.html
|
||||
---
|
||||
|
||||
## Introduction
|
||||
|
||||
This document provides guidelines for migrating to the latest version of [VictoriaMetrics Anomaly Detection](https://docs.victoriametrics.com/anomaly-detection/) (`vmanomaly`). It covers the key changes, compatibility considerations, and best practices to ensure a smooth transition for [stateful](#stateful-mode) and [stateless](#stateless-mode) modes of operation.
|
||||
|
||||
> **Upgrading to [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270) or newer is recommended to benefit from simplified migration process.**
|
||||
|
||||
## Dry Run
|
||||
|
||||
The `--dryRun` [command-line argument](https://docs.victoriametrics.com/anomaly-detection/quickstart/#command-line-arguments) allows {{% available_from "v1.27.0" anomaly %}} to simulate the migration process without making any actual changes. This is useful for identifying potential issues and understanding the impact of the migration before applying it, e.g. dropping of existing state database or on-disk artifacts for all (or some) of the configured models and data. Starting from version [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270), the upgrade impact to any new version can be assessed by running `vmanomaly` with the `--dryRun` flag **automatically**. Downgrade check from v1.27.0 (or newer) to earlier versions than [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) **requires setting env variable** `VMANOMALY_STATE_VERSION_OVERRIDE=<version>`, e.g.:
|
||||
|
||||
```bash
|
||||
export VMANOMALY_STATE_VERSION_OVERRIDE=1.25.2
|
||||
```
|
||||
|
||||
## Compatibility Matrix
|
||||
|
||||
This section outlines the compatibility of different `vmanomaly` versions with various components, including data, models, and configuration formats, for both [stateful](#stateful-mode) and [stateless](#stateless-mode) modes.
|
||||
|
||||
> Refer to the **global** [changelog](https://docs.victoriametrics.com/anomaly-detection/changelog/) for detailed information on changes in each version. Use `--dryRun` {{% available_from "v1.27.0" anomaly %}} mode to check for compatibility issues before performing the actual migration. See [Dry Run](#dry-run) section for more details.
|
||||
|
||||
### Stateful Mode
|
||||
|
||||
> Used if `settings.restore_state` is set to `true`. See argument details in the [configuration documentation](https://docs.victoriametrics.com/anomaly-detection/components/settings/#state-restoration).
|
||||
|
||||
There are 2 types of compatibility to consider when migrating in stateful mode:
|
||||
- **Global (in)compatibility**: The new version can seamlessly read and utilize the existing state without any modifications or data loss. Or, in case of incompatibility, the existing state must be dropped completely to proceed with the migration.
|
||||
- **Component (in)compatibility**: The new version may introduce changes that affect specific components (e.g., specific models, data formats) but can still operate with the existing state with some adjustments or drop of incompatible on disk artifacts.
|
||||
|
||||
| Group start | Group end | Compatibility | Notes |
|
||||
|---------|--------- |------------|-------|
|
||||
| [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270) | Latest* | Fully Compatible | Just a placeholder for new releases |
|
||||
| [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1260) | [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270) | Fully Compatible | - |
|
||||
| [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) | [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1260) | Partially Compatible* | [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) introduced `forecast_at` argument for base [univariate](https://docs.victoriametrics.com/anomaly-detection/components/models/#univariate-models) and `Prophet` [models](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet), however, itself remains backward-reversible from newer states like [v1.26.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262), [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270). (All models except `isolation_forest_multivariate` class will be dropped) |
|
||||
| [v1.25.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1251) | [v1.25.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1252) | Fully Compatible | In [v1.25.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1251) there was a change to `vmanomaly.db` metadata database format, so migrating from v1.24.0-v1.25.0 requires deletion of a state, see note above the table |
|
||||
| [v1.24.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1241) | [v1.25.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1250) | Partially Compatible* | In [v1.25.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1250) there were changes to **data dump layout** and to `online_quantile` and `isolation_forest_multivariate` [model](https://docs.victoriametrics.com/anomaly-detection/components/models/) states, so to migrate from v1.24.0-v1.24.1 it is recommended to drop the state |
|
||||
| [v1.24.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1240) | [v1.24.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1241) | Fully Compatible | - |
|
||||
| [v1.23.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1233) and earlier | [v1.24.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1240) | Fully Incompatible* | *As no state (prior to [v1.24.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1240)) existed, it was not saved (even if [on-disk mode](https://docs.victoriametrics.com/anomaly-detection/faq/#on-disk-mode) was used). Also, see config breaking changes list in [stateless](https://docs.victoriametrics.com/anomaly-detection/migration/#stateless-mode) mode |
|
||||
|
||||
### Clearing State
|
||||
|
||||
For releases [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270) and newer, the migration process is automatically handled by `vmanomaly` when started with `settings.restore_state: true`, so no manual intervention is required to clear existing state if incompatible.
|
||||
|
||||
However, for releases [v1.24.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1240) - [v1.26.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262), to clear the existing state (if ended with `settings.restore_state: true`), please **manually delete** the existing state database and on-disk artifacts before starting the new version of `vmanomaly` - either:
|
||||
- Manually delete the content of `VMANOMALY_MODEL_DUMPS_DIR` / `VMANOMALY_DATA_DUMPS_DIR` folders or
|
||||
- Set `settings.restore_state: false` in the config for the first run of the new version, then stop `vmanomaly`, set back `settings.restore_state: true`, and restart `vmanomaly`.
|
||||
|
||||
|
||||
### Stateless Mode
|
||||
|
||||
> Used if `settings.restore_state` is set to `false`. See argument details in the [configuration documentation](https://docs.victoriametrics.com/anomaly-detection/components/settings/#state-restoration).
|
||||
|
||||
In stateless mode, the migration process is almost straightforward as there are no persistent states to manage. One may simply upgrade the `vmanomaly` service to the latest version and restart it, up to a slight change in the config .YAML files for backward-incompatible changes, see the list below.
|
||||
|
||||
**Breaking Changes**
|
||||
|
||||
- [v1.12.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1120) **ARIMA** model is removed from [built-in models](https://docs.victoriametrics.com/anomaly-detection/components/models/#built-in-models); Action: replace ARIMA by [Prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) or alternative seasonal models in `model(s)` section of your configuration files.
|
||||
|
||||
- [v1.9.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v190) The `sampling_period` parameter is now mandatory in `VmReader`. This change aims to clarify and standardize the frequency of input/output in `vmanomaly`, thereby reducing uncertainty and aligning with user expectations; Action: Add the `sampling_period` parameter to your `VmReader` configuration, e.g.:
|
||||
|
||||
```yaml
|
||||
reader:
|
||||
# Other VmReader settings...
|
||||
sampling_period: 1m
|
||||
...
|
||||
```
|
||||
@@ -151,7 +151,7 @@ To analyze anomalies effectively, start from a high-level overview and progressi
|
||||
|
||||
5. **Zoom in on the most anomalous groups:**
|
||||
- Focus on the most affected category (`context_switch` in this case).
|
||||
- Notice when the anomaly score first exceeded the threshold—around 15:35 in the example.
|
||||
- Notice when the anomaly score first exceeded the threshold — around 15:35 in the example.
|
||||
|
||||

|
||||
|
||||
|
||||
@@ -121,13 +121,13 @@ Below are the steps to get `vmanomaly` up and running inside a Docker container:
|
||||
1. Pull Docker image:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.26.2
|
||||
docker pull victoriametrics/vmanomaly:v1.27.0
|
||||
```
|
||||
|
||||
2. (Optional step) tag the `vmanomaly` Docker image:
|
||||
|
||||
```sh
|
||||
docker image tag victoriametrics/vmanomaly:v1.26.2 vmanomaly
|
||||
docker image tag victoriametrics/vmanomaly:v1.27.0 vmanomaly
|
||||
```
|
||||
|
||||
3. Start the `vmanomaly` Docker container with a *license file*, use the command below.
|
||||
@@ -163,7 +163,7 @@ docker run -it --user 1000:1000 \
|
||||
services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
image: victoriametrics/vmanomaly:v1.26.2
|
||||
image: victoriametrics/vmanomaly:v1.27.0
|
||||
volumes:
|
||||
$YOUR_LICENSE_FILE_PATH:/license
|
||||
$YOUR_CONFIG_FILE_PATH:/config.yml
|
||||
@@ -208,7 +208,7 @@ To run `vmanomaly`, use YAML files or directories containing YAML files. The con
|
||||
> vmanomaly config1.yaml config2.yaml ./config_dir/
|
||||
> ```
|
||||
|
||||
Before deploying, check the correctness of your configuration validate config file(s) with `--dryRun` [command-line](#command-line-arguments) flag for chosen deployment method (Docker, Kubernetes, etc.). This will parse and merge all YAML files, run schema checks, log errors and warnings (if found) and then exit without starting the service and requiring a license.
|
||||
Before deploying, check the correctness of your configuration: validate config file(s) with `--dryRun` [command-line](#command-line-arguments) flag for chosen deployment method (Docker, Kubernetes, etc.). This will parse and merge all YAML files, run schema checks, log errors and warnings (if found) and then exit without starting the service and requiring a license. {{% available_from "v1.27.0" anomaly %}} it can also be used to check for migration compatibility issues when upgrading to a newer version of `vmanomaly`. See [Migration](https://docs.victoriametrics.com/anomaly-detection/migration/) section for more details.
|
||||
|
||||
### Example
|
||||
|
||||
@@ -292,7 +292,7 @@ For optimal service behavior, consider the following tweaks when configuring `vm
|
||||
- Define queries for input data using [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) under `reader.queries` section. Note, it's possible to override reader-level arguments at query level for increased flexibility, e.g. specifying per-query [timezone](https://docs.victoriametrics.com/anomaly-detection/faq/#handling-timezones) or [sampling period](https://docs.victoriametrics.com/anomaly-detection/components/reader/#sampling-period).
|
||||
- For longer `fit_window` intervals in scheduler, consider splitting queries into smaller time ranges to avoid excessive memory usage, timeouts and hitting server-side constraints, so they can be queried separately and reconstructed on `vmanomaly` side. Please refer to this [example](https://docs.victoriametrics.com/anomaly-detection/faq/#handling-large-queries-in-vmanomaly) for more details.
|
||||
|
||||
> If applicable - consider trying [`VLogsReader`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vlogs-reader) {{% available_from "v1.26.0" anomaly %}} to perform anomaly detection on log-derived **metrics**. This is particularly useful for scenarios where log data needs to be analyzed for unusual patterns or behaviors, such as error rates or request latencies.
|
||||
> If applicable - consider [`VLogsReader`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vlogs-reader) {{% available_from "v1.26.0" anomaly %}} to perform anomaly detection on **log-derived metrics**. This is particularly useful for scenarios where log data needs to be analyzed for unusual patterns or behaviors, such as error rates or request latencies.
|
||||
|
||||
**Writer**:
|
||||
- Specify where and how to store anomaly detection metrics in the [writer](https://docs.victoriametrics.com/anomaly-detection/components/writer/) section.
|
||||
@@ -320,5 +320,7 @@ Please refer to the following links for a deeper understanding of Anomaly Detect
|
||||
- [State Restoration](https://docs.victoriametrics.com/anomaly-detection/components/settings/#state-restoration)
|
||||
- [Guide: Anomaly Detection and Alerting Setup](https://docs.victoriametrics.com/anomaly-detection/guides/guide-vmanomaly-vmalert/)
|
||||
- [FAQ](https://docs.victoriametrics.com/anomaly-detection/faq/)
|
||||
- [Migration Guide](https://docs.victoriametrics.com/anomaly-detection/migration/)
|
||||
- [CHANGELOG](https://docs.victoriametrics.com/anomaly-detection/changelog/)
|
||||
- [UI CHANGELOG](https://docs.victoriametrics.com/anomaly-detection/ui/#changelog)
|
||||
- [Anomaly Detection Blog](https://victoriametrics.com/tags/anomaly-detection/)
|
||||
|
||||
@@ -67,6 +67,8 @@ Get started with VictoriaMetrics Anomaly Detection by following our guides and i
|
||||
|
||||
- **Self-Monitoring**: Ensure `vmanomaly` is functioning optimally, using provided Grafana dashboards and alerting rules to track service health and operational metrics. Find the guide [here](https://docs.victoriametrics.com/anomaly-detection/self-monitoring/).
|
||||
|
||||
- **Migration**: For information on migrating between different versions of `vmanomaly`, please refer to the [Migration section](https://docs.victoriametrics.com/anomaly-detection/migration/) for compatibility considerations and steps for a smooth transition.
|
||||
|
||||
> Starting from [v1.5.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v150) `vmanomaly` requires a [license key](https://docs.victoriametrics.com/anomaly-detection/quickstart/#licensing) to run. You can obtain a trial license key [**here**](https://victoriametrics.com/products/enterprise/trial/).
|
||||
|
||||
## Key Components
|
||||
|
||||
@@ -37,6 +37,37 @@ server:
|
||||
|
||||
For impactful parameters please refer to [optimize resource usage](#optimize-resource-usage) section of this page.
|
||||
|
||||
## Authentication
|
||||
|
||||
{{% available_from "v1.27.0" anomaly %}} The vmanomaly UI supports proxying authentication headers from [v1.1.0](#v110) and onwards.
|
||||
|
||||
Consider using [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/) in front of both vmanomaly (UI and API) and data sources (VictoriaMetrics / VictoriaLogs) to enforce end-to-end setup for accessing the data from the UI.
|
||||
|
||||
> Please refer to [config format](https://docs.victoriametrics.com/victoriametrics/vmauth/#auth-config) of `vmauth` and check [generic proxy example for different backends](https://docs.victoriametrics.com/victoriametrics/vmauth/#generic-http-proxy-for-different-backends)
|
||||
|
||||
Use the following example configuration snippet for `vmauth` to proxy auth headers to VictoriaMetrics, VictoriaLogs and vmanomaly instances:
|
||||
|
||||
```yaml
|
||||
users:
|
||||
- username: '<username>'
|
||||
password: '<password>'
|
||||
url_map:
|
||||
- src_hosts:
|
||||
- "metrics.local.some-domain.net"
|
||||
url_prefix: "http://victoriametrics:8428"
|
||||
- src_hosts:
|
||||
- "vl.local.some-domain.net"
|
||||
url_prefix: "http://victorialogs:9428"
|
||||
- src_hosts:
|
||||
- "vmanomaly.local.some-domain.net"
|
||||
url_prefix: "http://vmanomaly:8490"
|
||||
keep_original_host: true
|
||||
```
|
||||
|
||||
Then, on [settings panel](#settings-panel) of the UI, set the URLs accordingly, also check the option to forward auth headers to the datasource:
|
||||
|
||||

|
||||
|
||||
## Preset
|
||||
|
||||
Vmanomaly can be deployed in efficient "UI mode" [preset](https://docs.victoriametrics.com/anomaly-detection/presets/#ui), with as simple configuration as:
|
||||
@@ -111,7 +142,7 @@ A form-based menu for finetuning model hyperparameters and applying domain knowl
|
||||
|
||||
- Model type selection (e.g., rolling quantile, Prophet, etc.)
|
||||

|
||||
- Wizard with **model-agnostic parameters** (e.g., detection direction, data range, scale, clipping, minimum deviation from expected, etc.) and **model-specific hyperparameters** for chosen model type (e.g., quantile and window steps for [rolling quantile](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-quantile) model).
|
||||
- Wizard with **model-agnostic parameters** (e.g., detection direction, data range, scale, clipping, minimum deviation from expected, etc.) and **model-specific hyperparameters** for chosen model type (e.g., quantile and window steps for [rolling quantile](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-quantile) model). {{% available_from "v1.27.0" anomaly %}} autocomplete of example parameters by hitting Tab key is supported.
|
||||

|
||||
|
||||
[Back to UI navigation](#ui-navigation)
|
||||
@@ -124,6 +155,7 @@ The vmui-like "Settings" panel allows users to configure global settings and pre
|
||||
- Datasource URL (VictoriaMetrics, VictoriaLogs)
|
||||
- Timezone
|
||||
- UI Theme
|
||||
- {{% available_from "v1.27.0" anomaly %}} Auth Headers forwarding to datasource (VictoriaMetrics, VictoriaLogs).
|
||||
|
||||

|
||||
|
||||
@@ -157,7 +189,7 @@ Accessing the "YAML" Tab in the model configuration section
|
||||

|
||||
|
||||
|
||||
Clicking the "Open Config" button to access (model-only or full) configuration and hitting "Download" button to get the configuration as a YAML file.
|
||||
Clicking the "Show Config" button to access (model-only or full) configuration and hitting "Download" button to get the configuration as a YAML file.
|
||||
|
||||

|
||||
|
||||
@@ -320,6 +352,31 @@ If the **results** do not look good, the model hyperparameters and domain knowle
|
||||
|
||||
If the **results** look good, but should be shared with others first, timeseries can be downloaded as files by hitting the respective button in the Model Panel. See also [configuration sharing](#configuration-sharing) section for details.
|
||||
|
||||
If the **results** look good and the **model configuration should be deployed in production jobs of anomaly detection**, the equivalent configuration in production-ready YAML format can be obtained by accessing the "YAML" Tab in the model configuration section and hitting the "Open Config" button to access (model-only or full) configuration and hitting "Download" button to get the configuration as a YAML file.
|
||||
If the **results** look good and the **model configuration should be deployed in production jobs of anomaly detection**, the equivalent configuration in production-ready YAML format can be obtained by accessing the "YAML" Tab in the model configuration section and hitting the "Show Config" button to access (model-only or full) configuration and hitting "Download" button to get the configuration as a YAML file.
|
||||
|
||||

|
||||

|
||||
|
||||
## Changelog
|
||||
|
||||
### v1.1.0
|
||||
Released: 2025-10-31
|
||||
|
||||
vmanomaly version: [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270)
|
||||
|
||||
- FEATURE: Added support of auth headers, that can be forwarded from vmanomaly to datasources configured in the UI (VictoriaMetrics, VictoriaLogs), see [authentication](#authentication) for details.
|
||||
|
||||
- FEATURE: queries (`config.reader.queries.[xxx].expr` values) if [mixed mode is used](#mixed-usage) are read from the server reader config on the first UI initialization, to ease the exploration of existing production queries without the need to retype them in the UI. Press "show history" button next to "execute query" button (in the [Query Explorer](#query-explorer) section) and choose the tab "Server Queries".
|
||||
|
||||
- IMPROVEMENT: autocomplete of example parameters for model param fields by hitting Tab key in the [Model Settings wizard](#model-panel), e.g. hitting Tab in the "detection direction" field will cycle through the available options.
|
||||
|
||||
- IMPROVEMENT: anomaly threshold element on [Model Panel](#model-panel) is now reactive - no need to refit a model ("Detect Anomalies" button) to see the effect from changed anomaly threshold value.
|
||||
|
||||
- IMPROVEMENT: datasource value is initialized from the server reader config (on the first UI initialization) if [mixed mode is used](#mixed-usage). Can be reset to the default value anytime by hitting the "Reset to Default" button next to the datasource field in the [Settings Panel](#settings-panel).
|
||||
|
||||
|
||||
### v1.0.0
|
||||
Released: 2025-10-02
|
||||
|
||||
vmanomaly version: [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1260)
|
||||
|
||||
Initial public release of the vmanomaly UI.
|
||||
@@ -1312,7 +1312,7 @@ monitoring:
|
||||
Let's pull the docker image for `vmanomaly`:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.26.2
|
||||
docker pull victoriametrics/vmanomaly:v1.27.0
|
||||
```
|
||||
|
||||
Now we can run the docker container putting as volumes both config and model file:
|
||||
@@ -1326,7 +1326,7 @@ docker run -it \
|
||||
-v $(PWD)/license:/license \
|
||||
-v $(PWD)/custom_model.py:/vmanomaly/model/custom.py \
|
||||
-v $(PWD)/custom.yaml:/config.yaml \
|
||||
victoriametrics/vmanomaly:v1.26.2 /config.yaml \
|
||||
victoriametrics/vmanomaly:v1.27.0 /config.yaml \
|
||||
--licenseFile=/license
|
||||
--watch
|
||||
```
|
||||
|
||||
@@ -10,9 +10,9 @@ sitemap:
|
||||
|
||||
- To use *vmanomaly*, part of the enterprise package, a license key is required. Obtain your key [here](https://victoriametrics.com/products/enterprise/trial/) for this tutorial or for enterprise use.
|
||||
- In the tutorial, we'll be using the following VictoriaMetrics components:
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.127.0)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.127.0)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.127.0)
|
||||
- [VictoriaMetrics Single-Node](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) (v1.129.1)
|
||||
- [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/) (v1.129.1)
|
||||
- [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) (v1.129.1)
|
||||
- [Grafana](https://grafana.com/) (v.10.2.1)
|
||||
- [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/)
|
||||
- [Node exporter](https://github.com/prometheus/node_exporter#node-exporter) (v1.7.0) and [Alertmanager](https://prometheus.io/docs/alerting/latest/alertmanager/) (v0.27.0)
|
||||
@@ -323,7 +323,7 @@ Let's wrap it all up together into the `docker-compose.yml` file.
|
||||
services:
|
||||
vmagent:
|
||||
container_name: vmagent
|
||||
image: victoriametrics/vmagent:v1.127.0
|
||||
image: victoriametrics/vmagent:v1.129.1
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -340,7 +340,7 @@ services:
|
||||
|
||||
victoriametrics:
|
||||
container_name: victoriametrics
|
||||
image: victoriametrics/victoria-metrics:v1.127.0
|
||||
image: victoriametrics/victoria-metrics:v1.129.1
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -373,7 +373,7 @@ services:
|
||||
|
||||
vmalert:
|
||||
container_name: vmalert
|
||||
image: victoriametrics/vmalert:v1.127.0
|
||||
image: victoriametrics/vmalert:v1.129.1
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -395,7 +395,7 @@ services:
|
||||
restart: always
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.26.2
|
||||
image: victoriametrics/vmanomaly:v1.27.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 2.0 KiB After Width: | Height: | Size: 2.4 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 90 KiB After Width: | Height: | Size: 227 KiB |
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user