Compare commits
73 Commits
graphite-w
...
debug-grou
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
06054b8a73 | ||
|
|
9197cc8c4c | ||
|
|
1998469cb6 | ||
|
|
faba8b985b | ||
|
|
d233a409d9 | ||
|
|
96cbd6fff3 | ||
|
|
b1d009b13a | ||
|
|
57ce00a5c6 | ||
|
|
cfb53cbfb9 | ||
|
|
febafc1cf1 | ||
|
|
f1f70e976e | ||
|
|
5aa0a75ff8 | ||
|
|
d83f142c63 | ||
|
|
a07cae3279 | ||
|
|
8cda999238 | ||
|
|
2d6cf8827d | ||
|
|
c59ca79f2b | ||
|
|
be5ae9b95c | ||
|
|
60aef0510f | ||
|
|
b3b555c09c | ||
|
|
c57ea02564 | ||
|
|
5983d27b00 | ||
|
|
d36f7b6b49 | ||
|
|
70ab2c1585 | ||
|
|
c854816642 | ||
|
|
285e3d2a63 | ||
|
|
95175e00b4 | ||
|
|
d21d9e8382 | ||
|
|
235daa6208 | ||
|
|
10f4a86540 | ||
|
|
79cfffb984 | ||
|
|
23e2379c28 | ||
|
|
e761f22049 | ||
|
|
fb579cf592 | ||
|
|
fd0d764720 | ||
|
|
fe8aaa8885 | ||
|
|
b903fc29ec | ||
|
|
a6833ffd08 | ||
|
|
4516a58df9 | ||
|
|
5ad7b645e6 | ||
|
|
51a53014c8 | ||
|
|
e47abd6385 | ||
|
|
c04a5a597d | ||
|
|
e695d5f425 | ||
|
|
2bb03f6e34 | ||
|
|
92f03344eb | ||
|
|
e3360b87ff | ||
|
|
4c98b912fa | ||
|
|
225e2e870b | ||
|
|
2b078301c1 | ||
|
|
14090c5a07 | ||
|
|
66d47f23e4 | ||
|
|
eacdb80ed7 | ||
|
|
504cf31dab | ||
|
|
34d190b32a | ||
|
|
44fa216bb5 | ||
|
|
4589442345 | ||
|
|
78ad4b974c | ||
|
|
d12524749f | ||
|
|
1a5235a18f | ||
|
|
27847dbbb8 | ||
|
|
33fab3a2d6 | ||
|
|
695b21ecfc | ||
|
|
4ba488f806 | ||
|
|
1e046d35a8 | ||
|
|
8c9b202c94 | ||
|
|
060423141d | ||
|
|
0ee16ff2e5 | ||
|
|
b22853b97f | ||
|
|
b578fe9817 | ||
|
|
e9b7adc0e5 | ||
|
|
82eab5c5b7 | ||
|
|
5b4ab4456e |
16
.github/workflows/check-commit-signed.yml
vendored
@@ -27,11 +27,21 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
unsigned=$(git log --pretty="%H %G?" $RANGE | grep -vE " (G|E)$" || true)
|
||||
# Check raw commit objects for a "gpgsig" header as a fast early signal for
|
||||
# contributors. Both GPG and SSH signatures use this header.
|
||||
# This avoids relying on %G? which returns N for SSH commits.
|
||||
# This check is not a security enforcement — unsigned commits cannot be merged
|
||||
# anyway due to the GitHub repository merge policy.
|
||||
unsigned=""
|
||||
for sha in $(git rev-list $RANGE); do
|
||||
if ! git cat-file commit "$sha" | grep -q "^gpgsig"; then
|
||||
unsigned="$unsigned $sha"
|
||||
fi
|
||||
done
|
||||
if [ -n "$unsigned" ]; then
|
||||
echo "Found unsigned commits:"
|
||||
echo "$unsigned"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "All commits in PR are signed (G or E)"
|
||||
|
||||
echo "All commits in PR are signed (GPG or SSH)"
|
||||
2
.github/workflows/docs.yaml
vendored
@@ -28,7 +28,7 @@ jobs:
|
||||
path: __vm-docs
|
||||
|
||||
- name: Import GPG key
|
||||
uses: crazy-max/ghaction-import-gpg@v6
|
||||
uses: crazy-max/ghaction-import-gpg@v7
|
||||
id: import-gpg
|
||||
with:
|
||||
gpg_private_key: ${{ secrets.VM_BOT_GPG_PRIVATE_KEY }}
|
||||
|
||||
2
.github/workflows/test.yml
vendored
@@ -89,7 +89,7 @@ jobs:
|
||||
run: make ${{ matrix.scenario}}
|
||||
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v5
|
||||
uses: codecov/codecov-action@v6
|
||||
with:
|
||||
files: ./coverage.txt
|
||||
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
# VictoriaMetrics
|
||||
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||

|
||||
[](https://hub.docker.com/u/victoriametrics)
|
||||
[](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml)
|
||||
[](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
|
||||
[](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
|
||||

|
||||
[](https://slack.victoriametrics.com)
|
||||
[](https://x.com/VictoriaMetrics/)
|
||||
[](https://www.reddit.com/r/VictoriaMetrics/)
|
||||
|
||||
|
||||
@@ -98,7 +98,7 @@ func (m *manager) close() {
|
||||
m.wg.Wait()
|
||||
}
|
||||
|
||||
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
|
||||
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) {
|
||||
id := g.GetID()
|
||||
g.Init()
|
||||
m.wg.Go(func() {
|
||||
@@ -110,7 +110,6 @@ func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) e
|
||||
})
|
||||
|
||||
m.groups[id] = g
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
|
||||
@@ -119,7 +118,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
for _, cfg := range groupsCfg {
|
||||
for _, r := range cfg.Rules {
|
||||
if rrPresent && arPresent {
|
||||
continue
|
||||
break
|
||||
}
|
||||
if r.Record != "" {
|
||||
rrPresent = true
|
||||
@@ -162,10 +161,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
}
|
||||
}
|
||||
for _, ng := range groupsRegistry {
|
||||
if err := m.startGroup(ctx, ng, restore); err != nil {
|
||||
m.groupsMu.Unlock()
|
||||
return err
|
||||
}
|
||||
m.startGroup(ctx, ng, restore)
|
||||
}
|
||||
m.groupsMu.Unlock()
|
||||
|
||||
|
||||
@@ -789,16 +789,7 @@ func firingAlertStaleTimeSeries(ls map[string]string, timestamp int64) []prompb.
|
||||
|
||||
// restore restores the value of ActiveAt field for active alerts,
|
||||
// based on previously written time series `alertForStateMetricName`.
|
||||
// Only rules with For > 0 can be restored.
|
||||
func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
|
||||
if ar.For < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if len(ar.alerts) < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
nameStr := fmt.Sprintf("%s=%q", alertNameLabel, ar.Name)
|
||||
if !*disableAlertGroupLabel {
|
||||
nameStr = fmt.Sprintf("%s=%q,%s=%q", alertGroupNameLabel, ar.GroupName, alertNameLabel, ar.Name)
|
||||
|
||||
@@ -8,6 +8,7 @@ import (
|
||||
"hash/fnv"
|
||||
"maps"
|
||||
"net/url"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -213,6 +214,7 @@ func (g *Group) CreateID() uint64 {
|
||||
// restore restores alerts state for group rules
|
||||
func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts time.Time, lookback time.Duration) error {
|
||||
for _, rule := range g.Rules {
|
||||
// Only alerting rule with for > 0 and has active alerts from the first evaluation can be restored
|
||||
ar, ok := rule.(*AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
@@ -220,6 +222,9 @@ func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts ti
|
||||
if ar.For < 1 {
|
||||
continue
|
||||
}
|
||||
if len(ar.alerts) < 1 {
|
||||
return nil
|
||||
}
|
||||
q := qb.BuildWithParams(datasource.QuerierParams{
|
||||
EvaluationInterval: g.Interval,
|
||||
QueryParams: g.Params,
|
||||
@@ -333,6 +338,11 @@ func (g *Group) Init() {
|
||||
// Start starts group's evaluation
|
||||
func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
|
||||
defer func() { close(g.finishedCh) }()
|
||||
e := &executor{
|
||||
Rw: rw,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
}
|
||||
|
||||
evalTS := time.Now()
|
||||
// sleep random duration to spread group rules evaluation
|
||||
// over maxStartDelay to reduce the load on datasource.
|
||||
@@ -367,11 +377,6 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
evalTS = evalTS.Add(sleepBeforeStart)
|
||||
}
|
||||
|
||||
e := &executor{
|
||||
Rw: rw,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
}
|
||||
|
||||
g.infof("started")
|
||||
|
||||
eval := func(ctx context.Context, ts time.Time) time.Time {
|
||||
@@ -381,7 +386,9 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
|
||||
if len(g.Rules) < 1 {
|
||||
g.metrics.iterationDuration.UpdateDuration(start)
|
||||
g.mu.Lock()
|
||||
g.LastEvaluation = start
|
||||
g.mu.Unlock()
|
||||
return ts
|
||||
}
|
||||
|
||||
@@ -395,7 +402,32 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
}
|
||||
}
|
||||
g.metrics.iterationDuration.UpdateDuration(start)
|
||||
g.mu.Lock()
|
||||
g.LastEvaluation = start
|
||||
g.mu.Unlock()
|
||||
if g.EvalOffset != nil && e.Rw != nil {
|
||||
hostname, err := os.Hostname()
|
||||
if err != nil {
|
||||
hostname = "unknown"
|
||||
}
|
||||
labels := map[string]string{
|
||||
"__name__": "vmalert_eval_timestamp",
|
||||
"host": hostname,
|
||||
"group": g.Name,
|
||||
"file": g.File,
|
||||
}
|
||||
var ls []prompb.Label
|
||||
for k, v := range labels {
|
||||
ls = append(ls, prompb.Label{
|
||||
Name: k,
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
ts := newTimeSeries([]float64{float64(ts.Unix())}, []int64{start.Unix()}, ls)
|
||||
if err := e.Rw.Push(ts); err != nil {
|
||||
logger.Errorf("group %q: failed to push evaluation timestamp: %s", g.Name, err)
|
||||
}
|
||||
}
|
||||
return ts
|
||||
}
|
||||
|
||||
@@ -405,11 +437,11 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
g.mu.Unlock()
|
||||
defer g.evalCancel()
|
||||
|
||||
realEvalTS := eval(evalCtx, evalTS)
|
||||
|
||||
t := time.NewTicker(g.Interval)
|
||||
defer t.Stop()
|
||||
|
||||
realEvalTS := eval(evalCtx, evalTS)
|
||||
|
||||
// restore the rules state after the first evaluation
|
||||
// so only active alerts can be restored.
|
||||
if rr != nil {
|
||||
|
||||
@@ -57,12 +57,8 @@ type ApiGroup struct {
|
||||
EvalOffset float64 `json:"eval_offset,omitempty"`
|
||||
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
|
||||
EvalDelay float64 `json:"eval_delay,omitempty"`
|
||||
// Unhealthy unhealthy rules count
|
||||
Unhealthy int
|
||||
// Healthy passing rules count
|
||||
Healthy int
|
||||
// NoMatch not matching rules count
|
||||
NoMatch int
|
||||
// States represents counts per each rule state
|
||||
States map[string]int `json:"states"`
|
||||
}
|
||||
|
||||
// APILink returns a link to the group's JSON representation.
|
||||
@@ -134,6 +130,11 @@ type ApiRule struct {
|
||||
Updates []StateEntry `json:"-"`
|
||||
}
|
||||
|
||||
// IsNoMatch returns true if rule is in nomatch state
|
||||
func (r *ApiRule) IsNoMatch() bool {
|
||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||
}
|
||||
|
||||
// ApiAlert represents a notifier.AlertingRule state
|
||||
// for WEB view
|
||||
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||
@@ -235,6 +236,20 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
||||
return aa
|
||||
}
|
||||
|
||||
func (r *ApiRule) ExtendState() {
|
||||
if len(r.Alerts) > 0 {
|
||||
return
|
||||
}
|
||||
if r.State == "" {
|
||||
r.State = "ok"
|
||||
}
|
||||
if r.Health != "ok" {
|
||||
r.State = "unhealthy"
|
||||
} else if r.IsNoMatch() {
|
||||
r.State = "nomatch"
|
||||
}
|
||||
}
|
||||
|
||||
// ToAPI returns ApiGroup representation of g
|
||||
func (g *Group) ToAPI() *ApiGroup {
|
||||
g.mu.RLock()
|
||||
@@ -252,6 +267,7 @@ func (g *Group) ToAPI() *ApiGroup {
|
||||
Headers: headersToStrings(g.Headers),
|
||||
NotifierHeaders: headersToStrings(g.NotifierHeaders),
|
||||
Labels: g.Labels,
|
||||
States: make(map[string]int),
|
||||
}
|
||||
if g.EvalOffset != nil {
|
||||
ag.EvalOffset = g.EvalOffset.Seconds()
|
||||
@@ -259,9 +275,10 @@ func (g *Group) ToAPI() *ApiGroup {
|
||||
if g.EvalDelay != nil {
|
||||
ag.EvalDelay = g.EvalDelay.Seconds()
|
||||
}
|
||||
ag.Rules = make([]ApiRule, 0)
|
||||
ag.Rules = make([]ApiRule, 0, len(g.Rules))
|
||||
for _, r := range g.Rules {
|
||||
ag.Rules = append(ag.Rules, r.ToAPI())
|
||||
ar := r.ToAPI()
|
||||
ag.Rules = append(ag.Rules, ar)
|
||||
}
|
||||
return &ag
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
<path d="M224.163 175.27a1.9 1.9 0 0 0 2.8 0l6-5.9a2.1 2.1 0 0 0 .2-2.7 1.9 1.9 0 0 0-3-.2l-2.6 2.6v-5.2c0-1.54-1.667-2.502-3-1.732-.619.357-1 1.017-1 1.732v5.2l-2.6-2.6a1.9 1.9 0 0 0-3 .2 2.1 2.1 0 0 0 .2 2.7zm-16.459-23.297h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1m36 4h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1m-16.59-23.517a1.9 1.9 0 0 0-2.8 0l-6 5.9a2.1 2.1 0 0 0-.2 2.7 1.9 1.9 0 0 0 3 .2l2.6-2.6v5.2c0 1.54 1.667 2.502 3 1.732.619-.357 1-1.017 1-1.732v-5.2l2.6 2.6a1.9 1.9 0 0 0 3-.2 2.1 2.1 0 0 0-.2-2.7z"/>
|
||||
</symbol>
|
||||
|
||||
<symbol id="filter" viewBox="-10 -10 320 310">
|
||||
<symbol id="state" viewBox="-10 -10 320 310">
|
||||
<path d="M288.953 0h-277c-5.522 0-10 4.478-10 10v49.531c0 5.522 4.478 10 10 10h12.372l91.378 107.397v113.978a10 10 0 0 0 15.547 8.32l49.5-33a10 10 0 0 0 4.453-8.32v-80.978l91.378-107.397h12.372c5.522 0 10-4.478 10-10V10c0-5.522-4.477-10-10-10M167.587 166.77a10 10 0 0 0-2.384 6.48v79.305l-29.5 19.666V173.25a10 10 0 0 0-2.384-6.48L50.585 69.531h199.736zM278.953 49.531h-257V20h257z"/>
|
||||
</symbol>
|
||||
|
||||
|
||||
|
Before Width: | Height: | Size: 4.7 KiB After Width: | Height: | Size: 4.7 KiB |
@@ -8,9 +8,9 @@ function actionAll(isCollapse) {
|
||||
});
|
||||
}
|
||||
|
||||
function groupFilter(key) {
|
||||
function groupForState(key) {
|
||||
if (key) {
|
||||
location.href = `?filter=${key}`;
|
||||
location.href = `?state=${key}`;
|
||||
} else {
|
||||
window.location = window.location.pathname;
|
||||
}
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"net/http"
|
||||
"slices"
|
||||
"strconv"
|
||||
@@ -50,6 +52,7 @@ var (
|
||||
"alert": rule.TypeAlerting,
|
||||
"record": rule.TypeRecording,
|
||||
}
|
||||
ruleStates = []string{"ok", "nomatch", "inactive", "firing", "pending", "unhealthy"}
|
||||
)
|
||||
|
||||
type requestHandler struct {
|
||||
@@ -63,6 +66,14 @@ var (
|
||||
staticServer = http.StripPrefix("/vmalert", staticHandler)
|
||||
)
|
||||
|
||||
func marshalJson(v any, kind string) ([]byte, *httpserver.ErrorWithStatusCode) {
|
||||
data, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf("failed to marshal %s: %s", kind, err), http.StatusInternalServerError)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if strings.HasPrefix(r.URL.Path, "/vmalert/static") {
|
||||
staticServer.ServeHTTP(w, r)
|
||||
@@ -94,40 +105,32 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
WriteRuleDetails(w, r, rule)
|
||||
WriteRule(w, r, rule)
|
||||
return true
|
||||
case "/vmalert/groups":
|
||||
// current used by old vmalert UI and Grafana Alerts
|
||||
case "/vmalert/groups", "/rules":
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data := rh.groups(rf)
|
||||
WriteListGroups(w, r, data, rf.filter)
|
||||
// only support filtering by a single state
|
||||
state := ""
|
||||
if len(rf.states) > 0 {
|
||||
state = rf.states[0]
|
||||
rf.states = rf.states[:1]
|
||||
}
|
||||
lr := rh.groups(rf)
|
||||
WriteListGroups(w, r, lr.Data.Groups, state)
|
||||
return true
|
||||
case "/vmalert/notifiers":
|
||||
WriteListTargets(w, r, notifier.GetTargets())
|
||||
return true
|
||||
|
||||
// special cases for Grafana requests,
|
||||
// served without `vmalert` prefix:
|
||||
case "/rules":
|
||||
// Grafana makes an extra request to `/rules`
|
||||
// handler in addition to `/api/v1/rules` calls in alerts UI
|
||||
var data []*rule.ApiGroup
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data = rh.groups(rf)
|
||||
WriteListGroups(w, r, data, rf.filter)
|
||||
return true
|
||||
|
||||
case "/vmalert/api/v1/notifiers", "/api/v1/notifiers":
|
||||
data, err := rh.listNotifiers()
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
errJson(w, r, err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -135,15 +138,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
case "/vmalert/api/v1/rules", "/api/v1/rules":
|
||||
// path used by Grafana for ng alerting
|
||||
var data []byte
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
errJson(w, r, err)
|
||||
return true
|
||||
}
|
||||
data, err = rh.listGroups(rf)
|
||||
data, err := rh.listGroups(rf)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
errJson(w, r, err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -152,14 +154,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
|
||||
case "/vmalert/api/v1/alerts", "/api/v1/alerts":
|
||||
// path used by Grafana for ng alerting
|
||||
rf, err := newRulesFilter(r)
|
||||
gf, err := newGroupsFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
errJson(w, r, err)
|
||||
return true
|
||||
}
|
||||
data, err := rh.listAlerts(rf)
|
||||
data, err := rh.listAlerts(gf)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
errJson(w, r, err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -168,12 +170,12 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case "/vmalert/api/v1/alert", "/api/v1/alert":
|
||||
alert, err := rh.getAlert(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
errJson(w, r, err)
|
||||
return true
|
||||
}
|
||||
data, err := json.Marshal(alert)
|
||||
data, err := marshalJson(alert, "alert")
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "failed to marshal alert: %s", err)
|
||||
errJson(w, r, err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -182,16 +184,16 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case "/vmalert/api/v1/rule", "/api/v1/rule":
|
||||
apiRule, err := rh.getRule(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
errJson(w, r, err)
|
||||
return true
|
||||
}
|
||||
rwu := rule.ApiRuleWithUpdates{
|
||||
ApiRule: apiRule,
|
||||
StateUpdates: apiRule.Updates,
|
||||
}
|
||||
data, err := json.Marshal(rwu)
|
||||
data, err := marshalJson(rwu, "rule")
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "failed to marshal rule: %s", err)
|
||||
errJson(w, r, err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -200,12 +202,12 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case "/vmalert/api/v1/group", "/api/v1/group":
|
||||
group, err := rh.getGroup(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
errJson(w, r, err)
|
||||
return true
|
||||
}
|
||||
data, err := json.Marshal(group)
|
||||
data, err := marshalJson(group, "group")
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "failed to marshal group: %s", err)
|
||||
errJson(w, r, err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -225,10 +227,10 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, error) {
|
||||
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, *httpserver.ErrorWithStatusCode) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
||||
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
|
||||
}
|
||||
obj, err := rh.m.groupAPI(groupID)
|
||||
if err != nil {
|
||||
@@ -237,14 +239,14 @@ func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, error) {
|
||||
return obj, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, error) {
|
||||
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, *httpserver.ErrorWithStatusCode) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
||||
return rule.ApiRule{}, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
|
||||
}
|
||||
ruleID, err := strconv.ParseUint(r.FormValue(rule.ParamRuleID), 10, 64)
|
||||
if err != nil {
|
||||
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err)
|
||||
return rule.ApiRule{}, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err), http.StatusBadRequest)
|
||||
}
|
||||
obj, err := rh.m.ruleAPI(groupID, ruleID)
|
||||
if err != nil {
|
||||
@@ -253,14 +255,14 @@ func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, error) {
|
||||
return obj, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
|
||||
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, *httpserver.ErrorWithStatusCode) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
||||
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
|
||||
}
|
||||
alertID, err := strconv.ParseUint(r.FormValue(rule.ParamAlertID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err)
|
||||
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err), http.StatusBadRequest)
|
||||
}
|
||||
a, err := rh.m.alertAPI(groupID, alertID)
|
||||
if err != nil {
|
||||
@@ -270,28 +272,76 @@ func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
|
||||
}
|
||||
|
||||
type listGroupsResponse struct {
|
||||
Status string `json:"status"`
|
||||
Data struct {
|
||||
Status string `json:"status"`
|
||||
Page int `json:"page,omitempty"`
|
||||
TotalPages int `json:"total_pages,omitempty"`
|
||||
TotalGroups int `json:"total_groups,omitempty"`
|
||||
TotalRules int `json:"total_rules,omitempty"`
|
||||
Data struct {
|
||||
Groups []*rule.ApiGroup `json:"groups"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
// see https://prometheus.io/docs/prometheus/latest/querying/api/#rules
|
||||
type rulesFilter struct {
|
||||
files []string
|
||||
groupNames []string
|
||||
ruleNames []string
|
||||
ruleType string
|
||||
excludeAlerts bool
|
||||
filter string
|
||||
dsType config.Type
|
||||
type groupsFilter struct {
|
||||
groupNames []string
|
||||
files []string
|
||||
dsType config.Type
|
||||
}
|
||||
|
||||
func newRulesFilter(r *http.Request) (*rulesFilter, error) {
|
||||
rf := &rulesFilter{}
|
||||
query := r.URL.Query()
|
||||
func newGroupsFilter(r *http.Request) (*groupsFilter, *httpserver.ErrorWithStatusCode) {
|
||||
_ = r.ParseForm()
|
||||
vs := r.Form
|
||||
gf := &groupsFilter{
|
||||
groupNames: vs["rule_group[]"],
|
||||
files: vs["file[]"],
|
||||
}
|
||||
dsType := vs.Get("datasource_type")
|
||||
if len(dsType) > 0 {
|
||||
if config.SupportedType(dsType) {
|
||||
gf.dsType = config.NewRawType(dsType)
|
||||
} else {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
return gf, nil
|
||||
}
|
||||
|
||||
ruleTypeParam := query.Get("type")
|
||||
func (gf *groupsFilter) matches(group *rule.Group) bool {
|
||||
if len(gf.groupNames) > 0 && !slices.Contains(gf.groupNames, group.Name) {
|
||||
return false
|
||||
}
|
||||
if len(gf.files) > 0 && !slices.Contains(gf.files, group.File) {
|
||||
return false
|
||||
}
|
||||
if len(gf.dsType.Name) > 0 && gf.dsType.String() != group.Type.String() {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// see https://prometheus.io/docs/prometheus/latest/querying/api/#rules
|
||||
type rulesFilter struct {
|
||||
gf *groupsFilter
|
||||
ruleNames []string
|
||||
ruleType string
|
||||
excludeAlerts bool
|
||||
states []string
|
||||
maxGroups int
|
||||
pageNum int
|
||||
search string
|
||||
extendedStates bool
|
||||
}
|
||||
|
||||
func newRulesFilter(r *http.Request) (*rulesFilter, *httpserver.ErrorWithStatusCode) {
|
||||
gf, err := newGroupsFilter(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var rf rulesFilter
|
||||
rf.gf = gf
|
||||
vs := r.Form
|
||||
ruleTypeParam := vs.Get("type")
|
||||
if len(ruleTypeParam) > 0 {
|
||||
if ruleType, ok := ruleTypeMap[ruleTypeParam]; ok {
|
||||
rf.ruleType = ruleType
|
||||
@@ -300,102 +350,146 @@ func newRulesFilter(r *http.Request) (*rulesFilter, error) {
|
||||
}
|
||||
}
|
||||
|
||||
dsType := query.Get("datasource_type")
|
||||
if len(dsType) > 0 {
|
||||
if config.SupportedType(dsType) {
|
||||
rf.dsType = config.NewRawType(dsType)
|
||||
} else {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
|
||||
}
|
||||
states := vs["state"]
|
||||
if len(states) == 0 {
|
||||
states = vs["filter"]
|
||||
}
|
||||
|
||||
filter := strings.ToLower(query.Get("filter"))
|
||||
if len(filter) > 0 {
|
||||
if filter == "nomatch" || filter == "unhealthy" {
|
||||
rf.filter = filter
|
||||
} else {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "filter": not supported value %q`, filter), http.StatusBadRequest)
|
||||
for _, s := range states {
|
||||
values := strings.Split(s, ",")
|
||||
for _, v := range values {
|
||||
if len(v) == 0 {
|
||||
continue
|
||||
}
|
||||
if !slices.Contains(ruleStates, v) {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "state": contains not supported value %q`, v), http.StatusBadRequest)
|
||||
}
|
||||
rf.states = append(rf.states, v)
|
||||
}
|
||||
}
|
||||
|
||||
rf.excludeAlerts = httputil.GetBool(r, "exclude_alerts")
|
||||
rf.ruleNames = append([]string{}, r.Form["rule_name[]"]...)
|
||||
rf.groupNames = append([]string{}, r.Form["rule_group[]"]...)
|
||||
rf.files = append([]string{}, r.Form["file[]"]...)
|
||||
return rf, nil
|
||||
rf.extendedStates = httputil.GetBool(r, "extended_states")
|
||||
rf.ruleNames = append([]string{}, vs["rule_name[]"]...)
|
||||
rf.search = strings.ToLower(vs.Get("search"))
|
||||
|
||||
pageNum := vs.Get("page_num")
|
||||
maxGroups := vs.Get("group_limit")
|
||||
if pageNum != "" {
|
||||
if maxGroups == "" {
|
||||
return nil, errResponse(fmt.Errorf(`"group_limit" needs to be present in order to paginate over the groups`), http.StatusBadRequest)
|
||||
}
|
||||
v, err := strconv.Atoi(pageNum)
|
||||
if err != nil || v <= 0 {
|
||||
return nil, errResponse(fmt.Errorf(`"page_num" is expected to be a positive number, found %q`, pageNum), http.StatusBadRequest)
|
||||
}
|
||||
rf.pageNum = v
|
||||
}
|
||||
if maxGroups != "" {
|
||||
v, err := strconv.Atoi(maxGroups)
|
||||
if err != nil || v <= 0 {
|
||||
return nil, errResponse(fmt.Errorf(`"group_limit" is expected to be a positive number, found %q`, maxGroups), http.StatusBadRequest)
|
||||
}
|
||||
rf.maxGroups = v
|
||||
}
|
||||
return &rf, nil
|
||||
}
|
||||
|
||||
func (rf *rulesFilter) matchesGroup(group *rule.Group) bool {
|
||||
if len(rf.groupNames) > 0 && !slices.Contains(rf.groupNames, group.Name) {
|
||||
func (rf *rulesFilter) matchesRule(r *rule.ApiRule) bool {
|
||||
if rf.ruleType != "" && rf.ruleType != r.Type {
|
||||
return false
|
||||
}
|
||||
if len(rf.files) > 0 && !slices.Contains(rf.files, group.File) {
|
||||
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, r.Name) {
|
||||
return false
|
||||
}
|
||||
if len(rf.dsType.Name) > 0 && rf.dsType.String() != group.Type.String() {
|
||||
return false
|
||||
if len(rf.states) == 0 {
|
||||
return true
|
||||
}
|
||||
return true
|
||||
return slices.Contains(rf.states, r.State)
|
||||
}
|
||||
|
||||
func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
|
||||
func (rh *requestHandler) groups(rf *rulesFilter) *listGroupsResponse {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
groups := make([]*rule.ApiGroup, 0)
|
||||
skipGroups := (rf.pageNum - 1) * rf.maxGroups
|
||||
lr := &listGroupsResponse{
|
||||
Status: "success",
|
||||
}
|
||||
lr.Data.Groups = make([]*rule.ApiGroup, 0)
|
||||
if skipGroups >= len(rh.m.groups) {
|
||||
return lr
|
||||
}
|
||||
// sort list of groups for deterministic output
|
||||
groups := make([]*rule.Group, 0, len(rh.m.groups))
|
||||
for _, group := range rh.m.groups {
|
||||
if !rf.matchesGroup(group) {
|
||||
groups = append(groups, group)
|
||||
}
|
||||
|
||||
slices.SortFunc(groups, func(a, b *rule.Group) int {
|
||||
nameCmp := cmp.Compare(a.Name, b.Name)
|
||||
if nameCmp != 0 {
|
||||
return nameCmp
|
||||
}
|
||||
return cmp.Compare(a.File, b.File)
|
||||
})
|
||||
for _, group := range groups {
|
||||
if !rf.gf.matches(group) {
|
||||
continue
|
||||
}
|
||||
groupFound := len(rf.search) == 0 || strings.Contains(strings.ToLower(group.Name), rf.search) || strings.Contains(strings.ToLower(group.File), rf.search)
|
||||
g := group.ToAPI()
|
||||
// the returned list should always be non-nil
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
|
||||
filteredRules := make([]rule.ApiRule, 0)
|
||||
for _, rule := range g.Rules {
|
||||
if rf.ruleType != "" && rf.ruleType != rule.Type {
|
||||
if !groupFound && !strings.Contains(strings.ToLower(rule.Name), rf.search) {
|
||||
continue
|
||||
}
|
||||
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, rule.Name) {
|
||||
continue
|
||||
if rf.extendedStates {
|
||||
rule.ExtendState()
|
||||
}
|
||||
if (rule.LastError == "" && rf.filter == "unhealthy") || (!isNoMatch(rule) && rf.filter == "nomatch") {
|
||||
if !rf.matchesRule(&rule) {
|
||||
continue
|
||||
}
|
||||
if rf.excludeAlerts {
|
||||
rule.Alerts = nil
|
||||
}
|
||||
if rule.LastError != "" {
|
||||
g.Unhealthy++
|
||||
} else {
|
||||
g.Healthy++
|
||||
}
|
||||
if isNoMatch(rule) {
|
||||
g.NoMatch++
|
||||
}
|
||||
g.States[rule.State]++
|
||||
filteredRules = append(filteredRules, rule)
|
||||
}
|
||||
g.Rules = filteredRules
|
||||
groups = append(groups, g)
|
||||
}
|
||||
// sort list of groups for deterministic output
|
||||
slices.SortFunc(groups, func(a, b *rule.ApiGroup) int {
|
||||
if a.Name != b.Name {
|
||||
return strings.Compare(a.Name, b.Name)
|
||||
if len(g.Rules) == 0 || len(filteredRules) > 0 {
|
||||
if rf.maxGroups > 0 {
|
||||
lr.TotalGroups++
|
||||
lr.TotalRules += len(filteredRules)
|
||||
}
|
||||
if skipGroups > 0 {
|
||||
skipGroups--
|
||||
continue
|
||||
}
|
||||
if rf.maxGroups == 0 || len(lr.Data.Groups) < rf.maxGroups {
|
||||
g.Rules = filteredRules
|
||||
lr.Data.Groups = append(lr.Data.Groups, g)
|
||||
}
|
||||
}
|
||||
return strings.Compare(a.File, b.File)
|
||||
})
|
||||
return groups
|
||||
}
|
||||
if rf.maxGroups > 0 {
|
||||
lr.Page = rf.pageNum
|
||||
lr.TotalPages = max(int(math.Ceil(float64(lr.TotalGroups)/float64(rf.maxGroups))), 1)
|
||||
}
|
||||
return lr
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, error) {
|
||||
lr := listGroupsResponse{Status: "success"}
|
||||
lr.Data.Groups = rh.groups(rf)
|
||||
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, *httpserver.ErrorWithStatusCode) {
|
||||
lr := rh.groups(rf)
|
||||
if rf.pageNum > 1 && len(lr.Data.Groups) == 0 {
|
||||
return nil, errResponse(fmt.Errorf(`page_num exceeds total amount of pages`), http.StatusBadRequest)
|
||||
}
|
||||
if lr.Page > lr.TotalPages {
|
||||
return nil, errResponse(fmt.Errorf(`page_num=%d exceeds total amount of pages in result=%d`, lr.Page, lr.TotalPages), http.StatusBadRequest)
|
||||
}
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
return nil, errResponse(fmt.Errorf(`error encoding list of groups: %w`, err), http.StatusInternalServerError)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
@@ -434,14 +528,14 @@ func (rh *requestHandler) groupAlerts() []rule.GroupAlerts {
|
||||
return gAlerts
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
|
||||
func (rh *requestHandler) listAlerts(gf *groupsFilter) ([]byte, *httpserver.ErrorWithStatusCode) {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
lr := listAlertsResponse{Status: "success"}
|
||||
lr.Data.Alerts = make([]*rule.ApiAlert, 0)
|
||||
for _, group := range rh.m.groups {
|
||||
if !rf.matchesGroup(group) {
|
||||
if !gf.matches(group) {
|
||||
continue
|
||||
}
|
||||
g := group.ToAPI()
|
||||
@@ -460,10 +554,7 @@ func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
|
||||
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
return nil, errResponse(fmt.Errorf(`error encoding list of active alerts: %w`, err), http.StatusInternalServerError)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
@@ -475,7 +566,7 @@ type listNotifiersResponse struct {
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listNotifiers() ([]byte, error) {
|
||||
func (rh *requestHandler) listNotifiers() ([]byte, *httpserver.ErrorWithStatusCode) {
|
||||
targets := notifier.GetTargets()
|
||||
|
||||
lr := listNotifiersResponse{Status: "success"}
|
||||
@@ -497,10 +588,7 @@ func (rh *requestHandler) listNotifiers() ([]byte, error) {
|
||||
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of notifiers: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
return nil, errResponse(fmt.Errorf(`error encoding list of notifiers: %w`, err), http.StatusInternalServerError)
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
@@ -511,3 +599,8 @@ func errResponse(err error, sc int) *httpserver.ErrorWithStatusCode {
|
||||
StatusCode: sc,
|
||||
}
|
||||
}
|
||||
|
||||
func errJson(w http.ResponseWriter, r *http.Request, err *httpserver.ErrorWithStatusCode) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
httpserver.Errorf(w, r, `{"error":%q,"errorType":%d}`, err, err.StatusCode)
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
) %}
|
||||
|
||||
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
|
||||
{% func Controls(prefix, currentIcon, currentText string, icons, states map[string]string, search bool) %}
|
||||
<div class="btn-toolbar mb-3" role="toolbar">
|
||||
<div class="d-flex gap-2 justify-content-between w-100">
|
||||
<div class="d-flex gap-2 align-items-center">
|
||||
@@ -28,10 +28,10 @@
|
||||
<use href="{%s prefix %}static/icons/icons.svg#expand"/>
|
||||
</svg>
|
||||
</a>
|
||||
{% if len(filters) > 0 %}
|
||||
{% if len(states) > 0 %}
|
||||
<span class="d-none d-md-inline-block">Filter by status:</span>
|
||||
<svg class="d-md-none" width="20" height="20">
|
||||
<use href="{%s prefix %}static/icons/icons.svg#filter">
|
||||
<use href="{%s prefix %}static/icons/icons.svg#state">
|
||||
</svg>
|
||||
<div class="dropdown">
|
||||
<button
|
||||
@@ -46,10 +46,10 @@
|
||||
</svg>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
{% for key, title := range filters %}
|
||||
{% for key, title := range states %}
|
||||
{% if title != currentText %}
|
||||
<li>
|
||||
<a class="dropdown-item" onclick="groupFilter('{%s key %}')">
|
||||
<a class="dropdown-item" onclick="groupForState('{%s key %}')">
|
||||
<span class="d-none d-md-inline-block">{%s title %}</span>
|
||||
<svg class="d-md-none" width="22" height="22">
|
||||
<use href="{%s prefix %}static/icons/icons.svg#{%s icons[key] %}"/>
|
||||
@@ -97,10 +97,10 @@
|
||||
{%= tpl.Footer(r) %}
|
||||
{% endfunc %}
|
||||
|
||||
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, filter string) %}
|
||||
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, state string) %}
|
||||
{%code
|
||||
prefix := vmalertutil.Prefix(r.URL.Path)
|
||||
filters := map[string]string{
|
||||
states := map[string]string{
|
||||
"": "All",
|
||||
"unhealthy": "Unhealthy",
|
||||
"nomatch": "No Match",
|
||||
@@ -110,14 +110,14 @@
|
||||
"unhealthy": "unhealthy",
|
||||
"nomatch": "nomatch",
|
||||
}
|
||||
currentText := filters[filter]
|
||||
currentIcon := icons[filter]
|
||||
currentText := states[state]
|
||||
currentIcon := icons[state]
|
||||
%}
|
||||
{%= tpl.Header(r, navItems, "Groups", getLastConfigError()) %}
|
||||
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
|
||||
{%= Controls(prefix, currentIcon, currentText, icons, states, true) %}
|
||||
{% if len(groups) > 0 %}
|
||||
{% for _, g := range groups %}
|
||||
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
|
||||
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.States["unhealthy"] > 0 %} alert-danger{% endif %}">
|
||||
<span class="d-flex justify-content-between">
|
||||
<a
|
||||
class="vm-group-search"
|
||||
@@ -130,9 +130,9 @@
|
||||
data-bs-target="#item-{%s g.ID %}"
|
||||
>
|
||||
<span class="d-flex gap-2">
|
||||
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
|
||||
{% if g.NoMatch > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d g.NoMatch %}</span> {% endif %}
|
||||
<span class="badge bg-success" title="Number of rules with status Ok">{%d g.Healthy %}</span>
|
||||
{% if g.States["unhealthy"] > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.States["unhealthy"] %}</span> {% endif %}
|
||||
{% if g.States["nomatch"] > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d g.States["nomatch"] %}</span> {% endif %}
|
||||
<span class="badge bg-success" title="Number of rules with status Ok">{%d g.States["ok"] %}</span>
|
||||
</span>
|
||||
</span>
|
||||
</span>
|
||||
@@ -189,7 +189,7 @@
|
||||
<b>record:</b> {%s r.Name %}
|
||||
{% endif %}
|
||||
|
|
||||
{%= seriesFetchedWarn(prefix, r) %}
|
||||
{%= seriesFetchedWarn(prefix, &r) %}
|
||||
<span><a target="_blank" href="{%s prefix+r.WebLink() %}">Details</a></span>
|
||||
</div>
|
||||
<div class="col-12">
|
||||
@@ -476,7 +476,7 @@
|
||||
{% endfunc %}
|
||||
|
||||
|
||||
{% func RuleDetails(r *http.Request, rule rule.ApiRule) %}
|
||||
{% func Rule(r *http.Request, rule rule.ApiRule) %}
|
||||
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
||||
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
||||
{%code
|
||||
@@ -661,8 +661,8 @@
|
||||
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
|
||||
{% endfunc %}
|
||||
|
||||
{% func seriesFetchedWarn(prefix string, r rule.ApiRule) %}
|
||||
{% if isNoMatch(r) %}
|
||||
{% func seriesFetchedWarn(prefix string, r *rule.ApiRule) %}
|
||||
{% if r.IsNoMatch() %}
|
||||
<svg
|
||||
data-bs-toggle="tooltip"
|
||||
title="No match! This rule's last evaluation hasn't selected any time series from the datasource.
|
||||
@@ -673,9 +673,3 @@
|
||||
</svg>
|
||||
{% endif %}
|
||||
{% endfunc %}
|
||||
|
||||
{%code
|
||||
func isNoMatch (r rule.ApiRule) bool {
|
||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||
}
|
||||
%}
|
||||
|
||||
@@ -31,7 +31,7 @@ var (
|
||||
)
|
||||
|
||||
//line app/vmalert/web.qtpl:15
|
||||
func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) {
|
||||
func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText string, icons, states map[string]string, search bool) {
|
||||
//line app/vmalert/web.qtpl:15
|
||||
qw422016.N().S(`
|
||||
<div class="btn-toolbar mb-3" role="toolbar">
|
||||
@@ -59,7 +59,7 @@ func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText
|
||||
</a>
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:31
|
||||
if len(filters) > 0 {
|
||||
if len(states) > 0 {
|
||||
//line app/vmalert/web.qtpl:31
|
||||
qw422016.N().S(`
|
||||
<span class="d-none d-md-inline-block">Filter by status:</span>
|
||||
@@ -68,7 +68,7 @@ func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText
|
||||
//line app/vmalert/web.qtpl:34
|
||||
qw422016.E().S(prefix)
|
||||
//line app/vmalert/web.qtpl:34
|
||||
qw422016.N().S(`static/icons/icons.svg#filter">
|
||||
qw422016.N().S(`static/icons/icons.svg#state">
|
||||
</svg>
|
||||
<div class="dropdown">
|
||||
<button
|
||||
@@ -97,7 +97,7 @@ func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText
|
||||
<ul class="dropdown-menu">
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:49
|
||||
for key, title := range filters {
|
||||
for key, title := range states {
|
||||
//line app/vmalert/web.qtpl:49
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
@@ -106,7 +106,7 @@ func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText
|
||||
//line app/vmalert/web.qtpl:50
|
||||
qw422016.N().S(`
|
||||
<li>
|
||||
<a class="dropdown-item" onclick="groupFilter('`)
|
||||
<a class="dropdown-item" onclick="groupForState('`)
|
||||
//line app/vmalert/web.qtpl:52
|
||||
qw422016.E().S(key)
|
||||
//line app/vmalert/web.qtpl:52
|
||||
@@ -176,22 +176,22 @@ func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:77
|
||||
func WriteControls(qq422016 qtio422016.Writer, prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) {
|
||||
func WriteControls(qq422016 qtio422016.Writer, prefix, currentIcon, currentText string, icons, states map[string]string, search bool) {
|
||||
//line app/vmalert/web.qtpl:77
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/web.qtpl:77
|
||||
StreamControls(qw422016, prefix, currentIcon, currentText, icons, filters, search)
|
||||
StreamControls(qw422016, prefix, currentIcon, currentText, icons, states, search)
|
||||
//line app/vmalert/web.qtpl:77
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/web.qtpl:77
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:77
|
||||
func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) string {
|
||||
func Controls(prefix, currentIcon, currentText string, icons, states map[string]string, search bool) string {
|
||||
//line app/vmalert/web.qtpl:77
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/web.qtpl:77
|
||||
WriteControls(qb422016, prefix, currentIcon, currentText, icons, filters, search)
|
||||
WriteControls(qb422016, prefix, currentIcon, currentText, icons, states, search)
|
||||
//line app/vmalert/web.qtpl:77
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/web.qtpl:77
|
||||
@@ -324,13 +324,13 @@ func Welcome(r *http.Request) string {
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:100
|
||||
func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule.ApiGroup, filter string) {
|
||||
func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule.ApiGroup, state string) {
|
||||
//line app/vmalert/web.qtpl:100
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:102
|
||||
prefix := vmalertutil.Prefix(r.URL.Path)
|
||||
filters := map[string]string{
|
||||
states := map[string]string{
|
||||
"": "All",
|
||||
"unhealthy": "Unhealthy",
|
||||
"nomatch": "No Match",
|
||||
@@ -340,8 +340,8 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
"unhealthy": "unhealthy",
|
||||
"nomatch": "nomatch",
|
||||
}
|
||||
currentText := filters[filter]
|
||||
currentIcon := icons[filter]
|
||||
currentText := states[state]
|
||||
currentIcon := icons[state]
|
||||
|
||||
//line app/vmalert/web.qtpl:115
|
||||
qw422016.N().S(`
|
||||
@@ -352,7 +352,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:117
|
||||
StreamControls(qw422016, prefix, currentIcon, currentText, icons, filters, true)
|
||||
StreamControls(qw422016, prefix, currentIcon, currentText, icons, states, true)
|
||||
//line app/vmalert/web.qtpl:117
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
@@ -371,7 +371,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
//line app/vmalert/web.qtpl:120
|
||||
qw422016.N().S(`" class="w-100 border-0 flex-column vm-group`)
|
||||
//line app/vmalert/web.qtpl:120
|
||||
if g.Unhealthy > 0 {
|
||||
if g.States["unhealthy"] > 0 {
|
||||
//line app/vmalert/web.qtpl:120
|
||||
qw422016.N().S(` alert-danger`)
|
||||
//line app/vmalert/web.qtpl:120
|
||||
@@ -418,11 +418,11 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
<span class="d-flex gap-2">
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:133
|
||||
if g.Unhealthy > 0 {
|
||||
if g.States["unhealthy"] > 0 {
|
||||
//line app/vmalert/web.qtpl:133
|
||||
qw422016.N().S(`<span class="badge bg-danger" title="Number of rules with status Error">`)
|
||||
//line app/vmalert/web.qtpl:133
|
||||
qw422016.N().D(g.Unhealthy)
|
||||
qw422016.N().D(g.States["unhealthy"])
|
||||
//line app/vmalert/web.qtpl:133
|
||||
qw422016.N().S(`</span> `)
|
||||
//line app/vmalert/web.qtpl:133
|
||||
@@ -431,11 +431,11 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:134
|
||||
if g.NoMatch > 0 {
|
||||
if g.States["nomatch"] > 0 {
|
||||
//line app/vmalert/web.qtpl:134
|
||||
qw422016.N().S(`<span class="badge bg-warning" title="Number of rules with status NoMatch">`)
|
||||
//line app/vmalert/web.qtpl:134
|
||||
qw422016.N().D(g.NoMatch)
|
||||
qw422016.N().D(g.States["nomatch"])
|
||||
//line app/vmalert/web.qtpl:134
|
||||
qw422016.N().S(`</span> `)
|
||||
//line app/vmalert/web.qtpl:134
|
||||
@@ -444,7 +444,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
qw422016.N().S(`
|
||||
<span class="badge bg-success" title="Number of rules with status Ok">`)
|
||||
//line app/vmalert/web.qtpl:135
|
||||
qw422016.N().D(g.Healthy)
|
||||
qw422016.N().D(g.States["ok"])
|
||||
//line app/vmalert/web.qtpl:135
|
||||
qw422016.N().S(`</span>
|
||||
</span>
|
||||
@@ -617,7 +617,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
|
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:192
|
||||
streamseriesFetchedWarn(qw422016, prefix, r)
|
||||
streamseriesFetchedWarn(qw422016, prefix, &r)
|
||||
//line app/vmalert/web.qtpl:192
|
||||
qw422016.N().S(`
|
||||
<span><a target="_blank" href="`)
|
||||
@@ -750,22 +750,22 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:234
|
||||
func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, groups []*rule.ApiGroup, filter string) {
|
||||
func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, groups []*rule.ApiGroup, state string) {
|
||||
//line app/vmalert/web.qtpl:234
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/web.qtpl:234
|
||||
StreamListGroups(qw422016, r, groups, filter)
|
||||
StreamListGroups(qw422016, r, groups, state)
|
||||
//line app/vmalert/web.qtpl:234
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/web.qtpl:234
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:234
|
||||
func ListGroups(r *http.Request, groups []*rule.ApiGroup, filter string) string {
|
||||
func ListGroups(r *http.Request, groups []*rule.ApiGroup, state string) string {
|
||||
//line app/vmalert/web.qtpl:234
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/web.qtpl:234
|
||||
WriteListGroups(qb422016, r, groups, filter)
|
||||
WriteListGroups(qb422016, r, groups, state)
|
||||
//line app/vmalert/web.qtpl:234
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/web.qtpl:234
|
||||
@@ -1462,7 +1462,7 @@ func Alert(r *http.Request, alert *rule.ApiAlert) string {
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:479
|
||||
func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule rule.ApiRule) {
|
||||
func StreamRule(qw422016 *qt422016.Writer, r *http.Request, rule rule.ApiRule) {
|
||||
//line app/vmalert/web.qtpl:479
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
@@ -1859,22 +1859,22 @@ func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule rule.Api
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:642
|
||||
func WriteRuleDetails(qq422016 qtio422016.Writer, r *http.Request, rule rule.ApiRule) {
|
||||
func WriteRule(qq422016 qtio422016.Writer, r *http.Request, rule rule.ApiRule) {
|
||||
//line app/vmalert/web.qtpl:642
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/web.qtpl:642
|
||||
StreamRuleDetails(qw422016, r, rule)
|
||||
StreamRule(qw422016, r, rule)
|
||||
//line app/vmalert/web.qtpl:642
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/web.qtpl:642
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:642
|
||||
func RuleDetails(r *http.Request, rule rule.ApiRule) string {
|
||||
func Rule(r *http.Request, rule rule.ApiRule) string {
|
||||
//line app/vmalert/web.qtpl:642
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/web.qtpl:642
|
||||
WriteRuleDetails(qb422016, r, rule)
|
||||
WriteRule(qb422016, r, rule)
|
||||
//line app/vmalert/web.qtpl:642
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/web.qtpl:642
|
||||
@@ -2015,12 +2015,12 @@ func badgeStabilizing() string {
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:664
|
||||
func streamseriesFetchedWarn(qw422016 *qt422016.Writer, prefix string, r rule.ApiRule) {
|
||||
func streamseriesFetchedWarn(qw422016 *qt422016.Writer, prefix string, r *rule.ApiRule) {
|
||||
//line app/vmalert/web.qtpl:664
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:665
|
||||
if isNoMatch(r) {
|
||||
if r.IsNoMatch() {
|
||||
//line app/vmalert/web.qtpl:665
|
||||
qw422016.N().S(`
|
||||
<svg
|
||||
@@ -2045,7 +2045,7 @@ func streamseriesFetchedWarn(qw422016 *qt422016.Writer, prefix string, r rule.Ap
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:675
|
||||
func writeseriesFetchedWarn(qq422016 qtio422016.Writer, prefix string, r rule.ApiRule) {
|
||||
func writeseriesFetchedWarn(qq422016 qtio422016.Writer, prefix string, r *rule.ApiRule) {
|
||||
//line app/vmalert/web.qtpl:675
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/web.qtpl:675
|
||||
@@ -2056,7 +2056,7 @@ func writeseriesFetchedWarn(qq422016 qtio422016.Writer, prefix string, r rule.Ap
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:675
|
||||
func seriesFetchedWarn(prefix string, r rule.ApiRule) string {
|
||||
func seriesFetchedWarn(prefix string, r *rule.ApiRule) string {
|
||||
//line app/vmalert/web.qtpl:675
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/web.qtpl:675
|
||||
@@ -2069,8 +2069,3 @@ func seriesFetchedWarn(prefix string, r rule.ApiRule) string {
|
||||
return qs422016
|
||||
//line app/vmalert/web.qtpl:675
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:678
|
||||
func isNoMatch(r rule.ApiRule) bool {
|
||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||
}
|
||||
|
||||
@@ -210,7 +210,7 @@ func TestHandler(t *testing.T) {
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("/api/v1/rules&filters", func(t *testing.T) {
|
||||
t.Run("/api/v1/rules&states", func(t *testing.T) {
|
||||
check := func(url string, statusCode, expGroups, expRules int) {
|
||||
t.Helper()
|
||||
lr := listGroupsResponse{}
|
||||
@@ -252,9 +252,15 @@ func TestHandler(t *testing.T) {
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=foo", 200, 0, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml", 200, 3, 6)
|
||||
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 3, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 0, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert", 200, 3, 3)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert&rule_name[]=record", 200, 3, 6)
|
||||
|
||||
check("/api/v1/rules?group_limit=1", 200, 1, 2)
|
||||
check("/api/v1/rules?group_limit=1&type=alert", 200, 1, 1)
|
||||
check("/api/v1/rules?group_limit=1&type=record", 200, 1, 1)
|
||||
check("/api/v1/rules?group_limit=2", 200, 2, 4)
|
||||
check(fmt.Sprintf("/api/v1/rules?group_limit=1&page_num=%d", 1), 200, 1, 2)
|
||||
})
|
||||
t.Run("/api/v1/rules&exclude_alerts=true", func(t *testing.T) {
|
||||
// check if response returns active alerts by default
|
||||
|
||||
@@ -147,7 +147,7 @@ func (ui *UserInfo) beginConcurrencyLimit(ctx context.Context) error {
|
||||
case ui.concurrencyLimitCh <- struct{}{}:
|
||||
return nil
|
||||
default:
|
||||
// The number of concurrently executed requests for the given user equals the limt.
|
||||
// The number of concurrently executed requests for the given user equals the limit.
|
||||
// Wait until some of the currently executed requests are finished, so the current request could be executed.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10078
|
||||
select {
|
||||
@@ -635,7 +635,7 @@ func getLeastLoadedBackendURL(bus []*backendURL, atomicCounter *atomic.Uint32) *
|
||||
// The Load() in front of CompareAndSwap() avoids CAS overhead for items with values bigger than 0.
|
||||
if bu.concurrentRequests.Load() == 0 && bu.concurrentRequests.CompareAndSwap(0, 1) {
|
||||
atomicCounter.CompareAndSwap(n+1, idx+1)
|
||||
// There is no need in the call bu.get(), because we already incremented bu.concrrentRequests above.
|
||||
// There is no need in the call bu.get(), because we already incremented bu.concurrentRequests above.
|
||||
return bu
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,7 +89,11 @@ func parseJWTUsers(ac *AuthConfig) ([]*UserInfo, *oidcDiscovererPool, error) {
|
||||
parsedClaims := make([]*jwt.Claim, 0, len(jwtToken.MatchClaims))
|
||||
for ck, cv := range jwtToken.MatchClaims {
|
||||
sortedClaims = append(sortedClaims, fmt.Sprintf("%s=%s", ck, cv))
|
||||
parsedClaims = append(parsedClaims, jwt.NewClaim(ck, cv))
|
||||
pc, err := jwt.NewClaim(ck, cv)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("incorrect match claim, key=%q, value regex=%q: %w", ck, cv, err)
|
||||
}
|
||||
parsedClaims = append(parsedClaims, pc)
|
||||
}
|
||||
ui.JWT.parsedMatchClaims = parsedClaims
|
||||
sort.Strings(sortedClaims)
|
||||
|
||||
@@ -48,7 +48,7 @@ var (
|
||||
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
|
||||
|
||||
requestBufferSize = flagutil.NewBytes("requestBufferSize", 32*1024, "The size of the buffer for reading the request body before proxying the request to backends. "+
|
||||
"This allows reducing the comsumption of backend resources when processing requests from clients connected via slow networks. "+
|
||||
"This allows reducing the consumption of backend resources when processing requests from clients connected via slow networks. "+
|
||||
"Set to 0 to disable request buffering. See https://docs.victoriametrics.com/victoriametrics/vmauth/#request-body-buffering")
|
||||
maxRequestBodySizeToRetry = flagutil.NewBytes("maxRequestBodySizeToRetry", 16*1024, "The maximum request body size to buffer in memory for potential retries at other backends. "+
|
||||
"Request bodies larger than this size cannot be retried if the backend fails. Zero or negative value disables request body buffering and retries. "+
|
||||
|
||||
@@ -2,14 +2,10 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/ecdsa"
|
||||
"crypto/elliptic"
|
||||
"crypto/rsa"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/big"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -121,12 +117,7 @@ func (d *oidcDiscoverer) refreshVerifierPools(ctx context.Context) error {
|
||||
return fmt.Errorf("openid configuration issuer %q does not match expected issuer %q", cfg.Issuer, d.issuer)
|
||||
}
|
||||
|
||||
keys, err := fetchJWKs(ctx, cfg.JWKsURI)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
verifierPool, err := jwt.NewVerifierPool(keys)
|
||||
verifierPool, err := fetchAndParseJWKs(ctx, cfg.JWKsURI)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -137,27 +128,6 @@ func (d *oidcDiscoverer) refreshVerifierPools(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type jwksResponse struct {
|
||||
Keys []jwk `json:"keys"`
|
||||
}
|
||||
|
||||
// See https://www.rfc-editor.org/rfc/rfc7517 for details.
|
||||
type jwk struct {
|
||||
Type string `json:"kty"`
|
||||
Alg string `json:"alg"`
|
||||
Use string `json:"use"`
|
||||
Kid string `json:"kid"`
|
||||
|
||||
// RSA keys contents
|
||||
E string `json:"e"`
|
||||
N string `json:"n"`
|
||||
|
||||
// EC keys contents
|
||||
Crv string `json:"crv"`
|
||||
X string `json:"x"`
|
||||
Y string `json:"y"`
|
||||
}
|
||||
|
||||
// See https://openid.net/specs/openid-connect-discovery-1_0.html#ProviderMetadata for details.
|
||||
type openidConfig struct {
|
||||
Issuer string `json:"issuer"`
|
||||
@@ -168,7 +138,7 @@ var oidcHTTPClient = &http.Client{
|
||||
Timeout: time.Second * 5,
|
||||
}
|
||||
|
||||
func fetchJWKs(ctx context.Context, jwksURI string) ([]any, error) {
|
||||
func fetchAndParseJWKs(ctx context.Context, jwksURI string) (*jwt.VerifierPool, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, jwksURI, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create request for fetching jwks keys from %q: %w", jwksURI, err)
|
||||
@@ -184,17 +154,17 @@ func fetchJWKs(ctx context.Context, jwksURI string) ([]any, error) {
|
||||
return nil, fmt.Errorf("unexpected status code %d when fetching jwks keys from %q", resp.StatusCode, jwksURI)
|
||||
}
|
||||
|
||||
var jwks jwksResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&jwks); err != nil {
|
||||
return nil, fmt.Errorf("failed to decode jwks response from %q: %v", jwksURI, err)
|
||||
b, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read response body from %q: %w", jwksURI, err)
|
||||
}
|
||||
|
||||
keys, err := parseJwksKeys(&jwks)
|
||||
vp, err := jwt.ParseJWKs(b)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse jwks keys from %q: %v", jwksURI, err)
|
||||
}
|
||||
|
||||
return keys, nil
|
||||
return vp, nil
|
||||
}
|
||||
|
||||
func getOpenIDConfiguration(ctx context.Context, issuer string) (openidConfig, error) {
|
||||
@@ -223,68 +193,3 @@ func getOpenIDConfiguration(ctx context.Context, issuer string) (openidConfig, e
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
func parseJwksKeys(resp *jwksResponse) ([]any, error) {
|
||||
keys := make([]any, 0)
|
||||
for _, key := range resp.Keys {
|
||||
if key.Kid == "" {
|
||||
return nil, fmt.Errorf("jwks key without kid found")
|
||||
}
|
||||
|
||||
switch key.Type {
|
||||
case "RSA":
|
||||
if key.E == "" || key.N == "" {
|
||||
return nil, fmt.Errorf("jwks key without e or n found")
|
||||
}
|
||||
e, err := base64.RawURLEncoding.DecodeString(key.E)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode jwks key e: %w", err)
|
||||
}
|
||||
exp := big.NewInt(0).SetBytes(e)
|
||||
if !exp.IsInt64() || exp.Int64() < 1 {
|
||||
return nil, fmt.Errorf("invalid RSA exponent")
|
||||
}
|
||||
|
||||
n, err := base64.RawURLEncoding.DecodeString(key.N)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode jwks key n: %w", err)
|
||||
}
|
||||
keys = append(keys, &rsa.PublicKey{
|
||||
E: int(exp.Int64()),
|
||||
N: big.NewInt(0).SetBytes(n),
|
||||
})
|
||||
case "EC":
|
||||
if key.Crv == "" || key.X == "" || key.Y == "" {
|
||||
return nil, fmt.Errorf("jwks key without crv or x or y found")
|
||||
}
|
||||
x, err := base64.RawURLEncoding.DecodeString(key.X)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode jwks key x: %w", err)
|
||||
}
|
||||
y, err := base64.RawURLEncoding.DecodeString(key.Y)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to decode jwks key y: %w", err)
|
||||
}
|
||||
var curve elliptic.Curve
|
||||
switch key.Crv {
|
||||
case "P-256":
|
||||
curve = elliptic.P256()
|
||||
case "P-384":
|
||||
curve = elliptic.P384()
|
||||
case "P-521":
|
||||
curve = elliptic.P521()
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported jwks key crv %q found", key.Crv)
|
||||
}
|
||||
keys = append(keys, &ecdsa.PublicKey{
|
||||
Curve: curve,
|
||||
X: big.NewInt(0).SetBytes(x),
|
||||
Y: big.NewInt(0).SetBytes(y),
|
||||
})
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported jwk.KTY: %s; want RSA or EC", key.Type)
|
||||
}
|
||||
}
|
||||
|
||||
return keys, nil
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ var (
|
||||
deduplicator *streamaggr.Deduplicator
|
||||
)
|
||||
|
||||
// CheckStreamAggrConfig checks config pointed by -stramaggr.config
|
||||
// CheckStreamAggrConfig checks config pointed by -streamaggr.config
|
||||
func CheckStreamAggrConfig() error {
|
||||
if *streamAggrConfig == "" {
|
||||
return nil
|
||||
|
||||
@@ -77,7 +77,7 @@ func push(ctx *common.InsertCtx, tss []prompb.TimeSeries) {
|
||||
r := &ts.Samples[i]
|
||||
metricNameRaw, err = ctx.WriteDataPointExt(metricNameRaw, ctx.Labels, r.Timestamp, r.Value)
|
||||
if err != nil {
|
||||
logger.Errorf("cannot write promscape data to storage: %s", err)
|
||||
logger.Errorf("cannot write promscrape data to storage: %s", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ var (
|
||||
concurrency = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce restore duration")
|
||||
maxBytesPerSecond = flagutil.NewBytes("maxBytesPerSecond", 0, "The maximum download speed. There is no limit if it is set to 0")
|
||||
skipBackupCompleteCheck = flag.Bool("skipBackupCompleteCheck", false, "Whether to skip checking for 'backup complete' file in -src. This may be useful for restoring from old backups, which were created without 'backup complete' file")
|
||||
SkipPreallocation = flag.Bool("skipFilePreallocation", false, "Whether to skip pre-allocated files. This will likely be slower in most cases, but allows restores to resume mid file on failure")
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -63,6 +64,7 @@ func main() {
|
||||
Src: srcFS,
|
||||
Dst: dstFS,
|
||||
SkipBackupCompleteCheck: *skipBackupCompleteCheck,
|
||||
SkipPreallocation: *SkipPreallocation,
|
||||
}
|
||||
pushmetrics.Init()
|
||||
if err := a.Run(ctx); err != nil {
|
||||
|
||||
@@ -743,6 +743,26 @@ func proxyVMAlertRequests(w http.ResponseWriter, r *http.Request, path string) {
|
||||
req := r.Clone(r.Context())
|
||||
req.URL.Path = strings.TrimPrefix(path, "prometheus")
|
||||
req.Host = vmalertProxyHost
|
||||
|
||||
if strings.HasPrefix(r.Header.Get(`User-Agent`), `Grafana`) {
|
||||
// Grafana currently supports only Prometheus-style alerts. If other alert types
|
||||
// (e.g. logs or traces) are returned, it may fail with "Error loading alerts".
|
||||
//
|
||||
// Grafana queries the vmalert API directly, bypassing the VictoriaMetrics datasource,
|
||||
// so query params (such as datasource_type) cannot be enforced on the Grafana side.
|
||||
//
|
||||
// To ensure compatibility, we detect Grafana requests via the User-Agent and enforce
|
||||
// `datasource_type=prometheus`.
|
||||
//
|
||||
// See:
|
||||
// - https://github.com/VictoriaMetrics/victoriametrics-datasource/issues/329#issuecomment-3847585443
|
||||
// - https://github.com/VictoriaMetrics/victoriametrics-datasource/issues/59
|
||||
q := req.URL.Query()
|
||||
q.Set("datasource_type", "prometheus")
|
||||
req.URL.RawQuery = q.Encode()
|
||||
req.RequestURI = ""
|
||||
}
|
||||
|
||||
vmalertProxy.ServeHTTP(w, req)
|
||||
}
|
||||
|
||||
|
||||
1
app/vmselect/vmui/assets/index-D2OEy8Ra.css
Normal file
198
app/vmselect/vmui/assets/index-KEOgEEMl.js
Normal file
1
app/vmselect/vmui/assets/rolldown-runtime-COnpUsM8.js
Normal file
@@ -0,0 +1 @@
|
||||
var e=Object.create,t=Object.defineProperty,n=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,i=Object.getPrototypeOf,a=Object.prototype.hasOwnProperty,o=(e,t)=>()=>(e&&(t=e(e=0)),t),s=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),c=(e,n)=>{let r={};for(var i in e)t(r,i,{get:e[i],enumerable:!0});return n||t(r,Symbol.toStringTag,{value:`Module`}),r},l=(e,i,o,s)=>{if(i&&typeof i==`object`||typeof i==`function`)for(var c=r(i),l=0,u=c.length,d;l<u;l++)d=c[l],!a.call(e,d)&&d!==o&&t(e,d,{get:(e=>i[e]).bind(null,d),enumerable:!(s=n(i,d))||s.enumerable});return e},u=(n,r,a)=>(a=n==null?{}:e(i(n)),l(r||!n||!n.__esModule?t(a,`default`,{value:n,enumerable:!0}):a,n)),d=e=>a.call(e,`module.exports`)?e[`module.exports`]:l(t({},`__esModule`,{value:!0}),e);export{u as a,d as i,o as n,c as r,s as t};
|
||||
1
app/vmselect/vmui/assets/vendor-CnsZ1jie.css
Normal file
@@ -0,0 +1 @@
|
||||
.uplot,.uplot *,.uplot :before,.uplot :after{box-sizing:border-box}.uplot{width:min-content;font-family:system-ui,-apple-system,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji;line-height:1.5}.u-title{text-align:center;font-size:18px;font-weight:700}.u-wrap{-webkit-user-select:none;user-select:none;position:relative}.u-over,.u-under{position:absolute}.u-under{overflow:hidden}.uplot canvas{width:100%;height:100%;display:block;position:relative}.u-axis{position:absolute}.u-legend{text-align:center;margin:auto;font-size:14px}.u-inline{display:block}.u-inline *{display:inline-block}.u-inline tr{margin-right:16px}.u-legend th{font-weight:600}.u-legend th>*{vertical-align:middle;display:inline-block}.u-legend .u-marker{width:1em;height:1em;margin-right:4px;background-clip:padding-box!important}.u-inline.u-live th:after{content:":";vertical-align:middle}.u-inline:not(.u-live) .u-value{display:none}.u-series>*{padding:4px}.u-series th{cursor:pointer}.u-legend .u-off>*{opacity:.3}.u-select{pointer-events:none;background:#00000012;position:absolute}.u-cursor-x,.u-cursor-y{pointer-events:none;will-change:transform;position:absolute;top:0;left:0}.u-hz .u-cursor-x,.u-vt .u-cursor-y{border-right:1px dashed #607d8b;height:100%}.u-hz .u-cursor-y,.u-vt .u-cursor-x{border-bottom:1px dashed #607d8b;width:100%}.u-cursor-pt{pointer-events:none;will-change:transform;border:0 solid;border-radius:50%;position:absolute;top:0;left:0;background-clip:padding-box!important}.u-axis.u-off,.u-select.u-off,.u-cursor-x.u-off,.u-cursor-y.u-off,.u-cursor-pt.u-off{display:none}
|
||||
@@ -1 +0,0 @@
|
||||
.uplot,.uplot *,.uplot *:before,.uplot *:after{box-sizing:border-box}.uplot{font-family:system-ui,-apple-system,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji";line-height:1.5;width:min-content}.u-title{text-align:center;font-size:18px;font-weight:700}.u-wrap{position:relative;-webkit-user-select:none;user-select:none}.u-over,.u-under{position:absolute}.u-under{overflow:hidden}.uplot canvas{display:block;position:relative;width:100%;height:100%}.u-axis{position:absolute}.u-legend{font-size:14px;margin:auto;text-align:center}.u-inline{display:block}.u-inline *{display:inline-block}.u-inline tr{margin-right:16px}.u-legend th{font-weight:600}.u-legend th>*{vertical-align:middle;display:inline-block}.u-legend .u-marker{width:1em;height:1em;margin-right:4px;background-clip:padding-box!important}.u-inline.u-live th:after{content:":";vertical-align:middle}.u-inline:not(.u-live) .u-value{display:none}.u-series>*{padding:4px}.u-series th{cursor:pointer}.u-legend .u-off>*{opacity:.3}.u-select{background:#00000012;position:absolute;pointer-events:none}.u-cursor-x,.u-cursor-y{position:absolute;left:0;top:0;pointer-events:none;will-change:transform}.u-hz .u-cursor-x,.u-vt .u-cursor-y{height:100%;border-right:1px dashed #607D8B}.u-hz .u-cursor-y,.u-vt .u-cursor-x{width:100%;border-bottom:1px dashed #607D8B}.u-cursor-pt{position:absolute;top:0;left:0;border-radius:50%;border:0 solid;pointer-events:none;will-change:transform;background-clip:padding-box!important}.u-axis.u-off,.u-select.u-off,.u-cursor-x.u-off,.u-cursor-y.u-off,.u-cursor-pt.u-off{display:none}
|
||||
66
app/vmselect/vmui/assets/vendor-Mr0bmX1E.js
Normal file
@@ -37,10 +37,11 @@
|
||||
<meta property="og:title" content="UI for VictoriaMetrics">
|
||||
<meta property="og:url" content="https://victoriametrics.com/">
|
||||
<meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data">
|
||||
<script type="module" crossorigin src="./assets/index-DIRuq0ns.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-BR6Q0Fin.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-D1GxaB_c.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-D7CzMv1O.css">
|
||||
<script type="module" crossorigin src="./assets/index-KEOgEEMl.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/rolldown-runtime-COnpUsM8.js">
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-Mr0bmX1E.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-CnsZ1jie.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-D2OEy8Ra.css">
|
||||
</head>
|
||||
<body>
|
||||
<noscript>You need to enable JavaScript to run this app.</noscript>
|
||||
|
||||
@@ -319,6 +319,7 @@ func Stop() {
|
||||
Storage.MustClose()
|
||||
logger.Infof("successfully closed the storage in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
fs.MustStopDirRemover()
|
||||
logger.Infof("the storage has been stopped")
|
||||
}
|
||||
|
||||
|
||||
3774
app/vmui/packages/vmui/package-lock.json
generated
@@ -21,43 +21,42 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"classnames": "^2.5.1",
|
||||
"dayjs": "^1.11.19",
|
||||
"dayjs": "^1.11.20",
|
||||
"lodash.debounce": "^4.0.8",
|
||||
"marked": "^17.0.1",
|
||||
"preact": "^10.28.3",
|
||||
"qs": "^6.14.1",
|
||||
"marked": "^17.0.5",
|
||||
"preact": "^10.29.0",
|
||||
"qs": "^6.15.0",
|
||||
"react-input-mask": "^2.0.4",
|
||||
"react-router-dom": "^7.13.0",
|
||||
"react-router-dom": "^7.13.2",
|
||||
"uplot": "^1.6.32",
|
||||
"vite": "^7.3.1",
|
||||
"vite": "^8.0.2",
|
||||
"web-vitals": "^5.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@eslint/eslintrc": "^3.3.3",
|
||||
"@eslint/eslintrc": "^3.3.5",
|
||||
"@eslint/js": "^9.39.2",
|
||||
"@preact/preset-vite": "^2.10.3",
|
||||
"@preact/preset-vite": "^2.10.5",
|
||||
"@testing-library/jest-dom": "^6.9.1",
|
||||
"@testing-library/preact": "^3.2.4",
|
||||
"@types/lodash.debounce": "^4.0.9",
|
||||
"@types/node": "^25.2.0",
|
||||
"@types/qs": "^6.14.0",
|
||||
"@types/react": "^19.2.10",
|
||||
"@types/node": "^25.5.0",
|
||||
"@types/qs": "^6.15.0",
|
||||
"@types/react": "^19.2.14",
|
||||
"@types/react-input-mask": "^3.0.6",
|
||||
"@types/react-router-dom": "^5.3.3",
|
||||
"@typescript-eslint/eslint-plugin": "^8.54.0",
|
||||
"@typescript-eslint/parser": "^8.54.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.57.2",
|
||||
"@typescript-eslint/parser": "^8.57.2",
|
||||
"cross-env": "^10.1.0",
|
||||
"eslint": "^9.39.2",
|
||||
"eslint-plugin-react": "^7.37.5",
|
||||
"eslint-plugin-unused-imports": "^4.3.0",
|
||||
"globals": "^17.3.0",
|
||||
"eslint-plugin-unused-imports": "^4.4.1",
|
||||
"globals": "^17.4.0",
|
||||
"http-proxy-middleware": "^3.0.5",
|
||||
"jsdom": "^28.0.0",
|
||||
"postcss": "^8.5.6",
|
||||
"rollup-plugin-visualizer": "^6.0.5",
|
||||
"sass-embedded": "^1.97.3",
|
||||
"jsdom": "^29.0.1",
|
||||
"postcss": "^8.5.8",
|
||||
"sass-embedded": "^1.98.0",
|
||||
"typescript": "^5.9.3",
|
||||
"vitest": "^4.0.18"
|
||||
"vitest": "^4.1.1"
|
||||
},
|
||||
"browserslist": {
|
||||
"production": [
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
export const getGroupsUrl = (server: string): string => {
|
||||
return `${server}/vmalert/api/v1/rules?datasource_type=prometheus`;
|
||||
export const getGroupsUrl = (server: string, search: string, type: string, states: string[], maxGroups: number): string => {
|
||||
return `${server}/vmalert/api/v1/rules?datasource_type=prometheus&search=${encodeURIComponent(search)}&type=${encodeURIComponent(type)}&state=${states.map(encodeURIComponent).join(",")}&group_limit=${maxGroups}&extended_states=true`;
|
||||
};
|
||||
|
||||
export const getItemUrl = (
|
||||
|
||||
@@ -60,7 +60,7 @@ const QueryEditorAutocomplete: FC<QueryEditorAutocompleteProps> = ({
|
||||
const options = useMemo(() => {
|
||||
switch (context) {
|
||||
case QueryContextType.metricsql:
|
||||
return [...metrics, ...metricsqlFunctions];
|
||||
return includeFunctions ? [...metrics, ...metricsqlFunctions] : metrics;
|
||||
case QueryContextType.label:
|
||||
return labels;
|
||||
case QueryContextType.labelValue:
|
||||
@@ -68,7 +68,7 @@ const QueryEditorAutocomplete: FC<QueryEditorAutocompleteProps> = ({
|
||||
default:
|
||||
return [];
|
||||
}
|
||||
}, [context, metrics, labels, labelValues, metricsqlFunctions]);
|
||||
}, [context, metrics, labels, labelValues, metricsqlFunctions, includeFunctions]);
|
||||
|
||||
const handleSelect = useCallback((insert: string) => {
|
||||
// Find the start and end of valueByContext in the query string
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import "./style.scss";
|
||||
import { ReactNode } from "react";
|
||||
|
||||
export type BadgeColor = "firing" | "inactive" | "pending" | "no-match" | "unhealthy" | "ok" | "passive";
|
||||
export type BadgeColor = "firing" | "inactive" | "pending" | "nomatch" | "unhealthy" | "ok" | "passive";
|
||||
|
||||
interface BadgeItem {
|
||||
value?: number | string;
|
||||
|
||||
@@ -4,7 +4,7 @@ $badge-colors: (
|
||||
"firing": $color-error,
|
||||
"inactive": $color-success,
|
||||
"pending": $color-warning,
|
||||
"no-match": $color-notice,
|
||||
"nomatch": $color-notice,
|
||||
"unhealthy": $color-broken,
|
||||
"ok": $color-info,
|
||||
"passive": $color-passive,
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import { useMemo } from "preact/compat";
|
||||
import "./style.scss";
|
||||
import { Group as APIGroup } from "../../../types";
|
||||
import { formatDuration, formatEventTime } from "../helpers";
|
||||
import ItemHeader from "../ItemHeader";
|
||||
import { getStates, formatDuration, formatEventTime } from "../helpers";
|
||||
import Badges, { BadgeColor } from "../Badges";
|
||||
|
||||
interface BaseGroupProps {
|
||||
@@ -117,6 +118,21 @@ const BaseGroup = ({ group }: BaseGroupProps) => {
|
||||
)}
|
||||
</tbody>
|
||||
</table>
|
||||
<div className="vm-explore-alerts-rule-item">
|
||||
<span className="vm-alerts-title">Rules</span>
|
||||
{group.rules.map((rule) => (
|
||||
<ItemHeader
|
||||
classes={["vm-badge-item", rule.state]}
|
||||
key={rule.id}
|
||||
entity="rule"
|
||||
type={rule.type}
|
||||
groupId={rule.group_id}
|
||||
states={getStates(rule)}
|
||||
id={rule.id}
|
||||
name={rule.name}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -18,6 +18,7 @@ import {
|
||||
import Button from "../../Main/Button/Button";
|
||||
|
||||
interface ItemHeaderControlsProps {
|
||||
classes?: string[];
|
||||
entity: string;
|
||||
type?: string;
|
||||
groupId: string;
|
||||
@@ -27,12 +28,19 @@ interface ItemHeaderControlsProps {
|
||||
onClose?: () => void;
|
||||
}
|
||||
|
||||
const ItemHeader: FC<ItemHeaderControlsProps> = ({ name, id, groupId, entity, type, states, onClose }) => {
|
||||
const ItemHeader: FC<ItemHeaderControlsProps> = ({ name, id, groupId, entity, type, states, onClose, classes }) => {
|
||||
const { isMobile } = useDeviceDetect();
|
||||
const { serverUrl } = useAppState();
|
||||
const navigate = useNavigate();
|
||||
const copyToClipboard = useCopyToClipboard();
|
||||
|
||||
const openGroupLink = () => {
|
||||
navigate({
|
||||
pathname: "/rules",
|
||||
search: `group_id=${groupId}`,
|
||||
});
|
||||
};
|
||||
|
||||
const openItemLink = () => {
|
||||
navigate({
|
||||
pathname: "/rules",
|
||||
@@ -49,7 +57,7 @@ const ItemHeader: FC<ItemHeaderControlsProps> = ({ name, id, groupId, entity, ty
|
||||
const headerClasses = classNames({
|
||||
"vm-explore-alerts-item-header": true,
|
||||
"vm-explore-alerts-item-header_mobile": isMobile,
|
||||
});
|
||||
}, classes);
|
||||
|
||||
const renderIcon = () => {
|
||||
switch(entity) {
|
||||
@@ -105,16 +113,30 @@ const ItemHeader: FC<ItemHeaderControlsProps> = ({ name, id, groupId, entity, ty
|
||||
items={badgesItems}
|
||||
/>
|
||||
{onClose ? (
|
||||
<Button
|
||||
className="vm-back-button"
|
||||
size="small"
|
||||
variant="outlined"
|
||||
color="gray"
|
||||
startIcon={<LinkIcon />}
|
||||
onClick={copyLink}
|
||||
>
|
||||
<span className="vm-button-text">Copy Link</span>
|
||||
</Button>
|
||||
<>
|
||||
{id && (
|
||||
<Button
|
||||
className="vm-back-button"
|
||||
size="small"
|
||||
variant="outlined"
|
||||
color="gray"
|
||||
startIcon={<GroupIcon />}
|
||||
onClick={openGroupLink}
|
||||
>
|
||||
<span className="vm-button-text">Open Group</span>
|
||||
</Button>
|
||||
)}
|
||||
<Button
|
||||
className="vm-back-button"
|
||||
size="small"
|
||||
variant="outlined"
|
||||
color="gray"
|
||||
startIcon={<LinkIcon />}
|
||||
onClick={copyLink}
|
||||
>
|
||||
<span className="vm-button-text">Copy Link</span>
|
||||
</Button>
|
||||
</>
|
||||
) : (
|
||||
<Button
|
||||
className="vm-button-borderless"
|
||||
|
||||
@@ -6,6 +6,10 @@
|
||||
justify-content: space-between;
|
||||
gap: $padding-global;
|
||||
|
||||
&:is(.vm-badge-item) {
|
||||
padding: 6px 0 6px 6px;
|
||||
}
|
||||
|
||||
.vm-button_small {
|
||||
padding: 4px;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
import Button from "../../Main/Button/Button";
|
||||
import { ArrowDownIcon } from "../../Main/Icons";
|
||||
import "./style.scss";
|
||||
import classNames from "classnames";
|
||||
|
||||
interface PaginationProps {
|
||||
page: number;
|
||||
totalPages: number;
|
||||
totalRules: number;
|
||||
totalGroups: number;
|
||||
pageRules: number;
|
||||
pageGroups: number;
|
||||
onPageChange: (num: number) => () => void;
|
||||
}
|
||||
|
||||
/**
 * Builds the list of pagination entries to render.
 *
 * Each positive number is a page button; `0` is a placeholder that the
 * caller renders as an ellipsis. The first and the last page are always
 * present, together with the current page and its direct neighbours.
 *
 * @param page - 1-based number of the current page. Values outside of
 *   `[1, totalPages]` are clamped, so a stale or hand-edited page number
 *   never produces buttons for pages that do not exist.
 * @param totalPages - total number of available pages.
 * @returns entries in display order; empty when there are fewer than two pages.
 */
const getButtons = (page: number, totalPages: number): number[] => {
  const result: number[] = [];
  // Nothing to paginate when there is a single page (or none at all).
  if (totalPages < 2) return result;

  // Keep the current page inside the valid range before deriving neighbours.
  const current = Math.min(Math.max(page, 1), totalPages);

  result.push(1);
  // Gap between the first page and the left neighbour.
  if (current > 3) result.push(0);
  if (current > 2) result.push(current - 1);
  // The current page is added here unless it is already the first or the last one.
  if (current > 1 && current < totalPages) result.push(current);
  if (current < totalPages - 1) result.push(current + 1);
  // Gap between the right neighbour and the last page.
  if (totalPages - current > 2) result.push(0);
  result.push(totalPages);
  return result;
};
|
||||
|
||||
/**
 * Pagination bar for the rules list.
 *
 * Renders per-page rule/group counters on the left, the page buttons in the
 * middle (only when `getButtons` returns entries) and the overall
 * rule/group totals on the right.
 *
 * `onPageChange` is a handler factory: it is called with the target page
 * number during render and must return the click handler for that button.
 */
const Pagination = ({
  page,
  totalPages,
  onPageChange,
  totalGroups,
  totalRules,
  pageGroups,
  pageRules,
}: PaginationProps) => {
  const buttons = getButtons(page, totalPages);

  return (
    <div className="vm-pagination">
      <span className="vm-pagination-stats">
        <span>Page rules/groups:</span> <b>{pageRules}</b> / <b>{pageGroups}</b>
      </span>
      {!!buttons.length && (
        <div className="vm-pagination-buttons">
          <Button
            className="vm-button-borderless vm-pagination-prev"
            size="small"
            color="gray"
            disabled={page === 1}
            variant="outlined"
            startIcon={<ArrowDownIcon />}
            onClick={onPageChange(page - 1)}
          />
          {buttons.map((button, index) => {
            // A zero entry marks a gap between page numbers and is shown as an ellipsis.
            // Both branches carry a key so list reconciliation stays stable.
            return button ? (
              <Button
                className={classNames({
                  "vm-button-borderless": page !== button,
                })}
                key={index}
                size="small"
                color="gray"
                variant="outlined"
                onClick={onPageChange(button)}
              >{button}</Button>
            ) : (
              <span
                key={index}
                className="vm-pagination-more"
              >...</span>
            );
          })}
          <Button
            className="vm-button-borderless vm-pagination-next"
            size="small"
            color="gray"
            disabled={page === totalPages}
            variant="outlined"
            startIcon={<ArrowDownIcon />}
            onClick={onPageChange(page + 1)}
          />
        </div>
      )}
      <span className="vm-pagination-stats">
        <span>Total rules/groups:</span> <b>{totalRules}</b> / <b>{totalGroups}</b>
      </span>
    </div>
  );
};
|
||||
|
||||
export default Pagination;
|
||||
@@ -0,0 +1,33 @@
|
||||
@use "src/styles/variables" as *;
|
||||
|
||||
.vm-pagination {
|
||||
display: flex;
|
||||
min-height: 24px;
|
||||
justify-content: space-between;
|
||||
&-stats {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
color: var(--color-text-secondary);
|
||||
column-gap: $padding-tiny;
|
||||
}
|
||||
&-buttons {
|
||||
display: flex;
|
||||
column-gap: $padding-small;
|
||||
}
|
||||
.vm-button-borderless {
|
||||
border: 0;
|
||||
}
|
||||
&-more {
|
||||
align-self: center;
|
||||
}
|
||||
&-prev {
|
||||
svg {
|
||||
transform: rotate(90deg);
|
||||
}
|
||||
}
|
||||
&-next {
|
||||
svg {
|
||||
transform: rotate(-90deg);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import { FC, useMemo } from "preact/compat";
|
||||
import { useMemo } from "preact/compat";
|
||||
import Select from "../../Main/Select/Select";
|
||||
import { SearchIcon } from "../../Main/Icons";
|
||||
import TextField from "../../Main/TextField/TextField";
|
||||
@@ -8,25 +8,25 @@ import useDeviceDetect from "../../../hooks/useDeviceDetect";
|
||||
|
||||
interface RulesHeaderProps {
|
||||
types: string[];
|
||||
allTypes: string[];
|
||||
allRuleTypes: string[];
|
||||
allStates: string[];
|
||||
states: string[];
|
||||
search: string;
|
||||
onChangeTypes: (input: string) => void;
|
||||
onChangeRuleType: (input: string) => void;
|
||||
onChangeStates: (input: string) => void;
|
||||
onChangeSearch: (input: string) => void;
|
||||
}
|
||||
|
||||
const RulesHeader: FC<RulesHeaderProps> = ({
|
||||
const RulesHeader = ({
|
||||
types,
|
||||
allTypes,
|
||||
allRuleTypes,
|
||||
allStates,
|
||||
states,
|
||||
search,
|
||||
onChangeTypes,
|
||||
onChangeRuleType,
|
||||
onChangeStates,
|
||||
onChangeSearch,
|
||||
}) => {
|
||||
}: RulesHeaderProps) => {
|
||||
const noStateText = useMemo(
|
||||
() => (types.length ? "" : "No states. Please select rule states"),
|
||||
[types],
|
||||
@@ -46,10 +46,10 @@ const RulesHeader: FC<RulesHeaderProps> = ({
|
||||
<div className="vm-explore-alerts-header__rule_type">
|
||||
<Select
|
||||
value={types}
|
||||
list={allTypes}
|
||||
label="Rules type"
|
||||
list={allRuleTypes}
|
||||
label="Rule type"
|
||||
placeholder="Please select rule type"
|
||||
onChange={onChangeTypes}
|
||||
onChange={onChangeRuleType}
|
||||
autofocus={!!types.length && !isMobile}
|
||||
includeAll
|
||||
searchable
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import dayjs from "dayjs";
|
||||
import { Rule } from "../../types";
|
||||
|
||||
export const formatDuration = (raw: number) => {
|
||||
const duration = dayjs.duration(Math.round(raw * 1000));
|
||||
@@ -18,3 +19,13 @@ export const formatEventTime = (raw: string) => {
|
||||
const t = dayjs(raw);
|
||||
return t.year() <= 1 ? "Never" : t.format("DD MMM YYYY HH:mm:ss");
|
||||
};
|
||||
|
||||
/**
 * Counts states for a rule.
 *
 * When the rule has no alerts, the result holds a single entry keyed by the
 * rule's own state with a count of 1. Otherwise the result maps every alert
 * state to the number of alerts currently in that state.
 */
export const getStates = (rule: Rule) => {
  const alerts = rule.alerts;
  if (!alerts?.length) {
    return { [rule.state]: 1 };
  }

  const counts = {} as Record<string, number>;
  for (const alert of alerts) {
    counts[alert.state] = (counts[alert.state] ?? 0) + 1;
  }
  return counts;
};
|
||||
|
||||
@@ -72,9 +72,9 @@ const useGetMetricsQL = (includeFunctions: boolean) => {
|
||||
}
|
||||
};
|
||||
fetchMarkdown();
|
||||
}, []);
|
||||
}, [includeFunctions, metricsQLFunctions.length, queryDispatch]);
|
||||
|
||||
return includeFunctions ? metricsQLFunctions : [];
|
||||
return metricsQLFunctions;
|
||||
};
|
||||
|
||||
export default useGetMetricsQL;
|
||||
|
||||
@@ -80,7 +80,7 @@ export default class AppConfigurator {
|
||||
|
||||
let keys: string[] = [];
|
||||
if (focusLabel || isMetricWithLabel) {
|
||||
keys = keys.concat("seriesCountByFocusLabelValue");
|
||||
keys = keys.concat("seriesCountByMetricName", "seriesCountByFocusLabelValue");
|
||||
} else if (isMetric) {
|
||||
keys = keys.concat("labelValueCountByLabelName");
|
||||
} else if (isLabel) {
|
||||
|
||||
@@ -6,7 +6,7 @@ import { Rule as APIRule } from "../../types";
|
||||
import ItemHeader from "../../components/ExploreAlerts/ItemHeader";
|
||||
import BaseRule from "../../components/ExploreAlerts/BaseRule";
|
||||
import Modal from "../../components/Main/Modal/Modal";
|
||||
import { getStates } from "./helpers";
|
||||
import { getStates } from "../../components/ExploreAlerts/helpers";
|
||||
|
||||
interface ExploreRuleProps {
|
||||
groupId: string;
|
||||
|
||||
@@ -7,30 +7,36 @@ import Accordion from "../../components/Main/Accordion/Accordion";
|
||||
import { useFetchGroups } from "./hooks/useFetchGroups";
|
||||
import "./style.scss";
|
||||
import RulesHeader from "../../components/ExploreAlerts/RulesHeader";
|
||||
import Pagination from "../../components/ExploreAlerts/Pagination";
|
||||
import GroupHeader from "../../components/ExploreAlerts/GroupHeader";
|
||||
import Rule from "../../components/ExploreAlerts/Rule";
|
||||
import ExploreRule from "../../pages/ExploreAlerts/ExploreRule";
|
||||
import ExploreAlert from "../../pages/ExploreAlerts/ExploreAlert";
|
||||
import ExploreGroup from "../../pages/ExploreAlerts/ExploreGroup";
|
||||
import { getQueryStringValue } from "../../utils/query-string";
|
||||
import { getStates, getChanges, filterGroups } from "./helpers";
|
||||
import { getChanges } from "./helpers";
|
||||
import debounce from "lodash.debounce";
|
||||
import { getStates } from "../../components/ExploreAlerts/helpers";
|
||||
|
||||
const defaultTypesStr = getQueryStringValue("types", "") as string;
|
||||
const defaultTypes = defaultTypesStr.split("&").filter((rt) => rt) as string[];
|
||||
const defaultRuleType = getQueryStringValue("type", "") as string;
|
||||
const defaultStatesStr = getQueryStringValue("states", "") as string;
|
||||
const defaultStates = defaultStatesStr.split("&").filter((s) => s) as string[];
|
||||
const defaultSearchInput = getQueryStringValue("search", "") as string;
|
||||
const TYPE_STATES: Record<string, string[]> = {
|
||||
alert: ["inactive", "firing", "nomatch", "pending", "unhealthy"],
|
||||
record: ["unhealthy", "nomatch", "ok"],
|
||||
};
|
||||
|
||||
const ExploreRules: FC = () => {
|
||||
const pageNum = getQueryStringValue("page_num", "1") as string;
|
||||
const groupId = getQueryStringValue("group_id", "") as string;
|
||||
const ruleId = getQueryStringValue("rule_id", "") as string;
|
||||
const alertId = getQueryStringValue("alert_id", "") as string;
|
||||
|
||||
const [searchInput, setSearchInput] = useState(defaultSearchInput);
|
||||
const [types, setTypes] = useState(defaultTypes);
|
||||
const [ruleType, setRuleType] = useState(defaultRuleType);
|
||||
const [states, setStates] = useState(defaultStates);
|
||||
const [modalOpen, setModalOpen] = useState(true);
|
||||
const [modalOpen, setModalOpen] = useState(false);
|
||||
const [searchParams, setSearchParams] = useSearchParams();
|
||||
|
||||
useEffect(() => {
|
||||
@@ -38,7 +44,7 @@ const ExploreRules: FC = () => {
|
||||
}, [groupId]);
|
||||
|
||||
useSetQueryParams({
|
||||
types: types.join("&"),
|
||||
type: ruleType,
|
||||
states: states.join("&"),
|
||||
search: searchInput,
|
||||
group_id: groupId,
|
||||
@@ -47,12 +53,11 @@ const ExploreRules: FC = () => {
|
||||
});
|
||||
|
||||
const handleChangeSearch = useCallback((input: string) => {
|
||||
if (!input) {
|
||||
setSearchInput("");
|
||||
} else {
|
||||
setSearchInput(input);
|
||||
}
|
||||
}, [searchInput]);
|
||||
const newParams = new URLSearchParams(searchParams);
|
||||
newParams.set("page_num", "1");
|
||||
setSearchParams(newParams);
|
||||
setSearchInput(input || "");
|
||||
}, [searchInput, searchParams]);
|
||||
|
||||
const getModal = () => {
|
||||
if (ruleId) {
|
||||
@@ -94,55 +99,79 @@ const ExploreRules: FC = () => {
|
||||
setModalOpen(false);
|
||||
};
|
||||
|
||||
const onPageChange = (num: number) => {
|
||||
return () => {
|
||||
const newParams = new URLSearchParams(searchParams);
|
||||
newParams.set("page_num", num.toString());
|
||||
setSearchParams(newParams);
|
||||
};
|
||||
};
|
||||
|
||||
const allRuleTypes = Object.keys(TYPE_STATES);
|
||||
const allStates = useMemo(
|
||||
() => Array.from(ruleType === "" ? new Set(Object.values(TYPE_STATES).flat()) : TYPE_STATES[ruleType] || []),
|
||||
[ruleType]
|
||||
);
|
||||
const selectedRuleTypes = [ruleType].filter(Boolean);
|
||||
useEffect(() => {
|
||||
if (!states.every(v => allStates.includes(v))) {
|
||||
setStates([]);
|
||||
}
|
||||
}, [states, allStates]);
|
||||
|
||||
const pageNumInt: number = Math.max(1, parseInt(pageNum, 10) || 1);
|
||||
const {
|
||||
groups,
|
||||
isLoading,
|
||||
error,
|
||||
} = useFetchGroups({ blockFetch: modalOpen });
|
||||
|
||||
const { filteredGroups, allTypes, allStates } = useMemo(
|
||||
() => filterGroups(groups || [], types, states, searchInput),
|
||||
[groups, types, states, searchInput]
|
||||
);
|
||||
|
||||
if (!types.every(v => allTypes.has(v))) {
|
||||
setTypes([]);
|
||||
}
|
||||
const selectedTypes = allTypes.size === types.length ? [] : types;
|
||||
|
||||
if (!states.every(v => allStates.has(v))) {
|
||||
setStates([]);
|
||||
}
|
||||
const selectedStates = allStates.size === states.length ? [] : states;
|
||||
pageInfo,
|
||||
} = useFetchGroups({ blockFetch: modalOpen, search: searchInput, ruleType, states, pageNum: pageNumInt, onPageChange });
|
||||
|
||||
const handleChangeStates = useCallback((title: string) => {
|
||||
setStates(getChanges(title, selectedStates));
|
||||
}, [states]);
|
||||
const newParams = new URLSearchParams(searchParams);
|
||||
newParams.set("page_num", "1");
|
||||
setSearchParams(newParams);
|
||||
const changes = getChanges(title, states);
|
||||
setStates(changes.length == allStates.length ? [] : changes);
|
||||
}, [states, searchParams]);
|
||||
|
||||
const handleChangeTypes = useCallback((title: string) => {
|
||||
setTypes(getChanges(title, selectedTypes));
|
||||
}, [types]);
|
||||
const handleChangeRuleType = useCallback((title: string) => {
|
||||
const newParams = new URLSearchParams(searchParams);
|
||||
newParams.set("page_num", "1");
|
||||
setSearchParams(newParams);
|
||||
const changes = getChanges(title, selectedRuleTypes);
|
||||
setRuleType(changes.length && changes.length !== allRuleTypes.length ? changes[0] : "");
|
||||
}, [ruleType, searchParams]);
|
||||
|
||||
return (
|
||||
<>
|
||||
{modalOpen && getModal()}
|
||||
{(!modalOpen || !!allStates?.size) && (
|
||||
{(!modalOpen || !!allStates?.length) && (
|
||||
<div className="vm-explore-alerts">
|
||||
<RulesHeader
|
||||
types={selectedTypes}
|
||||
allTypes={Array.from(allTypes)}
|
||||
states={selectedStates}
|
||||
allStates={Array.from(allStates)}
|
||||
types={selectedRuleTypes}
|
||||
allRuleTypes={allRuleTypes}
|
||||
states={states}
|
||||
allStates={allStates}
|
||||
search={searchInput}
|
||||
onChangeTypes={handleChangeTypes}
|
||||
onChangeRuleType={handleChangeRuleType}
|
||||
onChangeStates={handleChangeStates}
|
||||
onChangeSearch={debounce(handleChangeSearch, 500)}
|
||||
/>
|
||||
<Pagination
|
||||
page={pageInfo.page}
|
||||
totalPages={pageInfo.total_pages}
|
||||
pageRules={groups.reduce((total, g) => total + g?.rules.length, 0)}
|
||||
pageGroups={groups.length}
|
||||
totalRules={pageInfo.total_rules}
|
||||
totalGroups={pageInfo.total_groups}
|
||||
onPageChange={onPageChange}
|
||||
/>
|
||||
{(isLoading && <Spinner />) || (error && <Alert variant="error">{error}</Alert>) || (
|
||||
!filteredGroups.length && <Alert variant="info">{noRuleFound}</Alert>
|
||||
!groups.length && <Alert variant="info">{noRuleFound}</Alert>
|
||||
) || (
|
||||
<div className="vm-explore-alerts-body">
|
||||
{filteredGroups.map((group) => (
|
||||
{groups.map((group) => (
|
||||
<div
|
||||
key={group.id}
|
||||
className="vm-explore-alert-group vm-block vm-block_empty-padding"
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import { Rule, Group } from "../../types";
|
||||
|
||||
export const getChanges = (title: string, prevValues: string[]): string[] => {
|
||||
if (title === "All") return [];
|
||||
|
||||
@@ -12,77 +10,3 @@ export const getChanges = (title: string, prevValues: string[]): string[] => {
|
||||
|
||||
return Array.from(newValues);
|
||||
};
|
||||
|
||||
export const getState = (rule: Rule) => {
|
||||
let state = rule?.state || "ok";
|
||||
if (rule?.health !== "ok") {
|
||||
state = "unhealthy";
|
||||
} else if (!rule?.lastSamples && !rule?.lastSeriesFetched) {
|
||||
state = "no match";
|
||||
}
|
||||
return state;
|
||||
};
|
||||
|
||||
export const getStates = (rule: Rule) => {
|
||||
const output: Record<string, number> = {};
|
||||
const alertsCount = rule?.alerts?.length || 0;
|
||||
if (alertsCount > 0) {
|
||||
rule.alerts.forEach((alert) => {
|
||||
if (alert.state in output) {
|
||||
output[alert.state] += 1;
|
||||
} else {
|
||||
output[alert.state] = 1;
|
||||
}
|
||||
});
|
||||
} else {
|
||||
output[getState(rule)] = 1;
|
||||
}
|
||||
return output;
|
||||
};
|
||||
|
||||
export const filterGroups = (groups: Group[], types: string[], states: string[], searchInput: string) => {
|
||||
const allTypes: Set<string> = new Set();
|
||||
const allStates: Set<string> = new Set();
|
||||
const filteredGroups: Group[] = [];
|
||||
|
||||
groups.forEach((group) => {
|
||||
const filteredRules: Rule[] = [];
|
||||
const statesPerGroup: Record<string, number> = {};
|
||||
group.rules.forEach((rule) => {
|
||||
const ruleType = rule.type.charAt(0).toUpperCase() + rule.type.slice(1);
|
||||
allTypes.add(ruleType);
|
||||
if (types?.length && !types.includes(ruleType)) return;
|
||||
|
||||
const state = getState(rule);
|
||||
const stateName = state.charAt(0).toUpperCase() + state.slice(1);
|
||||
allStates.add(stateName);
|
||||
if (states?.length && !states.includes(stateName)) return;
|
||||
|
||||
if (
|
||||
searchInput &&
|
||||
!rule.name.toLowerCase().includes(searchInput.toLowerCase()) &&
|
||||
!group.name.toLowerCase().includes(searchInput.toLowerCase()) &&
|
||||
!group.file.toLowerCase().includes(searchInput.toLowerCase())
|
||||
)
|
||||
return;
|
||||
|
||||
filteredRules.push(rule);
|
||||
if (state !== "no match" && state !== "unhealthy" && state !== "firing" && state !== "pending")
|
||||
return;
|
||||
|
||||
const count = state === "firing" || state === "pending" ? rule?.alerts?.length : 1;
|
||||
if (stateName in statesPerGroup) {
|
||||
statesPerGroup[stateName] += count;
|
||||
} else {
|
||||
statesPerGroup[stateName] = count;
|
||||
}
|
||||
});
|
||||
if (filteredRules.length) {
|
||||
const g = Object.assign({}, group);
|
||||
g.rules = filteredRules;
|
||||
g.states = statesPerGroup;
|
||||
filteredGroups.push(g);
|
||||
}
|
||||
});
|
||||
return { filteredGroups, allTypes, allStates };
|
||||
};
|
||||
|
||||
@@ -1,46 +1,75 @@
|
||||
import { useTimeState } from "../../../state/time/TimeStateContext";
|
||||
import { useEffect, useMemo, useState } from "preact/compat";
|
||||
import { useMemo, useEffect, useState } from "preact/compat";
|
||||
import { getGroupsUrl } from "../../../api/explore-alerts";
|
||||
import { useAppState } from "../../../state/common/StateContext";
|
||||
import { ErrorTypes, Group } from "../../../types";
|
||||
import { useTimeState } from "../../../state/time/TimeStateContext";
|
||||
|
||||
interface FetchGroupsReturn {
|
||||
groups: Group[];
|
||||
isLoading: boolean;
|
||||
error?: ErrorTypes | string;
|
||||
pageInfo: PageInfo;
|
||||
}
|
||||
|
||||
interface FetchGroupsProps {
|
||||
blockFetch: boolean
|
||||
blockFetch: boolean;
|
||||
search: string;
|
||||
ruleType: string;
|
||||
states: string[];
|
||||
pageNum: number;
|
||||
onPageChange: (num: number) => () => void;
|
||||
}
|
||||
|
||||
export const useFetchGroups = ({ blockFetch }: FetchGroupsProps): FetchGroupsReturn => {
|
||||
interface PageInfo {
|
||||
page: number;
|
||||
total_pages: number;
|
||||
total_groups: number;
|
||||
total_rules: number;
|
||||
}
|
||||
|
||||
const MAX_GROUPS = 100;
|
||||
|
||||
export const useFetchGroups = ({ blockFetch, pageNum, search, ruleType, states, onPageChange }: FetchGroupsProps): FetchGroupsReturn => {
|
||||
const { serverUrl } = useAppState();
|
||||
const { period } = useTimeState();
|
||||
|
||||
const [groups, setGroups] = useState<Group[]>([]);
|
||||
const [isLoading, setIsLoading] = useState(false);
|
||||
const [pageInfo, setPageInfo] = useState<PageInfo>({
|
||||
page: pageNum,
|
||||
total_pages: 1,
|
||||
total_groups: 0,
|
||||
total_rules: 0,
|
||||
});
|
||||
const [error, setError] = useState<ErrorTypes | string>();
|
||||
|
||||
const fetchUrl = useMemo(
|
||||
() => getGroupsUrl(serverUrl),
|
||||
[serverUrl],
|
||||
() => getGroupsUrl(serverUrl, search, ruleType, states, MAX_GROUPS),
|
||||
[serverUrl, search, ruleType, states],
|
||||
);
|
||||
|
||||
const loaded = !!groups.length || !blockFetch;
|
||||
|
||||
useEffect(() => {
|
||||
if (blockFetch) return;
|
||||
const fetchData = async () => {
|
||||
setIsLoading(true);
|
||||
try {
|
||||
const response = await fetch(fetchUrl);
|
||||
const url = `${fetchUrl}&page_num=${pageNum}`;
|
||||
const response = await fetch(url);
|
||||
const resp = await response.json();
|
||||
|
||||
if (response.ok) {
|
||||
const data = (resp.data.groups || []) as Group[];
|
||||
setGroups(data.sort((a, b) => a.name.localeCompare(b.name)));
|
||||
const loadedGroups = (resp.data.groups || []) as Group[];
|
||||
setGroups(loadedGroups);
|
||||
setPageInfo({
|
||||
page: resp.page || 1,
|
||||
total_pages: resp.total_pages || 1,
|
||||
total_groups: resp.total_groups || 0,
|
||||
total_rules: resp.total_rules || 0,
|
||||
});
|
||||
setError(undefined);
|
||||
} else if (response.status === 400 && resp?.error?.includes("exceeds total amount of pages")) {
|
||||
onPageChange(1)();
|
||||
setError(`${resp.errorType}\r\n${resp?.error}`);
|
||||
} else {
|
||||
setError(`${resp.errorType}\r\n${resp?.error}`);
|
||||
}
|
||||
@@ -51,9 +80,8 @@ export const useFetchGroups = ({ blockFetch }: FetchGroupsProps): FetchGroupsRet
|
||||
}
|
||||
setIsLoading(false);
|
||||
};
|
||||
|
||||
fetchData().catch(console.error);
|
||||
}, [fetchUrl, period, loaded]);
|
||||
}, [fetchUrl, period, loaded, pageNum]);
|
||||
|
||||
return { groups, isLoading, error };
|
||||
return { groups, isLoading, error, pageInfo };
|
||||
};
|
||||
|
||||
@@ -3,7 +3,7 @@ import { compactObject } from "../../../utils/object";
|
||||
import useSearchParamsFromObject from "../../../hooks/useSearchParamsFromObject";
|
||||
|
||||
interface rulesQueryProps {
|
||||
types?: string;
|
||||
type?: string;
|
||||
states?: string;
|
||||
search?: string;
|
||||
rule_id: string;
|
||||
@@ -12,7 +12,7 @@ interface rulesQueryProps {
|
||||
}
|
||||
|
||||
export const useRulesSetQueryParams = ({
|
||||
types,
|
||||
type,
|
||||
states,
|
||||
search,
|
||||
rule_id,
|
||||
@@ -23,7 +23,7 @@ export const useRulesSetQueryParams = ({
|
||||
|
||||
const setSearchParamsFromState = () => {
|
||||
const params = compactObject({
|
||||
types,
|
||||
type,
|
||||
states,
|
||||
search,
|
||||
alert_id,
|
||||
@@ -35,7 +35,7 @@ export const useRulesSetQueryParams = ({
|
||||
};
|
||||
|
||||
useEffect(setSearchParamsFromState, [
|
||||
types,
|
||||
type,
|
||||
states,
|
||||
search,
|
||||
rule_id,
|
||||
|
||||
@@ -17,6 +17,19 @@
|
||||
}
|
||||
}
|
||||
|
||||
.vm-explore-alerts-load {
|
||||
text-align: center;
|
||||
color: var(--color-text-disabled);
|
||||
button {
|
||||
border: none;
|
||||
}
|
||||
&-before {
|
||||
svg {
|
||||
transform: rotate(180deg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.vm-list-item-inner {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
|
||||
@@ -230,6 +230,7 @@ export interface Rule {
|
||||
debug: boolean;
|
||||
updates: RuleUpdate[];
|
||||
max_updates_entries: number;
|
||||
states: Record<string, number>;
|
||||
}
|
||||
|
||||
interface RuleUpdate {
|
||||
|
||||
@@ -22,7 +22,7 @@ func NewPrometheusMockStorage(series []*prompb.TimeSeries) *PrometheusMockStorag
|
||||
return &PrometheusMockStorage{store: series}
|
||||
}
|
||||
|
||||
// ReadMultiple implemnets the storage.ReadClient interface for reading time series data.
|
||||
// ReadMultiple implements the storage.ReadClient interface for reading time series data.
|
||||
func (ms *PrometheusMockStorage) ReadMultiple(ctx context.Context, queries []*prompb.Query, sortSeries bool) (storage.SeriesSet, error) {
|
||||
if len(queries) != 1 {
|
||||
panic(fmt.Errorf("reading multiple queries isn't implemented"))
|
||||
|
||||
1149
dashboards/metrics-explorer.json
Normal file
@@ -51,7 +51,7 @@
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": 2,
|
||||
"id": 3,
|
||||
"links": [
|
||||
{
|
||||
"icon": "doc",
|
||||
@@ -1769,7 +1769,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 698
|
||||
"y": 141
|
||||
},
|
||||
"id": 111,
|
||||
"options": {
|
||||
@@ -1884,7 +1884,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 698
|
||||
"y": 141
|
||||
},
|
||||
"id": 157,
|
||||
"options": {
|
||||
@@ -1996,7 +1996,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 758
|
||||
"y": 196
|
||||
},
|
||||
"id": 155,
|
||||
"options": {
|
||||
@@ -2103,7 +2103,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 758
|
||||
"y": 196
|
||||
},
|
||||
"id": 158,
|
||||
"options": {
|
||||
@@ -2226,7 +2226,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 766
|
||||
"y": 204
|
||||
},
|
||||
"id": 156,
|
||||
"options": {
|
||||
@@ -2370,7 +2370,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 766
|
||||
"y": 204
|
||||
},
|
||||
"id": 81,
|
||||
"options": {
|
||||
@@ -2497,7 +2497,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 774
|
||||
"y": 212
|
||||
},
|
||||
"id": 39,
|
||||
"options": {
|
||||
@@ -2603,7 +2603,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 774
|
||||
"y": 212
|
||||
},
|
||||
"id": 159,
|
||||
"options": {
|
||||
@@ -2729,7 +2729,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 782
|
||||
"y": 220
|
||||
},
|
||||
"id": 41,
|
||||
"options": {
|
||||
@@ -2849,7 +2849,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 782
|
||||
"y": 220
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
@@ -2971,7 +2971,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 790
|
||||
"y": 228
|
||||
},
|
||||
"id": 135,
|
||||
"options": {
|
||||
@@ -3081,7 +3081,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 790
|
||||
"y": 228
|
||||
},
|
||||
"id": 149,
|
||||
"options": {
|
||||
@@ -3187,7 +3187,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 798
|
||||
"y": 236
|
||||
},
|
||||
"id": 154,
|
||||
"options": {
|
||||
@@ -3297,7 +3297,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 798
|
||||
"y": 236
|
||||
},
|
||||
"id": 83,
|
||||
"options": {
|
||||
@@ -3386,6 +3386,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3400,7 +3401,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3416,7 +3418,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3116
|
||||
"y": 142
|
||||
},
|
||||
"id": 92,
|
||||
"options": {
|
||||
@@ -3438,7 +3440,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3454,7 +3456,7 @@
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top 10 jobs by unique samples",
|
||||
"title": "Top 10 jobs by newly added series",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
@@ -3462,7 +3464,7 @@
|
||||
"type": "victoriametrics-metrics-datasource",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows top 10 instances by the number of new series registered by vmagent over the 5min range. These instances generate the most of the churn rate.",
|
||||
"description": "Shows top 10 targets by the number of new series registered by vmagent over the 5min range. These instances generate the most of the churn rate.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -3492,6 +3494,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3506,7 +3509,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3522,7 +3526,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3116
|
||||
"y": 142
|
||||
},
|
||||
"id": 95,
|
||||
"options": {
|
||||
@@ -3544,7 +3548,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3553,14 +3557,14 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk(10, sum(sum_over_time(scrape_series_added[5m])) by (instance)) > 0",
|
||||
"expr": "topk(10, sum(sum_over_time(scrape_series_added[5m])) by (job,instance)) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "__auto",
|
||||
"legendFormat": "{{job}}-{{instance}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top 10 instances by unique samples",
|
||||
"title": "Top 10 targets by newly added series",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
@@ -3599,6 +3603,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3615,7 +3620,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "transparent"
|
||||
"color": "transparent",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3631,7 +3637,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3124
|
||||
"y": 150
|
||||
},
|
||||
"id": 98,
|
||||
"options": {
|
||||
@@ -3653,7 +3659,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3708,6 +3714,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3724,7 +3731,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "transparent"
|
||||
"color": "transparent",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3740,7 +3748,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3124
|
||||
"y": 150
|
||||
},
|
||||
"id": 99,
|
||||
"options": {
|
||||
@@ -3762,7 +3770,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3816,6 +3824,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3832,7 +3841,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3848,7 +3858,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3132
|
||||
"y": 158
|
||||
},
|
||||
"id": 79,
|
||||
"options": {
|
||||
@@ -3870,7 +3880,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3924,6 +3934,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3940,7 +3951,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3956,7 +3968,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3132
|
||||
"y": 158
|
||||
},
|
||||
"id": 18,
|
||||
"links": [
|
||||
@@ -3985,7 +3997,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -4070,7 +4082,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3140
|
||||
"y": 166
|
||||
},
|
||||
"id": 127,
|
||||
"options": {
|
||||
@@ -4176,7 +4188,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3140
|
||||
"y": 166
|
||||
},
|
||||
"id": 50,
|
||||
"options": {
|
||||
@@ -4278,7 +4290,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3148
|
||||
"y": 174
|
||||
},
|
||||
"id": 129,
|
||||
"options": {
|
||||
@@ -4413,7 +4425,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3148
|
||||
"y": 174
|
||||
},
|
||||
"id": 150,
|
||||
"options": {
|
||||
@@ -4516,7 +4528,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3155
|
||||
"y": 181
|
||||
},
|
||||
"id": 151,
|
||||
"options": {
|
||||
@@ -4637,7 +4649,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3361
|
||||
"y": 4209
|
||||
},
|
||||
"id": 48,
|
||||
"options": {
|
||||
@@ -4745,7 +4757,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3361
|
||||
"y": 4209
|
||||
},
|
||||
"id": 76,
|
||||
"options": {
|
||||
@@ -4851,7 +4863,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3368
|
||||
"y": 4216
|
||||
},
|
||||
"id": 132,
|
||||
"options": {
|
||||
@@ -4959,7 +4971,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3368
|
||||
"y": 4216
|
||||
},
|
||||
"id": 133,
|
||||
"options": {
|
||||
@@ -5066,7 +5078,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3375
|
||||
"y": 4223
|
||||
},
|
||||
"id": 20,
|
||||
"options": {
|
||||
@@ -5172,7 +5184,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3375
|
||||
"y": 4223
|
||||
},
|
||||
"id": 126,
|
||||
"options": {
|
||||
@@ -5277,7 +5289,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3383
|
||||
"y": 4231
|
||||
},
|
||||
"id": 46,
|
||||
"options": {
|
||||
@@ -5382,7 +5394,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3383
|
||||
"y": 4231
|
||||
},
|
||||
"id": 148,
|
||||
"options": {
|
||||
@@ -5487,7 +5499,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3391
|
||||
"y": 4239
|
||||
},
|
||||
"id": 31,
|
||||
"options": {
|
||||
@@ -5654,7 +5666,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3083
|
||||
"y": 3931
|
||||
},
|
||||
"id": 73,
|
||||
"options": {
|
||||
@@ -5771,7 +5783,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3083
|
||||
"y": 3931
|
||||
},
|
||||
"id": 131,
|
||||
"options": {
|
||||
@@ -5875,7 +5887,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3359
|
||||
"y": 4207
|
||||
},
|
||||
"id": 130,
|
||||
"options": {
|
||||
@@ -5992,7 +6004,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3359
|
||||
"y": 4207
|
||||
},
|
||||
"id": 77,
|
||||
"options": {
|
||||
@@ -6117,7 +6129,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3406
|
||||
"y": 4254
|
||||
},
|
||||
"id": 146,
|
||||
"options": {
|
||||
@@ -6219,7 +6231,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3406
|
||||
"y": 4254
|
||||
},
|
||||
"id": 143,
|
||||
"options": {
|
||||
@@ -6315,7 +6327,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3414
|
||||
"y": 4262
|
||||
},
|
||||
"id": 147,
|
||||
"options": {
|
||||
@@ -6418,7 +6430,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3414
|
||||
"y": 4262
|
||||
},
|
||||
"id": 139,
|
||||
"options": {
|
||||
@@ -6529,7 +6541,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3422
|
||||
"y": 4270
|
||||
},
|
||||
"id": 142,
|
||||
"options": {
|
||||
@@ -6626,7 +6638,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3422
|
||||
"y": 4270
|
||||
},
|
||||
"id": 137,
|
||||
"options": {
|
||||
@@ -6739,7 +6751,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3462
|
||||
"y": 4310
|
||||
},
|
||||
"id": 141,
|
||||
"options": {
|
||||
@@ -6869,7 +6881,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 1411
|
||||
"y": 2259
|
||||
},
|
||||
"id": 60,
|
||||
"options": {
|
||||
@@ -6977,7 +6989,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 1411
|
||||
"y": 2259
|
||||
},
|
||||
"id": 66,
|
||||
"options": {
|
||||
@@ -7085,7 +7097,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 1419
|
||||
"y": 2267
|
||||
},
|
||||
"id": 61,
|
||||
"options": {
|
||||
@@ -7193,7 +7205,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 1419
|
||||
"y": 2267
|
||||
},
|
||||
"id": 65,
|
||||
"options": {
|
||||
@@ -7300,7 +7312,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 1427
|
||||
"y": 2275
|
||||
},
|
||||
"id": 88,
|
||||
"options": {
|
||||
@@ -7404,7 +7416,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 1427
|
||||
"y": 2275
|
||||
},
|
||||
"id": 84,
|
||||
"options": {
|
||||
@@ -7511,7 +7523,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 1435
|
||||
"y": 2283
|
||||
},
|
||||
"id": 90,
|
||||
"options": {
|
||||
@@ -7569,7 +7581,7 @@
|
||||
"h": 2,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 70
|
||||
"y": 918
|
||||
},
|
||||
"id": 115,
|
||||
"options": {
|
||||
@@ -7651,7 +7663,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 72
|
||||
"y": 920
|
||||
},
|
||||
"id": 119,
|
||||
"options": {
|
||||
@@ -7759,7 +7771,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 72
|
||||
"y": 920
|
||||
},
|
||||
"id": 117,
|
||||
"options": {
|
||||
@@ -7869,7 +7881,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 80
|
||||
"y": 928
|
||||
},
|
||||
"id": 125,
|
||||
"links": [
|
||||
@@ -7995,7 +8007,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 80
|
||||
"y": 928
|
||||
},
|
||||
"id": 123,
|
||||
"options": {
|
||||
@@ -8129,7 +8141,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 88
|
||||
"y": 936
|
||||
},
|
||||
"id": 121,
|
||||
"options": {
|
||||
@@ -8256,7 +8268,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 88
|
||||
"y": 936
|
||||
},
|
||||
"id": 161,
|
||||
"links": [
|
||||
@@ -8378,9 +8390,9 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 461
|
||||
"y": 1309
|
||||
},
|
||||
"id": 154,
|
||||
"id": 162,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
@@ -8537,4 +8549,4 @@
|
||||
"title": "VictoriaMetrics - vmagent (VM)",
|
||||
"uid": "G7Z9GzMGz_vm",
|
||||
"version": 1
|
||||
}
|
||||
}
|
||||
@@ -50,7 +50,7 @@
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": 2,
|
||||
"id": 3,
|
||||
"links": [
|
||||
{
|
||||
"icon": "doc",
|
||||
@@ -1768,7 +1768,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 698
|
||||
"y": 141
|
||||
},
|
||||
"id": 111,
|
||||
"options": {
|
||||
@@ -1883,7 +1883,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 698
|
||||
"y": 141
|
||||
},
|
||||
"id": 157,
|
||||
"options": {
|
||||
@@ -1995,7 +1995,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 758
|
||||
"y": 196
|
||||
},
|
||||
"id": 155,
|
||||
"options": {
|
||||
@@ -2102,7 +2102,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 758
|
||||
"y": 196
|
||||
},
|
||||
"id": 158,
|
||||
"options": {
|
||||
@@ -2225,7 +2225,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 766
|
||||
"y": 204
|
||||
},
|
||||
"id": 156,
|
||||
"options": {
|
||||
@@ -2369,7 +2369,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 766
|
||||
"y": 204
|
||||
},
|
||||
"id": 81,
|
||||
"options": {
|
||||
@@ -2496,7 +2496,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 774
|
||||
"y": 212
|
||||
},
|
||||
"id": 39,
|
||||
"options": {
|
||||
@@ -2602,7 +2602,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 774
|
||||
"y": 212
|
||||
},
|
||||
"id": 159,
|
||||
"options": {
|
||||
@@ -2728,7 +2728,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 782
|
||||
"y": 220
|
||||
},
|
||||
"id": 41,
|
||||
"options": {
|
||||
@@ -2848,7 +2848,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 782
|
||||
"y": 220
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
@@ -2970,7 +2970,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 790
|
||||
"y": 228
|
||||
},
|
||||
"id": 135,
|
||||
"options": {
|
||||
@@ -3080,7 +3080,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 790
|
||||
"y": 228
|
||||
},
|
||||
"id": 149,
|
||||
"options": {
|
||||
@@ -3186,7 +3186,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 798
|
||||
"y": 236
|
||||
},
|
||||
"id": 154,
|
||||
"options": {
|
||||
@@ -3296,7 +3296,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 798
|
||||
"y": 236
|
||||
},
|
||||
"id": 83,
|
||||
"options": {
|
||||
@@ -3385,6 +3385,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3399,7 +3400,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3415,7 +3417,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3116
|
||||
"y": 142
|
||||
},
|
||||
"id": 92,
|
||||
"options": {
|
||||
@@ -3437,7 +3439,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3453,7 +3455,7 @@
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top 10 jobs by unique samples",
|
||||
"title": "Top 10 jobs by newly added series",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
@@ -3461,7 +3463,7 @@
|
||||
"type": "prometheus",
|
||||
"uid": "$ds"
|
||||
},
|
||||
"description": "Shows top 10 instances by the number of new series registered by vmagent over the 5min range. These instances generate the most of the churn rate.",
|
||||
"description": "Shows top 10 targets by the number of new series registered by vmagent over the 5min range. These instances generate the most of the churn rate.",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
@@ -3491,6 +3493,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3505,7 +3508,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3521,7 +3525,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3116
|
||||
"y": 142
|
||||
},
|
||||
"id": 95,
|
||||
"options": {
|
||||
@@ -3543,7 +3547,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3552,14 +3556,14 @@
|
||||
},
|
||||
"editorMode": "code",
|
||||
"exemplar": false,
|
||||
"expr": "topk(10, sum(sum_over_time(scrape_series_added[5m])) by (instance)) > 0",
|
||||
"expr": "topk(10, sum(sum_over_time(scrape_series_added[5m])) by (job,instance)) > 0",
|
||||
"interval": "",
|
||||
"legendFormat": "__auto",
|
||||
"legendFormat": "{{job}}-{{instance}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top 10 instances by unique samples",
|
||||
"title": "Top 10 targets by newly added series",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
@@ -3598,6 +3602,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3614,7 +3619,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "transparent"
|
||||
"color": "transparent",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3630,7 +3636,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3124
|
||||
"y": 150
|
||||
},
|
||||
"id": 98,
|
||||
"options": {
|
||||
@@ -3652,7 +3658,7 @@
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3707,6 +3713,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3723,7 +3730,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "transparent"
|
||||
"color": "transparent",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3739,7 +3747,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3124
|
||||
"y": 150
|
||||
},
|
||||
"id": 99,
|
||||
"options": {
|
||||
@@ -3761,7 +3769,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3815,6 +3823,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3831,7 +3840,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3847,7 +3857,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3132
|
||||
"y": 158
|
||||
},
|
||||
"id": 79,
|
||||
"options": {
|
||||
@@ -3869,7 +3879,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -3923,6 +3933,7 @@
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"showValues": false,
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
@@ -3939,7 +3950,8 @@
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green"
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
@@ -3955,7 +3967,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3132
|
||||
"y": 158
|
||||
},
|
||||
"id": 18,
|
||||
"links": [
|
||||
@@ -3984,7 +3996,7 @@
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.5.0",
|
||||
"pluginVersion": "12.2.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
@@ -4069,7 +4081,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3140
|
||||
"y": 166
|
||||
},
|
||||
"id": 127,
|
||||
"options": {
|
||||
@@ -4175,7 +4187,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3140
|
||||
"y": 166
|
||||
},
|
||||
"id": 50,
|
||||
"options": {
|
||||
@@ -4277,7 +4289,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3148
|
||||
"y": 174
|
||||
},
|
||||
"id": 129,
|
||||
"options": {
|
||||
@@ -4412,7 +4424,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3148
|
||||
"y": 174
|
||||
},
|
||||
"id": 150,
|
||||
"options": {
|
||||
@@ -4515,7 +4527,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3155
|
||||
"y": 181
|
||||
},
|
||||
"id": 151,
|
||||
"options": {
|
||||
@@ -4636,7 +4648,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3361
|
||||
"y": 4209
|
||||
},
|
||||
"id": 48,
|
||||
"options": {
|
||||
@@ -4744,7 +4756,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3361
|
||||
"y": 4209
|
||||
},
|
||||
"id": 76,
|
||||
"options": {
|
||||
@@ -4850,7 +4862,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3368
|
||||
"y": 4216
|
||||
},
|
||||
"id": 132,
|
||||
"options": {
|
||||
@@ -4958,7 +4970,7 @@
|
||||
"h": 7,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3368
|
||||
"y": 4216
|
||||
},
|
||||
"id": 133,
|
||||
"options": {
|
||||
@@ -5065,7 +5077,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3375
|
||||
"y": 4223
|
||||
},
|
||||
"id": 20,
|
||||
"options": {
|
||||
@@ -5171,7 +5183,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3375
|
||||
"y": 4223
|
||||
},
|
||||
"id": 126,
|
||||
"options": {
|
||||
@@ -5276,7 +5288,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3383
|
||||
"y": 4231
|
||||
},
|
||||
"id": 46,
|
||||
"options": {
|
||||
@@ -5381,7 +5393,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3383
|
||||
"y": 4231
|
||||
},
|
||||
"id": 148,
|
||||
"options": {
|
||||
@@ -5486,7 +5498,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3391
|
||||
"y": 4239
|
||||
},
|
||||
"id": 31,
|
||||
"options": {
|
||||
@@ -5653,7 +5665,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3083
|
||||
"y": 3931
|
||||
},
|
||||
"id": 73,
|
||||
"options": {
|
||||
@@ -5770,7 +5782,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3083
|
||||
"y": 3931
|
||||
},
|
||||
"id": 131,
|
||||
"options": {
|
||||
@@ -5874,7 +5886,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3359
|
||||
"y": 4207
|
||||
},
|
||||
"id": 130,
|
||||
"options": {
|
||||
@@ -5991,7 +6003,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3359
|
||||
"y": 4207
|
||||
},
|
||||
"id": 77,
|
||||
"options": {
|
||||
@@ -6116,7 +6128,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3406
|
||||
"y": 4254
|
||||
},
|
||||
"id": 146,
|
||||
"options": {
|
||||
@@ -6218,7 +6230,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3406
|
||||
"y": 4254
|
||||
},
|
||||
"id": 143,
|
||||
"options": {
|
||||
@@ -6314,7 +6326,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3414
|
||||
"y": 4262
|
||||
},
|
||||
"id": 147,
|
||||
"options": {
|
||||
@@ -6417,7 +6429,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3414
|
||||
"y": 4262
|
||||
},
|
||||
"id": 139,
|
||||
"options": {
|
||||
@@ -6528,7 +6540,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 3422
|
||||
"y": 4270
|
||||
},
|
||||
"id": 142,
|
||||
"options": {
|
||||
@@ -6625,7 +6637,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3422
|
||||
"y": 4270
|
||||
},
|
||||
"id": 137,
|
||||
"options": {
|
||||
@@ -6738,7 +6750,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 3462
|
||||
"y": 4310
|
||||
},
|
||||
"id": 141,
|
||||
"options": {
|
||||
@@ -6868,7 +6880,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 1411
|
||||
"y": 2259
|
||||
},
|
||||
"id": 60,
|
||||
"options": {
|
||||
@@ -6976,7 +6988,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 1411
|
||||
"y": 2259
|
||||
},
|
||||
"id": 66,
|
||||
"options": {
|
||||
@@ -7084,7 +7096,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 1419
|
||||
"y": 2267
|
||||
},
|
||||
"id": 61,
|
||||
"options": {
|
||||
@@ -7192,7 +7204,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 1419
|
||||
"y": 2267
|
||||
},
|
||||
"id": 65,
|
||||
"options": {
|
||||
@@ -7299,7 +7311,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 1427
|
||||
"y": 2275
|
||||
},
|
||||
"id": 88,
|
||||
"options": {
|
||||
@@ -7403,7 +7415,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 1427
|
||||
"y": 2275
|
||||
},
|
||||
"id": 84,
|
||||
"options": {
|
||||
@@ -7510,7 +7522,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 1435
|
||||
"y": 2283
|
||||
},
|
||||
"id": 90,
|
||||
"options": {
|
||||
@@ -7568,7 +7580,7 @@
|
||||
"h": 2,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 70
|
||||
"y": 918
|
||||
},
|
||||
"id": 115,
|
||||
"options": {
|
||||
@@ -7650,7 +7662,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 72
|
||||
"y": 920
|
||||
},
|
||||
"id": 119,
|
||||
"options": {
|
||||
@@ -7758,7 +7770,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 72
|
||||
"y": 920
|
||||
},
|
||||
"id": 117,
|
||||
"options": {
|
||||
@@ -7868,7 +7880,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 80
|
||||
"y": 928
|
||||
},
|
||||
"id": 125,
|
||||
"links": [
|
||||
@@ -7994,7 +8006,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 80
|
||||
"y": 928
|
||||
},
|
||||
"id": 123,
|
||||
"options": {
|
||||
@@ -8128,7 +8140,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 88
|
||||
"y": 936
|
||||
},
|
||||
"id": 121,
|
||||
"options": {
|
||||
@@ -8255,7 +8267,7 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 88
|
||||
"y": 936
|
||||
},
|
||||
"id": 161,
|
||||
"links": [
|
||||
@@ -8377,9 +8389,9 @@
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 461
|
||||
"y": 1309
|
||||
},
|
||||
"id": 154,
|
||||
"id": 162,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
@@ -8536,4 +8548,4 @@
|
||||
"title": "VictoriaMetrics - vmagent",
|
||||
"uid": "G7Z9GzMGz",
|
||||
"version": 1
|
||||
}
|
||||
}
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.137.0
|
||||
image: victoriametrics/vmagent:v1.138.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -25,27 +25,31 @@ services:
|
||||
ports:
|
||||
- 3000:3000
|
||||
restart: always
|
||||
environment:
|
||||
- GF_PLUGINS_PREINSTALL=yesoreyeram-infinity-datasource
|
||||
volumes:
|
||||
- grafanadata:/var/lib/grafana
|
||||
- ./provisioning/datasources/prometheus-datasource/cluster.yml:/etc/grafana/provisioning/datasources/cluster.yml
|
||||
- ./provisioning/datasources/prometheus/cluster.yml:/etc/grafana/provisioning/datasources/cluster.yml
|
||||
- ./provisioning/datasources/infinity/cluster.yml:/etc/grafana/provisioning/datasources/infinity-cluster.yml
|
||||
- ./provisioning/dashboards:/etc/grafana/provisioning/dashboards
|
||||
- ./../../dashboards/victoriametrics-cluster.json:/var/lib/grafana/dashboards/vm.json
|
||||
- ./../../dashboards/vmagent.json:/var/lib/grafana/dashboards/vmagent.json
|
||||
- ./../../dashboards/vmalert.json:/var/lib/grafana/dashboards/vmalert.json
|
||||
- ./../../dashboards/vmauth.json:/var/lib/grafana/dashboards/vmauth.json
|
||||
- ./../../dashboards/alert-statistics.json:/var/lib/grafana/dashboards/alert-statistics.json
|
||||
- ./../../dashboards/metrics-explorer.json:/var/lib/grafana/dashboards/metrics-explorer.json
|
||||
|
||||
# vmstorage shards. Each shard receives 1/N of all metrics sent to vminserts,
|
||||
# where N is number of vmstorages (2 in this case).
|
||||
vmstorage-1:
|
||||
image: victoriametrics/vmstorage:v1.137.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.138.0-cluster
|
||||
volumes:
|
||||
- strgdata-1:/storage
|
||||
command:
|
||||
- "--storageDataPath=/storage"
|
||||
restart: always
|
||||
vmstorage-2:
|
||||
image: victoriametrics/vmstorage:v1.137.0-cluster
|
||||
image: victoriametrics/vmstorage:v1.138.0-cluster
|
||||
volumes:
|
||||
- strgdata-2:/storage
|
||||
command:
|
||||
@@ -55,7 +59,7 @@ services:
|
||||
# vminsert is an ingestion frontend. It receives metrics pushed by vmagent,
|
||||
# pre-processes them and distributes them across configured vmstorage shards.
|
||||
vminsert-1:
|
||||
image: victoriametrics/vminsert:v1.137.0-cluster
|
||||
image: victoriametrics/vminsert:v1.138.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -64,7 +68,7 @@ services:
|
||||
- "--storageNode=vmstorage-2:8400"
|
||||
restart: always
|
||||
vminsert-2:
|
||||
image: victoriametrics/vminsert:v1.137.0-cluster
|
||||
image: victoriametrics/vminsert:v1.138.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -76,7 +80,7 @@ services:
|
||||
# vmselect is a query frontend. It serves read queries in MetricsQL or PromQL.
|
||||
# vmselect collects results from configured `--storageNode` shards.
|
||||
vmselect-1:
|
||||
image: victoriametrics/vmselect:v1.137.0-cluster
|
||||
image: victoriametrics/vmselect:v1.138.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -86,7 +90,7 @@ services:
|
||||
- "--vmalert.proxyURL=http://vmalert:8880"
|
||||
restart: always
|
||||
vmselect-2:
|
||||
image: victoriametrics/vmselect:v1.137.0-cluster
|
||||
image: victoriametrics/vmselect:v1.138.0-cluster
|
||||
depends_on:
|
||||
- "vmstorage-1"
|
||||
- "vmstorage-2"
|
||||
@@ -101,7 +105,7 @@ services:
|
||||
# read requests from Grafana, vmui, vmalert among vmselects.
|
||||
# It can be used as an authentication proxy.
|
||||
vmauth:
|
||||
image: victoriametrics/vmauth:v1.137.0
|
||||
image: victoriametrics/vmauth:v1.138.0
|
||||
depends_on:
|
||||
- "vmselect-1"
|
||||
- "vmselect-2"
|
||||
@@ -115,7 +119,7 @@ services:
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.137.0
|
||||
image: victoriametrics/vmalert:v1.138.0
|
||||
depends_on:
|
||||
- "vmauth"
|
||||
ports:
|
||||
@@ -127,8 +131,17 @@ services:
|
||||
- ./rules/alerts-vmalert.yml:/etc/alerts/alerts-vmalert.yml
|
||||
command:
|
||||
- "--datasource.url=http://vmauth:8427/select/0/prometheus"
|
||||
- "--datasource.basicAuth.username=foo"
|
||||
- "--datasource.basicAuth.password=bar"
|
||||
|
||||
- "--remoteRead.url=http://vmauth:8427/select/0/prometheus"
|
||||
- "--remoteRead.basicAuth.username=foo"
|
||||
- "--remoteRead.basicAuth.password=bar"
|
||||
|
||||
- "--remoteWrite.url=http://vmauth:8427/insert/0/prometheus"
|
||||
- "--remoteWrite.basicAuth.username=foo"
|
||||
- "--remoteWrite.basicAuth.password=bar"
|
||||
|
||||
- "--notifier.url=http://alertmanager:9093/"
|
||||
- "--rule=/etc/alerts/*.yml"
|
||||
# display source of alerts in grafana
|
||||
|
||||
@@ -3,7 +3,7 @@ services:
|
||||
# It scrapes targets defined in --promscrape.config
|
||||
# And forwards them to --remoteWrite.url
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.137.0
|
||||
image: victoriametrics/vmagent:v1.138.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -18,7 +18,7 @@ services:
|
||||
# VictoriaMetrics instance, a single process responsible for
|
||||
# storing metrics and serving read requests.
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.137.0
|
||||
image: victoriametrics/victoria-metrics:v1.138.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
- 8089:8089
|
||||
@@ -43,19 +43,23 @@ services:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
- 3000:3000
|
||||
restart: always
|
||||
environment:
|
||||
- GF_PLUGINS_PREINSTALL=yesoreyeram-infinity-datasource
|
||||
volumes:
|
||||
- grafanadata:/var/lib/grafana
|
||||
- ./provisioning/datasources/prometheus-datasource/single.yml:/etc/grafana/provisioning/datasources/single.yml
|
||||
- ./provisioning/datasources/prometheus/single.yml:/etc/grafana/provisioning/datasources/single.yml
|
||||
- ./provisioning/datasources/infinity/single.yml:/etc/grafana/provisioning/datasources/infinity-single.yml
|
||||
- ./provisioning/dashboards:/etc/grafana/provisioning/dashboards
|
||||
- ./../../dashboards/victoriametrics.json:/var/lib/grafana/dashboards/vm.json
|
||||
- ./../../dashboards/vmagent.json:/var/lib/grafana/dashboards/vmagent.json
|
||||
- ./../../dashboards/vmalert.json:/var/lib/grafana/dashboards/vmalert.json
|
||||
- ./../../dashboards/alert-statistics.json:/var/lib/grafana/dashboards/alert-statistics.json
|
||||
restart: always
|
||||
- ./../../dashboards/metrics-explorer.json:/var/lib/grafana/dashboards/metrics-explorer.json
|
||||
|
||||
# vmalert executes alerting and recording rules
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.137.0
|
||||
image: victoriametrics/vmalert:v1.138.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
- "alertmanager"
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: VictoriaMetrics-Infinity
|
||||
type: yesoreyeram-infinity-datasource
|
||||
url: "http://vmauth:8427/select/0/prometheus"
|
||||
basicAuth: true
|
||||
basicAuthUser: foo
|
||||
secureJsonData:
|
||||
basicAuthPassword: bar
|
||||
@@ -0,0 +1,6 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: VictoriaMetrics-Infinity
|
||||
type: yesoreyeram-infinity-datasource
|
||||
url: "http://victoriametrics:8428"
|
||||
@@ -1,6 +1,6 @@
|
||||
services:
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.137.0
|
||||
image: victoriametrics/vmagent:v1.138.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -14,7 +14,7 @@ services:
|
||||
restart: always
|
||||
|
||||
victoriametrics:
|
||||
image: victoriametrics/victoria-metrics:v1.137.0
|
||||
image: victoriametrics/victoria-metrics:v1.138.0
|
||||
ports:
|
||||
- 8428:8428
|
||||
volumes:
|
||||
@@ -40,7 +40,7 @@ services:
|
||||
restart: always
|
||||
|
||||
vmalert:
|
||||
image: victoriametrics/vmalert:v1.137.0
|
||||
image: victoriametrics/vmalert:v1.138.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
- '--external.alert.source=explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr": },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]'
|
||||
restart: always
|
||||
vmanomaly:
|
||||
image: victoriametrics/vmanomaly:v1.29.0
|
||||
image: victoriametrics/vmanomaly:v1.29.1
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
93
docs/ai-tools/README.md
Normal file
@@ -0,0 +1,93 @@
|
||||
VictoriaMetrics Observability Stack integrates with AI assistants through MCP servers and agent skills.
|
||||
These integrations allow AI agents and automation tools to query metrics, logs, and traces, analyze telemetry data,
|
||||
and assist engineers with debugging and observability tasks.
|
||||
|
||||
# MCP Servers
|
||||
|
||||
MCP (Model Context Protocol) servers expose observability data and operational capabilities to AI assistants in a structured way.
|
||||
This allows AI agents to query telemetry data, analyze system behavior, and assist engineers in troubleshooting and investigation workflows.
|
||||
|
||||
## VictoriaMetrics MCP Server
|
||||
|
||||
[VictoriaMetrics MCP Server](https://github.com/VictoriaMetrics/mcp-victoriametrics) provides access to VictoriaMetrics
|
||||
instances, seamless integration with [VictoriaMetrics APIs](https://docs.victoriametrics.com/victoriametrics/url-examples/)
|
||||
and [documentation](https://docs.victoriametrics.com/).
|
||||
|
||||
It offers a comprehensive interface for monitoring, observability, and debugging tasks related to VictoriaMetrics,
|
||||
enabling advanced automation and interaction capabilities for engineers and tools.
|
||||
|
||||
Capabilities include:
|
||||
- Query metrics and explore data (even draw graphs if your client supports it)
|
||||
- List and export available metrics, labels, label values and entire time series
|
||||
- Analyze and test your alerting and recording rules and alerts
|
||||
- Show parameters of your VictoriaMetrics instances
|
||||
- Explore cardinality of your data and metrics usage statistics
|
||||
- Analyze, trace, prettify and explain your queries
|
||||
- Debug your relabeling rules, downsampling and retention policy configurations
|
||||
- Integrate with [VictoriaMetrics Cloud](https://docs.victoriametrics.com/victoriametrics-cloud/)
|
||||
|
||||
> On YouTube: [How to Use an AI Assistant with Your Monitoring System – VictoriaMetrics MCP Server](https://www.youtube.com/watch?v=1k7xgbRi1k0).
|
||||
|
||||
See more details at [VictoriaMetrics/mcp-victoriametrics](https://github.com/VictoriaMetrics/mcp-victoriametrics).
|
||||
|
||||
## VictoriaLogs MCP Server
|
||||
|
||||
[VictoriaLogs MCP Server](https://github.com/VictoriaMetrics/mcp-victorialogs) provides access to VictoriaLogs instances,
|
||||
integration with [VictoriaLogs APIs](https://docs.victoriametrics.com/victorialogs/querying/#http-api) and [documentation](https://docs.victoriametrics.com/victorialogs/).
|
||||
|
||||
It provides a comprehensive interface for working with logs and performing observability and debugging tasks related to VictoriaLogs.
|
||||
|
||||
Capabilities include:
|
||||
- Querying logs and exploring logs data
|
||||
- Showing parameters of your VictoriaLogs instances
|
||||
- Listing available streams, fields, field values
|
||||
- Querying statistics for the logs as metrics
|
||||
|
||||
See more details at [VictoriaMetrics/mcp-victorialogs](https://github.com/VictoriaMetrics/mcp-victorialogs).
|
||||
|
||||
## VictoriaTraces MCP Server
|
||||
|
||||
[VictoriaTraces MCP Server](https://github.com/VictoriaMetrics/mcp-victoriatraces) provides access to VictoriaTraces instances,
|
||||
integration with [VictoriaTraces APIs](https://docs.victoriametrics.com/victoriatraces/querying/#http-api) and [documentation](https://docs.victoriametrics.com/victoriatraces/).
|
||||
|
||||
It enables AI assistants and tools to interact with distributed tracing data for observability and debugging tasks.
|
||||
|
||||
Capabilities include:
|
||||
- Get services and operations (span names)
|
||||
- Query traces, explore and analyze trace data
|
||||
|
||||
See more details at [VictoriaMetrics/mcp-victoriatraces](https://github.com/VictoriaMetrics/mcp-victoriatraces).
|
||||
|
||||
## vmanomaly MCP Server
|
||||
|
||||
[vmanomaly MCP Server](https://github.com/VictoriaMetrics/mcp-vmanomaly) provides seamless integration with vmanomaly
|
||||
REST API and documentation for AI-assisted anomaly detection, model management, and observability insights.
|
||||
|
||||
Capabilities include:
|
||||
- Health Monitoring: Check `vmanomaly` server health and build information
|
||||
- Model Management: List, validate, and configure anomaly detection models (like `zscore_online`, `prophet`, and more)
|
||||
- Configuration Generation: Generate complete `vmanomaly` YAML configurations
|
||||
- Alert Rule Generation: Generate [`vmalert`](https://docs.victoriametrics.com/victoriametrics/vmalert/) [alerting rules](https://docs.victoriametrics.com/victoriametrics/vmalert/#alerting-rules) based on [anomaly score metrics](https://docs.victoriametrics.com/anomaly-detection/faq/#what-is-anomaly-score) to simplify alerting setup
|
||||
- Documentation Search: Full-text search across embedded `vmanomaly` documentation with fuzzy matching
|
||||
|
||||
See more details at [VictoriaMetrics/mcp-vmanomaly](https://github.com/VictoriaMetrics/mcp-vmanomaly).
|
||||
|
||||
|
||||
# Agent Skills
|
||||
|
||||
[Agent skills](https://github.com/VictoriaMetrics/skills) help AI agents and automation tools understand, operate,
|
||||
and troubleshoot VictoriaMetrics observability components, including metrics, logs, and traces.
|
||||
|
||||
These skills provide predefined workflows and capabilities such as:
|
||||
* Query metrics, logs, traces and alerts
|
||||
* Query trace analysis
|
||||
* Multi-signal investigations
|
||||
* Cardinality optimization
|
||||
* Unused metric detection
|
||||
|
||||
To install the available skills for AI agents, run:
|
||||
```sh
|
||||
npx skills add VictoriaMetrics/skills
|
||||
```
|
||||
|
||||
See more details at [VictoriaMetrics/skills](https://github.com/VictoriaMetrics/skills).
|
||||
19
docs/ai-tools/_index.md
Normal file
@@ -0,0 +1,19 @@
|
||||
---
|
||||
title: AI tools
|
||||
weight: 61
|
||||
menu:
|
||||
docs:
|
||||
weight: 61
|
||||
identifier: ai-tools
|
||||
tags:
|
||||
- metrics
|
||||
- logs
|
||||
- traces
|
||||
- AI
|
||||
- AI integration
|
||||
- agent
|
||||
- assistant
|
||||
- MCP server
|
||||
|
||||
---
|
||||
{{% content "README.md" %}}
|
||||
@@ -14,6 +14,19 @@ aliases:
|
||||
---
|
||||
Please find the changelog for VictoriaMetrics Anomaly Detection below.
|
||||
|
||||
## v1.29.1
|
||||
Released: 2026-03-25
|
||||
|
||||
- FEATURE: Added `min_rel_dev_from_expected` [common model argument](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-relative-deviation-from-expected) to support relative business gating based on percentage deviation from expected values. This allows users to specify a relative threshold for ignoring small deviations that are not significant in the context of the expected value, particularly useful for heterogeneous series with varying unknown magnitudes, returned from the same query.
|
||||
|
||||
- UI: Updated [vmanomaly UI](https://docs.victoriametrics.com/anomaly-detection/ui/) from [v1.5.0](https://docs.victoriametrics.com/anomaly-detection/ui/#v150) to [v1.5.1](https://docs.victoriametrics.com/anomaly-detection/ui/#v151), see respective [release notes](https://docs.victoriametrics.com/anomaly-detection/ui/#v151) for details.
|
||||
|
||||
- IMPROVEMENT: Optimized [`VmWriter`](https://docs.victoriametrics.com/anomaly-detection/components/writer/#vm-writer) hot path by 2-3x in terms of infer-write latency.
|
||||
|
||||
- IMPROVEMENT: Optimized backbone of [t-digest](https://www.sciencedirect.com/science/article/pii/S2665963820300403) data structures to reduce the memory usage and speed up the fit/infer calls for underlying models that use it (e.g. [OnlineQuantileModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile) or [MAD](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-mad)).
|
||||
|
||||
- BUGFIX: Fixed forward compatibility issues with the persisted state from [v1.28.5](#v1285) for [ProphetModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) and [QuantileModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile) models, which could lead to "util.files - ERROR - Unexpected error while loading model from ..." in [stateful mode](https://docs.victoriametrics.com/anomaly-detection/components/settings/#state-restoration) after the upgrade to [v1.29.0](#v1290). Now the service can properly load the persisted state from [v1.28.5](#v1285) and continue functioning without requiring retraining of the affected models.
|
||||
|
||||
## v1.29.0
|
||||
Released: 2026-03-05
|
||||
|
||||
@@ -118,7 +131,7 @@ Released: 2025-10-09
|
||||
```
|
||||
This happened in scenarios with a large number of queries (e.g., in non-sharded deployments). Now the pool size is set dynamically to prevent such warnings and retain efficient connection reuse.
|
||||
|
||||
## v1.26.1
|
||||
## v1.26.1
|
||||
Released: 2025-10-08
|
||||
|
||||
- IMPROVEMENT: Enriched lifecycle logs with the deterministic labelset hash for each query result (metric). This allows correlating model training, inference runs/skips, and on-disk artifacts presence or cleanup during incident triage.
|
||||
@@ -128,7 +141,7 @@ Released: 2025-10-02
|
||||
|
||||
- FEATURE: Introduced vmui-like [UI](https://docs.victoriametrics.com/anomaly-detection/ui/) for `vmanomaly` service to simplify the configuration and backtesting of anomaly detection models before it goes to production. It provides an intuitive interface to finetune model configurations, visualize its predictions and anomaly scores, and perform backtesting on historical data. The UI is accessible via a web browser and can be run as a [standalone service](https://docs.victoriametrics.com/anomaly-detection/ui/#preset-usage) or [integrated with productionalized deployments](https://docs.victoriametrics.com/anomaly-detection/ui/#mixed-usage). For more details, refer to the [documentation](https://docs.victoriametrics.com/anomaly-detection/ui/).
|
||||
|
||||
- FEATURE: Added support for reading data from [VictoriaLogs stats queries](https://docs.victoriametrics.com/victorialogs/querying/#querying-log-range-stats) with `VLogsReader`. This reader allows quering and analyzing log data stored in VictoriaLogs, enabling anomaly detection on metrics generated from logs. It supports similar configuration options as `VmReader`, including `datasource_url`, `tenant_id`, `queries`, etc. For more details, refer to the [documentation](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vlogs-reader). It can be also used in [UI mode](https://docs.victoriametrics.com/anomaly-detection/ui/) for backtesting log-based anomaly detection configurations.
|
||||
- FEATURE: Added support for reading data from [VictoriaLogs stats queries](https://docs.victoriametrics.com/victorialogs/querying/#querying-log-range-stats) with `VLogsReader`. This reader allows querying and analyzing log data stored in VictoriaLogs, enabling anomaly detection on metrics generated from logs. It supports similar configuration options as `VmReader`, including `datasource_url`, `tenant_id`, `queries`, etc. For more details, refer to the [documentation](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vlogs-reader). It can be also used in [UI mode](https://docs.victoriametrics.com/anomaly-detection/ui/) for backtesting log-based anomaly detection configurations.
|
||||
|
||||
- IMPROVEMENT: Resolved the case in the [`IsolationForestModel`](https://docs.victoriametrics.com/anomaly-detection/components/models/#isolation-forest-multivariate) with `provide_series` common model [argument](https://docs.victoriametrics.com/anomaly-detection/components/models/#provide-series) including `yhat.*` series (prediction and confidence boundaries), which are not produced by this model. Now config validation will fail with a clear error message if such series names are requested.
|
||||
|
||||
@@ -183,7 +196,7 @@ Released: 2025-07-17
|
||||
|
||||
- FEATURE: Added an option to reference environment variables in [configuration files](https://docs.victoriametrics.com/anomaly-detection/components/) using scalar string placeholders `%{ENV_NAME}`. See the [environment variables](https://docs.victoriametrics.com/anomaly-detection/components/#environment-variables) section for more details and examples. This feature is particularly useful for managing sensitive information like API keys or database credentials while still making it accessible to the service.
|
||||
|
||||
- IMPROVEMENT: Added `iqr_threshold` to [OnlineQuantileModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile) to refine the prediction boundaries without the need to manually adjusting `scale` [argument](https://docs.victoriametrics.com/anomaly-detection/components/models/#scale). Best set as >= 2 and used with smaller, robust quantiles (e.g. `(0.25, 0.5, 0.75)`) to both reduce the impact of outliers on the prediction boundaries and increase the likelyhood of having "non-anomalous" data within updated boundaries.
|
||||
- IMPROVEMENT: Added `iqr_threshold` to [OnlineQuantileModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile) to refine the prediction boundaries without the need to manually adjust the `scale` [argument](https://docs.victoriametrics.com/anomaly-detection/components/models/#scale). Best set as >= 2 and used with smaller, robust quantiles (e.g. `(0.25, 0.5, 0.75)`) to both reduce the impact of outliers on the prediction boundaries and increase the likelihood of having "non-anomalous" data within updated boundaries.
|
||||
|
||||
- IMPROVEMENT: Fixed duplicated calls to VictoriaMetrics in [reader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader) for queries in `reader.queries` that are attached to multiple models in `models` [section](https://docs.victoriametrics.com/anomaly-detection/components/models/#queries), where previously each model would independently fetch data for the same query, leading to unnecessary load on the reader and VictoriaMetrics TSDB. Now, the reader will only be called once per unique (scheduler_alias, query_key) pair, and the results will be shared across all models that use the same query in the same scheduler.
|
||||
|
||||
@@ -285,7 +298,7 @@ Released: 2025-03-03
|
||||
> This release contains a bug introduced in [v1.18.7](#v1187) - [`PeriodicScheduler`](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#periodic-scheduler) where configurations with `fit_every` > `fit_window` could cause inference to be skipped for |fit_every - fit_window| time, until the next `fit_every` call happens. For `fit_every` > `fit_window` configurations we recommend upgrading to [v1.20.1](#v1201), which resolves this issue.
|
||||
|
||||
- FEATURE: The `scale` argument is now a [common argument](https://docs.victoriametrics.com/anomaly-detection/components/models/#scale), previously supported only by [`ProphetModel`](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) and [`OnlineQuantileModel`](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile). Additionally, `scale` is now **two-sided**, represented as `[scale_lb, scale_ub]`. The previous format (`scale: x`) remains supported and will be automatically converted to `scale: [x, x]`.
|
||||
|
||||
|
||||
- FEATURE: Introduced a post-processing step to clip `yhat`, `yhat_lower`, and `yhat_upper` to the configured `data_range` [values](https://docs.victoriametrics.com/anomaly-detection/components/reader/) in `VmReader`, if defined. This feature is disabled by default for backward compatibility. It can be enabled for models that generate predictions and estimates, such as [`ProphetModel`](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet), by setting the [common argument](https://docs.victoriametrics.com/anomaly-detection/components/models/#clip-predictions) `clip_predictions` to `True`.
|
||||
|
||||
- IMPROVEMENT: Introduced the `anomaly_score_outside_data_range` [parameter](https://docs.victoriametrics.com/anomaly-detection/components/models/#score-outside-data-range) to allow overriding the default anomaly score (`1.01`) assigned when input values (`y`) fall outside the defined `data_range` (data domain violation). It improves flexibility for alerting rules and enables clearer visual distinction between different anomaly scenarios. Override can be configured at the **service level** (`settings`) or per **model instance** (`models.model_xxx`), with model-level values taking priority. If not explicitly set, the default anomaly score remains `1.01` for backward compatibility.
|
||||
@@ -320,8 +333,8 @@ Released: 2025-01-20
|
||||
> This release contains a bug introduced in [v1.18.7](#v1187) - [`PeriodicScheduler`](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#periodic-scheduler) where configurations with `fit_every` > `fit_window` could cause inference to be skipped for |fit_every - fit_window| time, until the next `fit_every` call happens. For `fit_every` > `fit_window` configurations we recommend upgrading to [v1.20.1](#v1201), which resolves this issue.
|
||||
|
||||
- FEATURE: Added support for per-query `tenant_id` in the [`VmReader`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader). This allows overriding the reader-level `tenant_id` within a single global `vmanomaly` configuration on a *per-query* basis, enabling isolation of data for different tenants in separate queries when querying the [VictoriaMetrics cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/). For details, see the [documentation](https://docs.victoriametrics.com/anomaly-detection/components/reader/#per-query-parameters).
|
||||
- IMPROVEMEMT: Speedup the model infer stage on multicore systems.
|
||||
- IMPROVEMEMT: Speedup the model fitting stage by 1.25-3x, depending on configuration complexity.
|
||||
- IMPROVEMENT: Speedup the model infer stage on multicore systems.
|
||||
- IMPROVEMENT: Speedup the model fitting stage by 1.25-3x, depending on configuration complexity.
|
||||
- IMPROVEMENT: Reduced service RAM usage by 5-10%, depending on configuration complexity.
|
||||
- BUGFIX: Now [`VmReader`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader) properly handles the cases where the number of queries processed in parallel (up to `reader.queries` cardinality) exceeds the default limit of 10 HTTP(S) connections, preventing potential data loss from discarded queries. The pool limit will automatically adjust to match `reader.queries` cardinality.
|
||||
- BUGFIX: Corrected the construction of write endpoints for cluster VictoriaMetrics `url`s (`tenant_id` arg is set) in `monitoring.push` [section configurations](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#push-config-parameters).
|
||||
@@ -499,7 +512,7 @@ Released: 2024-08-26
|
||||
|
||||
## v1.15.5
|
||||
Released: 2024-08-19
|
||||
- BUGFIX: following [v1.15.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1152) online model enhancement, now `data_range` parameter is correctly initialized for online models, created (for new time series returned by particular query) during `infer` calls.
|
||||
- BUGFIX: following [v1.15.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1152) online model enhancement, now `data_range` parameter is correctly initialized for online models, created (for new time series returned by particular query) during `infer` calls.
|
||||
|
||||
## v1.15.4
|
||||
Released: 2024-08-15
|
||||
@@ -539,7 +552,7 @@ Released: 2024-08-06
|
||||
|
||||
- FEATURE: Introduced the `optimized_business_params` key (list of strings) to the [`AutoTuned`](https://docs.victoriametrics.com/anomaly-detection/components/models/#autotuned) `optimization_params`. This allows particular business-specific parameters such as [`detection_direction`](https://docs.victoriametrics.com/anomaly-detection/components/models/#detection-direction) and [`min_dev_from_expected`](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-deviation-from-expected) to remain **unchanged during optimizations, retaining their default values**.
|
||||
- IMPROVEMENT: Optimized the [`AutoTuned`](https://docs.victoriametrics.com/anomaly-detection/components/models/#autotuned) model logic to minimize deviations from the expected `anomaly_percentage` specified in the configuration and the detected percentage in the data, while also reducing discrepancies between the actual values (`y`) and the predictions (`yhat`).
|
||||
- IMPROVEMENT: Allow [`ProphetModel`](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) to fit with multiple seasonalities when used in [`AutoTuned`](https://docs.victoriametrics.com/anomaly-detection/components/models/#autotuned) mode.
|
||||
- IMPROVEMENT: Allow [`ProphetModel`](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) to fit with multiple seasonalities when used in [`AutoTuned`](https://docs.victoriametrics.com/anomaly-detection/components/models/#autotuned) mode.
|
||||
|
||||
## v1.14.2
|
||||
Released: 2024-07-26
|
||||
@@ -590,10 +603,10 @@ Released: 2024-03-31
|
||||
Released: 2024-02-22
|
||||
- FEATURE: Multi-scheduler support. Now users can use multiple [model specs](https://docs.victoriametrics.com/anomaly-detection/components/models/) in a single config (via aliasing), each spec can be run with its own (even multiple) [schedulers](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/).
|
||||
- Introduction of `schedulers` arg in model spec:
|
||||
- It allows each model to be managed by 1 (or more) schedulers, so overall resource usage is optimized and flexibility is preserved.
|
||||
- It allows each model to be managed by 1 (or more) schedulers, so overall resource usage is optimized and flexibility is preserved.
|
||||
- Passing an empty list or not specifying this param implies that each model is run in **all** the schedulers, which is a backward-compatible behavior.
|
||||
- Please find more details in docs on [Model section](https://docs.victoriametrics.com/anomaly-detection/components/models/#schedulers)
|
||||
- DEPRECATION: slight refactor of a scheduler config section
|
||||
- DEPRECATION: slight refactor of a scheduler config section
|
||||
- Now schedulers are passed as a mapping of `scheduler_alias: scheduler_spec` under [scheduler](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/) sections. Using old format (< [1.11.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1110)) will produce warnings for now and will be removed in future versions.
|
||||
- DEPRECATION: The `--watch` CLI option for config file reloads is deprecated and will be ignored in the future.
|
||||
|
||||
@@ -601,11 +614,11 @@ Released: 2024-02-22
|
||||
Released: 2024-02-15
|
||||
- FEATURE: Multi-model support. Now users can specify multiple [model specs](https://docs.victoriametrics.com/anomaly-detection/components/models/) in a single config (via aliasing), as well as reference which [queries from VmReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#config-parameters) each model should be run on.
|
||||
- Introduction of `queries` arg in model spec:
|
||||
- It allows the model to be executed only on a particular query subset from `reader` section.
|
||||
- It allows the model to be executed only on a particular query subset from `reader` section.
|
||||
- Passing an empty list or not specifying this param implies that each model is run on results from **all** queries, which is a backward-compatible behavior.
|
||||
- Please find more details in docs on [Model section](https://docs.victoriametrics.com/anomaly-detection/components/models/#queries)
|
||||
|
||||
- DEPRECATION: slight refactor of a model config section
|
||||
- DEPRECATION: slight refactor of a model config section
|
||||
- Now models are passed as a mapping of `model_alias: model_spec` under [model](https://docs.victoriametrics.com/anomaly-detection/components/models/) sections. Using old format (<= [1.9.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v192)) will produce warnings for now and will be removed in future versions.
|
||||
- Please find more details in docs on [Model section](https://docs.victoriametrics.com/anomaly-detection/components/models/)
|
||||
- IMPROVEMENT: now logs from [`monitoring.pull`](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#monitoring-section-config-example) GET requests to `/metrics` endpoint are shown only in DEBUG mode
|
||||
@@ -656,7 +669,7 @@ Released: 2023-12-21
|
||||
|
||||
## v1.6.0
|
||||
Released: 2023-10-30
|
||||
- IMPROVEMENT:
|
||||
- IMPROVEMENT:
|
||||
- now all the produced healthcheck metrics have `vmanomaly_` prefix for easier accessing.
|
||||
- updated docs for monitoring.
|
||||
> This is a backward-incompatible change, as metric names will be changed, resulting in new metrics creation, e.g. `model_datapoints_produced` will become `vmanomaly_model_datapoints_produced`
|
||||
@@ -669,19 +682,19 @@ Released: 2023-10-30
|
||||
|
||||
## v1.5.1
|
||||
Released: 2023-09-18
|
||||
- IMPROVEMENT: Infer from the latest seen datapoint for each query. Handles the case datapoints arrive late.
|
||||
- IMPROVEMENT: Infer from the latest seen datapoint for each query. Handles the case when datapoints arrive late.
|
||||
|
||||
|
||||
## v1.5.0
|
||||
Released: 2023-08-11
|
||||
- FEATURE: add `--license` and `--license-file` command-line flags for license code verification.
|
||||
- FEATURE: add `--license` and `--license-file` command-line flags for license code verification.
|
||||
- IMPROVEMENT: Updated Python to 3.11.4 and updated dependencies.
|
||||
- IMPROVEMENT: Guide documentation for Custom Model usage.
|
||||
|
||||
|
||||
## v1.4.2
|
||||
Released: 2023-06-09
|
||||
- BUGFIX: Fix case with received metric labels overriding generated.
|
||||
- BUGFIX: Fix case with received metric labels overriding generated.
|
||||
|
||||
|
||||
## v1.4.1
|
||||
|
||||
@@ -162,7 +162,7 @@ Anomaly detection models can significantly improve when incorporating business-s
|
||||
|
||||
- **Defining a `data_range`** - configure [`data_range`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#config-parameters) for the model’s input query to **automatically assign anomaly scores > 1** for values (`y`) that fall outside the defined range.
|
||||
|
||||
- **Filtering minor fluctuations with `min_dev_from_expected`** – use [`min_dev_from_expected`](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-deviation-from-expected) to **ignore insignificant deviations** and prevent small fluctuations from triggering [false positives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-positive).
|
||||
- **Filtering minor fluctuations with absolute (`min_dev_from_expected`)** or **relative (`min_rel_dev_from_expected`)** thresholding – use [`min_dev_from_expected`](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-deviation-from-expected) and [`min_rel_dev_from_expected`](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-relative-deviation-from-expected) to **ignore insignificant deviations** and prevent alerting rules from triggering [false positives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-positive).
|
||||
|
||||
- **Applying `scale` for asymmetric confidence adjustments** - use [`scale`](https://docs.victoriametrics.com/anomaly-detection/components/models/#scale) to adjust confidence intervals **differently for spikes and drops**, ensuring more appropriate anomaly detection.
|
||||
|
||||
@@ -173,7 +173,7 @@ Consider a metric tracking the percentage of HTTP 4xx status codes for a specifi
|
||||
- **Expected data range**: The percentage naturally falls between `0%` and `100%` (`[0, 1]`).
|
||||
- **Threshold-based anomaly detection**: If the error rate exceeds `5%`, it should be **automatically flagged as an anomaly** ([anomaly score](#what-is-anomaly-score) > 1), encouraging an incident investigation.
|
||||
- **Regime shift detection**: A **continuous increase** in error rates (e.g., from `1.5%` to `3%`) should also be considered **anomalous**, as a regime change may indicate an underlying system problem, e.g. with a new release.
|
||||
- **Avoiding false positives**: **Small, infrequent deviations** (e.g., from `1%` to `1.3%`) should **not** trigger alerts to **prevent unnecessary SRE escalations**. Let it be on the level of 0.5%.
|
||||
- **Avoiding false positives**: **Small, infrequent deviations** (e.g., from `1%` to `1.3%` on a scale of 0-100%) should **not** trigger alerts to **prevent unnecessary SRE escalations**. Let it be on the level of 0.5%. Also, relative deviations of less than 10% (e.g., from `1%` to `1.1%`) should be ignored, as they may not represent significant changes in the context of the metric vs its average fluctuation.
|
||||
|
||||
Then, the following config may be used to benefit from incorporating domain knowledge into model behavior:
|
||||
|
||||
@@ -201,7 +201,8 @@ models:
|
||||
schedulers: ['periodic_http']
|
||||
queries: ['percentage_4xx']
|
||||
detection_direction: 'above_expected' # as interested only in spikes, drops are OK
|
||||
min_dev_from_expected: 0.005 # <0.5% deviations vs expected values should be neglected, generating anomaly score == 0
|
||||
min_dev_from_expected: [0, 0.005] # <0.5% deviations vs expected values should be neglected, generating anomaly score == 0
|
||||
min_rel_dev_from_expected: [0, 0.1] # <10% relative deviations vs expected values should be neglected, generating anomaly score == 0
|
||||
# to align predictions to be within [0, 5%] interval, defined in reader.queries.percentage_4xx.data_range
|
||||
clip_predictions: True
|
||||
# specify output series produced by vmanomaly to be written to VictoriaMetrics in `writer`
|
||||
@@ -420,7 +421,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.0
|
||||
image: victoriametrics/vmanomaly:v1.29.1
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
@@ -638,7 +639,7 @@ options:
|
||||
Here’s an example of using the config splitter to divide configurations based on the `extra_filters` argument from the reader section:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.0 && docker image tag victoriametrics/vmanomaly:v1.29.0 vmanomaly
|
||||
docker pull victoriametrics/vmanomaly:v1.29.1 && docker image tag victoriametrics/vmanomaly:v1.29.1 vmanomaly
|
||||
```
|
||||
|
||||
```sh
|
||||
|
||||
@@ -39,14 +39,15 @@ This section outlines the compatibility of different `vmanomaly` versions with v
|
||||
|
||||
> Used if `settings.restore_state` is set to `true`. See argument details in the [configuration documentation](https://docs.victoriametrics.com/anomaly-detection/components/settings/#state-restoration).
|
||||
|
||||
There are 2 types of compatibilitity to consider when migrating in stateful mode:
|
||||
There are 2 types of compatibility to consider when migrating in stateful mode:
|
||||
- **Global (in)compatibility**: The new version can seamlessly read and utilize the existing state without any modifications or data loss. Or, in case of incompatibility, the existing state must be dropped completely to proceed with the migration.
|
||||
- **Component (in)compatibility**: The new version may introduce changes that affect specific components (e.g., specific models, data formats) but can still operate with the existing state after some adjustments or after dropping incompatible on-disk artifacts.
|
||||
|
||||
| Group start | Group end | Compatibility | Notes |
|
||||
|---------|--------- |------------|-------|
|
||||
| [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) | Latest* | Fully Compatible | Just a placeholder for new releases |
|
||||
| [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262) | [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) | Fully Compatible | [v1.28.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1280) introduced [rolling](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-models) model class drop in favor of [online](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) models (`rolling_quantile` and `std` models), however, it does not impact compatibility, as artifacts were not produced by default for rolling models. Also, offline `mad` and `zscore` models are redirecting to their respective online counterparts since [v1.28.4](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1284). |
|
||||
| [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291) | Latest* | Fully Compatible | Just a placeholder for new releases |
|
||||
| [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) | Partially Compatible* | Dumped models of class [prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) and [seasonal quantile](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile) have problems loading into [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) because the `pytz` library was dropped. **Upgrading directly from v1.28.7 to [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291), which contains the fix, is suggested** |
|
||||
| [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262) | [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | Fully Compatible | [v1.28.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1280) introduced [rolling](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-models) model class drop in favor of [online](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) models (`rolling_quantile` and `std` models), however, it does not impact compatibility, as artifacts were not produced by default for rolling models. Also, offline `mad` and `zscore` models are redirecting to their respective online counterparts since [v1.28.4](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1284). |
|
||||
| [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) | [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270) | Partially Compatible* | [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) introduced `forecast_at` argument for base [univariate](https://docs.victoriametrics.com/anomaly-detection/components/models/#univariate-models) and `Prophet` [models](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet), however, itself remains backward-reversible from newer states like [v1.26.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262), [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270). (All models except `isolation_forest_multivariate` class will be dropped) |
|
||||
| [v1.25.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1251) | [v1.25.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1252) | Fully Compatible | In [v1.25.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1251) there was a change to `vmanomaly.db` metadata database format, so migrating from v1.24.0-v1.25.0 requires deletion of a state, see note above the table |
|
||||
| [v1.24.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1241) | [v1.25.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1250) | Partially Compatible* | In [v1.25.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1250) there were changes to **data dump layout** and to `online_quantile` and `isolation_forest_multivariate` [model](https://docs.victoriametrics.com/anomaly-detection/components/models/) states, so to migrate from v1.24.0-v1.24.1 it is recommended to drop the state |
|
||||
|
||||
@@ -30,7 +30,7 @@ The following options are available:
|
||||
|
||||
### Command-line arguments
|
||||
|
||||
The `vmanomaly` service supports a set of command-line arguments to configure its behavior, including options for licensing, logging levels, and more.
|
||||
The `vmanomaly` service supports a set of command-line arguments to configure its behavior, including options for licensing, logging levels, and more.
|
||||
|
||||
> `vmanomaly` supports {{% available_from "v1.18.5" anomaly %}} running on config **directories**, see the `config` positional arg description in help message below.
|
||||
|
||||
@@ -49,7 +49,7 @@ options:
|
||||
-h Show this help message and exit
|
||||
--license STRING License key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/trial/ to obtain a trial license.
|
||||
--licenseFile PATH Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/trial/ to obtain a trial license.
|
||||
--license.forceOffline
|
||||
--license.forceOffline
|
||||
Whether to force offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued
|
||||
license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification.
|
||||
--loggerLevel {DEBUG,WARNING,FATAL,ERROR,INFO}
|
||||
@@ -91,7 +91,7 @@ groups:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "{{ $labels.job }} instance {{ $labels.instance }} license expires in less than 30 days"
|
||||
description: "{{ $labels.instance }} of job {{ $labels.job }} license expires in {{ $value | humanizeDuration }}.
|
||||
description: "{{ $labels.instance }} of job {{ $labels.job }} license expires in {{ $value | humanizeDuration }}.
|
||||
Please make sure to update the license before it expires."
|
||||
|
||||
- alert: LicenseExpiresInLessThan7Days
|
||||
@@ -100,18 +100,18 @@ groups:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "{{ $labels.job }} instance {{ $labels.instance }} license expires in less than 7 days"
|
||||
description: "{{ $labels.instance }} of job {{ $labels.job }} license expires in {{ $value | humanizeDuration }}.
|
||||
description: "{{ $labels.instance }} of job {{ $labels.job }} license expires in {{ $value | humanizeDuration }}.
|
||||
Please make sure to update the license before it expires."
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
> To run `vmanomaly`, you need to have VictoriaMetrics Enterprise license. You can get a trial license key [**here**](https://victoriametrics.com/products/enterprise/trial/). <br><br>
|
||||
> Due to the upcoming [DockerHub pull limits](https://docs.docker.com/docker-hub/usage/pulls), an additional image registry, **Quay.io**, has been introduced for VictoriaMetrics images, including [`vmanomaly`](https://quay.io/repository/victoriametrics/vmanomaly). If you encounter pull rate limits, switch from:
|
||||
> Due to the upcoming [DockerHub pull limits](https://docs.docker.com/docker-hub/usage/pulls), an additional image registry, **Quay.io**, has been introduced for VictoriaMetrics images, including [`vmanomaly`](https://quay.io/repository/victoriametrics/vmanomaly). If you encounter pull rate limits, switch from:
|
||||
> ```
|
||||
> docker pull victoriametrics/vmanomaly:vX.Y.Z
|
||||
> ```
|
||||
> to:
|
||||
> to:
|
||||
> ```
|
||||
> docker pull quay.io/victoriametrics/vmanomaly:vX.Y.Z
|
||||
> ```
|
||||
@@ -122,7 +122,7 @@ Below are the steps to get `vmanomaly` up and running inside a Docker container:
|
||||
1. Pull Docker image:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.0
|
||||
docker pull victoriametrics/vmanomaly:v1.29.1
|
||||
```
|
||||
|
||||
2. Create the license file with your license key.
|
||||
@@ -142,7 +142,7 @@ docker run -it \
|
||||
-v ./license:/license \
|
||||
-v ./config.yaml:/config.yaml \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.0 \
|
||||
victoriametrics/vmanomaly:v1.29.1 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -159,7 +159,7 @@ docker run -it \
|
||||
-e VMANOMALY_DATA_DUMPS_DIR=/tmp/vmanomaly/data \
|
||||
-e VMANOMALY_MODEL_DUMPS_DIR=/tmp/vmanomaly/models \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.0 \
|
||||
victoriametrics/vmanomaly:v1.29.1 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -172,7 +172,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.0
|
||||
image: victoriametrics/vmanomaly:v1.29.1
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
@@ -197,7 +197,7 @@ volumes:
|
||||
# ...
|
||||
# Enable if on-disk mode over in-memory is preferred
|
||||
# Required, if settings.restore_state is True
|
||||
vmanomaly_data: {}
|
||||
vmanomaly_data: {}
|
||||
```
|
||||
|
||||
For a complete docker-compose example please refer to [our alerting guide](https://docs.victoriametrics.com/anomaly-detection/guides/guide-vmanomaly-vmalert/), chapter [docker-compose](https://docs.victoriametrics.com/anomaly-detection/guides/guide-vmanomaly-vmalert/#docker-compose)
|
||||
@@ -226,8 +226,8 @@ If you are using [VM Operator](https://docs.victoriametrics.com/operator/) to ma
|
||||
|
||||
To run `vmanomaly`, use YAML files or directories containing YAML files. The configuration files support shallow merge, allowing splitting the configuration into multiple files for better organization.
|
||||
|
||||
> If you are using directories, all `.yaml` files inside will be shallow merged, without deeper recursion. If you want to merge multiple YAML files, you can specify them as separate arguments, e.g.
|
||||
> ```shellhelp
|
||||
> If you are using directories, all `.yaml` files inside will be shallow merged, without deeper recursion. If you want to merge multiple YAML files, you can specify them as separate arguments, e.g.
|
||||
> ```shellhelp
|
||||
> vmanomaly config1.yaml config2.yaml ./config_dir/
|
||||
> ```
|
||||
|
||||
@@ -245,7 +245,7 @@ settings:
|
||||
restore_state: true # restore state from previous run, available since v1.24.0
|
||||
# https://docs.victoriametrics.com/anomaly-detection/components/settings/#logger-levels
|
||||
# to override service-global logger levels, use the `logger_levels` section
|
||||
logger_levels:
|
||||
logger_levels:
|
||||
# vmanomaly: INFO
|
||||
# scheduler: INFO
|
||||
# reader: INFO
|
||||
@@ -288,11 +288,11 @@ reader:
|
||||
datasource_url: "https://play.victoriametrics.com/" # [YOUR_DATASOURCE_URL]
|
||||
tenant_id: '0:0'
|
||||
sampling_period: "5m"
|
||||
queries:
|
||||
queries:
|
||||
# define your queries with MetricsQL - https://docs.victoriametrics.com/victoriametrics/metricsql/
|
||||
cpu_user:
|
||||
expr: 'sum(rate(node_cpu_seconds_total{mode=~"user"}[10m])) by (container)'
|
||||
max_datapoints_per_query: 15000 # to deal with longer queries hitting seach.MaxPointsPerTimeseries
|
||||
max_datapoints_per_query: 15000 # to deal with longer queries hitting search.MaxPointsPerTimeseries
|
||||
# other queries ...
|
||||
|
||||
writer:
|
||||
@@ -361,7 +361,7 @@ writer:
|
||||
|
||||
{{% /collapse %}}
|
||||
|
||||
{{% collapse name="Playground on VictoriaTraces Datasource" %}}
|
||||
{{% collapse name="Playground on VictoriaTraces Datasource" %}}
|
||||
|
||||
<div class="position-relative mb-3">
|
||||
<button
|
||||
@@ -400,9 +400,9 @@ For optimal service behavior, consider the following tweaks when configuring `vm
|
||||
- Set up **config hot-reloading** {{% available_from "v1.25.0" anomaly %}} to automatically reload configurations on config files changes. This can be enabled via the `--watch` [CLI argument](https://docs.victoriametrics.com/anomaly-detection/quickstart/#command-line-arguments) and allows for configuration updates without explicit service restarts.
|
||||
|
||||
**Schedulers**:
|
||||
- Configure the **inference frequency** in the [scheduler](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/) section of the configuration file.
|
||||
- Ensure that `infer_every` aligns with your **minimum required alerting frequency**.
|
||||
- For example, if receiving **alerts every 15 minutes** is sufficient (when `anomaly_score > 1`), set `infer_every` to match `reader.sampling_period` or override it per query via `reader.queries.query_xxx.step` for an optimal setup.
|
||||
- Configure the **inference frequency** in the [scheduler](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/) section of the configuration file.
|
||||
- Ensure that `infer_every` aligns with your **minimum required alerting frequency**.
|
||||
- For example, if receiving **alerts every 15 minutes** is sufficient (when `anomaly_score > 1`), set `infer_every` to match `reader.sampling_period` or override it per query via `reader.queries.query_xxx.step` for an optimal setup.
|
||||
|
||||
**Reader**:
|
||||
- Set up the datasource to read data from in the [reader](https://docs.victoriametrics.com/anomaly-detection/components/reader/) section. Include tenant ID if using a [cluster version of VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) (`multitenant` value {{% available_from "v1.16.2" anomaly %}} can also be used here).
|
||||
|
||||
@@ -189,9 +189,9 @@ The best applications of this mode are:
|
||||
|
||||
### What you can do with Copilot
|
||||
|
||||
- **Ask questions** about any model (e.g. [Prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) or [Z-score](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-z-score) — parameters, trade-offs, when to use each)
|
||||
- **Improve detection quality** — describe what's wrong ("too many false positives", "missing spikes") and Copilot reads the config, searches the docs, and proposes a validated configuration change to fix the issue.
|
||||
- **Get config suggestions inline** — suggestions appear as interactive cards with an explanation and a YAML diff; click **Apply** to write the change directly to your current settings, or **Decline** to keep the conversation going.
|
||||
- **Ask questions** about any model (e.g. [Prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) or [Z-score](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-z-score) - parameters, trade-offs, when to use each)
|
||||
- **Improve detection quality** - describe what's wrong ("too many false positives", "missing spikes") and Copilot reads the config, searches the docs, and proposes a validated configuration change to fix the issue.
|
||||
- **Get config suggestions inline** - suggestions appear as interactive cards with an explanation and a YAML diff; click **Apply** to write the change directly to your current settings, or **Decline** to keep the conversation going.
|
||||
|
||||
### How it works
|
||||
|
||||
@@ -205,22 +205,61 @@ Copilot appears as a **chat popup** anchored to the bottom-right corner of the p
|
||||
AI Assistant is disabled by default; enable it with `VMANOMALY_COPILOT_ENABLED=true`, then configure an LLM provider API key and, optionally, a model. Once enabled and configured, Copilot will appear as a chat popup in the bottom-right corner of the UI.
|
||||
|
||||
|
||||
|
||||
Supported providers and model formats:
|
||||
|
||||
- **Anthropic** — set `ANTHROPIC_API_KEY`; model format: `anthropic:<model>`
|
||||
- **Anthropic** - set `ANTHROPIC_API_KEY`; model format: `anthropic:<model>`
|
||||
- Examples: `claude-haiku-4-5`, `claude-sonnet-4-6`; see [full list](https://platform.claude.com/docs/en/about-claude/models/overview#latest-models-comparison)
|
||||
- **OpenAI** — set `OPENAI_API_KEY`; model format: `openai:<model>`
|
||||
- **OpenAI** - set `OPENAI_API_KEY`; model format: `openai:<model>` or `openai-responses:<model>`
|
||||
- Examples: `gpt-5-mini`, `gpt-5.2`; see [full list](https://platform.openai.com/docs/models)
|
||||
- {{% available_from "v1.29.1" anomaly %}} OpenAI-compatible non-OpenAI providers are supported through `OPENAI_BASE_URL` + `OPENAI_API_KEY`
|
||||
- {{% available_from "v1.29.1" anomaly %}} Azure OpenAI is supported through `AZURE_OPENAI_ENDPOINT` + `OPENAI_API_VERSION` + `AZURE_OPENAI_API_KEY` (or `AZURE_OPENAI_AD_TOKEN`); do not set both `OPENAI_BASE_URL` and `AZURE_OPENAI_ENDPOINT`
|
||||
- {{% available_from "v1.29.1" anomaly %}} **Google** - model format: `google-gla:<model>` or `google-vertex:<model>`
|
||||
- Use `GOOGLE_API_KEY` for `google-gla`; for `google-vertex`, use Application Default Credentials, a service account (`GOOGLE_APPLICATION_CREDENTIALS`), or `GOOGLE_API_KEY`
|
||||
- Example: `google-gla:gemini-2.5-pro-preview`
|
||||
- {{% available_from "v1.29.1" anomaly %}} **AWS Bedrock** - use AWS credentials or an IAM role; model format: `bedrock:<model>`
|
||||
- Preferred: set `AWS_BEARER_TOKEN_BEDROCK` and `AWS_DEFAULT_REGION`
|
||||
- Alternative: set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_DEFAULT_REGION` (add `AWS_SESSION_TOKEN` if using a session token)
|
||||
- Example: `bedrock:anthropic.claude-sonnet-4-5-20250929-v1:0`
|
||||
- {{% available_from "v1.29.1" anomaly %}} **OpenRouter** - set `OPENROUTER_API_KEY`; model format: `openrouter:<model>`
|
||||
- Example: `openrouter:anthropic/claude-sonnet-4-5`
|
||||
|
||||
Set exactly one provider key matching your selected model provider:
|
||||
Set the credentials matching your selected provider:
|
||||
|
||||
```bash
|
||||
# Anthropic
|
||||
export ANTHROPIC_API_KEY=your_key_here
|
||||
|
||||
# or OpenAI
|
||||
# OpenAI
|
||||
export OPENAI_API_KEY=your_key_here
|
||||
|
||||
# OpenAI-compatible non-OpenAI providers
|
||||
export OPENAI_BASE_URL=https://api.example.com/v1
|
||||
export OPENAI_API_KEY=your_key_here
|
||||
|
||||
# Azure OpenAI
|
||||
export AZURE_OPENAI_ENDPOINT=https://example.openai.azure.com
|
||||
export OPENAI_API_VERSION=2024-10-21
|
||||
export AZURE_OPENAI_API_KEY=your_key_here
|
||||
# or: export AZURE_OPENAI_AD_TOKEN=your_entra_token
|
||||
|
||||
# Google Generative Language API
|
||||
export GOOGLE_API_KEY=your_key_here
|
||||
|
||||
# Google Vertex AI service account
|
||||
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
|
||||
# or use Application Default Credentials: gcloud auth application-default login
|
||||
|
||||
# OpenRouter
|
||||
export OPENROUTER_API_KEY=your_key_here
|
||||
|
||||
# AWS Bedrock (preferred: bearer token)
|
||||
export AWS_BEARER_TOKEN_BEDROCK=your_bearer_token
|
||||
export AWS_DEFAULT_REGION=us-east-1
|
||||
# AWS Bedrock (alternative: access key pair or IAM role)
|
||||
# export AWS_ACCESS_KEY_ID=your_access_key
|
||||
# export AWS_SECRET_ACCESS_KEY=your_secret_key
|
||||
# export AWS_DEFAULT_REGION=us-east-1
|
||||
# export AWS_SESSION_TOKEN=your_session_token # if using a session token
|
||||
```
|
||||
|
||||
Optionally override the default model:
|
||||
@@ -589,6 +628,17 @@ If the **results** look good and the **model configuration should be deployed in
|
||||
|
||||
## Changelog
|
||||
|
||||
### v1.5.1
|
||||
Released: 2026-03-25
|
||||
|
||||
vmanomaly version: [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291)
|
||||
|
||||
- FEATURE: GCP/AWS/OpenRouter Copilot LLM providers are now supported in addition to OpenAI and Anthropic, for more choice and flexibility in AI assistance. See [AI Assistance](#ai-assistance) section for details on supported providers and configuration.
|
||||
|
||||
- BUGFIX: The [Visualization Panel](#visualization-panel) now correctly switches between "query" and "detect" modes when the respective buttons are clicked, without showing stale results from the previous mode after a running anomaly detection task is explicitly cancelled (regression introduced in [v1.5.0](#v150)).
|
||||
|
||||
- BUGFIX: Fixed an issue with [crypto.randomUUID](https://developer.mozilla.org/en-US/docs/Web/API/Crypto/randomUUID) introduced in vmanomaly [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) in [UI copilot](https://docs.victoriametrics.com/anomaly-detection/ui/#ai-assistance) that led to the frontend app showing a blank page.
|
||||
|
||||
### v1.5.0
|
||||
Released: 2026-03-05
|
||||
|
||||
|
||||
@@ -169,7 +169,7 @@ schedulers:
|
||||
periodic:
|
||||
class: 'periodic'
|
||||
infer_every: "30s"
|
||||
fit_every: "24h"
|
||||
fit_every: "365d"
|
||||
fit_window: "24h"
|
||||
|
||||
reader:
|
||||
@@ -189,8 +189,9 @@ reader:
|
||||
|
||||
models:
|
||||
zscore:
|
||||
class: 'zscore'
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3.5
|
||||
decay: 0.99 # gives more weight to recent data points, value should be in (0, 1], 1 means to give equal weight to all data
|
||||
provide_series: ['anomaly_score']
|
||||
# if queries are not specified, all queries from reader will be used
|
||||
# if schedulers are not specified, all schedulers will be used
|
||||
@@ -220,8 +221,8 @@ reader:
|
||||
|
||||
After saving the changes, hot reload will automatically detect the changes in `config.yaml` and attempt to reload the configuration. As the changes are valid, the service will log a success message and increment the `vmanomaly_hot_reload_events_total` metric with `status="success"` label:
|
||||
|
||||
- All the model instances of class `zscore`, that were trained on `host_network_receive_errors` can be reused as they are still valid and "fresh" for making inference on new datapoints until the next `fit_every` happens (10m - 5m).
|
||||
- All the model instances of class `zscore`, that were trained on `cpu_seconds_total` will be re-trained with the new query expression and frequency, as old model instances are not valid anymore.
|
||||
- All the model instances of class `zscore_online`, that were trained on `host_network_receive_errors` can be reused as they are still valid and "fresh" for making inference on new datapoints until the next `fit_every` happens.
|
||||
- All the model instances of class `zscore_online`, that were trained on `cpu_seconds_total` will be re-trained with the new query expression and frequency, as old model instances are not valid anymore.
|
||||
|
||||
|
||||
## Environment variables
|
||||
|
||||
@@ -158,21 +158,21 @@ Config with a split example:
|
||||
```yaml
|
||||
models:
|
||||
model_above_expected:
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3.0
|
||||
# track only cases when y > yhat, otherwise anomaly_score would be explicitly set to 0
|
||||
detection_direction: 'above_expected'
|
||||
# for this query we do not need to track lower values, thus, set anomaly detection tracking for y > yhat (above_expected)
|
||||
queries: ['query_values_the_lower_the_better']
|
||||
model_below_expected:
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3.0
|
||||
# track only cases when y < yhat, otherwise anomaly_score would be explicitly set to 0
|
||||
detection_direction: 'below_expected'
|
||||
# for this query we do not need to track higher values, thus, set anomaly detection tracking for y < yhat (below_expected)
|
||||
queries: ['query_values_the_higher_the_better']
|
||||
model_bidirectional_default:
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3.0
|
||||
# track in both direction, same backward-compatible behavior in case this arg is missing
|
||||
detection_direction: 'both'
|
||||
@@ -181,9 +181,12 @@ models:
|
||||
reader:
|
||||
# ...
|
||||
queries:
|
||||
query_values_the_lower_the_better: metricsql_expression1
|
||||
query_values_the_higher_the_better: metricsql_expression2
|
||||
query_values_both_direction_matters: metricsql_expression3
|
||||
query_values_the_lower_the_better:
|
||||
expr: metricsql_expression1
|
||||
query_values_the_higher_the_better:
|
||||
expr: metricsql_expression2
|
||||
query_values_both_direction_matters:
|
||||
expr: metricsql_expression3
|
||||
# other components like writer, schedule, monitoring
|
||||
```
|
||||
|
||||
@@ -191,11 +194,12 @@ reader:
|
||||
|
||||
`min_dev_from_expected`{{% available_from "v1.13.0" anomaly %}} argument is designed to **reduce [false positives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-positive)** in scenarios where deviations between the actual value (`y`) and the expected value (`yhat`) are **relatively** high. Such deviations can cause models to generate high [anomaly scores](https://docs.victoriametrics.com/anomaly-detection/faq/#what-is-anomaly-score). However, these deviations may not be significant enough in **absolute values** from a business perspective to be considered anomalies. This parameter ensures that anomaly scores for data points where `|y - yhat| < min_dev_from_expected` are explicitly set to 0. By default, if this parameter is not set, it is set to `0` to maintain backward compatibility.
|
||||
|
||||
> {{% available_from "v1.23.0" anomaly %}} The `min_dev_from_expected` argument can be a list of two float values, allowing separate thresholds for upper and lower deviations. This is useful when the acceptable deviation varies in different directions (e.g., `min_dev_from_expected: [0.01, 0.02]` means that the lower bound is `0.01` when `y` is less than `yhat` and the upper bound is `0.02` when `y` is greater than `yhat`). If only one value is provided, it is broadcasted to both directions, meaning that the same threshold is applied for both upper and lower deviations (e.g., `min_dev_from_expected: 0.01` means that the lower bound is `0.01` when `y` is less than `yhat` and the upper bound is also `0.01` when `y` is greater than `yhat`).
|
||||
> [!NOTE]
|
||||
{{% available_from "v1.23.0" anomaly %}} The `min_dev_from_expected` argument can be a list of two float values, allowing separate thresholds for upper and lower deviations. This is useful when the acceptable deviation varies in different directions (e.g., `min_dev_from_expected: [0.01, 0.02]` means that the lower bound is `0.01` when `y` is less than `yhat` and the upper bound is `0.02` when `y` is greater than `yhat`). If only one value is provided, it is broadcasted to both directions, meaning that the same threshold is applied for both upper and lower deviations (e.g., `min_dev_from_expected: 0.01` means that the lower bound is `0.01` when `y` is less than `yhat` and the upper bound is also `0.01` when `y` is greater than `yhat`).
|
||||
|
||||
> `min_dev_from_expected` must be >= 0. The higher the value of `min_dev_from_expected`, the more significant the deviation must be to generate an anomaly score > 1. This helps in filtering out small deviations that may not be meaningful in the context of the monitored metric.
|
||||
`min_dev_from_expected` must be >= 0. The higher the value of `min_dev_from_expected` is, the more significant the deviation must be to generate an anomaly score != 0. This helps in filtering out small *absolute* deviations that may not be meaningful in the context of the monitored metric.
|
||||
|
||||
*Example*: Consider a scenario where CPU utilization is low and oscillates around 0.3% (0.003). A sudden spike to 1.3% (0.013) represents a +333% increase in **relative** terms, but only a +1 percentage point (0.01) increase in **absolute** terms, which may be negligible and not warrant an alert. Setting the `min_dev_from_expected` argument to `0.01` (1%) will ensure that all anomaly scores for deviations <= `0.01` are set to 0.
|
||||
*Example*: Consider a scenario where CPU utilization in a specific mode is low and oscillates around 0.3% (0.003). A sudden spike to 1.3% (0.013) represents a +333% increase in **relative** terms, but only a +1 percentage point (0.01) increase in **absolute** terms, which may be negligible and not warrant an alert. Setting the `min_dev_from_expected` argument to `0.01` (1%) will ensure that all anomaly scores for deviations <= `0.01` are set to 0.
|
||||
|
||||
Visualizations below demonstrate this concept; the green zone defined as the `[yhat - min_dev_from_expected, yhat + min_dev_from_expected]` range excludes actual data points (`y`) from generating anomaly scores if they fall within that range.
|
||||
|
||||
@@ -215,23 +219,65 @@ reader:
|
||||
# ...
|
||||
queries:
|
||||
# the usage of min_dev should reduce false positives here
|
||||
need_to_include_min_dev: small_abs_values_metricsql_expression
|
||||
need_to_include_min_dev:
|
||||
expr: small_abs_values_metricsql_expression
|
||||
# min_dev is not really needed here
|
||||
normal_behavior: no_need_to_exclude_small_deviations_metricsql_expression
|
||||
normal_behavior:
|
||||
expr: no_need_to_exclude_small_deviations_metricsql_expression
|
||||
models:
|
||||
zscore_with_min_dev:
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3
|
||||
min_dev_from_expected: [5.0, 5.0]
|
||||
min_dev_from_expected: [5.0, 5.0] # set the same threshold for both directions, meaning that deviations less than 5.0 in absolute values won't be considered anomalous, even if they are relatively significant
|
||||
queries: ['need_to_include_min_dev'] # use such models on queries where domain experience confirm usefulness
|
||||
zscore_wo_min_dev:
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3
|
||||
# if not set, equals to setting min_dev_from_expected == 0 (meaning no filtering is applied)
|
||||
# min_dev_from_expected: [0.0, 0.0]
|
||||
queries: ['normal_behavior'] # use the default where it's not needed
|
||||
```
|
||||
|
||||
### Minimal relative deviation from expected
|
||||
|
||||
{{% available_from "v1.29.1" anomaly %}} `min_rel_dev_from_expected` argument serves a similar purpose to `min_dev_from_expected` (see [section above](#minimal-deviation-from-expected)), but focuses on **relative deviations** rather than absolute ones. It is designed to reduce [false positives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-positive) in scenarios where the absolute deviation between the actual value (`y`) and the expected value (`yhat`) is high, but the relative deviation is not significant enough to be considered an anomaly from a business perspective. This parameter ensures that anomaly scores for data points where `|y - yhat| / |yhat| < min_rel_dev_from_expected` are explicitly set to 0. By default, if this parameter is not set, it is set to `0` to maintain backward compatibility.
|
||||
|
||||
Parameter can be a list of two float values, *allowing separate thresholds for upper and lower relative deviations*. If only one value is provided, it is broadcasted to both directions.
|
||||
|
||||
> [!NOTE]
|
||||
If both `min_dev_from_expected` [arg](#minimal-deviation-from-expected) and `min_rel_dev_from_expected` are set, the model will combine both filters. A data point will be considered anomalous (i.e., have an anomaly score != 0) only if it exceeds **both** the *absolute* deviation threshold defined by `min_dev_from_expected` and the *relative* deviation threshold defined by `min_rel_dev_from_expected`. This allows for more granular control over anomaly detection, ensuring that only significant deviations in both absolute and relative terms are flagged as anomalies.
|
||||
|
||||
|
||||
*Example*: Consider a scenario of monitoring incoming traffic to websites, each of which typically receives a number of requests per second that is *unknown in advance* (from tens to thousands). Setting an absolute deviation threshold with `min_dev_from_expected` *may not be effective in reducing false positives*, as even a small increase in traffic (e.g., from 10 to 20 requests per second) can represent a 100% relative increase, which may be significant for that website. Instead, setting `min_rel_dev_from_expected` to a smaller relative value - `[20, 40]` (20/40%) - will ensure that a traffic drop from 10 to 8 requests per second (20% decrease) and a traffic spike from 10 to 14 requests per second (40% increase) won't be considered anomalous, even if they exceed confidence intervals, thus, reducing false positives for small absolute deviations that are relatively significant.
|
||||
|
||||
Example of how to use this parameter in config:
|
||||
|
||||
```yaml
|
||||
# other components like writer, schedulers, monitoring ...
|
||||
reader:
|
||||
# ...
|
||||
queries:
|
||||
# the usage of min_rel_dev should reduce false positives here
|
||||
need_to_include_min_rel_dev:
|
||||
expr: small_abs_values_metricsql_expression
|
||||
# min_rel_dev is not really needed here
|
||||
normal_behavior:
|
||||
expr: no_need_to_exclude_small_deviations_metricsql_expression
|
||||
models:
|
||||
zscore_with_min_rel_dev:
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3
|
||||
min_rel_dev_from_expected: [10, 20] # set different thresholds for both directions, meaning that relative deviations less than 10% when y < yhat and less than 20% when y > yhat won't be considered anomalous, even if they exceed confidence intervals, thus, reducing false positives for small absolute deviations that are relatively significant
|
||||
queries: ['need_to_include_min_rel_dev'] # use such models on queries where domain experience confirm usefulness
|
||||
zscore_wo_min_rel_dev:
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3
|
||||
# if not set, equals to setting min_rel_dev_from_expected == 0 (meaning no filtering is applied)
|
||||
# min_rel_dev_from_expected: [0, 0]
|
||||
queries: ['normal_behavior'] # use the default where it's not needed
|
||||
```
|
||||
|
||||
|
||||
### Group by
|
||||
|
||||
> The `groupby` argument works only in combination with [multivariate models](#multivariate-models).
|
||||
@@ -289,14 +335,14 @@ The most common **use case** is when there is a preference to **widen one side**
|
||||
# other components like reader, writer, schedulers, monitoring ...
|
||||
models:
|
||||
zscore_no_scale:
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3
|
||||
# if not set, equals to [1.0, 1.0], meaning no scaling is applied
|
||||
# scale: [1.0, 1.0]
|
||||
zscore_scaled:
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3
|
||||
# vs `zscore_no_scale`, increase lower confidence interval width by 1.2x, decrease upper confidence width by 25%
|
||||
# vs `zscore_no_scale`, increase lower confidence interval width by 20% (1.0 + 0.2 = 1.2), decrease upper confidence interval width by 25% (1.0 - 0.25 = 0.75), thus, making the model more conservative in flagging anomalies when y < yhat and more aggressive when y > yhat
|
||||
scale: [1.2, 0.75]
|
||||
```
|
||||
|
||||
@@ -325,7 +371,7 @@ reader:
|
||||
# if no data range defined, it will be implicitly converted to ["-inf", "inf"]
|
||||
models:
|
||||
zscore_mixed:
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3
|
||||
clip_predictions: True
|
||||
queries: [
|
||||
@@ -339,7 +385,7 @@ models:
|
||||
'q2_no_clip',
|
||||
]
|
||||
zscore_no_clip:
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3
|
||||
# if not set, by default resolved to `clip_predictions: False`
|
||||
queries: [
|
||||
@@ -550,27 +596,27 @@ Tuning [hyperparameters](https://en.wikipedia.org/wiki/Hyperparameter_(machine_l
|
||||
> # this may result in 1 model per each unique labelset with different hyperparameters, such as z_threshold
|
||||
> autotuned_model:
|
||||
> class: 'auto'
|
||||
> tuned_class_name: 'zscore'
|
||||
> tuned_class_name: 'zscore_online'
|
||||
> optimization_params:
|
||||
> anomaly_percentage: 0.01
|
||||
> queries: ['your_query']
|
||||
> ```
|
||||
> will produce **one model per each unique labelset** found in `your_query` results, with different hyperparameters, such as `z_threshold`, while
|
||||
> will produce **one model per each unique labelset** found in `your_query` results, with **different hyperparameters**, such as `z_threshold`, while
|
||||
> ```yaml
|
||||
> models:
|
||||
> # this will result in 1 model per each timeseries returned by the query,
|
||||
> # with the same hyperparameters, such as z_threshold
|
||||
> zscore_model:
|
||||
> class: 'zscore'
|
||||
> class: 'zscore_online'
|
||||
> z_threshold: 3 # all models will have the same z_threshold, but different parameters, such as mean, std, etc.
|
||||
> queries: ['your_query']
|
||||
> ```
|
||||
> will produce **one model per each timeseries** returned by `your_query`, with the same hyperparameters, such as `z_threshold`, but different parameters, such as mean, std, etc.
|
||||
> will produce **one model per each timeseries** returned by `your_query`, with **the same** hyperparameters, such as `z_threshold`, but different parameters, such as mean, std, etc.
|
||||
|
||||
*Parameters specific for vmanomaly*:
|
||||
|
||||
* `class` (string) - model class name `"model.auto.AutoTunedModel"` (or `auto` with class alias support{{% available_from "v1.13.0" anomaly %}})
|
||||
* `tuned_class_name` (string) - [Built-in model class](#built-in-models) to wrap, i.e. `model.zscore.ZscoreModel` (or `zscore` with class alias support{{% available_from "v1.13.0" anomaly %}}).
|
||||
* `tuned_class_name` (string) - [Built-in model class](#built-in-models) to wrap, i.e. `zscore_online`
|
||||
* `optimization_params` (dict) - Optimization parameters for *unsupervised* model tuning. Control percentage of found anomalies, as well as a tradeoff between time spent and the accuracy. The higher `timeout` and `n_trials` are, the better model configuration can be found for `tuned_class_name`, but the longer it takes and vice versa. Set `n_jobs` to `-1` to use all the CPUs available; it makes sense only if you have a big dataset to train on during `fit` calls, otherwise the overhead isn't worth it.
|
||||
- `anomaly_percentage` (float) - Expected percentage of anomalies that can be seen in training data, from `[0, 0.5)` interval (i.e. 0.01 means it's expected ~ 1% of anomalies to be present in training data). This is a *required* parameter.
|
||||
- `optimized_business_params` (list[string]) - {{% available_from "v1.15.0" anomaly %}} this argument lists the [business-specific parameters](#common-args) such as [`detection_direction`](https://docs.victoriametrics.com/anomaly-detection/components/models/#detection-direction) or [`min_dev_from_expected`](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-deviation-from-expected) that should be included in optimization; all business-specific parameters that are not listed **remain unchanged during optimizations, retaining their initial values**. E.g. setting `optimized_business_params` to `['detection_direction']` will optimize only the `detection_direction` business-specific arg, while `min_dev_from_expected` will retain its initial value (e.g. `[1, 2]` if set to that value in model config, or its default otherwise). By default and if not set, will be equal to `[]` (empty list), meaning no business params will be optimized. **A recommended option is to leave it empty** as this feature is still experimental and may lead to unexpected results.
|
||||
@@ -589,7 +635,7 @@ Tuning [hyperparameters](https://en.wikipedia.org/wiki/Hyperparameter_(machine_l
|
||||
models:
|
||||
your_desired_alias_for_a_model:
|
||||
class: 'auto' # or 'model.auto.AutoTunedModel' until v1.13.0
|
||||
tuned_class_name: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
tuned_class_name: 'zscore_online'
|
||||
optimization_params:
|
||||
anomaly_percentage: 0.004 # required. i.e. we expect <= 0.4% of anomalies to be present in training data
|
||||
seed: 42 # fix reproducibility & determinism
|
||||
@@ -600,7 +646,7 @@ models:
|
||||
n_trials: 128 # how many configurations to sample from search space during optimization
|
||||
timeout: 10 # how many seconds to spend on optimization for each trained model during `fit` phase call
|
||||
n_jobs: 1 # how many jobs in parallel to launch. Consider making it > 1 only if you have fit window containing > 10000 datapoints for each series
|
||||
optimized_business_params: [] # business-specific params to include in optimization, if not set is empty list
|
||||
optimized_business_params: [] # business-specific params to include in optimization, if not set - defaults to empty list, meaning no business params will be optimized, which is a recommended option as business arguments are better set by stakeholders rather than algorithms
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -1219,7 +1265,7 @@ monitoring:
|
||||
Let's pull the docker image for `vmanomaly`:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.0
|
||||
docker pull victoriametrics/vmanomaly:v1.29.1
|
||||
```
|
||||
|
||||
Now we can run the docker container putting as volumes both config and model file:
|
||||
@@ -1233,7 +1279,7 @@ docker run -it \
|
||||
-v $(PWD)/license:/license \
|
||||
-v $(PWD)/custom_model.py:/vmanomaly/model/custom.py \
|
||||
-v $(PWD)/custom.yaml:/config.yaml \
|
||||
victoriametrics/vmanomaly:v1.29.0 /config.yaml \
|
||||
victoriametrics/vmanomaly:v1.29.1 /config.yaml \
|
||||
--licenseFile=/license \
|
||||
--watch
|
||||
```
|
||||
@@ -1362,4 +1408,4 @@ models:
|
||||
# anomaly_score_outside_data_range: 1.01 # auto anomaly score (1.01) if `y` (real value) is outside of data_range, if set
|
||||
```
|
||||
|
||||
Resulting metrics of the model are described [here](#vmanomaly-output).
|
||||
Resulting metrics of the model are described [here](#vmanomaly-output).
|
||||
|
||||
@@ -76,7 +76,7 @@ There is change {{% available_from "v1.13.0" anomaly %}} of [`queries`](https://
|
||||
|
||||
- `data_range`{{% available_from "v1.15.1" anomaly %}} (list[float | string]): It allows defining **valid** data ranges for input per individual query in `queries`, resulting in:
|
||||
- **High anomaly scores** (>1) when the *data falls outside the expected range*, indicating a data range constraint violation (e.g. improperly configured metricsQL query, sensor malfunction, overflows in underlying metrics, etc.). Anomaly scores can be set to a specific value, like `5`, to indicate a strong violation, using the `anomaly_score_outside_data_range` [arg](https://docs.victoriametrics.com/anomaly-detection/components/models/#score-outside-data-range) of a respective model this query is used in.
|
||||
- **Lowest anomaly scores** (=0) when the *model's predictions (`yhat`) fall outside the expected range*, meaning uncertain predictions that does not really aligh with the data.
|
||||
- **Lowest anomaly scores** (=0) when the *model's predictions (`yhat`) fall outside the expected range*, meaning uncertain predictions that do not really align with the data.
|
||||
|
||||
Works together with `anomaly_score_outside_data_range` [arg](https://docs.victoriametrics.com/anomaly-detection/components/models/#score-outside-data-range) of a model to determine the anomaly score for such cases as well as with `clip_predictions` [arg](https://docs.victoriametrics.com/anomaly-detection/components/models/#clip-predictions) of a model to clip the predictions to the expected range.
|
||||
|
||||
@@ -95,7 +95,7 @@ There is change {{% available_from "v1.13.0" anomaly %}} of [`queries`](https://
|
||||
|
||||
> The recommended approach for using per-query `tenant_id`s is to set both `reader.tenant_id` and `writer.tenant_id` to `multitenant`. See [this section](https://docs.victoriametrics.com/anomaly-detection/components/writer/#multitenancy-support) for more details. Configurations where `reader.tenant_id` equals `writer.tenant_id` and is not `multitenant` are also considered safe, provided there is a single, DISTINCT `tenant_id` defined in the reader (either at the reader level or the query level, if set).
|
||||
|
||||
- `offset` {{% available_from "v1.25.3" anomaly %}} (string): this optional argument allows specifying a time offset for the query, which can be useful for adjusting the query time range to account for data collection delays or other timing issues. The offset is specified as a string (e.g., "15s", "-20s") and will be applied to the query time range. Valid resolutions are `ms`, `s`, `m`, `h`, `d` (miliseconds, seconds, minutes, hours, days). If not set, defaults to `0s` (0). See [FAQ](https://docs.victoriametrics.com/anomaly-detection/faq/#using-offsets) for more details.
|
||||
- `offset` {{% available_from "v1.25.3" anomaly %}} (string): this optional argument allows specifying a time offset for the query, which can be useful for adjusting the query time range to account for data collection delays or other timing issues. The offset is specified as a string (e.g., "15s", "-20s") and will be applied to the query time range. Valid resolutions are `ms`, `s`, `m`, `h`, `d` (milliseconds, seconds, minutes, hours, days). If not set, defaults to `0s` (0). See [FAQ](https://docs.victoriametrics.com/anomaly-detection/faq/#using-offsets) for more details.
|
||||
|
||||
### Per-query config example
|
||||
```yaml
|
||||
@@ -133,7 +133,7 @@ reader:
|
||||
<tr>
|
||||
<th>Parameter</th>
|
||||
<th>Example</th>
|
||||
<th><span style="white-space: nowrap;">Description</span></th>
|
||||
<th><span style="white-space: nowrap;">Description</span></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@@ -276,8 +276,8 @@ Timeout for the requests, passed as a string
|
||||
`false`
|
||||
</td>
|
||||
<td>
|
||||
Verify TLS certificate. If `False`, it will not verify the TLS certificate.
|
||||
If `True`, it will verify the certificate using the system's CA store.
|
||||
Verify TLS certificate. If `False`, it will not verify the TLS certificate.
|
||||
If `True`, it will verify the certificate using the system's CA store.
|
||||
If a path to a CA bundle file (like `ca.crt`), it will verify the certificate using the provided CA bundle.
|
||||
</td>
|
||||
</tr>
|
||||
@@ -485,7 +485,7 @@ To experiment with MetricsQL queries for `VmReader`, you can use the [VictoriaMe
|
||||
|
||||
`vmanomaly` supports [mutual TLS (mTLS)](https://en.wikipedia.org/wiki/Mutual_authentication){{% available_from "v1.16.3" anomaly %}} for secure communication across its components, including [VmReader](https://docs.victoriametrics.com/anomaly-detection/components/reader/#vm-reader), [VmWriter](https://docs.victoriametrics.com/anomaly-detection/components/writer/#vm-writer), and [Monitoring/Push](https://docs.victoriametrics.com/anomaly-detection/components/monitoring/#push-config-parameters). This allows for mutual authentication between the client and server when querying or writing data to [VictoriaMetrics Enterprise, configured for mTLS](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#mtls-protection).
|
||||
|
||||
mTLS ensures that both the client and server verify each other's identity using certificates, which enhances security by preventing unauthorized access.
|
||||
mTLS ensures that both the client and server verify each other's identity using certificates, which enhances security by preventing unauthorized access.
|
||||
|
||||
To configure mTLS, the following parameters can be set in the [config](#config-parameters):
|
||||
- `verify_tls`: If set to a string, it functions like the `-mtlsCAFile` command-line argument of VictoriaMetrics, specifying the CA bundle to use. Set to `True` to use the system's default certificate store.
|
||||
@@ -521,7 +521,7 @@ reader:
|
||||
|
||||
## VictoriaLogs reader
|
||||
|
||||
{{% available_from "v1.26.0" anomaly %}} `vmanomaly` can read data from [VictoriaLogs stats queries](https://docs.victoriametrics.com/victorialogs/querying/#querying-log-range-stats) endpoint with `VLogsReader`. This reader allows quering and analyzing log data stored in [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/), enabling anomaly detection on metrics generated from logs. **Querying [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) is supported with the same reader, as the endpoints for both are equivalent.**
|
||||
{{% available_from "v1.26.0" anomaly %}} `vmanomaly` can read data from [VictoriaLogs stats queries](https://docs.victoriametrics.com/victorialogs/querying/#querying-log-range-stats) endpoint with `VLogsReader`. This reader allows querying and analyzing log data stored in [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/), enabling anomaly detection on metrics generated from logs. **Querying [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) is supported with the same reader, as the endpoints for both are equivalent.**
|
||||
|
||||
Its queries should be expressed in [LogsQL*](https://docs.victoriametrics.com/victorialogs/logsql/) language that both VictoriaLogs and VictoriaTraces support, with the focus on using [stats pipe](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe) functions to calculate metrics from logs.
|
||||
|
||||
@@ -658,7 +658,7 @@ You can also access **embedded version of the playground below** (VictoriaLogs d
|
||||
<tr>
|
||||
<th>Parameter</th>
|
||||
<th>Example</th>
|
||||
<th><span style="white-space: nowrap;">Description</span></th>
|
||||
<th><span style="white-space: nowrap;">Description</span></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
@@ -799,8 +799,8 @@ Frequency of the points returned. Will be converted to `/select/stats_query_rang
|
||||
`false`
|
||||
</td>
|
||||
<td>
|
||||
Verify TLS certificate. If `False`, it will not verify the TLS certificate.
|
||||
If `True`, it will verify the certificate using the system's CA store.
|
||||
Verify TLS certificate. If `False`, it will not verify the TLS certificate.
|
||||
If `True`, it will verify the certificate using the system's CA store.
|
||||
If a path to a CA bundle file (like `ca.crt`), it will verify the certificate using the provided CA bundle.
|
||||
</td>
|
||||
</tr>
|
||||
@@ -808,7 +808,7 @@ If a path to a CA bundle file (like `ca.crt`), it will verify the certificate us
|
||||
<td>
|
||||
<span style="white-space: nowrap;">`tls_cert_file`</span>
|
||||
</td>
|
||||
<td>
|
||||
<td>
|
||||
|
||||
`path/to/cert.crt`
|
||||
</td>
|
||||
@@ -922,7 +922,7 @@ reader:
|
||||
step: '2m' # overrides global `sampling_period` of 1m
|
||||
# other per-query parameters as needed
|
||||
# other reader-level parameters as needed
|
||||
|
||||
|
||||
# other config sections, like models, schedulers, writer, ...
|
||||
```
|
||||
|
||||
|
||||
@@ -69,7 +69,7 @@ reader:
|
||||
query_from_last_seen_timestamp: False
|
||||
verify_tls: False
|
||||
# other reader settings
|
||||
|
||||
|
||||
writer:
|
||||
class: "vm"
|
||||
datasource_url: http://localhost:8428
|
||||
@@ -128,7 +128,7 @@ reader:
|
||||
query_from_last_seen_timestamp: False
|
||||
verify_tls: False
|
||||
# other reader settings
|
||||
|
||||
|
||||
writer:
|
||||
class: "vm"
|
||||
datasource_url: http://localhost:8428
|
||||
@@ -157,7 +157,7 @@ By default, `restore_state` is set to `false`, meaning the service will start fr
|
||||
### Benefits
|
||||
|
||||
This feature improves the experience of using the anomaly detection service in several ways:
|
||||
- **Operational continuity**: Production of anomaly scores is resumed from the last known state, minimizing downtime, especially useful in conbination with [periodic schedulers](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#periodic-scheduler) with `start_from` argument explicitly defined.
|
||||
- **Operational continuity**: Production of anomaly scores is resumed from the last known state, minimizing downtime, especially useful in combination with [periodic schedulers](https://docs.victoriametrics.com/anomaly-detection/components/scheduler/#periodic-scheduler) with `start_from` argument explicitly defined.
|
||||
- **Resource efficiency**: Avoids unnecessary resource and time consumption by not retraining models that have already been trained and remain up to date, or querying redundant data from VictoriaMetrics TSDB.
|
||||
- **Config hot-reloading**: Allows for on-the-fly configuration changes with the reuse of unchanged models/data/scheduler combinations, avoiding unnecessary retraining, additional resource utilization and manual service restarts. Please refer to the [hot-reload](https://docs.victoriametrics.com/anomaly-detection/components/#hot-reload) section for more details on how to use this feature.
|
||||
|
||||
@@ -210,7 +210,7 @@ reader:
|
||||
query_from_last_seen_timestamp: False
|
||||
verify_tls: False
|
||||
# other reader settings
|
||||
|
||||
|
||||
writer:
|
||||
class: "vm"
|
||||
datasource_url: http://localhost:8428
|
||||
@@ -320,7 +320,7 @@ The section is **backward-compatible and disabled by default**, meaning that all
|
||||
- The service is restarted with `restore_state` set to `false`, which triggers a cleanup of all stored artifacts.
|
||||
- The models are marked as outdated once scheduled re-fitting is due, leading to retraining and replacement of previous artifacts.
|
||||
|
||||
`ttl` argument defines the time-to-live period for model instances and their training data. It should be a valid period string (e.g., `7d` for 7 days, `30d` for 30 days, etc.). If a model instance or its training data has not been used for inference or refitting within this period, it will be considered stale and eligible for cleanup.
|
||||
`ttl` argument defines the time-to-live period for model instances and their training data. It should be a valid period string (e.g., `7d` for 7 days, `30d` for 30 days, etc.). If a model instance or its training data has not been used for inference or refitting within this period, it will be considered stale and eligible for cleanup.
|
||||
|
||||
> If set higher than the respective scheduler's `fit_every` period, the `ttl` will have no effect, as models will always be refitted before they become stale.
|
||||
|
||||
|
||||
@@ -395,7 +395,7 @@ services:
|
||||
restart: always
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.0
|
||||
image: victoriametrics/vmanomaly:v1.29.1
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
463
docs/guides/grafana-vmauth-openid-configuration/README.md
Normal file
@@ -0,0 +1,463 @@
|
||||
Using [Grafana](https://grafana.com/) with [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/) is an effective way to provide [multi-tenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) access to your metrics, logs, and traces.
|
||||
vmauth provides a way to authenticate users using [JWT tokens](https://en.wikipedia.org/wiki/JSON_Web_Token) {{% available_from "v1.138.0" %}} issued by an external identity provider.
|
||||
Those tokens can include information about the user and their tenant, which vmauth can use to restrict access so users only see metrics in their own tenant.
|
||||
|
||||
This guide walks through configuring Grafana with OIDC to query metrics from both single-node and cluster deployments of VictoriaMetrics.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
* [Docker](https://docs.docker.com/engine/install/) and [docker compose](https://docs.docker.com/compose/) must be installed.
|
||||
* [jq tool](https://jqlang.org/)
|
||||
* Add `grafana` and `keycloak` hosts to the `/etc/hosts` file, pointing to `127.0.0.1`.
|
||||
|
||||
```
|
||||
# /etc/hosts
|
||||
|
||||
# Setup vmauth - Multi-Tenant Access with Grafana & OIDC
|
||||
# https://docs.victoriametrics.com/guides/grafana-vmauth-openid-configuration/#prerequisites
|
||||
127.0.0.1 keycloak grafana
|
||||
```
|
||||
|
||||
## Identity provider
|
||||
|
||||
The identity provider must be able to issue JWT tokens with the following `vm_access` claim:
|
||||
|
||||
```json
|
||||
{
|
||||
"exp": 1772019469,
|
||||
"vm_access": {
|
||||
"metrics_account_id": 0,
|
||||
"metrics_project_id": 0,
|
||||
"metrics_extra_labels": [
|
||||
"team=dev"
|
||||
],
|
||||
"metrics_extra_filters": [
|
||||
"{env=~\"aws|gcp\",cluster!=\"production\"}"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
> Note: all properties inside `vm_access` are optional and can be omitted. `vm_access: {}` is a valid claim value.
|
||||
|
||||
Some identity providers support only string-based claim values, and vmauth supports these as well:
|
||||
```json
|
||||
{
|
||||
"exp": 1772019469,
|
||||
"vm_access": "{\"metrics_account_id\": 0, \"metrics_project_id\": 0}"
|
||||
}
|
||||
```
|
||||
|
||||
See details about all supported options in the [vmauth - JWT token auth proxy](https://docs.victoriametrics.com/victoriametrics/vmauth/#jwt-token-auth-proxy).
|
||||
|
||||
### Setup Keycloak
|
||||
|
||||
[Keycloak](https://www.keycloak.org/) is an open-source identity provider that can issue JWT tokens.
|
||||
|
||||
Add the following section to your `compose.yaml` file to configure Keycloak:
|
||||
|
||||
```yaml
|
||||
# compose.yaml
|
||||
services:
|
||||
keycloak:
|
||||
image: quay.io/keycloak/keycloak:26.3
|
||||
command:
|
||||
- start-dev
|
||||
- --http-port=3001
|
||||
ports:
|
||||
- 127.0.0.1:3001:3001
|
||||
environment:
|
||||
KC_HOSTNAME_BACKCHANNEL_DYNAMIC: "true"
|
||||
KC_HOSTNAME: http://keycloak:3001/
|
||||
KC_BOOTSTRAP_ADMIN_USERNAME: admin
|
||||
KC_BOOTSTRAP_ADMIN_PASSWORD: change_me
|
||||
volumes:
|
||||
- keycloakdata:/opt/keycloak/data
|
||||
|
||||
volumes:
|
||||
keycloakdata: {}
|
||||
```
|
||||
|
||||
Start the services:
|
||||
```sh
|
||||
docker compose up
|
||||
```
|
||||
|
||||
Once Keycloak is available, follow the steps below to configure the OIDC client and users for Grafana:
|
||||
|
||||
### Create client
|
||||
|
||||
1. Open [http://keycloak:3001](http://keycloak:3001).
|
||||
1. Log in with credentials.
|
||||
- Username: `admin`
|
||||
- Password: `change_me`
|
||||
1. Go to `Clients` -> `Create client`.
|
||||
- Use `OpenID Connect` as `Client Type`.
|
||||
- Specify `grafana` as `Client ID`.
|
||||
- Click `Next`.
|
||||

|
||||
1. Enable `Client authentication`
|
||||
- Enable `Authorization`.
|
||||
- Enable `Direct access grants` (this is only required for testing the token and can be disabled in production).
|
||||

|
||||
- Click `Next`.
|
||||
1. Add the Grafana URL as `Root URL`. For example, `http://grafana:3000`.
|
||||

|
||||
- Click `Save`.
|
||||
1. Go to `Clients` -> `grafana` -> `Client scopes`.
|
||||

|
||||
- Click on `grafana-dedicated` -> `Configure a new mapper` -> `User attribute`.
|
||||

|
||||
1. Configure the mapper as follows:
|
||||
- Set `Name` to `vm_access`.
|
||||
- Set `User Attribute` to `vm_access`.
|
||||
- Set `Token Claim Name` to `vm_access`.
|
||||
- Set `Claim JSON Type` to `JSON`.
|
||||
- Enable `Add to ID token` and `Add to access token`.
|
||||
|
||||

|
||||
- Click `Save`.
|
||||
|
||||
### Create users
|
||||
|
||||
1. Go to `Realm settings` -> `User profile`.
|
||||
- Click `Create attribute`.
|
||||
- Specify `vm_access` as `Attribute [Name]`.
|
||||

|
||||
- Click `Create`.
|
||||
1. Go to `Users` -> `Add user`.
|
||||
- Mark email as verified.
|
||||
- Specify `test-dev` as `Username`.
|
||||
- Specify `test-dev@example.com` as `Email`.
|
||||
- Set the `vm_access` attribute to `{"metrics_account_id": 1, "metrics_project_id": 2, "metrics_extra_labels": ["team=dev"]}`.
|
||||
- Press `Create`
|
||||

|
||||
- Go to `Users` -> `test-dev` user -> `Credentials` tab.
|
||||
- Press `Set Password`.
|
||||
- Type the password `testpass`.
|
||||
- Disable `Temporary` option
|
||||
- Press `Save` and confirm.
|
||||
|
||||
1. Go to `Users` -> `admin` user.
|
||||
- Mark email as verified.
|
||||
- Specify `admin@example.com` as `Email`.
|
||||
- Set the `vm_access` attribute to `{"metrics_account_id": 1, "metrics_project_id": 2, "metrics_extra_labels": ["team=admin"]}`.
|
||||
- Click `Save`.
|
||||
|
||||
### Test identity provider
|
||||
|
||||
Gather the following information needed to configure Grafana:
|
||||
|
||||
1. The Realm name must be `master`. To get the name, go to `Realm settings` -> `General` and copy the `Name`.
|
||||
1. The Client ID must be `grafana`. To get the ID, go to `Clients` -> `grafana` -> `Settings` and copy the `Client ID`.
|
||||
1. The Client Secret is dynamically generated. To get the secret, go to `Clients` -> `grafana` -> `Credentials` and copy the `Client Secret`.<br>
|
||||

|
||||
<br>
|
||||
|
||||
Test that everything is working by requesting a token using `curl`:
|
||||
|
||||
```sh
|
||||
TOKEN=$(curl --fail -s -X POST "http://keycloak:3001/realms/master/protocol/openid-connect/token" \
|
||||
-H "Content-Type: application/x-www-form-urlencoded" \
|
||||
-d "client_id=grafana" \
|
||||
-d "client_secret={CLIENT_SECRET}" \
|
||||
-d "grant_type=password" \
|
||||
-d "username=test-dev" \
|
||||
-d "password=testpass" | jq -r '.access_token') && echo $TOKEN
|
||||
```
|
||||
|
||||
<!--
|
||||
fish example:
|
||||
set TOKEN (curl --fail -s -X POST "http://keycloak:3001/realms/master/protocol/openid-connect/token" \
|
||||
-H "Content-Type: application/x-www-form-urlencoded" \
|
||||
-d "client_id=grafana" \
|
||||
-d "client_secret={CLIENT_SECRET}" \
|
||||
-d "grant_type=password" \
|
||||
-d "username=test-dev" \
|
||||
-d "password=testpass" | jq -r '.access_token'); and echo $TOKEN
|
||||
-->
|
||||
|
||||
The response should contain a valid JWT token with the `vm_access` claim.
|
||||
Use [jwt.io](https://jwt.io/) to decode the token and verify that the `vm_access` claim is present with the expected values.
|
||||
|
||||
> Please note that the issued token is short-lived, so you might need to request a new one before using it in later sections.
|
||||
|
||||
## VictoriaMetrics
|
||||
|
||||
### Storage and scraping
|
||||
|
||||
First, create a `scrape.yaml` file with vmagent scrape configuration to ingest data into vmsingle and vmstorage for testing purposes:
|
||||
|
||||
```yaml
|
||||
# scrape.yaml
|
||||
scrape_configs:
|
||||
- job_name: stat
|
||||
metric_relabel_configs:
|
||||
# The team label showcases the extra_label functionality used with vmsingle.
|
||||
- if: "{instance =~ 'vmauth.*'}"
|
||||
action: replace
|
||||
target_label: team
|
||||
replacement: admin
|
||||
- if: "{instance =~ 'vmagent.*'}"
|
||||
action: replace
|
||||
target_label: team
|
||||
replacement: dev
|
||||
|
||||
# The vm_account_id and vm_project_id labels showcase tenant functionality used with vmcluster
|
||||
- if: "{instance =~ 'vmauth.*'}"
|
||||
action: replace
|
||||
target_label: vm_account_id
|
||||
replacement: '1'
|
||||
- if: "{instance =~ 'vmauth.*'}"
|
||||
action: replace
|
||||
target_label: vm_project_id
|
||||
replacement: '2'
|
||||
- if: "{instance =~ 'vmagent.*'}"
|
||||
action: replace
|
||||
target_label: vm_account_id
|
||||
replacement: '1'
|
||||
- if: "{instance =~ 'vmagent.*'}"
|
||||
action: replace
|
||||
target_label: vm_project_id
|
||||
replacement: '2'
|
||||
static_configs:
|
||||
- targets:
|
||||
- vmagent:8429
|
||||
- vmauth:8427
|
||||
|
||||
```
|
||||
|
||||
Add VictoriaMetrics single-node and cluster to the `compose.yaml` file.
|
||||
These services will be used to store metrics scraped by vmagent and to query them via Grafana using vmauth.
|
||||
|
||||
Relabeling rules will add the `team` label to the scraped metrics in order to test multi-tenant access.
|
||||
Metrics from `vmagent` will be labeled with `team=dev` and metrics from `vmauth` will be labeled with `team=admin`.
|
||||
|
||||
vmagent will write data into VictoriaMetrics single-node and cluster (with tenant `1:2`).
|
||||
|
||||
```yaml
|
||||
# compose.yaml
|
||||
services:
|
||||
vmsingle:
|
||||
image: victoriametrics/victoria-metrics:v1.138.0
|
||||
|
||||
vmstorage:
|
||||
image: victoriametrics/vmstorage:v1.138.0-cluster
|
||||
|
||||
vminsert:
|
||||
image: victoriametrics/vminsert:v1.138.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8400
|
||||
|
||||
vmselect:
|
||||
image: victoriametrics/vmselect:v1.138.0-cluster
|
||||
command:
|
||||
- -storageNode=vmstorage:8401
|
||||
|
||||
vmagent:
|
||||
image: victoriametrics/vmagent:v1.138.0
|
||||
volumes:
|
||||
- ./scrape.yaml:/etc/vmagent/config.yaml
|
||||
command:
|
||||
- -promscrape.config=/etc/vmagent/config.yaml
|
||||
- -remoteWrite.url=http://vminsert:8480/insert/multitenant/prometheus/api/v1/write
|
||||
- -remoteWrite.url=http://vmsingle:8428/api/v1/write
|
||||
```
|
||||
|
||||
### Vmauth
|
||||
|
||||
Before we start, let's explore the concept of placeholders supported in the vmauth configuration.
|
||||
Placeholders can be used inside the `url_prefix` property to restrict access by setting the [tenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format) or [extra filters](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-enhancements).
|
||||
|
||||
A placeholder value is taken from the authenticated JWT token.
|
||||
The following placeholders are supported:
|
||||
- `{{.MetricsTenant}}` placeholder is a combination of `vm_access.metrics_account_id` and `vm_access.metrics_project_id` delimited by `:`.
|
||||
- `{{.MetricsExtraLabels}}` placeholder is substituted from `vm_access.metrics_extra_labels` claim property.
|
||||
- `{{.MetricsExtraFilters}}` placeholder is substituted from `vm_access.metrics_extra_filters` claim property.
|
||||
|
||||
Now, let's create a vmauth configuration file `auth.yaml` that enables OIDC authorization using the [identity provider](https://docs.victoriametrics.com/guides/grafana-vmauth-openid-configuration/#identity-provider).
|
||||
For cluster access, we use the `{{.MetricsTenant}}` placeholder to route requests to a specific tenant.
|
||||
For single-node access, we use `{{.MetricsExtraLabels}}`.
|
||||
Read more about templating in vmauth [docs](https://docs.victoriametrics.com/victoriametrics/vmauth/#jwt-claim-based-request-templating).
|
||||
|
||||
```yaml
|
||||
# auth.yaml
|
||||
users:
|
||||
- jwt:
|
||||
oidc:
|
||||
issuer: 'http://keycloak:3001/realms/master'
|
||||
url_map:
|
||||
- src_paths:
|
||||
- "/insert/.*"
|
||||
drop_src_path_prefix_parts: 1
|
||||
url_prefix: "http://vminsert:8480/insert/{{.MetricsTenant}}/prometheus/"
|
||||
- src_paths:
|
||||
- "/select/.*"
|
||||
drop_src_path_prefix_parts: 1
|
||||
url_prefix: "http://vmselect:8481/select/{{.MetricsTenant}}/prometheus/"
|
||||
- src_paths:
|
||||
- "/single/.*"
|
||||
drop_src_path_prefix_parts: 1
|
||||
url_prefix: "http://vmsingle:8428?extra_label={{.MetricsExtraLabels}}"
|
||||
```
|
||||
|
||||
Now add the vmauth service to `compose.yaml`:
|
||||
|
||||
```yaml
|
||||
# compose.yaml
|
||||
services:
|
||||
vmauth:
|
||||
image: docker.io/victoriametrics/vmauth:v1.138.0
|
||||
ports:
|
||||
- 8427:8427
|
||||
volumes:
|
||||
- ./auth.yaml:/auth.yaml
|
||||
command:
|
||||
- -auth.config=/auth.yaml
|
||||
```
|
||||
|
||||
### Test vmauth
|
||||
|
||||
Start the services:
|
||||
|
||||
```sh
|
||||
docker compose up
|
||||
```
|
||||
|
||||
Use the token obtained in the [Test identity provider](https://docs.victoriametrics.com/guides/grafana-vmauth-openid-configuration/#test-identity-provider) section to test vmauth configuration.
|
||||
|
||||
Cluster select:
|
||||
```sh
|
||||
curl --fail http://localhost:8427/select/api/v1/status/buildinfo -H "Authorization: Bearer $TOKEN"
|
||||
|
||||
# Output:
|
||||
# {"status":"success","data":{"version":"2.24.0"}}
|
||||
```
|
||||
|
||||
Cluster insert:
|
||||
```sh
|
||||
curl --fail http://localhost:8427/insert/api/v1/write -H "Authorization: Bearer $TOKEN" -i
|
||||
# Output
|
||||
# HTTP/1.1 204 No Content
|
||||
# ...
|
||||
```
|
||||
|
||||
Single select:
|
||||
```sh
|
||||
curl --fail http://localhost:8427/single/api/v1/status/buildinfo -H "Authorization: Bearer $TOKEN"
|
||||
|
||||
# Output:
|
||||
# {"status":"success","data":{"version":"2.24.0"}}
|
||||
```
|
||||
|
||||
## Grafana
|
||||
|
||||
### Setup
|
||||
|
||||
Add the Grafana service to the `compose.yaml` file.
|
||||
This configuration enables OAuth authentication using the previously configured Keycloak service as the identity provider.
|
||||
Don't forget to replace the `{CLIENT_SECRET}` placeholder with the actual client secret gathered earlier.
|
||||
|
||||
```yaml
|
||||
# compose.yaml
|
||||
services:
|
||||
grafana:
|
||||
image: grafana/grafana:12.1.0
|
||||
ports:
|
||||
- 3000:3000
|
||||
environment:
|
||||
GF_SERVER_ROOT_URL: http://grafana:3000
|
||||
GF_AUTH_GENERIC_OAUTH_ENABLED: true
|
||||
GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP: true
|
||||
GF_AUTH_GENERIC_OAUTH_NAME: keycloak
|
||||
GF_AUTH_GENERIC_OAUTH_CLIENT_ID: grafana
|
||||
GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: '{CLIENT_SECRET}'
|
||||
GF_AUTH_GENERIC_OAUTH_EMAIL_ATTRIBUTE_PATH: email
|
||||
GF_AUTH_GENERIC_OAUTH_LOGIN_ATTRIBUTE_PATH: username
|
||||
GF_AUTH_GENERIC_OAUTH_NAME_ATTRIBUTE_PATH: full_name
|
||||
GF_AUTH_GENERIC_OAUTH_SCOPES: openid profile email
|
||||
GF_AUTH_GENERIC_OAUTH_USE_REFRESH_TOKEN: true
|
||||
GF_AUTH_GENERIC_OAUTH_AUTH_URL: http://keycloak:3001/realms/master/protocol/openid-connect/auth
|
||||
GF_AUTH_GENERIC_OAUTH_TOKEN_URL: http://keycloak:3001/realms/master/protocol/openid-connect/token
|
||||
GF_AUTH_GENERIC_OAUTH_API_URL: http://keycloak:3001/realms/master/protocol/openid-connect/userinfo
|
||||
GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH: contains(groups[*], 'grafana-editor') && 'Editor' || 'GrafanaAdmin'
|
||||
volumes:
|
||||
- grafanadata:/var/lib/grafana/
|
||||
|
||||
volumes:
|
||||
grafanadata: {}
|
||||
```
|
||||
|
||||
Alternatively, OAuth authentication can be enabled via the `grafana.ini` configuration file.
|
||||
Don't forget to mount it to the Grafana service at `/etc/grafana/grafana.ini`.
|
||||
|
||||
```ini
|
||||
# grafana.ini
|
||||
|
||||
[server]
|
||||
root_url = http://grafana:3000
|
||||
|
||||
[auth.generic_oauth]
|
||||
enabled = true
|
||||
allow_sign_up = true
|
||||
name = keycloak
|
||||
client_id = grafana
|
||||
client_secret = {CLIENT_SECRET}
|
||||
scopes = openid profile email
|
||||
auth_url = http://keycloak:3001/realms/master/protocol/openid-connect/auth
|
||||
token_url = http://keycloak:3001/realms/master/protocol/openid-connect/token
|
||||
api_url = http://keycloak:3001/realms/master/protocol/openid-connect/userinfo
|
||||
use_refresh_token = true
|
||||
```
|
||||
|
||||
After starting Grafana with the new config, you should be able to log in at [http://grafana:3000](http://grafana:3000) using your [identity provider](https://docs.victoriametrics.com/guides/grafana-vmauth-openid-configuration/#identity-provider).
|
||||
|
||||

|
||||
|
||||
### Datasource
|
||||
|
||||
Create two Prometheus datasources in Grafana with the following URLs: `http://vmauth:8427/select` and `http://vmauth:8427/single`, pointing to the `vmselect` and `vmsingle` services, respectively. Make sure the authentication method is set to `Forward OAuth identity`.
|
||||
|
||||

|
||||
|
||||
You can also use the VictoriaMetrics [Grafana datasource](https://github.com/VictoriaMetrics/victoriametrics-datasource) plugin.
|
||||
See installation instructions in [Grafana datasource - Installation](https://docs.victoriametrics.com/victoriametrics/victoriametrics-datasource/#installation).
|
||||
|
||||
Users with the `vm_access` claim will be able to query metrics from the specified tenant with extra filters applied.
|
||||
|
||||
### Test access
|
||||
|
||||
The Grafana datasources configuration should be as follows:
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Grafana vmauth datasources</figcaption>
|
||||
|
||||
Let's log in as the dev user and query both the VictoriaMetrics cluster and single-node data sources.
|
||||
Both data sources should return the same metrics.
|
||||
|
||||
The only difference is the filter: for the VictoriaMetrics cluster, the `vmauth-cluster` data source must restrict results by `tenant=1:2`.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Logged in as dev user to Grafana dashboard on VictoriaMetrics Cluster</figcaption>
|
||||
|
||||
For VictoriaMetrics single-node, the `vmauth-single` data source must apply the `team=dev` label filter instead.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Logged in as dev user to Grafana dashboard on VictoriaMetrics Single</figcaption>
|
||||
|
||||
Let's log in as the admin user. The `vmauth-single` data source should return different results than it did for the previous user, while `vmauth-cluster` should return the same results because both users use tenant `1:2`.
|
||||
|
||||
The only difference is the filter: for the VictoriaMetrics cluster, the `vmauth-cluster` data source must restrict results by `tenant=1:2`.
|
||||
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Logged in as admin user to Grafana dashboard on VictoriaMetrics Cluster</figcaption>
|
||||
|
||||
For VictoriaMetrics single-node, the `vmauth-single` data source must apply the `team=admin` label filter instead.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Logged in as admin user to Grafana dashboard on VictoriaMetrics Single</figcaption>
|
||||
|
||||
## Summary
|
||||
|
||||
In this guide, we demonstrated how to set up vmauth with OIDC authorization using Keycloak as the identity provider. We also showed how to provide multi-tenant access to your metrics stored in VictoriaMetrics, single-node or cluster, using Grafana and vmauth with OIDC authorization enabled.
|
||||
|
||||
14
docs/guides/grafana-vmauth-openid-configuration/_index.md
Normal file
@@ -0,0 +1,14 @@
|
||||
---
|
||||
weight: 5
|
||||
title: Setup vmauth - Multi-Tenant Access with Grafana & OIDC
|
||||
menu:
|
||||
docs:
|
||||
parent: guides
|
||||
weight: 5
|
||||
tags:
|
||||
- metrics
|
||||
- guide
|
||||
aliases:
|
||||
- /guides/grafana-vmauth-openid-configuration.html
|
||||
---
|
||||
{{% content "README.md" %}}
|
||||
|
After Width: | Height: | Size: 18 KiB |
|
After Width: | Height: | Size: 28 KiB |
|
After Width: | Height: | Size: 14 KiB |
|
After Width: | Height: | Size: 18 KiB |
|
After Width: | Height: | Size: 31 KiB |
|
After Width: | Height: | Size: 44 KiB |
|
After Width: | Height: | Size: 16 KiB |
|
After Width: | Height: | Size: 24 KiB |
|
After Width: | Height: | Size: 62 KiB |
|
After Width: | Height: | Size: 60 KiB |
|
After Width: | Height: | Size: 40 KiB |
BIN
docs/guides/grafana-vmauth-openid-configuration/grafana-ds.webp
Normal file
|
After Width: | Height: | Size: 16 KiB |
|
After Width: | Height: | Size: 33 KiB |
|
After Width: | Height: | Size: 60 KiB |
|
After Width: | Height: | Size: 73 KiB |
|
After Width: | Height: | Size: 22 KiB |
|
After Width: | Height: | Size: 34 KiB |
@@ -6,6 +6,8 @@ build:
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
> vmgateway access control feature has been deprecated. Consider following the vmauth guide [Setup vmauth - Multi-Tenant Access with Grafana & OIDC](https://docs.victoriametrics.com/guides/grafana-vmauth-openid-configuration/) instead. See [migration](https://docs.victoriametrics.com/victoriametrics/vmgateway/#access-control-migration-to-vmauth) docs.
|
||||
|
||||
Using [Grafana](https://grafana.com/) with [vmgateway](https://docs.victoriametrics.com/victoriametrics/vmgateway/) is a great way to provide [multi-tenant](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy) access to your metrics.
|
||||
vmgateway provides a way to authenticate users using [JWT tokens](https://en.wikipedia.org/wiki/JSON_Web_Token) issued by an external identity provider.
|
||||
Those tokens can include information about the user and the tenant they belong to, which can be used
|
||||
@@ -38,7 +40,7 @@ See details about all supported options in the [vmgateway documentation](https:/
|
||||
|
||||
### Configuration example for Keycloak
|
||||
|
||||
[Keycloak](https://www.keycloak.org/) is an open source identity service that can be used to issue JWT tokens.
|
||||
[Keycloak](https://www.keycloak.org/) is an open-source identity service that can issue JWT tokens.
|
||||
|
||||
1. Log in with admin credentials to your Keycloak instance
|
||||
1. Go to `Clients` -> `Create`.<br>
|
||||
@@ -83,9 +85,9 @@ See details about all supported options in the [vmgateway documentation](https:/
|
||||

|
||||
Click `Save`.
|
||||
|
||||
## Configure grafana
|
||||
## Configure Grafana
|
||||
|
||||
To forward JWT tokens Grafana must be configured to use OpenID Connect authentication as follows:
|
||||
To forward JWT tokens, Grafana must be configured to use OpenID Connect authentication as follows:
|
||||
|
||||
```ini
|
||||
[auth.generic_oauth]
|
||||
@@ -100,7 +102,7 @@ token_url = http://localhost:3001/realms/{KEYCLOAK_REALM}/protocol/openid-connec
|
||||
api_url = http://localhost:3001/realms/{KEYCLOAK_REALM}/protocol/openid-connect/userinfo
|
||||
```
|
||||
|
||||
After restarting Grafana with the new config you should be able to log in using your identity provider.
|
||||
After restarting Grafana with the new config, you should be able to log in using your identity provider.
|
||||
|
||||
## Start vmgateway
|
||||
|
||||
@@ -118,7 +120,7 @@ In order to enable multi-tenant access, you must also specify the `-clusterMode=
|
||||
-read.url=http://localhost:8481
|
||||
```
|
||||
|
||||
With this configuration vmgateway will use the `vm_access` claim from the JWT token to restrict access to metrics.
|
||||
With this configuration, vmgateway will use the `vm_access` claim from the JWT token to restrict access to metrics.
|
||||
For example, if the JWT token contains the following `vm_access` claim:
|
||||
|
||||
```json
|
||||
@@ -131,21 +133,21 @@ For example, if the JWT token contains the following `vm_access` claim:
|
||||
}
|
||||
}
|
||||
```
|
||||
> Note: in case `project_id` is not specified, default value `0` is used.
|
||||
> Note: in case `project_id` is not specified, the default value `0` is used.
|
||||
|
||||
Then vmgateway will proxy request to an endpoint with the following path:
|
||||
Then vmgateway will proxy the request to an endpoint with the following path:
|
||||
|
||||
```sh
|
||||
http://localhost:8480/select/0:0/
|
||||
```
|
||||
|
||||
This allows to restrict access to specific tenants without having to create separate datasources in Grafana,
|
||||
This allows us to restrict access to specific tenants without having to create separate datasources in Grafana,
|
||||
or manually managing access at another proxy level.
|
||||
|
||||
### Multi-tenant access for single-node VictoriaMetrics
|
||||
|
||||
In order to use multi-tenant access with single-node VictoriaMetrics, you can use token claims such as `extra_labels`
|
||||
or `extra_filters` filled dynamically by using Identity Provider's user information.
|
||||
To use multi-tenant access with single-node VictoriaMetrics, you can use token claims such as `extra_labels`
|
||||
or `extra_filters` filled dynamically by using the Identity Provider's user information.
|
||||
vmgateway uses those claims and [enhanced Prometheus querying API](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#prometheus-querying-api-enhancements)
|
||||
to provide additional filtering capabilities.
|
||||
|
||||
@@ -167,14 +169,14 @@ This will add the following query args to the proxied request:
|
||||
- `extra_labels=team=dev`
|
||||
- `extra_filters={env=~"aws|gcp",cluster!="production"}`
|
||||
|
||||
With this configuration VictoriaMetrics will add the following filters to every query: `{team="dev", env=~"aws|gcp", cluster!="production"}`.
|
||||
With this configuration, VictoriaMetrics will add the following filters to every query: `{team="dev", env=~"aws|gcp", cluster!="production"}`.
|
||||
So when a user queries `vm_http_requests_total`, the query will be transformed to `vm_http_requests_total{team="dev", env=~"aws|gcp", cluster!="production"}`.
|
||||
|
||||
### Token signature verification
|
||||
|
||||
It is also possible to enable [JWT token signature verification](https://docs.victoriametrics.com/victoriametrics/vmgateway/#jwt-signature-verification) at
|
||||
vmgateway.
|
||||
To do this by using OpenID Connect discovery endpoint you need to specify the `-auth.oidcDiscoveryEndpoints` flag. For example:
|
||||
To do this by using the OpenID Connect discovery endpoint, you need to specify the `-auth.oidcDiscoveryEndpoints` flag. For example:
|
||||
|
||||
```sh
|
||||
./bin/vmgateway \
|
||||
@@ -201,7 +203,7 @@ It is also possible to provide the public keys directly via the `-auth.publicKey
|
||||
Create a new Prometheus datasource in Grafana with the following URL `http://<vmgateway>:8431`.
|
||||
The URL should point to the vmgateway instance.
|
||||
|
||||
In the "Type and version" section it is recommended to set the type to "Prometheus" and the version to at least "2.24.x":
|
||||
In the "Type and version" section, it is recommended to set the type to "Prometheus" and the version to at least "2.24.x":
|
||||
|
||||

|
||||
|
||||
@@ -214,11 +216,11 @@ Enable `Forward OAuth identity` flag.<br>
|
||||

|
||||
|
||||
Now you can use Grafana to query metrics from the specified tenant.
|
||||
Users with `vm_access` claim will be able to query metrics from the specified tenant.
|
||||
Users with a `vm_access` claim will be able to query metrics from the specified tenant.
|
||||
|
||||
## Test multi-tenant access
|
||||
|
||||
For the test purpose we will setup the following services as [docker-compose](https://docs.docker.com/compose/) manifest:
|
||||
For the test purpose, we will set up the following services as [docker-compose](https://docs.docker.com/compose/) manifest:
|
||||
- Grafana
|
||||
- Keycloak
|
||||
- vmagent to generate test metrics
|
||||
@@ -311,7 +313,7 @@ volumes:
|
||||
grafana_data:
|
||||
```
|
||||
|
||||
For the test purpose vmagent will be configured to scrape metrics from the following targets(`scrape.yaml` contents):
|
||||
For testing purposes, vmagent will be configured to scrape metrics from the following targets (`scrape.yaml` contents):
|
||||
|
||||
```yaml
|
||||
scrape_configs:
|
||||
@@ -341,27 +343,27 @@ Grafana datasources configuration will be the following:
|
||||
|
||||

|
||||
|
||||
Let's login as user with `team=dev` labels limitation set via claims.
|
||||
Let's log in as a user with `team=dev` labels limitation set via claims.
|
||||
|
||||
Using `vmgateway-cluster` results into `No data` response as proxied request will go to tenant `0:1`.
|
||||
Since vmagent is only configured to write to `0:0` `No data` is an expected response.
|
||||
Using `vmgateway-cluster` results in `No data` response as the proxied request will go to tenant `0:1`.
|
||||
Since vmagent is configured to write only to `0:0`, the `No data` response is expected.
|
||||
|
||||

|
||||
|
||||
Switching to `vmgateway-single` does have data. Note that it is limited to metrics with `team=dev` label.
|
||||
Switching to `vmgateway-single` returns data. Note that it is limited to metrics with the `team=dev` label.
|
||||
|
||||

|
||||
|
||||
Now lets login as user with `team=admin`.
|
||||
Now let's log in as a user with `team=admin`.
|
||||
|
||||
Both cluster and single node datasources now return metrics for `team=admin`.
|
||||
Both cluster and single-node datasources now return metrics for `team=admin`.
|
||||
|
||||

|
||||

|
||||
|
||||
## Using oAuth for remote write with vmagent
|
||||
## Using OAuth for remote write with vmagent
|
||||
|
||||
vmagent can be configured to use oAuth for remote write. This is in order to add authentication to the write requests.
|
||||
vmagent can be configured to use OAuth for remote write. This adds authentication to write requests.
|
||||
|
||||
In order to create a client for vmagent to use, follow the steps below:
|
||||
|
||||
@@ -375,7 +377,7 @@ In order to create a client for vmagent to use, follow the steps below:
|
||||
Enable `Authorization`.<br>
|
||||

|
||||
Click `Next`.<br>
|
||||
1. Leave URLs section empty as vmagent will not use any.
|
||||
1. Leave the URLs section empty, as vmagent will not use any.
|
||||

|
||||
Click `Save`.<br>
|
||||
1. Go to `Clients` -> `vmagent` -> `Credentials`.<br>
|
||||
@@ -396,12 +398,12 @@ In order to create a client for vmagent to use, follow the steps below:
|
||||
Click `Save`.<br>
|
||||
1. Go to `Service account roles` -> click on `service-account-vmagent`.<br>
|
||||

|
||||
1. Go to `Attributes` tab and add an attribute.
|
||||
1. Go to the `Attributes` tab and add an attribute.
|
||||
Change `vm_access` attribute value to `{"tenant_id" : {"account_id": 0, "project_id": 0 }}`. <br>
|
||||

|
||||
Click `Save`.
|
||||
|
||||
Once iDP configuration is done, vmagent configuration needs to be updated to use oAuth for remote write:
|
||||
Once the IdP configuration is done, the vmagent configuration needs to be updated to use OAuth for remote write:
|
||||
|
||||
```yaml
|
||||
vmagent:
|
||||
@@ -419,7 +421,8 @@ Once iDP configuration is done, vmagent configuration needs to be updated to use
|
||||
- -remoteWrite.oauth2.scopes=openid
|
||||
```
|
||||
|
||||
It is required to replace `{CLIENT_ID}` with the client ID and provide the client secret in `vmagent-client-secret` file.
|
||||
It is required to replace `{CLIENT_ID}` with the client ID and provide the client secret in the `vmagent-client-secret` file.
|
||||
Note that vmagent will use the same token for both single-node and cluster vmgateway. vmgateway running in cluster mode
|
||||
will use tenant information from the token to route the request to the correct tenant. vmgateway running in single-node mode
|
||||
will use the tenant information from the token to route the request to the correct tenant. vmgateway running in single-node mode
|
||||
will just verify token validity.
|
||||
|
||||
|
||||
@@ -1,10 +1,7 @@
|
||||
---
|
||||
weight: 5
|
||||
weight: 16
|
||||
title: Setup vmgateway - Multi-Tenant Access with Grafana & OIDC
|
||||
menu:
|
||||
docs:
|
||||
parent: guides
|
||||
weight: 5
|
||||
menu: false
|
||||
tags:
|
||||
- metrics
|
||||
- guide
|
||||
|
||||
@@ -6,33 +6,70 @@ build:
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
**The guide covers:**
|
||||
|
||||
* High availability monitoring via [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) in [Kubernetes](https://kubernetes.io/) with Helm charts
|
||||
* How to store metrics
|
||||
* How to scrape metrics from k8s components using a service discovery
|
||||
* How to visualize stored data
|
||||
* How to store metrics in [VictoriaMetrics](https://victoriametrics.com)
|
||||
This guide walks you through deploying the [VictoriaMetrics cluster](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/) version on Kubernetes in high-availability mode.
|
||||
|
||||
**Preconditions**
|
||||
By the end of this guide, you will know:
|
||||
|
||||
* [Kubernetes cluster 1.19.12-gke.2100](https://cloud.google.com/kubernetes-engine). We use GKE cluster from [GCP](https://cloud.google.com/) but this guide also applies to any Kubernetes cluster. For example, [Amazon EKS](https://aws.amazon.com/ru/eks/).
|
||||
* [Helm 3 ](https://helm.sh/docs/intro/install)
|
||||
* [kubectl 1.21](https://kubernetes.io/docs/tasks/tools/install-kubectl)
|
||||
* [jq](https://stedolan.github.io/jq/download/) tool
|
||||
- How to install and configure [VictoriaMetrics cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) using Helm.
|
||||
- How high-availability mode works in VictoriaMetrics.
|
||||
- How to scrape metrics from Kubernetes components using service discovery.
|
||||
|
||||
## Overview
|
||||
|
||||
In this guide, high availability is achieved by configuring [replication](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#replication-and-data-safety) on `vminsert` to a value of 2. This means every incoming data point is written twice to separate `vmstorage` pods, so data remains available as long as at least one replica of a given time series is reachable.
|
||||
|
||||
This setup requires **twice as much storage** as a normal, non-replicating cluster because `vminsert` fans out each write to two `vmstorage` pods.
|
||||
|
||||
Duplication causes `vmselect` to read back two copies of each sample, potentially skewing results. For example, in aggregations such as `sum` or `count`, this would double the result. To handle this, we must enable [de-duplication](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#deduplication) in the `vmselect` pods to collapse the replicas into a single sample per scrape interval.
|
||||
|
||||
## Preconditions
|
||||
|
||||
> [!NOTE] Note
|
||||
> We used a GKE cluster (v1.35) from [GCP](https://cloud.google.com/) in this guide, but it can also be applied to any Kubernetes cluster. For example, [Amazon EKS](https://aws.amazon.com/eks/) or an on-premises cluster.
|
||||
|
||||
- [Kubernetes cluster](https://cloud.google.com/kubernetes-engine).
|
||||
- [Helm](https://helm.sh/docs/intro/install)
|
||||
- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl)
|
||||
- [jq](https://stedolan.github.io/jq/download/) tool
|
||||
|
||||
## 1. VictoriaMetrics Helm repository
|
||||
|
||||
Please see the relevant [VictoriaMetrics Helm repository](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/#1-victoriametrics-helm-repository) section in previous guides.
|
||||
Run the following command to add the VictoriaMetrics Helm repository:
|
||||
|
||||
```sh
|
||||
helm repo add vm https://victoriametrics.github.io/helm-charts/
|
||||
helm repo update
|
||||
```
|
||||
|
||||
Then, verify that VictoriaMetrics charts are available with:
|
||||
|
||||
```sh
|
||||
helm search repo vm/
|
||||
```
|
||||
|
||||
You should get a list of charts similar to this:
|
||||
|
||||
```text
|
||||
NAME CHART VERSION APP VERSION DESCRIPTION
|
||||
vm/victoria-metrics-cluster 0.35.0 v1.136.0 VictoriaMetrics Cluster version - high-performa...
|
||||
vm/victoria-metrics-agent 0.32.0 v1.136.0 VictoriaMetrics Agent - collects metrics from v...
|
||||
vm/victoria-metrics-common 0.0.46 VictoriaMetrics Common - contains shared templa...
|
||||
...(list continues)...
|
||||
```
|
||||
|
||||
## 2. Install VictoriaMetrics Cluster from the Helm chart
|
||||
|
||||
Execute the following command in your terminal:
|
||||
A [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) consists of three services:
|
||||
|
||||
- `vminsert`: receives incoming metrics and distributes them across vmstorage nodes via consistent hashing on metric names and labels.
|
||||
- `vmstorage`: stores raw data and serves queries filtered by time range and labels.
|
||||
- `vmselect`: executes queries by fetching data across all configured vmstorage nodes.
|
||||
|
||||
Create a high-availability configuration file for the VictoriaMetrics services:
|
||||
|
||||
```sh
|
||||
cat <<EOF | helm install vmcluster vm/victoria-metrics-cluster -f -
|
||||
cat <<EOF > victoria-metrics-cluster-values.yml
|
||||
vmselect:
|
||||
extraArgs:
|
||||
dedup.minScrapeInterval: 1ms
|
||||
@@ -58,30 +95,40 @@ vmstorage:
|
||||
EOF
|
||||
```
|
||||
|
||||
* The `Helm install vmcluster vm/victoria-metrics-cluster` command installs [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) to the default [namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/).
|
||||
* `dedup.minScrapeInterval: 1ms` configures [de-duplication](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) for the cluster that de-duplicates data points in the same time series if they fall within the same discrete 1ms bucket. The earliest data point will be kept. In the case of equal timestamps, an arbitrary data point will be kept.
|
||||
* `replicationFactor: 2` Replication factor for the ingested data, i.e. how many copies should be made among distinct `-storageNode` instances. If the replication factor is greater than one, the deduplication must be enabled on the remote storage side.
|
||||
* `podAnnotations: prometheus.io/scrape: "true"` enables the scraping of metrics from the vmselect, vminsert and vmstorage pods.
|
||||
* `podAnnotations:prometheus.io/port: "some_port" ` enables the scraping of metrics from the vmselect, vminsert and vmstorage pods from corresponding ports.
|
||||
* `replicaCount: 3` creates three replicas of vmselect, vminsert and vmstorage.
|
||||
|
||||
Let's break down how high availability is achieved:
|
||||
|
||||
The expected result of the command execution is the following:
|
||||
- `replicaCount: 3` creates three replicas of vmselect, vminsert, and vmstorage each.
|
||||
- `replicationFactor: 2` enables [replication](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#replication-and-data-safety) for `vminsert` and `vmselect`.
|
||||
- `vminsert` uses `replicationFactor` to fan out writes. In this case, it creates two copies of the sample and distributes them among distinct `vmstorage` pods.
|
||||
- `vmselect` also gets a `replicationFactor` so it knows how many replicas to expect and when to treat a response as partial (more on this later).
|
||||
- `dedup.minScrapeInterval: 1ms` configures [de-duplication](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) for `vmselect`, so it does not double-count samples when retrieving data from `vmstorage` pods.
|
||||
- `podAnnotations: prometheus.io/scrape: "true"` enables metric scraping so you can monitor your VictoriaMetrics cluster.
|
||||
- `podAnnotations: prometheus.io/port: "some_port"` defines the scraping port.
|
||||
|
||||
Install the VictoriaMetrics cluster in high-availability mode. The following command deploys a VictoriaMetrics cluster in the default namespace:
|
||||
|
||||
```sh
|
||||
helm install vmcluster vm/victoria-metrics-cluster -f victoria-metrics-cluster-values.yml
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
NAME: vmcluster
|
||||
LAST DEPLOYED: Thu Jul 29 13:33:51 2021
|
||||
LAST DEPLOYED: Mon Mar 2 12:50:25 2026
|
||||
NAMESPACE: default
|
||||
STATUS: deployed
|
||||
REVISION: 1
|
||||
DESCRIPTION: Install complete
|
||||
TEST SUITE: None
|
||||
NOTES:
|
||||
Write API:
|
||||
|
||||
The VictoriaMetrics write api can be accessed via port 8480 via the following DNS name from within your cluster:
|
||||
vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local
|
||||
The VictoriaMetrics write api can be accessed via port 8480 with the following DNS name from within your cluster:
|
||||
vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local.
|
||||
|
||||
Get the VictoriaMetrics insert service URL by running these commands in the same shell:
|
||||
Get the Victoria Metrics insert service URL by running these commands in the same shell:
|
||||
export POD_NAME=$(kubectl get pods --namespace default -l "app=vminsert" -o jsonpath="{.items[0].metadata.name}")
|
||||
kubectl --namespace default port-forward $POD_NAME 8480
|
||||
|
||||
@@ -92,21 +139,20 @@ prometheus.yml
|
||||
remote_write:
|
||||
- url: "http://<insert-service>/insert/0/prometheus/"
|
||||
|
||||
|
||||
for example - inside the Kubernetes cluster:
|
||||
|
||||
remote_write:
|
||||
- url: "http://vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local:8480/insert/0/prometheus/"
|
||||
- url: http://vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local:8480/insert/0/prometheus/
|
||||
Read API:
|
||||
|
||||
The VictoriaMetrics read api can be accessed via port 8481 with the following DNS name from within your cluster:
|
||||
vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local
|
||||
vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local.
|
||||
|
||||
Get the VictoriaMetrics select service URL by running these commands in the same shell:
|
||||
export POD_NAME=$(kubectl get pods --namespace default -l "app=vmselect" -o jsonpath="{.items[0].metadata.name}")
|
||||
kubectl --namespace default port-forward $POD_NAME 8481
|
||||
|
||||
You need to specify select service URL into your Grafana:
|
||||
You need to specify the service URL in your Grafana:
|
||||
NOTE: you need to use the Prometheus Data Source
|
||||
|
||||
Input this URL field into Grafana
|
||||
@@ -116,127 +162,93 @@ Input this URL field into Grafana
|
||||
|
||||
for example - inside the Kubernetes cluster:
|
||||
|
||||
http://vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local:8481/select/0/prometheus/"
|
||||
|
||||
http://vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local.:8481/select/0/prometheus/
|
||||
```
|
||||
|
||||
Verify that the VictoriaMetrics cluster pods are up and running by executing the following command:
|
||||
|
||||
|
||||
```sh
|
||||
kubectl get pods | grep vmcluster
|
||||
kubectl get pods -l app.kubernetes.io/instance=vmcluster
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
You should see:
|
||||
|
||||
```text
|
||||
vmcluster-victoria-metrics-cluster-vminsert-78b84d8cd9-4mh9d 1/1 Running 0 2m28s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-78b84d8cd9-4ppl7 1/1 Running 0 2m28s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-78b84d8cd9-782qk 1/1 Running 0 2m28s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-69c5f48bc6-4v4ws 1/1 Running 0 2m27s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-69c5f48bc6-kwc7q 1/1 Running 0 2m28s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-69c5f48bc6-v7pmk 1/1 Running 0 2m28s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-0 1/1 Running 0 2m27s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-1 1/1 Running 0 2m3s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-2 1/1 Running 0 99s
|
||||
```
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
vmcluster-victoria-metrics-cluster-vminsert-788c76b69b-lphnn 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-788c76b69b-lxg2w 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-788c76b69b-qmtkp 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-65796bc88d-29cwm 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-65796bc88d-lz58p 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-65796bc88d-t42pr 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-0 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-1 1/1 Running 0 91s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-2 1/1 Running 0 76s
|
||||
|
||||
```
|
||||
## 3. Install vmagent from the Helm chart
|
||||
|
||||
To scrape metrics from Kubernetes with a VictoriaMetrics Cluster we will need to install [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) with some additional configurations. To do so, please run the following command:
|
||||
To scrape metrics from Kubernetes with a VictoriaMetrics Cluster, we need to install [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and configure it with additional settings.
|
||||
|
||||
Install `vmagent` with the following command:
|
||||
|
||||
```sh
|
||||
helm install vmagent vm/victoria-metrics-agent -f https://docs.victoriametrics.com/guides/examples/guide-vmcluster-vmagent-values.yaml
|
||||
```
|
||||
|
||||
Here is full file content `guide-vmcluster-vmagent-values.yaml`
|
||||
You can obtain a copy of `guide-vmcluster-vmagent-values.yaml` to review with:
|
||||
|
||||
```yaml
|
||||
remoteWrite:
|
||||
- url: http://vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local:8480/insert/0/prometheus/
|
||||
|
||||
scrape_configs:
|
||||
- job_name: vmagent
|
||||
static_configs:
|
||||
- targets: ["localhost:8429"]
|
||||
- job_name: "kubernetes-apiservers"
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
relabel_configs:
|
||||
- source_labels:
|
||||
[
|
||||
__meta_kubernetes_namespace,
|
||||
__meta_kubernetes_service_name,
|
||||
__meta_kubernetes_endpoint_port_name,
|
||||
]
|
||||
action: keep
|
||||
regex: default;kubernetes;https
|
||||
- job_name: "kubernetes-nodes"
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- job_name: "kubernetes-nodes-cadvisor"
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
metrics_path: /metrics/cadvisor
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- source_labels: [__metrics_path__]
|
||||
target_label: metrics_path
|
||||
metric_relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [pod]
|
||||
regex: '(.+)'
|
||||
target_label: pod_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
source_labels: [container]
|
||||
regex: '(.+)'
|
||||
target_label: container_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
target_label: name
|
||||
replacement: k8s_stub
|
||||
- action: replace
|
||||
source_labels: [id]
|
||||
regex: '^/system\.slice/(.+)\.service$'
|
||||
target_label: systemd_service_name
|
||||
replacement: '${1}'
|
||||
```sh
|
||||
wget https://docs.victoriametrics.com/guides/examples/guide-vmcluster-vmagent-values.yaml
|
||||
```
|
||||
* By updating `remoteWrite` we configuring [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) to write scraped metrics into the `vminsert` service.
|
||||
* The `metric_relabel_configs` section allows you to process Kubernetes metrics for the Grafana dashboard.
|
||||
|
||||
Here are the key settings in the values file that we used to install `vmagent` with Helm earlier:
|
||||
|
||||
- `remoteWrite` defines the vminsert endpoint that receives telemetry from vmagent. This value should match exactly the URL for the `remote_write` in the output of the VictoriaMetrics cluster installation in [Step 2](https://docs.victoriametrics.com/guides/k8s-ha-monitoring-via-vm-cluster/#id-2-install-victoriametrics-cluster-from-the-helm-chart).
|
||||
|
||||
```yaml
|
||||
remoteWrite:
|
||||
- url: http://vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local:8480/insert/0/prometheus/
|
||||
```
|
||||
|
||||
- `metric_relabel_configs` defines label-rewriting rules for the scraped metrics.
|
||||
|
||||
```yaml
|
||||
metric_relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [pod]
|
||||
regex: '(.+)'
|
||||
target_label: pod_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
source_labels: [container]
|
||||
regex: '(.+)'
|
||||
target_label: container_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
target_label: name
|
||||
replacement: k8s_stub
|
||||
- action: replace
|
||||
source_labels: [id]
|
||||
regex: '^/system\.slice/(.+)\.service$'
|
||||
target_label: systemd_service_name
|
||||
replacement: '${1}'
|
||||
```
|
||||
```yaml
|
||||
```
|
||||
|
||||
Verify that `vmagent`'s pod is up and running by executing the following command:
|
||||
|
||||
|
||||
```shell
|
||||
kubectl get pods | grep vmagent
|
||||
kubectl get pod -l app.kubernetes.io/instance=vmagent
|
||||
```
|
||||
|
||||
|
||||
The expected output is:
|
||||
Expected output:
|
||||
|
||||
```text
|
||||
vmagent-victoria-metrics-agent-57ddbdc55d-h4ljb 1/1 Running 0 13s
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
vmagent-victoria-metrics-agent-6848c6b58d-87rf6 1/1 Running 0 32s
|
||||
```
|
||||
|
||||
## 4. Verifying HA of VictoriaMetrics Cluster
|
||||
@@ -244,157 +256,182 @@ vmagent-victoria-metrics-agent-57ddbdc55d-h4ljb 1/1 Running
|
||||
Run the following command to check that VictoriaMetrics services are up and running:
|
||||
|
||||
```shell
|
||||
kubectl get pods | grep victoria-metrics
|
||||
kubectl get svc -l app.kubernetes.io/instance=vmcluster
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
vmagent-victoria-metrics-agent-57ddbdc55d-h4ljb 1/1 Running 0 75s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-78b84d8cd9-s8v7x 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-78b84d8cd9-xlm9d 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-78b84d8cd9-xqxrh 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-69c5f48bc6-7dg95 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-69c5f48bc6-ck7qb 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-69c5f48bc6-jjqsl 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-0 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-1 1/1 Running 0 63s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-2 1/1 Running 0 34s
|
||||
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||
vmcluster-victoria-metrics-cluster-vminsert ClusterIP 10.43.157.170 <none> 8480/TCP 4m41s
|
||||
vmcluster-victoria-metrics-cluster-vmselect ClusterIP 10.43.222.181 <none> 8481/TCP 4m41s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage ClusterIP None <none> 8482/TCP,8401/TCP,8400/TCP 4m41s
|
||||
```
|
||||
|
||||
To verify that metrics are present in the VictoriaMetrics send a curl request to the `vmselect` service from kubernetes or setup Grafana and check it via the web interface.
|
||||
To verify that metrics are present in VictoriaMetrics, you can send a curl request to the `vmselect` service. Run the following command to make `vmselect`'s port accessible from the local machine:
|
||||
|
||||
Run the following command to see the list of services:
|
||||
|
||||
```shell
|
||||
kubectl get svc | grep vmselect
|
||||
```
|
||||
|
||||
The expected output:
|
||||
|
||||
```text
|
||||
vmcluster-victoria-metrics-cluster-vmselect ClusterIP 10.88.2.69 <none> 8481/TCP 1m
|
||||
```
|
||||
|
||||
Run the following command to make `vmselect`'s port accessible from the local machine:
|
||||
|
||||
|
||||
```shell
|
||||
```sh
|
||||
kubectl port-forward svc/vmcluster-victoria-metrics-cluster-vmselect 8481:8481
|
||||
```
|
||||
|
||||
Execute the following command to get metrics via `curl`:
|
||||
|
||||
```sh
|
||||
curl -sg 'http://127.0.0.1:8481/select/0/prometheus/api/v1/query_range?query=count(up{kubernetes_pod_name=~".*vmselect.*"})&start=-10m&step=1m' | jq
|
||||
curl -sg 'http://127.0.0.1:8481/select/0/prometheus/api/v1/query?query=count(up{kubernetes_pod_name=~".*vmselect.*"})' | jq
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
Let's break down the command:
|
||||
|
||||
* The request to `http://127.0.0.1:8481/select/0/prometheus/api/v1/query?query` uses the [VictoriaMetrics querying API](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format) to fetch metric data.
|
||||
* The argument `query=count(up{kubernetes_pod_name=~".*vmselect.*"})` specifies the query. Specifically, we want to count the number of `vmselect` pods.
|
||||
* We pipe the output to `jq` to format the output in a more readable way.
|
||||
|
||||
You should see:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "success",
|
||||
"isPartial": false,
|
||||
"data": {
|
||||
"resultType": "matrix",
|
||||
"resultType": "vector",
|
||||
"result": [
|
||||
{
|
||||
"metric": {},
|
||||
"values": [
|
||||
[
|
||||
1628065480.657,
|
||||
"3"
|
||||
],
|
||||
[
|
||||
1628065540.657,
|
||||
"3"
|
||||
],
|
||||
[
|
||||
1628065600.657,
|
||||
"3"
|
||||
],
|
||||
[
|
||||
1628065660.657,
|
||||
"3"
|
||||
],
|
||||
[
|
||||
1628065720.657,
|
||||
"3"
|
||||
],
|
||||
[
|
||||
1628065780.657,
|
||||
"3"
|
||||
],
|
||||
[
|
||||
1628065840.657,
|
||||
"3"
|
||||
]
|
||||
"value": [
|
||||
1773419630,
|
||||
"3"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"stats": {
|
||||
"seriesFetched": "3",
|
||||
"executionTimeMsec": 3
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
* Query `http://127.0.0.1:8481/select/0/prometheus/api/v1/query_range` uses [VictoriaMetrics querying API](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format) to fetch previously stored data points;
|
||||
* Argument `query=count(up{kubernetes_pod_name=~".*vmselect.*"})` specifies the query we want to execute. Specifically, we calculate the number of `vmselect` pods.
|
||||
* Additional arguments `start=-10m&step=1m'` set requested time range from -10 minutes (10 minutes ago) to now (default value if `end` argument is omitted) and step (the distance between returned data points) of 1 minute;
|
||||
* By adding `| jq` we pass the output to the jq utility which outputs information in json format
|
||||
The value should be 3, which is the number of replicas we configured earlier.
|
||||
|
||||
The expected result of the query `count(up{kubernetes_pod_name=~".*vmselect.*"})` should be equal to `3` - the number of replicas we set via `replicaCount` parameter.
|
||||
You can also execute the query in VMUI by opening `http://localhost:8481/select/0/vmui/` in your browser (where 0 is the [default tenant ID](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy)).
|
||||
|
||||
Type `count(up{kubernetes_pod_name=~".*vmselect.*"})` and press **Execute query**.
|
||||
|
||||
To test via Grafana, we need to install it first. [Install and connect Grafana to VictoriaMetrics](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/#4-install-and-connect-grafana-to-victoriametrics-with-helm), login into Grafana and open the metrics explore page at `http://127.0.0.1:3000/explore`.
|
||||

|
||||
|
||||
You can also try **Explore** > **Prometheus metrics** to discover metrics collected from the Kubernetes cluster.
|
||||
|
||||

|
||||
|
||||
Choose `victoriametrics` from the list of datasources and enter `count(up{kubernetes_pod_name=~".*vmselect.*"})` to the **Metric browser** field as shown on the screenshot, then press **Run query** button:
|
||||
|
||||

|
||||
|
||||
The expected output is:
|
||||
|
||||

|
||||

|
||||
|
||||
## 5. High Availability
|
||||
|
||||
To test if High Availability works, we need to shutdown one of the `vmstorages`. To do this, run the following command:
|
||||
We can test that High Availability is working by simulating a failure. We can do this by shutting down one of the `vmstorage` pods.
|
||||
|
||||
Reduce the number of `vmstorage` pods from 3 to 2 with the following command:
|
||||
|
||||
```shell
|
||||
kubectl scale sts vmcluster-victoria-metrics-cluster-vmstorage --replicas=2
|
||||
```
|
||||
|
||||
Verify that now we have two running `vmstorages` in the cluster by executing the following command:
|
||||
|
||||
Verify that now we have two running `vmstorage` pods in the cluster by executing the following command:
|
||||
|
||||
```shell
|
||||
kubectl get pods | grep vmstorage
|
||||
kubectl get pods -l app=vmstorage
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-0 1/1 Running 0 44m
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-1 1/1 Running 0 43m
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-0 1/1 Running 0 3h20m
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-1 1/1 Running 0 3h20m
|
||||
```
|
||||
|
||||
Return to Grafana Explore and press the **Run query** button again.
|
||||
You can confirm that there are two `vmstorage` pods with this query:
|
||||
|
||||
The expected output is:
|
||||
```sh
|
||||
curl -sg 'http://127.0.0.1:8481/select/0/prometheus/api/v1/query?query=count(up{kubernetes_pod_name=~".*vmstorage.*"})' | jq
|
||||
```
|
||||
|
||||

|
||||
This should return a count of 2:
|
||||
|
||||
As you can see, after we scaled down the `vmstorage` replicas number from three to two pods, metrics are still available and correct. The response is not partial as it was before scaling. Also we see that query `count(up{kubernetes_pod_name=~".*vmselect.*"})` returns the same value as before.
|
||||
```json
|
||||
{
|
||||
"status": "success",
|
||||
"isPartial": false,
|
||||
"data": {
|
||||
"resultType": "vector",
|
||||
"result": [
|
||||
{
|
||||
"metric": {},
|
||||
"value": [
|
||||
1773437033,
|
||||
"2"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"stats": {
|
||||
"seriesFetched": "2",
|
||||
"executionTimeMsec": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
To confirm that the number of `vmstorage` pods is equivalent to two, execute the following request in Grafana Explore:
|
||||
Since each data point is stored across two storage pods, losing a single pod does not affect query results, and data remains available as long as at least one replica per time series remains reachable.
|
||||
|
||||

|
||||
You can also check if the query result is complete by examining the `isPartial` value in the response:
|
||||
- When `isPartial: false`, the response is complete for the requested time range and series. This means that enough storage replicas have responded (according to the configured `replicationFactor`).
|
||||
- When `isPartial: true`, it means `vmselect` could not fetch all the data it expected from `vmstorage`, so the returned series and values may be incomplete or incorrect.
|
||||
|
||||
Running other queries such as `count(up{kubernetes_pod_name=~".*vmselect.*"})` should still return 3.
|
||||
|
||||
```sh
|
||||
curl -sg 'http://127.0.0.1:8481/select/0/prometheus/api/v1/query?query=count(up{kubernetes_pod_name=~".*vmselect.*"})' | jq
|
||||
```
|
||||
|
||||
This should print:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "success",
|
||||
"isPartial": false,
|
||||
"data": {
|
||||
"resultType": "vector",
|
||||
"result": [
|
||||
{
|
||||
"metric": {},
|
||||
"value": [
|
||||
1773437137,
|
||||
"3"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"stats": {
|
||||
"seriesFetched": "3",
|
||||
"executionTimeMsec": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This means that queries and metric ingestion are not affected by the "failure" of a single storage pod.
|
||||
|
||||
Finally, you can scale the `vmstorage` pods back to 3 to resume normal operation:
|
||||
|
||||
```sh
|
||||
kubectl scale sts vmcluster-victoria-metrics-cluster-vmstorage --replicas=3
|
||||
```
|
||||
|
||||
## 6. Final thoughts
|
||||
|
||||
* We set up VictoriaMetrics for a Kubernetes cluster with HA.
|
||||
* We collected metrics from running services and stored them in the VictoriaMetrics database.
|
||||
* We configured `dedup.minScrapeInterval` and `replicationFactor: 2` for VictoriaMetrics cluster for high availability purposes.
|
||||
* We tested and made sure that metrics are available even if one of the `vmstorage` nodes was turned off.
|
||||
- We set up a highly available VictoriaMetrics cluster on Kubernetes.
|
||||
- We collected metrics from running services and stored them in the VictoriaMetrics database.
|
||||
- We configured `dedup.minScrapeInterval` and `replicationFactor: 2` for the VictoriaMetrics cluster for high availability purposes.
|
||||
- We tested and made sure that metrics are available even if one of the `vmstorage` nodes is turned off.
|
||||
|
||||
Next steps:
|
||||
- [Learn more about the cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/)
|
||||
- [Migrate existing metric data into VictoriaMetrics with vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/)
|
||||
- [Install Grafana](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/#id-4-install-and-connect-grafana-to-victoriametrics-with-helm)
|
||||
|
||||
|
||||