Compare commits
1 Commits
debug-grou
...
issue-1065
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
534c57b79a |
16
.github/workflows/check-commit-signed.yml
vendored
@@ -27,21 +27,11 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Check raw commit objects for a "gpgsig" header as a fast early signal for
|
||||
# contributors. Both GPG and SSH signatures use this header.
|
||||
# This avoids relying on %G? which returns N for SSH commits.
|
||||
# This check is not a security enforcement — unsigned commits cannot be merged
|
||||
# anyway due to the GitHub repository merge policy.
|
||||
unsigned=""
|
||||
for sha in $(git rev-list $RANGE); do
|
||||
if ! git cat-file commit "$sha" | grep -q "^gpgsig"; then
|
||||
unsigned="$unsigned $sha"
|
||||
fi
|
||||
done
|
||||
unsigned=$(git log --pretty="%H %G?" $RANGE | grep -vE " (G|E)$" || true)
|
||||
if [ -n "$unsigned" ]; then
|
||||
echo "Found unsigned commits:"
|
||||
echo "$unsigned"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "All commits in PR are signed (GPG or SSH)"
|
||||
|
||||
echo "All commits in PR are signed (G or E)"
|
||||
2
.github/workflows/test.yml
vendored
@@ -89,7 +89,7 @@ jobs:
|
||||
run: make ${{ matrix.scenario}}
|
||||
|
||||
- name: Publish coverage
|
||||
uses: codecov/codecov-action@v6
|
||||
uses: codecov/codecov-action@v5
|
||||
with:
|
||||
files: ./coverage.txt
|
||||
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
# VictoriaMetrics
|
||||
|
||||
[Latest release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases)
|
||||
[Docker Hub](https://hub.docker.com/u/victoriametrics)
|
||||

|
||||
[Go Report Card](https://goreportcard.com/report/github.com/VictoriaMetrics/VictoriaMetrics)
|
||||
[Build status](https://github.com/VictoriaMetrics/VictoriaMetrics/actions/workflows/build.yml)
|
||||
[Code coverage](https://app.codecov.io/gh/VictoriaMetrics/VictoriaMetrics)
|
||||
[License](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/LICENSE)
|
||||
[Slack](https://slack.victoriametrics.com)
|
||||

|
||||
[X (Twitter)](https://x.com/VictoriaMetrics/)
|
||||
[Reddit](https://www.reddit.com/r/VictoriaMetrics/)
|
||||
|
||||
|
||||
@@ -98,7 +98,7 @@ func (m *manager) close() {
|
||||
m.wg.Wait()
|
||||
}
|
||||
|
||||
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) {
|
||||
func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) error {
|
||||
id := g.GetID()
|
||||
g.Init()
|
||||
m.wg.Go(func() {
|
||||
@@ -110,6 +110,7 @@ func (m *manager) startGroup(ctx context.Context, g *rule.Group, restore bool) {
|
||||
})
|
||||
|
||||
m.groups[id] = g
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error {
|
||||
@@ -118,7 +119,7 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
for _, cfg := range groupsCfg {
|
||||
for _, r := range cfg.Rules {
|
||||
if rrPresent && arPresent {
|
||||
break
|
||||
continue
|
||||
}
|
||||
if r.Record != "" {
|
||||
rrPresent = true
|
||||
@@ -161,7 +162,10 @@ func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore
|
||||
}
|
||||
}
|
||||
for _, ng := range groupsRegistry {
|
||||
m.startGroup(ctx, ng, restore)
|
||||
if err := m.startGroup(ctx, ng, restore); err != nil {
|
||||
m.groupsMu.Unlock()
|
||||
return err
|
||||
}
|
||||
}
|
||||
m.groupsMu.Unlock()
|
||||
|
||||
|
||||
@@ -789,7 +789,16 @@ func firingAlertStaleTimeSeries(ls map[string]string, timestamp int64) []prompb.
|
||||
|
||||
// restore restores the value of ActiveAt field for active alerts,
|
||||
// based on previously written time series `alertForStateMetricName`.
|
||||
// Only rules with For > 0 can be restored.
|
||||
func (ar *AlertingRule) restore(ctx context.Context, q datasource.Querier, ts time.Time, lookback time.Duration) error {
|
||||
if ar.For < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if len(ar.alerts) < 1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
nameStr := fmt.Sprintf("%s=%q", alertNameLabel, ar.Name)
|
||||
if !*disableAlertGroupLabel {
|
||||
nameStr = fmt.Sprintf("%s=%q,%s=%q", alertGroupNameLabel, ar.GroupName, alertNameLabel, ar.Name)
|
||||
|
||||
@@ -8,7 +8,6 @@ import (
|
||||
"hash/fnv"
|
||||
"maps"
|
||||
"net/url"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -214,7 +213,6 @@ func (g *Group) CreateID() uint64 {
|
||||
// restore restores alerts state for group rules
|
||||
func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts time.Time, lookback time.Duration) error {
|
||||
for _, rule := range g.Rules {
|
||||
// Only alerting rules with For > 0 that have active alerts after the first evaluation can be restored.
|
||||
ar, ok := rule.(*AlertingRule)
|
||||
if !ok {
|
||||
continue
|
||||
@@ -222,9 +220,6 @@ func (g *Group) restore(ctx context.Context, qb datasource.QuerierBuilder, ts ti
|
||||
if ar.For < 1 {
|
||||
continue
|
||||
}
|
||||
if len(ar.alerts) < 1 {
|
||||
return nil
|
||||
}
|
||||
q := qb.BuildWithParams(datasource.QuerierParams{
|
||||
EvaluationInterval: g.Interval,
|
||||
QueryParams: g.Params,
|
||||
@@ -338,11 +333,6 @@ func (g *Group) Init() {
|
||||
// Start starts group's evaluation
|
||||
func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasource.QuerierBuilder) {
|
||||
defer func() { close(g.finishedCh) }()
|
||||
e := &executor{
|
||||
Rw: rw,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
}
|
||||
|
||||
evalTS := time.Now()
|
||||
// sleep random duration to spread group rules evaluation
|
||||
// over maxStartDelay to reduce the load on datasource.
|
||||
@@ -377,6 +367,11 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
evalTS = evalTS.Add(sleepBeforeStart)
|
||||
}
|
||||
|
||||
e := &executor{
|
||||
Rw: rw,
|
||||
notifierHeaders: g.NotifierHeaders,
|
||||
}
|
||||
|
||||
g.infof("started")
|
||||
|
||||
eval := func(ctx context.Context, ts time.Time) time.Time {
|
||||
@@ -386,9 +381,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
|
||||
if len(g.Rules) < 1 {
|
||||
g.metrics.iterationDuration.UpdateDuration(start)
|
||||
g.mu.Lock()
|
||||
g.LastEvaluation = start
|
||||
g.mu.Unlock()
|
||||
return ts
|
||||
}
|
||||
|
||||
@@ -402,32 +395,7 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
}
|
||||
}
|
||||
g.metrics.iterationDuration.UpdateDuration(start)
|
||||
g.mu.Lock()
|
||||
g.LastEvaluation = start
|
||||
g.mu.Unlock()
|
||||
if g.EvalOffset != nil && e.Rw != nil {
|
||||
hostname, err := os.Hostname()
|
||||
if err != nil {
|
||||
hostname = "unknown"
|
||||
}
|
||||
labels := map[string]string{
|
||||
"__name__": "vmalert_eval_timestamp",
|
||||
"host": hostname,
|
||||
"group": g.Name,
|
||||
"file": g.File,
|
||||
}
|
||||
var ls []prompb.Label
|
||||
for k, v := range labels {
|
||||
ls = append(ls, prompb.Label{
|
||||
Name: k,
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
ts := newTimeSeries([]float64{float64(ts.Unix())}, []int64{start.Unix()}, ls)
|
||||
if err := e.Rw.Push(ts); err != nil {
|
||||
logger.Errorf("group %q: failed to push evaluation timestamp: %s", g.Name, err)
|
||||
}
|
||||
}
|
||||
return ts
|
||||
}
|
||||
|
||||
@@ -437,11 +405,11 @@ func (g *Group) Start(ctx context.Context, rw remotewrite.RWClient, rr datasourc
|
||||
g.mu.Unlock()
|
||||
defer g.evalCancel()
|
||||
|
||||
realEvalTS := eval(evalCtx, evalTS)
|
||||
|
||||
t := time.NewTicker(g.Interval)
|
||||
defer t.Stop()
|
||||
|
||||
realEvalTS := eval(evalCtx, evalTS)
|
||||
|
||||
// restore the rules state after the first evaluation
|
||||
// so only active alerts can be restored.
|
||||
if rr != nil {
|
||||
|
||||
@@ -57,8 +57,12 @@ type ApiGroup struct {
|
||||
EvalOffset float64 `json:"eval_offset,omitempty"`
|
||||
// EvalDelay adjusts the `time` parameter of rule evaluation requests to compensate for intentional query delay from the datasource.
|
||||
EvalDelay float64 `json:"eval_delay,omitempty"`
|
||||
// States holds the number of rules in each rule state.
|
||||
States map[string]int `json:"states"`
|
||||
// Unhealthy is the number of unhealthy rules.
|
||||
Unhealthy int
|
||||
// Healthy is the number of healthy (passing) rules.
|
||||
Healthy int
|
||||
// NoMatch is the number of rules in the nomatch state.
|
||||
NoMatch int
|
||||
}
|
||||
|
||||
// APILink returns a link to the group's JSON representation.
|
||||
@@ -130,11 +134,6 @@ type ApiRule struct {
|
||||
Updates []StateEntry `json:"-"`
|
||||
}
|
||||
|
||||
// IsNoMatch returns true if rule is in nomatch state
|
||||
func (r *ApiRule) IsNoMatch() bool {
|
||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||
}
|
||||
|
||||
// ApiAlert represents a notifier.AlertingRule state
|
||||
// for WEB view
|
||||
// https://github.com/prometheus/compliance/blob/main/alert_generator/specification.md#get-apiv1rules
|
||||
@@ -236,20 +235,6 @@ func NewAlertAPI(ar *AlertingRule, a *notifier.Alert) *ApiAlert {
|
||||
return aa
|
||||
}
|
||||
|
||||
func (r *ApiRule) ExtendState() {
|
||||
if len(r.Alerts) > 0 {
|
||||
return
|
||||
}
|
||||
if r.State == "" {
|
||||
r.State = "ok"
|
||||
}
|
||||
if r.Health != "ok" {
|
||||
r.State = "unhealthy"
|
||||
} else if r.IsNoMatch() {
|
||||
r.State = "nomatch"
|
||||
}
|
||||
}
|
||||
|
||||
// ToAPI returns ApiGroup representation of g
|
||||
func (g *Group) ToAPI() *ApiGroup {
|
||||
g.mu.RLock()
|
||||
@@ -267,7 +252,6 @@ func (g *Group) ToAPI() *ApiGroup {
|
||||
Headers: headersToStrings(g.Headers),
|
||||
NotifierHeaders: headersToStrings(g.NotifierHeaders),
|
||||
Labels: g.Labels,
|
||||
States: make(map[string]int),
|
||||
}
|
||||
if g.EvalOffset != nil {
|
||||
ag.EvalOffset = g.EvalOffset.Seconds()
|
||||
@@ -275,10 +259,9 @@ func (g *Group) ToAPI() *ApiGroup {
|
||||
if g.EvalDelay != nil {
|
||||
ag.EvalDelay = g.EvalDelay.Seconds()
|
||||
}
|
||||
ag.Rules = make([]ApiRule, 0, len(g.Rules))
|
||||
ag.Rules = make([]ApiRule, 0)
|
||||
for _, r := range g.Rules {
|
||||
ar := r.ToAPI()
|
||||
ag.Rules = append(ag.Rules, ar)
|
||||
ag.Rules = append(ag.Rules, r.ToAPI())
|
||||
}
|
||||
return &ag
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
<path d="M224.163 175.27a1.9 1.9 0 0 0 2.8 0l6-5.9a2.1 2.1 0 0 0 .2-2.7 1.9 1.9 0 0 0-3-.2l-2.6 2.6v-5.2c0-1.54-1.667-2.502-3-1.732-.619.357-1 1.017-1 1.732v5.2l-2.6-2.6a1.9 1.9 0 0 0-3 .2 2.1 2.1 0 0 0 .2 2.7zm-16.459-23.297h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1m36 4h-36c-1.54 0-2.502 1.667-1.732 3 .357.619 1.017 1 1.732 1h36c1.54 0 2.502-1.667 1.732-3a2 2 0 0 0-1.732-1m-16.59-23.517a1.9 1.9 0 0 0-2.8 0l-6 5.9a2.1 2.1 0 0 0-.2 2.7 1.9 1.9 0 0 0 3 .2l2.6-2.6v5.2c0 1.54 1.667 2.502 3 1.732.619-.357 1-1.017 1-1.732v-5.2l2.6 2.6a1.9 1.9 0 0 0 3-.2 2.1 2.1 0 0 0-.2-2.7z"/>
|
||||
</symbol>
|
||||
|
||||
<symbol id="state" viewBox="-10 -10 320 310">
|
||||
<symbol id="filter" viewBox="-10 -10 320 310">
|
||||
<path d="M288.953 0h-277c-5.522 0-10 4.478-10 10v49.531c0 5.522 4.478 10 10 10h12.372l91.378 107.397v113.978a10 10 0 0 0 15.547 8.32l49.5-33a10 10 0 0 0 4.453-8.32v-80.978l91.378-107.397h12.372c5.522 0 10-4.478 10-10V10c0-5.522-4.477-10-10-10M167.587 166.77a10 10 0 0 0-2.384 6.48v79.305l-29.5 19.666V173.25a10 10 0 0 0-2.384-6.48L50.585 69.531h199.736zM278.953 49.531h-257V20h257z"/>
|
||||
</symbol>
|
||||
|
||||
|
||||
|
Before Width: | Height: | Size: 4.7 KiB After Width: | Height: | Size: 4.7 KiB |
@@ -8,9 +8,9 @@ function actionAll(isCollapse) {
|
||||
});
|
||||
}
|
||||
|
||||
function groupForState(key) {
|
||||
function groupFilter(key) {
|
||||
if (key) {
|
||||
location.href = `?state=${key}`;
|
||||
location.href = `?filter=${key}`;
|
||||
} else {
|
||||
window.location = window.location.pathname;
|
||||
}
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"cmp"
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"net/http"
|
||||
"slices"
|
||||
"strconv"
|
||||
@@ -52,7 +50,6 @@ var (
|
||||
"alert": rule.TypeAlerting,
|
||||
"record": rule.TypeRecording,
|
||||
}
|
||||
ruleStates = []string{"ok", "nomatch", "inactive", "firing", "pending", "unhealthy"}
|
||||
)
|
||||
|
||||
type requestHandler struct {
|
||||
@@ -66,14 +63,6 @@ var (
|
||||
staticServer = http.StripPrefix("/vmalert", staticHandler)
|
||||
)
|
||||
|
||||
func marshalJson(v any, kind string) ([]byte, *httpserver.ErrorWithStatusCode) {
|
||||
data, err := json.Marshal(v)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf("failed to marshal %s: %s", kind, err), http.StatusInternalServerError)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
if strings.HasPrefix(r.URL.Path, "/vmalert/static") {
|
||||
staticServer.ServeHTTP(w, r)
|
||||
@@ -105,32 +94,40 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
WriteRule(w, r, rule)
|
||||
WriteRuleDetails(w, r, rule)
|
||||
return true
|
||||
// currently used by the old vmalert UI and Grafana Alerts
|
||||
case "/vmalert/groups", "/rules":
|
||||
case "/vmalert/groups":
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
// only support filtering by a single state
|
||||
state := ""
|
||||
if len(rf.states) > 0 {
|
||||
state = rf.states[0]
|
||||
rf.states = rf.states[:1]
|
||||
}
|
||||
lr := rh.groups(rf)
|
||||
WriteListGroups(w, r, lr.Data.Groups, state)
|
||||
data := rh.groups(rf)
|
||||
WriteListGroups(w, r, data, rf.filter)
|
||||
return true
|
||||
case "/vmalert/notifiers":
|
||||
WriteListTargets(w, r, notifier.GetTargets())
|
||||
return true
|
||||
|
||||
// special cases for Grafana requests,
|
||||
// served without `vmalert` prefix:
|
||||
case "/rules":
|
||||
// Grafana makes an extra request to `/rules`
|
||||
// handler in addition to `/api/v1/rules` calls in alerts UI
|
||||
var data []*rule.ApiGroup
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data = rh.groups(rf)
|
||||
WriteListGroups(w, r, data, rf.filter)
|
||||
return true
|
||||
|
||||
case "/vmalert/api/v1/notifiers", "/api/v1/notifiers":
|
||||
data, err := rh.listNotifiers()
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -138,14 +135,15 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
case "/vmalert/api/v1/rules", "/api/v1/rules":
|
||||
// path used by Grafana for ng alerting
|
||||
var data []byte
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data, err := rh.listGroups(rf)
|
||||
data, err = rh.listGroups(rf)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -154,14 +152,14 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
|
||||
case "/vmalert/api/v1/alerts", "/api/v1/alerts":
|
||||
// path used by Grafana for ng alerting
|
||||
gf, err := newGroupsFilter(r)
|
||||
rf, err := newRulesFilter(r)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data, err := rh.listAlerts(gf)
|
||||
data, err := rh.listAlerts(rf)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -170,12 +168,12 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case "/vmalert/api/v1/alert", "/api/v1/alert":
|
||||
alert, err := rh.getAlert(r)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data, err := marshalJson(alert, "alert")
|
||||
data, err := json.Marshal(alert)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "failed to marshal alert: %s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -184,16 +182,16 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case "/vmalert/api/v1/rule", "/api/v1/rule":
|
||||
apiRule, err := rh.getRule(r)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
rwu := rule.ApiRuleWithUpdates{
|
||||
ApiRule: apiRule,
|
||||
StateUpdates: apiRule.Updates,
|
||||
}
|
||||
data, err := marshalJson(rwu, "rule")
|
||||
data, err := json.Marshal(rwu)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "failed to marshal rule: %s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -202,12 +200,12 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
case "/vmalert/api/v1/group", "/api/v1/group":
|
||||
group, err := rh.getGroup(r)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return true
|
||||
}
|
||||
data, err := marshalJson(group, "group")
|
||||
data, err := json.Marshal(group)
|
||||
if err != nil {
|
||||
errJson(w, r, err)
|
||||
httpserver.Errorf(w, r, "failed to marshal group: %s", err)
|
||||
return true
|
||||
}
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
@@ -227,10 +225,10 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, *httpserver.ErrorWithStatusCode) {
|
||||
func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
||||
}
|
||||
obj, err := rh.m.groupAPI(groupID)
|
||||
if err != nil {
|
||||
@@ -239,14 +237,14 @@ func (rh *requestHandler) getGroup(r *http.Request) (*rule.ApiGroup, *httpserver
|
||||
return obj, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, *httpserver.ErrorWithStatusCode) {
|
||||
func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return rule.ApiRule{}, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
|
||||
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
||||
}
|
||||
ruleID, err := strconv.ParseUint(r.FormValue(rule.ParamRuleID), 10, 64)
|
||||
if err != nil {
|
||||
return rule.ApiRule{}, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err), http.StatusBadRequest)
|
||||
return rule.ApiRule{}, fmt.Errorf("failed to read %q param: %w", rule.ParamRuleID, err)
|
||||
}
|
||||
obj, err := rh.m.ruleAPI(groupID, ruleID)
|
||||
if err != nil {
|
||||
@@ -255,14 +253,14 @@ func (rh *requestHandler) getRule(r *http.Request) (rule.ApiRule, *httpserver.Er
|
||||
return obj, nil
|
||||
}
|
||||
|
||||
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, *httpserver.ErrorWithStatusCode) {
|
||||
func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, error) {
|
||||
groupID, err := strconv.ParseUint(r.FormValue(rule.ParamGroupID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err), http.StatusBadRequest)
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamGroupID, err)
|
||||
}
|
||||
alertID, err := strconv.ParseUint(r.FormValue(rule.ParamAlertID), 10, 64)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err), http.StatusBadRequest)
|
||||
return nil, fmt.Errorf("failed to read %q param: %w", rule.ParamAlertID, err)
|
||||
}
|
||||
a, err := rh.m.alertAPI(groupID, alertID)
|
||||
if err != nil {
|
||||
@@ -272,76 +270,28 @@ func (rh *requestHandler) getAlert(r *http.Request) (*rule.ApiAlert, *httpserver
|
||||
}
|
||||
|
||||
type listGroupsResponse struct {
|
||||
Status string `json:"status"`
|
||||
Page int `json:"page,omitempty"`
|
||||
TotalPages int `json:"total_pages,omitempty"`
|
||||
TotalGroups int `json:"total_groups,omitempty"`
|
||||
TotalRules int `json:"total_rules,omitempty"`
|
||||
Data struct {
|
||||
Status string `json:"status"`
|
||||
Data struct {
|
||||
Groups []*rule.ApiGroup `json:"groups"`
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
type groupsFilter struct {
|
||||
groupNames []string
|
||||
files []string
|
||||
dsType config.Type
|
||||
}
|
||||
|
||||
func newGroupsFilter(r *http.Request) (*groupsFilter, *httpserver.ErrorWithStatusCode) {
|
||||
_ = r.ParseForm()
|
||||
vs := r.Form
|
||||
gf := &groupsFilter{
|
||||
groupNames: vs["rule_group[]"],
|
||||
files: vs["file[]"],
|
||||
}
|
||||
dsType := vs.Get("datasource_type")
|
||||
if len(dsType) > 0 {
|
||||
if config.SupportedType(dsType) {
|
||||
gf.dsType = config.NewRawType(dsType)
|
||||
} else {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
return gf, nil
|
||||
}
|
||||
|
||||
func (gf *groupsFilter) matches(group *rule.Group) bool {
|
||||
if len(gf.groupNames) > 0 && !slices.Contains(gf.groupNames, group.Name) {
|
||||
return false
|
||||
}
|
||||
if len(gf.files) > 0 && !slices.Contains(gf.files, group.File) {
|
||||
return false
|
||||
}
|
||||
if len(gf.dsType.Name) > 0 && gf.dsType.String() != group.Type.String() {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// see https://prometheus.io/docs/prometheus/latest/querying/api/#rules
|
||||
type rulesFilter struct {
|
||||
gf *groupsFilter
|
||||
ruleNames []string
|
||||
ruleType string
|
||||
excludeAlerts bool
|
||||
states []string
|
||||
maxGroups int
|
||||
pageNum int
|
||||
search string
|
||||
extendedStates bool
|
||||
files []string
|
||||
groupNames []string
|
||||
ruleNames []string
|
||||
ruleType string
|
||||
excludeAlerts bool
|
||||
filter string
|
||||
dsType config.Type
|
||||
}
|
||||
|
||||
func newRulesFilter(r *http.Request) (*rulesFilter, *httpserver.ErrorWithStatusCode) {
|
||||
gf, err := newGroupsFilter(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
func newRulesFilter(r *http.Request) (*rulesFilter, error) {
|
||||
rf := &rulesFilter{}
|
||||
query := r.URL.Query()
|
||||
|
||||
var rf rulesFilter
|
||||
rf.gf = gf
|
||||
vs := r.Form
|
||||
ruleTypeParam := vs.Get("type")
|
||||
ruleTypeParam := query.Get("type")
|
||||
if len(ruleTypeParam) > 0 {
|
||||
if ruleType, ok := ruleTypeMap[ruleTypeParam]; ok {
|
||||
rf.ruleType = ruleType
|
||||
@@ -350,146 +300,102 @@ func newRulesFilter(r *http.Request) (*rulesFilter, *httpserver.ErrorWithStatusC
|
||||
}
|
||||
}
|
||||
|
||||
states := vs["state"]
|
||||
if len(states) == 0 {
|
||||
states = vs["filter"]
|
||||
dsType := query.Get("datasource_type")
|
||||
if len(dsType) > 0 {
|
||||
if config.SupportedType(dsType) {
|
||||
rf.dsType = config.NewRawType(dsType)
|
||||
} else {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "datasource_type": not supported value %q`, dsType), http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
for _, s := range states {
|
||||
values := strings.Split(s, ",")
|
||||
for _, v := range values {
|
||||
if len(v) == 0 {
|
||||
continue
|
||||
}
|
||||
if !slices.Contains(ruleStates, v) {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "state": contains not supported value %q`, v), http.StatusBadRequest)
|
||||
}
|
||||
rf.states = append(rf.states, v)
|
||||
|
||||
filter := strings.ToLower(query.Get("filter"))
|
||||
if len(filter) > 0 {
|
||||
if filter == "nomatch" || filter == "unhealthy" {
|
||||
rf.filter = filter
|
||||
} else {
|
||||
return nil, errResponse(fmt.Errorf(`invalid parameter "filter": not supported value %q`, filter), http.StatusBadRequest)
|
||||
}
|
||||
}
|
||||
|
||||
rf.excludeAlerts = httputil.GetBool(r, "exclude_alerts")
|
||||
rf.extendedStates = httputil.GetBool(r, "extended_states")
|
||||
rf.ruleNames = append([]string{}, vs["rule_name[]"]...)
|
||||
rf.search = strings.ToLower(vs.Get("search"))
|
||||
|
||||
pageNum := vs.Get("page_num")
|
||||
maxGroups := vs.Get("group_limit")
|
||||
if pageNum != "" {
|
||||
if maxGroups == "" {
|
||||
return nil, errResponse(fmt.Errorf(`"group_limit" needs to be present in order to paginate over the groups`), http.StatusBadRequest)
|
||||
}
|
||||
v, err := strconv.Atoi(pageNum)
|
||||
if err != nil || v <= 0 {
|
||||
return nil, errResponse(fmt.Errorf(`"page_num" is expected to be a positive number, found %q`, pageNum), http.StatusBadRequest)
|
||||
}
|
||||
rf.pageNum = v
|
||||
}
|
||||
if maxGroups != "" {
|
||||
v, err := strconv.Atoi(maxGroups)
|
||||
if err != nil || v <= 0 {
|
||||
return nil, errResponse(fmt.Errorf(`"group_limit" is expected to be a positive number, found %q`, maxGroups), http.StatusBadRequest)
|
||||
}
|
||||
rf.maxGroups = v
|
||||
}
|
||||
return &rf, nil
|
||||
rf.ruleNames = append([]string{}, r.Form["rule_name[]"]...)
|
||||
rf.groupNames = append([]string{}, r.Form["rule_group[]"]...)
|
||||
rf.files = append([]string{}, r.Form["file[]"]...)
|
||||
return rf, nil
|
||||
}
|
||||
|
||||
func (rf *rulesFilter) matchesRule(r *rule.ApiRule) bool {
|
||||
if rf.ruleType != "" && rf.ruleType != r.Type {
|
||||
func (rf *rulesFilter) matchesGroup(group *rule.Group) bool {
|
||||
if len(rf.groupNames) > 0 && !slices.Contains(rf.groupNames, group.Name) {
|
||||
return false
|
||||
}
|
||||
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, r.Name) {
|
||||
if len(rf.files) > 0 && !slices.Contains(rf.files, group.File) {
|
||||
return false
|
||||
}
|
||||
if len(rf.states) == 0 {
|
||||
return true
|
||||
if len(rf.dsType.Name) > 0 && rf.dsType.String() != group.Type.String() {
|
||||
return false
|
||||
}
|
||||
return slices.Contains(rf.states, r.State)
|
||||
return true
|
||||
}
|
||||
|
||||
func (rh *requestHandler) groups(rf *rulesFilter) *listGroupsResponse {
|
||||
func (rh *requestHandler) groups(rf *rulesFilter) []*rule.ApiGroup {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
skipGroups := (rf.pageNum - 1) * rf.maxGroups
|
||||
lr := &listGroupsResponse{
|
||||
Status: "success",
|
||||
}
|
||||
lr.Data.Groups = make([]*rule.ApiGroup, 0)
|
||||
if skipGroups >= len(rh.m.groups) {
|
||||
return lr
|
||||
}
|
||||
// sort list of groups for deterministic output
|
||||
groups := make([]*rule.Group, 0, len(rh.m.groups))
|
||||
groups := make([]*rule.ApiGroup, 0)
|
||||
for _, group := range rh.m.groups {
|
||||
groups = append(groups, group)
|
||||
}
|
||||
|
||||
slices.SortFunc(groups, func(a, b *rule.Group) int {
|
||||
nameCmp := cmp.Compare(a.Name, b.Name)
|
||||
if nameCmp != 0 {
|
||||
return nameCmp
|
||||
}
|
||||
return cmp.Compare(a.File, b.File)
|
||||
})
|
||||
for _, group := range groups {
|
||||
if !rf.gf.matches(group) {
|
||||
if !rf.matchesGroup(group) {
|
||||
continue
|
||||
}
|
||||
groupFound := len(rf.search) == 0 || strings.Contains(strings.ToLower(group.Name), rf.search) || strings.Contains(strings.ToLower(group.File), rf.search)
|
||||
g := group.ToAPI()
|
||||
// the returned list should always be non-nil
|
||||
// https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4221
|
||||
filteredRules := make([]rule.ApiRule, 0)
|
||||
for _, rule := range g.Rules {
|
||||
if !groupFound && !strings.Contains(strings.ToLower(rule.Name), rf.search) {
|
||||
if rf.ruleType != "" && rf.ruleType != rule.Type {
|
||||
continue
|
||||
}
|
||||
if rf.extendedStates {
|
||||
rule.ExtendState()
|
||||
if len(rf.ruleNames) > 0 && !slices.Contains(rf.ruleNames, rule.Name) {
|
||||
continue
|
||||
}
|
||||
if !rf.matchesRule(&rule) {
|
||||
if (rule.LastError == "" && rf.filter == "unhealthy") || (!isNoMatch(rule) && rf.filter == "nomatch") {
|
||||
continue
|
||||
}
|
||||
if rf.excludeAlerts {
|
||||
rule.Alerts = nil
|
||||
}
|
||||
g.States[rule.State]++
|
||||
if rule.LastError != "" {
|
||||
g.Unhealthy++
|
||||
} else {
|
||||
g.Healthy++
|
||||
}
|
||||
if isNoMatch(rule) {
|
||||
g.NoMatch++
|
||||
}
|
||||
filteredRules = append(filteredRules, rule)
|
||||
}
|
||||
if len(g.Rules) == 0 || len(filteredRules) > 0 {
|
||||
if rf.maxGroups > 0 {
|
||||
lr.TotalGroups++
|
||||
lr.TotalRules += len(filteredRules)
|
||||
}
|
||||
if skipGroups > 0 {
|
||||
skipGroups--
|
||||
continue
|
||||
}
|
||||
if rf.maxGroups == 0 || len(lr.Data.Groups) < rf.maxGroups {
|
||||
g.Rules = filteredRules
|
||||
lr.Data.Groups = append(lr.Data.Groups, g)
|
||||
}
|
||||
g.Rules = filteredRules
|
||||
groups = append(groups, g)
|
||||
}
|
||||
// sort list of groups for deterministic output
|
||||
slices.SortFunc(groups, func(a, b *rule.ApiGroup) int {
|
||||
if a.Name != b.Name {
|
||||
return strings.Compare(a.Name, b.Name)
|
||||
}
|
||||
}
|
||||
if rf.maxGroups > 0 {
|
||||
lr.Page = rf.pageNum
|
||||
lr.TotalPages = max(int(math.Ceil(float64(lr.TotalGroups)/float64(rf.maxGroups))), 1)
|
||||
}
|
||||
return lr
|
||||
return strings.Compare(a.File, b.File)
|
||||
})
|
||||
return groups
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, *httpserver.ErrorWithStatusCode) {
|
||||
lr := rh.groups(rf)
|
||||
if rf.pageNum > 1 && len(lr.Data.Groups) == 0 {
|
||||
return nil, errResponse(fmt.Errorf(`page_num exceeds total amount of pages`), http.StatusBadRequest)
|
||||
}
|
||||
if lr.Page > lr.TotalPages {
|
||||
return nil, errResponse(fmt.Errorf(`page_num=%d exceeds total amount of pages in result=%d`, lr.Page, lr.TotalPages), http.StatusBadRequest)
|
||||
}
|
||||
func (rh *requestHandler) listGroups(rf *rulesFilter) ([]byte, error) {
|
||||
lr := listGroupsResponse{Status: "success"}
|
||||
lr.Data.Groups = rh.groups(rf)
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf(`error encoding list of groups: %w`, err), http.StatusInternalServerError)
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
@@ -528,14 +434,14 @@ func (rh *requestHandler) groupAlerts() []rule.GroupAlerts {
|
||||
return gAlerts
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listAlerts(gf *groupsFilter) ([]byte, *httpserver.ErrorWithStatusCode) {
|
||||
func (rh *requestHandler) listAlerts(rf *rulesFilter) ([]byte, error) {
|
||||
rh.m.groupsMu.RLock()
|
||||
defer rh.m.groupsMu.RUnlock()
|
||||
|
||||
lr := listAlertsResponse{Status: "success"}
|
||||
lr.Data.Alerts = make([]*rule.ApiAlert, 0)
|
||||
for _, group := range rh.m.groups {
|
||||
if !gf.matches(group) {
|
||||
if !rf.matchesGroup(group) {
|
||||
continue
|
||||
}
|
||||
g := group.ToAPI()
|
||||
@@ -554,7 +460,10 @@ func (rh *requestHandler) listAlerts(gf *groupsFilter) ([]byte, *httpserver.Erro
|
||||
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf(`error encoding list of active alerts: %w`, err), http.StatusInternalServerError)
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of active alerts: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
@@ -566,7 +475,7 @@ type listNotifiersResponse struct {
|
||||
} `json:"data"`
|
||||
}
|
||||
|
||||
func (rh *requestHandler) listNotifiers() ([]byte, *httpserver.ErrorWithStatusCode) {
|
||||
func (rh *requestHandler) listNotifiers() ([]byte, error) {
|
||||
targets := notifier.GetTargets()
|
||||
|
||||
lr := listNotifiersResponse{Status: "success"}
|
||||
@@ -588,7 +497,10 @@ func (rh *requestHandler) listNotifiers() ([]byte, *httpserver.ErrorWithStatusCo
|
||||
|
||||
b, err := json.Marshal(lr)
|
||||
if err != nil {
|
||||
return nil, errResponse(fmt.Errorf(`error encoding list of notifiers: %w`, err), http.StatusInternalServerError)
|
||||
return nil, &httpserver.ErrorWithStatusCode{
|
||||
Err: fmt.Errorf(`error encoding list of notifiers: %w`, err),
|
||||
StatusCode: http.StatusInternalServerError,
|
||||
}
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
@@ -599,8 +511,3 @@ func errResponse(err error, sc int) *httpserver.ErrorWithStatusCode {
|
||||
StatusCode: sc,
|
||||
}
|
||||
}
|
||||
|
||||
func errJson(w http.ResponseWriter, r *http.Request, err *httpserver.ErrorWithStatusCode) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
httpserver.Errorf(w, r, `{"error":%q,"errorType":%d}`, err, err.StatusCode)
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
|
||||
) %}
|
||||
|
||||
{% func Controls(prefix, currentIcon, currentText string, icons, states map[string]string, search bool) %}
|
||||
{% func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) %}
|
||||
<div class="btn-toolbar mb-3" role="toolbar">
|
||||
<div class="d-flex gap-2 justify-content-between w-100">
|
||||
<div class="d-flex gap-2 align-items-center">
|
||||
@@ -28,10 +28,10 @@
|
||||
<use href="{%s prefix %}static/icons/icons.svg#expand"/>
|
||||
</svg>
|
||||
</a>
|
||||
{% if len(states) > 0 %}
|
||||
{% if len(filters) > 0 %}
|
||||
<span class="d-none d-md-inline-block">Filter by status:</span>
|
||||
<svg class="d-md-none" width="20" height="20">
|
||||
<use href="{%s prefix %}static/icons/icons.svg#state">
|
||||
<use href="{%s prefix %}static/icons/icons.svg#filter">
|
||||
</svg>
|
||||
<div class="dropdown">
|
||||
<button
|
||||
@@ -46,10 +46,10 @@
|
||||
</svg>
|
||||
</button>
|
||||
<ul class="dropdown-menu">
|
||||
{% for key, title := range states %}
|
||||
{% for key, title := range filters %}
|
||||
{% if title != currentText %}
|
||||
<li>
|
||||
<a class="dropdown-item" onclick="groupForState('{%s key %}')">
|
||||
<a class="dropdown-item" onclick="groupFilter('{%s key %}')">
|
||||
<span class="d-none d-md-inline-block">{%s title %}</span>
|
||||
<svg class="d-md-none" width="22" height="22">
|
||||
<use href="{%s prefix %}static/icons/icons.svg#{%s icons[key] %}"/>
|
||||
@@ -97,10 +97,10 @@
|
||||
{%= tpl.Footer(r) %}
|
||||
{% endfunc %}
|
||||
|
||||
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, state string) %}
|
||||
{% func ListGroups(r *http.Request, groups []*rule.ApiGroup, filter string) %}
|
||||
{%code
|
||||
prefix := vmalertutil.Prefix(r.URL.Path)
|
||||
states := map[string]string{
|
||||
filters := map[string]string{
|
||||
"": "All",
|
||||
"unhealthy": "Unhealthy",
|
||||
"nomatch": "No Match",
|
||||
@@ -110,14 +110,14 @@
|
||||
"unhealthy": "unhealthy",
|
||||
"nomatch": "nomatch",
|
||||
}
|
||||
currentText := states[state]
|
||||
currentIcon := icons[state]
|
||||
currentText := filters[filter]
|
||||
currentIcon := icons[filter]
|
||||
%}
|
||||
{%= tpl.Header(r, navItems, "Groups", getLastConfigError()) %}
|
||||
{%= Controls(prefix, currentIcon, currentText, icons, states, true) %}
|
||||
{%= Controls(prefix, currentIcon, currentText, icons, filters, true) %}
|
||||
{% if len(groups) > 0 %}
|
||||
{% for _, g := range groups %}
|
||||
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.States["unhealthy"] > 0 %} alert-danger{% endif %}">
|
||||
<div id="group-{%s g.ID %}" class="w-100 border-0 flex-column vm-group{% if g.Unhealthy > 0 %} alert-danger{% endif %}">
|
||||
<span class="d-flex justify-content-between">
|
||||
<a
|
||||
class="vm-group-search"
|
||||
@@ -130,9 +130,9 @@
|
||||
data-bs-target="#item-{%s g.ID %}"
|
||||
>
|
||||
<span class="d-flex gap-2">
|
||||
{% if g.States["unhealthy"] > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.States["unhealthy"] %}</span> {% endif %}
|
||||
{% if g.States["nomatch"] > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d g.States["nomatch"] %}</span> {% endif %}
|
||||
<span class="badge bg-success" title="Number of rules with status Ok">{%d g.States["ok"] %}</span>
|
||||
{% if g.Unhealthy > 0 %}<span class="badge bg-danger" title="Number of rules with status Error">{%d g.Unhealthy %}</span> {% endif %}
|
||||
{% if g.NoMatch > 0 %}<span class="badge bg-warning" title="Number of rules with status NoMatch">{%d g.NoMatch %}</span> {% endif %}
|
||||
<span class="badge bg-success" title="Number of rules with status Ok">{%d g.Healthy %}</span>
|
||||
</span>
|
||||
</span>
|
||||
</span>
|
||||
@@ -189,7 +189,7 @@
|
||||
<b>record:</b> {%s r.Name %}
|
||||
{% endif %}
|
||||
|
|
||||
{%= seriesFetchedWarn(prefix, &r) %}
|
||||
{%= seriesFetchedWarn(prefix, r) %}
|
||||
<span><a target="_blank" href="{%s prefix+r.WebLink() %}">Details</a></span>
|
||||
</div>
|
||||
<div class="col-12">
|
||||
@@ -476,7 +476,7 @@
|
||||
{% endfunc %}
|
||||
|
||||
|
||||
{% func Rule(r *http.Request, rule rule.ApiRule) %}
|
||||
{% func RuleDetails(r *http.Request, rule rule.ApiRule) %}
|
||||
{%code prefix := vmalertutil.Prefix(r.URL.Path) %}
|
||||
{%= tpl.Header(r, navItems, "", getLastConfigError()) %}
|
||||
{%code
|
||||
@@ -661,8 +661,8 @@
|
||||
<span class="badge bg-warning text-dark" title="This firing state is kept because of `keep_firing_for`">stabilizing</span>
|
||||
{% endfunc %}
|
||||
|
||||
{% func seriesFetchedWarn(prefix string, r *rule.ApiRule) %}
|
||||
{% if r.IsNoMatch() %}
|
||||
{% func seriesFetchedWarn(prefix string, r rule.ApiRule) %}
|
||||
{% if isNoMatch(r) %}
|
||||
<svg
|
||||
data-bs-toggle="tooltip"
|
||||
title="No match! This rule's last evaluation hasn't selected any time series from the datasource.
|
||||
@@ -673,3 +673,9 @@
|
||||
</svg>
|
||||
{% endif %}
|
||||
{% endfunc %}
|
||||
|
||||
{%code
|
||||
func isNoMatch (r rule.ApiRule) bool {
|
||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||
}
|
||||
%}
|
||||
|
||||
@@ -31,7 +31,7 @@ var (
|
||||
)
|
||||
|
||||
//line app/vmalert/web.qtpl:15
|
||||
func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText string, icons, states map[string]string, search bool) {
|
||||
func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) {
|
||||
//line app/vmalert/web.qtpl:15
|
||||
qw422016.N().S(`
|
||||
<div class="btn-toolbar mb-3" role="toolbar">
|
||||
@@ -59,7 +59,7 @@ func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText
|
||||
</a>
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:31
|
||||
if len(states) > 0 {
|
||||
if len(filters) > 0 {
|
||||
//line app/vmalert/web.qtpl:31
|
||||
qw422016.N().S(`
|
||||
<span class="d-none d-md-inline-block">Filter by status:</span>
|
||||
@@ -68,7 +68,7 @@ func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText
|
||||
//line app/vmalert/web.qtpl:34
|
||||
qw422016.E().S(prefix)
|
||||
//line app/vmalert/web.qtpl:34
|
||||
qw422016.N().S(`static/icons/icons.svg#state">
|
||||
qw422016.N().S(`static/icons/icons.svg#filter">
|
||||
</svg>
|
||||
<div class="dropdown">
|
||||
<button
|
||||
@@ -97,7 +97,7 @@ func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText
|
||||
<ul class="dropdown-menu">
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:49
|
||||
for key, title := range states {
|
||||
for key, title := range filters {
|
||||
//line app/vmalert/web.qtpl:49
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
@@ -106,7 +106,7 @@ func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText
|
||||
//line app/vmalert/web.qtpl:50
|
||||
qw422016.N().S(`
|
||||
<li>
|
||||
<a class="dropdown-item" onclick="groupForState('`)
|
||||
<a class="dropdown-item" onclick="groupFilter('`)
|
||||
//line app/vmalert/web.qtpl:52
|
||||
qw422016.E().S(key)
|
||||
//line app/vmalert/web.qtpl:52
|
||||
@@ -176,22 +176,22 @@ func StreamControls(qw422016 *qt422016.Writer, prefix, currentIcon, currentText
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:77
|
||||
func WriteControls(qq422016 qtio422016.Writer, prefix, currentIcon, currentText string, icons, states map[string]string, search bool) {
|
||||
func WriteControls(qq422016 qtio422016.Writer, prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) {
|
||||
//line app/vmalert/web.qtpl:77
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/web.qtpl:77
|
||||
StreamControls(qw422016, prefix, currentIcon, currentText, icons, states, search)
|
||||
StreamControls(qw422016, prefix, currentIcon, currentText, icons, filters, search)
|
||||
//line app/vmalert/web.qtpl:77
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/web.qtpl:77
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:77
|
||||
func Controls(prefix, currentIcon, currentText string, icons, states map[string]string, search bool) string {
|
||||
func Controls(prefix, currentIcon, currentText string, icons, filters map[string]string, search bool) string {
|
||||
//line app/vmalert/web.qtpl:77
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/web.qtpl:77
|
||||
WriteControls(qb422016, prefix, currentIcon, currentText, icons, states, search)
|
||||
WriteControls(qb422016, prefix, currentIcon, currentText, icons, filters, search)
|
||||
//line app/vmalert/web.qtpl:77
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/web.qtpl:77
|
||||
@@ -324,13 +324,13 @@ func Welcome(r *http.Request) string {
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:100
|
||||
func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule.ApiGroup, state string) {
|
||||
func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule.ApiGroup, filter string) {
|
||||
//line app/vmalert/web.qtpl:100
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:102
|
||||
prefix := vmalertutil.Prefix(r.URL.Path)
|
||||
states := map[string]string{
|
||||
filters := map[string]string{
|
||||
"": "All",
|
||||
"unhealthy": "Unhealthy",
|
||||
"nomatch": "No Match",
|
||||
@@ -340,8 +340,8 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
"unhealthy": "unhealthy",
|
||||
"nomatch": "nomatch",
|
||||
}
|
||||
currentText := states[state]
|
||||
currentIcon := icons[state]
|
||||
currentText := filters[filter]
|
||||
currentIcon := icons[filter]
|
||||
|
||||
//line app/vmalert/web.qtpl:115
|
||||
qw422016.N().S(`
|
||||
@@ -352,7 +352,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:117
|
||||
StreamControls(qw422016, prefix, currentIcon, currentText, icons, states, true)
|
||||
StreamControls(qw422016, prefix, currentIcon, currentText, icons, filters, true)
|
||||
//line app/vmalert/web.qtpl:117
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
@@ -371,7 +371,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
//line app/vmalert/web.qtpl:120
|
||||
qw422016.N().S(`" class="w-100 border-0 flex-column vm-group`)
|
||||
//line app/vmalert/web.qtpl:120
|
||||
if g.States["unhealthy"] > 0 {
|
||||
if g.Unhealthy > 0 {
|
||||
//line app/vmalert/web.qtpl:120
|
||||
qw422016.N().S(` alert-danger`)
|
||||
//line app/vmalert/web.qtpl:120
|
||||
@@ -418,11 +418,11 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
<span class="d-flex gap-2">
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:133
|
||||
if g.States["unhealthy"] > 0 {
|
||||
if g.Unhealthy > 0 {
|
||||
//line app/vmalert/web.qtpl:133
|
||||
qw422016.N().S(`<span class="badge bg-danger" title="Number of rules with status Error">`)
|
||||
//line app/vmalert/web.qtpl:133
|
||||
qw422016.N().D(g.States["unhealthy"])
|
||||
qw422016.N().D(g.Unhealthy)
|
||||
//line app/vmalert/web.qtpl:133
|
||||
qw422016.N().S(`</span> `)
|
||||
//line app/vmalert/web.qtpl:133
|
||||
@@ -431,11 +431,11 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:134
|
||||
if g.States["nomatch"] > 0 {
|
||||
if g.NoMatch > 0 {
|
||||
//line app/vmalert/web.qtpl:134
|
||||
qw422016.N().S(`<span class="badge bg-warning" title="Number of rules with status NoMatch">`)
|
||||
//line app/vmalert/web.qtpl:134
|
||||
qw422016.N().D(g.States["nomatch"])
|
||||
qw422016.N().D(g.NoMatch)
|
||||
//line app/vmalert/web.qtpl:134
|
||||
qw422016.N().S(`</span> `)
|
||||
//line app/vmalert/web.qtpl:134
|
||||
@@ -444,7 +444,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
qw422016.N().S(`
|
||||
<span class="badge bg-success" title="Number of rules with status Ok">`)
|
||||
//line app/vmalert/web.qtpl:135
|
||||
qw422016.N().D(g.States["ok"])
|
||||
qw422016.N().D(g.Healthy)
|
||||
//line app/vmalert/web.qtpl:135
|
||||
qw422016.N().S(`</span>
|
||||
</span>
|
||||
@@ -617,7 +617,7 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
|
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:192
|
||||
streamseriesFetchedWarn(qw422016, prefix, &r)
|
||||
streamseriesFetchedWarn(qw422016, prefix, r)
|
||||
//line app/vmalert/web.qtpl:192
|
||||
qw422016.N().S(`
|
||||
<span><a target="_blank" href="`)
|
||||
@@ -750,22 +750,22 @@ func StreamListGroups(qw422016 *qt422016.Writer, r *http.Request, groups []*rule
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:234
|
||||
func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, groups []*rule.ApiGroup, state string) {
|
||||
func WriteListGroups(qq422016 qtio422016.Writer, r *http.Request, groups []*rule.ApiGroup, filter string) {
|
||||
//line app/vmalert/web.qtpl:234
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/web.qtpl:234
|
||||
StreamListGroups(qw422016, r, groups, state)
|
||||
StreamListGroups(qw422016, r, groups, filter)
|
||||
//line app/vmalert/web.qtpl:234
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/web.qtpl:234
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:234
|
||||
func ListGroups(r *http.Request, groups []*rule.ApiGroup, state string) string {
|
||||
func ListGroups(r *http.Request, groups []*rule.ApiGroup, filter string) string {
|
||||
//line app/vmalert/web.qtpl:234
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/web.qtpl:234
|
||||
WriteListGroups(qb422016, r, groups, state)
|
||||
WriteListGroups(qb422016, r, groups, filter)
|
||||
//line app/vmalert/web.qtpl:234
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/web.qtpl:234
|
||||
@@ -1462,7 +1462,7 @@ func Alert(r *http.Request, alert *rule.ApiAlert) string {
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:479
|
||||
func StreamRule(qw422016 *qt422016.Writer, r *http.Request, rule rule.ApiRule) {
|
||||
func StreamRuleDetails(qw422016 *qt422016.Writer, r *http.Request, rule rule.ApiRule) {
|
||||
//line app/vmalert/web.qtpl:479
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
@@ -1859,22 +1859,22 @@ func StreamRule(qw422016 *qt422016.Writer, r *http.Request, rule rule.ApiRule) {
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:642
|
||||
func WriteRule(qq422016 qtio422016.Writer, r *http.Request, rule rule.ApiRule) {
|
||||
func WriteRuleDetails(qq422016 qtio422016.Writer, r *http.Request, rule rule.ApiRule) {
|
||||
//line app/vmalert/web.qtpl:642
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/web.qtpl:642
|
||||
StreamRule(qw422016, r, rule)
|
||||
StreamRuleDetails(qw422016, r, rule)
|
||||
//line app/vmalert/web.qtpl:642
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vmalert/web.qtpl:642
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:642
|
||||
func Rule(r *http.Request, rule rule.ApiRule) string {
|
||||
func RuleDetails(r *http.Request, rule rule.ApiRule) string {
|
||||
//line app/vmalert/web.qtpl:642
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/web.qtpl:642
|
||||
WriteRule(qb422016, r, rule)
|
||||
WriteRuleDetails(qb422016, r, rule)
|
||||
//line app/vmalert/web.qtpl:642
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vmalert/web.qtpl:642
|
||||
@@ -2015,12 +2015,12 @@ func badgeStabilizing() string {
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:664
|
||||
func streamseriesFetchedWarn(qw422016 *qt422016.Writer, prefix string, r *rule.ApiRule) {
|
||||
func streamseriesFetchedWarn(qw422016 *qt422016.Writer, prefix string, r rule.ApiRule) {
|
||||
//line app/vmalert/web.qtpl:664
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line app/vmalert/web.qtpl:665
|
||||
if r.IsNoMatch() {
|
||||
if isNoMatch(r) {
|
||||
//line app/vmalert/web.qtpl:665
|
||||
qw422016.N().S(`
|
||||
<svg
|
||||
@@ -2045,7 +2045,7 @@ func streamseriesFetchedWarn(qw422016 *qt422016.Writer, prefix string, r *rule.A
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:675
|
||||
func writeseriesFetchedWarn(qq422016 qtio422016.Writer, prefix string, r *rule.ApiRule) {
|
||||
func writeseriesFetchedWarn(qq422016 qtio422016.Writer, prefix string, r rule.ApiRule) {
|
||||
//line app/vmalert/web.qtpl:675
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vmalert/web.qtpl:675
|
||||
@@ -2056,7 +2056,7 @@ func writeseriesFetchedWarn(qq422016 qtio422016.Writer, prefix string, r *rule.A
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:675
|
||||
func seriesFetchedWarn(prefix string, r *rule.ApiRule) string {
|
||||
func seriesFetchedWarn(prefix string, r rule.ApiRule) string {
|
||||
//line app/vmalert/web.qtpl:675
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vmalert/web.qtpl:675
|
||||
@@ -2069,3 +2069,8 @@ func seriesFetchedWarn(prefix string, r *rule.ApiRule) string {
|
||||
return qs422016
|
||||
//line app/vmalert/web.qtpl:675
|
||||
}
|
||||
|
||||
//line app/vmalert/web.qtpl:678
|
||||
func isNoMatch(r rule.ApiRule) bool {
|
||||
return r.LastSamples == 0 && r.LastSeriesFetched != nil && *r.LastSeriesFetched == 0
|
||||
}
|
||||
|
||||
@@ -210,7 +210,7 @@ func TestHandler(t *testing.T) {
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("/api/v1/rules&states", func(t *testing.T) {
|
||||
t.Run("/api/v1/rules&filters", func(t *testing.T) {
|
||||
check := func(url string, statusCode, expGroups, expRules int) {
|
||||
t.Helper()
|
||||
lr := listGroupsResponse{}
|
||||
@@ -252,15 +252,9 @@ func TestHandler(t *testing.T) {
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=foo", 200, 0, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml", 200, 3, 6)
|
||||
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 0, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=foo", 200, 3, 0)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert", 200, 3, 3)
|
||||
check("/api/v1/rules?rule_group[]=group&file[]=rules.yaml&rule_name[]=alert&rule_name[]=record", 200, 3, 6)
|
||||
|
||||
check("/api/v1/rules?group_limit=1", 200, 1, 2)
|
||||
check("/api/v1/rules?group_limit=1&type=alert", 200, 1, 1)
|
||||
check("/api/v1/rules?group_limit=1&type=record", 200, 1, 1)
|
||||
check("/api/v1/rules?group_limit=2", 200, 2, 4)
|
||||
check(fmt.Sprintf("/api/v1/rules?group_limit=1&page_num=%d", 1), 200, 1, 2)
|
||||
})
|
||||
t.Run("/api/v1/rules&exclude_alerts=true", func(t *testing.T) {
|
||||
// check if response returns active alerts by default
|
||||
|
||||
@@ -30,7 +30,6 @@ var (
|
||||
concurrency = flag.Int("concurrency", 10, "The number of concurrent workers. Higher concurrency may reduce restore duration")
|
||||
maxBytesPerSecond = flagutil.NewBytes("maxBytesPerSecond", 0, "The maximum download speed. There is no limit if it is set to 0")
|
||||
skipBackupCompleteCheck = flag.Bool("skipBackupCompleteCheck", false, "Whether to skip checking for 'backup complete' file in -src. This may be useful for restoring from old backups, which were created without 'backup complete' file")
|
||||
SkipPreallocation = flag.Bool("skipFilePreallocation", false, "Whether to skip pre-allocated files. This will likely be slower in most cases, but allows restores to resume mid file on failure")
|
||||
)
|
||||
|
||||
func main() {
|
||||
@@ -64,7 +63,6 @@ func main() {
|
||||
Src: srcFS,
|
||||
Dst: dstFS,
|
||||
SkipBackupCompleteCheck: *skipBackupCompleteCheck,
|
||||
SkipPreallocation: *SkipPreallocation,
|
||||
}
|
||||
pushmetrics.Init()
|
||||
if err := a.Run(ctx); err != nil {
|
||||
|
||||
@@ -743,26 +743,6 @@ func proxyVMAlertRequests(w http.ResponseWriter, r *http.Request, path string) {
|
||||
req := r.Clone(r.Context())
|
||||
req.URL.Path = strings.TrimPrefix(path, "prometheus")
|
||||
req.Host = vmalertProxyHost
|
||||
|
||||
if strings.HasPrefix(r.Header.Get(`User-Agent`), `Grafana`) {
|
||||
// Grafana currently supports only Prometheus-style alerts. If other alert types
|
||||
// (e.g. logs or traces) are returned, it may fail with "Error loading alerts".
|
||||
//
|
||||
// Grafana queries the vmalert API directly, bypassing the VictoriaMetrics datasource,
|
||||
// so query params (such as datasource_type) cannot be enforced on the Grafana side.
|
||||
//
|
||||
// To ensure compatibility, we detect Grafana requests via the User-Agent and enforce
|
||||
// `datasource_type=prometheus`.
|
||||
//
|
||||
// See:
|
||||
// - https://github.com/VictoriaMetrics/victoriametrics-datasource/issues/329#issuecomment-3847585443
|
||||
// - https://github.com/VictoriaMetrics/victoriametrics-datasource/issues/59
|
||||
q := req.URL.Query()
|
||||
q.Set("datasource_type", "prometheus")
|
||||
req.URL.RawQuery = q.Encode()
|
||||
req.RequestURI = ""
|
||||
}
|
||||
|
||||
vmalertProxy.ServeHTTP(w, req)
|
||||
}
|
||||
|
||||
|
||||
209
app/vmselect/vmui/assets/index-DeVEZ1fy.js
Normal file
1
app/vmselect/vmui/assets/index-DffVfcrT.css
Normal file
@@ -1 +0,0 @@
|
||||
var e=Object.create,t=Object.defineProperty,n=Object.getOwnPropertyDescriptor,r=Object.getOwnPropertyNames,i=Object.getPrototypeOf,a=Object.prototype.hasOwnProperty,o=(e,t)=>()=>(e&&(t=e(e=0)),t),s=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports),c=(e,n)=>{let r={};for(var i in e)t(r,i,{get:e[i],enumerable:!0});return n||t(r,Symbol.toStringTag,{value:`Module`}),r},l=(e,i,o,s)=>{if(i&&typeof i==`object`||typeof i==`function`)for(var c=r(i),l=0,u=c.length,d;l<u;l++)d=c[l],!a.call(e,d)&&d!==o&&t(e,d,{get:(e=>i[e]).bind(null,d),enumerable:!(s=n(i,d))||s.enumerable});return e},u=(n,r,a)=>(a=n==null?{}:e(i(n)),l(r||!n||!n.__esModule?t(a,`default`,{value:n,enumerable:!0}):a,n)),d=e=>a.call(e,`module.exports`)?e[`module.exports`]:l(t({},`__esModule`,{value:!0}),e);export{u as a,d as i,o as n,c as r,s as t};
|
||||
66
app/vmselect/vmui/assets/vendor-BR6Q0Fin.js
Normal file
@@ -1 +0,0 @@
|
||||
.uplot,.uplot *,.uplot :before,.uplot :after{box-sizing:border-box}.uplot{width:min-content;font-family:system-ui,-apple-system,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji;line-height:1.5}.u-title{text-align:center;font-size:18px;font-weight:700}.u-wrap{-webkit-user-select:none;user-select:none;position:relative}.u-over,.u-under{position:absolute}.u-under{overflow:hidden}.uplot canvas{width:100%;height:100%;display:block;position:relative}.u-axis{position:absolute}.u-legend{text-align:center;margin:auto;font-size:14px}.u-inline{display:block}.u-inline *{display:inline-block}.u-inline tr{margin-right:16px}.u-legend th{font-weight:600}.u-legend th>*{vertical-align:middle;display:inline-block}.u-legend .u-marker{width:1em;height:1em;margin-right:4px;background-clip:padding-box!important}.u-inline.u-live th:after{content:":";vertical-align:middle}.u-inline:not(.u-live) .u-value{display:none}.u-series>*{padding:4px}.u-series th{cursor:pointer}.u-legend .u-off>*{opacity:.3}.u-select{pointer-events:none;background:#00000012;position:absolute}.u-cursor-x,.u-cursor-y{pointer-events:none;will-change:transform;position:absolute;top:0;left:0}.u-hz .u-cursor-x,.u-vt .u-cursor-y{border-right:1px dashed #607d8b;height:100%}.u-hz .u-cursor-y,.u-vt .u-cursor-x{border-bottom:1px dashed #607d8b;width:100%}.u-cursor-pt{pointer-events:none;will-change:transform;border:0 solid;border-radius:50%;position:absolute;top:0;left:0;background-clip:padding-box!important}.u-axis.u-off,.u-select.u-off,.u-cursor-x.u-off,.u-cursor-y.u-off,.u-cursor-pt.u-off{display:none}
|
||||
1
app/vmselect/vmui/assets/vendor-D1GxaB_c.css
Normal file
@@ -0,0 +1 @@
|
||||
.uplot,.uplot *,.uplot *:before,.uplot *:after{box-sizing:border-box}.uplot{font-family:system-ui,-apple-system,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji";line-height:1.5;width:min-content}.u-title{text-align:center;font-size:18px;font-weight:700}.u-wrap{position:relative;-webkit-user-select:none;user-select:none}.u-over,.u-under{position:absolute}.u-under{overflow:hidden}.uplot canvas{display:block;position:relative;width:100%;height:100%}.u-axis{position:absolute}.u-legend{font-size:14px;margin:auto;text-align:center}.u-inline{display:block}.u-inline *{display:inline-block}.u-inline tr{margin-right:16px}.u-legend th{font-weight:600}.u-legend th>*{vertical-align:middle;display:inline-block}.u-legend .u-marker{width:1em;height:1em;margin-right:4px;background-clip:padding-box!important}.u-inline.u-live th:after{content:":";vertical-align:middle}.u-inline:not(.u-live) .u-value{display:none}.u-series>*{padding:4px}.u-series th{cursor:pointer}.u-legend .u-off>*{opacity:.3}.u-select{background:#00000012;position:absolute;pointer-events:none}.u-cursor-x,.u-cursor-y{position:absolute;left:0;top:0;pointer-events:none;will-change:transform}.u-hz .u-cursor-x,.u-vt .u-cursor-y{height:100%;border-right:1px dashed #607D8B}.u-hz .u-cursor-y,.u-vt .u-cursor-x{width:100%;border-bottom:1px dashed #607D8B}.u-cursor-pt{position:absolute;top:0;left:0;border-radius:50%;border:0 solid;pointer-events:none;will-change:transform;background-clip:padding-box!important}.u-axis.u-off,.u-select.u-off,.u-cursor-x.u-off,.u-cursor-y.u-off,.u-cursor-pt.u-off{display:none}
|
||||
@@ -37,11 +37,10 @@
|
||||
<meta property="og:title" content="UI for VictoriaMetrics">
|
||||
<meta property="og:url" content="https://victoriametrics.com/">
|
||||
<meta property="og:description" content="Explore and troubleshoot your VictoriaMetrics data">
|
||||
<script type="module" crossorigin src="./assets/index-KEOgEEMl.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/rolldown-runtime-COnpUsM8.js">
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-Mr0bmX1E.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-CnsZ1jie.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-D2OEy8Ra.css">
|
||||
<script type="module" crossorigin src="./assets/index-DeVEZ1fy.js"></script>
|
||||
<link rel="modulepreload" crossorigin href="./assets/vendor-BR6Q0Fin.js">
|
||||
<link rel="stylesheet" crossorigin href="./assets/vendor-D1GxaB_c.css">
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-DffVfcrT.css">
|
||||
</head>
|
||||
<body>
|
||||
<noscript>You need to enable JavaScript to run this app.</noscript>
|
||||
|
||||
@@ -319,7 +319,6 @@ func Stop() {
|
||||
Storage.MustClose()
|
||||
logger.Infof("successfully closed the storage in %.3f seconds", time.Since(startTime).Seconds())
|
||||
|
||||
fs.MustStopDirRemover()
|
||||
logger.Infof("the storage has been stopped")
|
||||
}
|
||||
|
||||
|
||||
3740
app/vmui/packages/vmui/package-lock.json
generated
@@ -21,42 +21,43 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"classnames": "^2.5.1",
|
||||
"dayjs": "^1.11.20",
|
||||
"dayjs": "^1.11.19",
|
||||
"lodash.debounce": "^4.0.8",
|
||||
"marked": "^17.0.5",
|
||||
"preact": "^10.29.0",
|
||||
"qs": "^6.15.0",
|
||||
"marked": "^17.0.1",
|
||||
"preact": "^10.28.3",
|
||||
"qs": "^6.14.1",
|
||||
"react-input-mask": "^2.0.4",
|
||||
"react-router-dom": "^7.13.2",
|
||||
"react-router-dom": "^7.13.0",
|
||||
"uplot": "^1.6.32",
|
||||
"vite": "^8.0.2",
|
||||
"vite": "^7.3.1",
|
||||
"web-vitals": "^5.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@eslint/eslintrc": "^3.3.5",
|
||||
"@eslint/eslintrc": "^3.3.3",
|
||||
"@eslint/js": "^9.39.2",
|
||||
"@preact/preset-vite": "^2.10.5",
|
||||
"@preact/preset-vite": "^2.10.3",
|
||||
"@testing-library/jest-dom": "^6.9.1",
|
||||
"@testing-library/preact": "^3.2.4",
|
||||
"@types/lodash.debounce": "^4.0.9",
|
||||
"@types/node": "^25.5.0",
|
||||
"@types/qs": "^6.15.0",
|
||||
"@types/react": "^19.2.14",
|
||||
"@types/node": "^25.2.0",
|
||||
"@types/qs": "^6.14.0",
|
||||
"@types/react": "^19.2.10",
|
||||
"@types/react-input-mask": "^3.0.6",
|
||||
"@types/react-router-dom": "^5.3.3",
|
||||
"@typescript-eslint/eslint-plugin": "^8.57.2",
|
||||
"@typescript-eslint/parser": "^8.57.2",
|
||||
"@typescript-eslint/eslint-plugin": "^8.54.0",
|
||||
"@typescript-eslint/parser": "^8.54.0",
|
||||
"cross-env": "^10.1.0",
|
||||
"eslint": "^9.39.2",
|
||||
"eslint-plugin-react": "^7.37.5",
|
||||
"eslint-plugin-unused-imports": "^4.4.1",
|
||||
"globals": "^17.4.0",
|
||||
"eslint-plugin-unused-imports": "^4.3.0",
|
||||
"globals": "^17.3.0",
|
||||
"http-proxy-middleware": "^3.0.5",
|
||||
"jsdom": "^29.0.1",
|
||||
"postcss": "^8.5.8",
|
||||
"sass-embedded": "^1.98.0",
|
||||
"jsdom": "^28.0.0",
|
||||
"postcss": "^8.5.6",
|
||||
"rollup-plugin-visualizer": "^6.0.5",
|
||||
"sass-embedded": "^1.97.3",
|
||||
"typescript": "^5.9.3",
|
||||
"vitest": "^4.1.1"
|
||||
"vitest": "^4.0.18"
|
||||
},
|
||||
"browserslist": {
|
||||
"production": [
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
export const getGroupsUrl = (server: string, search: string, type: string, states: string[], maxGroups: number): string => {
|
||||
return `${server}/vmalert/api/v1/rules?datasource_type=prometheus&search=${encodeURIComponent(search)}&type=${encodeURIComponent(type)}&state=${states.map(encodeURIComponent).join(",")}&group_limit=${maxGroups}&extended_states=true`;
|
||||
export const getGroupsUrl = (server: string): string => {
|
||||
return `${server}/vmalert/api/v1/rules?datasource_type=prometheus`;
|
||||
};
|
||||
|
||||
export const getItemUrl = (
|
||||
|
||||
@@ -60,7 +60,7 @@ const QueryEditorAutocomplete: FC<QueryEditorAutocompleteProps> = ({
|
||||
const options = useMemo(() => {
|
||||
switch (context) {
|
||||
case QueryContextType.metricsql:
|
||||
return includeFunctions ? [...metrics, ...metricsqlFunctions] : metrics;
|
||||
return [...metrics, ...metricsqlFunctions];
|
||||
case QueryContextType.label:
|
||||
return labels;
|
||||
case QueryContextType.labelValue:
|
||||
@@ -68,7 +68,7 @@ const QueryEditorAutocomplete: FC<QueryEditorAutocompleteProps> = ({
|
||||
default:
|
||||
return [];
|
||||
}
|
||||
}, [context, metrics, labels, labelValues, metricsqlFunctions, includeFunctions]);
|
||||
}, [context, metrics, labels, labelValues, metricsqlFunctions]);
|
||||
|
||||
const handleSelect = useCallback((insert: string) => {
|
||||
// Find the start and end of valueByContext in the query string
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import "./style.scss";
|
||||
import { ReactNode } from "react";
|
||||
|
||||
export type BadgeColor = "firing" | "inactive" | "pending" | "nomatch" | "unhealthy" | "ok" | "passive";
|
||||
export type BadgeColor = "firing" | "inactive" | "pending" | "no-match" | "unhealthy" | "ok" | "passive";
|
||||
|
||||
interface BadgeItem {
|
||||
value?: number | string;
|
||||
|
||||
@@ -4,7 +4,7 @@ $badge-colors: (
|
||||
"firing": $color-error,
|
||||
"inactive": $color-success,
|
||||
"pending": $color-warning,
|
||||
"nomatch": $color-notice,
|
||||
"no-match": $color-notice,
|
||||
"unhealthy": $color-broken,
|
||||
"ok": $color-info,
|
||||
"passive": $color-passive,
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import { useMemo } from "preact/compat";
|
||||
import "./style.scss";
|
||||
import { Group as APIGroup } from "../../../types";
|
||||
import ItemHeader from "../ItemHeader";
|
||||
import { getStates, formatDuration, formatEventTime } from "../helpers";
|
||||
import { formatDuration, formatEventTime } from "../helpers";
|
||||
import Badges, { BadgeColor } from "../Badges";
|
||||
|
||||
interface BaseGroupProps {
|
||||
@@ -118,21 +117,6 @@ const BaseGroup = ({ group }: BaseGroupProps) => {
|
||||
)}
|
||||
</tbody>
|
||||
</table>
|
||||
<div className="vm-explore-alerts-rule-item">
|
||||
<span className="vm-alerts-title">Rules</span>
|
||||
{group.rules.map((rule) => (
|
||||
<ItemHeader
|
||||
classes={["vm-badge-item", rule.state]}
|
||||
key={rule.id}
|
||||
entity="rule"
|
||||
type={rule.type}
|
||||
groupId={rule.group_id}
|
||||
states={getStates(rule)}
|
||||
id={rule.id}
|
||||
name={rule.name}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -18,7 +18,6 @@ import {
|
||||
import Button from "../../Main/Button/Button";
|
||||
|
||||
interface ItemHeaderControlsProps {
|
||||
classes?: string[];
|
||||
entity: string;
|
||||
type?: string;
|
||||
groupId: string;
|
||||
@@ -28,19 +27,12 @@ interface ItemHeaderControlsProps {
|
||||
onClose?: () => void;
|
||||
}
|
||||
|
||||
const ItemHeader: FC<ItemHeaderControlsProps> = ({ name, id, groupId, entity, type, states, onClose, classes }) => {
|
||||
const ItemHeader: FC<ItemHeaderControlsProps> = ({ name, id, groupId, entity, type, states, onClose }) => {
|
||||
const { isMobile } = useDeviceDetect();
|
||||
const { serverUrl } = useAppState();
|
||||
const navigate = useNavigate();
|
||||
const copyToClipboard = useCopyToClipboard();
|
||||
|
||||
const openGroupLink = () => {
|
||||
navigate({
|
||||
pathname: "/rules",
|
||||
search: `group_id=${groupId}`,
|
||||
});
|
||||
};
|
||||
|
||||
const openItemLink = () => {
|
||||
navigate({
|
||||
pathname: "/rules",
|
||||
@@ -57,7 +49,7 @@ const ItemHeader: FC<ItemHeaderControlsProps> = ({ name, id, groupId, entity, ty
|
||||
const headerClasses = classNames({
|
||||
"vm-explore-alerts-item-header": true,
|
||||
"vm-explore-alerts-item-header_mobile": isMobile,
|
||||
}, classes);
|
||||
});
|
||||
|
||||
const renderIcon = () => {
|
||||
switch(entity) {
|
||||
@@ -113,30 +105,16 @@ const ItemHeader: FC<ItemHeaderControlsProps> = ({ name, id, groupId, entity, ty
|
||||
items={badgesItems}
|
||||
/>
|
||||
{onClose ? (
|
||||
<>
|
||||
{id && (
|
||||
<Button
|
||||
className="vm-back-button"
|
||||
size="small"
|
||||
variant="outlined"
|
||||
color="gray"
|
||||
startIcon={<GroupIcon />}
|
||||
onClick={openGroupLink}
|
||||
>
|
||||
<span className="vm-button-text">Open Group</span>
|
||||
</Button>
|
||||
)}
|
||||
<Button
|
||||
className="vm-back-button"
|
||||
size="small"
|
||||
variant="outlined"
|
||||
color="gray"
|
||||
startIcon={<LinkIcon />}
|
||||
onClick={copyLink}
|
||||
>
|
||||
<span className="vm-button-text">Copy Link</span>
|
||||
</Button>
|
||||
</>
|
||||
<Button
|
||||
className="vm-back-button"
|
||||
size="small"
|
||||
variant="outlined"
|
||||
color="gray"
|
||||
startIcon={<LinkIcon />}
|
||||
onClick={copyLink}
|
||||
>
|
||||
<span className="vm-button-text">Copy Link</span>
|
||||
</Button>
|
||||
) : (
|
||||
<Button
|
||||
className="vm-button-borderless"
|
||||
|
||||
@@ -6,10 +6,6 @@
|
||||
justify-content: space-between;
|
||||
gap: $padding-global;
|
||||
|
||||
&:is(.vm-badge-item) {
|
||||
padding: 6px 0 6px 6px;
|
||||
}
|
||||
|
||||
.vm-button_small {
|
||||
padding: 4px;
|
||||
}
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
import Button from "../../Main/Button/Button";
|
||||
import { ArrowDownIcon } from "../../Main/Icons";
|
||||
import "./style.scss";
|
||||
import classNames from "classnames";
|
||||
|
||||
interface PaginationProps {
|
||||
page: number;
|
||||
totalPages: number;
|
||||
totalRules: number;
|
||||
totalGroups: number;
|
||||
pageRules: number;
|
||||
pageGroups: number;
|
||||
onPageChange: (num: number) => () => void;
|
||||
}
|
||||
|
||||
/**
 * Computes the ordered list of page buttons for the pagination bar.
 *
 * The result always starts with the first page and ends with the last one;
 * the neighbours of the current page are placed in between. A `0` entry is
 * a placeholder for a collapsed range of pages (rendered as "..." by the
 * Pagination component in this file).
 *
 * @param page       current page number
 * @param totalPages total number of pages
 * @returns page numbers to render, or an empty list when there are fewer than two pages
 */
const getButtons = (page: number, totalPages: number) => {
  if (totalPages < 2) {
    const none: number[] = [];
    return none;
  }

  // Each entry pairs "should this slot be shown" with the value to show.
  // The order of the entries is the order of the rendered buttons.
  const slots: [boolean, number][] = [
    [true, 1],
    [page > 3, 0],
    [page > 2, page - 1],
    [page > 1 && page < totalPages, page],
    [page > 0 && page < totalPages - 1, page + 1],
    [totalPages - page > 2, 0],
    [true, totalPages],
  ];

  return slots.filter(([visible]) => visible).map(([, value]) => value);
};
|
||||
|
||||
/**
 * Pagination bar for the rules list.
 *
 * Shows the rules/groups counters for the current page on the left, the page
 * buttons in the middle (only when `getButtons` returns at least one entry)
 * and the total rules/groups counters on the right.
 *
 * `onPageChange(n)` must return the click handler that switches to page `n`;
 * it is invoked during render to build the handler of every button.
 */
const Pagination = ({
  page,
  totalPages,
  onPageChange,
  totalGroups,
  totalRules,
  pageGroups,
  pageRules,
}: PaginationProps) => {
  // Zero entries mark collapsed page ranges and are rendered as "...".
  const buttons = getButtons(page, totalPages);

  return (
    <>
      <div
        className="vm-pagination"
      >
        <span className="vm-pagination-stats">
          <span>Page rules/groups:</span> <b>{pageRules}</b> / <b>{pageGroups}</b>
        </span>
        {!!buttons.length && (
          <div className="vm-pagination-buttons">
            <Button
              className="vm-button-borderless vm-pagination-prev"
              size="small"
              color="gray"
              disabled={page === 1}
              variant="outlined"
              startIcon={<ArrowDownIcon />}
              onClick={onPageChange(page - 1)}
            />
            {buttons.map((button, index) => {
              return button ? (
                <Button
                  className={classNames({
                    "vm-button-borderless": page !== button,
                  })}
                  key={index}
                  size="small"
                  color="gray"
                  variant="outlined"
                  onClick={onPageChange(button)}
                >{button}</Button>
              ) : (
                // Every element produced by map() needs a key, including the
                // ellipsis placeholder; otherwise it cannot be matched reliably
                // against its keyed Button siblings between renders.
                <span
                  className="vm-pagination-more"
                  key={index}
                >...</span>
              );
            })}
            <Button
              className="vm-button-borderless vm-pagination-next"
              size="small"
              color="gray"
              disabled={page === totalPages}
              variant="outlined"
              startIcon={<ArrowDownIcon />}
              onClick={onPageChange(page + 1)}
            />
          </div>
        )}
        <span className="vm-pagination-stats">
          <span>Total rules/groups:</span> <b>{totalRules}</b> / <b>{totalGroups}</b>
        </span>
      </div>
    </>
  );
};
|
||||
|
||||
export default Pagination;
|
||||
@@ -1,33 +0,0 @@
|
||||
@use "src/styles/variables" as *;
|
||||
|
||||
.vm-pagination {
|
||||
display: flex;
|
||||
min-height: 24px;
|
||||
justify-content: space-between;
|
||||
&-stats {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
color: var(--color-text-secondary);
|
||||
column-gap: $padding-tiny;
|
||||
}
|
||||
&-buttons {
|
||||
display: flex;
|
||||
column-gap: $padding-small;
|
||||
}
|
||||
.vm-button-borderless {
|
||||
border: 0;
|
||||
}
|
||||
&-more {
|
||||
align-self: center;
|
||||
}
|
||||
&-prev {
|
||||
svg {
|
||||
transform: rotate(90deg);
|
||||
}
|
||||
}
|
||||
&-next {
|
||||
svg {
|
||||
transform: rotate(-90deg);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import { useMemo } from "preact/compat";
|
||||
import { FC, useMemo } from "preact/compat";
|
||||
import Select from "../../Main/Select/Select";
|
||||
import { SearchIcon } from "../../Main/Icons";
|
||||
import TextField from "../../Main/TextField/TextField";
|
||||
@@ -8,25 +8,25 @@ import useDeviceDetect from "../../../hooks/useDeviceDetect";
|
||||
|
||||
interface RulesHeaderProps {
|
||||
types: string[];
|
||||
allRuleTypes: string[];
|
||||
allTypes: string[];
|
||||
allStates: string[];
|
||||
states: string[];
|
||||
search: string;
|
||||
onChangeRuleType: (input: string) => void;
|
||||
onChangeTypes: (input: string) => void;
|
||||
onChangeStates: (input: string) => void;
|
||||
onChangeSearch: (input: string) => void;
|
||||
}
|
||||
|
||||
const RulesHeader = ({
|
||||
const RulesHeader: FC<RulesHeaderProps> = ({
|
||||
types,
|
||||
allRuleTypes,
|
||||
allTypes,
|
||||
allStates,
|
||||
states,
|
||||
search,
|
||||
onChangeRuleType,
|
||||
onChangeTypes,
|
||||
onChangeStates,
|
||||
onChangeSearch,
|
||||
}: RulesHeaderProps) => {
|
||||
}) => {
|
||||
const noStateText = useMemo(
|
||||
() => (types.length ? "" : "No states. Please select rule states"),
|
||||
[types],
|
||||
@@ -46,10 +46,10 @@ const RulesHeader = ({
|
||||
<div className="vm-explore-alerts-header__rule_type">
|
||||
<Select
|
||||
value={types}
|
||||
list={allRuleTypes}
|
||||
label="Rule type"
|
||||
list={allTypes}
|
||||
label="Rules type"
|
||||
placeholder="Please select rule type"
|
||||
onChange={onChangeRuleType}
|
||||
onChange={onChangeTypes}
|
||||
autofocus={!!types.length && !isMobile}
|
||||
includeAll
|
||||
searchable
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import dayjs from "dayjs";
|
||||
import { Rule } from "../../types";
|
||||
|
||||
export const formatDuration = (raw: number) => {
|
||||
const duration = dayjs.duration(Math.round(raw * 1000));
|
||||
@@ -19,13 +18,3 @@ export const formatEventTime = (raw: string) => {
|
||||
const t = dayjs(raw);
|
||||
return t.year() <= 1 ? "Never" : t.format("DD MMM YYYY HH:mm:ss");
|
||||
};
|
||||
|
||||
/**
 * Counts how many alerts of the rule are in each state.
 *
 * When the rule has no alerts, the rule's own state is reported with a
 * count of 1 instead.
 *
 * @param rule rule to summarise
 * @returns map of state name to number of occurrences
 */
export const getStates = (rule: Rule) => {
  const alerts = rule.alerts;
  if (!alerts?.length) {
    return { [rule.state]: 1 };
  }

  const counters = {} as Record<string, number>;
  for (const alert of alerts) {
    counters[alert.state] = (counters[alert.state] ?? 0) + 1;
  }
  return counters;
};
|
||||
|
||||
@@ -72,9 +72,9 @@ const useGetMetricsQL = (includeFunctions: boolean) => {
|
||||
}
|
||||
};
|
||||
fetchMarkdown();
|
||||
}, [includeFunctions, metricsQLFunctions.length, queryDispatch]);
|
||||
}, []);
|
||||
|
||||
return metricsQLFunctions;
|
||||
return includeFunctions ? metricsQLFunctions : [];
|
||||
};
|
||||
|
||||
export default useGetMetricsQL;
|
||||
|
||||
@@ -6,7 +6,7 @@ import { Rule as APIRule } from "../../types";
|
||||
import ItemHeader from "../../components/ExploreAlerts/ItemHeader";
|
||||
import BaseRule from "../../components/ExploreAlerts/BaseRule";
|
||||
import Modal from "../../components/Main/Modal/Modal";
|
||||
import { getStates } from "../../components/ExploreAlerts/helpers";
|
||||
import { getStates } from "./helpers";
|
||||
|
||||
interface ExploreRuleProps {
|
||||
groupId: string;
|
||||
|
||||
@@ -7,36 +7,30 @@ import Accordion from "../../components/Main/Accordion/Accordion";
|
||||
import { useFetchGroups } from "./hooks/useFetchGroups";
|
||||
import "./style.scss";
|
||||
import RulesHeader from "../../components/ExploreAlerts/RulesHeader";
|
||||
import Pagination from "../../components/ExploreAlerts/Pagination";
|
||||
import GroupHeader from "../../components/ExploreAlerts/GroupHeader";
|
||||
import Rule from "../../components/ExploreAlerts/Rule";
|
||||
import ExploreRule from "../../pages/ExploreAlerts/ExploreRule";
|
||||
import ExploreAlert from "../../pages/ExploreAlerts/ExploreAlert";
|
||||
import ExploreGroup from "../../pages/ExploreAlerts/ExploreGroup";
|
||||
import { getQueryStringValue } from "../../utils/query-string";
|
||||
import { getChanges } from "./helpers";
|
||||
import { getStates, getChanges, filterGroups } from "./helpers";
|
||||
import debounce from "lodash.debounce";
|
||||
import { getStates } from "../../components/ExploreAlerts/helpers";
|
||||
|
||||
const defaultRuleType = getQueryStringValue("type", "") as string;
|
||||
const defaultTypesStr = getQueryStringValue("types", "") as string;
|
||||
const defaultTypes = defaultTypesStr.split("&").filter((rt) => rt) as string[];
|
||||
const defaultStatesStr = getQueryStringValue("states", "") as string;
|
||||
const defaultStates = defaultStatesStr.split("&").filter((s) => s) as string[];
|
||||
const defaultSearchInput = getQueryStringValue("search", "") as string;
|
||||
const TYPE_STATES: Record<string, string[]> = {
|
||||
alert: ["inactive", "firing", "nomatch", "pending", "unhealthy"],
|
||||
record: ["unhealthy", "nomatch", "ok"],
|
||||
};
|
||||
|
||||
const ExploreRules: FC = () => {
|
||||
const pageNum = getQueryStringValue("page_num", "1") as string;
|
||||
const groupId = getQueryStringValue("group_id", "") as string;
|
||||
const ruleId = getQueryStringValue("rule_id", "") as string;
|
||||
const alertId = getQueryStringValue("alert_id", "") as string;
|
||||
|
||||
const [searchInput, setSearchInput] = useState(defaultSearchInput);
|
||||
const [ruleType, setRuleType] = useState(defaultRuleType);
|
||||
const [types, setTypes] = useState(defaultTypes);
|
||||
const [states, setStates] = useState(defaultStates);
|
||||
const [modalOpen, setModalOpen] = useState(false);
|
||||
const [modalOpen, setModalOpen] = useState(true);
|
||||
const [searchParams, setSearchParams] = useSearchParams();
|
||||
|
||||
useEffect(() => {
|
||||
@@ -44,7 +38,7 @@ const ExploreRules: FC = () => {
|
||||
}, [groupId]);
|
||||
|
||||
useSetQueryParams({
|
||||
type: ruleType,
|
||||
types: types.join("&"),
|
||||
states: states.join("&"),
|
||||
search: searchInput,
|
||||
group_id: groupId,
|
||||
@@ -53,11 +47,12 @@ const ExploreRules: FC = () => {
|
||||
});
|
||||
|
||||
const handleChangeSearch = useCallback((input: string) => {
|
||||
const newParams = new URLSearchParams(searchParams);
|
||||
newParams.set("page_num", "1");
|
||||
setSearchParams(newParams);
|
||||
setSearchInput(input || "");
|
||||
}, [searchInput, searchParams]);
|
||||
if (!input) {
|
||||
setSearchInput("");
|
||||
} else {
|
||||
setSearchInput(input);
|
||||
}
|
||||
}, [searchInput]);
|
||||
|
||||
const getModal = () => {
|
||||
if (ruleId) {
|
||||
@@ -99,79 +94,55 @@ const ExploreRules: FC = () => {
|
||||
setModalOpen(false);
|
||||
};
|
||||
|
||||
const onPageChange = (num: number) => {
|
||||
return () => {
|
||||
const newParams = new URLSearchParams(searchParams);
|
||||
newParams.set("page_num", num.toString());
|
||||
setSearchParams(newParams);
|
||||
};
|
||||
};
|
||||
|
||||
const allRuleTypes = Object.keys(TYPE_STATES);
|
||||
const allStates = useMemo(
|
||||
() => Array.from(ruleType === "" ? new Set(Object.values(TYPE_STATES).flat()) : TYPE_STATES[ruleType] || []),
|
||||
[ruleType]
|
||||
);
|
||||
const selectedRuleTypes = [ruleType].filter(Boolean);
|
||||
useEffect(() => {
|
||||
if (!states.every(v => allStates.includes(v))) {
|
||||
setStates([]);
|
||||
}
|
||||
}, [states, allStates]);
|
||||
|
||||
const pageNumInt: number = Math.max(1, parseInt(pageNum, 10) || 1);
|
||||
const {
|
||||
groups,
|
||||
isLoading,
|
||||
error,
|
||||
pageInfo,
|
||||
} = useFetchGroups({ blockFetch: modalOpen, search: searchInput, ruleType, states, pageNum: pageNumInt, onPageChange });
|
||||
} = useFetchGroups({ blockFetch: modalOpen });
|
||||
|
||||
const { filteredGroups, allTypes, allStates } = useMemo(
|
||||
() => filterGroups(groups || [], types, states, searchInput),
|
||||
[groups, types, states, searchInput]
|
||||
);
|
||||
|
||||
if (!types.every(v => allTypes.has(v))) {
|
||||
setTypes([]);
|
||||
}
|
||||
const selectedTypes = allTypes.size === types.length ? [] : types;
|
||||
|
||||
if (!states.every(v => allStates.has(v))) {
|
||||
setStates([]);
|
||||
}
|
||||
const selectedStates = allStates.size === states.length ? [] : states;
|
||||
|
||||
const handleChangeStates = useCallback((title: string) => {
|
||||
const newParams = new URLSearchParams(searchParams);
|
||||
newParams.set("page_num", "1");
|
||||
setSearchParams(newParams);
|
||||
const changes = getChanges(title, states);
|
||||
setStates(changes.length == allStates.length ? [] : changes);
|
||||
}, [states, searchParams]);
|
||||
setStates(getChanges(title, selectedStates));
|
||||
}, [states]);
|
||||
|
||||
const handleChangeRuleType = useCallback((title: string) => {
|
||||
const newParams = new URLSearchParams(searchParams);
|
||||
newParams.set("page_num", "1");
|
||||
setSearchParams(newParams);
|
||||
const changes = getChanges(title, selectedRuleTypes);
|
||||
setRuleType(changes.length && changes.length !== allRuleTypes.length ? changes[0] : "");
|
||||
}, [ruleType, searchParams]);
|
||||
const handleChangeTypes = useCallback((title: string) => {
|
||||
setTypes(getChanges(title, selectedTypes));
|
||||
}, [types]);
|
||||
|
||||
return (
|
||||
<>
|
||||
{modalOpen && getModal()}
|
||||
{(!modalOpen || !!allStates?.length) && (
|
||||
{(!modalOpen || !!allStates?.size) && (
|
||||
<div className="vm-explore-alerts">
|
||||
<RulesHeader
|
||||
types={selectedRuleTypes}
|
||||
allRuleTypes={allRuleTypes}
|
||||
states={states}
|
||||
allStates={allStates}
|
||||
types={selectedTypes}
|
||||
allTypes={Array.from(allTypes)}
|
||||
states={selectedStates}
|
||||
allStates={Array.from(allStates)}
|
||||
search={searchInput}
|
||||
onChangeRuleType={handleChangeRuleType}
|
||||
onChangeTypes={handleChangeTypes}
|
||||
onChangeStates={handleChangeStates}
|
||||
onChangeSearch={debounce(handleChangeSearch, 500)}
|
||||
/>
|
||||
<Pagination
|
||||
page={pageInfo.page}
|
||||
totalPages={pageInfo.total_pages}
|
||||
pageRules={groups.reduce((total, g) => total + g?.rules.length, 0)}
|
||||
pageGroups={groups.length}
|
||||
totalRules={pageInfo.total_rules}
|
||||
totalGroups={pageInfo.total_groups}
|
||||
onPageChange={onPageChange}
|
||||
/>
|
||||
{(isLoading && <Spinner />) || (error && <Alert variant="error">{error}</Alert>) || (
|
||||
!groups.length && <Alert variant="info">{noRuleFound}</Alert>
|
||||
!filteredGroups.length && <Alert variant="info">{noRuleFound}</Alert>
|
||||
) || (
|
||||
<div className="vm-explore-alerts-body">
|
||||
{groups.map((group) => (
|
||||
{filteredGroups.map((group) => (
|
||||
<div
|
||||
key={group.id}
|
||||
className="vm-explore-alert-group vm-block vm-block_empty-padding"
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import { Rule, Group } from "../../types";
|
||||
|
||||
export const getChanges = (title: string, prevValues: string[]): string[] => {
|
||||
if (title === "All") return [];
|
||||
|
||||
@@ -10,3 +12,77 @@ export const getChanges = (title: string, prevValues: string[]): string[] => {
|
||||
|
||||
return Array.from(newValues);
|
||||
};
|
||||
|
||||
/**
 * Derives the state shown for a rule.
 *
 * Precedence:
 *  1. "unhealthy" when the rule's health is anything other than "ok";
 *  2. "no match" when both `lastSamples` and `lastSeriesFetched` are falsy;
 *  3. otherwise the rule's own state, falling back to "ok" when it is empty.
 *
 * @param rule rule to inspect
 * @returns the derived state name
 */
export const getState = (rule: Rule) => {
  if (rule?.health !== "ok") {
    return "unhealthy";
  }

  const hasData = rule?.lastSamples || rule?.lastSeriesFetched;
  if (!hasData) {
    return "no match";
  }

  return rule?.state || "ok";
};
|
||||
|
||||
/**
 * Builds a map of state name to count for a rule.
 *
 * A rule with alerts is summarised by the states of its alerts. A rule
 * without alerts contributes a single entry keyed by `getState(rule)`.
 *
 * @param rule rule to summarise
 * @returns map of state name to number of occurrences
 */
export const getStates = (rule: Rule) => {
  const counters: Record<string, number> = {};

  if (!rule?.alerts?.length) {
    counters[getState(rule)] = 1;
    return counters;
  }

  for (const { state } of rule.alerts) {
    counters[state] = state in counters ? counters[state] + 1 : 1;
  }
  return counters;
};
|
||||
|
||||
/**
 * Filters rule groups by rule type, rule state and a free-text search, and
 * collects the filter options available in the data.
 *
 * - `allTypes` receives the capitalised type of every rule.
 * - `allStates` receives the capitalised state of every rule that passed the
 *   type filter.
 * - A rule matches the search when its name, its group's name or its group's
 *   file contains `searchInput` (case-insensitive).
 * - Groups without any matching rule are dropped. Kept groups are shallow
 *   copies whose `rules` hold only the matching rules and whose `states` hold
 *   per-state counters for the "no match", "unhealthy", "firing" and
 *   "pending" states.
 *
 * @param groups      groups to filter
 * @param types       selected (capitalised) rule types; empty means "any"
 * @param states      selected (capitalised) rule states; empty means "any"
 * @param searchInput free-text search; empty means "no search"
 * @returns the filtered groups plus the sets of all seen types and states
 */
export const filterGroups = (groups: Group[], types: string[], states: string[], searchInput: string) => {
  const allTypes: Set<string> = new Set();
  const allStates: Set<string> = new Set();
  const filteredGroups: Group[] = [];

  // Lower-case the search term once instead of up to three times per rule.
  const needle = searchInput ? searchInput.toLowerCase() : "";
  const capitalize = (value: string) => value.charAt(0).toUpperCase() + value.slice(1);

  groups.forEach((group) => {
    const filteredRules: Rule[] = [];
    const statesPerGroup: Record<string, number> = {};

    group.rules.forEach((rule) => {
      const ruleType = capitalize(rule.type);
      allTypes.add(ruleType);
      if (types?.length && !types.includes(ruleType)) return;

      const state = getState(rule);
      const stateName = capitalize(state);
      allStates.add(stateName);
      if (states?.length && !states.includes(stateName)) return;

      if (
        needle &&
        !rule.name.toLowerCase().includes(needle) &&
        !group.name.toLowerCase().includes(needle) &&
        !group.file.toLowerCase().includes(needle)
      )
        return;

      filteredRules.push(rule);

      // Only these states are counted in the per-group summary.
      const hasAlerts = state === "firing" || state === "pending";
      if (!hasAlerts && state !== "no match" && state !== "unhealthy") return;

      // A missing alerts list must count as 0; adding `undefined` would turn
      // the counter into NaN.
      const count = hasAlerts ? (rule?.alerts?.length ?? 0) : 1;
      statesPerGroup[stateName] = (statesPerGroup[stateName] ?? 0) + count;
    });

    if (filteredRules.length) {
      // Shallow copy so the source group keeps its full rule list.
      const g = Object.assign({}, group);
      g.rules = filteredRules;
      g.states = statesPerGroup;
      filteredGroups.push(g);
    }
  });

  return { filteredGroups, allTypes, allStates };
};
|
||||
|
||||
@@ -1,75 +1,46 @@
|
||||
import { useMemo, useEffect, useState } from "preact/compat";
|
||||
import { useTimeState } from "../../../state/time/TimeStateContext";
|
||||
import { useEffect, useMemo, useState } from "preact/compat";
|
||||
import { getGroupsUrl } from "../../../api/explore-alerts";
|
||||
import { useAppState } from "../../../state/common/StateContext";
|
||||
import { ErrorTypes, Group } from "../../../types";
|
||||
import { useTimeState } from "../../../state/time/TimeStateContext";
|
||||
|
||||
interface FetchGroupsReturn {
|
||||
groups: Group[];
|
||||
isLoading: boolean;
|
||||
error?: ErrorTypes | string;
|
||||
pageInfo: PageInfo;
|
||||
}
|
||||
|
||||
interface FetchGroupsProps {
|
||||
blockFetch: boolean;
|
||||
search: string;
|
||||
ruleType: string;
|
||||
states: string[];
|
||||
pageNum: number;
|
||||
onPageChange: (num: number) => () => void;
|
||||
blockFetch: boolean
|
||||
}
|
||||
|
||||
interface PageInfo {
|
||||
page: number;
|
||||
total_pages: number;
|
||||
total_groups: number;
|
||||
total_rules: number;
|
||||
}
|
||||
|
||||
const MAX_GROUPS = 100;
|
||||
|
||||
export const useFetchGroups = ({ blockFetch, pageNum, search, ruleType, states, onPageChange }: FetchGroupsProps): FetchGroupsReturn => {
|
||||
export const useFetchGroups = ({ blockFetch }: FetchGroupsProps): FetchGroupsReturn => {
|
||||
const { serverUrl } = useAppState();
|
||||
const { period } = useTimeState();
|
||||
|
||||
const [groups, setGroups] = useState<Group[]>([]);
|
||||
const [isLoading, setIsLoading] = useState(false);
|
||||
const [pageInfo, setPageInfo] = useState<PageInfo>({
|
||||
page: pageNum,
|
||||
total_pages: 1,
|
||||
total_groups: 0,
|
||||
total_rules: 0,
|
||||
});
|
||||
const [error, setError] = useState<ErrorTypes | string>();
|
||||
|
||||
const fetchUrl = useMemo(
|
||||
() => getGroupsUrl(serverUrl, search, ruleType, states, MAX_GROUPS),
|
||||
[serverUrl, search, ruleType, states],
|
||||
() => getGroupsUrl(serverUrl),
|
||||
[serverUrl],
|
||||
);
|
||||
|
||||
const loaded = !!groups.length || !blockFetch;
|
||||
|
||||
useEffect(() => {
|
||||
if (blockFetch) return;
|
||||
const fetchData = async () => {
|
||||
setIsLoading(true);
|
||||
try {
|
||||
const url = `${fetchUrl}&page_num=${pageNum}`;
|
||||
const response = await fetch(url);
|
||||
const response = await fetch(fetchUrl);
|
||||
const resp = await response.json();
|
||||
|
||||
if (response.ok) {
|
||||
const loadedGroups = (resp.data.groups || []) as Group[];
|
||||
setGroups(loadedGroups);
|
||||
setPageInfo({
|
||||
page: resp.page || 1,
|
||||
total_pages: resp.total_pages || 1,
|
||||
total_groups: resp.total_groups || 0,
|
||||
total_rules: resp.total_rules || 0,
|
||||
});
|
||||
const data = (resp.data.groups || []) as Group[];
|
||||
setGroups(data.sort((a, b) => a.name.localeCompare(b.name)));
|
||||
setError(undefined);
|
||||
} else if (response.status === 400 && resp?.error?.includes("exceeds total amount of pages")) {
|
||||
onPageChange(1)();
|
||||
setError(`${resp.errorType}\r\n${resp?.error}`);
|
||||
} else {
|
||||
setError(`${resp.errorType}\r\n${resp?.error}`);
|
||||
}
|
||||
@@ -80,8 +51,9 @@ export const useFetchGroups = ({ blockFetch, pageNum, search, ruleType, states,
|
||||
}
|
||||
setIsLoading(false);
|
||||
};
|
||||
fetchData().catch(console.error);
|
||||
}, [fetchUrl, period, loaded, pageNum]);
|
||||
|
||||
return { groups, isLoading, error, pageInfo };
|
||||
fetchData().catch(console.error);
|
||||
}, [fetchUrl, period, loaded]);
|
||||
|
||||
return { groups, isLoading, error };
|
||||
};
|
||||
|
||||
@@ -3,7 +3,7 @@ import { compactObject } from "../../../utils/object";
|
||||
import useSearchParamsFromObject from "../../../hooks/useSearchParamsFromObject";
|
||||
|
||||
interface rulesQueryProps {
|
||||
type?: string;
|
||||
types?: string;
|
||||
states?: string;
|
||||
search?: string;
|
||||
rule_id: string;
|
||||
@@ -12,7 +12,7 @@ interface rulesQueryProps {
|
||||
}
|
||||
|
||||
export const useRulesSetQueryParams = ({
|
||||
type,
|
||||
types,
|
||||
states,
|
||||
search,
|
||||
rule_id,
|
||||
@@ -23,7 +23,7 @@ export const useRulesSetQueryParams = ({
|
||||
|
||||
const setSearchParamsFromState = () => {
|
||||
const params = compactObject({
|
||||
type,
|
||||
types,
|
||||
states,
|
||||
search,
|
||||
alert_id,
|
||||
@@ -35,7 +35,7 @@ export const useRulesSetQueryParams = ({
|
||||
};
|
||||
|
||||
useEffect(setSearchParamsFromState, [
|
||||
type,
|
||||
types,
|
||||
states,
|
||||
search,
|
||||
rule_id,
|
||||
|
||||
@@ -17,19 +17,6 @@
|
||||
}
|
||||
}
|
||||
|
||||
.vm-explore-alerts-load {
|
||||
text-align: center;
|
||||
color: var(--color-text-disabled);
|
||||
button {
|
||||
border: none;
|
||||
}
|
||||
&-before {
|
||||
svg {
|
||||
transform: rotate(180deg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.vm-list-item-inner {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
|
||||
@@ -230,7 +230,6 @@ export interface Rule {
|
||||
debug: boolean;
|
||||
updates: RuleUpdate[];
|
||||
max_updates_entries: number;
|
||||
states: Record<string, number>;
|
||||
}
|
||||
|
||||
interface RuleUpdate {
|
||||
|
||||
@@ -194,7 +194,7 @@
|
||||
"columns": [],
|
||||
"datasource": {
|
||||
"type": "yesoreyeram-infinity-datasource",
|
||||
"uid": "$ds"
|
||||
"uid": "${DS_VICTORIAMETRICS-INFINITY}"
|
||||
},
|
||||
"filters": [],
|
||||
"format": "table",
|
||||
@@ -580,7 +580,7 @@
|
||||
"columns": [],
|
||||
"datasource": {
|
||||
"type": "yesoreyeram-infinity-datasource",
|
||||
"uid": "$ds"
|
||||
"uid": "${DS_VICTORIAMETRICS-INFINITY}"
|
||||
},
|
||||
"filters": [],
|
||||
"format": "table",
|
||||
@@ -762,7 +762,7 @@
|
||||
"computed_columns": [],
|
||||
"datasource": {
|
||||
"type": "yesoreyeram-infinity-datasource",
|
||||
"uid": "$ds"
|
||||
"uid": "${DS_VICTORIAMETRICS-INFINITY}"
|
||||
},
|
||||
"filters": [],
|
||||
"format": "table",
|
||||
@@ -870,7 +870,7 @@
|
||||
"computed_columns": [],
|
||||
"datasource": {
|
||||
"type": "yesoreyeram-infinity-datasource",
|
||||
"uid": "$ds"
|
||||
"uid": "${DS_VICTORIAMETRICS-INFINITY}"
|
||||
},
|
||||
"filters": [],
|
||||
"format": "table",
|
||||
@@ -994,7 +994,7 @@
|
||||
"computed_columns": [],
|
||||
"datasource": {
|
||||
"type": "yesoreyeram-infinity-datasource",
|
||||
"uid": "$ds"
|
||||
"uid": "${DS_VICTORIAMETRICS-INFINITY}"
|
||||
},
|
||||
"filters": [],
|
||||
"format": "table",
|
||||
|
||||
@@ -59,7 +59,7 @@ services:
|
||||
- '--external.alert.source=explore?orgId=1&left=["now-1h","now","VictoriaMetrics",{"expr": },{"mode":"Metrics"},{"ui":[true,true,true,"none"]}]'
|
||||
restart: always
|
||||
vmanomaly:
|
||||
image: victoriametrics/vmanomaly:v1.29.1
|
||||
image: victoriametrics/vmanomaly:v1.29.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -14,19 +14,6 @@ aliases:
|
||||
---
|
||||
Please find the changelog for VictoriaMetrics Anomaly Detection below.
|
||||
|
||||
## v1.29.1
|
||||
Released: 2026-03-25
|
||||
|
||||
- FEATURE: Added `min_rel_dev_from_expected` [common model argument](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-relative-deviation-from-expected) to support relative business gating based on percentage deviation from expected values. This allows users to specify a relative threshold for ignoring small deviations that are not significant in the context of the expected value, particularly useful for heterogeneous series with varying unknown magnitudes, returned from the same query.
|
||||
|
||||
- UI: Updated [vmanomaly UI](https://docs.victoriametrics.com/anomaly-detection/ui/) from [v1.5.0](https://docs.victoriametrics.com/anomaly-detection/ui/#v150) to [v1.5.1](https://docs.victoriametrics.com/anomaly-detection/ui/#v151), see respective [release notes](https://docs.victoriametrics.com/anomaly-detection/ui/#v151) for details.
|
||||
|
||||
- IMPROVEMENT: Optimized [`VmWriter`](https://docs.victoriametrics.com/anomaly-detection/components/writer/#vm-writer) hot path by 2-3x in terms of infer-write latency.
|
||||
|
||||
- IMPROVEMENT: Optimized backbone of [t-digest](https://www.sciencedirect.com/science/article/pii/S2665963820300403) data structures to reduce the memory usage and speed up the fit/infer calls for underlying models that use it (e.g. [OnlineQuantileModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile) or [MAD](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-mad)).
|
||||
|
||||
- BUGFIX: Fixed forward compatibility issues with the persisted state from [v1.28.5](#v1285) for [ProphetModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) and [QuantileModel](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile) models, which could lead to "util.files - ERROR - Unexpected error while loading model from ..." in [stateful mode](https://docs.victoriametrics.com/anomaly-detection/components/settings/#state-restoration) after the upgrade to [v1.29.0](#v1290). Now the service can properly load the persisted state from [v1.28.5](#v1285) and continue functioning without requiring retraining of the affected models.
|
||||
|
||||
## v1.29.0
|
||||
Released: 2026-03-05
|
||||
|
||||
|
||||
@@ -162,7 +162,7 @@ Anomaly detection models can significantly improve when incorporating business-s
|
||||
|
||||
- **Defining a `data_range`** - configure [`data_range`](https://docs.victoriametrics.com/anomaly-detection/components/reader/#config-parameters) for the model’s input query to **automatically assign anomaly scores > 1** for values (`y`) that fall outside the defined range.
|
||||
|
||||
- **Filtering minor fluctuations with absolute (`min_dev_from_expected`)** or **relative (`min_rel_dev_from_expected`)** thresholding – use [`min_dev_from_expected`](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-deviation-from-expected) and [`min_rel_dev_from_expected`](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-relative-deviation-from-expected) to **ignore insignificant deviations** and prevent alerting rules from triggering [false positives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-positive).
|
||||
- **Filtering minor fluctuations with `min_dev_from_expected`** – use [`min_dev_from_expected`](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-deviation-from-expected) to **ignore insignificant deviations** and prevent small fluctuations from triggering [false positives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-positive).
|
||||
|
||||
- **Applying `scale` for asymmetric confidence adjustments** - use [`scale`](https://docs.victoriametrics.com/anomaly-detection/components/models/#scale) to adjust confidence intervals **differently for spikes and drops**, ensuring more appropriate anomaly detection.
|
||||
|
||||
@@ -173,7 +173,7 @@ Consider a metric tracking the percentage of HTTP 4xx status codes for a specifi
|
||||
- **Expected data range**: The percentage naturally falls between `0%` and `100%` (`[0, 1]`).
|
||||
- **Threshold-based anomaly detection**: If the error rate exceeds `5%`, it should be **automatically flagged as an anomaly** ([anomaly score](#what-is-anomaly-score) > 1), encouraging an incident investigation.
|
||||
- **Regime shift detection**: A **continuous increase** in error rates (e.g., from `1.5%` to `3%`) should also be considered **anomalous**, as a regime change may indicate an underlying system problem, e.g. with a new release.
|
||||
- **Avoiding false positives**: **Small, infrequent deviations** (e.g., from `1%` to `1.3%` on a scale of 0-100%) should **not** trigger alerts to **prevent unnecessary SRE escalations**. Let it be on the level of 0.5%. Also, relative deviations of less than 10% (e.g., from `1%` to `1.1%`) should be ignored, as they may not represent significant changes in the context of the metric vs its average fluctuation.
|
||||
- **Avoiding false positives**: **Small, infrequent deviations** (e.g., from `1%` to `1.3%`) should **not** trigger alerts to **prevent unnecessary SRE escalations**. Let it be on the level of 0.5%.
|
||||
|
||||
Then, the following config may be used to benefit from incorporating domain knowledge into model behavior:
|
||||
|
||||
@@ -201,8 +201,7 @@ models:
|
||||
schedulers: ['periodic_http']
|
||||
queries: ['percentage_4xx']
|
||||
detection_direction: 'above_expected' # as interested only in spikes, drops are OK
|
||||
min_dev_from_expected: [0, 0.005] # <0.5% deviations vs expected values should be neglected, generating anomaly score == 0
|
||||
min_rel_dev_from_expected: [0, 0.1] # <10% relative deviations vs expected values should be neglected, generating anomaly score == 0
|
||||
min_dev_from_expected: 0.005 # <0.5% deviations vs expected values should be neglected, generating anomaly score == 0
|
||||
# to align predictions to be within [0, 5%] interval, defined in reader.queries.percentage_4xx.data_range
|
||||
clip_predictions: True
|
||||
# specify output series produced by vmanomaly to be written to VictoriaMetrics in `writer`
|
||||
@@ -421,7 +420,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.1
|
||||
image: victoriametrics/vmanomaly:v1.29.0
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
@@ -639,7 +638,7 @@ options:
|
||||
Here’s an example of using the config splitter to divide configurations based on the `extra_filters` argument from the reader section:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.1 && docker image tag victoriametrics/vmanomaly:v1.29.1 vmanomaly
|
||||
docker pull victoriametrics/vmanomaly:v1.29.0 && docker image tag victoriametrics/vmanomaly:v1.29.0 vmanomaly
|
||||
```
|
||||
|
||||
```sh
|
||||
|
||||
@@ -45,9 +45,8 @@ There are 2 types of compatibility to consider when migrating in stateful mode:
|
||||
|
||||
| Group start | Group end | Compatibility | Notes |
|
||||
|---------|--------- |------------|-------|
|
||||
| [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291) | Latest* | Fully Compatible | Just a placeholder for new releases |
|
||||
| [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) | Partially compatible* | Dumped models of class [prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) and [seasonal quantile](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-seasonal-quantile) have problems with loading to [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) due to dropped `pytz` library. **Upgrading directly from v1.28.7 to [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291) with a fix is suggested** |
|
||||
| [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262) | [v1.28.7](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1287) | Fully Compatible | [v1.28.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1280) introduced [rolling](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-models) model class drop in favor of [online](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) models (`rolling_quantile` and `std` models), however, it does not impact compatibility, as artifacts were not produced by default for rolling models. Also, offline `mad` and `zscore` models are redirecting to their respective online counterparts since [v1.28.4](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1284). |
|
||||
| [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) | Latest* | Fully Compatible | Just a placeholder for new releases |
|
||||
| [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262) | [v1.29.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1290) | Fully Compatible | [v1.28.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1280) introduced [rolling](https://docs.victoriametrics.com/anomaly-detection/components/models/#rolling-models) model class drop in favor of [online](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-models) models (`rolling_quantile` and `std` models), however, it does not impact compatibility, as artifacts were not produced by default for rolling models. Also, offline `mad` and `zscore` models are redirecting to their respective online counterparts since [v1.28.4](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1284). |
|
||||
| [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) | [v1.26.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270) | Partially Compatible* | [v1.25.3](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1253) introduced `forecast_at` argument for base [univariate](https://docs.victoriametrics.com/anomaly-detection/components/models/#univariate-models) and `Prophet` [models](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet), however, itself remains backward-reversible from newer states like [v1.26.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1262), [v1.27.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1270). (All models except `isolation_forest_multivariate` class will be dropped) |
|
||||
| [v1.25.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1251) | [v1.25.2](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1252) | Fully Compatible | In [v1.25.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1251) there was a change to `vmanomaly.db` metadata database format, so migrating from v1.24.0-v1.25.0 requires deletion of a state, see note above the table |
|
||||
| [v1.24.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1241) | [v1.25.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1250) | Partially Compatible* | In [v1.25.0](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1250) there were changes to **data dump layout** and to `online_quantile` and `isolation_forest_multivariate` [model](https://docs.victoriametrics.com/anomaly-detection/components/models/) states, so to migrate from v1.24.0-v1.24.1 it is recommended to drop the state |
|
||||
|
||||
@@ -122,7 +122,7 @@ Below are the steps to get `vmanomaly` up and running inside a Docker container:
|
||||
1. Pull Docker image:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.1
|
||||
docker pull victoriametrics/vmanomaly:v1.29.0
|
||||
```
|
||||
|
||||
2. Create the license file with your license key.
|
||||
@@ -142,7 +142,7 @@ docker run -it \
|
||||
-v ./license:/license \
|
||||
-v ./config.yaml:/config.yaml \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.1 \
|
||||
victoriametrics/vmanomaly:v1.29.0 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -159,7 +159,7 @@ docker run -it \
|
||||
-e VMANOMALY_DATA_DUMPS_DIR=/tmp/vmanomaly/data \
|
||||
-e VMANOMALY_MODEL_DUMPS_DIR=/tmp/vmanomaly/models \
|
||||
-p 8490:8490 \
|
||||
victoriametrics/vmanomaly:v1.29.1 \
|
||||
victoriametrics/vmanomaly:v1.29.0 \
|
||||
/config.yaml \
|
||||
--licenseFile=/license \
|
||||
--loggerLevel=INFO \
|
||||
@@ -172,7 +172,7 @@ services:
|
||||
# ...
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.1
|
||||
image: victoriametrics/vmanomaly:v1.29.0
|
||||
# ...
|
||||
restart: always
|
||||
volumes:
|
||||
|
||||
@@ -189,9 +189,9 @@ The best applications of this mode are:
|
||||
|
||||
### What you can do with Copilot
|
||||
|
||||
- **Ask questions** about any model (e.g. [Prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) or [Z-score](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-z-score) - parameters, trade-offs, when to use each)
|
||||
- **Improve detection quality** - describe what's wrong ("too many false positives", "missing spikes") and Copilot reads the config, searches the docs, and proposes a validated configuration change to fix the issue.
|
||||
- **Get config suggestions inline** - suggestions appear as interactive cards with an explanation and a YAML diff; click **Apply** to write the change directly to your current settings, or **Decline** to keep the conversation going.
|
||||
- **Ask questions** about any model (e.g. [Prophet](https://docs.victoriametrics.com/anomaly-detection/components/models/#prophet) or [Z-score](https://docs.victoriametrics.com/anomaly-detection/components/models/#online-z-score) — parameters, trade-offs, when to use each)
|
||||
- **Improve detection quality** — describe what's wrong ("too many false positives", "missing spikes") and Copilot reads the config, searches the docs, and proposes a validated configuration change to fix the issue.
|
||||
- **Get config suggestions inline** — suggestions appear as interactive cards with an explanation and a YAML diff; click **Apply** to write the change directly to your current settings, or **Decline** to keep the conversation going.
|
||||
|
||||
### How it works
|
||||
|
||||
@@ -205,61 +205,22 @@ Copilot appears as a **chat popup** anchored to the bottom-right corner of the p
|
||||
AI Assistant is disabled by default; enable it with `VMANOMALY_COPILOT_ENABLED=true`, then configure an LLM provider API key and, optionally, a model. Once enabled and configured, Copilot will appear as a chat popup in the bottom-right corner of the UI.
|
||||
|
||||
|
||||
|
||||
Supported providers and model formats:
|
||||
|
||||
- **Anthropic** - set `ANTHROPIC_API_KEY`; model format: `anthropic:<model>`
|
||||
- **Anthropic** — set `ANTHROPIC_API_KEY`; model format: `anthropic:<model>`
|
||||
- Examples: `claude-haiku-4-5`, `claude-sonnet-4-6`; see [full list](https://platform.claude.com/docs/en/about-claude/models/overview#latest-models-comparison)
|
||||
- **OpenAI** - set `OPENAI_API_KEY`; model format: `openai:<model>` or `openai-responses:<model>`
|
||||
- **OpenAI** — set `OPENAI_API_KEY`; model format: `openai:<model>`
|
||||
- Examples: `gpt-5-mini`, `gpt-5.2`; see [full list](https://platform.openai.com/docs/models)
|
||||
- {{% available_from "v1.29.1" anomaly %}} OpenAI-compatible non-OpenAI providers are supported through `OPENAI_BASE_URL` + `OPENAI_API_KEY`
|
||||
- {{% available_from "v1.29.1" anomaly %}} Azure OpenAI is supported through `AZURE_OPENAI_ENDPOINT` + `OPENAI_API_VERSION` + `AZURE_OPENAI_API_KEY` (or `AZURE_OPENAI_AD_TOKEN`); do not set both `OPENAI_BASE_URL` and `AZURE_OPENAI_ENDPOINT`
|
||||
- {{% available_from "v1.29.1" anomaly %}} **Google** - model format: `google-gla:<model>` or `google-vertex:<model>`
|
||||
- Use `GOOGLE_API_KEY` for `google-gla`; for `google-vertex`, use Application Default Credentials, a service account (`GOOGLE_APPLICATION_CREDENTIALS`), or `GOOGLE_API_KEY`
|
||||
- Example: `google-gla:gemini-2.5-pro-preview`
|
||||
- {{% available_from "v1.29.1" anomaly %}} **AWS Bedrock** - use AWS credentials or an IAM role; model format: `bedrock:<model>`
|
||||
- Preferred: set `AWS_BEARER_TOKEN_BEDROCK` and `AWS_DEFAULT_REGION`
|
||||
- Alternative: set `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_DEFAULT_REGION` (add `AWS_SESSION_TOKEN` if using a session token)
|
||||
- Example: `bedrock:anthropic.claude-sonnet-4-5-20250929-v1:0`
|
||||
- {{% available_from "v1.29.1" anomaly %}} **OpenRouter** - set `OPENROUTER_API_KEY`; model format: `openrouter:<model>`
|
||||
- Example: `openrouter:anthropic/claude-sonnet-4-5`
|
||||
|
||||
Set the credentials matching your selected provider:
|
||||
Set exactly one provider key matching your selected model provider:
|
||||
|
||||
```bash
|
||||
# Anthropic
|
||||
export ANTHROPIC_API_KEY=your_key_here
|
||||
|
||||
# OpenAI
|
||||
# or OpenAI
|
||||
export OPENAI_API_KEY=your_key_here
|
||||
|
||||
# OpenAI-compatible non-OpenAI providers
|
||||
export OPENAI_BASE_URL=https://api.example.com/v1
|
||||
export OPENAI_API_KEY=your_key_here
|
||||
|
||||
# Azure OpenAI
|
||||
export AZURE_OPENAI_ENDPOINT=https://example.openai.azure.com
|
||||
export OPENAI_API_VERSION=2024-10-21
|
||||
export AZURE_OPENAI_API_KEY=your_key_here
|
||||
# or: export AZURE_OPENAI_AD_TOKEN=your_entra_token
|
||||
|
||||
# Google Generative Language API
|
||||
export GOOGLE_API_KEY=your_key_here
|
||||
|
||||
# Google Vertex AI service account
|
||||
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
|
||||
# or use Application Default Credentials: gcloud auth application-default login
|
||||
|
||||
# OpenRouter
|
||||
export OPENROUTER_API_KEY=your_key_here
|
||||
|
||||
# AWS Bedrock (preferred: bearer token)
|
||||
export AWS_BEARER_TOKEN_BEDROCK=your_bearer_token
|
||||
export AWS_DEFAULT_REGION=us-east-1
|
||||
# AWS Bedrock (alternative: access key pair or IAM role)
|
||||
# export AWS_ACCESS_KEY_ID=your_access_key
|
||||
# export AWS_SECRET_ACCESS_KEY=your_secret_key
|
||||
# export AWS_DEFAULT_REGION=us-east-1
|
||||
# export AWS_SESSION_TOKEN=your_session_token # if using a session token
|
||||
```
|
||||
|
||||
Optionally override the default model:
|
||||
@@ -628,17 +589,6 @@ If the **results** look good and the **model configuration should be deployed in
|
||||
|
||||
## Changelog
|
||||
|
||||
### v1.5.1
|
||||
Released: 2026-03-25
|
||||
|
||||
vmanomaly version: [v1.29.1](https://docs.victoriametrics.com/anomaly-detection/changelog/#v1291)
|
||||
|
||||
- FEATURE: GCP/AWS/OpenRouter Copilot LLM providers are now supported in addition to OpenAI and Anthropic, for more choice and flexibility in AI assistance. See [AI Assistance](#ai-assistance) section for details on supported providers and configuration.
|
||||
|
||||
- BUGFIX: The [Visualization Panel](#visualization-panel) now correctly switches between "query" and "detect" modes when the respective buttons are clicked, without showing stale results from the previous mode, once a running anomaly detection task is explicitly cancelled (regression introduced in [v1.5.0](#v150)).
|
||||
|
||||
- BUGFIX: Fixed an issue with [crypto.randomUUID](https://developer.mozilla.org/en-US/docs/Web/API/Crypto/randomUUID) introduced in [v1.29.0](#v1290) in [UI copilot](https://docs.victoriametrics.com/anomaly-detection/ui/#ai-assistance) that led to the front app showing a blank page.
|
||||
|
||||
### v1.5.0
|
||||
Released: 2026-03-05
|
||||
|
||||
|
||||
@@ -169,7 +169,7 @@ schedulers:
|
||||
periodic:
|
||||
class: 'periodic'
|
||||
infer_every: "30s"
|
||||
fit_every: "365d"
|
||||
fit_every: "24h"
|
||||
fit_window: "24h"
|
||||
|
||||
reader:
|
||||
@@ -189,9 +189,8 @@ reader:
|
||||
|
||||
models:
|
||||
zscore:
|
||||
class: 'zscore_online'
|
||||
class: 'zscore'
|
||||
z_threshold: 3.5
|
||||
decay: 0.99 # gives more weight to recent data points, value should be in (0, 1], 1 means to give equal weight to all data
|
||||
provide_series: ['anomaly_score']
|
||||
# if queries are not specified, all queries from reader will be used
|
||||
# if schedulers are not specified, all schedulers will be used
|
||||
@@ -221,8 +220,8 @@ reader:
|
||||
|
||||
After saving the changes, hot reload will automatically detect the changes in `config.yaml` and attempt to reload the configuration. As the changes are valid, the service will log a success message and increment the `vmanomaly_hot_reload_events_total` metric with `status="success"` label:
|
||||
|
||||
- All the model instances of class `zscore_online`, that were trained on `host_network_receive_errors` can be reused as they are still valid and "fresh" for making inference on new datapoints until the next `fit_every` happens.
|
||||
- All the model instances of class `zscore_online`, that were trained on `cpu_seconds_total` will be re-trained with the new query expression and frequency, as old model instances are not valid anymore.
|
||||
- All the model instances of class `zscore`, that were trained on `host_network_receive_errors` can be reused as they are still valid and "fresh" for making inference on new datapoints until the next `fit_every` happens (10m - 5m).
|
||||
- All the model instances of class `zscore`, that were trained on `cpu_seconds_total` will be re-trained with the new query expression and frequency, as old model instances are not valid anymore.
|
||||
|
||||
|
||||
## Environment variables
|
||||
|
||||
@@ -158,21 +158,21 @@ Config with a split example:
|
||||
```yaml
|
||||
models:
|
||||
model_above_expected:
|
||||
class: 'zscore_online'
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
z_threshold: 3.0
|
||||
# track only cases when y > yhat, otherwise anomaly_score would be explicitly set to 0
|
||||
detection_direction: 'above_expected'
|
||||
# for this query we do not need to track lower values, thus, set anomaly detection tracking for y > yhat (above_expected)
|
||||
queries: ['query_values_the_lower_the_better']
|
||||
model_below_expected:
|
||||
class: 'zscore_online'
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
z_threshold: 3.0
|
||||
# track only cases when y < yhat, otherwise anomaly_score would be explicitly set to 0
|
||||
detection_direction: 'below_expected'
|
||||
# for this query we do not need to track higher values, thus, set anomaly detection tracking for y < yhat (below_expected)
|
||||
queries: ['query_values_the_higher_the_better']
|
||||
model_bidirectional_default:
|
||||
class: 'zscore_online'
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
z_threshold: 3.0
|
||||
# track in both directions, same backward-compatible behavior in case this arg is missing
|
||||
detection_direction: 'both'
|
||||
@@ -181,12 +181,9 @@ models:
|
||||
reader:
|
||||
# ...
|
||||
queries:
|
||||
query_values_the_lower_the_better:
|
||||
expr: metricsql_expression1
|
||||
query_values_the_higher_the_better:
|
||||
expr: metricsql_expression2
|
||||
query_values_both_direction_matters:
|
||||
expr: metricsql_expression3
|
||||
query_values_the_lower_the_better: metricsql_expression1
|
||||
query_values_the_higher_the_better: metricsql_expression2
|
||||
query_values_both_direction_matters: metricsql_expression3
|
||||
# other components like writer, schedule, monitoring
|
||||
```
|
||||
|
||||
@@ -194,12 +191,11 @@ reader:
|
||||
|
||||
`min_dev_from_expected`{{% available_from "v1.13.0" anomaly %}} argument is designed to **reduce [false positives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-positive)** in scenarios where deviations between the actual value (`y`) and the expected value (`yhat`) are **relatively** high. Such deviations can cause models to generate high [anomaly scores](https://docs.victoriametrics.com/anomaly-detection/faq/#what-is-anomaly-score). However, these deviations may not be significant enough in **absolute values** from a business perspective to be considered anomalies. This parameter ensures that anomaly scores for data points where `|y - yhat| < min_dev_from_expected` are explicitly set to 0. By default, if this parameter is not set, it is set to `0` to maintain backward compatibility.
|
||||
|
||||
> [!NOTE]
|
||||
{{% available_from "v1.23.0" anomaly %}} The `min_dev_from_expected` argument can be a list of two float values, allowing separate thresholds for upper and lower deviations. This is useful when the acceptable deviation varies in different directions (e.g., `min_dev_from_expected: [0.01, 0.02]` means that the lower bound is `0.01` when `y` is less than `yhat` and the upper bound is `0.02` when `y` is greater than `yhat`). If only one value is provided, it is broadcasted to both directions, meaning that the same threshold is applied for both upper and lower deviations (e.g., `min_dev_from_expected: 0.01` means that the lower bound is `0.01` when `y` is less than `yhat` and the upper bound is also `0.01` when `y` is greater than `yhat`).
|
||||
> {{% available_from "v1.23.0" anomaly %}} The `min_dev_from_expected` argument can be a list of two float values, allowing separate thresholds for upper and lower deviations. This is useful when the acceptable deviation varies in different directions (e.g., `min_dev_from_expected: [0.01, 0.02]` means that the lower bound is `0.01` when `y` is less than `yhat` and the upper bound is `0.02` when `y` is greater than `yhat`). If only one value is provided, it is broadcasted to both directions, meaning that the same threshold is applied for both upper and lower deviations (e.g., `min_dev_from_expected: 0.01` means that the lower bound is `0.01` when `y` is less than `yhat` and the upper bound is also `0.01` when `y` is greater than `yhat`).
|
||||
|
||||
`min_dev_from_expected` must be >= 0. The higher the value of `min_dev_from_expected` is, the more significant the deviation must be to generate an anomaly score != 0. This helps in filtering out small *absolute* deviations that may not be meaningful in the context of the monitored metric.
|
||||
> `min_dev_from_expected` must be >= 0. The higher the value of `min_dev_from_expected`, the more significant the deviation must be to generate an anomaly score > 1. This helps in filtering out small deviations that may not be meaningful in the context of the monitored metric.
|
||||
|
||||
*Example*: Consider a scenario where CPU utilization in specific mode is low and oscillates around 0.3% (0.003). A sudden spike to 1.3% (0.013) represents a +333% increase in **relative** terms, but only a +1 percentage point (0.01) increase in **absolute** terms, which may be negligible and not warrant an alert. Setting the `min_dev_from_expected` argument to `0.01` (1%) will ensure that all anomaly scores for deviations <= `0.01` are set to 0.
|
||||
*Example*: Consider a scenario where CPU utilization is low and oscillates around 0.3% (0.003). A sudden spike to 1.3% (0.013) represents a +333% increase in **relative** terms, but only a +1 percentage point (0.01) increase in **absolute** terms, which may be negligible and not warrant an alert. Setting the `min_dev_from_expected` argument to `0.01` (1%) will ensure that all anomaly scores for deviations <= `0.01` are set to 0.
|
||||
|
||||
Visualizations below demonstrate this concept; the green zone defined as the `[yhat - min_dev_from_expected, yhat + min_dev_from_expected]` range excludes actual data points (`y`) from generating anomaly scores if they fall within that range.
|
||||
|
||||
@@ -219,65 +215,23 @@ reader:
|
||||
# ...
|
||||
queries:
|
||||
# the usage of min_dev should reduce false positives here
|
||||
need_to_include_min_dev:
|
||||
expr: small_abs_values_metricsql_expression
|
||||
need_to_include_min_dev: small_abs_values_metricsql_expression
|
||||
# min_dev is not really needed here
|
||||
normal_behavior:
|
||||
expr: no_need_to_exclude_small_deviations_metricsql_expression
|
||||
normal_behavior: no_need_to_exclude_small_deviations_metricsql_expression
|
||||
models:
|
||||
zscore_with_min_dev:
|
||||
class: 'zscore_online'
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
z_threshold: 3
|
||||
min_dev_from_expected: [5.0, 5.0] # set the same threshold for both directions, meaning that deviations less than 5.0 in absolute values won't be considered anomalous, even if they are relatively significant
|
||||
min_dev_from_expected: [5.0, 5.0]
|
||||
queries: ['need_to_include_min_dev'] # use such models on queries where domain experience confirms usefulness
|
||||
zscore_wo_min_dev:
|
||||
class: 'zscore_online'
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
z_threshold: 3
|
||||
# if not set, equals to setting min_dev_from_expected == 0 (meaning no filtering is applied)
|
||||
# min_dev_from_expected: [0.0, 0.0]
|
||||
queries: ['normal_behavior'] # use the default where it's not needed
|
||||
```
|
||||
|
||||
### Minimal relative deviation from expected
|
||||
|
||||
{{% available_from "v1.29.1" anomaly %}} `min_rel_dev_from_expected` argument serves a similar purpose to `min_dev_from_expected` (see [section above](#minimal-deviation-from-expected)), but focuses on **relative deviations** rather than absolute ones. It is designed to reduce [false positives](https://victoriametrics.com/blog/victoriametrics-anomaly-detection-handbook-chapter-1/#false-positive) in scenarios where the relative deviation between the actual value (`y`) and the expected value (`yhat`) is high, but the absolute deviation is not significant enough to be considered an anomaly from a business perspective. This parameter ensures that anomaly scores for data points where `|y - yhat| / |yhat| < min_rel_dev_from_expected` are explicitly set to 0. By default, if this parameter is not set, it is set to `0` to maintain backward compatibility.
|
||||
|
||||
Parameter can be a list of two float values, *allowing separate thresholds for upper and lower relative deviations*. If only one value is provided, it is broadcasted to both directions.
|
||||
|
||||
> [!NOTE]
|
||||
If both `min_dev_from_expected` [arg](#minimal-deviation-from-expected) and `min_rel_dev_from_expected` are set, the model will combine both filters. A data point will be considered anomalous (i.e., have an anomaly score != 0) only if it exceeds **both** the *absolute* deviation threshold defined by `min_dev_from_expected` and the *relative* deviation threshold defined by `min_rel_dev_from_expected`. This allows for more granular control over anomaly detection, ensuring that only significant deviations in both absolute and relative terms are flagged as anomalies.
|
||||
|
||||
|
||||
*Example*: Consider a scenario of monitoring incoming traffic to websites that typically receive an *unknown in advance* number of requests per second (from tens to thousands). Setting an absolute deviation threshold with `min_dev_from_expected` *may not be effective in reducing false positives*, as even a small increase in traffic (e.g., from 10 to 20 requests per second) can represent a 100% relative increase, which may be significant for that website. Instead, setting `min_rel_dev_from_expected` to a smaller relative value - `[20, 40]` (20/40%) - will ensure that a traffic drop from 10 to 8 requests per second (20% decrease) and a traffic spike from 10 to 14 requests per second (40% increase) won't be considered anomalous, even if they exceed confidence intervals, thus reducing false positives for small absolute deviations that are relatively significant.
|
||||
|
||||
Example of how to use this parameter in config:
|
||||
|
||||
```yaml
|
||||
# other components like writer, schedulers, monitoring ...
|
||||
reader:
|
||||
# ...
|
||||
queries:
|
||||
# the usage of min_rel_dev should reduce false positives here
|
||||
need_to_include_min_rel_dev:
|
||||
expr: small_abs_values_metricsql_expression
|
||||
# min_rel_dev is not really needed here
|
||||
normal_behavior:
|
||||
expr: no_need_to_exclude_small_deviations_metricsql_expression
|
||||
models:
|
||||
zscore_with_min_rel_dev:
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3
|
||||
min_rel_dev_from_expected: [10, 20] # set different thresholds for both directions, meaning that relative deviations less than 10% when y < yhat and less than 20% when y > yhat won't be considered anomalous, even if they exceed confidence intervals, thus, reducing false positives for small absolute deviations that are relatively significant
|
||||
queries: ['need_to_include_min_rel_dev'] # use such models on queries where domain experience confirms usefulness
|
||||
zscore_wo_min_rel_dev:
|
||||
class: 'zscore_online'
|
||||
z_threshold: 3
|
||||
# if not set, equals to setting min_rel_dev_from_expected == 0 (meaning no filtering is applied)
|
||||
# min_rel_dev_from_expected: [0, 0]
|
||||
queries: ['normal_behavior'] # use the default where it's not needed
|
||||
```
|
||||
|
||||
|
||||
### Group by
|
||||
|
||||
> The `groupby` argument works only in combination with [multivariate models](#multivariate-models).
|
||||
@@ -335,14 +289,14 @@ The most common **use case** is when there is a preference to **widen one side**
|
||||
# other components like reader, writer, schedulers, monitoring ...
|
||||
models:
|
||||
zscore_no_scale:
|
||||
class: 'zscore_online'
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
z_threshold: 3
|
||||
# if not set, equals to [1.0, 1.0], meaning no scaling is applied
|
||||
# scale: [1.0, 1.0]
|
||||
zscore_scaled:
|
||||
class: 'zscore_online'
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
z_threshold: 3
|
||||
# vs `zscore_no_scale`, increase lower confidence interval width by 1.2x, decrease upper confidence width by 25% (1.0 - 0.25 = 0.75), thus, making the model more conservative in flagging anomalies when y < yhat and more aggressive when y > yhat
|
||||
# vs `zscore_no_scale`, increase lower confidence interval width by 1.2x, decrease upper confidence width by 25%
|
||||
scale: [1.2, 0.75]
|
||||
```
|
||||
|
||||
@@ -371,7 +325,7 @@ reader:
|
||||
# if no data range defined, it will be implicitly converted to ["-inf", "inf"]
|
||||
models:
|
||||
zscore_mixed:
|
||||
class: 'zscore_online'
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
z_threshold: 3
|
||||
clip_predictions: True
|
||||
queries: [
|
||||
@@ -385,7 +339,7 @@ models:
|
||||
'q2_no_clip',
|
||||
]
|
||||
zscore_no_clip:
|
||||
class: 'zscore_online'
|
||||
class: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
z_threshold: 3
|
||||
# if not set, by default resolved to `clip_predictions: False`
|
||||
queries: [
|
||||
@@ -596,27 +550,27 @@ Tuning [hyperparameters](https://en.wikipedia.org/wiki/Hyperparameter_(machine_l
|
||||
> # this may result in 1 model per each unique labelset with different hyperparameters, such as z_threshold
|
||||
> autotuned_model:
|
||||
> class: 'auto'
|
||||
> tuned_class_name: 'zscore_online'
|
||||
> tuned_class_name: 'zscore'
|
||||
> optimization_params:
|
||||
> anomaly_percentage: 0.01
|
||||
> queries: ['your_query']
|
||||
> ```
|
||||
> will produce **one model per each unique labelset** found in `your_query` results, with **different hyperparameters**, such as `z_threshold`, while
|
||||
> will produce **one model per each unique labelset** found in `your_query` results, with different hyperparameters, such as `z_threshold`, while
|
||||
> ```yaml
|
||||
> models:
|
||||
> # this will result in 1 model per each timeseries returned by the query,
|
||||
> # with the same hyperparameters, such as z_threshold
|
||||
> zscore_model:
|
||||
> class: 'zscore_online'
|
||||
> class: 'zscore'
|
||||
> z_threshold: 3 # all models will have the same z_threshold, but different parameters, such as mean, std, etc.
|
||||
> queries: ['your_query']
|
||||
> ```
|
||||
> will produce **one model per each timeseries** returned by `your_query`, with **the same** hyperparameters, such as `z_threshold`, but different parameters, such as mean, std, etc.
|
||||
> will produce **one model per each timeseries** returned by `your_query`, with the same hyperparameters, such as `z_threshold`, but different parameters, such as mean, std, etc.
|
||||
|
||||
*Parameters specific for vmanomaly*:
|
||||
|
||||
* `class` (string) - model class name `"model.auto.AutoTunedModel"` (or `auto` with class alias support{{% available_from "v1.13.0" anomaly %}})
|
||||
* `tuned_class_name` (string) - [Built-in model class](#built-in-models) to wrap, e.g. `zscore_online`
|
||||
* `tuned_class_name` (string) - [Built-in model class](#built-in-models) to wrap, i.e. `model.zscore.ZscoreModel` (or `zscore` with class alias support{{% available_from "v1.13.0" anomaly %}}).
|
||||
* `optimization_params` (dict) - Optimization parameters for *unsupervised* model tuning. Controls the percentage of found anomalies, as well as the tradeoff between time spent and accuracy. The higher `timeout` and `n_trials` are, the better the model configuration that can be found for `tuned_class_name`, but the longer it takes, and vice versa. Set `n_jobs` to `-1` to use all the CPUs available; this makes sense only if you have a big dataset to train on during `fit` calls, otherwise the overhead isn't worth it.
|
||||
- `anomaly_percentage` (float) - Expected percentage of anomalies that can be seen in training data, from `[0, 0.5)` interval (i.e. 0.01 means it's expected ~ 1% of anomalies to be present in training data). This is a *required* parameter.
|
||||
- `optimized_business_params` (list[string]) - {{% available_from "v1.15.0" anomaly %}} this argument allows particular [business-specific parameters](#common-args) such as [`detection_direction`](https://docs.victoriametrics.com/anomaly-detection/components/models/#detection-direction) or [`min_dev_from_expected`](https://docs.victoriametrics.com/anomaly-detection/components/models/#minimal-deviation-from-expected) to remain **unchanged during optimizations, retaining their initial values**. For example, setting `optimized_business_params` to `['detection_direction']` allows only the `detection_direction` business-specific argument to be optimized, while `min_dev_from_expected` retains its initial value (e.g. `[1, 2]` if set to that value in the model config). By default, if not set, it is equal to `[]` (empty list), meaning no business params will be optimized. **A recommended option is to leave it empty** as this feature is still experimental and may lead to unexpected results.
|
||||
@@ -635,7 +589,7 @@ Tuning [hyperparameters](https://en.wikipedia.org/wiki/Hyperparameter_(machine_l
|
||||
models:
|
||||
your_desired_alias_for_a_model:
|
||||
class: 'auto' # or 'model.auto.AutoTunedModel' until v1.13.0
|
||||
tuned_class_name: 'zscore_online'
|
||||
tuned_class_name: 'zscore' # or 'model.zscore.ZscoreModel' until v1.13.0
|
||||
optimization_params:
|
||||
anomaly_percentage: 0.004 # required. i.e. we expect <= 0.4% of anomalies to be present in training data
|
||||
seed: 42 # fix reproducibility & determinism
|
||||
@@ -646,7 +600,7 @@ models:
|
||||
n_trials: 128 # how many configurations to sample from search space during optimization
|
||||
timeout: 10 # how many seconds to spend on optimization for each trained model during `fit` phase call
|
||||
n_jobs: 1 # how many jobs in parallel to launch. Consider making it > 1 only if you have fit window containing > 10000 datapoints for each series
|
||||
optimized_business_params: [] # business-specific params to include in optimization, if not set - defaults to empty list, meaning no business params will be optimized, which is a recommended option as business arguments are better set by stakeholders rather than algorithms
|
||||
optimized_business_params: [] # business-specific params to include in optimization; defaults to an empty list if not set
|
||||
# ...
|
||||
```
|
||||
|
||||
@@ -1265,7 +1219,7 @@ monitoring:
|
||||
Let's pull the docker image for `vmanomaly`:
|
||||
|
||||
```sh
|
||||
docker pull victoriametrics/vmanomaly:v1.29.1
|
||||
docker pull victoriametrics/vmanomaly:v1.29.0
|
||||
```
|
||||
|
||||
Now we can run the docker container putting as volumes both config and model file:
|
||||
@@ -1279,7 +1233,7 @@ docker run -it \
|
||||
-v $(PWD)/license:/license \
|
||||
-v $(PWD)/custom_model.py:/vmanomaly/model/custom.py \
|
||||
-v $(PWD)/custom.yaml:/config.yaml \
|
||||
victoriametrics/vmanomaly:v1.29.1 /config.yaml \
|
||||
victoriametrics/vmanomaly:v1.29.0 /config.yaml \
|
||||
--licenseFile=/license
|
||||
--watch
|
||||
```
|
||||
@@ -1408,4 +1362,4 @@ models:
|
||||
# anomaly_score_outside_data_range: 1.01 # auto anomaly score (1.01) if `y` (real value) is outside of data_range, if set
|
||||
```
|
||||
|
||||
Resulting metrics of the model are described [here](#vmanomaly-output).
|
||||
Resulting metrics of the model are described [here](#vmanomaly-output).
|
||||
@@ -395,7 +395,7 @@ services:
|
||||
restart: always
|
||||
vmanomaly:
|
||||
container_name: vmanomaly
|
||||
image: victoriametrics/vmanomaly:v1.29.1
|
||||
image: victoriametrics/vmanomaly:v1.29.0
|
||||
depends_on:
|
||||
- "victoriametrics"
|
||||
ports:
|
||||
|
||||
@@ -6,70 +6,33 @@ build:
|
||||
sitemap:
|
||||
disable: true
|
||||
---
|
||||
**The guide covers:**
|
||||
|
||||
This guide walks you through deploying a [VictoriaMetrics cluster](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/) version on Kubernetes in high-availability mode.
|
||||
* High availability monitoring via [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) in [Kubernetes](https://kubernetes.io/) with Helm charts
|
||||
* How to store metrics
|
||||
* How to scrape metrics from k8s components using a service discovery
|
||||
* How to visualize stored data
|
||||
* How to store metrics in [VictoriaMetrics](https://victoriametrics.com)
|
||||
|
||||
By the end of this guide, you will know:
|
||||
**Preconditions**
|
||||
|
||||
- How to install and configure [VictoriaMetrics cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) using Helm.
|
||||
- How high-availability mode works in VictoriaMetrics.
|
||||
- How to scrape metrics from Kubernetes components using service discovery.
|
||||
|
||||
## Overview
|
||||
|
||||
In this guide, high availability is achieved by configuring [replication](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#replication-and-data-safety) on `vminsert` to a value of 2. This means every incoming data point is written twice to separate `vmstorage` pods, so data remains available as long as at least one replica of a given time series is reachable.
|
||||
|
||||
This setup requires **twice as much storage** as a normal, non-replicating cluster because `vminsert` fans out each write into two `vmstorage` pods.
|
||||
|
||||
Duplication causes `vmselect` to read back two copies of each sample, potentially skewing results. For example, in aggregations such as `sum` or `count`, this would double the result. To handle this, we must enable [de-duplication](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#deduplication) in the `vmselect` pods to collapse the replicas into a single sample per scrape interval.
|
||||
|
||||
## Preconditions
|
||||
|
||||
> [!NOTE] Note
|
||||
> We used a GKE cluster (v1.35) from [GCP](https://cloud.google.com/) in this guide, but it can also be applied to any Kubernetes cluster. For example, [Amazon EKS](https://aws.amazon.com/ru/eks/) or an on-premises cluster.
|
||||
|
||||
- [Kubernetes cluster](https://cloud.google.com/kubernetes-engine).
|
||||
- [Helm](https://helm.sh/docs/intro/install)
|
||||
- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl)
|
||||
- [jq](https://stedolan.github.io/jq/download/) tool
|
||||
* [Kubernetes cluster 1.19.12-gke.2100](https://cloud.google.com/kubernetes-engine). We use GKE cluster from [GCP](https://cloud.google.com/) but this guide also applies to any Kubernetes cluster. For example, [Amazon EKS](https://aws.amazon.com/ru/eks/).
|
||||
* [Helm 3](https://helm.sh/docs/intro/install)
|
||||
* [kubectl 1.21](https://kubernetes.io/docs/tasks/tools/install-kubectl)
|
||||
* [jq](https://stedolan.github.io/jq/download/) tool
|
||||
|
||||
## 1. VictoriaMetrics Helm repository
|
||||
|
||||
Run the following command to add the VictoriaMetrics Helm repository:
|
||||
Please see the relevant [VictoriaMetrics Helm repository](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/#1-victoriametrics-helm-repository) section in previous guides.
|
||||
|
||||
```sh
|
||||
helm repo add vm https://victoriametrics.github.io/helm-charts/
|
||||
helm repo update
|
||||
```
|
||||
|
||||
Then, verify that VictoriaMetrics charts are available with:
|
||||
|
||||
```sh
|
||||
helm search repo vm/
|
||||
```
|
||||
|
||||
You should get a list of charts similar to this:
|
||||
|
||||
```text
|
||||
NAME CHART VERSION APP VERSION DESCRIPTION
|
||||
vm/victoria-metrics-cluster 0.35.0 v1.136.0 VictoriaMetrics Cluster version - high-performa...
|
||||
vm/victoria-metrics-agent 0.32.0 v1.136.0 VictoriaMetrics Agent - collects metrics from v...
|
||||
vm/victoria-metrics-common 0.0.46 VictoriaMetrics Common - contains shared templa...
|
||||
...(list continues)...
|
||||
```
|
||||
|
||||
## 2. Install VictoriaMetrics Cluster from the Helm chart
|
||||
|
||||
A [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) consists of three services:
|
||||
Execute the following command in your terminal:
|
||||
|
||||
- `vminsert`: receives incoming metrics and distributes them across vmstorage nodes via consistent hashing on metric names and labels.
|
||||
- `vmstorage`: stores raw data and serves queries filtered by time range and labels.
|
||||
- `vmselect`: executes queries by fetching data across all configured vmstorage nodes.
|
||||
|
||||
Create a high-availability configuration file for the VictoriaMetrics services:
|
||||
|
||||
```sh
|
||||
cat <<EOF > victoria-metrics-cluster-values.yml
|
||||
cat <<EOF | helm install vmcluster vm/victoria-metrics-cluster -f -
|
||||
vmselect:
|
||||
extraArgs:
|
||||
dedup.minScrapeInterval: 1ms
|
||||
@@ -95,40 +58,30 @@ vmstorage:
|
||||
EOF
|
||||
```
|
||||
|
||||
* The `helm install vmcluster vm/victoria-metrics-cluster` command installs [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/) to the default [namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/).
|
||||
* `dedup.minScrapeInterval: 1ms` configures [de-duplication](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) for the cluster that de-duplicates data points in the same time series if they fall within the same discrete 1ms bucket. The earliest data point will be kept. In the case of equal timestamps, an arbitrary data point will be kept.
|
||||
* `replicationFactor: 2` Replication factor for the ingested data, i.e. how many copies should be made among distinct `-storageNode` instances. If the replication factor is greater than one, the deduplication must be enabled on the remote storage side.
|
||||
* `podAnnotations: prometheus.io/scrape: "true"` enables the scraping of metrics from the vmselect, vminsert and vmstorage pods.
|
||||
* `podAnnotations: prometheus.io/port: "some_port"` enables the scraping of metrics from the vmselect, vminsert and vmstorage pods from corresponding ports.
|
||||
* `replicaCount: 3` creates three replicas of vmselect, vminsert and vmstorage.
|
||||
|
||||
Let's break down how high availability is achieved:
|
||||
|
||||
- `replicaCount: 3` creates three replicas of vmselect, vminsert, and vmstorage each.
|
||||
- `replicationFactor: 2` enables [replication](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#replication-and-data-safety) for `vminsert` and `vmselect`.
|
||||
- `vminsert` uses `replicationFactor` to fan out writes. In this case, it creates two copies of the sample and distributes them among distinct `vmstorage` pods.
|
||||
- `vmselect` also gets a `replicationFactor` so it knows how many replicas to expect and when to treat a response as partial (more on this later).
|
||||
- `dedup.minScrapeInterval: 1ms` configures [de-duplication](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#deduplication) for `vmselect`, so it does not double-count samples when retrieving data from `vmstorage` pods.
|
||||
- `podAnnotations: prometheus.io/scrape: "true"` enables metric scraping so you can monitor your VictoriaMetrics cluster.
|
||||
- `podAnnotations: prometheus.io/port: "some_port"` defines the scraping port.
|
||||
|
||||
Install the VictoriaMetrics cluster in high-availability mode. The following command deploys a VictoriaMetrics cluster in the default namespace:
|
||||
|
||||
```sh
|
||||
helm install vmcluster vm/victoria-metrics-cluster -f victoria-metrics-cluster-values.yml
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
The expected result of the command execution is the following:
|
||||
|
||||
```text
|
||||
NAME: vmcluster
|
||||
LAST DEPLOYED: Mon Mar 2 12:50:25 2026
|
||||
LAST DEPLOYED: Thu Jul 29 13:33:51 2021
|
||||
NAMESPACE: default
|
||||
STATUS: deployed
|
||||
REVISION: 1
|
||||
DESCRIPTION: Install complete
|
||||
TEST SUITE: None
|
||||
NOTES:
|
||||
Write API:
|
||||
|
||||
The VictoriaMetrics write api can be accessed via port 8480 with the following DNS name from within your cluster:
|
||||
vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local.
|
||||
The VictoriaMetrics write api can be accessed via port 8480 via the following DNS name from within your cluster:
|
||||
vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local
|
||||
|
||||
Get the Victoria Metrics insert service URL by running these commands in the same shell:
|
||||
Get the VictoriaMetrics insert service URL by running these commands in the same shell:
|
||||
export POD_NAME=$(kubectl get pods --namespace default -l "app=vminsert" -o jsonpath="{.items[0].metadata.name}")
|
||||
kubectl --namespace default port-forward $POD_NAME 8480
|
||||
|
||||
@@ -139,20 +92,21 @@ prometheus.yml
|
||||
remote_write:
|
||||
- url: "http://<insert-service>/insert/0/prometheus/"
|
||||
|
||||
|
||||
for example - inside the Kubernetes cluster:
|
||||
|
||||
remote_write:
|
||||
- url: http://vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local:8480/insert/0/prometheus/
|
||||
- url: "http://vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local:8480/insert/0/prometheus/"
|
||||
Read API:
|
||||
|
||||
The VictoriaMetrics read api can be accessed via port 8481 with the following DNS name from within your cluster:
|
||||
vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local.
|
||||
vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local
|
||||
|
||||
Get the VictoriaMetrics select service URL by running these commands in the same shell:
|
||||
export POD_NAME=$(kubectl get pods --namespace default -l "app=vmselect" -o jsonpath="{.items[0].metadata.name}")
|
||||
kubectl --namespace default port-forward $POD_NAME 8481
|
||||
|
||||
You need to specify the service URL in your Grafana:
|
||||
You need to specify select service URL into your Grafana:
|
||||
NOTE: you need to use the Prometheus Data Source
|
||||
|
||||
Input this URL field into Grafana
|
||||
@@ -162,93 +116,127 @@ Input this URL field into Grafana
|
||||
|
||||
for example - inside the Kubernetes cluster:
|
||||
|
||||
http://vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local.:8481/select/0/prometheus/
|
||||
http://vmcluster-victoria-metrics-cluster-vmselect.default.svc.cluster.local:8481/select/0/prometheus/"
|
||||
|
||||
```
|
||||
|
||||
Verify that the VictoriaMetrics cluster pods are up and running by executing the following command:
|
||||
|
||||
|
||||
```sh
|
||||
kubectl get pods -l app.kubernetes.io/instance=vmcluster
|
||||
kubectl get pods | grep vmcluster
|
||||
```
|
||||
|
||||
You should see:
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
vmcluster-victoria-metrics-cluster-vminsert-788c76b69b-lphnn 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-788c76b69b-lxg2w 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-788c76b69b-qmtkp 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-65796bc88d-29cwm 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-65796bc88d-lz58p 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-65796bc88d-t42pr 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-0 1/1 Running 0 106s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-1 1/1 Running 0 91s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-2 1/1 Running 0 76s
|
||||
|
||||
vmcluster-victoria-metrics-cluster-vminsert-78b84d8cd9-4mh9d 1/1 Running 0 2m28s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-78b84d8cd9-4ppl7 1/1 Running 0 2m28s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-78b84d8cd9-782qk 1/1 Running 0 2m28s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-69c5f48bc6-4v4ws 1/1 Running 0 2m27s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-69c5f48bc6-kwc7q 1/1 Running 0 2m28s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-69c5f48bc6-v7pmk 1/1 Running 0 2m28s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-0 1/1 Running 0 2m27s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-1 1/1 Running 0 2m3s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-2 1/1 Running 0 99s
|
||||
```
|
||||
|
||||
## 3. Install vmagent from the Helm chart
|
||||
|
||||
To scrape metrics from Kubernetes with a VictoriaMetrics Cluster, we need to install [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and configure it with additional settings.
|
||||
|
||||
Install `vmagent` with the following command:
|
||||
To scrape metrics from Kubernetes with a VictoriaMetrics Cluster we will need to install [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) with some additional configurations. To do so, please run the following command:
|
||||
|
||||
```sh
|
||||
helm install vmagent vm/victoria-metrics-agent -f https://docs.victoriametrics.com/guides/examples/guide-vmcluster-vmagent-values.yaml
|
||||
```
|
||||
|
||||
You can obtain a copy of `guide-vmcluster-vmagent-values.yaml` to review with:
|
||||
Here is the full content of the `guide-vmcluster-vmagent-values.yaml` file:
|
||||
|
||||
```sh
|
||||
wget https://docs.victoriametrics.com/guides/examples/guide-vmcluster-vmagent-values.yaml
|
||||
```
|
||||
|
||||
Here are the key settings in the chart file that we used to install `vmagent` with Helm earlier:
|
||||
|
||||
- `remoteWrite` defines the vminsert endpoint that receives telemetry from vmagent. This value should match exactly the URL for the `remote_write` in the output of the VictoriaMetrics cluster installation in [Step 2](https://docs.victoriametrics.com/guides/k8s-ha-monitoring-via-vm-cluster/#id-2-install-victoriametrics-cluster-from-the-helm-chart).
|
||||
|
||||
```yaml
|
||||
remoteWrite:
|
||||
- url: http://vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local:8480/insert/0/prometheus/
|
||||
```
|
||||
|
||||
- `metric_relabel_configs` defines label-rewriting rules for the scraped metrics.
|
||||
|
||||
```yaml
|
||||
metric_relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [pod]
|
||||
regex: '(.+)'
|
||||
target_label: pod_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
source_labels: [container]
|
||||
regex: '(.+)'
|
||||
target_label: container_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
target_label: name
|
||||
replacement: k8s_stub
|
||||
- action: replace
|
||||
source_labels: [id]
|
||||
regex: '^/system\.slice/(.+)\.service$'
|
||||
target_label: systemd_service_name
|
||||
replacement: '${1}'
|
||||
```
|
||||
```yaml
|
||||
remoteWrite:
|
||||
- url: http://vmcluster-victoria-metrics-cluster-vminsert.default.svc.cluster.local:8480/insert/0/prometheus/
|
||||
|
||||
scrape_configs:
|
||||
- job_name: vmagent
|
||||
static_configs:
|
||||
- targets: ["localhost:8429"]
|
||||
- job_name: "kubernetes-apiservers"
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
relabel_configs:
|
||||
- source_labels:
|
||||
[
|
||||
__meta_kubernetes_namespace,
|
||||
__meta_kubernetes_service_name,
|
||||
__meta_kubernetes_endpoint_port_name,
|
||||
]
|
||||
action: keep
|
||||
regex: default;kubernetes;https
|
||||
- job_name: "kubernetes-nodes"
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- job_name: "kubernetes-nodes-cadvisor"
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
insecure_skip_verify: true
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
metrics_path: /metrics/cadvisor
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- source_labels: [__metrics_path__]
|
||||
target_label: metrics_path
|
||||
metric_relabel_configs:
|
||||
- action: replace
|
||||
source_labels: [pod]
|
||||
regex: '(.+)'
|
||||
target_label: pod_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
source_labels: [container]
|
||||
regex: '(.+)'
|
||||
target_label: container_name
|
||||
replacement: '${1}'
|
||||
- action: replace
|
||||
target_label: name
|
||||
replacement: k8s_stub
|
||||
- action: replace
|
||||
source_labels: [id]
|
||||
regex: '^/system\.slice/(.+)\.service$'
|
||||
target_label: systemd_service_name
|
||||
replacement: '${1}'
|
||||
```
|
||||
* By updating `remoteWrite` we configure [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) to write scraped metrics into the `vminsert` service.
|
||||
* The `metric_relabel_configs` section allows you to process Kubernetes metrics for the Grafana dashboard.
|
||||
|
||||
|
||||
Verify that `vmagent`'s pod is up and running by executing the following command:
|
||||
|
||||
|
||||
```shell
|
||||
kubectl get pod -l app.kubernetes.io/instance=vmagent
|
||||
kubectl get pods | grep vmagent
|
||||
```
|
||||
|
||||
Expected output:
|
||||
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
vmagent-victoria-metrics-agent-6848c6b58d-87rf6 1/1 Running 0 32s
|
||||
vmagent-victoria-metrics-agent-57ddbdc55d-h4ljb 1/1 Running 0 13s
|
||||
```
|
||||
|
||||
## 4. Verifying HA of VictoriaMetrics Cluster
|
||||
@@ -256,182 +244,157 @@ vmagent-victoria-metrics-agent-6848c6b58d-87rf6 1/1 Running 0 3
|
||||
Run the following command to check that VictoriaMetrics services are up and running:
|
||||
|
||||
```shell
|
||||
kubectl get svc -l app.kubernetes.io/instance=vmcluster
|
||||
kubectl get pods | grep victoria-metrics
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||
vmcluster-victoria-metrics-cluster-vminsert ClusterIP 10.43.157.170 <none> 8480/TCP 4m41s
|
||||
vmcluster-victoria-metrics-cluster-vmselect ClusterIP 10.43.222.181 <none> 8481/TCP 4m41s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage ClusterIP None <none> 8482/TCP,8401/TCP,8400/TCP 4m41s
|
||||
vmagent-victoria-metrics-agent-57ddbdc55d-h4ljb 1/1 Running 0 75s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-78b84d8cd9-s8v7x 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-78b84d8cd9-xlm9d 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vminsert-78b84d8cd9-xqxrh 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-69c5f48bc6-7dg95 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-69c5f48bc6-ck7qb 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vmselect-69c5f48bc6-jjqsl 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-0 1/1 Running 0 89s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-1 1/1 Running 0 63s
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-2 1/1 Running 0 34s
|
||||
```
|
||||
|
||||
To verify that metrics are present in VictoriaMetrics, you can send a curl request to the `vmselect` service. Run the following command to make `vmselect`'s port accessible from the local machine:
|
||||
To verify that metrics are present in VictoriaMetrics, send a curl request to the `vmselect` service from Kubernetes, or set up Grafana and check it via the web interface.
|
||||
|
||||
```sh
|
||||
Run the following command to see the list of services:
|
||||
|
||||
```shell
|
||||
kubectl get svc | grep vmselect
|
||||
```
|
||||
|
||||
The expected output:
|
||||
|
||||
```text
|
||||
vmcluster-victoria-metrics-cluster-vmselect ClusterIP 10.88.2.69 <none> 8481/TCP 1m
|
||||
```
|
||||
|
||||
Run the following command to make `vmselect`'s port accessible from the local machine:
|
||||
|
||||
|
||||
```shell
|
||||
kubectl port-forward svc/vmcluster-victoria-metrics-cluster-vmselect 8481:8481
|
||||
```
|
||||
|
||||
Execute the following command to get metrics via `curl`:
|
||||
|
||||
```sh
|
||||
curl -sg 'http://127.0.0.1:8481/select/0/prometheus/api/v1/query?query=count(up{kubernetes_pod_name=~".*vmselect.*"})' | jq
|
||||
curl -sg 'http://127.0.0.1:8481/select/0/prometheus/api/v1/query_range?query=count(up{kubernetes_pod_name=~".*vmselect.*"})&start=-10m&step=1m' | jq
|
||||
```
|
||||
|
||||
Let's break down the command:
|
||||
|
||||
* The request to `http://127.0.0.1:8481/select/0/prometheus/api/v1/query?query` uses the [VictoriaMetrics querying API](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format) to fetch metric data
|
||||
* The argument `query=count(up{kubernetes_pod_name=~".*vmselect.*"})` specifies the query. Specifically, we want to count the number of `vmselect` pods.
|
||||
* We pipe the output to `jq` to format the output in a more readable way.
|
||||
|
||||
You should see:
|
||||
The expected output is:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "success",
|
||||
"isPartial": false,
|
||||
"data": {
|
||||
"resultType": "vector",
|
||||
"resultType": "matrix",
|
||||
"result": [
|
||||
{
|
||||
"metric": {},
|
||||
"value": [
|
||||
1773419630,
|
||||
"3"
|
||||
"values": [
|
||||
[
|
||||
1628065480.657,
|
||||
"3"
|
||||
],
|
||||
[
|
||||
1628065540.657,
|
||||
"3"
|
||||
],
|
||||
[
|
||||
1628065600.657,
|
||||
"3"
|
||||
],
|
||||
[
|
||||
1628065660.657,
|
||||
"3"
|
||||
],
|
||||
[
|
||||
1628065720.657,
|
||||
"3"
|
||||
],
|
||||
[
|
||||
1628065780.657,
|
||||
"3"
|
||||
],
|
||||
[
|
||||
1628065840.657,
|
||||
"3"
|
||||
]
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"stats": {
|
||||
"seriesFetched": "3",
|
||||
"executionTimeMsec": 3
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The value should be 3, which is the number of replicas we configured earlier.
|
||||
* Query `http://127.0.0.1:8481/select/0/prometheus/api/v1/query_range` uses [VictoriaMetrics querying API](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format) to fetch previously stored data points;
|
||||
* Argument `query=count(up{kubernetes_pod_name=~".*vmselect.*"})` specifies the query we want to execute. Specifically, we calculate the number of `vmselect` pods.
|
||||
* Additional arguments `start=-10m&step=1m` set the requested time range from -10 minutes (10 minutes ago) to now (the default value if the `end` argument is omitted) and the step (the distance between returned data points) to 1 minute;
|
||||
* By adding `| jq` we pass the output to the jq utility which outputs information in json format
|
||||
|
||||
You can also execute the query in VMUI by opening `http://localhost:8481/select/0/vmui/` in your browser (where 0 is the [default tenant ID](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#multitenancy)).
|
||||
The expected result of the query `count(up{kubernetes_pod_name=~".*vmselect.*"})` should be equal to `3` - the number of replicas we set via `replicaCount` parameter.
|
||||
|
||||
Type `count(up{kubernetes_pod_name=~".*vmselect.*"})` and press **Execute query**
|
||||
|
||||

|
||||
To test via Grafana, we need to install it first. [Install and connect Grafana to VictoriaMetrics](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/#4-install-and-connect-grafana-to-victoriametrics-with-helm), log in to Grafana, and open the metrics explore page at `http://127.0.0.1:3000/explore`.
|
||||
|
||||
You can also try **Explore** > **Prometheus metrics** to discover metrics collected from the Kubernetes cluster.
|
||||
|
||||

|
||||

|
||||
|
||||
Choose `victoriametrics` from the list of datasources and enter `count(up{kubernetes_pod_name=~".*vmselect.*"})` into the **Metric browser** field as shown in the screenshot, then press the **Run query** button:
|
||||
|
||||

|
||||
|
||||
The expected output is:
|
||||
|
||||

|
||||
|
||||
## 5. High Availability
|
||||
|
||||
We can test that High Availability is working by simulating a failure. We can do this by shutting down one of the `vmstorage` pods.
|
||||
To test if High Availability works, we need to shut down one of the `vmstorage` pods. To do this, run the following command:
|
||||
|
||||
Reduce the number of `vmstorage` pods from 3 to 2 with the following command:
|
||||
|
||||
```shell
|
||||
kubectl scale sts vmcluster-victoria-metrics-cluster-vmstorage --replicas=2
|
||||
```
|
||||
|
||||
Verify that now we have two running `vmstorage` pods in the cluster by executing the following command:
|
||||
Verify that now we have two running `vmstorages` in the cluster by executing the following command:
|
||||
|
||||
|
||||
```shell
|
||||
kubectl get pods -l app=vmstorage
|
||||
kubectl get pods | grep vmstorage
|
||||
```
|
||||
|
||||
The expected output is:
|
||||
|
||||
```text
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-0 1/1 Running 0 3h20m
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-1 1/1 Running 0 3h20m
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-0 1/1 Running 0 44m
|
||||
vmcluster-victoria-metrics-cluster-vmstorage-1 1/1 Running 0 43m
|
||||
```
|
||||
|
||||
You can confirm that there are two `vmstorage` pods with this query:
|
||||
Return to Grafana Explore and press the **Run query** button again.
|
||||
|
||||
```sh
|
||||
curl -sg 'http://127.0.0.1:8481/select/0/prometheus/api/v1/query?query=count(up{kubernetes_pod_name=~".*vmstorage.*"})' | jq
|
||||
```
|
||||
The expected output is:
|
||||
|
||||
This should output 2 nodes:
|
||||

|
||||
|
||||
```json
|
||||
{
|
||||
"status": "success",
|
||||
"isPartial": false,
|
||||
"data": {
|
||||
"resultType": "vector",
|
||||
"result": [
|
||||
{
|
||||
"metric": {},
|
||||
"value": [
|
||||
1773437033,
|
||||
"2"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"stats": {
|
||||
"seriesFetched": "2",
|
||||
"executionTimeMsec": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
As you can see, after we scaled down the `vmstorage` replicas number from three to two pods, metrics are still available and correct. The response is not partial as it was before scaling. Also we see that query `count(up{kubernetes_pod_name=~".*vmselect.*"})` returns the same value as before.
|
||||
|
||||
Since each data point is stored across two storage pods, losing a single pod does not affect query results, and data remains available as long as at least one replica per time series remains reachable.
|
||||
To confirm that the number of `vmstorage` pods is equivalent to two, execute the following request in Grafana Explore:
|
||||
|
||||
You can also check if the query result is complete by examining the `isPartial` value in the response:
|
||||
- When `isPartial: false`, the response is complete for the requested time range and series. This means that enough storage replicas have responded (according to the configured `replicationFactor`).
|
||||
- When `isPartial: true`, it means `vmselect` could not fetch all the data it expected from `vmstorage`, so the returned series and values may be incomplete or incorrect.
|
||||

|
||||
|
||||
Running other queries such as `count(up{kubernetes_pod_name=~".*vmselect.*"})` should still return 3.
|
||||
|
||||
```sh
|
||||
curl -sg 'http://127.0.0.1:8481/select/0/prometheus/api/v1/query?query=count(up{kubernetes_pod_name=~".*vmselect.*"})' | jq
|
||||
```
|
||||
|
||||
This should print:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "success",
|
||||
"isPartial": false,
|
||||
"data": {
|
||||
"resultType": "vector",
|
||||
"result": [
|
||||
{
|
||||
"metric": {},
|
||||
"value": [
|
||||
1773437137,
|
||||
"3"
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"stats": {
|
||||
"seriesFetched": "3",
|
||||
"executionTimeMsec": 5
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This means that queries and metric ingestion are not affected by the "failure" of a single storage pod.
|
||||
|
||||
Finally, you can scale the `vmstorage` pods back to 3 to resume normal operation:
|
||||
|
||||
```sh
|
||||
kubectl scale sts vmcluster-victoria-metrics-cluster-vmstorage --replicas=3
|
||||
```
|
||||
|
||||
## 6. Final thoughts
|
||||
|
||||
- We set up a highly available VictoriaMetrics cluster on Kubernetes
|
||||
- We collected metrics from running services and stored them in the VictoriaMetrics database.
|
||||
- We configured `dedup.minScrapeInterval` and `replicationFactor: 2` for the VictoriaMetrics cluster for high availability purposes.
|
||||
- We tested and made sure that metrics are available even if one of the `vmstorage` nodes is turned off.
|
||||
|
||||
Next steps:
|
||||
- [Learn more about the cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/)
|
||||
- [Migrate existing metric data into VictoriaMetrics with vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/)
|
||||
- [Install Grafana](https://docs.victoriametrics.com/guides/k8s-monitoring-via-vm-cluster/#id-4-install-and-connect-grafana-to-victoriametrics-with-helm)
|
||||
|
||||
* We set up VictoriaMetrics for Kubernetes cluster with HA.
|
||||
* We collected metrics from running services and stored them in the VictoriaMetrics database.
|
||||
* We configured `dedup.minScrapeInterval` and `replicationFactor: 2` for VictoriaMetrics cluster for high availability purposes.
|
||||
* We tested and made sure that metrics are available even if one of the `vmstorage` nodes was turned off.
|
||||
|
||||
|
Before Width: | Height: | Size: 597 KiB |
|
After Width: | Height: | Size: 20 KiB |
|
After Width: | Height: | Size: 21 KiB |
|
After Width: | Height: | Size: 15 KiB |
BIN
docs/guides/k8s-ha-monitoring-via-vm-cluster/explore.webp
Normal file
|
After Width: | Height: | Size: 15 KiB |
|
Before Width: | Height: | Size: 110 KiB |
@@ -1,19 +0,0 @@
|
||||
VictoriaMetrics provides a rich set of public playgrounds that let you explore the full observability stack — metrics, logs, and traces — and even use migration tools without installing or configuring anything locally.
|
||||
|
||||
These playgrounds are backed by real VictoriaMetrics components and data, making them ideal for:
|
||||
|
||||
- Learning VictoriaMetrics query languages
|
||||
- Trying dashboards and queries interactively
|
||||
- Validating migration paths
|
||||
- Demonstrating features in talks or workshops
|
||||
|
||||
In this section, we'll walk through each available playground, explain what it does, and link to the relevant GitHub repositories.
|
||||
|
||||
## Docker Compose Demo
|
||||
|
||||
We provide Docker Compose files for:
|
||||
- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/README.md)
|
||||
- [VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/blob/master/deployment/docker/README.md)
|
||||
- [VictoriaTraces](https://github.com/VictoriaMetrics/VictoriaTraces/blob/master/deployment/docker/README.md).
|
||||
|
||||
The compose files are already configured, provisioned, and interconnected. They can be used to quickly set up a demo environment, suitable for a [quick start](https://docs.victoriametrics.com/victoriametrics/quick-start/).
|
||||
@@ -1,14 +0,0 @@
|
||||
---
|
||||
title: Playgrounds
|
||||
weight: 63
|
||||
menu:
|
||||
docs:
|
||||
weight: 63
|
||||
identifier: playgrounds
|
||||
tags:
|
||||
- metrics
|
||||
- logs
|
||||
- traces
|
||||
- playground
|
||||
---
|
||||
{{% content "README.md" %}}
|
||||
@@ -1,21 +0,0 @@
|
||||
---
|
||||
weight: 6
|
||||
title: Cloud Playground
|
||||
menu:
|
||||
docs:
|
||||
parent: "playgrounds"
|
||||
weight: 6
|
||||
tags:
|
||||
- victoriametrics
|
||||
- cloud
|
||||
- playground
|
||||
- monitoring
|
||||
---
|
||||
|
||||
- Try it: <https://console.victoriametrics.cloud/explore>
|
||||
|
||||
VictoriaMetrics UIs are included in the explore section of VictoriaMetrics and VictoriaLogs deployments embedded in VictoriaMetrics Cloud.
|
||||
|
||||
You can experiment with your own data without the need to deploy the VictoriaMetrics Stack in your local environment for free for a month by following this guide: [VictoriaMetrics Cloud Quickstart](https://docs.victoriametrics.com/victoriametrics-cloud/get-started/quickstart/).
|
||||
|
||||
Once set up, follow this guide to explore your data: <https://docs.victoriametrics.com/victoriametrics-cloud/exploring-data/>
|
||||
@@ -1,45 +0,0 @@
|
||||
---
|
||||
weight: 4
|
||||
title: Grafana Playground
|
||||
menu:
|
||||
docs:
|
||||
parent: "playgrounds"
|
||||
weight: 4
|
||||
tags:
|
||||
- grafana
|
||||
- playground
|
||||
- metrics
|
||||
- logs
|
||||
- traces
|
||||
---
|
||||
|
||||
- Try it: <https://play-grafana.victoriametrics.com/>
|
||||
|
||||
This playground is particularly useful if you already use Grafana and want to see how VictoriaMetrics integrates into existing workflows. It provides a hosted Grafana instance preconfigured with:
|
||||
|
||||
- [VictoriaMetrics](https://docs.victoriametrics.com/victoriametrics/) as a metrics data source
|
||||
- [VictoriaLogs](https://docs.victoriametrics.com/victorialogs/) as a logs data source
|
||||
- [VictoriaTraces](https://docs.victoriametrics.com/victoriatraces/) as a Jaeger data source for traces
|
||||
|
||||
## What can you do here?
|
||||
|
||||
- Explore [real dashboards](https://play-grafana.victoriametrics.com/dashboards) built on top of VictoriaMetrics
|
||||
- See how [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) and [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) are used in Grafana panels
|
||||
- Explore correlation with the help of the [OpenTelemetry Collector dashboard](https://play-grafana.victoriametrics.com/d/BKf2sowmj/opentelemetry-collector)
|
||||
- Learn dashboard design and visualization best practices
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Grafana dashboard in the playground</figcaption>
|
||||
|
||||
The OpenTelemetry Collector dashboard is built on the official [OpenTelemetry Astronomy Shop demo](https://github.com/VictoriaMetrics-Community/opentelemetry-demo). It lets you visualize and understand telemetry data alongside VictoriaMetrics Stack observability signals, using VictoriaMetrics for metrics, VictoriaLogs for logs, and VictoriaTraces for traces.
|
||||
|
||||
For an always-updated list of dashboards, bookmark this playground.
|
||||
|
||||
## Distribution
|
||||
|
||||
Relevant GitHub:
|
||||
- VictoriaMetrics Grafana datasource: <https://github.com/VictoriaMetrics/victoriametrics-datasource>
|
||||
- VictoriaLogs Grafana datasource: <https://github.com/VictoriaMetrics/victorialogs-datasource>
|
||||
|
||||
|
||||
|
||||
|
Before Width: | Height: | Size: 94 KiB |
@@ -1,42 +0,0 @@
|
||||
---
|
||||
weight: 8
|
||||
title: LogQL to LogsQL Playground
|
||||
menu:
|
||||
docs:
|
||||
parent: "playgrounds"
|
||||
weight: 8
|
||||
tags:
|
||||
- logsql
|
||||
- loki
|
||||
- victorialogs
|
||||
- playground
|
||||
- monitoring
|
||||
---
|
||||
|
||||
- Try it: <https://play-logql.victoriametrics.com/>
|
||||
|
||||
For teams migrating from Grafana Loki, this simple UI provides a useful [translator from LogQL to LogsQL](https://docs.victoriametrics.com/victorialogs/logql-to-logsql/).
|
||||
|
||||

|
||||
|
||||
## What can you do here?
|
||||
|
||||
The query-language translation tool automatically converts Loki queries into VictoriaLogs queries, reducing friction when adopting VictoriaLogs in environments already using Loki.
|
||||
|
||||
Type your LogQL query and press **Execute**.
|
||||
|
||||
For example, this LogQL query:
|
||||
|
||||
```text
|
||||
{collector="otel-collector"} |= "POST"
|
||||
```
|
||||
|
||||
Translates into the equivalent [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/)
|
||||
|
||||
```text
|
||||
{collector="otel-collector"} "POST"
|
||||
```
|
||||
|
||||
## Distribution
|
||||
|
||||
- GitHub: <https://github.com/VictoriaMetrics-Community/logql-to-logsql>
|
||||
|
Before Width: | Height: | Size: 140 KiB |
@@ -1,46 +0,0 @@
|
||||
---
|
||||
weight: 7
|
||||
title: SQL to LogsQL Playground
|
||||
menu:
|
||||
docs:
|
||||
parent: "playgrounds"
|
||||
weight: 7
|
||||
tags:
|
||||
- logsql
|
||||
- SQL
|
||||
- victorialogs
|
||||
- playground
|
||||
- monitoring
|
||||
---
|
||||
|
||||
- Try it: <https://play-sql.victoriametrics.com/>
|
||||
|
||||
This playground enables you to query data from a VictoriaLogs instance or translate SQL to [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) without querying.
|
||||
|
||||

|
||||
|
||||
## What can you do here?
|
||||
|
||||
First, run `SHOW TABLES;` to view all the existing tables in the SQL database and their equivalent query in [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/).
|
||||
|
||||
Then, type your SQL query and press **Execute**.
|
||||
|
||||
For example, this query:
|
||||
|
||||
```sql
|
||||
SELECT _time, _msg
|
||||
FROM logs
|
||||
WHERE _msg LIKE 'error'
|
||||
ORDER BY _time DESC
|
||||
LIMIT 100
|
||||
```
|
||||
|
||||
Translates into:
|
||||
|
||||
```text
|
||||
_msg:error | fields _time, _msg | sort by (_time desc) | limit 100
|
||||
```
|
||||
|
||||
## Distribution
|
||||
|
||||
- GitHub: <https://github.com/VictoriaMetrics/sql-to-logsql>
|
||||
|
Before Width: | Height: | Size: 88 KiB |
@@ -1,44 +0,0 @@
|
||||
---
|
||||
weight: 2
|
||||
title: VictoriaLogs Playground
|
||||
menu:
|
||||
docs:
|
||||
parent: "playgrounds"
|
||||
weight: 2
|
||||
tags:
|
||||
- victorialogs
|
||||
- playground
|
||||
- logs
|
||||
---
|
||||
|
||||
- Try it: <https://play-vmlogs.victoriametrics.com/>
|
||||
- Query language reference: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/)
|
||||
|
||||
This playground focuses on VictoriaLogs and lets you test the query engine on a demo log set. The playground demonstrates how VictoriaLogs handles high-volume log data with predictable performance and low operational overhead.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">VictoriaLogs playground</figcaption>
|
||||
|
||||
## What can you do here?
|
||||
|
||||
The WebUI provides the following modes for displaying query results:
|
||||
- Group: results are displayed as a table with rows grouped by stream fields.
|
||||
- Table: displays query results as a table.
|
||||
- JSON: displays raw JSON response from `/select/logsql/query` HTTP API.
|
||||
- Live: displays live tailing results for the given query.
|
||||
|
||||
As a starting point, you can type `collector: "otel-collector"` in the query field to search for entries collected by OpenTelemetry.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Log entries collected with the OpenTelemetry collector</figcaption>
|
||||
|
||||
Typing `error AND _time:24h` shows you the entries containing the text "error" during the last 24 hours.
|
||||

|
||||
|
||||
The **Overview** provides a quick, high-level look at the logs stored in VictoriaLogs. It helps you understand the volume and structure of your log data before diving into detailed queries. You can see log ingestion trends, identify the most common fields and values, and quickly spot noisy or unusual streams. From here, you can click on fields or values to automatically apply filters and start exploring your data with LogsQL.
|
||||
|
||||

|
||||
|
||||
## Distribution
|
||||
|
||||
- GitHub: <https://github.com/VictoriaMetrics/VictoriaLogs>
|
||||
@@ -1,63 +0,0 @@
|
||||
---
|
||||
weight: 1
|
||||
title: VictoriaMetrics Playground
|
||||
menu:
|
||||
docs:
|
||||
parent: "playgrounds"
|
||||
weight: 1
|
||||
tags:
|
||||
- victoriametrics
|
||||
---
|
||||
|
||||
- Try it: <https://play.victoriametrics.com/>
|
||||
- Query language reference: [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/)
|
||||
|
||||
This is the primary playground for VictoriaMetrics, powered by VMUI and backed by a VictoriaMetrics cluster installation. Use it to experiment with the query engine, see available pages, or try tools such as the relabeling debugger.
|
||||
|
||||
This playground is the best starting point for understanding how VictoriaMetrics stores and queries metrics at scale.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">VictoriaMetrics playground</figcaption>
|
||||
|
||||
## What can you do here?
|
||||
|
||||
The query tab provides a sandbox to experiment with [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/). Turn on Autocomplete and start typing to discover time series. You can add multiple queries and compare them.
|
||||
|
||||
You can try these to get started:
|
||||
|
||||
- Average CPU usage per job: `sum(rate(process_cpu_seconds_total[5m])) by (job)`
|
||||
- HTTP requests per-second rate: `sum(rate(vm_http_requests_total[5m]))`
|
||||
- Top 5 CPU intensive jobs: `topk(5, sum(rate(process_cpu_seconds_total[5m])) by (job))`
|
||||
|
||||
Below is an example of average CPU usage per job:
|
||||
|
||||
```text
|
||||
sum(rate(process_cpu_seconds_total[5m])) by (job)
|
||||
```
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Average CPU usage per job</figcaption>
|
||||
|
||||
Here, we are requesting the per-second rate of HTTP requests:
|
||||
|
||||
```text
|
||||
sum(rate(vm_http_requests_total[5m]))
|
||||
```
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">HTTP requests per second</figcaption>
|
||||
|
||||
And here is an example for obtaining the top 5 high CPU jobs:
|
||||
|
||||
```text
|
||||
topk(5, sum(rate(process_cpu_seconds_total[5m])) by (job))
|
||||
```
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Top 5 CPU intensive jobs</figcaption>
|
||||
|
||||
For a deep dive into all the features of this playground, please visit the [VMUI](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui) page.
|
||||
|
||||
## Distribution
|
||||
|
||||
- GitHub: <https://github.com/VictoriaMetrics/VictoriaMetrics>
|
||||
@@ -1,36 +0,0 @@
|
||||
---
|
||||
weight: 3
|
||||
title: VictoriaTraces Playground
|
||||
menu:
|
||||
docs:
|
||||
parent: "playgrounds"
|
||||
weight: 3
|
||||
tags:
|
||||
- victoriatraces
|
||||
- playground
|
||||
- monitoring
|
||||
---
|
||||
|
||||
- Try it: <https://play-vtraces.victoriametrics.com/>
|
||||
- Query language reference: [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/)
|
||||
|
||||
> [!NOTE] Note
|
||||
> This playground is currently under development, as the main project it is correlated with, VictoriaTraces, is also under development.
|
||||
|
||||
VictoriaTraces provides a UI for browsing raw data and Jaeger APIs/Grafana data source for trace visualization. This playground showcases VictoriaTraces, the VictoriaMetrics backend for distributed tracing, and enables trace searching, visualization, and service graph/dependency analysis.
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Grafana playground showing VictoriaTraces/Jaeger datasource</figcaption>
|
||||
|
||||
## What can you do here?
|
||||
|
||||
The WebUI provides the following modes for displaying query results:
|
||||
- Group: results are displayed as a table with rows grouped by stream fields.
|
||||
- Table: displays query results as a table.
|
||||
- JSON: displays raw JSON response from the HTTP API.
|
||||
- Live: displays live tailing results for the given query.
|
||||
|
||||
## Distribution
|
||||
|
||||
- GitHub: <https://github.com/VictoriaMetrics/VictoriaTraces>
|
||||
|
||||
|
Before Width: | Height: | Size: 71 KiB |
|
Before Width: | Height: | Size: 86 KiB |
|
Before Width: | Height: | Size: 98 KiB |
|
Before Width: | Height: | Size: 101 KiB |
|
Before Width: | Height: | Size: 37 KiB |
|
Before Width: | Height: | Size: 68 KiB |
|
Before Width: | Height: | Size: 75 KiB |
|
Before Width: | Height: | Size: 110 KiB |
@@ -1,44 +0,0 @@
|
||||
---
|
||||
weight: 5
|
||||
title: Anomaly Detection Playground
|
||||
menu:
|
||||
docs:
|
||||
parent: "playgrounds"
|
||||
weight: 5
|
||||
tags:
|
||||
- vmanomaly
|
||||
- playground
|
||||
- monitoring
|
||||
---
|
||||
|
||||
- Try it: <https://play-vmanomaly.victoriametrics.com/metrics/vmui/>
|
||||
- UI Guide: <https://docs.victoriametrics.com/anomaly-detection/ui/#example-usage>
|
||||
|
||||
The playground demonstrates automatic [anomaly detection](https://docs.victoriametrics.com/anomaly-detection/).
|
||||
|
||||

|
||||
<figcaption style="text-align: center; font-style: italic;">Exploring model fit for CPU usage series</figcaption>
|
||||
|
||||
The playground showcases anomaly detection data (native timeseries or converted to timeseries) using VictoriaMetrics, VictoriaLogs, or VictoriaTraces datasources, respectively:
|
||||
|
||||
- <https://play-vmanomaly.victoriametrics.com/metrics/>
|
||||
- <https://play-vmanomaly.victoriametrics.com/logs/>
|
||||
- <https://play-vmanomaly.victoriametrics.com/traces/>
|
||||
|
||||
## What can you do here?
|
||||
|
||||
The Anomaly Detection playground lets you:
|
||||
- Understand how [MetricsQL](https://docs.victoriametrics.com/victoriametrics/metricsql/) and [LogsQL](https://docs.victoriametrics.com/victorialogs/logsql/) are used to generate input data for anomaly detection.
|
||||
- Explore metrics data enriched with anomaly scores, predictions, and confidence intervals.
|
||||
- Visualize anomalies directly in VMUI, including consecutive anomalies that last over time rather than being a single point, to imitate how alerting rules trigger on such data.
|
||||
- Learn how anomaly scores can be used for alerting purposes by exploring generated alerting rules.
|
||||
|
||||
## Distribution & setup
|
||||
|
||||
VMAnomaly is distributed through various channels:
|
||||
|
||||
- [Installation guide](https://docs.victoriametrics.com/anomaly-detection/quickstart/)
|
||||
- Docker containers available in [Docker Hub](https://hub.docker.com/r/victoriametrics/vmanomaly) and [Quay.io](https://quay.io/repository/victoriametrics/vmanomaly)
|
||||
- [Helm charts](https://github.com/VictoriaMetrics/helm-charts) (including anomaly setups)
|
||||
- [VM Operator](https://docs.victoriametrics.com/operator/resources/vmanomaly/)
|
||||
|
||||
|
Before Width: | Height: | Size: 84 KiB |
|
Before Width: | Height: | Size: 102 KiB |
@@ -109,8 +109,6 @@ See also [case studies](https://docs.victoriametrics.com/victoriametrics/casestu
|
||||
* [How to Master Kubernetes Observability: Multi-Cluster Monitoring with VictoriaMetrics, Loki, and Grafana](https://www.keyvalue.systems/blog/kubernetes-observability-with-victoriametrics-loki-grafana/)
|
||||
* [A Complete Guide to VictoriaMetrics, a Prometheus Comparison, and Kubernetes Monitoring Implementation](https://apprecode.com/blog/a-complete-guide-to-victoriametrics-a-prometheus-comparison-and-kubernetes-monitoring-implementation)
|
||||
* [Monitoring Pipeline with Prometheus and VictoriaMetrics](https://clovisc.medium.com/monitoring-pipeline-with-blackbox-exporter-prometheus-victoriametrics-and-vmalert-0ab020c7202a)
|
||||
* [FreeBSD: monitoring with VictoriaMetrics and Grafana](https://setevoy.medium.com/freebsd-monitoring-with-victoriametrics-and-grafana-f789904f2628)
|
||||
* [QCon London 2026: Wrangling Telemetry at Scale, a Guide to Self-Hosted Observability](https://www.infoq.com/news/2026/03/self-hosted-observability/)
|
||||
|
||||
## Third-party articles and slides about VictoriaLogs
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ menu:
|
||||
docs:
|
||||
parent: 'victoriametrics'
|
||||
weight: 24
|
||||
identifier: vm-faq
|
||||
tags:
|
||||
- metrics
|
||||
aliases:
|
||||
|
||||
@@ -27,5 +27,5 @@ to [the latest available releases](https://docs.victoriametrics.com/victoriametr
|
||||
|
||||
## Currently supported LTS release lines
|
||||
|
||||
- v1.136.x - the latest one is [v1.136.2 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.2)
|
||||
- v1.122.x - the latest one is [v1.122.17 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.17)
|
||||
- v1.136.x - the latest one is [v1.136.0 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.136.0)
|
||||
- v1.122.x - the latest one is [v1.122.15 LTS release](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.122.15)
|
||||
@@ -229,6 +229,23 @@ See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3781)
|
||||
[Docker-compose](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#readme)
|
||||
helps to spin up VictoriaMetrics, [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and Grafana with one command.
|
||||
|
||||
## Playgrounds
|
||||
|
||||
VictoriaMetrics has the following publicly available demo resources:
|
||||
|
||||
1. [https://play.victoriametrics.com/](https://play.victoriametrics.com/) - [VMUI](#vmui) of VictoriaMetrics cluster installation.
|
||||
It is available for testing the query engine, relabeling debugger, other tools and pages provided by VMUI.
|
||||
1. [https://play-grafana.victoriametrics.com/](https://play-grafana.victoriametrics.com/) - Grafana configured with many
|
||||
typical dashboards using VictoriaMetrics and VictoriaLogs as datasource. It contains VictoriaMetrics cluster dashboard with
|
||||
3 cluster installations for the recent OS and LTS versions running under the constant benchmark.
|
||||
1. [https://play-vmlogs.victoriametrics.com/](https://play-vmlogs.victoriametrics.com/) - [VMUI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui) of VictoriaLogs installation.
|
||||
It is available for testing the query engine on demo logs set.
|
||||
1. [https://play-vtraces.victoriametrics.com/](https://play-vtraces.victoriametrics.com/) - [VMUI](https://docs.victoriametrics.com/victoriatraces/querying/#web-ui) of VictoriaTraces installation.
|
||||
It is available for testing the query engine on demo traces set.
|
||||
|
||||
Additionally, we provide a docker-compose environment for [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/README.md), [VictoriaLogs](https://github.com/VictoriaMetrics/VictoriaLogs/blob/master/deployment/docker/README.md) and [VictoriaTraces](https://github.com/VictoriaMetrics/VictoriaTraces/blob/master/deployment/docker/README.md).
|
||||
They are already configured, provisioned and interconnected. They can be used as an example for a [quick start](https://docs.victoriametrics.com/victoriametrics/quick-start/).
|
||||
|
||||
## How to upgrade VictoriaMetrics
|
||||
|
||||
VictoriaMetrics is developed at a fast pace, so it is recommended to periodically check [the CHANGELOG page](https://docs.victoriametrics.com/victoriametrics/changelog/) and perform regular upgrades.
|
||||
@@ -2471,7 +2488,3 @@ Moved to [integrations/graphite/#tags-api](https://docs.victoriametrics.com/vict
|
||||
###### Integrations
|
||||
|
||||
Moved to [integrations](https://docs.victoriametrics.com/victoriametrics/integrations/).
|
||||
|
||||
###### Playgrounds
|
||||
|
||||
The VictoriaMetrics playgrounds have been moved to [Playgrounds](https://docs.victoriametrics.com/playgrounds/).
|
||||
|
||||
@@ -26,27 +26,11 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
|
||||
|
||||
## tip
|
||||
|
||||
## [v1.139.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.139.0)
|
||||
|
||||
Released at 2026-03-27
|
||||
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): show `seriesCountByMetricName` table when a label is in focus in the [Cardinality Explorer](https://docs.victoriametrics.com/victoriametrics/#cardinality-explorer). See [#10630](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10630). Thanks to @Roshan1299 for the contribution.
|
||||
* FEATURE: [dashboards/unused-metrics](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/unused-metrics.json): add a new dashboard for exploring stored metrics based on [Cardinality Explorer](https://docs.victoriametrics.com/victoriametrics/#cardinality-explorer) and [ingested metrics usage API](https://docs.victoriametrics.com/victoriametrics/#track-ingested-metrics-usage). The dashboard requires [Infinity Grafana plugin](https://grafana.com/grafana/plugins/yesoreyeram-infinity-datasource/) to be installed. See [#10617](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10617) for details.
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): add `search` parameter and pagination support in `/api/v1/rules` API. See [#10046](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10046).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): add default pagination to improve the Alerting Rules page experience when vmalert loads thousands of rules. See [#10046](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10046).
|
||||
* FEATURE: all VictoriaMetrics components: log a warning when an IPv6 listen address (e.g. `[::]:6969`) is specified but `-enableTCP6` is not set. Previously, the server silently listened on IPv4 only. See [#6858](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6858). Thanks to @andriibeee for the contribution.
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): extend JWT [claim matching](https://docs.victoriametrics.com/victoriametrics/vmauth/#jwt-claim-matching) with array claim values support. See [#10647](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10647). Thanks to @andriibeee for the contribution.
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): allow specifying `basic_auth` in scrape configs without `username`. Previously this resulted in a config error. Now a warning is logged instead. See [#6956](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6956). Thanks to @andriibeee for the contribution.
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/) and [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/): add support for negative buckets in [OpenTelemetry](https://docs.victoriametrics.com/victoriametrics/integrations/opentelemetry/) `ExponentialHistogram` during ingestion. See [#9896-comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9896#issuecomment-4037522985).
|
||||
* FEATURE: [vmrestore](https://docs.victoriametrics.com/victoriametrics/vmrestore/): Improve restore speed on linux systems by pre-allocating files and optimizing write path. This behavior can be disabled with `-skipFilePreallocation`. See [#10661](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10661/). Thanks to @BenNF for the contribution.
|
||||
|
||||
|
||||
* BUGFIX: `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): retry RPC by dialing a new connection instead of reusing a pooled one when the previous attempt fails with `io.EOF`, `broken pipe` or `reset by peer`. This reduces query failures caused by stale connections to restarted vmstorage nodes. See [#10314](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10314)
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): fix autocomplete dropdown not closing on the Raw Query page. See [#10665](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10665)
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): properly handle JWKS keys per [RFC 7517](https://datatracker.ietf.org/doc/html/rfc7517#section-4.2) during [OIDC discovery](https://docs.victoriametrics.com/victoriametrics/vmauth/#oidc-discovery): skip keys with `use=enc`, reject `use=sig` keys with unsupported `alg`, and warn-skip keys with empty `use` that have unsupported `alg`. See [#10663](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10663). Thanks to @andriibeee for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): enforce `datasource_type=prometheus` when [proxying](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#vmalert) Grafana requests to [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/). Grafana supports only `prometheus` and `loki` alerts. Without this fix, Grafana shows `Error loading alerts` when non-Prometheus alert types are returned. See [victoriametrics-datasource#329](https://github.com/VictoriaMetrics/victoriametrics-datasource/issues/329).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): stop logging `error`-level messages when scraping targets that expose OpenMetrics `info`, `gaugehistogram`, `stateset`, or `unknown` metric types. These are valid [OpenMetrics](https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md) types and should be parsed without error. See [#10685](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10685). Thanks to @tsarna for the contribution.
|
||||
* BUGFIX: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): prevent panic during directory deletion on `NFS`-based mounts. The bug was introduced in [83da33d8](https://github.com/VictoriaMetrics/VictoriaMetrics/commit/83da33d8cfe8352fd0022d05a8b6346ebb48420d) and included in [v1.123.0](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/victoriametrics/changelog/CHANGELOG_2025.md#v11230). See [#9842](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9842).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): make `scrape_series_added` negative when the number of series exposed by the target decreases or when scraping the target fails. See [#10653](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10653).
|
||||
|
||||
## [v1.138.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.138.0)
|
||||
|
||||
@@ -62,7 +46,7 @@ Released at 2026-03-13
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/victoriametrics/vmalert/): support negative values for the group `eval_offset` option, which allows starting group evaluation at `groupInterval-abs(eval_offset)` within `[0...groupInterval]`. See [#10424](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10424).
|
||||
* FEATURE: [vmsingle](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/) and `vmselect` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/): disable `/graphite/tags/tagSeries` and `/graphite/tags/tagMultiSeries` for Graphite tag registration, since they are unlikely to be used in the context of VictoriaMetrics. See [#10544](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10544).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/victoriametrics/single-server-victoriametrics/#vmui): rename debug tools buttons for clarity. See [#10453](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10453).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): [yandexcloud_sd_configs](https://docs.victoriametrics.com/victoriametrics/sd_configs/#yandexcloud_sd_configs) now supports `folder_ids` for limiting discovery to specific folders. See [#10623](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10623). Thanks to @Br1an67 for the contribution.
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): [`yandexcloud_sd_configs`](https://docs.victoriametrics.com/victoriametrics/sd_configs/#yandexcloud_sd_configs) now supports `folder_ids` for limiting discovery to specific folders. See [#10623](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10623).
|
||||
|
||||
* BUGFIX: all VictoriaMetrics components: replace `histogram` with `untyped` metric metadata type for [VictoriaMetrics histograms](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#histogram) when `-metrics.exposeMetadata` is set. See [#82](https://github.com/VictoriaMetrics/metrics/issues/82).
|
||||
* BUGFIX: [vmauth](https://docs.victoriametrics.com/victoriametrics/vmauth/): properly route requests to `default_url`. Previously, `request_path` query arg could be set incorrectly during concurrent requests. See [#10626](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10626).
|
||||
|
||||
@@ -21,7 +21,7 @@ exporters:
|
||||
otlphttp/victoriametrics:
|
||||
compression: gzip
|
||||
encoding: proto
|
||||
metrics_endpoint: http://<vmsingle>:8428/opentelemetry/v1/metrics
|
||||
metrics_endpoint: http://<vmsingle>:8428/opentelemetry/v1/metrics
|
||||
```
|
||||
|
||||
> For the [cluster version](https://docs.victoriametrics.com/victoriametrics/cluster-victoriametrics/#url-format) specify the tenant ID:
|
||||
|
||||
@@ -33,7 +33,7 @@ By default, VictoriaMetrics promotes all [OpenTelemetry resource](https://opente
|
||||
## Exponential histograms
|
||||
|
||||
OpenTelemetry [exponential histogram](https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram) is automatically converted
|
||||
to [VictoriaMetrics histogram format](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) with `vmrange` labels during ingestion.
|
||||
to [VictoriaMetrics histogram format](https://valyala.medium.com/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350) during ingestion. Since VictoriaMetrics histogram doesn't support negative observations, all buckets in the negative range are dropped.
|
||||
|
||||
## Delta Temporality
|
||||
|
||||
|
||||
@@ -2080,8 +2080,8 @@ and in the majority of [supported service discovery configs](#supported-service-
|
||||
# basic_auth is an optional HTTP basic authentication configuration.
|
||||
#
|
||||
# basic_auth:
|
||||
# username: "..." # optional
|
||||
# username_file: "..." # optional, is mutually-exclusive with username
|
||||
# username: "..."
|
||||
# username_file: "..." # is mutually-exclusive with username
|
||||
# password: "..."
|
||||
# password_file: "..." # is mutually-exclusive with password
|
||||
|
||||
|
||||
@@ -598,7 +598,7 @@ and attaches `instance`, `job` and other target-specific labels to these metrics
|
||||
The limit can be set via `label_limit` option at [scrape_configs](https://docs.victoriametrics.com/victoriametrics/sd_configs/#scrape_configs).
|
||||
This metric is exposed only if the `label_limit` is set.
|
||||
|
||||
* `scrape_series_added` - **an approximate** number of new [series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) the given target generates during the current scrape.
|
||||
* `scrape_series_added` - **an approximate** change in the number of [series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) exposed by the given target during the current scrape.
|
||||
This metric allows detecting targets (identified by `instance` label),
|
||||
which lead to [high churn rate](https://docs.victoriametrics.com/victoriametrics/faq/#what-is-high-churn-rate).
|
||||
For example, the following [MetricsQL query](https://docs.victoriametrics.com/victoriametrics/metricsql/) returns targets,
|
||||
@@ -611,6 +611,7 @@ and attaches `instance`, `job` and other target-specific labels to these metrics
|
||||
`vmagent` sets `scrape_series_added` to zero when it runs with `-promscrape.noStaleMarkers` command-line flag
|
||||
or when it scrapes a target with the `no_stale_markers: true` option, i.e. when [staleness markers](#prometheus-staleness-markers) are disabled.
|
||||
|
||||
When `vmagent` starts, the `scrape_series_added` for all targets will be increased because all series are newly added for a newly started `vmagent`.
|
||||
* `scrape_series_limit` - the limit on the number of unique [series](https://docs.victoriametrics.com/victoriametrics/keyconcepts/#time-series) the given target can expose according to [these docs](#cardinality-limiter).
|
||||
This metric is exposed only if the series limit is set.
|
||||
|
||||
|
||||
@@ -750,7 +750,7 @@ or time series modification via [relabeling](https://docs.victoriametrics.com/vi
|
||||
`vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:
|
||||
|
||||
* `http://<vmalert-addr>` - UI;
|
||||
* `http://<vmalert-addr>/api/v1/rules` - list of all loaded groups and rules. Supports `search`, `group_limit`, and `page_num` parameters, as well as additional [filtering](https://prometheus.io/docs/prometheus/latest/querying/api/#rules);
|
||||
* `http://<vmalert-addr>/api/v1/rules` - list of all loaded groups and rules. Supports additional [filtering](https://prometheus.io/docs/prometheus/latest/querying/api/#rules);
|
||||
* `http://<vmalert-addr>/api/v1/alerts` - list of all active alerts;
|
||||
* `http://<vmalert-addr>/api/v1/notifiers` - list all available notifiers;
|
||||
* `http://<vmalert-addr>/vmalert/api/v1/alert?group_id=<group_id>&alert_id=<alert_id>` - get alert status in JSON format.
|
||||
|
||||
@@ -326,9 +326,10 @@ signed with the configured public key.
|
||||
Claim names support dot-notation for traversal of nested JSON objects
|
||||
(a simplified JSONPath-style approach), for example `vm_access.metrics_account_id` matches `{"vm_access": {"metrics_account_id": 1}}` and
|
||||
`security.permissions.0.read` matches `{"security": {"permissions": [{"read": 1}]}}`.
|
||||
Claim names must point to a **leaf value** or an **array**. The supported leaf types are string, integer, float and boolean.
|
||||
If the claim value is an array, each scalar element is compared against the match value - the claim matches if any element matches. Objects and nested arrays inside the array are skipped.
|
||||
All configured claims must match and the values use regular expression syntax.
|
||||
Claim names must point to a **leaf value**. The only supported leaf values are string, integer, float and boolean. Any other leaf type
|
||||
is treated as not matched.
|
||||
All configured claims must match exactly.
|
||||
Claim match values use regular expression syntax and must fully match the claim value.
|
||||
|
||||
For example, the following config routes requests based on the `role` claim in the JWT token:
|
||||
|
||||
@@ -381,31 +382,6 @@ users:
|
||||
url_prefix: "http://victoria-metrics-tenant-2:8428/"
|
||||
```
|
||||
|
||||
The following config matches against array claim values.
|
||||
The first user matches a token with claim `{"roles": ["admin"]}`, while the second matches a token with claim `{"roles": ["read"]}` or `{"roles": ["write"]}`.
|
||||
|
||||
```yaml
|
||||
users:
|
||||
- jwt:
|
||||
public_keys:
|
||||
- |
|
||||
-----BEGIN PUBLIC KEY-----
|
||||
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA...
|
||||
-----END PUBLIC KEY-----
|
||||
match_claims:
|
||||
roles: admin
|
||||
url_prefix: "http://victoria-metrics-admin:8428/"
|
||||
- jwt:
|
||||
public_keys:
|
||||
- |
|
||||
-----BEGIN PUBLIC KEY-----
|
||||
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA...
|
||||
-----END PUBLIC KEY-----
|
||||
match_claims:
|
||||
roles: "^(read|write)$"
|
||||
url_prefix: "http://victoria-metrics-readonly:8428/"
|
||||
```
|
||||
|
||||
The following config matches any valid token (no claim filtering),
|
||||
equivalent to the behavior when `match_claims` is omitted:
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ aliases:
|
||||
---
|
||||
`vmrestore` restores data from backups created by [vmbackup](https://docs.victoriametrics.com/victoriametrics/vmbackup/).
|
||||
|
||||
The restore process can be interrupted at any time. It is automatically resumed when restarting `vmrestore` with the same args. If file preallocation is enabled{{% available_from "v1.139.0" %}}, it resumes from the last complete file; if file preallocation is disabled via `-skipFilePreallocation`, it resumes from the interruption point in the middle of the file.
|
||||
Restore process can be interrupted at any time. It is automatically resumed from the interruption point when restarting `vmrestore` with the same args.
|
||||
|
||||
## Usage
|
||||
|
||||
|
||||
2
go.mod
@@ -162,7 +162,7 @@ require (
|
||||
google.golang.org/genproto v0.0.0-20260217215200-42d3e9bedb6d // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260217215200-42d3e9bedb6d // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260217215200-42d3e9bedb6d // indirect
|
||||
google.golang.org/grpc v1.79.3 // indirect
|
||||
google.golang.org/grpc v1.79.1 // indirect
|
||||
google.golang.org/protobuf v1.36.11 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
k8s.io/apimachinery v0.35.1 // indirect
|
||||
|
||||