mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2026-05-29 23:00:51 +03:00
Compare commits
4 Commits
optimize-a
...
feature/st
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
961540b806 | ||
|
|
c258732902 | ||
|
|
c1de412ec2 | ||
|
|
d343e9f6cf |
2
dashboards/.gitignore
vendored
Normal file
2
dashboards/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
# Generated binary
|
||||
dashgen/dashgen-bin
|
||||
@@ -8,6 +8,7 @@ dashboard-copy:
|
||||
rm -rf dashboards/vm/*.tmp
|
||||
|
||||
# Copies listed dashboards to vm/* but changes the datasource type from Prometheus to VictoriaMetrics.
|
||||
# Also generates status-page-generated.json from alert rules.
|
||||
# The command should be called before committing changes to dashboards/* files.
|
||||
dashboards-sync:
|
||||
SRC=victoriametrics.json D_UID=wNf0q_kZk TITLE="VictoriaMetrics - single-node" $(MAKE) dashboard-copy
|
||||
@@ -18,3 +19,33 @@ dashboards-sync:
|
||||
SRC=operator.json D_UID=1H179hunk TITLE="VictoriaMetrics - operator" $(MAKE) dashboard-copy
|
||||
SRC=backupmanager.json D_UID=gF-lxRdVz TITLE="VictoriaMetrics - backupmanager" $(MAKE) dashboard-copy
|
||||
SRC=clusterbytenant.json D_UID=IZFqd3lMz TITLE="VictoriaMetrics Cluster Per Tenant Statistic" $(MAKE) dashboard-copy
|
||||
$(MAKE) generate-status-page
|
||||
SRC=status-page-generated.json D_UID=vm-status-page TITLE="VictoriaMetrics - Status Page" $(MAKE) dashboard-copy
|
||||
|
||||
|
||||
# Build the dashboard generator tool
|
||||
build-dashgen:
|
||||
cd dashboards/dashgen && go build -o dashgen-bin ./main.go
|
||||
|
||||
# Generate status page dashboard from alert rules.
|
||||
# This reads alerts from deployment/docker/rules and produces status-page-generated.json.
|
||||
# The generated dashboard shows OK/KO status for each alert across all VictoriaMetrics components.
|
||||
generate-status-page: build-dashgen
|
||||
cd dashboards/dashgen && ./dashgen-bin \
|
||||
--alerts-dir=../../deployment/docker/rules \
|
||||
--output=../status-page-generated.json \
|
||||
--title="VictoriaMetrics - Status Page" \
|
||||
--uid=vm-status-page
|
||||
|
||||
# Clean generated files
|
||||
clean-dashgen:
|
||||
rm -f dashboards/dashgen/dashgen-bin dashboards/status-page-generated.json
|
||||
|
||||
# Run dashgen unit tests (isolated, can be disabled by commenting out)
|
||||
dashgen-test:
|
||||
cd dashboards/dashgen && go test -v ./...
|
||||
|
||||
# Run dashgen tests with coverage
|
||||
dashgen-test-cover:
|
||||
cd dashboards/dashgen && go test -coverprofile=coverage.txt -covermode=atomic ./...
|
||||
|
||||
|
||||
@@ -7,4 +7,13 @@ The `vm` folder contains copies of the listed dashboards but alternated to use
|
||||
The listed dashboards can be found on [Grafana website](https://grafana.com/orgs/victoriametrics/dashboards).
|
||||
|
||||
When making changes to the dashboards in `dashboards` folder, don't forget to call `make dashboards-sync`
|
||||
and sync changes to [Grafana website](https://grafana.com/orgs/victoriametrics/dashboards).
|
||||
and sync changes to [Grafana website](https://grafana.com/orgs/victoriametrics/dashboards).
|
||||
|
||||
## Status Page Dashboard
|
||||
|
||||
`status-page-generated.json` is auto-generated from alert rules (`deployment/docker/rules/*.yml`).
|
||||
The dashboard shows the health percentage of each alert across all components.
|
||||
|
||||
**Generator** in `dashgen/`: parser -> queries -> quicktemplate renderer
|
||||
|
||||
**Testing:** `make dashgen-test`
|
||||
|
||||
15
dashboards/dashgen/generator/dashboard_qt.qtpl
Normal file
15
dashboards/dashgen/generator/dashboard_qt.qtpl
Normal file
@@ -0,0 +1,15 @@
|
||||
{% import "encoding/json" %}
|
||||
|
||||
{% code
|
||||
func marshal(v Dashboard) string {
|
||||
b, err := json.MarshalIndent(v, "", " ")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
%}
|
||||
|
||||
{% func RenderDashboard(d Dashboard) %}
|
||||
{%s= marshal(d) %}
|
||||
{% endfunc %}
|
||||
69
dashboards/dashgen/generator/dashboard_qt.qtpl.go
Normal file
69
dashboards/dashgen/generator/dashboard_qt.qtpl.go
Normal file
@@ -0,0 +1,69 @@
|
||||
// Code generated by qtc from "dashboard_qt.qtpl". DO NOT EDIT.
|
||||
// See https://github.com/valyala/quicktemplate for details.
|
||||
|
||||
//line dashboard_qt.qtpl:1
|
||||
package generator
|
||||
|
||||
//line dashboard_qt.qtpl:1
|
||||
import "encoding/json"
|
||||
|
||||
//line dashboard_qt.qtpl:3
|
||||
import (
|
||||
qtio422016 "io"
|
||||
|
||||
qt422016 "github.com/valyala/quicktemplate"
|
||||
)
|
||||
|
||||
//line dashboard_qt.qtpl:3
|
||||
var (
|
||||
_ = qtio422016.Copy
|
||||
_ = qt422016.AcquireByteBuffer
|
||||
)
|
||||
|
||||
//line dashboard_qt.qtpl:4
|
||||
func marshal(v Dashboard) string {
|
||||
b, err := json.MarshalIndent(v, "", " ")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
|
||||
//line dashboard_qt.qtpl:13
|
||||
func StreamRenderDashboard(qw422016 *qt422016.Writer, d Dashboard) {
|
||||
//line dashboard_qt.qtpl:13
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line dashboard_qt.qtpl:14
|
||||
qw422016.N().S(marshal(d))
|
||||
//line dashboard_qt.qtpl:14
|
||||
qw422016.N().S(`
|
||||
`)
|
||||
//line dashboard_qt.qtpl:15
|
||||
}
|
||||
|
||||
//line dashboard_qt.qtpl:15
|
||||
func WriteRenderDashboard(qq422016 qtio422016.Writer, d Dashboard) {
|
||||
//line dashboard_qt.qtpl:15
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line dashboard_qt.qtpl:15
|
||||
StreamRenderDashboard(qw422016, d)
|
||||
//line dashboard_qt.qtpl:15
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line dashboard_qt.qtpl:15
|
||||
}
|
||||
|
||||
//line dashboard_qt.qtpl:15
|
||||
func RenderDashboard(d Dashboard) string {
|
||||
//line dashboard_qt.qtpl:15
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line dashboard_qt.qtpl:15
|
||||
WriteRenderDashboard(qb422016, d)
|
||||
//line dashboard_qt.qtpl:15
|
||||
qs422016 := string(qb422016.B)
|
||||
//line dashboard_qt.qtpl:15
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line dashboard_qt.qtpl:15
|
||||
return qs422016
|
||||
//line dashboard_qt.qtpl:15
|
||||
}
|
||||
280
dashboards/dashgen/generator/dashboard_types.go
Normal file
280
dashboards/dashgen/generator/dashboard_types.go
Normal file
@@ -0,0 +1,280 @@
|
||||
package generator
|
||||
|
||||
import "encoding/json"
|
||||
|
||||
// Top-level dashboard.
|
||||
type Dashboard struct {
|
||||
Annotations Annotations `json:"annotations"`
|
||||
Description string `json:"description"`
|
||||
Editable bool `json:"editable"`
|
||||
FiscalYearStartMonth int `json:"fiscalYearStartMonth"`
|
||||
GraphTooltip int `json:"graphTooltip"`
|
||||
ID int `json:"id"`
|
||||
Links []Link `json:"links"`
|
||||
Panels []Panel `json:"panels"`
|
||||
Preload bool `json:"preload"`
|
||||
Refresh string `json:"refresh"`
|
||||
SchemaVersion int `json:"schemaVersion"`
|
||||
Tags []string `json:"tags"`
|
||||
Templating Templating `json:"templating"`
|
||||
Time TimeRange `json:"time"`
|
||||
Timepicker Timepicker `json:"timepicker"`
|
||||
Timezone string `json:"timezone"`
|
||||
Title string `json:"title"`
|
||||
UID string `json:"uid"`
|
||||
Version int `json:"version"`
|
||||
}
|
||||
|
||||
type Annotations struct {
|
||||
List []AnnotationItem `json:"list"`
|
||||
}
|
||||
|
||||
type AnnotationItem struct {
|
||||
BuiltIn int `json:"builtIn"`
|
||||
Datasource Datasource `json:"datasource"`
|
||||
Enable bool `json:"enable"`
|
||||
Hide bool `json:"hide"`
|
||||
IconColor string `json:"iconColor"`
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
}
|
||||
|
||||
type Link struct {
|
||||
AsDropdown bool `json:"asDropdown"`
|
||||
Icon string `json:"icon"`
|
||||
IncludeVars bool `json:"includeVars"`
|
||||
KeepTime bool `json:"keepTime"`
|
||||
Tags []string `json:"tags"`
|
||||
TargetBlank bool `json:"targetBlank"`
|
||||
Title string `json:"title"`
|
||||
Tooltip string `json:"tooltip"`
|
||||
Type string `json:"type"`
|
||||
URL string `json:"url"`
|
||||
}
|
||||
|
||||
type Panel struct {
|
||||
Datasource Datasource `json:"datasource"`
|
||||
Description string `json:"description"`
|
||||
FieldConfig FieldConfig `json:"fieldConfig"`
|
||||
GridPos GridPos `json:"gridPos"`
|
||||
ID int `json:"id"`
|
||||
Options PanelOptions `json:"options"`
|
||||
Targets []Target `json:"targets"`
|
||||
Title string `json:"title"`
|
||||
Transformations []Transformation `json:"transformations"`
|
||||
Type string `json:"type"`
|
||||
}
|
||||
|
||||
type Datasource struct {
|
||||
Type string `json:"type"`
|
||||
UID string `json:"uid"`
|
||||
}
|
||||
|
||||
type GridPos struct {
|
||||
H int `json:"h"`
|
||||
W int `json:"w"`
|
||||
X int `json:"x"`
|
||||
Y int `json:"y"`
|
||||
}
|
||||
|
||||
type PanelOptions struct {
|
||||
CellHeight string `json:"cellHeight"`
|
||||
EnablePagination *bool `json:"enablePagination,omitempty"`
|
||||
ShowHeader bool `json:"showHeader"`
|
||||
Footer *Footer `json:"footer,omitempty"`
|
||||
}
|
||||
|
||||
type Footer struct {
|
||||
Show *bool `json:"show,omitempty"`
|
||||
CountRows *bool `json:"countRows,omitempty"`
|
||||
EnablePagination *bool `json:"enablePagination,omitempty"`
|
||||
Reducers []string `json:"reducers,omitempty"`
|
||||
}
|
||||
|
||||
type FieldConfig struct {
|
||||
Defaults FieldDefaults `json:"defaults"`
|
||||
Overrides []Override `json:"overrides"`
|
||||
}
|
||||
|
||||
type FieldDefaults struct {
|
||||
Color Color `json:"color"`
|
||||
Custom CustomField `json:"custom"`
|
||||
Mappings []Mapping `json:"mappings"`
|
||||
NoValue string `json:"noValue"`
|
||||
Thresholds Thresholds `json:"thresholds"`
|
||||
Unit string `json:"unit"`
|
||||
}
|
||||
|
||||
type Color struct {
|
||||
Mode string `json:"mode"`
|
||||
FixedColor string `json:"fixedColor,omitempty"`
|
||||
}
|
||||
|
||||
type CustomField struct {
|
||||
Align string `json:"align"`
|
||||
CellOptions CellOptions `json:"cellOptions"`
|
||||
Filterable bool `json:"filterable"`
|
||||
Footer *Footer `json:"footer,omitempty"`
|
||||
Inspect bool `json:"inspect"`
|
||||
MinWidth int `json:"minWidth"`
|
||||
WrapHeaderText bool `json:"wrapHeaderText,omitempty"`
|
||||
WrapText *bool `json:"wrapText,omitempty"`
|
||||
Hidden bool `json:"hidden,omitempty"`
|
||||
Width int `json:"width,omitempty"`
|
||||
}
|
||||
|
||||
type CellOptions struct {
|
||||
ApplyToRow *bool `json:"applyToRow,omitempty"`
|
||||
Type string `json:"type"`
|
||||
}
|
||||
|
||||
type Mapping struct {
|
||||
Options MappingOptions `json:"options"`
|
||||
Type string `json:"type"`
|
||||
}
|
||||
|
||||
type MappingOptions struct {
|
||||
From *float64 `json:"from,omitempty"`
|
||||
To *float64 `json:"to,omitempty"`
|
||||
Match string `json:"match,omitempty"`
|
||||
Result MappingResult `json:"result"`
|
||||
}
|
||||
|
||||
type MappingResult struct {
|
||||
Color string `json:"color"`
|
||||
Index int `json:"index"`
|
||||
Text *string `json:"text,omitempty"`
|
||||
}
|
||||
|
||||
type Thresholds struct {
|
||||
Mode string `json:"mode"`
|
||||
Steps []ThresholdStep `json:"steps"`
|
||||
}
|
||||
|
||||
type ThresholdStep struct {
|
||||
Color string `json:"color"`
|
||||
Value *float64 `json:"value"`
|
||||
}
|
||||
|
||||
type Override struct {
|
||||
Matcher Matcher `json:"matcher"`
|
||||
Properties []Property `json:"properties"`
|
||||
}
|
||||
|
||||
type Matcher struct {
|
||||
ID string `json:"id"`
|
||||
Options interface{} `json:"options"`
|
||||
}
|
||||
|
||||
type Property struct {
|
||||
ID string `json:"id"`
|
||||
Value interface{} `json:"value"`
|
||||
}
|
||||
|
||||
type Target struct {
|
||||
Datasource Datasource `json:"datasource"`
|
||||
EditorMode string `json:"editorMode"`
|
||||
Expr string `json:"expr"`
|
||||
Format string `json:"format"`
|
||||
Hide bool `json:"hide"`
|
||||
Instant bool `json:"instant"`
|
||||
LegendFormat string `json:"legendFormat"`
|
||||
Range bool `json:"range"`
|
||||
RefID string `json:"refId"`
|
||||
}
|
||||
|
||||
type Transformation struct {
|
||||
ID string `json:"id"`
|
||||
Options interface{} `json:"options"`
|
||||
}
|
||||
|
||||
type Templating struct {
|
||||
List []TemplateVar `json:"list"`
|
||||
}
|
||||
|
||||
type TemplateVar struct {
|
||||
Current TemplateCurrent `json:"current"`
|
||||
IncludeAll bool `json:"includeAll"`
|
||||
Label string `json:"label"`
|
||||
Name string `json:"name"`
|
||||
Options []string `json:"options,omitempty"`
|
||||
Query TemplateQueryValue `json:"query"`
|
||||
Refresh int `json:"refresh"`
|
||||
Regex string `json:"regex"`
|
||||
Type string `json:"type"`
|
||||
AllValue string `json:"allValue,omitempty"`
|
||||
Datasource *Datasource `json:"datasource,omitempty"`
|
||||
Definition string `json:"definition,omitempty"`
|
||||
Multi bool `json:"multi,omitempty"`
|
||||
Sort int `json:"sort,omitempty"`
|
||||
}
|
||||
|
||||
type TimeRange struct {
|
||||
From string `json:"from"`
|
||||
To string `json:"to"`
|
||||
}
|
||||
|
||||
type Timepicker struct {
|
||||
RefreshIntervals []string `json:"refresh_intervals"`
|
||||
}
|
||||
|
||||
// Transformation option helpers.
|
||||
type MergeOptions struct{}
|
||||
|
||||
type OrganizeOptions struct {
|
||||
ExcludeByName map[string]bool `json:"excludeByName,omitempty"`
|
||||
IncludeByName map[string]string `json:"includeByName"`
|
||||
IndexByName map[string]string `json:"indexByName"`
|
||||
RenameByName map[string]string `json:"renameByName,omitempty"`
|
||||
}
|
||||
|
||||
type TransposeOptions struct {
|
||||
FirstFieldName string `json:"firstFieldName"`
|
||||
RestFieldsName string `json:"restFieldsName"`
|
||||
}
|
||||
|
||||
type SortByOptions struct {
|
||||
Fields map[string]string `json:"fields,omitempty"`
|
||||
Sort []SortField `json:"sort"`
|
||||
}
|
||||
|
||||
type SortField struct {
|
||||
Field string `json:"field"`
|
||||
}
|
||||
|
||||
// Template helpers.
|
||||
type TemplateCurrent struct {
|
||||
Text interface{} `json:"text"`
|
||||
Value interface{} `json:"value"`
|
||||
}
|
||||
|
||||
// TemplateQuery is the structured form of a template variable query.
type TemplateQuery struct {
	Query string `json:"query"`
	RefID string `json:"refId,omitempty"`
}

// TemplateQueryValue is a template variable query that is encoded to JSON
// either as a raw string (e.g. for a datasource variable) or as a structured
// TemplateQuery object.
type TemplateQueryValue struct {
	String *string
	Query  *TemplateQuery
}

// QueryString returns a TemplateQueryValue that marshals to the JSON string s.
func QueryString(s string) TemplateQueryValue {
	raw := s
	return TemplateQueryValue{String: &raw}
}

// QueryTemplate returns a TemplateQueryValue that marshals to the JSON object
// form of q.
func QueryTemplate(q TemplateQuery) TemplateQueryValue {
	structured := q
	return TemplateQueryValue{Query: &structured}
}

// MarshalJSON implements json.Marshaler.
//
// The structured query takes precedence when both fields are set; a value with
// neither field set is encoded as JSON null.
func (q TemplateQueryValue) MarshalJSON() ([]byte, error) {
	if q.Query != nil {
		return json.Marshal(q.Query)
	}
	if q.String != nil {
		return json.Marshal(*q.String)
	}
	return json.Marshal(nil)
}
|
||||
3
dashboards/dashgen/generator/qt_generate.go
Normal file
3
dashboards/dashgen/generator/qt_generate.go
Normal file
@@ -0,0 +1,3 @@
|
||||
package generator
|
||||
|
||||
//go:generate go run github.com/valyala/quicktemplate/qtc@v1.7.0 -dir .
|
||||
98
dashboards/dashgen/generator/queries.go
Normal file
98
dashboards/dashgen/generator/queries.go
Normal file
@@ -0,0 +1,98 @@
|
||||
package generator
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/dashboards/dashgen/parser"
|
||||
)
|
||||
|
||||
// componentVersionPatterns maps component names to regex patterns for vm_app_version filtering.
|
||||
// Note: Prometheus regex label matchers are fully anchored, so the patterns need neither ^ nor $.
|
||||
var componentVersionPatterns = map[string]string{
|
||||
"cluster": "(vminsert|vmselect|vmstorage)-.*",
|
||||
"single": "victoria-metrics-.*",
|
||||
"vmagent": "vmagent-.*",
|
||||
"vmalert": "vmalert-.*",
|
||||
"vmauth": "vmauth-.*",
|
||||
"vmanomaly": "vmanomaly-.*",
|
||||
"unknown": ".*", // Unknown alerts apply to all components
|
||||
}
|
||||
|
||||
// svcNameRegex extracts service name from version label (e.g., "vmagent-20251204-..." -> "vmagent").
|
||||
const svcNameRegex = `^(.+)-\\d{8}-.*`
|
||||
|
||||
// queryTemplate generates a PromQL query that returns the MINIMUM (worst) percentage
|
||||
// of healthy instances over the selected time range.
|
||||
// Returns:
|
||||
// - 100 when no instances fired the alert during the range (all healthy)
|
||||
// - 0-99 when some instances fired (shows worst percentage)
|
||||
// - No data when the alert is not applicable to the component
|
||||
//
|
||||
// Logic:
|
||||
// 1. Count total instances per svc_name (from vm_app_version with version filter)
|
||||
// 2. Count firing instances per svc_name (alert expr joined with vm_app_version)
|
||||
// 3. Calculate: 100 * (total - firing) / total
|
||||
// 4. Take min_over_time to show worst state in selected range
|
||||
const queryTemplate = `min_over_time(
|
||||
(
|
||||
WITH (
|
||||
vm_svc = label_replace(
|
||||
vm_app_version{version=~"%s", version!~"(victoria-(logs|traces)|vl|vt).*", job=~"$job", instance=~"$instance"},
|
||||
"svc_name",
|
||||
"$1",
|
||||
"version",
|
||||
"%s"
|
||||
),
|
||||
total = count by (svc_name) (vm_svc),
|
||||
firing_pod = count by (svc_name) (
|
||||
((%s) > 0) * on(pod, instance, job) group_left(svc_name) vm_svc
|
||||
),
|
||||
firing_inst = count by (svc_name) (
|
||||
((%s) > 0) * on(instance, job) group_left(svc_name) vm_svc
|
||||
),
|
||||
firing = (firing_pod or firing_inst or total * 0)
|
||||
)
|
||||
clamp_min(100 * (total - firing) / total, 0)
|
||||
)[$__range:]
|
||||
)`
|
||||
|
||||
// NormalizeAlertQuery transforms an alert expression into a dashboard query
|
||||
// that returns the health percentage per service name.
|
||||
// Returns 100 when all instances are healthy, <100 when some are firing,
|
||||
// or no data when not applicable to the component.
|
||||
func NormalizeAlertQuery(rule parser.AlertRule) string {
|
||||
expr := strings.TrimSpace(rule.Expr)
|
||||
|
||||
versionFilter := componentVersionPatterns[rule.Component]
|
||||
if versionFilter == "" {
|
||||
versionFilter = ".*" // Default to all if component not mapped
|
||||
}
|
||||
|
||||
return fmt.Sprintf(queryTemplate, versionFilter, svcNameRegex, expr, expr)
|
||||
}
|
||||
|
||||
// invalidRefIDChars matches every run of characters that is neither a letter
// nor a decimal digit, i.e. everything GenerateRefID strips from an alert name.
var invalidRefIDChars = regexp.MustCompile(`[^\p{L}\p{Nd}]+`)

// startsWithDigit checks if a string starts with a decimal digit.
var startsWithDigit = regexp.MustCompile(`^\p{Nd}`)

// GenerateRefID creates a refId for a Grafana query target from an alert name.
//
// The result contains only letters and decimal digits and always starts with
// a letter: every other character (spaces, dashes, underscores, colons, dots,
// brackets, slashes, ...) is removed, and "Q" is prepended when the remaining
// text is empty or starts with a digit.
//
// Distinct alert names may map to the same refId (e.g. "A-B" and "A_B");
// de-duplication, if required, is up to the caller.
func GenerateRefID(alertName string) string {
	refID := invalidRefIDChars.ReplaceAllString(alertName, "")

	if refID == "" || startsWithDigit.MatchString(refID) {
		refID = "Q" + refID
	}

	return refID
}
|
||||
294
dashboards/dashgen/generator/queries_test.go
Normal file
294
dashboards/dashgen/generator/queries_test.go
Normal file
@@ -0,0 +1,294 @@
|
||||
package generator
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/dashboards/dashgen/parser"
|
||||
)
|
||||
|
||||
// TestSvcNameRegex ensures the current pattern extracts service name prefix
|
||||
// from the vm_app_version label formats we see in real tags (all include date).
|
||||
func TestSvcNameRegex(t *testing.T) {
|
||||
// svcNameRegex is double-escaped for PromQL; unescape for Go regexp.
|
||||
re := regexp.MustCompile(strings.ReplaceAll(svcNameRegex, `\\`, `\`))
|
||||
|
||||
cases := []struct {
|
||||
version string
|
||||
expect string
|
||||
}{
|
||||
{"operator-operator-20251031-152943-v0.65.0", "operator-operator"},
|
||||
{"victoria-logs-20251128-234103-tags-v1.39.0-0-ge4f2a3c0a0", "victoria-logs"},
|
||||
{"victoria-metrics-20251201-111831-tags-v1.131.0-enterprise-0-ge509c64054", "victoria-metrics"},
|
||||
{"vlagent-20251128-234216-tags-v1.39.0-0-ge4f2a3c0a0", "vlagent"},
|
||||
{"vmagent-20251201-112045-tags-v1.131.0-enterprise-0-ge509c64054", "vmagent"},
|
||||
{"vmalert-20251201-112310-tags-v1.131.0-enterprise-0-ge509c64054", "vmalert"},
|
||||
{"vmauth-20251017-122113-tags-v1.128.0-0-gf91789eebd", "vmauth"},
|
||||
{"vmbackupmanager-20251201-113731-tags-v1.131.0-enterprise-0-ge509c64054", "vmbackupmanager"},
|
||||
{"vminsert-20251201-114237-tags-v1.131.0-enterprise-cluster-0-g50309fe153", "vminsert"},
|
||||
{"vmselect-20251201-114427-tags-v1.131.0-enterprise-cluster-0-g50309fe153", "vmselect"},
|
||||
{"vmstorage-20251201-114630-tags-v1.131.0-enterprise-cluster-0-g50309fe153", "vmstorage"},
|
||||
{"vmanomaly-20251204-120000-tags-v1.0.0", "vmanomaly"},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
m := re.FindStringSubmatch(tc.version)
|
||||
if len(m) < 2 {
|
||||
t.Fatalf("no match for version %q", tc.version)
|
||||
}
|
||||
got := m[1]
|
||||
if got != tc.expect {
|
||||
t.Errorf("svc name mismatch for %q: got %q, want %q", tc.version, got, tc.expect)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestComponentVersionPatterns verifies that version patterns correctly match expected components.
|
||||
func TestComponentVersionPatterns(t *testing.T) {
|
||||
cases := []struct {
|
||||
component string
|
||||
versions []string // versions that should match
|
||||
}{
|
||||
{"cluster", []string{"vminsert-20251201-114237", "vmselect-20251201-114427", "vmstorage-20251201-114630"}},
|
||||
{"single", []string{"victoria-metrics-20251201-111831"}},
|
||||
{"vmagent", []string{"vmagent-20251201-112045"}},
|
||||
{"vmalert", []string{"vmalert-20251201-112310"}},
|
||||
{"vmauth", []string{"vmauth-20251017-122113"}},
|
||||
{"vmanomaly", []string{"vmanomaly-20251204-120000"}},
|
||||
{"unknown", []string{"grafana-20251201", "telegraf-20251201"}},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
pattern, ok := componentVersionPatterns[tc.component]
|
||||
if !ok {
|
||||
t.Errorf("component %q not found in componentVersionPatterns", tc.component)
|
||||
continue
|
||||
}
|
||||
|
||||
re := regexp.MustCompile("^" + pattern)
|
||||
for _, version := range tc.versions {
|
||||
if !re.MatchString(version) {
|
||||
t.Errorf("pattern for %q should match %q, but didn't", tc.component, version)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestComponentVersionPatternsNoFalsePositives verifies patterns don't match wrong components.
|
||||
func TestComponentVersionPatternsNoFalsePositives(t *testing.T) {
|
||||
cases := []struct {
|
||||
component string
|
||||
versions []string // versions that should NOT match
|
||||
}{
|
||||
{"cluster", []string{"vmagent-20251201", "vmalert-20251201", "victoria-metrics-20251201"}},
|
||||
{"single", []string{"vmagent-20251201", "vminsert-20251201"}},
|
||||
{"vmagent", []string{"vmalert-20251201", "vmauth-20251201"}},
|
||||
{"vmalert", []string{"vmagent-20251201", "vmauth-20251201"}},
|
||||
{"vmauth", []string{"vmagent-20251201", "vmalert-20251201"}},
|
||||
{"vmanomaly", []string{"vmagent-20251201", "vmalert-20251201"}},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
pattern := componentVersionPatterns[tc.component]
|
||||
re := regexp.MustCompile("^" + pattern)
|
||||
for _, version := range tc.versions {
|
||||
if re.MatchString(version) {
|
||||
t.Errorf("pattern for %q should NOT match %q, but did", tc.component, version)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestGenerateRefID verifies refId generation for various alert names.
|
||||
func TestGenerateRefID(t *testing.T) {
|
||||
cases := []struct {
|
||||
alertName string
|
||||
want string
|
||||
}{
|
||||
{"TooManyLogs", "TooManyLogs"},
|
||||
{"Too-Many-Logs", "TooManyLogs"},
|
||||
{"Too_Many_Logs", "TooManyLogs"},
|
||||
{"Too Many Logs", "TooManyLogs"},
|
||||
{"Alert:With:Colons", "AlertWithColons"},
|
||||
{"Alert.With.Dots", "AlertWithDots"},
|
||||
{"123StartWithDigit", "Q123StartWithDigit"},
|
||||
{"NormalAlert", "NormalAlert"},
|
||||
{"ConcurrentInsertsHitTheLimit", "ConcurrentInsertsHitTheLimit"},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
got := GenerateRefID(tc.alertName)
|
||||
if got != tc.want {
|
||||
t.Errorf("GenerateRefID(%q) = %q, want %q", tc.alertName, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestNormalizeAlertQuery verifies the query generation for different components.
|
||||
func TestNormalizeAlertQuery(t *testing.T) {
|
||||
cases := []struct {
|
||||
rule parser.AlertRule
|
||||
wantContains []string
|
||||
wantNotContain []string
|
||||
}{
|
||||
{
|
||||
rule: parser.AlertRule{
|
||||
Alert: "TestAlert",
|
||||
Expr: "sum(rate(metric[5m])) > 0",
|
||||
Component: "vmagent",
|
||||
},
|
||||
wantContains: []string{
|
||||
"vmagent-.*", // version filter
|
||||
"sum(rate(metric[5m])) > 0", // original expr preserved
|
||||
"min_over_time(", // shows worst state over range
|
||||
"clamp_min(", // prevents negative values
|
||||
"vm_app_version", // joins with vm_app_version
|
||||
"svc_name", // extracts service name
|
||||
"$__range", // uses Grafana range variable
|
||||
},
|
||||
},
|
||||
{
|
||||
rule: parser.AlertRule{
|
||||
Alert: "ClusterAlert",
|
||||
Expr: "disk_usage > 0.9",
|
||||
Component: "cluster",
|
||||
},
|
||||
wantContains: []string{
|
||||
"(vminsert|vmselect|vmstorage)-.*", // cluster pattern
|
||||
},
|
||||
},
|
||||
{
|
||||
rule: parser.AlertRule{
|
||||
Alert: "SingleAlert",
|
||||
Expr: "memory_usage > 0.8",
|
||||
Component: "single",
|
||||
},
|
||||
wantContains: []string{
|
||||
"victoria-metrics-.*", // single pattern
|
||||
},
|
||||
},
|
||||
{
|
||||
rule: parser.AlertRule{
|
||||
Alert: "UnknownAlert",
|
||||
Expr: "some_metric > 0",
|
||||
Component: "unknown",
|
||||
},
|
||||
wantContains: []string{
|
||||
`version=~".*"`, // unknown matches all
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
got := NormalizeAlertQuery(tc.rule)
|
||||
|
||||
for _, want := range tc.wantContains {
|
||||
if !strings.Contains(got, want) {
|
||||
t.Errorf("NormalizeAlertQuery(%q) should contain %q, got:\n%s", tc.rule.Alert, want, got)
|
||||
}
|
||||
}
|
||||
|
||||
for _, notWant := range tc.wantNotContain {
|
||||
if strings.Contains(got, notWant) {
|
||||
t.Errorf("NormalizeAlertQuery(%q) should NOT contain %q", tc.rule.Alert, notWant)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestNormalizeAlertQueryStructure verifies the overall structure of generated queries.
|
||||
func TestNormalizeAlertQueryStructure(t *testing.T) {
|
||||
rule := parser.AlertRule{
|
||||
Alert: "TestAlert",
|
||||
Expr: "metric > 0",
|
||||
Component: "vmagent",
|
||||
}
|
||||
|
||||
query := NormalizeAlertQuery(rule)
|
||||
|
||||
// Verify min_over_time wrapper
|
||||
if !strings.HasPrefix(query, "min_over_time(") {
|
||||
t.Error("query should start with 'min_over_time('")
|
||||
}
|
||||
|
||||
// Verify key components are present
|
||||
expectedParts := []string{
|
||||
"min_over_time(",
|
||||
"vm_svc = label_replace(",
|
||||
"total = count by (svc_name)",
|
||||
"firing_pod = count by (svc_name)",
|
||||
"firing_inst = count by (svc_name)",
|
||||
"firing = (firing_pod or firing_inst",
|
||||
"clamp_min(100 * (total - firing) / total, 0)",
|
||||
"[$__range:]",
|
||||
}
|
||||
|
||||
for _, part := range expectedParts {
|
||||
if !strings.Contains(query, part) {
|
||||
t.Errorf("query missing expected part: %q", part)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestNormalizeAlertQueryExprPreserved verifies the original expression is preserved.
|
||||
func TestNormalizeAlertQueryExprPreserved(t *testing.T) {
|
||||
expressions := []string{
|
||||
"sum(rate(http_requests_total[5m])) > 100",
|
||||
"avg(node_cpu_seconds_total) by (instance) > 0.9",
|
||||
`count(vm_app_version{version=~"vmagent.*"}) == 0`,
|
||||
"changes(process_start_time_seconds[1h]) > 2",
|
||||
"(disk_used / disk_total) > 0.95",
|
||||
}
|
||||
|
||||
for _, expr := range expressions {
|
||||
rule := parser.AlertRule{
|
||||
Alert: "TestAlert",
|
||||
Expr: expr,
|
||||
Component: "vmagent",
|
||||
}
|
||||
|
||||
query := NormalizeAlertQuery(rule)
|
||||
|
||||
// The expression should appear twice (for pod join and instance join)
|
||||
count := strings.Count(query, expr)
|
||||
if count != 2 {
|
||||
t.Errorf("expression %q should appear exactly 2 times in query, found %d times", expr, count)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestNormalizeAlertQueryWhitespace verifies whitespace in expressions is handled.
|
||||
func TestNormalizeAlertQueryWhitespace(t *testing.T) {
|
||||
rule := parser.AlertRule{
|
||||
Alert: "TestAlert",
|
||||
Expr: " sum(rate(metric[5m])) > 0 ", // leading/trailing whitespace
|
||||
Component: "vmagent",
|
||||
}
|
||||
|
||||
query := NormalizeAlertQuery(rule)
|
||||
|
||||
// Should not contain the leading/trailing whitespace
|
||||
if strings.Contains(query, " sum") {
|
||||
t.Error("query should trim leading whitespace from expression")
|
||||
}
|
||||
}
|
||||
|
||||
// TestAllComponentsHavePatterns verifies all known components have version patterns.
|
||||
func TestAllComponentsHavePatterns(t *testing.T) {
|
||||
requiredComponents := []string{
|
||||
"cluster",
|
||||
"single",
|
||||
"vmagent",
|
||||
"vmalert",
|
||||
"vmauth",
|
||||
"vmanomaly",
|
||||
"unknown",
|
||||
}
|
||||
|
||||
for _, component := range requiredComponents {
|
||||
if _, ok := componentVersionPatterns[component]; !ok {
|
||||
t.Errorf("componentVersionPatterns missing entry for %q", component)
|
||||
}
|
||||
}
|
||||
}
|
||||
377
dashboards/dashgen/generator/quicktemplate_renderer.go
Normal file
377
dashboards/dashgen/generator/quicktemplate_renderer.go
Normal file
@@ -0,0 +1,377 @@
|
||||
package generator
|
||||
|
||||
import "sort"
|
||||
|
||||
// AlertDefinition represents a normalized alert expression with refId.
|
||||
type AlertDefinition struct {
|
||||
RefID string `json:"refId"`
|
||||
Expr string `json:"expr"`
|
||||
}
|
||||
|
||||
// RenderWithQuickTemplate builds the dashboard data and renders it via quicktemplate.
|
||||
func RenderWithQuickTemplate(alerts []AlertDefinition, renames map[string]string, title, uid string) (string, error) {
|
||||
dashboard := BuildDashboard(alerts, renames, title, uid)
|
||||
return RenderDashboard(dashboard), nil
|
||||
}
|
||||
|
||||
// BuildDashboard constructs the typed dashboard model that is rendered by quicktemplate.
|
||||
func BuildDashboard(alerts []AlertDefinition, renames map[string]string, title, uid string) Dashboard {
|
||||
promDatasource := Datasource{Type: "prometheus", UID: "${datasource}"}
|
||||
|
||||
targets := make([]Target, 0, len(alerts))
|
||||
for _, a := range alerts {
|
||||
targets = append(targets, Target{
|
||||
Datasource: promDatasource,
|
||||
EditorMode: "code",
|
||||
Expr: a.Expr,
|
||||
Format: "table",
|
||||
Hide: false,
|
||||
Instant: true,
|
||||
LegendFormat: "{{svc_name}}",
|
||||
Range: false,
|
||||
RefID: a.RefID,
|
||||
})
|
||||
}
|
||||
|
||||
instanceCountTarget := Target{
|
||||
Datasource: promDatasource,
|
||||
EditorMode: "code",
|
||||
Expr: `count by (svc_name) (label_replace(vm_app_version{job=~"$job", instance=~"$instance", version!~"(victoria-(logs|traces)|vl|vt).*"}, "svc_name", "$1", "version", "^(.+)-\\d{8}-.*"))`,
|
||||
Format: "table",
|
||||
Hide: false,
|
||||
Instant: true,
|
||||
LegendFormat: "{{svc_name}}",
|
||||
Range: false,
|
||||
RefID: "InstanceCount",
|
||||
}
|
||||
|
||||
// Typed field configuration for health matrix.
|
||||
fieldConfig := FieldConfig{
|
||||
Defaults: FieldDefaults{
|
||||
Color: Color{Mode: "thresholds"},
|
||||
Custom: CustomField{
|
||||
Align: "center",
|
||||
CellOptions: CellOptions{ApplyToRow: boolPtr(false), Type: "color-background"},
|
||||
Filterable: true,
|
||||
Footer: &Footer{Reducers: []string{"min"}},
|
||||
Inspect: false,
|
||||
MinWidth: 80,
|
||||
WrapHeaderText: true,
|
||||
WrapText: boolPtr(false),
|
||||
},
|
||||
Mappings: []Mapping{
|
||||
{
|
||||
Options: MappingOptions{
|
||||
From: floatPtr(100),
|
||||
To: floatPtr(100),
|
||||
Result: MappingResult{
|
||||
Color: "green",
|
||||
Index: 0,
|
||||
Text: strPtr("100%"),
|
||||
},
|
||||
},
|
||||
Type: "range",
|
||||
},
|
||||
{
|
||||
Options: MappingOptions{
|
||||
From: floatPtr(0),
|
||||
To: floatPtr(99.99),
|
||||
Result: MappingResult{
|
||||
Color: "red",
|
||||
Index: 1,
|
||||
},
|
||||
},
|
||||
Type: "range",
|
||||
},
|
||||
{
|
||||
Options: MappingOptions{
|
||||
From: floatPtr(-999999),
|
||||
To: floatPtr(-0.01),
|
||||
Result: MappingResult{
|
||||
Color: "red",
|
||||
Index: 2,
|
||||
Text: strPtr("ERR"),
|
||||
},
|
||||
},
|
||||
Type: "range",
|
||||
},
|
||||
{
|
||||
Options: MappingOptions{
|
||||
Match: "null",
|
||||
Result: MappingResult{
|
||||
Color: "#3D3D3D",
|
||||
Index: 3,
|
||||
Text: strPtr("-"),
|
||||
},
|
||||
},
|
||||
Type: "special",
|
||||
},
|
||||
},
|
||||
NoValue: "-",
|
||||
Thresholds: Thresholds{
|
||||
Mode: "absolute",
|
||||
Steps: []ThresholdStep{
|
||||
{Color: "#3D3D3D", Value: nil},
|
||||
{Color: "red", Value: floatPtr(0)},
|
||||
{Color: "green", Value: floatPtr(100)},
|
||||
},
|
||||
},
|
||||
Unit: "percent",
|
||||
},
|
||||
Overrides: []Override{
|
||||
{
|
||||
Matcher: Matcher{ID: "byName", Options: "Alert"},
|
||||
Properties: []Property{
|
||||
{ID: "custom.cellOptions", Value: CellOptions{Type: "auto"}},
|
||||
{ID: "custom.width", Value: 280},
|
||||
{ID: "custom.filterable", Value: true},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
instanceCountFieldConfig := FieldConfig{
|
||||
Defaults: FieldDefaults{
|
||||
Color: Color{Mode: "fixed", FixedColor: "#1F60C4"},
|
||||
Custom: CustomField{
|
||||
Align: "center",
|
||||
CellOptions: CellOptions{ApplyToRow: boolPtr(false), Type: "color-background"},
|
||||
Filterable: false,
|
||||
Inspect: false,
|
||||
MinWidth: 80,
|
||||
},
|
||||
Mappings: []Mapping{},
|
||||
NoValue: "-",
|
||||
Thresholds: Thresholds{
|
||||
Mode: "absolute",
|
||||
Steps: []ThresholdStep{
|
||||
{Color: "#1F60C4", Value: nil},
|
||||
},
|
||||
},
|
||||
Unit: "none",
|
||||
},
|
||||
Overrides: []Override{
|
||||
{
|
||||
Matcher: Matcher{ID: "byName", Options: "Metric"},
|
||||
Properties: []Property{
|
||||
{ID: "custom.hidden", Value: true},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
transformations := []Transformation{
|
||||
{ID: "merge", Options: MergeOptions{}},
|
||||
{
|
||||
ID: "organize",
|
||||
Options: OrganizeOptions{
|
||||
ExcludeByName: map[string]bool{"Time": true},
|
||||
IncludeByName: map[string]string{},
|
||||
IndexByName: map[string]string{},
|
||||
RenameByName: buildRenameByName(renames),
|
||||
},
|
||||
},
|
||||
{ID: "transpose", Options: TransposeOptions{FirstFieldName: "Alert", RestFieldsName: ""}},
|
||||
{ID: "sortBy", Options: SortByOptions{Fields: map[string]string{}, Sort: []SortField{{Field: "Alert"}}}},
|
||||
}
|
||||
|
||||
instanceCountTransformations := []Transformation{
|
||||
{ID: "merge", Options: MergeOptions{}},
|
||||
{
|
||||
ID: "organize",
|
||||
Options: OrganizeOptions{
|
||||
ExcludeByName: map[string]bool{"Time": true},
|
||||
IncludeByName: map[string]string{},
|
||||
IndexByName: map[string]string{},
|
||||
},
|
||||
},
|
||||
{ID: "transpose", Options: TransposeOptions{FirstFieldName: "Metric", RestFieldsName: ""}},
|
||||
{
|
||||
ID: "organize",
|
||||
Options: OrganizeOptions{
|
||||
ExcludeByName: map[string]bool{"Metric": true},
|
||||
IncludeByName: map[string]string{},
|
||||
IndexByName: map[string]string{},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
templates := []TemplateVar{
|
||||
{
|
||||
Current: TemplateCurrent{Text: "default", Value: "default"},
|
||||
IncludeAll: false,
|
||||
Label: "Datasource",
|
||||
Name: "datasource",
|
||||
Options: []string{},
|
||||
Query: QueryString("prometheus"),
|
||||
Refresh: 1,
|
||||
Regex: "",
|
||||
Type: "datasource",
|
||||
},
|
||||
{
|
||||
AllValue: ".*",
|
||||
Current: TemplateCurrent{Text: []string{"All"}, Value: []string{"$__all"}},
|
||||
Datasource: &promDatasource,
|
||||
Definition: "label_values(vm_app_version, job)",
|
||||
IncludeAll: true,
|
||||
Label: "Job",
|
||||
Multi: true,
|
||||
Name: "job",
|
||||
Options: []string{},
|
||||
Query: QueryTemplate(TemplateQuery{Query: "label_values(vm_app_version, job)", RefID: "StandardVariableQuery"}),
|
||||
Refresh: 1,
|
||||
Regex: "",
|
||||
Sort: 1,
|
||||
Type: "query",
|
||||
},
|
||||
{
|
||||
AllValue: ".*",
|
||||
Current: TemplateCurrent{Text: []string{"All"}, Value: []string{"$__all"}},
|
||||
Datasource: &promDatasource,
|
||||
Definition: `label_values(vm_app_version{job=~"$job"}, instance)`,
|
||||
IncludeAll: true,
|
||||
Label: "Instance",
|
||||
Multi: true,
|
||||
Name: "instance",
|
||||
Options: []string{},
|
||||
Query: QueryTemplate(TemplateQuery{Query: `label_values(vm_app_version{job=~"$job"}, instance)`, RefID: "StandardVariableQuery"}),
|
||||
Refresh: 1,
|
||||
Regex: "",
|
||||
Sort: 1,
|
||||
Type: "query",
|
||||
},
|
||||
}
|
||||
|
||||
desc := `**VictoriaMetrics Status Page** - Health matrix for VictoriaMetrics components.
|
||||
|
||||
**Reading the Table:**
|
||||
- **Instance Count** (Blue): Number of detected instances per component
|
||||
- **100%** (Green): All instances are healthy for this alert
|
||||
- **<100%** (Red): Some instances are experiencing issues (percentage shows healthy instances)
|
||||
- **-** (Gray): Alert not applicable to this component
|
||||
|
||||
**Component Prefixes:**
|
||||
- **ALL:** Applies to all VictoriaMetrics components
|
||||
- **cluster:** Applies to vminsert, vmselect, vmstorage
|
||||
- **single:** Applies to victoria-metrics (single-node)
|
||||
- **vmagent/vmalert/vmauth/vmanomaly:** Component-specific alerts
|
||||
|
||||
**Alert Rules Sources:**
|
||||
- [VictoriaMetrics Alerts Overview](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#alerts)
|
||||
- [vmalert Rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/vmalert.yml)
|
||||
- [vmagent Rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/vmagent.yml)
|
||||
- [VM Cluster Rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/cluster.yml)
|
||||
- [VM Single Rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/single.yml)
|
||||
- [VM Operator Rules](https://github.com/VictoriaMetrics/operator/blob/master/config/alerting/vmoperator-rules.yaml)
|
||||
- [VMAnomaly Rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/vmanomaly.yml)
|
||||
`
|
||||
|
||||
panels := []Panel{
|
||||
{
|
||||
Datasource: promDatasource,
|
||||
Description: "Number of instances detected per component",
|
||||
FieldConfig: instanceCountFieldConfig,
|
||||
GridPos: GridPos{H: 4, W: 24, X: 0, Y: 0},
|
||||
ID: 8000,
|
||||
Options: PanelOptions{
|
||||
CellHeight: "md",
|
||||
ShowHeader: true,
|
||||
},
|
||||
Targets: []Target{instanceCountTarget},
|
||||
Title: "Instance Count",
|
||||
Transformations: instanceCountTransformations,
|
||||
Type: "table",
|
||||
},
|
||||
{
|
||||
Datasource: promDatasource,
|
||||
Description: "Shows **worst health state** over the selected time range.\n\n**Values:** 100% = all healthy, <100% = issues detected, - = not applicable for this component\n\n**Prefixes:** ALL = all components, cluster = vminsert/vmselect/vmstorage, single = victoria-metrics, or component-specific (vmagent, vmalert, vmauth, vmanomaly)\n\n**Sources:** [Alerts Overview](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#alerts) | [Alert Rules](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/rules)\n",
|
||||
FieldConfig: fieldConfig,
|
||||
GridPos: GridPos{H: 20, W: 24, X: 0, Y: 4},
|
||||
ID: 9000,
|
||||
Options: PanelOptions{
|
||||
CellHeight: "sm",
|
||||
EnablePagination: boolPtr(false),
|
||||
ShowHeader: true,
|
||||
},
|
||||
Targets: targets,
|
||||
Title: "Service Health Matrix",
|
||||
Transformations: transformations,
|
||||
Type: "table",
|
||||
},
|
||||
}
|
||||
|
||||
return Dashboard{
|
||||
Annotations: Annotations{
|
||||
List: []AnnotationItem{
|
||||
{
|
||||
BuiltIn: 1,
|
||||
Datasource: Datasource{Type: "grafana", UID: "-- Grafana --"},
|
||||
Enable: true,
|
||||
Hide: true,
|
||||
IconColor: "rgba(0, 211, 255, 1)",
|
||||
Name: "Annotations & Alerts",
|
||||
Type: "dashboard",
|
||||
},
|
||||
},
|
||||
},
|
||||
Description: desc,
|
||||
Editable: true,
|
||||
FiscalYearStartMonth: 0,
|
||||
GraphTooltip: 0,
|
||||
ID: 0,
|
||||
Links: []Link{
|
||||
{
|
||||
AsDropdown: false,
|
||||
Icon: "external link",
|
||||
IncludeVars: false,
|
||||
KeepTime: false,
|
||||
Tags: []string{},
|
||||
TargetBlank: true,
|
||||
Title: "Alert Rules Source",
|
||||
Tooltip: "View official VictoriaMetrics alert rules on GitHub",
|
||||
Type: "link",
|
||||
URL: "https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/rules",
|
||||
},
|
||||
},
|
||||
Panels: panels,
|
||||
Preload: false,
|
||||
Refresh: "30s",
|
||||
SchemaVersion: 42,
|
||||
Tags: []string{"victoriametrics", "status-page", "alerts", "health"},
|
||||
Templating: Templating{List: templates},
|
||||
Time: TimeRange{From: "now-5m", To: "now"},
|
||||
Timepicker: Timepicker{RefreshIntervals: []string{"10s", "30s", "1m", "5m"}},
|
||||
Timezone: "",
|
||||
Title: title,
|
||||
UID: uid,
|
||||
Version: 1,
|
||||
}
|
||||
}
|
||||
|
||||
// buildRenameByName returns a new rename table for the "organize"
// transformation. It always contains "svc_name" mapped to the empty string,
// followed by every entry of renames (an explicit "svc_name" entry in renames
// wins). The input map is not modified.
func buildRenameByName(renames map[string]string) map[string]string {
	names := make([]string, len(renames))
	next := 0
	for name := range renames {
		names[next] = name
		next++
	}
	// Copy in a stable key order; the resulting map content does not depend on it.
	sort.Strings(names)

	table := make(map[string]string, len(names)+1)
	table["svc_name"] = ""
	for _, name := range names {
		table[name] = renames[name]
	}
	return table
}
|
||||
|
||||
// boolPtr returns a pointer to a fresh copy of v, for optional boolean fields.
func boolPtr(v bool) *bool {
	p := new(bool)
	*p = v
	return p
}
|
||||
|
||||
// floatPtr returns a pointer to a fresh copy of v, for optional numeric fields.
func floatPtr(v float64) *float64 {
	p := new(float64)
	*p = v
	return p
}
|
||||
|
||||
// strPtr returns a pointer to a fresh copy of v, for optional string fields.
func strPtr(v string) *string {
	p := new(string)
	*p = v
	return p
}
|
||||
70
dashboards/dashgen/generator/quicktemplate_renderer_test.go
Normal file
70
dashboards/dashgen/generator/quicktemplate_renderer_test.go
Normal file
@@ -0,0 +1,70 @@
|
||||
package generator
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/dashboards/dashgen/parser"
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
)
|
||||
|
||||
func TestQuickTemplateMatchesJsonnet(t *testing.T) {
|
||||
alertsDir := filepath.Join("..", "..", "..", "deployment", "docker", "rules")
|
||||
rules, err := parser.ParseAlertDirectory(alertsDir)
|
||||
if err != nil {
|
||||
t.Fatalf("parse alerts: %v", err)
|
||||
}
|
||||
|
||||
if len(rules) == 0 {
|
||||
t.Fatal("no alert rules parsed")
|
||||
}
|
||||
|
||||
sort.Slice(rules, func(i, j int) bool { return rules[i].Alert < rules[j].Alert })
|
||||
|
||||
alertDefs := make([]AlertDefinition, 0, len(rules))
|
||||
renames := make(map[string]string, len(rules))
|
||||
for _, r := range rules {
|
||||
prefix := r.Component
|
||||
if prefix == "unknown" {
|
||||
prefix = "ALL"
|
||||
}
|
||||
refID := GenerateRefID(prefix + "_" + r.Alert)
|
||||
expr := NormalizeAlertQuery(r)
|
||||
alertDefs = append(alertDefs, AlertDefinition{RefID: refID, Expr: expr})
|
||||
|
||||
fieldName := "Value #" + refID
|
||||
displayName := prefix + ": " + r.Alert
|
||||
renames[fieldName] = displayName
|
||||
}
|
||||
|
||||
qtplJSON, err := RenderWithQuickTemplate(alertDefs, renames, "VictoriaMetrics - Status Page", "vm-status-page")
|
||||
if err != nil {
|
||||
t.Fatalf("quicktemplate render: %v", err)
|
||||
}
|
||||
|
||||
// Baseline: existing generated dashboard in repo (publishable artifact).
|
||||
baselinePath := filepath.Join("..", "..", "status-page-generated.json")
|
||||
baselineBytes, err := os.ReadFile(baselinePath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
t.Skipf("baseline %s not present; generate via dashgen and commit", baselinePath)
|
||||
}
|
||||
t.Fatalf("read baseline: %v", err)
|
||||
}
|
||||
|
||||
var qtObj, baselineObj interface{}
|
||||
if err := json.Unmarshal([]byte(qtplJSON), &qtObj); err != nil {
|
||||
t.Fatalf("unmarshal quicktemplate output: %v", err)
|
||||
}
|
||||
if err := json.Unmarshal(baselineBytes, &baselineObj); err != nil {
|
||||
t.Fatalf("unmarshal baseline output: %v", err)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(baselineObj, qtObj, cmpopts.EquateApprox(0, 1e-9)); diff != "" {
|
||||
t.Fatalf("quicktemplate output differs from baseline (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
11
dashboards/dashgen/go.mod
Normal file
11
dashboards/dashgen/go.mod
Normal file
@@ -0,0 +1,11 @@
|
||||
module github.com/VictoriaMetrics/VictoriaMetrics/dashboards/dashgen
|
||||
|
||||
go 1.25.5
|
||||
|
||||
require (
|
||||
github.com/google/go-cmp v0.5.9
|
||||
github.com/valyala/quicktemplate v1.7.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
require github.com/valyala/bytebufferpool v1.0.0 // indirect
|
||||
27
dashboards/dashgen/go.sum
Normal file
27
dashboards/dashgen/go.sum
Normal file
@@ -0,0 +1,27 @@
|
||||
github.com/andybalholm/brotli v1.0.2/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
|
||||
github.com/andybalholm/brotli v1.0.3/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
|
||||
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
|
||||
github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
|
||||
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
|
||||
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
|
||||
github.com/valyala/fasthttp v1.30.0/go.mod h1:2rsYD01CKFrjjsvFxx75KlEUNpWNBY9JWD3K/7o2Cus=
|
||||
github.com/valyala/quicktemplate v1.7.0 h1:LUPTJmlVcb46OOUY3IeD9DojFpAVbsG+5WFTcjMJzCM=
|
||||
github.com/valyala/quicktemplate v1.7.0/go.mod h1:sqKJnoaOF88V07vkO+9FL8fb9uZg/VPSJnLYn+LmLk8=
|
||||
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
|
||||
golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8=
|
||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/net v0.0.0-20210510120150-4163338589ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
105
dashboards/dashgen/main.go
Normal file
105
dashboards/dashgen/main.go
Normal file
@@ -0,0 +1,105 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"sort"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/dashboards/dashgen/generator"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/dashboards/dashgen/parser"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if err := run(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func run() error {
|
||||
alertsDir := flag.String("alerts-dir", "", "Path to directory with alert YAML files")
|
||||
outputFile := flag.String("output", "dashboard.json", "Path to output JSON file")
|
||||
title := flag.String("title", "VictoriaMetrics - Status Page", "Dashboard title")
|
||||
uid := flag.String("uid", "vm-status-page", "Dashboard UID")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
if *alertsDir == "" {
|
||||
return fmt.Errorf("--alerts-dir is required")
|
||||
}
|
||||
|
||||
fmt.Printf("Parsing alert files from: %s\n", *alertsDir)
|
||||
|
||||
allRules, err := parser.ParseAlertDirectory(*alertsDir)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parse alerts: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Found %d alert rules\n", len(allRules))
|
||||
|
||||
if len(allRules) == 0 {
|
||||
return fmt.Errorf("no alert rules found in %s", *alertsDir)
|
||||
}
|
||||
|
||||
// Sort rules for deterministic output
|
||||
sort.Slice(allRules, func(i, j int) bool {
|
||||
return allRules[i].Alert < allRules[j].Alert
|
||||
})
|
||||
|
||||
// Prepare data for rendering
|
||||
alerts, renames, err := buildAlertData(allRules)
|
||||
if err != nil {
|
||||
return fmt.Errorf("build alert data: %w", err)
|
||||
}
|
||||
|
||||
jsonOutput, err := generator.RenderWithQuickTemplate(alerts, renames, *title, *uid)
|
||||
if err != nil {
|
||||
return fmt.Errorf("render quicktemplate: %w", err)
|
||||
}
|
||||
|
||||
// Write output
|
||||
if err := os.WriteFile(*outputFile, []byte(jsonOutput), 0644); err != nil {
|
||||
return fmt.Errorf("write output: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("\n✓ Dashboard generated successfully: %s\n", *outputFile)
|
||||
return nil
|
||||
}
|
||||
|
||||
// buildAlertData converts parsed alert rules into Jsonnet-compatible data structures.
|
||||
// Returns error if any alert has empty name or expression.
|
||||
func buildAlertData(rules []parser.AlertRule) ([]generator.AlertDefinition, map[string]string, error) {
|
||||
alerts := make([]generator.AlertDefinition, 0, len(rules))
|
||||
renames := make(map[string]string, len(rules))
|
||||
|
||||
for _, rule := range rules {
|
||||
// Validate required fields
|
||||
if rule.Alert == "" {
|
||||
return nil, nil, fmt.Errorf("alert in group %q has empty name", rule.GroupName)
|
||||
}
|
||||
if rule.Expr == "" {
|
||||
return nil, nil, fmt.Errorf("alert %q has empty expression", rule.Alert)
|
||||
}
|
||||
|
||||
prefix := rule.Component
|
||||
if prefix == "unknown" {
|
||||
prefix = "ALL"
|
||||
}
|
||||
|
||||
refID := generator.GenerateRefID(prefix + "_" + rule.Alert)
|
||||
query := generator.NormalizeAlertQuery(rule)
|
||||
|
||||
alerts = append(alerts, generator.AlertDefinition{
|
||||
RefID: refID,
|
||||
Expr: query,
|
||||
})
|
||||
|
||||
// Grafana uses "Value #<refID>" as the field name
|
||||
fieldName := fmt.Sprintf("Value #%s", refID)
|
||||
displayName := fmt.Sprintf("%s: %s", prefix, rule.Alert)
|
||||
renames[fieldName] = displayName
|
||||
}
|
||||
|
||||
return alerts, renames, nil
|
||||
}
|
||||
131
dashboards/dashgen/parser/alerts.go
Normal file
131
dashboards/dashgen/parser/alerts.go
Normal file
@@ -0,0 +1,131 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// AlertRule is one entry of a group's "rules" list in an alert YAML file,
// extended with two fields that ParseAlertFile fills in after decoding.
type AlertRule struct {
	// Fields decoded from YAML.
	Alert       string            `yaml:"alert"`
	Expr        string            `yaml:"expr"`
	For         string            `yaml:"for"`
	Labels      map[string]string `yaml:"labels"`
	Annotations map[string]string `yaml:"annotations"`

	// Derived fields (not from YAML).

	// Component is the component this alert belongs to (cluster, single,
	// vmagent, etc.), derived from the group name.
	Component string
	// GroupName is the name of the group the rule was declared in.
	GroupName string
}
|
||||
|
||||
// AlertGroup represents a group of alert rules.
|
||||
type AlertGroup struct {
|
||||
Name string `yaml:"name"`
|
||||
Interval string `yaml:"interval"`
|
||||
Concurrency int `yaml:"concurrency"`
|
||||
Rules []AlertRule `yaml:"rules"`
|
||||
}
|
||||
|
||||
// AlertFile represents the structure of an alert YAML file.
|
||||
type AlertFile struct {
|
||||
Groups []AlertGroup `yaml:"groups"`
|
||||
}
|
||||
|
||||
// ParseAlertFile parses a single alert YAML file and returns the parsed structure.
|
||||
func ParseAlertFile(path string) (*AlertFile, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read file: %w", err)
|
||||
}
|
||||
|
||||
var alertFile AlertFile
|
||||
if err := yaml.Unmarshal(data, &alertFile); err != nil {
|
||||
return nil, fmt.Errorf("parse YAML: %w", err)
|
||||
}
|
||||
|
||||
// Derive component name from group name for each rule
|
||||
for i := range alertFile.Groups {
|
||||
component := detectComponent(alertFile.Groups[i].Name)
|
||||
for j := range alertFile.Groups[i].Rules {
|
||||
alertFile.Groups[i].Rules[j].Component = component
|
||||
alertFile.Groups[i].Rules[j].GroupName = alertFile.Groups[i].Name
|
||||
}
|
||||
}
|
||||
|
||||
return &alertFile, nil
|
||||
}
|
||||
|
||||
// ParseAlertDirectory parses all .yml/.yaml files in a directory
|
||||
// and returns a flat list of all alert rules.
|
||||
func ParseAlertDirectory(dir string) ([]AlertRule, error) {
|
||||
entries, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read directory: %w", err)
|
||||
}
|
||||
|
||||
var allRules []AlertRule
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
name := entry.Name()
|
||||
if !strings.HasSuffix(name, ".yml") && !strings.HasSuffix(name, ".yaml") {
|
||||
continue
|
||||
}
|
||||
|
||||
alertFile, err := ParseAlertFile(filepath.Join(dir, name))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parse %s: %w", name, err)
|
||||
}
|
||||
|
||||
for _, group := range alertFile.Groups {
|
||||
allRules = append(allRules, group.Rules...)
|
||||
}
|
||||
}
|
||||
|
||||
return allRules, nil
|
||||
}
|
||||
|
||||
// componentMapping maps a lower-cased group name to its component when the
// whole name matches exactly.
var componentMapping = map[string]string{
	"vmcluster": "cluster",
	"vmsingle":  "single",
	"vmagent":   "vmagent",
	"vmalert":   "vmalert",
	"vmauth":    "vmauth",
	"vmanomaly": "vmanomaly",
}

// detectComponent derives the component from a group name, ignoring case.
// An exact match in componentMapping wins; otherwise the first fallback entry
// with a substring contained in the name decides. It returns "unknown" when
// nothing matches.
func detectComponent(groupName string) string {
	name := strings.ToLower(groupName)

	if exact, found := componentMapping[name]; found {
		return exact
	}

	// Fallback substring rules, tried top to bottom: earlier entries take
	// precedence when a name contains several of the substrings.
	fallbacks := []struct {
		component string
		needles   []string
	}{
		{"cluster", []string{"cluster"}},
		{"single", []string{"single"}},
		{"vmanomaly", []string{"vmanomaly", "anomaly"}},
		{"vmalert", []string{"vmalert"}},
		{"vmagent", []string{"vmagent", "agent"}},
		{"vmauth", []string{"vmauth", "auth"}},
	}
	for _, fb := range fallbacks {
		for _, needle := range fb.needles {
			if strings.Contains(name, needle) {
				return fb.component
			}
		}
	}

	return "unknown"
}
|
||||
246
dashboards/dashgen/parser/alerts_test.go
Normal file
246
dashboards/dashgen/parser/alerts_test.go
Normal file
@@ -0,0 +1,246 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDetectComponent(t *testing.T) {
|
||||
cases := []struct {
|
||||
groupName string
|
||||
want string
|
||||
}{
|
||||
// Exact matches (lowercase)
|
||||
{"vmcluster", "cluster"},
|
||||
{"vmsingle", "single"},
|
||||
{"vmagent", "vmagent"},
|
||||
{"vmalert", "vmalert"},
|
||||
{"vmauth", "vmauth"},
|
||||
{"vmanomaly", "vmanomaly"},
|
||||
|
||||
// Case insensitive
|
||||
{"VMCluster", "cluster"},
|
||||
{"VMSingle", "single"},
|
||||
{"VMAgent", "vmagent"},
|
||||
|
||||
// Substring matches
|
||||
{"cluster-alerts", "cluster"},
|
||||
{"single-node-alerts", "single"},
|
||||
{"vmagent-recording", "vmagent"},
|
||||
{"vmalert-errors", "vmalert"},
|
||||
{"vmauth-health", "vmauth"},
|
||||
{"vmanomaly-detection", "vmanomaly"},
|
||||
{"anomaly-alerts", "vmanomaly"},
|
||||
|
||||
// Unknown fallback - group names that don't match any component
|
||||
{"other-alerts", "unknown"},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
got := detectComponent(tc.groupName)
|
||||
if got != tc.want {
|
||||
t.Errorf("detectComponent(%q) = %q, want %q", tc.groupName, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAlertFile(t *testing.T) {
|
||||
// Create a temporary test file
|
||||
content := `groups:
|
||||
- name: vmagent
|
||||
rules:
|
||||
- alert: TooManyLogs
|
||||
expr: sum(rate(vm_log_messages_total{level="error"}[5m])) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Too many error logs"
|
||||
- alert: TooManyRestarts
|
||||
expr: changes(process_start_time_seconds[1h]) > 2
|
||||
for: 5m
|
||||
- name: vmcluster
|
||||
interval: 30s
|
||||
rules:
|
||||
- alert: DiskRunsOutOfSpace
|
||||
expr: vm_free_disk_space_bytes / vm_data_size_bytes < 0.1
|
||||
labels:
|
||||
severity: critical
|
||||
`
|
||||
tmpDir := t.TempDir()
|
||||
tmpFile := filepath.Join(tmpDir, "test-alerts.yml")
|
||||
if err := os.WriteFile(tmpFile, []byte(content), 0644); err != nil {
|
||||
t.Fatalf("failed to write temp file: %v", err)
|
||||
}
|
||||
|
||||
alertFile, err := ParseAlertFile(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseAlertFile failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify groups count
|
||||
if len(alertFile.Groups) != 2 {
|
||||
t.Errorf("expected 2 groups, got %d", len(alertFile.Groups))
|
||||
}
|
||||
|
||||
// Verify first group
|
||||
if alertFile.Groups[0].Name != "vmagent" {
|
||||
t.Errorf("expected group name 'vmagent', got %q", alertFile.Groups[0].Name)
|
||||
}
|
||||
if len(alertFile.Groups[0].Rules) != 2 {
|
||||
t.Errorf("expected 2 rules in vmagent group, got %d", len(alertFile.Groups[0].Rules))
|
||||
}
|
||||
|
||||
// Verify component detection
|
||||
if alertFile.Groups[0].Rules[0].Component != "vmagent" {
|
||||
t.Errorf("expected component 'vmagent', got %q", alertFile.Groups[0].Rules[0].Component)
|
||||
}
|
||||
if alertFile.Groups[1].Rules[0].Component != "cluster" {
|
||||
t.Errorf("expected component 'cluster', got %q", alertFile.Groups[1].Rules[0].Component)
|
||||
}
|
||||
|
||||
// Verify alert fields
|
||||
rule := alertFile.Groups[0].Rules[0]
|
||||
if rule.Alert != "TooManyLogs" {
|
||||
t.Errorf("expected alert 'TooManyLogs', got %q", rule.Alert)
|
||||
}
|
||||
if rule.For != "15m" {
|
||||
t.Errorf("expected for '15m', got %q", rule.For)
|
||||
}
|
||||
if rule.Labels["severity"] != "warning" {
|
||||
t.Errorf("expected severity 'warning', got %q", rule.Labels["severity"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAlertFileNotFound(t *testing.T) {
|
||||
_, err := ParseAlertFile("/nonexistent/path/file.yml")
|
||||
if err == nil {
|
||||
t.Error("expected error for nonexistent file, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAlertFileInvalidYAML(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
tmpFile := filepath.Join(tmpDir, "invalid.yml")
|
||||
if err := os.WriteFile(tmpFile, []byte("invalid: yaml: content: ["), 0644); err != nil {
|
||||
t.Fatalf("failed to write temp file: %v", err)
|
||||
}
|
||||
|
||||
_, err := ParseAlertFile(tmpFile)
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid YAML, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAlertDirectory(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
|
||||
// Create multiple alert files
|
||||
file1 := `groups:
|
||||
- name: vmagent
|
||||
rules:
|
||||
- alert: Alert1
|
||||
expr: metric1 > 0
|
||||
`
|
||||
file2 := `groups:
|
||||
- name: vmalert
|
||||
rules:
|
||||
- alert: Alert2
|
||||
expr: metric2 > 0
|
||||
- alert: Alert3
|
||||
expr: metric3 > 0
|
||||
`
|
||||
// Non-YAML file should be ignored
|
||||
nonYaml := "this is not yaml at all"
|
||||
|
||||
if err := os.WriteFile(filepath.Join(tmpDir, "vmagent.yml"), []byte(file1), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(tmpDir, "vmalert.yaml"), []byte(file2), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(tmpDir, "readme.txt"), []byte(nonYaml), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rules, err := ParseAlertDirectory(tmpDir)
|
||||
if err != nil {
|
||||
t.Fatalf("ParseAlertDirectory failed: %v", err)
|
||||
}
|
||||
|
||||
if len(rules) != 3 {
|
||||
t.Errorf("expected 3 rules, got %d", len(rules))
|
||||
}
|
||||
|
||||
// Verify components are set correctly
|
||||
componentCounts := make(map[string]int)
|
||||
for _, rule := range rules {
|
||||
componentCounts[rule.Component]++
|
||||
}
|
||||
|
||||
if componentCounts["vmagent"] != 1 {
|
||||
t.Errorf("expected 1 vmagent rule, got %d", componentCounts["vmagent"])
|
||||
}
|
||||
if componentCounts["vmalert"] != 2 {
|
||||
t.Errorf("expected 2 vmalert rules, got %d", componentCounts["vmalert"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAlertDirectoryNotFound(t *testing.T) {
|
||||
_, err := ParseAlertDirectory("/nonexistent/directory")
|
||||
if err == nil {
|
||||
t.Error("expected error for nonexistent directory, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertRuleFields(t *testing.T) {
|
||||
content := `groups:
|
||||
- name: test
|
||||
rules:
|
||||
- alert: CompleteAlert
|
||||
expr: metric > threshold
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
team: platform
|
||||
annotations:
|
||||
summary: "Alert summary"
|
||||
description: "Alert description"
|
||||
`
|
||||
tmpDir := t.TempDir()
|
||||
tmpFile := filepath.Join(tmpDir, "complete.yml")
|
||||
if err := os.WriteFile(tmpFile, []byte(content), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
alertFile, err := ParseAlertFile(tmpFile)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rule := alertFile.Groups[0].Rules[0]
|
||||
|
||||
// Verify all fields
|
||||
if rule.Alert != "CompleteAlert" {
|
||||
t.Errorf("Alert = %q, want 'CompleteAlert'", rule.Alert)
|
||||
}
|
||||
if rule.Expr != "metric > threshold" {
|
||||
t.Errorf("Expr = %q, want 'metric > threshold'", rule.Expr)
|
||||
}
|
||||
if rule.For != "10m" {
|
||||
t.Errorf("For = %q, want '10m'", rule.For)
|
||||
}
|
||||
if rule.Labels["severity"] != "critical" {
|
||||
t.Errorf("Labels[severity] = %q, want 'critical'", rule.Labels["severity"])
|
||||
}
|
||||
if rule.Labels["team"] != "platform" {
|
||||
t.Errorf("Labels[team] = %q, want 'platform'", rule.Labels["team"])
|
||||
}
|
||||
if rule.Annotations["summary"] != "Alert summary" {
|
||||
t.Errorf("Annotations[summary] = %q, want 'Alert summary'", rule.Annotations["summary"])
|
||||
}
|
||||
if rule.GroupName != "test" {
|
||||
t.Errorf("GroupName = %q, want 'test'", rule.GroupName)
|
||||
}
|
||||
}
|
||||
1370
dashboards/status-page-generated.json
Normal file
1370
dashboards/status-page-generated.json
Normal file
File diff suppressed because it is too large
Load Diff
1371
dashboards/vm/status-page-generated.json
Normal file
1371
dashboards/vm/status-page-generated.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user