Compare commits

...

4 Commits

Author SHA1 Message Date
kirillyu
961540b806 Remove old jsonnet status dashboard 2025-12-10 15:10:12 +01:00
kirillyu
c258732902 Regenerate status page dashboards 2025-12-10 14:53:08 +01:00
kirillyu
c1de412ec2 Add status page generator 2025-12-10 14:52:55 +01:00
kirillyu
d343e9f6cf dashboards: add status page dashboard
Add dashboard generator (dashgen) that creates status-page-generated.json
from alert rules in deployment/docker/rules.

The dashboard shows health percentage per alert for all VM components.
Uses min_over_time to display worst state over selected time range.

Files:
- dashboards/dashgen/ - Go tool (parser + generator + jsonnet template)
- dashboards/status-page-generated.json - Prometheus datasource
- dashboards/vm/status-page-generated.json - VictoriaMetrics datasource

Run: make dashboards-sync
Test: make dashgen-test
2025-12-05 11:15:52 +01:00
18 changed files with 4510 additions and 1 deletions

2
dashboards/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
# Generated binary
dashgen/dashgen-bin

View File

@@ -8,6 +8,7 @@ dashboard-copy:
rm -rf dashboards/vm/*.tmp
# Copies listed dashboards to vm/* but changes the datasource type from Prometheus to VictoriaMetrics.
# Also generates status-page-generated.json from alert rules.
# The command should be called before committing changes to dashboards/* files.
dashboards-sync:
SRC=victoriametrics.json D_UID=wNf0q_kZk TITLE="VictoriaMetrics - single-node" $(MAKE) dashboard-copy
@@ -18,3 +19,33 @@ dashboards-sync:
SRC=operator.json D_UID=1H179hunk TITLE="VictoriaMetrics - operator" $(MAKE) dashboard-copy
SRC=backupmanager.json D_UID=gF-lxRdVz TITLE="VictoriaMetrics - backupmanager" $(MAKE) dashboard-copy
SRC=clusterbytenant.json D_UID=IZFqd3lMz TITLE="VictoriaMetrics Cluster Per Tenant Statistic" $(MAKE) dashboard-copy
$(MAKE) generate-status-page
SRC=status-page-generated.json D_UID=vm-status-page TITLE="VictoriaMetrics - Status Page" $(MAKE) dashboard-copy
# Build the dashboard generator tool
build-dashgen:
cd dashboards/dashgen && go build -o dashgen-bin ./main.go
# Generate status page dashboard from alert rules.
# This reads alerts from deployment/docker/rules and produces status-page-generated.json.
# The generated dashboard shows the health percentage for each alert across all VictoriaMetrics components.
generate-status-page: build-dashgen
cd dashboards/dashgen && ./dashgen-bin \
--alerts-dir=../../deployment/docker/rules \
--output=../status-page-generated.json \
--title="VictoriaMetrics - Status Page" \
--uid=vm-status-page
# Clean generated files
clean-dashgen:
rm -f dashboards/dashgen/dashgen-bin dashboards/status-page-generated.json
# Run dashgen unit tests (isolated, can be disabled by commenting out)
dashgen-test:
cd dashboards/dashgen && go test -v ./...
# Run dashgen tests with coverage
dashgen-test-cover:
cd dashboards/dashgen && go test -coverprofile=coverage.txt -covermode=atomic ./...

View File

@@ -7,4 +7,13 @@ The `vm` folder contains copies of the listed dashboards but alternated to use
The listed dashboards can be found on [Grafana website](https://grafana.com/orgs/victoriametrics/dashboards).
When making changes to the dashboards in `dashboards` folder, don't forget to call `make dashboards-sync`
and sync changes to [Grafana website](https://grafana.com/orgs/victoriametrics/dashboards).
and sync changes to [Grafana website](https://grafana.com/orgs/victoriametrics/dashboards).
## Status Page Dashboard
`status-page-generated.json` is auto-generated from alert rules (`deployment/docker/rules/*.yml`).
Shows health percentage per alert across all components.
**Generator** in `dashgen/`: parser -> queries -> quicktemplate renderer
**Testing:** `make dashgen-test`

View File

@@ -0,0 +1,15 @@
{% import "encoding/json" %}
{% code
func marshal(v Dashboard) string {
b, err := json.MarshalIndent(v, "", " ")
if err != nil {
panic(err)
}
return string(b)
}
%}
{% func RenderDashboard(d Dashboard) %}
{%s= marshal(d) %}
{% endfunc %}

View File

@@ -0,0 +1,69 @@
// Code generated by qtc from "dashboard_qt.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line dashboard_qt.qtpl:1
package generator
//line dashboard_qt.qtpl:1
import "encoding/json"
//line dashboard_qt.qtpl:3
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line dashboard_qt.qtpl:3
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line dashboard_qt.qtpl:4
func marshal(v Dashboard) string {
b, err := json.MarshalIndent(v, "", " ")
if err != nil {
panic(err)
}
return string(b)
}
//line dashboard_qt.qtpl:13
func StreamRenderDashboard(qw422016 *qt422016.Writer, d Dashboard) {
//line dashboard_qt.qtpl:13
qw422016.N().S(`
`)
//line dashboard_qt.qtpl:14
qw422016.N().S(marshal(d))
//line dashboard_qt.qtpl:14
qw422016.N().S(`
`)
//line dashboard_qt.qtpl:15
}
//line dashboard_qt.qtpl:15
func WriteRenderDashboard(qq422016 qtio422016.Writer, d Dashboard) {
//line dashboard_qt.qtpl:15
qw422016 := qt422016.AcquireWriter(qq422016)
//line dashboard_qt.qtpl:15
StreamRenderDashboard(qw422016, d)
//line dashboard_qt.qtpl:15
qt422016.ReleaseWriter(qw422016)
//line dashboard_qt.qtpl:15
}
//line dashboard_qt.qtpl:15
func RenderDashboard(d Dashboard) string {
//line dashboard_qt.qtpl:15
qb422016 := qt422016.AcquireByteBuffer()
//line dashboard_qt.qtpl:15
WriteRenderDashboard(qb422016, d)
//line dashboard_qt.qtpl:15
qs422016 := string(qb422016.B)
//line dashboard_qt.qtpl:15
qt422016.ReleaseByteBuffer(qb422016)
//line dashboard_qt.qtpl:15
return qs422016
//line dashboard_qt.qtpl:15
}

View File

@@ -0,0 +1,280 @@
package generator
import "encoding/json"
// Dashboard is the top-level dashboard document.
// It is serialized with encoding/json, so every field is written under the key
// given in its json tag. No field uses omitempty, so all keys are always present.
type Dashboard struct {
Annotations Annotations `json:"annotations"`
Description string `json:"description"`
Editable bool `json:"editable"`
FiscalYearStartMonth int `json:"fiscalYearStartMonth"`
GraphTooltip int `json:"graphTooltip"`
ID int `json:"id"`
Links []Link `json:"links"`
Panels []Panel `json:"panels"`
Preload bool `json:"preload"`
Refresh string `json:"refresh"`
SchemaVersion int `json:"schemaVersion"`
Tags []string `json:"tags"`
Templating Templating `json:"templating"`
Time TimeRange `json:"time"`
Timepicker Timepicker `json:"timepicker"`
Timezone string `json:"timezone"`
Title string `json:"title"`
UID string `json:"uid"`
Version int `json:"version"`
}
// Annotations wraps the dashboard's annotation entries under the "list" key.
type Annotations struct {
List []AnnotationItem `json:"list"`
}
// AnnotationItem is a single entry of Annotations.List.
type AnnotationItem struct {
BuiltIn int `json:"builtIn"`
Datasource Datasource `json:"datasource"`
Enable bool `json:"enable"`
Hide bool `json:"hide"`
IconColor string `json:"iconColor"`
Name string `json:"name"`
Type string `json:"type"`
}
// Link is one entry of the dashboard-level "links" array.
// All fields are always serialized (no omitempty).
type Link struct {
AsDropdown bool `json:"asDropdown"`
Icon string `json:"icon"`
IncludeVars bool `json:"includeVars"`
KeepTime bool `json:"keepTime"`
Tags []string `json:"tags"`
TargetBlank bool `json:"targetBlank"`
Title string `json:"title"`
Tooltip string `json:"tooltip"`
Type string `json:"type"`
URL string `json:"url"`
}
// Panel is one entry of Dashboard.Panels: a datasource, the queries to run
// (Targets), the field configuration, the grid placement and the list of
// transformations applied to the query results.
type Panel struct {
Datasource Datasource `json:"datasource"`
Description string `json:"description"`
FieldConfig FieldConfig `json:"fieldConfig"`
GridPos GridPos `json:"gridPos"`
ID int `json:"id"`
Options PanelOptions `json:"options"`
Targets []Target `json:"targets"`
Title string `json:"title"`
Transformations []Transformation `json:"transformations"`
Type string `json:"type"`
}
// Datasource is a datasource reference made of a type name and a UID.
type Datasource struct {
Type string `json:"type"`
UID string `json:"uid"`
}
// GridPos is a panel's placement on the dashboard grid, serialized as h/w/x/y.
type GridPos struct {
H int `json:"h"`
W int `json:"w"`
X int `json:"x"`
Y int `json:"y"`
}
// PanelOptions is the panel-level "options" object.
// EnablePagination and Footer are pointers with omitempty, so they are left out
// of the JSON when nil, while an explicit false is still written.
type PanelOptions struct {
CellHeight string `json:"cellHeight"`
EnablePagination *bool `json:"enablePagination,omitempty"`
ShowHeader bool `json:"showHeader"`
Footer *Footer `json:"footer,omitempty"`
}
// Footer holds footer settings; every field is optional and omitted when unset.
type Footer struct {
Show *bool `json:"show,omitempty"`
CountRows *bool `json:"countRows,omitempty"`
EnablePagination *bool `json:"enablePagination,omitempty"`
Reducers []string `json:"reducers,omitempty"`
}
// FieldConfig is a panel's "fieldConfig" object: defaults applied to every
// field plus a list of per-field overrides.
type FieldConfig struct {
Defaults FieldDefaults `json:"defaults"`
Overrides []Override `json:"overrides"`
}
// FieldDefaults is the "defaults" part of FieldConfig.
type FieldDefaults struct {
Color Color `json:"color"`
Custom CustomField `json:"custom"`
Mappings []Mapping `json:"mappings"`
NoValue string `json:"noValue"`
Thresholds Thresholds `json:"thresholds"`
Unit string `json:"unit"`
}
// Color selects a color mode; FixedColor is omitted from the JSON when empty.
type Color struct {
Mode string `json:"mode"`
FixedColor string `json:"fixedColor,omitempty"`
}
// CustomField is the "custom" object of FieldDefaults.
// Fields tagged omitempty are dropped when they hold their zero value; WrapText
// and Footer are pointers so that they can be omitted or set explicitly.
type CustomField struct {
Align string `json:"align"`
CellOptions CellOptions `json:"cellOptions"`
Filterable bool `json:"filterable"`
Footer *Footer `json:"footer,omitempty"`
Inspect bool `json:"inspect"`
MinWidth int `json:"minWidth"`
WrapHeaderText bool `json:"wrapHeaderText,omitempty"`
WrapText *bool `json:"wrapText,omitempty"`
Hidden bool `json:"hidden,omitempty"`
Width int `json:"width,omitempty"`
}
// CellOptions describes how a cell is rendered; ApplyToRow is omitted when nil.
type CellOptions struct {
ApplyToRow *bool `json:"applyToRow,omitempty"`
Type string `json:"type"`
}
// Mapping is one value mapping: a mapping type plus its options.
type Mapping struct {
Options MappingOptions `json:"options"`
Type string `json:"type"`
}
// MappingOptions carries the match condition and the result of a Mapping.
// From/To and Match are optional and omitted when unset; Result is always written.
type MappingOptions struct {
From *float64 `json:"from,omitempty"`
To *float64 `json:"to,omitempty"`
Match string `json:"match,omitempty"`
Result MappingResult `json:"result"`
}
// MappingResult is what a matching value is displayed as.
// Text is a pointer so that a mapping can change only the color and omit "text".
type MappingResult struct {
Color string `json:"color"`
Index int `json:"index"`
Text *string `json:"text,omitempty"`
}
// Thresholds is the threshold mode together with its ordered steps.
type Thresholds struct {
Mode string `json:"mode"`
Steps []ThresholdStep `json:"steps"`
}
// ThresholdStep is a single threshold step.
// Value has no omitempty, so a nil pointer is serialized as JSON null.
type ThresholdStep struct {
Color string `json:"color"`
Value *float64 `json:"value"`
}
// Override is one entry of FieldConfig.Overrides: a matcher selecting fields
// and the properties to set on them.
type Override struct {
Matcher Matcher `json:"matcher"`
Properties []Property `json:"properties"`
}
// Matcher selects the fields an Override applies to.
// Options is untyped because its JSON shape depends on the matcher ID.
type Matcher struct {
ID string `json:"id"`
Options interface{} `json:"options"`
}
// Property is a single id/value pair set by an Override.
// Value is untyped because its JSON shape depends on the property ID.
type Property struct {
ID string `json:"id"`
Value interface{} `json:"value"`
}
// Target is one query of a panel, identified within the panel by RefID.
// All fields are always serialized (no omitempty).
type Target struct {
Datasource Datasource `json:"datasource"`
EditorMode string `json:"editorMode"`
Expr string `json:"expr"`
Format string `json:"format"`
Hide bool `json:"hide"`
Instant bool `json:"instant"`
LegendFormat string `json:"legendFormat"`
Range bool `json:"range"`
RefID string `json:"refId"`
}
// Transformation is one entry of Panel.Transformations.
// Options is untyped because its JSON shape depends on the transformation ID.
type Transformation struct {
ID string `json:"id"`
Options interface{} `json:"options"`
}
// Templating wraps the dashboard's template variables under the "list" key.
type Templating struct {
List []TemplateVar `json:"list"`
}
// TemplateVar is one dashboard template variable.
// Query is a TemplateQueryValue and may therefore serialize as either a plain
// string or an object. The fields from AllValue onwards are optional and are
// omitted from the JSON when they hold their zero value.
type TemplateVar struct {
Current TemplateCurrent `json:"current"`
IncludeAll bool `json:"includeAll"`
Label string `json:"label"`
Name string `json:"name"`
Options []string `json:"options,omitempty"`
Query TemplateQueryValue `json:"query"`
Refresh int `json:"refresh"`
Regex string `json:"regex"`
Type string `json:"type"`
AllValue string `json:"allValue,omitempty"`
Datasource *Datasource `json:"datasource,omitempty"`
Definition string `json:"definition,omitempty"`
Multi bool `json:"multi,omitempty"`
Sort int `json:"sort,omitempty"`
}
// TimeRange is the dashboard's default time window, serialized as from/to.
type TimeRange struct {
From string `json:"from"`
To string `json:"to"`
}
// Timepicker lists the refresh intervals; note the snake_case JSON key.
type Timepicker struct {
RefreshIntervals []string `json:"refresh_intervals"`
}
// Transformation option helpers: typed values for Transformation.Options.

// MergeOptions has no fields and serializes as an empty JSON object.
type MergeOptions struct{}
// OrganizeOptions holds field exclusion, inclusion, ordering and renaming maps.
// IncludeByName and IndexByName have no omitempty and are therefore always
// written (a nil map is serialized as null).
type OrganizeOptions struct {
ExcludeByName map[string]bool `json:"excludeByName,omitempty"`
IncludeByName map[string]string `json:"includeByName"`
IndexByName map[string]string `json:"indexByName"`
RenameByName map[string]string `json:"renameByName,omitempty"`
}
// TransposeOptions names the first column and the remaining columns of a
// transposed table.
type TransposeOptions struct {
FirstFieldName string `json:"firstFieldName"`
RestFieldsName string `json:"restFieldsName"`
}
// SortByOptions lists the fields to sort by; Fields is omitted when empty.
type SortByOptions struct {
Fields map[string]string `json:"fields,omitempty"`
Sort []SortField `json:"sort"`
}
// SortField names a single field to sort by.
type SortField struct {
Field string `json:"field"`
}
// Template helpers.

// TemplateCurrent is the current selection of a template variable.
// Text and Value are untyped so that they can carry either a single string or a
// list of strings.
type TemplateCurrent struct {
Text interface{} `json:"text"`
Value interface{} `json:"value"`
}
// TemplateQuery is the structured form of a template variable query.
// RefID is left out of the JSON when empty.
type TemplateQuery struct {
	Query string `json:"query"`
	RefID string `json:"refId,omitempty"`
}

// TemplateQueryValue is the "query" value of a template variable. It is
// serialized either as a plain JSON string (String) or as a structured
// object (Query); see MarshalJSON for the precedence.
type TemplateQueryValue struct {
	String *string
	Query  *TemplateQuery
}

// QueryString returns a TemplateQueryValue that serializes as the JSON string s.
func QueryString(s string) TemplateQueryValue {
	var v TemplateQueryValue
	v.String = &s
	return v
}

// QueryTemplate returns a TemplateQueryValue that serializes as the object q.
func QueryTemplate(q TemplateQuery) TemplateQueryValue {
	var v TemplateQueryValue
	v.Query = &q
	return v
}

// MarshalJSON implements json.Marshaler. The structured query takes precedence
// over the raw string; when neither is set the value is encoded as null.
func (q TemplateQueryValue) MarshalJSON() ([]byte, error) {
	if q.Query != nil {
		return json.Marshal(q.Query)
	}
	if q.String != nil {
		return json.Marshal(*q.String)
	}
	return json.Marshal(nil)
}

View File

@@ -0,0 +1,3 @@
package generator
//go:generate go run github.com/valyala/quicktemplate/qtc@v1.7.0 -dir .

View File

@@ -0,0 +1,98 @@
package generator
import (
"fmt"
"regexp"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/dashboards/dashgen/parser"
)
// componentVersionPatterns maps component names to regex patterns for vm_app_version filtering.
// NormalizeAlertQuery inserts the pattern into a version=~"..." label matcher.
// Note: regex label matchers are fully anchored (both start and end), so the
// patterns carry neither ^ nor $ and end in .* to accept any version suffix.
var componentVersionPatterns = map[string]string{
"cluster": "(vminsert|vmselect|vmstorage)-.*",
"single": "victoria-metrics-.*",
"vmagent": "vmagent-.*",
"vmalert": "vmalert-.*",
"vmauth": "vmauth-.*",
"vmanomaly": "vmanomaly-.*",
"unknown": ".*", // Unknown alerts apply to all components
}
// svcNameRegex extracts service name from version label (e.g., "vmagent-20251204-..." -> "vmagent").
// The service name is everything before the first "-<8 digits>-" group.
// The backslash is doubled because the pattern is embedded inside a
// double-quoted string literal of the generated query; to use the pattern with
// Go's regexp package, replace `\\` with `\` first.
const svcNameRegex = `^(.+)-\\d{8}-.*`
// queryTemplate generates a PromQL query that returns the MINIMUM (worst) percentage
// of healthy instances over the selected time range.
// Returns:
// - 100 when no instances fired the alert during the range (all healthy)
// - 0-99 when some instances fired (shows worst percentage)
// - No data when the alert is not applicable to the component
//
// Logic:
// 1. Count total instances per svc_name (from vm_app_version with version filter)
// 2. Count firing instances per svc_name (alert expr joined with vm_app_version)
// 3. Calculate: 100 * (total - firing) / total
// 4. Take min_over_time to show worst state in selected range
//
// The template has four %s verbs, filled by NormalizeAlertQuery in this order:
// the component version pattern, svcNameRegex, the alert expression (joined on
// pod, instance, job) and the alert expression again (joined on instance, job).
// "firing" falls back from the pod-level join to the instance-level join and
// finally to total * 0, i.e. zero firing instances.
//
// NOTE(review): "(expr) > 0" keeps only samples with a positive value, so an
// alert expression whose firing series carry a value <= 0 (for example
// "x == 0") would presumably never be counted as firing - confirm against the
// rule files.
const queryTemplate = `min_over_time(
(
WITH (
vm_svc = label_replace(
vm_app_version{version=~"%s", version!~"(victoria-(logs|traces)|vl|vt).*", job=~"$job", instance=~"$instance"},
"svc_name",
"$1",
"version",
"%s"
),
total = count by (svc_name) (vm_svc),
firing_pod = count by (svc_name) (
((%s) > 0) * on(pod, instance, job) group_left(svc_name) vm_svc
),
firing_inst = count by (svc_name) (
((%s) > 0) * on(instance, job) group_left(svc_name) vm_svc
),
firing = (firing_pod or firing_inst or total * 0)
)
clamp_min(100 * (total - firing) / total, 0)
)[$__range:]
)`
// NormalizeAlertQuery wraps an alert rule's expression in queryTemplate and
// returns the resulting dashboard query, which yields the health percentage
// per service name: 100 when all instances are healthy, less than 100 when
// some are firing, and no data when the alert does not apply to the component.
//
// The rule's component selects the version pattern from
// componentVersionPatterns; an unmapped component matches every version.
func NormalizeAlertQuery(rule parser.AlertRule) string {
	pattern, known := componentVersionPatterns[rule.Component]
	if !known || pattern == "" {
		// Default to all if component not mapped.
		pattern = ".*"
	}

	// The expression is substituted twice: once for the pod-level join and
	// once for the instance-level join.
	alertExpr := strings.TrimSpace(rule.Expr)
	return fmt.Sprintf(queryTemplate, pattern, svcNameRegex, alertExpr, alertExpr)
}
// startsWithDigit checks if a string starts with a digit.
var startsWithDigit = regexp.MustCompile(`^\d`)

// GenerateRefID creates a valid Grafana refId from an alert name.
// Grafana refIds must be alphanumeric and start with a letter.
//
// Every character outside [A-Za-z0-9] is dropped, not only the common
// separators (space, '-', '_', ':', '.'), so names containing characters such
// as '/', '(' or '%' still produce an alphanumeric result. If the result is
// empty or starts with a digit it is prefixed with "Q".
//
// Distinct alert names can collapse to the same refId (for example "A-B" and
// "A_B"); callers that need uniqueness must handle that themselves.
func GenerateRefID(alertName string) string {
	refID := strings.Map(func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
			return r
		default:
			// A negative return value makes strings.Map drop the rune.
			return -1
		}
	}, alertName)
	if refID == "" || startsWithDigit.MatchString(refID) {
		refID = "Q" + refID
	}
	return refID
}

View File

@@ -0,0 +1,294 @@
package generator
import (
"regexp"
"strings"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/dashboards/dashgen/parser"
)
// TestSvcNameRegex checks that svcNameRegex captures the service name prefix
// for the vm_app_version label formats seen in real tags (all include a date).
func TestSvcNameRegex(t *testing.T) {
	// svcNameRegex is double-escaped for PromQL; unescape for Go regexp.
	goPattern := strings.ReplaceAll(svcNameRegex, `\\`, `\`)
	pattern := regexp.MustCompile(goPattern)

	type versionCase struct {
		version string
		expect  string
	}
	table := []versionCase{
		{"operator-operator-20251031-152943-v0.65.0", "operator-operator"},
		{"victoria-logs-20251128-234103-tags-v1.39.0-0-ge4f2a3c0a0", "victoria-logs"},
		{"victoria-metrics-20251201-111831-tags-v1.131.0-enterprise-0-ge509c64054", "victoria-metrics"},
		{"vlagent-20251128-234216-tags-v1.39.0-0-ge4f2a3c0a0", "vlagent"},
		{"vmagent-20251201-112045-tags-v1.131.0-enterprise-0-ge509c64054", "vmagent"},
		{"vmalert-20251201-112310-tags-v1.131.0-enterprise-0-ge509c64054", "vmalert"},
		{"vmauth-20251017-122113-tags-v1.128.0-0-gf91789eebd", "vmauth"},
		{"vmbackupmanager-20251201-113731-tags-v1.131.0-enterprise-0-ge509c64054", "vmbackupmanager"},
		{"vminsert-20251201-114237-tags-v1.131.0-enterprise-cluster-0-g50309fe153", "vminsert"},
		{"vmselect-20251201-114427-tags-v1.131.0-enterprise-cluster-0-g50309fe153", "vmselect"},
		{"vmstorage-20251201-114630-tags-v1.131.0-enterprise-cluster-0-g50309fe153", "vmstorage"},
		{"vmanomaly-20251204-120000-tags-v1.0.0", "vmanomaly"},
	}

	for i := range table {
		c := table[i]
		groups := pattern.FindStringSubmatch(c.version)
		if len(groups) < 2 {
			t.Fatalf("no match for version %q", c.version)
		}
		if svc := groups[1]; svc != c.expect {
			t.Errorf("svc name mismatch for %q: got %q, want %q", c.version, svc, c.expect)
		}
	}
}
// TestComponentVersionPatterns verifies that version patterns correctly match expected components.
//
// The patterns are used in version=~"..." label matchers, which are fully
// anchored. The test mirrors that by compiling each pattern as ^(?:pattern)$:
// the non-capturing group keeps a top-level alternation inside the anchors,
// and the end anchor is applied just as the matcher applies it.
func TestComponentVersionPatterns(t *testing.T) {
	cases := []struct {
		component string
		versions  []string // versions that should match
	}{
		{"cluster", []string{"vminsert-20251201-114237", "vmselect-20251201-114427", "vmstorage-20251201-114630"}},
		{"single", []string{"victoria-metrics-20251201-111831"}},
		{"vmagent", []string{"vmagent-20251201-112045"}},
		{"vmalert", []string{"vmalert-20251201-112310"}},
		{"vmauth", []string{"vmauth-20251017-122113"}},
		{"vmanomaly", []string{"vmanomaly-20251204-120000"}},
		{"unknown", []string{"grafana-20251201", "telegraf-20251201"}},
	}
	for _, tc := range cases {
		pattern, ok := componentVersionPatterns[tc.component]
		if !ok {
			t.Errorf("component %q not found in componentVersionPatterns", tc.component)
			continue
		}
		// Report an invalid pattern as a test failure instead of panicking.
		re, err := regexp.Compile("^(?:" + pattern + ")$")
		if err != nil {
			t.Errorf("pattern %q for %q does not compile: %v", pattern, tc.component, err)
			continue
		}
		for _, version := range tc.versions {
			if !re.MatchString(version) {
				t.Errorf("pattern for %q should match %q, but didn't", tc.component, version)
			}
		}
	}
}
// TestComponentVersionPatternsNoFalsePositives verifies patterns don't match wrong components.
//
// Patterns are compiled as ^(?:pattern)$ to mirror the full anchoring of
// regex label matchers. A component missing from componentVersionPatterns is
// reported explicitly: without that check the empty pattern would match every
// version and produce misleading "should NOT match" failures.
func TestComponentVersionPatternsNoFalsePositives(t *testing.T) {
	cases := []struct {
		component string
		versions  []string // versions that should NOT match
	}{
		{"cluster", []string{"vmagent-20251201", "vmalert-20251201", "victoria-metrics-20251201"}},
		{"single", []string{"vmagent-20251201", "vminsert-20251201"}},
		{"vmagent", []string{"vmalert-20251201", "vmauth-20251201"}},
		{"vmalert", []string{"vmagent-20251201", "vmauth-20251201"}},
		{"vmauth", []string{"vmagent-20251201", "vmalert-20251201"}},
		{"vmanomaly", []string{"vmagent-20251201", "vmalert-20251201"}},
	}
	for _, tc := range cases {
		pattern, ok := componentVersionPatterns[tc.component]
		if !ok {
			t.Errorf("component %q not found in componentVersionPatterns", tc.component)
			continue
		}
		// Report an invalid pattern as a test failure instead of panicking.
		re, err := regexp.Compile("^(?:" + pattern + ")$")
		if err != nil {
			t.Errorf("pattern %q for %q does not compile: %v", pattern, tc.component, err)
			continue
		}
		for _, version := range tc.versions {
			if re.MatchString(version) {
				t.Errorf("pattern for %q should NOT match %q, but did", tc.component, version)
			}
		}
	}
}
// TestGenerateRefID checks the refId produced for a range of alert names:
// separators are stripped and a leading digit gets a "Q" prefix.
func TestGenerateRefID(t *testing.T) {
	type refIDCase struct {
		alertName string
		want      string
	}
	table := []refIDCase{
		{"TooManyLogs", "TooManyLogs"},
		{"Too-Many-Logs", "TooManyLogs"},
		{"Too_Many_Logs", "TooManyLogs"},
		{"Too Many Logs", "TooManyLogs"},
		{"Alert:With:Colons", "AlertWithColons"},
		{"Alert.With.Dots", "AlertWithDots"},
		{"123StartWithDigit", "Q123StartWithDigit"},
		{"NormalAlert", "NormalAlert"},
		{"ConcurrentInsertsHitTheLimit", "ConcurrentInsertsHitTheLimit"},
	}

	for i := range table {
		c := table[i]
		if refID := GenerateRefID(c.alertName); refID != c.want {
			t.Errorf("GenerateRefID(%q) = %q, want %q", c.alertName, refID, c.want)
		}
	}
}
// TestNormalizeAlertQuery checks, per component, that the generated query
// contains the expected fragments and none of the unwanted ones.
func TestNormalizeAlertQuery(t *testing.T) {
	type queryCase struct {
		rule           parser.AlertRule
		wantContains   []string
		wantNotContain []string
	}
	table := []queryCase{
		{
			rule: parser.AlertRule{Alert: "TestAlert", Expr: "sum(rate(metric[5m])) > 0", Component: "vmagent"},
			wantContains: []string{
				"vmagent-.*",                // version filter
				"sum(rate(metric[5m])) > 0", // original expr preserved
				"min_over_time(",            // shows worst state over range
				"clamp_min(",                // prevents negative values
				"vm_app_version",            // joins with vm_app_version
				"svc_name",                  // extracts service name
				"$__range",                  // uses Grafana range variable
			},
		},
		{
			rule: parser.AlertRule{Alert: "ClusterAlert", Expr: "disk_usage > 0.9", Component: "cluster"},
			wantContains: []string{
				"(vminsert|vmselect|vmstorage)-.*", // cluster pattern
			},
		},
		{
			rule: parser.AlertRule{Alert: "SingleAlert", Expr: "memory_usage > 0.8", Component: "single"},
			wantContains: []string{
				"victoria-metrics-.*", // single pattern
			},
		},
		{
			rule: parser.AlertRule{Alert: "UnknownAlert", Expr: "some_metric > 0", Component: "unknown"},
			wantContains: []string{
				`version=~".*"`, // unknown matches all
			},
		},
	}

	for i := range table {
		c := &table[i]
		query := NormalizeAlertQuery(c.rule)
		for _, fragment := range c.wantContains {
			if strings.Contains(query, fragment) {
				continue
			}
			t.Errorf("NormalizeAlertQuery(%q) should contain %q, got:\n%s", c.rule.Alert, fragment, query)
		}
		for _, fragment := range c.wantNotContain {
			if !strings.Contains(query, fragment) {
				continue
			}
			t.Errorf("NormalizeAlertQuery(%q) should NOT contain %q", c.rule.Alert, fragment)
		}
	}
}
// TestNormalizeAlertQueryStructure verifies the overall structure of generated queries.
func TestNormalizeAlertQueryStructure(t *testing.T) {
rule := parser.AlertRule{
Alert: "TestAlert",
Expr: "metric > 0",
Component: "vmagent",
}
query := NormalizeAlertQuery(rule)
// Verify min_over_time wrapper
if !strings.HasPrefix(query, "min_over_time(") {
t.Error("query should start with 'min_over_time('")
}
// Verify key components are present
expectedParts := []string{
"min_over_time(",
"vm_svc = label_replace(",
"total = count by (svc_name)",
"firing_pod = count by (svc_name)",
"firing_inst = count by (svc_name)",
"firing = (firing_pod or firing_inst",
"clamp_min(100 * (total - firing) / total, 0)",
"[$__range:]",
}
for _, part := range expectedParts {
if !strings.Contains(query, part) {
t.Errorf("query missing expected part: %q", part)
}
}
}
// TestNormalizeAlertQueryExprPreserved verifies the original expression is preserved.
func TestNormalizeAlertQueryExprPreserved(t *testing.T) {
expressions := []string{
"sum(rate(http_requests_total[5m])) > 100",
"avg(node_cpu_seconds_total) by (instance) > 0.9",
`count(vm_app_version{version=~"vmagent.*"}) == 0`,
"changes(process_start_time_seconds[1h]) > 2",
"(disk_used / disk_total) > 0.95",
}
for _, expr := range expressions {
rule := parser.AlertRule{
Alert: "TestAlert",
Expr: expr,
Component: "vmagent",
}
query := NormalizeAlertQuery(rule)
// The expression should appear twice (for pod join and instance join)
count := strings.Count(query, expr)
if count != 2 {
t.Errorf("expression %q should appear exactly 2 times in query, found %d times", expr, count)
}
}
}
// TestNormalizeAlertQueryWhitespace verifies whitespace in expressions is handled.
//
// The template wraps the expression as "((<expr>) > 0)", so both ends can be
// checked: untrimmed input would leave " sum" after the opening parentheses
// and "> 0 )" before the closing one.
func TestNormalizeAlertQueryWhitespace(t *testing.T) {
	rule := parser.AlertRule{
		Alert:     "TestAlert",
		Expr:      " sum(rate(metric[5m])) > 0 ", // leading/trailing whitespace
		Component: "vmagent",
	}
	query := NormalizeAlertQuery(rule)
	// Should not contain the leading whitespace.
	if strings.Contains(query, " sum") {
		t.Error("query should trim leading whitespace from expression")
	}
	// Should not contain the trailing whitespace.
	if strings.Contains(query, "> 0 )") {
		t.Error("query should trim trailing whitespace from expression")
	}
	// The trimmed expression must sit directly inside the wrapping parentheses.
	if want := "((sum(rate(metric[5m])) > 0) > 0)"; !strings.Contains(query, want) {
		t.Errorf("query should contain %q, got:\n%s", want, query)
	}
}
// TestAllComponentsHavePatterns checks that every known component has an
// entry in componentVersionPatterns.
func TestAllComponentsHavePatterns(t *testing.T) {
	required := [...]string{
		"cluster",
		"single",
		"vmagent",
		"vmalert",
		"vmauth",
		"vmanomaly",
		"unknown",
	}
	for i := range required {
		name := required[i]
		_, present := componentVersionPatterns[name]
		if present {
			continue
		}
		t.Errorf("componentVersionPatterns missing entry for %q", name)
	}
}

View File

@@ -0,0 +1,377 @@
package generator
import "sort"
// AlertDefinition represents a normalized alert expression with refId.
// BuildDashboard turns each definition into one query target: Expr becomes
// the target's expression and RefID its refId.
type AlertDefinition struct {
RefID string `json:"refId"`
Expr string `json:"expr"`
}
// RenderWithQuickTemplate builds the dashboard data and renders it via quicktemplate.
//
// It returns the rendered dashboard JSON. The template's marshal helper panics
// with the encoding/json error when the dashboard cannot be serialized; that
// panic is recovered here and returned as err, so callers get the failure
// through the declared error result. Panics that do not carry an error value,
// and Go runtime errors (which indicate a programming bug), are re-raised.
func RenderWithQuickTemplate(alerts []AlertDefinition, renames map[string]string, title, uid string) (out string, err error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		cause, isErr := r.(error)
		// runtime.Error is detected through its marker method, which avoids
		// importing the runtime package just for the type assertion.
		_, isRuntime := r.(interface{ RuntimeError() })
		if !isErr || isRuntime {
			panic(r)
		}
		out, err = "", cause
	}()

	dashboard := BuildDashboard(alerts, renames, title, uid)
	return RenderDashboard(dashboard), nil
}
// BuildDashboard constructs the typed dashboard model that is rendered by quicktemplate.
//
// The dashboard has two table panels:
//   - "Instance Count" (ID 8000): number of instances per service name.
//   - "Service Health Matrix" (ID 9000): one query target per entry of alerts.
//
// Parameters:
//   - alerts: one query target is created per entry, in the given order.
//   - renames: field renames for the health matrix; passed to buildRenameByName.
//   - title, uid: the dashboard's title and UID.
//
// Every query uses the datasource selected by the "datasource" template
// variable; "job" and "instance" variables filter vm_app_version.
func BuildDashboard(alerts []AlertDefinition, renames map[string]string, title, uid string) Dashboard {
	promDatasource := Datasource{Type: "prometheus", UID: "${datasource}"}

	// One instant table query per alert.
	targets := make([]Target, 0, len(alerts))
	for _, a := range alerts {
		targets = append(targets, Target{
			Datasource:   promDatasource,
			EditorMode:   "code",
			Expr:         a.Expr,
			Format:       "table",
			Hide:         false,
			Instant:      true,
			LegendFormat: "{{svc_name}}",
			Range:        false,
			RefID:        a.RefID,
		})
	}

	// The service name is extracted with svcNameRegex, the same pattern the
	// health queries use, so both panels always agree on the svc_name values.
	instanceCountTarget := Target{
		Datasource:   promDatasource,
		EditorMode:   "code",
		Expr:         `count by (svc_name) (label_replace(vm_app_version{job=~"$job", instance=~"$instance", version!~"(victoria-(logs|traces)|vl|vt).*"}, "svc_name", "$1", "version", "` + svcNameRegex + `"))`,
		Format:       "table",
		Hide:         false,
		Instant:      true,
		LegendFormat: "{{svc_name}}",
		Range:        false,
		RefID:        "InstanceCount",
	}

	// Typed field configuration for health matrix.
	fieldConfig := FieldConfig{
		Defaults: FieldDefaults{
			Color: Color{Mode: "thresholds"},
			Custom: CustomField{
				Align:          "center",
				CellOptions:    CellOptions{ApplyToRow: boolPtr(false), Type: "color-background"},
				Filterable:     true,
				Footer:         &Footer{Reducers: []string{"min"}},
				Inspect:        false,
				MinWidth:       80,
				WrapHeaderText: true,
				WrapText:       boolPtr(false),
			},
			Mappings: []Mapping{
				{
					// Exactly 100: all healthy.
					Options: MappingOptions{
						From: floatPtr(100),
						To:   floatPtr(100),
						Result: MappingResult{
							Color: "green",
							Index: 0,
							Text:  strPtr("100%"),
						},
					},
					Type: "range",
				},
				{
					// Below 100: shown in red with the value itself as text.
					Options: MappingOptions{
						From: floatPtr(0),
						To:   floatPtr(99.99),
						Result: MappingResult{
							Color: "red",
							Index: 1,
						},
					},
					Type: "range",
				},
				{
					// Negative values are displayed as an error marker.
					Options: MappingOptions{
						From: floatPtr(-999999),
						To:   floatPtr(-0.01),
						Result: MappingResult{
							Color: "red",
							Index: 2,
							Text:  strPtr("ERR"),
						},
					},
					Type: "range",
				},
				{
					// Missing value: alert not applicable to the component.
					Options: MappingOptions{
						Match: "null",
						Result: MappingResult{
							Color: "#3D3D3D",
							Index: 3,
							Text:  strPtr("-"),
						},
					},
					Type: "special",
				},
			},
			NoValue: "-",
			Thresholds: Thresholds{
				Mode: "absolute",
				Steps: []ThresholdStep{
					{Color: "#3D3D3D", Value: nil},
					{Color: "red", Value: floatPtr(0)},
					{Color: "green", Value: floatPtr(100)},
				},
			},
			Unit: "percent",
		},
		Overrides: []Override{
			{
				// The "Alert" name column is rendered as plain text.
				Matcher: Matcher{ID: "byName", Options: "Alert"},
				Properties: []Property{
					{ID: "custom.cellOptions", Value: CellOptions{Type: "auto"}},
					{ID: "custom.width", Value: 280},
					{ID: "custom.filterable", Value: true},
				},
			},
		},
	}

	instanceCountFieldConfig := FieldConfig{
		Defaults: FieldDefaults{
			Color: Color{Mode: "fixed", FixedColor: "#1F60C4"},
			Custom: CustomField{
				Align:       "center",
				CellOptions: CellOptions{ApplyToRow: boolPtr(false), Type: "color-background"},
				Filterable:  false,
				Inspect:     false,
				MinWidth:    80,
			},
			Mappings: []Mapping{},
			NoValue:  "-",
			Thresholds: Thresholds{
				Mode: "absolute",
				Steps: []ThresholdStep{
					{Color: "#1F60C4", Value: nil},
				},
			},
			Unit: "none",
		},
		Overrides: []Override{
			{
				Matcher: Matcher{ID: "byName", Options: "Metric"},
				Properties: []Property{
					{ID: "custom.hidden", Value: true},
				},
			},
		},
	}

	// Health matrix: merge all query results, rename the value columns,
	// transpose so that alerts become rows, then sort by alert name.
	transformations := []Transformation{
		{ID: "merge", Options: MergeOptions{}},
		{
			ID: "organize",
			Options: OrganizeOptions{
				ExcludeByName: map[string]bool{"Time": true},
				IncludeByName: map[string]string{},
				IndexByName:   map[string]string{},
				RenameByName:  buildRenameByName(renames),
			},
		},
		{ID: "transpose", Options: TransposeOptions{FirstFieldName: "Alert", RestFieldsName: ""}},
		{ID: "sortBy", Options: SortByOptions{Fields: map[string]string{}, Sort: []SortField{{Field: "Alert"}}}},
	}

	// Instance count: merge, transpose, then drop the "Metric" name column.
	instanceCountTransformations := []Transformation{
		{ID: "merge", Options: MergeOptions{}},
		{
			ID: "organize",
			Options: OrganizeOptions{
				ExcludeByName: map[string]bool{"Time": true},
				IncludeByName: map[string]string{},
				IndexByName:   map[string]string{},
			},
		},
		{ID: "transpose", Options: TransposeOptions{FirstFieldName: "Metric", RestFieldsName: ""}},
		{
			ID: "organize",
			Options: OrganizeOptions{
				ExcludeByName: map[string]bool{"Metric": true},
				IncludeByName: map[string]string{},
				IndexByName:   map[string]string{},
			},
		},
	}

	templates := []TemplateVar{
		{
			Current:    TemplateCurrent{Text: "default", Value: "default"},
			IncludeAll: false,
			Label:      "Datasource",
			Name:       "datasource",
			Options:    []string{},
			Query:      QueryString("prometheus"),
			Refresh:    1,
			Regex:      "",
			Type:       "datasource",
		},
		{
			AllValue:   ".*",
			Current:    TemplateCurrent{Text: []string{"All"}, Value: []string{"$__all"}},
			Datasource: &promDatasource,
			Definition: "label_values(vm_app_version, job)",
			IncludeAll: true,
			Label:      "Job",
			Multi:      true,
			Name:       "job",
			Options:    []string{},
			Query:      QueryTemplate(TemplateQuery{Query: "label_values(vm_app_version, job)", RefID: "StandardVariableQuery"}),
			Refresh:    1,
			Regex:      "",
			Sort:       1,
			Type:       "query",
		},
		{
			AllValue:   ".*",
			Current:    TemplateCurrent{Text: []string{"All"}, Value: []string{"$__all"}},
			Datasource: &promDatasource,
			Definition: `label_values(vm_app_version{job=~"$job"}, instance)`,
			IncludeAll: true,
			Label:      "Instance",
			Multi:      true,
			Name:       "instance",
			Options:    []string{},
			Query:      QueryTemplate(TemplateQuery{Query: `label_values(vm_app_version{job=~"$job"}, instance)`, RefID: "StandardVariableQuery"}),
			Refresh:    1,
			Regex:      "",
			Sort:       1,
			Type:       "query",
		},
	}

	// The raw string below is emitted verbatim as the dashboard description,
	// so its lines must not be indented.
	desc := `**VictoriaMetrics Status Page** - Health matrix for VictoriaMetrics components.
**Reading the Table:**
- **Instance Count** (Blue): Number of detected instances per component
- **100%** (Green): All instances are healthy for this alert
- **<100%** (Red): Some instances are experiencing issues (percentage shows healthy instances)
- **-** (Gray): Alert not applicable to this component
**Component Prefixes:**
- **ALL:** Applies to all VictoriaMetrics components
- **cluster:** Applies to vminsert, vmselect, vmstorage
- **single:** Applies to victoria-metrics (single-node)
- **vmagent/vmalert/vmauth/vmanomaly:** Component-specific alerts
**Alert Rules Sources:**
- [VictoriaMetrics Alerts Overview](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#alerts)
- [vmalert Rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/vmalert.yml)
- [vmagent Rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/vmagent.yml)
- [VM Cluster Rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/cluster.yml)
- [VM Single Rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/single.yml)
- [VM Operator Rules](https://github.com/VictoriaMetrics/operator/blob/master/config/alerting/vmoperator-rules.yaml)
- [VMAnomaly Rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/rules/vmanomaly.yml)
`

	panels := []Panel{
		{
			Datasource:  promDatasource,
			Description: "Number of instances detected per component",
			FieldConfig: instanceCountFieldConfig,
			GridPos:     GridPos{H: 4, W: 24, X: 0, Y: 0},
			ID:          8000,
			Options: PanelOptions{
				CellHeight: "md",
				ShowHeader: true,
			},
			Targets:         []Target{instanceCountTarget},
			Title:           "Instance Count",
			Transformations: instanceCountTransformations,
			Type:            "table",
		},
		{
			Datasource:  promDatasource,
			Description: "Shows **worst health state** over the selected time range.\n\n**Values:** 100% = all healthy, <100% = issues detected, - = not applicable for this component\n\n**Prefixes:** ALL = all components, cluster = vminsert/vmselect/vmstorage, single = victoria-metrics, or component-specific (vmagent, vmalert, vmauth, vmanomaly)\n\n**Sources:** [Alerts Overview](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#alerts) | [Alert Rules](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/rules)\n",
			FieldConfig: fieldConfig,
			GridPos:     GridPos{H: 20, W: 24, X: 0, Y: 4},
			ID:          9000,
			Options: PanelOptions{
				CellHeight:       "sm",
				EnablePagination: boolPtr(false),
				ShowHeader:       true,
			},
			Targets:         targets,
			Title:           "Service Health Matrix",
			Transformations: transformations,
			Type:            "table",
		},
	}

	return Dashboard{
		Annotations: Annotations{
			List: []AnnotationItem{
				{
					BuiltIn:    1,
					Datasource: Datasource{Type: "grafana", UID: "-- Grafana --"},
					Enable:     true,
					Hide:       true,
					IconColor:  "rgba(0, 211, 255, 1)",
					Name:       "Annotations & Alerts",
					Type:       "dashboard",
				},
			},
		},
		Description:          desc,
		Editable:             true,
		FiscalYearStartMonth: 0,
		GraphTooltip:         0,
		ID:                   0,
		Links: []Link{
			{
				AsDropdown:  false,
				Icon:        "external link",
				IncludeVars: false,
				KeepTime:    false,
				Tags:        []string{},
				TargetBlank: true,
				Title:       "Alert Rules Source",
				Tooltip:     "View official VictoriaMetrics alert rules on GitHub",
				Type:        "link",
				URL:         "https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker/rules",
			},
		},
		Panels:        panels,
		Preload:       false,
		Refresh:       "30s",
		SchemaVersion: 42,
		Tags:          []string{"victoriametrics", "status-page", "alerts", "health"},
		Templating:    Templating{List: templates},
		Time:          TimeRange{From: "now-5m", To: "now"},
		Timepicker:    Timepicker{RefreshIntervals: []string{"10s", "30s", "1m", "5m"}},
		Timezone:      "",
		Title:         title,
		UID:           uid,
		Version:       1,
	}
}
// buildRenameByName returns a new map holding every entry of renames plus a
// "svc_name" key mapped to the empty string. An explicit "svc_name" entry in
// renames takes precedence over that default.
//
// The keys are visited in sorted order; this has no effect on the returned
// map, since Go maps carry no ordering.
func buildRenameByName(renames map[string]string) map[string]string {
	names := make([]string, 0, len(renames))
	for name := range renames {
		names = append(names, name)
	}
	sort.Strings(names)

	result := make(map[string]string, len(names)+1)
	result["svc_name"] = ""
	for i := range names {
		result[names[i]] = renames[names[i]]
	}
	return result
}
// boolPtr returns a pointer to a newly allocated bool holding v.
func boolPtr(v bool) *bool {
	p := new(bool)
	*p = v
	return p
}
// floatPtr returns a pointer to a newly allocated float64 holding v.
func floatPtr(v float64) *float64 {
	p := new(float64)
	*p = v
	return p
}
// strPtr returns a pointer to a newly allocated string holding v.
func strPtr(v string) *string {
	p := new(string)
	*p = v
	return p
}

View File

@@ -0,0 +1,70 @@
package generator
import (
"encoding/json"
"os"
"path/filepath"
"sort"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/dashboards/dashgen/parser"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
)
// TestQuickTemplateMatchesJsonnet renders the status-page dashboard from the
// alert rules under deployment/docker/rules and checks that the result is
// structurally equal to the committed status-page-generated.json baseline.
// The test is skipped when the baseline file does not exist.
func TestQuickTemplateMatchesJsonnet(t *testing.T) {
	rulesDir := filepath.Join("..", "..", "..", "deployment", "docker", "rules")
	rules, err := parser.ParseAlertDirectory(rulesDir)
	if err != nil {
		t.Fatalf("parse alerts: %v", err)
	}
	if len(rules) == 0 {
		t.Fatal("no alert rules parsed")
	}
	sort.Slice(rules, func(a, b int) bool {
		return rules[a].Alert < rules[b].Alert
	})

	// Build the same inputs the dashgen command feeds to the renderer.
	defs := make([]AlertDefinition, 0, len(rules))
	displayNames := make(map[string]string, len(rules))
	for _, rule := range rules {
		prefix := rule.Component
		if prefix == "unknown" {
			prefix = "ALL"
		}
		refID := GenerateRefID(prefix + "_" + rule.Alert)
		defs = append(defs, AlertDefinition{
			RefID: refID,
			Expr:  NormalizeAlertQuery(rule),
		})
		displayNames["Value #"+refID] = prefix + ": " + rule.Alert
	}

	rendered, err := RenderWithQuickTemplate(defs, displayNames, "VictoriaMetrics - Status Page", "vm-status-page")
	if err != nil {
		t.Fatalf("quicktemplate render: %v", err)
	}

	// Baseline: existing generated dashboard in repo (publishable artifact).
	baselinePath := filepath.Join("..", "..", "status-page-generated.json")
	baselineRaw, err := os.ReadFile(baselinePath)
	if err != nil {
		if os.IsNotExist(err) {
			t.Skipf("baseline %s not present; generate via dashgen and commit", baselinePath)
		}
		t.Fatalf("read baseline: %v", err)
	}

	// Compare decoded JSON values rather than bytes so that formatting
	// differences between the two documents are ignored.
	var got, want interface{}
	if err := json.Unmarshal([]byte(rendered), &got); err != nil {
		t.Fatalf("unmarshal quicktemplate output: %v", err)
	}
	if err := json.Unmarshal(baselineRaw, &want); err != nil {
		t.Fatalf("unmarshal baseline output: %v", err)
	}

	diff := cmp.Diff(want, got, cmpopts.EquateApprox(0, 1e-9))
	if diff != "" {
		t.Fatalf("quicktemplate output differs from baseline (-want +got):\n%s", diff)
	}
}

11
dashboards/dashgen/go.mod Normal file
View File

@@ -0,0 +1,11 @@
module github.com/VictoriaMetrics/VictoriaMetrics/dashboards/dashgen
go 1.25.5
require (
github.com/google/go-cmp v0.5.9
github.com/valyala/quicktemplate v1.7.0
gopkg.in/yaml.v3 v3.0.1
)
require github.com/valyala/bytebufferpool v1.0.0 // indirect

27
dashboards/dashgen/go.sum Normal file
View File

@@ -0,0 +1,27 @@
github.com/andybalholm/brotli v1.0.2/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y=
github.com/andybalholm/brotli v1.0.3/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/klauspost/compress v1.13.5/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.30.0/go.mod h1:2rsYD01CKFrjjsvFxx75KlEUNpWNBY9JWD3K/7o2Cus=
github.com/valyala/quicktemplate v1.7.0 h1:LUPTJmlVcb46OOUY3IeD9DojFpAVbsG+5WFTcjMJzCM=
github.com/valyala/quicktemplate v1.7.0/go.mod h1:sqKJnoaOF88V07vkO+9FL8fb9uZg/VPSJnLYn+LmLk8=
github.com/valyala/tcplisten v1.0.0/go.mod h1:T0xQ8SeCZGxckz9qRXTfG43PvQ/mcWh7FwZEA7Ioqkc=
golang.org/x/crypto v0.0.0-20210513164829-c07d793c2f9a/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210510120150-4163338589ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

105
dashboards/dashgen/main.go Normal file
View File

@@ -0,0 +1,105 @@
package main
import (
"flag"
"fmt"
"log"
"os"
"sort"
"github.com/VictoriaMetrics/VictoriaMetrics/dashboards/dashgen/generator"
"github.com/VictoriaMetrics/VictoriaMetrics/dashboards/dashgen/parser"
)
// main executes run and terminates the process through log.Fatal when run
// reports an error.
func main() {
	err := run()
	if err == nil {
		return
	}
	log.Fatal(err)
}
// run parses the command-line flags, loads every alert rule from the
// directory given by -alerts-dir, renders the status-page dashboard and
// writes the resulting JSON to the file given by -output.
//
// It returns an error when -alerts-dir is missing, when no rules are found,
// or when parsing, rendering or writing fails.
func run() error {
	var (
		alertsDir  = flag.String("alerts-dir", "", "Path to directory with alert YAML files")
		outputFile = flag.String("output", "dashboard.json", "Path to output JSON file")
		title      = flag.String("title", "VictoriaMetrics - Status Page", "Dashboard title")
		uid        = flag.String("uid", "vm-status-page", "Dashboard UID")
	)
	flag.Parse()

	if *alertsDir == "" {
		return fmt.Errorf("--alerts-dir is required")
	}

	fmt.Printf("Parsing alert files from: %s\n", *alertsDir)
	rules, err := parser.ParseAlertDirectory(*alertsDir)
	if err != nil {
		return fmt.Errorf("parse alerts: %w", err)
	}
	fmt.Printf("Found %d alert rules\n", len(rules))
	if len(rules) == 0 {
		return fmt.Errorf("no alert rules found in %s", *alertsDir)
	}

	// Order rules by alert name so the generated dashboard does not depend on
	// the order in which files and groups were read. sort.Slice is not a
	// stable sort, so rules sharing an alert name have no guaranteed relative
	// order.
	sort.Slice(rules, func(a, b int) bool {
		return rules[a].Alert < rules[b].Alert
	})

	// Turn the rules into the query definitions and column renames the
	// renderer expects.
	alerts, renames, err := buildAlertData(rules)
	if err != nil {
		return fmt.Errorf("build alert data: %w", err)
	}

	rendered, err := generator.RenderWithQuickTemplate(alerts, renames, *title, *uid)
	if err != nil {
		return fmt.Errorf("render quicktemplate: %w", err)
	}

	// Persist the rendered dashboard.
	err = os.WriteFile(*outputFile, []byte(rendered), 0644)
	if err != nil {
		return fmt.Errorf("write output: %w", err)
	}

	fmt.Printf("\n✓ Dashboard generated successfully: %s\n", *outputFile)
	return nil
}
// buildAlertData converts parsed alert rules into the inputs of the dashboard
// renderer: one generator.AlertDefinition per rule, and a map from the table
// field name ("Value #<refID>") to the column title ("<prefix>: <alert>").
//
// The prefix is the rule's component, with "unknown" replaced by "ALL".
// An error is returned as soon as a rule with an empty name or an empty
// expression is encountered.
func buildAlertData(rules []parser.AlertRule) ([]generator.AlertDefinition, map[string]string, error) {
	defs := make([]generator.AlertDefinition, 0, len(rules))
	displayNames := make(map[string]string, len(rules))

	for _, r := range rules {
		// Reject rules that lack the fields needed to build a query.
		switch {
		case r.Alert == "":
			return nil, nil, fmt.Errorf("alert in group %q has empty name", r.GroupName)
		case r.Expr == "":
			return nil, nil, fmt.Errorf("alert %q has empty expression", r.Alert)
		}

		prefix := r.Component
		if prefix == "unknown" {
			prefix = "ALL"
		}

		refID := generator.GenerateRefID(prefix + "_" + r.Alert)
		defs = append(defs, generator.AlertDefinition{
			RefID: refID,
			Expr:  generator.NormalizeAlertQuery(r),
		})

		// Grafana uses "Value #<refID>" as the field name
		displayNames["Value #"+refID] = prefix + ": " + r.Alert
	}

	return defs, displayNames, nil
}

View File

@@ -0,0 +1,131 @@
package parser
import (
"fmt"
"os"
"path/filepath"
"strings"
"gopkg.in/yaml.v3"
)
// AlertRule represents a single alert rule from a Prometheus/VictoriaMetrics alert YAML file.
//
// The tagged fields are filled from the rule's YAML keys; Component and
// GroupName are assigned by ParseAlertFile after decoding.
type AlertRule struct {
// Alert is the value of the rule's `alert` key.
Alert string `yaml:"alert"`
// Expr is the value of the rule's `expr` key.
Expr string `yaml:"expr"`
// For is the value of the rule's `for` key, kept as the raw string.
For string `yaml:"for"`
// Labels holds the rule's `labels` mapping.
Labels map[string]string `yaml:"labels"`
// Annotations holds the rule's `annotations` mapping.
Annotations map[string]string `yaml:"annotations"`
// Derived fields (not from YAML)
// NOTE(review): these fields carry no `yaml:"-"` tag, so yaml.v3 would
// presumably decode keys named `component` / `groupname` into them before
// ParseAlertFile overwrites the values — confirm whether a tag is wanted.
Component string // Component this alert belongs to (cluster, single, vmagent, etc.)
GroupName string // Name of the alert group
}
// AlertGroup represents a group of alert rules.
//
// Each field is filled from the YAML key named in its tag.
type AlertGroup struct {
// Name is the group's `name` key; ParseAlertFile derives each rule's
// Component from it and copies it into each rule's GroupName.
Name string `yaml:"name"`
// Interval is the group's `interval` key, kept as the raw string.
Interval string `yaml:"interval"`
// Concurrency is the group's `concurrency` key.
Concurrency int `yaml:"concurrency"`
// Rules lists the entries under the group's `rules` key.
Rules []AlertRule `yaml:"rules"`
}
// AlertFile represents the structure of an alert YAML file.
type AlertFile struct {
// Groups lists the entries under the file's top-level `groups` key.
Groups []AlertGroup `yaml:"groups"`
}
// ParseAlertFile reads the YAML file at path and decodes it into an AlertFile.
// After decoding, every rule is stamped with the name of its group and with
// the component detected from that group name.
//
// A wrapped error is returned when the file cannot be read or when its
// content is not valid YAML.
func ParseAlertFile(path string) (*AlertFile, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("read file: %w", err)
	}

	parsed := new(AlertFile)
	if err = yaml.Unmarshal(raw, parsed); err != nil {
		return nil, fmt.Errorf("parse YAML: %w", err)
	}

	// Fill in the derived fields; pointers into the slices are used so the
	// assignments modify the decoded structure itself rather than copies.
	for gi := range parsed.Groups {
		group := &parsed.Groups[gi]
		component := detectComponent(group.Name)
		for ri := range group.Rules {
			rule := &group.Rules[ri]
			rule.Component = component
			rule.GroupName = group.Name
		}
	}

	return parsed, nil
}
// ParseAlertDirectory parses every regular entry of dir whose name ends in
// ".yml" or ".yaml" and returns the rules of all groups as one flat slice.
// Subdirectories are not descended into, and other files are ignored.
//
// A wrapped error is returned when dir cannot be listed or when any matching
// file fails to parse; in that case no rules are returned.
func ParseAlertDirectory(dir string) ([]AlertRule, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return nil, fmt.Errorf("read directory: %w", err)
	}

	var collected []AlertRule
	for _, e := range entries {
		fileName := e.Name()
		isYAML := strings.HasSuffix(fileName, ".yml") || strings.HasSuffix(fileName, ".yaml")
		if e.IsDir() || !isYAML {
			continue
		}

		parsed, err := ParseAlertFile(filepath.Join(dir, fileName))
		if err != nil {
			return nil, fmt.Errorf("parse %s: %w", fileName, err)
		}
		for gi := range parsed.Groups {
			collected = append(collected, parsed.Groups[gi].Rules...)
		}
	}

	return collected, nil
}
// componentMapping maps lower-cased group names to their component when the
// group name matches exactly.
var componentMapping = map[string]string{
	"vmcluster": "cluster",
	"vmsingle":  "single",
	"vmagent":   "vmagent",
	"vmalert":   "vmalert",
	"vmauth":    "vmauth",
	"vmanomaly": "vmanomaly",
}

// detectComponent derives the component from a group name, ignoring case.
// An exact match in componentMapping wins; otherwise the first matching
// substring from an ordered fallback list decides. It returns "unknown" when
// nothing matches.
func detectComponent(groupName string) string {
	name := strings.ToLower(groupName)

	if mapped, found := componentMapping[name]; found {
		return mapped
	}

	// Fallback substrings, tried top to bottom; the first hit wins, so the
	// order is significant. Only "vmalert" is matched with its "vm" prefix:
	// a bare "alert" is too common in group names to identify a component.
	fallbacks := [...]struct{ needle, component string }{
		{"cluster", "cluster"},
		{"single", "single"},
		{"anomaly", "vmanomaly"},
		{"vmalert", "vmalert"},
		{"agent", "vmagent"},
		{"auth", "vmauth"},
	}
	for _, fb := range fallbacks {
		if strings.Contains(name, fb.needle) {
			return fb.component
		}
	}

	return "unknown"
}

View File

@@ -0,0 +1,246 @@
package parser
import (
"os"
"path/filepath"
"testing"
)
// TestDetectComponent checks detectComponent against exact, mixed-case and
// substring group names, as well as a name that matches no component.
func TestDetectComponent(t *testing.T) {
	type testCase struct {
		groupName string
		want      string
	}

	for _, tc := range []testCase{
		// Exact matches (lowercase)
		{groupName: "vmcluster", want: "cluster"},
		{groupName: "vmsingle", want: "single"},
		{groupName: "vmagent", want: "vmagent"},
		{groupName: "vmalert", want: "vmalert"},
		{groupName: "vmauth", want: "vmauth"},
		{groupName: "vmanomaly", want: "vmanomaly"},
		// Case insensitive
		{groupName: "VMCluster", want: "cluster"},
		{groupName: "VMSingle", want: "single"},
		{groupName: "VMAgent", want: "vmagent"},
		// Substring matches
		{groupName: "cluster-alerts", want: "cluster"},
		{groupName: "single-node-alerts", want: "single"},
		{groupName: "vmagent-recording", want: "vmagent"},
		{groupName: "vmalert-errors", want: "vmalert"},
		{groupName: "vmauth-health", want: "vmauth"},
		{groupName: "vmanomaly-detection", want: "vmanomaly"},
		{groupName: "anomaly-alerts", want: "vmanomaly"},
		// Unknown fallback - group names that don't match any component
		{groupName: "other-alerts", want: "unknown"},
	} {
		if got := detectComponent(tc.groupName); got != tc.want {
			t.Errorf("detectComponent(%q) = %q, want %q", tc.groupName, got, tc.want)
		}
	}
}
// TestParseAlertFile writes a two-group alert file to a temporary directory,
// parses it with ParseAlertFile and checks the group count, the rule count of
// the first group, the derived Component of both groups and the decoded
// fields of the first rule.
func TestParseAlertFile(t *testing.T) {
// Create a temporary test file
content := `groups:
- name: vmagent
rules:
- alert: TooManyLogs
expr: sum(rate(vm_log_messages_total{level="error"}[5m])) > 0
for: 15m
labels:
severity: warning
annotations:
summary: "Too many error logs"
- alert: TooManyRestarts
expr: changes(process_start_time_seconds[1h]) > 2
for: 5m
- name: vmcluster
interval: 30s
rules:
- alert: DiskRunsOutOfSpace
expr: vm_free_disk_space_bytes / vm_data_size_bytes < 0.1
labels:
severity: critical
`
tmpDir := t.TempDir()
tmpFile := filepath.Join(tmpDir, "test-alerts.yml")
if err := os.WriteFile(tmpFile, []byte(content), 0644); err != nil {
t.Fatalf("failed to write temp file: %v", err)
}
alertFile, err := ParseAlertFile(tmpFile)
if err != nil {
t.Fatalf("ParseAlertFile failed: %v", err)
}
// Verify groups count
// NOTE(review): t.Errorf does not stop the test, so the Groups[0]/Groups[1]
// and Rules[0] indexing below would panic if fewer groups or rules were
// parsed — consider t.Fatalf for the count checks.
if len(alertFile.Groups) != 2 {
t.Errorf("expected 2 groups, got %d", len(alertFile.Groups))
}
// Verify first group
if alertFile.Groups[0].Name != "vmagent" {
t.Errorf("expected group name 'vmagent', got %q", alertFile.Groups[0].Name)
}
if len(alertFile.Groups[0].Rules) != 2 {
t.Errorf("expected 2 rules in vmagent group, got %d", len(alertFile.Groups[0].Rules))
}
// Verify component detection
if alertFile.Groups[0].Rules[0].Component != "vmagent" {
t.Errorf("expected component 'vmagent', got %q", alertFile.Groups[0].Rules[0].Component)
}
if alertFile.Groups[1].Rules[0].Component != "cluster" {
t.Errorf("expected component 'cluster', got %q", alertFile.Groups[1].Rules[0].Component)
}
// Verify alert fields
rule := alertFile.Groups[0].Rules[0]
if rule.Alert != "TooManyLogs" {
t.Errorf("expected alert 'TooManyLogs', got %q", rule.Alert)
}
if rule.For != "15m" {
t.Errorf("expected for '15m', got %q", rule.For)
}
if rule.Labels["severity"] != "warning" {
t.Errorf("expected severity 'warning', got %q", rule.Labels["severity"])
}
}
// TestParseAlertFileNotFound checks that ParseAlertFile reports an error for
// a path that does not exist.
func TestParseAlertFileNotFound(t *testing.T) {
	if _, err := ParseAlertFile("/nonexistent/path/file.yml"); err == nil {
		t.Error("expected error for nonexistent file, got nil")
	}
}
// TestParseAlertFileInvalidYAML checks that ParseAlertFile reports an error
// when the file content is not valid YAML.
func TestParseAlertFileInvalidYAML(t *testing.T) {
	badFile := filepath.Join(t.TempDir(), "invalid.yml")
	writeErr := os.WriteFile(badFile, []byte("invalid: yaml: content: ["), 0644)
	if writeErr != nil {
		t.Fatalf("failed to write temp file: %v", writeErr)
	}

	if _, err := ParseAlertFile(badFile); err == nil {
		t.Error("expected error for invalid YAML, got nil")
	}
}
// TestParseAlertDirectory fills a temporary directory with one .yml file, one
// .yaml file and one .txt file, then checks that ParseAlertDirectory returns
// the three rules from the two YAML files with the expected components.
func TestParseAlertDirectory(t *testing.T) {
tmpDir := t.TempDir()
// Create multiple alert files
file1 := `groups:
- name: vmagent
rules:
- alert: Alert1
expr: metric1 > 0
`
file2 := `groups:
- name: vmalert
rules:
- alert: Alert2
expr: metric2 > 0
- alert: Alert3
expr: metric3 > 0
`
// Non-YAML file should be ignored
nonYaml := "this is not yaml at all"
if err := os.WriteFile(filepath.Join(tmpDir, "vmagent.yml"), []byte(file1), 0644); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(tmpDir, "vmalert.yaml"), []byte(file2), 0644); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(tmpDir, "readme.txt"), []byte(nonYaml), 0644); err != nil {
t.Fatal(err)
}
rules, err := ParseAlertDirectory(tmpDir)
if err != nil {
t.Fatalf("ParseAlertDirectory failed: %v", err)
}
if len(rules) != 3 {
t.Errorf("expected 3 rules, got %d", len(rules))
}
// Verify components are set correctly
// Rules are tallied per component so the check does not depend on the
// order in which the files were read.
componentCounts := make(map[string]int)
for _, rule := range rules {
componentCounts[rule.Component]++
}
if componentCounts["vmagent"] != 1 {
t.Errorf("expected 1 vmagent rule, got %d", componentCounts["vmagent"])
}
if componentCounts["vmalert"] != 2 {
t.Errorf("expected 2 vmalert rules, got %d", componentCounts["vmalert"])
}
}
// TestParseAlertDirectoryNotFound checks that ParseAlertDirectory reports an
// error for a directory that does not exist.
func TestParseAlertDirectoryNotFound(t *testing.T) {
	if _, err := ParseAlertDirectory("/nonexistent/directory"); err == nil {
		t.Error("expected error for nonexistent directory, got nil")
	}
}
// TestAlertRuleFields parses a file containing one fully populated rule and
// checks the decoded Alert, Expr, For, Labels and the summary annotation, as
// well as the GroupName that ParseAlertFile derives from the group.
func TestAlertRuleFields(t *testing.T) {
content := `groups:
- name: test
rules:
- alert: CompleteAlert
expr: metric > threshold
for: 10m
labels:
severity: critical
team: platform
annotations:
summary: "Alert summary"
description: "Alert description"
`
tmpDir := t.TempDir()
tmpFile := filepath.Join(tmpDir, "complete.yml")
if err := os.WriteFile(tmpFile, []byte(content), 0644); err != nil {
t.Fatal(err)
}
alertFile, err := ParseAlertFile(tmpFile)
if err != nil {
t.Fatal(err)
}
rule := alertFile.Groups[0].Rules[0]
// Verify all fields
// NOTE(review): the `description` annotation and the derived Component are
// present in the fixture/struct but not asserted here.
if rule.Alert != "CompleteAlert" {
t.Errorf("Alert = %q, want 'CompleteAlert'", rule.Alert)
}
if rule.Expr != "metric > threshold" {
t.Errorf("Expr = %q, want 'metric > threshold'", rule.Expr)
}
if rule.For != "10m" {
t.Errorf("For = %q, want '10m'", rule.For)
}
if rule.Labels["severity"] != "critical" {
t.Errorf("Labels[severity] = %q, want 'critical'", rule.Labels["severity"])
}
if rule.Labels["team"] != "platform" {
t.Errorf("Labels[team] = %q, want 'platform'", rule.Labels["team"])
}
if rule.Annotations["summary"] != "Alert summary" {
t.Errorf("Annotations[summary] = %q, want 'Alert summary'", rule.Annotations["summary"])
}
if rule.GroupName != "test" {
t.Errorf("GroupName = %q, want 'test'", rule.GroupName)
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff