From 10dd45c4fddee12bf2cd7a82ded4d52ade318e4e Mon Sep 17 00:00:00 2001 From: sias32 Date: Fri, 3 Apr 2026 21:27:19 +0300 Subject: [PATCH] dashboards: improvement alert statistics (#10571) Changes: - Added the number of `pending alerts` and `firing alerts` - Improvement `transformations` for panel - FIRING over time by group and rules - Added sort for panel - FIRING over time by rule Signed-off-by: sias32 Co-authored-by: Max Kotliar --- dashboards/alert-statistics.json | 220 +++++++++++++++----- docs/victoriametrics/changelog/CHANGELOG.md | 1 + 2 files changed, 171 insertions(+), 50 deletions(-) diff --git a/dashboards/alert-statistics.json b/dashboards/alert-statistics.json index 9b2b7f5e22..50f8ee9ac0 100644 --- a/dashboards/alert-statistics.json +++ b/dashboards/alert-statistics.json @@ -119,7 +119,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "yellow", @@ -199,7 +200,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 } ] } @@ -208,14 +210,14 @@ }, "gridPos": { "h": 4, - "w": 9, + "w": 6, "x": 0, "y": 14 }, "id": 5, "options": { - "colorMode": "value", - "graphMode": "area", + "colorMode": "none", + "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "percentChangeColorMode": "standard", @@ -257,7 +259,7 @@ "type": "prometheus", "uid": "$ds" }, - "description": "", + "description": "Shows the total number of loaded alerting rules across selected instances and groups.", "fieldConfig": { "defaults": { "mappings": [], @@ -266,7 +268,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 } ] } @@ -275,11 +278,11 @@ }, "gridPos": { "h": 4, - "w": 7, - "x": 9, + "w": 6, + "x": 6, "y": 14 }, - "id": 4, + "id": 8, "options": { "colorMode": "value", "graphMode": "area", @@ -320,6 +323,144 @@ "title": "Alerting rules", "type": "stat" }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows the 
total number of pending alerts across selected instances and groups.", + "fieldConfig": { + "defaults": { + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "yellow", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 14 + }, + "id": 9, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": { + "valueSize": 80 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(vmalert_alerts_pending{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"})", + "instant": false, + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Alerting pending", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds" + }, + "description": "Shows the total number of firing alerts across selected instances and groups.", + "fieldConfig": { + "defaults": { + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 14 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": { + "valueSize": 80 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds}" 
+ }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(vmalert_alerts_firing{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"})", + "instant": false, + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Alerting firing", + "type": "stat" + }, { "datasource": { "type": "prometheus", @@ -332,6 +473,9 @@ "cellOptions": { "type": "auto" }, + "footer": { + "reducers": [] + }, "inspect": false }, "mappings": [], @@ -339,7 +483,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": 0 }, { "color": "red", @@ -352,7 +497,7 @@ { "matcher": { "id": "byName", - "options": "Count (sum)" + "options": "Count" }, "properties": [ { @@ -372,20 +517,12 @@ "id": 2, "options": { "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, "frameIndex": 1, "showHeader": true, "sortBy": [ { "desc": true, - "displayName": "Count (sum)" + "displayName": "Count" } ] }, @@ -398,7 +535,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "topk_max(100, sum(increases_over_time(vmalert_alerts_firing{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"}[$__range])) by(group, alertname) > 0)", + "expr": "topk_max(100, sum(increases_over_time(vmalert_alerts_firing{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"}[$__range])) by(group) > 0)", "format": "table", "instant": true, "key": "Q-3934f0fb-8ad6-4519-a98d-c26d0fc6b312-0", @@ -414,8 +551,9 @@ "options": { "excludeByName": { "Time": true, - "alertname": false + "alertname": true }, + "includeByName": {}, "indexByName": { "Time": 0, "Value": 3, @@ -428,23 +566,6 @@ "group": "Group" } } - }, - { - "id": "groupBy", - "options": { - "fields": { - "Count": { - "aggregations": [ - "sum" - ], - "operation": "aggregate" - }, - "Group": { - "aggregations": [], - "operation": "groupby" - } - } - } } ], "type": "table" @@ -468,7 +589,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + 
"color": "green", + "value": 0 }, { "color": "red", @@ -531,16 +653,14 @@ "id": 1, "options": { "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, "frameIndex": 1, - "showHeader": true + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Count" + } + ] }, "pluginVersion": "12.0.2", "targets": [ diff --git a/docs/victoriametrics/changelog/CHANGELOG.md b/docs/victoriametrics/changelog/CHANGELOG.md index f2bcf0403b..750949b9fd 100644 --- a/docs/victoriametrics/changelog/CHANGELOG.md +++ b/docs/victoriametrics/changelog/CHANGELOG.md @@ -29,6 +29,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel * FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add per-URL `-remoteWrite.disableMetadata` flag to disable metadata sending for specific remote storage URLs. See [#10711](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10711). Thanks to @evkuzin for the contribution. * FEATURE: introduce `vm_filestream_fsync_duration_seconds_total` and `vm_filestream_fsync_calls_total` metrics, which can be used for detecting slow storage if it cannot keep up with the current data ingestion rate. See [#10432](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10432). Thanks to @mehrdadbn9 for the contribution. * FEATURE: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): add dedicated `thanos` mode for [migrating data from Thanos](https://docs.victoriametrics.com/victoriametrics/vmctl/thanos/). This mode supports both raw and downsampled Thanos blocks, including all aggregate types (count, sum, min, max, counter). Each aggregate is imported as a separate metric with resolution and aggregate type suffixes (e.g., `metric_name:5m:count`). 
The new mode uses `--thanos-*` prefixed flags: `--thanos-snapshot`, `--thanos-concurrency`, `--thanos-filter-time-start`, `--thanos-filter-time-end`, `--thanos-filter-label`, `--thanos-filter-label-value`, and `--thanos-aggr-types`. See [#9262](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9262). +* FEATURE: [dashboards/alert-statistics](https://grafana.com/grafana/dashboards/24553): add pending and firing alerts stats; fix query in `FIRING over time by group` panel. See [#10571](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10571). Thanks to @sias32 for the contribution. * BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): retry the requests that failed with unexpected EOF due to unstable network to S3 service. See [#10699](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10699). * BUGFIX: All VictoriaMetrics components: Fix an issue where `unsupported` metric metadata type was exposed for summaries and quantiles if a summary wasn't updated within a certain time window. See [metrics#120](https://github.com/VictoriaMetrics/metrics/issues/120) and [metrics#121](https://github.com/VictoriaMetrics/metrics/pull/121).