dashboards: improve alert statistics (#10571)

Changes:

- Added the number of `pending alerts` and `firing alerts`
- Improved `transformations` for panels - FIRING over time by group and by rule
- Added sort for panel - FIRING over time by rule

Signed-off-by: sias32 <sias.32@yandex.ru>
Co-authored-by: Max Kotliar <mkotlyar@victoriametrics.com>
This commit is contained in:
sias32
2026-04-03 21:27:19 +03:00
committed by GitHub
parent a3294b5aa2
commit 10dd45c4fd
2 changed files with 171 additions and 50 deletions

View File

@@ -119,7 +119,8 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green" "color": "green",
"value": 0
}, },
{ {
"color": "yellow", "color": "yellow",
@@ -199,7 +200,8 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green" "color": "green",
"value": 0
} }
] ]
} }
@@ -208,14 +210,14 @@
}, },
"gridPos": { "gridPos": {
"h": 4, "h": 4,
"w": 9, "w": 6,
"x": 0, "x": 0,
"y": 14 "y": 14
}, },
"id": 5, "id": 5,
"options": { "options": {
"colorMode": "value", "colorMode": "none",
"graphMode": "area", "graphMode": "none",
"justifyMode": "auto", "justifyMode": "auto",
"orientation": "auto", "orientation": "auto",
"percentChangeColorMode": "standard", "percentChangeColorMode": "standard",
@@ -257,7 +259,7 @@
"type": "prometheus", "type": "prometheus",
"uid": "$ds" "uid": "$ds"
}, },
"description": "", "description": "Shows the total number of loaded alerting rules across selected instances and groups.",
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"mappings": [], "mappings": [],
@@ -266,7 +268,8 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green" "color": "green",
"value": 0
} }
] ]
} }
@@ -275,11 +278,11 @@
}, },
"gridPos": { "gridPos": {
"h": 4, "h": 4,
"w": 7, "w": 6,
"x": 9, "x": 6,
"y": 14 "y": 14
}, },
"id": 4, "id": 8,
"options": { "options": {
"colorMode": "value", "colorMode": "value",
"graphMode": "area", "graphMode": "area",
@@ -320,6 +323,144 @@
"title": "Alerting rules", "title": "Alerting rules",
"type": "stat" "type": "stat"
}, },
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the total number of pending alerts across selected instances and groups.",
"fieldConfig": {
"defaults": {
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "yellow",
"value": 0
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 12,
"y": 14
},
"id": 9,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"text": {
"valueSize": 80
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vmalert_alerts_pending{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"})",
"instant": false,
"interval": "",
"legendFormat": "",
"range": true,
"refId": "A"
}
],
"title": "Alerting pending",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the total number of firing alerts across selected instances and groups.",
"fieldConfig": {
"defaults": {
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": 0
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 6,
"x": 18,
"y": 14
},
"id": 10,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"last"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"text": {
"valueSize": 80
},
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.0.2",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${ds}"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vmalert_alerts_firing{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"})",
"instant": false,
"interval": "",
"legendFormat": "",
"range": true,
"refId": "A"
}
],
"title": "Alerting firing",
"type": "stat"
},
{ {
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
@@ -332,6 +473,9 @@
"cellOptions": { "cellOptions": {
"type": "auto" "type": "auto"
}, },
"footer": {
"reducers": []
},
"inspect": false "inspect": false
}, },
"mappings": [], "mappings": [],
@@ -339,7 +483,8 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green" "color": "green",
"value": 0
}, },
{ {
"color": "red", "color": "red",
@@ -352,7 +497,7 @@
{ {
"matcher": { "matcher": {
"id": "byName", "id": "byName",
"options": "Count (sum)" "options": "Count"
}, },
"properties": [ "properties": [
{ {
@@ -372,20 +517,12 @@
"id": 2, "id": 2,
"options": { "options": {
"cellHeight": "sm", "cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"frameIndex": 1, "frameIndex": 1,
"showHeader": true, "showHeader": true,
"sortBy": [ "sortBy": [
{ {
"desc": true, "desc": true,
"displayName": "Count (sum)" "displayName": "Count"
} }
] ]
}, },
@@ -398,7 +535,7 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": false, "exemplar": false,
"expr": "topk_max(100, sum(increases_over_time(vmalert_alerts_firing{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"}[$__range])) by(group, alertname) > 0)", "expr": "topk_max(100, sum(increases_over_time(vmalert_alerts_firing{job=~\"$job\",instance=~\"$instance\",group=~\"$group\"}[$__range])) by(group) > 0)",
"format": "table", "format": "table",
"instant": true, "instant": true,
"key": "Q-3934f0fb-8ad6-4519-a98d-c26d0fc6b312-0", "key": "Q-3934f0fb-8ad6-4519-a98d-c26d0fc6b312-0",
@@ -414,8 +551,9 @@
"options": { "options": {
"excludeByName": { "excludeByName": {
"Time": true, "Time": true,
"alertname": false "alertname": true
}, },
"includeByName": {},
"indexByName": { "indexByName": {
"Time": 0, "Time": 0,
"Value": 3, "Value": 3,
@@ -428,23 +566,6 @@
"group": "Group" "group": "Group"
} }
} }
},
{
"id": "groupBy",
"options": {
"fields": {
"Count": {
"aggregations": [
"sum"
],
"operation": "aggregate"
},
"Group": {
"aggregations": [],
"operation": "groupby"
}
}
}
} }
], ],
"type": "table" "type": "table"
@@ -468,7 +589,8 @@
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{ {
"color": "green" "color": "green",
"value": 0
}, },
{ {
"color": "red", "color": "red",
@@ -531,16 +653,14 @@
"id": 1, "id": 1,
"options": { "options": {
"cellHeight": "sm", "cellHeight": "sm",
"footer": {
"countRows": false,
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"frameIndex": 1, "frameIndex": 1,
"showHeader": true "showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "Count"
}
]
}, },
"pluginVersion": "12.0.2", "pluginVersion": "12.0.2",
"targets": [ "targets": [

View File

@@ -29,6 +29,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
* FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add per-URL `-remoteWrite.disableMetadata` flag to disable metadata sending for specific remote storage URLs. See [#10711](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10711). Thanks to @evkuzin for the contribution. * FEATURE: [vmagent](https://docs.victoriametrics.com/victoriametrics/vmagent/): add per-URL `-remoteWrite.disableMetadata` flag to disable metadata sending for specific remote storage URLs. See [#10711](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10711). Thanks to @evkuzin for the contribution.
* FEATURE: introduce `vm_filestream_fsync_duration_seconds_total` and `vm_filestream_fsync_calls_total` metrics, which can be used for detecting slow storage if it cannot keep up with the current data ingestion rate. See [#10432](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10432). Thanks to @mehrdadbn9 for the contribution. * FEATURE: introduce `vm_filestream_fsync_duration_seconds_total` and `vm_filestream_fsync_calls_total` metrics, which can be used for detecting slow storage if it cannot keep up with the current data ingestion rate. See [#10432](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10432). Thanks to @mehrdadbn9 for the contribution.
* FEATURE: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): add dedicated `thanos` mode for [migrating data from Thanos](https://docs.victoriametrics.com/victoriametrics/vmctl/thanos/). This mode supports both raw and downsampled Thanos blocks, including all aggregate types (count, sum, min, max, counter). Each aggregate is imported as a separate metric with resolution and aggregate type suffixes (e.g., `metric_name:5m:count`). The new mode uses `--thanos-*` prefixed flags: `--thanos-snapshot`, `--thanos-concurrency`, `--thanos-filter-time-start`, `--thanos-filter-time-end`, `--thanos-filter-label`, `--thanos-filter-label-value`, and `--thanos-aggr-types`. See [#9262](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9262). * FEATURE: [vmctl](https://docs.victoriametrics.com/victoriametrics/vmctl/): add dedicated `thanos` mode for [migrating data from Thanos](https://docs.victoriametrics.com/victoriametrics/vmctl/thanos/). This mode supports both raw and downsampled Thanos blocks, including all aggregate types (count, sum, min, max, counter). Each aggregate is imported as a separate metric with resolution and aggregate type suffixes (e.g., `metric_name:5m:count`). The new mode uses `--thanos-*` prefixed flags: `--thanos-snapshot`, `--thanos-concurrency`, `--thanos-filter-time-start`, `--thanos-filter-time-end`, `--thanos-filter-label`, `--thanos-filter-label-value`, and `--thanos-aggr-types`. See [#9262](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/9262).
* FEATURE: [dashboards/alert-statistics](https://grafana.com/grafana/dashboards/24553): add pending and firing alerts stats; fix query in `FIRING over time by group` panel. See [#10571](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10571). Thanks to @sias32 for the contribution.
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): retry the requests that failed with unexpected EOF due to unstable network to S3 service. See [#10699](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10699). * BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup/), [vmbackupmanager](https://docs.victoriametrics.com/victoriametrics/vmbackupmanager/): retry the requests that failed with unexpected EOF due to unstable network to S3 service. See [#10699](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10699).
* BUGFIX: All VictoriaMetrics components: Fix an issue where `unsupported` metric metadata type was exposed for summaries and quantiles if a summary wasn't updated within a certain time window. See [metrics#120](https://github.com/VictoriaMetrics/metrics/issues/120) and [metrics#121](https://github.com/VictoriaMetrics/metrics/pull/121). * BUGFIX: All VictoriaMetrics components: Fix an issue where `unsupported` metric metadata type was exposed for summaries and quantiles if a summary wasn't updated within a certain time window. See [metrics#120](https://github.com/VictoriaMetrics/metrics/issues/120) and [metrics#121](https://github.com/VictoriaMetrics/metrics/pull/121).