Compare commits

...

3 Commits

Author SHA1 Message Date
Dominic
ea4b854e1f Merge branch 'master' into add_dash/os-details
Signed-off-by: Dominic <neku113@gmail.com>
2026-06-12 06:40:59 +09:00
Dominic Polizzi
b2080c26e4 better grouping 2026-06-12 06:31:44 +09:00
Dominic Polizzi
283c98d185 Add OS info, Filesystem info & Fsync avg duration panels to dashboards 2026-06-10 05:30:21 +09:00
7 changed files with 2163 additions and 2 deletions

View File

@@ -5136,6 +5136,350 @@
],
"title": "Major page faults rate ($instance)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the operating system name and kernel release version for each instance. Use this to quickly correlate incidents with known OS or kernel-specific regressions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10481",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 297
},
"id": 228,
"options": {
"cellHeight": "sm",
"filterable": true,
"showHeader": true,
"showSearch": true
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vm_os_info{job=~\"$job\", instance=~\"$instance\"}) by(job, instance, os, release)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "OS info",
"type": "table",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"job": {
"aggregations": [],
"operation": "groupby"
},
"os": {
"aggregations": [],
"operation": "groupby"
},
"release": {
"aggregations": [],
"operation": "groupby"
},
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
}
}
}
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the filesystem type for each storage data path. Use this to quickly identify filesystem-related issues (e.g., NFS, XFS bugs) during incident triage. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10482",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 297
},
"id": 229,
"options": {
"cellHeight": "sm",
"filterable": true,
"showHeader": true,
"showSearch": true
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vm_fs_info{job=~\"$job_storage\", instance=~\"$instance\"}) by(job, instance, path, fs_type)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Filesystem info",
"type": "table",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"job": {
"aggregations": [],
"operation": "groupby"
},
"path": {
"aggregations": [],
"operation": "groupby"
},
"fs_type": {
"aggregations": [],
"operation": "groupby"
},
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
}
}
}
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Average duration of fsync system calls. High latency indicates storage I/O that cannot keep up with the write rate. This metric measures only the persistence path (fsyncing to disk), not the page-cache write phase, making it a direct signal for disk performance. Computed as rate(vm_filestream_fsync_duration_seconds_total) / rate(vm_filestream_fsync_calls_total). See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10432",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 305
},
"id": 230,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"hideZeros": true,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"expr": "rate(vm_filestream_fsync_duration_seconds_total{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval]) / rate(vm_filestream_fsync_calls_total{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"instant": false,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Fsync avg duration ($instance)",
"type": "timeseries"
}
],
"title": "Troubleshooting",

View File

@@ -5181,6 +5181,374 @@
],
"title": "Major page faults rate",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the operating system name and kernel release version for each instance. Use this to quickly correlate incidents with known OS or kernel-specific regressions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10481",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "job"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 73
},
"id": 157,
"options": {
"cellHeight": "sm",
"filterable": true,
"showHeader": true,
"showSearch": true
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vm_os_info{job=~\"$job\", instance=~\"$instance\"}) by(job, instance, os, release)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "OS info",
"type": "table",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"job": {
"aggregations": [],
"operation": "groupby"
},
"os": {
"aggregations": [],
"operation": "groupby"
},
"release": {
"aggregations": [],
"operation": "groupby"
},
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
}
}
}
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the filesystem type for each storage data path. Use this to quickly identify filesystem-related issues (e.g., NFS, XFS bugs) during incident triage. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10482",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "job"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 81
},
"id": 158,
"options": {
"cellHeight": "sm",
"filterable": true,
"showHeader": true,
"showSearch": true
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vm_fs_info{job=~\"$job\", instance=~\"$instance\"}) by(job, instance, path, fs_type)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Filesystem info",
"type": "table",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"job": {
"aggregations": [],
"operation": "groupby"
},
"path": {
"aggregations": [],
"operation": "groupby"
},
"fs_type": {
"aggregations": [],
"operation": "groupby"
},
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
}
}
}
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Average duration of fsync system calls. High latency indicates storage I/O that cannot keep up with the write rate. This metric measures only the persistence path (fsyncing to disk), not the page-cache write phase, making it a direct signal for disk performance. Computed as rate(vm_filestream_fsync_duration_seconds_total) / rate(vm_filestream_fsync_calls_total). See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10432",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 81
},
"id": 159,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"hideZeros": true,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"expr": "rate(vm_filestream_fsync_duration_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) / rate(vm_filestream_fsync_calls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"instant": false,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Fsync avg duration",
"type": "timeseries"
}
],
"title": "Troubleshooting",
@@ -8742,4 +9110,4 @@
"uid": "wNf0q_kZk",
"version": 1,
"weekStart": ""
}
}

View File

@@ -5137,6 +5137,350 @@
],
"title": "Major page faults rate ($instance)",
"type": "timeseries"
},
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"description": "Shows the operating system name and kernel release version for each instance. Use this to quickly correlate incidents with known OS or kernel-specific regressions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10481",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 297
},
"id": 228,
"options": {
"cellHeight": "sm",
"filterable": true,
"showHeader": true,
"showSearch": true
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vm_os_info{job=~\"$job\", instance=~\"$instance\"}) by(job, instance, os, release)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "OS info",
"type": "table",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"job": {
"aggregations": [],
"operation": "groupby"
},
"os": {
"aggregations": [],
"operation": "groupby"
},
"release": {
"aggregations": [],
"operation": "groupby"
},
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
}
}
}
}
]
},
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"description": "Shows the filesystem type for each storage data path. Use this to quickly identify filesystem-related issues (e.g., NFS, XFS bugs) during incident triage. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10482",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 297
},
"id": 229,
"options": {
"cellHeight": "sm",
"filterable": true,
"showHeader": true,
"showSearch": true
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vm_fs_info{job=~\"$job_storage\", instance=~\"$instance\"}) by(job, instance, path, fs_type)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Filesystem info",
"type": "table",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"job": {
"aggregations": [],
"operation": "groupby"
},
"path": {
"aggregations": [],
"operation": "groupby"
},
"fs_type": {
"aggregations": [],
"operation": "groupby"
},
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
}
}
}
}
]
},
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"description": "Average duration of fsync system calls. High latency indicates storage I/O that cannot keep up with the write rate. This metric measures only the persistence path (fsyncing to disk), not the page-cache write phase, making it a direct signal for disk performance. Computed as rate(vm_filestream_fsync_duration_seconds_total) / rate(vm_filestream_fsync_calls_total). See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10432",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 305
},
"id": 230,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"hideZeros": true,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"expr": "rate(vm_filestream_fsync_duration_seconds_total{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval]) / rate(vm_filestream_fsync_calls_total{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"instant": false,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Fsync avg duration ($instance)",
"type": "timeseries"
}
],
"title": "Troubleshooting",

View File

@@ -5182,6 +5182,374 @@
],
"title": "Major page faults rate",
"type": "timeseries"
},
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"description": "Shows the operating system name and kernel release version for each instance. Use this to quickly correlate incidents with known OS or kernel-specific regressions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10481",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "job"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 73
},
"id": 157,
"options": {
"cellHeight": "sm",
"filterable": true,
"showHeader": true,
"showSearch": true
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vm_os_info{job=~\"$job\", instance=~\"$instance\"}) by(job, instance, os, release)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "OS info",
"type": "table",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"job": {
"aggregations": [],
"operation": "groupby"
},
"os": {
"aggregations": [],
"operation": "groupby"
},
"release": {
"aggregations": [],
"operation": "groupby"
},
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
}
}
}
}
]
},
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"description": "Shows the filesystem type for each storage data path. Use this to quickly identify filesystem-related issues (e.g., NFS, XFS bugs) during incident triage. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10482",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "job"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 81
},
"id": 158,
"options": {
"cellHeight": "sm",
"filterable": true,
"showHeader": true,
"showSearch": true
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vm_fs_info{job=~\"$job\", instance=~\"$instance\"}) by(job, instance, path, fs_type)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Filesystem info",
"type": "table",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"job": {
"aggregations": [],
"operation": "groupby"
},
"path": {
"aggregations": [],
"operation": "groupby"
},
"fs_type": {
"aggregations": [],
"operation": "groupby"
},
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
}
}
}
}
]
},
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"description": "Average duration of fsync system calls. High latency indicates storage I/O that cannot keep up with the write rate. This metric measures only the persistence path (fsyncing to disk), not the page-cache write phase, making it a direct signal for disk performance. Computed as rate(vm_filestream_fsync_duration_seconds_total) / rate(vm_filestream_fsync_calls_total). See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10432",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 81
},
"id": 159,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"hideZeros": true,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"expr": "rate(vm_filestream_fsync_duration_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) / rate(vm_filestream_fsync_calls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"instant": false,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Fsync avg duration",
"type": "timeseries"
}
],
"title": "Troubleshooting",
@@ -8743,4 +9111,4 @@
"uid": "wNf0q_kZk_vm",
"version": 1,
"weekStart": ""
}
}

View File

@@ -4581,6 +4581,374 @@
],
"title": "Rows ignored for last 1h ($instance)",
"type": "timeseries"
},
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"description": "Shows the operating system name and kernel release version for each instance. Use this to quickly correlate incidents with known OS or kernel-specific regressions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10481",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "job"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 181
},
"id": 169,
"options": {
"cellHeight": "sm",
"filterable": true,
"showHeader": true,
"showSearch": true
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vm_os_info{job=~\"$job\", instance=~\"$instance\"}) by(job, instance, os, release)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "OS info",
"type": "table",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"job": {
"aggregations": [],
"operation": "groupby"
},
"os": {
"aggregations": [],
"operation": "groupby"
},
"release": {
"aggregations": [],
"operation": "groupby"
},
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
}
}
}
}
]
},
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"description": "Shows the filesystem type for each storage data path. Use this to quickly identify filesystem-related issues (e.g., NFS, XFS bugs) during incident triage. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10482",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "job"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 189
},
"id": 170,
"options": {
"cellHeight": "sm",
"filterable": true,
"showHeader": true,
"showSearch": true
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vm_fs_info{job=~\"$job\", instance=~\"$instance\"}) by(job, instance, path, fs_type)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Filesystem info",
"type": "table",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"job": {
"aggregations": [],
"operation": "groupby"
},
"path": {
"aggregations": [],
"operation": "groupby"
},
"fs_type": {
"aggregations": [],
"operation": "groupby"
},
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
}
}
}
}
]
},
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"description": "Average duration of fsync system calls. High latency indicates storage I/O that cannot keep up with the write rate. This metric measures only the persistence path (fsyncing to disk), not the page-cache write phase, making it a direct signal for disk performance. Computed as rate(vm_filestream_fsync_duration_seconds_total) / rate(vm_filestream_fsync_calls_total). See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10432",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 189
},
"id": 171,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"hideZeros": true,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "victoriametrics-metrics-datasource",
"uid": "$ds"
},
"editorMode": "code",
"expr": "rate(vm_filestream_fsync_duration_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) / rate(vm_filestream_fsync_calls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"instant": false,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Fsync avg duration ($instance)",
"type": "timeseries"
}
],
"title": "Troubleshooting",

View File

@@ -4580,6 +4580,374 @@
],
"title": "Rows ignored for last 1h ($instance)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the operating system name and kernel release version for each instance. Use this to quickly correlate incidents with known OS or kernel-specific regressions. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10481",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "job"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 181
},
"id": 169,
"options": {
"cellHeight": "sm",
"filterable": true,
"showHeader": true,
"showSearch": true
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vm_os_info{job=~\"$job\", instance=~\"$instance\"}) by(job, instance, os, release)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "OS info",
"type": "table",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"job": {
"aggregations": [],
"operation": "groupby"
},
"os": {
"aggregations": [],
"operation": "groupby"
},
"release": {
"aggregations": [],
"operation": "groupby"
},
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
}
}
}
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows the filesystem type for each storage data path. Use this to quickly identify filesystem-related issues (e.g., NFS, XFS bugs) during incident triage. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10482",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"cellOptions": {
"type": "auto"
},
"filterable": true,
"footer": {
"reducers": []
},
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "Time"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "Value"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
},
{
"matcher": {
"id": "byName",
"options": "job"
},
"properties": [
{
"id": "custom.hideFrom.viz",
"value": true
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 189
},
"id": 170,
"options": {
"cellHeight": "sm",
"filterable": true,
"showHeader": true,
"showSearch": true
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "sum(vm_fs_info{job=~\"$job\", instance=~\"$instance\"}) by(job, instance, path, fs_type)",
"format": "table",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "A"
}
],
"title": "Filesystem info",
"type": "table",
"transformations": [
{
"id": "groupBy",
"options": {
"fields": {
"job": {
"aggregations": [],
"operation": "groupby"
},
"path": {
"aggregations": [],
"operation": "groupby"
},
"fs_type": {
"aggregations": [],
"operation": "groupby"
},
"instance": {
"aggregations": [
"uniqueValues"
],
"operation": "aggregate"
}
}
}
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Average duration of fsync system calls. High latency indicates storage I/O that cannot keep up with the write rate. This metric measures only the persistence path (fsyncing to disk), not the page-cache write phase, making it a direct signal for disk performance. Computed as rate(vm_filestream_fsync_duration_seconds_total) / rate(vm_filestream_fsync_calls_total). See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10432",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 189
},
"id": 171,
"options": {
"legend": {
"calcs": [
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"hideZeros": true,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "12.2.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"expr": "rate(vm_filestream_fsync_duration_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval]) / rate(vm_filestream_fsync_calls_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])",
"format": "time_series",
"instant": false,
"legendFormat": "{{instance}}",
"refId": "A"
}
],
"title": "Fsync avg duration ($instance)",
"type": "timeseries"
}
],
"title": "Troubleshooting",

View File

@@ -26,6 +26,7 @@ See also [LTS releases](https://docs.victoriametrics.com/victoriametrics/lts-rel
## tip
* FEATURE: [dashboards](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/dashboards): add `OS info`, `Filesystem info`, and `Fsync avg duration` panels to the Troubleshooting section of the single-node, cluster, and vmagent dashboards. These panels surface `vm_os_info`, `vm_fs_info`, and the `vm_filestream_fsync_*` metrics for faster incident triage. See [#10481](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10481), [#10482](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10482) and [#10432](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/10432).
* BUGFIX: [stream aggregation](https://docs.victoriametrics.com/victoriametrics/stream-aggregation/): fix issue with producing aggregated samples with identical timestamps between flushes. See PR [#10808](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/10808) for details.
## [v1.145.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.145.0)