Add pod state observability to minikube dashboard #83

Merged
eblume merged 2 commits from feature/pod-state-dashboard into main 2026-02-03 07:20:06 -08:00
Showing only changes of commit 49b4a9f5be - Show all commits

Add pod state observability to minikube dashboard

- Add "Unhealthy Pods" stat panel that shows count of pods in error states
  (ImagePullBackOff, CrashLoopBackOff, etc.) with red background when > 0
- Add "Pods by Waiting Reason" time series showing container waiting states
- This provides visibility into stuck pods that ArgoCD doesn't track

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Erich Blume 2026-02-03 07:11:47 -08:00

View file

@@ -107,6 +107,22 @@ data:
"title": "CPU Requests (cores)",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"description": "Number of distinct pods in the selected namespace(s) that have at least one container waiting with reason ImagePullBackOff, ErrImagePull, CrashLoopBackOff, CreateContainerError or RunContainerError. Turns red when the count is 1 or more.",
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] }
}
},
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 3 },
"id": 13,
"options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "value" },
"targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "count(count by (namespace, pod) (kube_pod_container_status_waiting_reason{namespace=~\"$namespace\", reason=~\"ImagePullBackOff|ErrImagePull|CrashLoopBackOff|CreateContainerError|RunContainerError\"} > 0)) or vector(0)", "refId": "A" }],
"title": "Unhealthy Pods",
"type": "stat"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"fieldConfig": {
@@ -118,7 +134,25 @@ data:
"unit": "short"
}
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 3 },
"gridPos": { "h": 4, "w": 18, "x": 6, "y": 3 },
"id": 14,
"options": { "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true, "sortBy": "Last *", "sortDesc": true }, "tooltip": { "mode": "multi", "sort": "desc" } },
"targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum by (reason) (kube_pod_container_status_waiting_reason{namespace=~\"$namespace\"})", "legendFormat": "{{reason}}", "refId": "A" }],
"title": "Pods by Waiting Reason",
"type": "timeseries"
},
{
"datasource": { "type": "prometheus", "uid": "prometheus" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "bars", "fillOpacity": 80, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } },
"mappings": [],
"thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] },
"unit": "short"
}
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 7 },
"id": 9,
"options": { "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true, "sortBy": "Last *", "sortDesc": true }, "tooltip": { "mode": "multi", "sort": "desc" } },
"targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "count by (namespace) (kube_pod_info{namespace=~\"$namespace\"})", "legendFormat": "{{namespace}}", "refId": "A" }],
@@ -136,7 +170,7 @@ data:
"unit": "bytes"
}
},
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 3 },
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 7 },
"id": 10,
"options": { "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true, "sortBy": "Last *", "sortDesc": true }, "tooltip": { "mode": "multi", "sort": "desc" } },
"targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum by (namespace) (kube_pod_container_resource_requests{resource=\"memory\",namespace=~\"$namespace\"})", "legendFormat": "{{namespace}}", "refId": "A" }],
@@ -159,7 +193,7 @@ data:
{ "matcher": { "id": "byName", "options": "CPU Limits" }, "properties": [{ "id": "unit", "value": "short" }] }
]
},
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 11 },
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 15 },
"id": 11,
"options": { "cellHeight": "sm", "footer": { "countRows": false, "fields": "", "reducer": ["sum"], "show": false }, "showHeader": true, "sortBy": [{ "desc": true, "displayName": "Pods" }] },
"targets": [
@@ -178,7 +212,7 @@ data:
},
{
"datasource": { "type": "loki", "uid": "loki" },
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 19 },
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 23 },
"id": 12,
"options": { "dedupStrategy": "none", "enableLogDetails": true, "prettifyLogMessage": false, "showCommonLabels": false, "showLabels": true, "showTime": true, "sortOrder": "Descending", "wrapLogMessage": false },
"targets": [{ "datasource": { "type": "loki", "uid": "loki" }, "expr": "{namespace=~\"$namespace\"}", "refId": "A" }],