From 0e2ad47433de65c739dc969ed25623b252811e5f Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 25 Feb 2026 21:44:48 -0800 Subject: [PATCH] Ringtail dashboard: add host metrics, rename to general system health MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add prometheus.exporter.unix to ringtail Alloy with host /proc, /sys, and rootfs mounts so node_* metrics flow from the NixOS host. Rewrite the ringtail dashboard from k8s-only to full system health: uptime, CPU usage by mode, memory usage, filesystem table, network traffic, GPU overview, and k8s summary — matching the macOS dashboard pattern. Co-Authored-By: Claude Opus 4.6 --- argocd/manifests/alloy-ringtail/config.alloy | 28 +- .../manifests/alloy-ringtail/daemonset.yaml | 19 ++ .../dashboards/configmap-ringtail.yaml | 258 ++++++++++++++---- 3 files changed, 252 insertions(+), 53 deletions(-) diff --git a/argocd/manifests/alloy-ringtail/config.alloy b/argocd/manifests/alloy-ringtail/config.alloy index 5cd7d90..9ae8981 100644 --- a/argocd/manifests/alloy-ringtail/config.alloy +++ b/argocd/manifests/alloy-ringtail/config.alloy @@ -1,6 +1,32 @@ -// Alloy ringtail configuration - collects pod logs and scrapes kube-state-metrics +// Alloy ringtail configuration - collects host metrics, pod logs, and kube-state-metrics // Remote-writes metrics to indri Prometheus, logs to indri Loki +// ============== HOST METRICS ============== + +// System metrics exporter (Linux host via /host/proc, /host/sys mounts) +prometheus.exporter.unix "system" { + procfs_path = "/host/proc" + sysfs_path = "/host/sys" + rootfs_path = "/host/root" +} + +// Scrape system metrics and add instance label +prometheus.scrape "system" { + targets = prometheus.exporter.unix.system.targets + forward_to = [prometheus.relabel.instance.receiver] + scrape_interval = "15s" +} + +// Add instance label +prometheus.relabel "instance" { + forward_to = [prometheus.remote_write.prometheus.receiver] + + rule { + target_label = "instance" + replacement = "ringtail" + } +} + // ============== KUBE-STATE-METRICS SCRAPE ============== prometheus.scrape "kube_state_metrics" { diff --git a/argocd/manifests/alloy-ringtail/daemonset.yaml b/argocd/manifests/alloy-ringtail/daemonset.yaml index 89cb930..a8d060a 100644 --- a/argocd/manifests/alloy-ringtail/daemonset.yaml +++ b/argocd/manifests/alloy-ringtail/daemonset.yaml @@ -48,6 +48,16 @@ spec: readOnly: true - name: data mountPath: /var/lib/alloy/data + - name: proc + mountPath: /host/proc + readOnly: true + - name: sys + mountPath: /host/sys + readOnly: true + - name: root + mountPath: /host/root + mountPropagation: HostToContainer + readOnly: true securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: true @@ -65,3 +75,12 @@ spec: path: /var/log - name: data emptyDir: {} + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys + - name: root + hostPath: + path: / diff --git a/argocd/manifests/grafana-config/dashboards/configmap-ringtail.yaml b/argocd/manifests/grafana-config/dashboards/configmap-ringtail.yaml index 8961fdc..63cd2aa 100644 --- a/argocd/manifests/grafana-config/dashboards/configmap-ringtail.yaml +++ b/argocd/manifests/grafana-config/dashboards/configmap-ringtail.yaml @@ -11,72 +11,105 @@ data: "annotations": { "list": [] }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, + "graphTooltip": 1, "id": null, "links": [], "panels": [ + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 100, + "panels": [], + "title": "System Overview", + "type": "row" + }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, - "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } } }, - "gridPos": { "h": 3, "w": 6, "x": 0, "y": 0 }, + "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "dtdurations" } }, + "gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 }, "id": 1, - "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "center", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "value" }, - "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "count(kube_pod_info{cluster=\"ringtail\"})", "refId": "A" }], - "title": "Running Pods", + "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "time() - node_boot_time_seconds{instance=\"ringtail\"}", "refId": "A" }], + "title": "Uptime", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] }, "unit": "decbytes" } }, + "gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 }, + "id": 2, + "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "node_memory_MemTotal_bytes{instance=\"ringtail\"}", "refId": "A" }], + "title": "Total Memory", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "purple", "value": null }] }, "unit": "short" } }, + "gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 }, + "id": 3, + "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "count(node_cpu_seconds_total{instance=\"ringtail\", mode=\"idle\"})", "refId": "A" }], + "title": "CPU Cores", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 4 }, { "color": "red", "value": 8 }] }, "unit": "short", "decimals": 2 } }, + "gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 }, + "id": 4, + "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "auto" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "node_load1{instance=\"ringtail\"}", "refId": "A" }], + "title": "Load (1m)", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } } }, - "gridPos": { "h": 3, "w": 6, "x": 6, "y": 0 }, - "id": 2, + "gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 }, + "id": 5, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "center", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "value" }, - "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "count(kube_deployment_created{cluster=\"ringtail\"})", "refId": "A" }], - "title": "Deployments", + "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "count(kube_pod_info{cluster=\"ringtail\"})", "refId": "A" }], + "title": "K8s Pods", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "unit": "percent", "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 80 }, { "color": "red", "value": 95 }] } } }, - "gridPos": { "h": 3, "w": 6, "x": 12, "y": 0 }, - "id": 3, + "gridPos": { "h": 4, "w": 4, "x": 20, "y": 1 }, + "id": 6, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "center", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "value" }, "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "frigate_gpu_usage_percent", "refId": "A" }], "title": "GPU Usage %", "type": "stat" }, { - "datasource": { "type": "prometheus", "uid": "prometheus" }, - "fieldConfig": { - "defaults": { - "color": { "mode": "thresholds" }, - "mappings": [], - "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } - } - }, - "gridPos": { "h": 3, "w": 6, "x": 18, "y": 0 }, - "id": 4, - "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "value" }, - "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "count(kube_pod_container_status_waiting_reason{cluster=\"ringtail\", reason=~\"ImagePullBackOff|ErrImagePull|CrashLoopBackOff|CreateContainerError|RunContainerError\"}) or vector(0)", "refId": "A" }], - "title": "Unhealthy Pods", - "type": "stat" + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, + "id": 101, + "panels": [], + "title": "CPU & Memory", + "type": "row" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, - "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "bars", "fillOpacity": 80, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, - "unit": "short" + "unit": "percentunit", + "max": 1 } }, - "gridPos": { "h": 8, "w": 24, "x": 0, "y": 3 }, - "id": 5, - "options": { "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true, "sortBy": "Last *", "sortDesc": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, - "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "count by (namespace) (kube_pod_info{cluster=\"ringtail\"})", "legendFormat": "{{namespace}}", "refId": "A" }], - "title": "Pods by Namespace", + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 }, + "id": 7, + "options": { "legend": { "calcs": ["mean", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum by (mode) (rate(node_cpu_seconds_total{instance=\"ringtail\", mode!=\"idle\"}[5m])) / on() group_left count(node_cpu_seconds_total{instance=\"ringtail\", mode=\"idle\"})", "legendFormat": "{{mode}}", "refId": "A" } + ], + "title": "CPU Usage by Mode", "type": "timeseries" }, { @@ -84,37 +117,98 @@ data: "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, - "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "bars", "fillOpacity": 80, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, "unit": "bytes" } }, - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 11 }, - "id": 6, - "options": { "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true, "sortBy": "Last *", "sortDesc": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, - "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum by (namespace) (kube_pod_container_resource_requests{resource=\"memory\", cluster=\"ringtail\"})", "legendFormat": "{{namespace}}", "refId": "A" }], - "title": "Memory Requests by Namespace", + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 }, + "id": 8, + "options": { "legend": { "calcs": ["mean", "lastNotNull"], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "node_memory_MemTotal_bytes{instance=\"ringtail\"} - node_memory_MemAvailable_bytes{instance=\"ringtail\"}", "legendFormat": "Used", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "node_memory_MemAvailable_bytes{instance=\"ringtail\"}", "legendFormat": "Available", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "node_memory_Cached_bytes{instance=\"ringtail\"}", "legendFormat": "Cached", "refId": "C" } + ], + "title": "Memory Usage", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, + "id": 102, + "panels": [], + "title": "Storage", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "custom": { "align": "auto", "cellOptions": { "type": "auto" }, "inspect": false }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.8 }, { "color": "red", "value": 0.95 }] } + }, + "overrides": [ + { "matcher": { "id": "byName", "options": "Size" }, "properties": [{ "id": "unit", "value": "bytes" }] }, + { "matcher": { "id": "byName", "options": "Available" }, "properties": [{ "id": "unit", "value": "bytes" }] }, + { "matcher": { "id": "byName", "options": "Used %" }, "properties": [{ "id": "unit", "value": "percentunit" }, { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.8 }, { "color": "red", "value": 0.95 }] } }, { "id": "custom.cellOptions", "value": { "mode": "gradient", "type": "gauge" } }] } + ] + }, + "gridPos": { "h": 6, "w": 24, "x": 0, "y": 15 }, + "id": 9, + "options": { "cellHeight": "sm", "footer": { "countRows": false, "fields": "", "reducer": ["sum"], "show": false }, "showHeader": true, "sortBy": [{ "desc": true, "displayName": "Size" }] }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "node_filesystem_size_bytes{instance=\"ringtail\", fstype!~\"tmpfs|overlay|squashfs\"}", "format": "table", "instant": true, "refId": "size" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "node_filesystem_avail_bytes{instance=\"ringtail\", fstype!~\"tmpfs|overlay|squashfs\"}", "format": "table", "instant": true, "refId": "avail" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "1 - (node_filesystem_avail_bytes{instance=\"ringtail\", fstype!~\"tmpfs|overlay|squashfs\"} / node_filesystem_size_bytes{instance=\"ringtail\", fstype!~\"tmpfs|overlay|squashfs\"})", "format": "table", "instant": true, "refId": "pct" } + ], + "title": "Filesystem Usage", + "transformations": [ + { "id": "seriesToColumns", "options": { "byField": "mountpoint" } }, + { "id": "organize", "options": { "excludeByName": { "Time": true, "Time 1": true, "Time 2": true, "Time 3": true, "__name__": true, "__name__ 1": true, "__name__ 2": true, "device": true, "device 1": true, "device 2": true, "fstype": true, "fstype 1": true, "fstype 2": true, "instance": true, "instance 1": true, "instance 2": true, "job": true, "job 1": true, "job 2": true, "cluster": true, "cluster 1": true, "cluster 2": true }, "renameByName": { "mountpoint": "Mount", "Value #size": "Size", "Value #avail": "Available", "Value #pct": "Used %" } } } + ], + "type": "table" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 21 }, + "id": 103, + "panels": [], + "title": "Network", + "type": "row" + }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, - "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "bars", "fillOpacity": 80, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, + "custom": { "axisBorderShow": false, "axisCenteredZero": true, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, - "unit": "short" + "unit": "Bps" } }, - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 11 }, - "id": 7, - "options": { "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true, "sortBy": "Last *", "sortDesc": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, - "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum by (namespace) (kube_pod_container_resource_requests{resource=\"cpu\", cluster=\"ringtail\"})", "legendFormat": "{{namespace}}", "refId": "A" }], - "title": "CPU Requests by Namespace", + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 22 }, + "id": 10, + "options": { "legend": { "calcs": ["mean", "lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "rate(node_network_receive_bytes_total{instance=\"ringtail\", device!~\"lo|veth.*|cali.*|flannel.*|cni.*\"}[5m])", "legendFormat": "{{device}} rx", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "-rate(node_network_transmit_bytes_total{instance=\"ringtail\", device!~\"lo|veth.*|cali.*|flannel.*|cni.*\"}[5m])", "legendFormat": "{{device}} tx", "refId": "B" } + ], + "title": "Network Traffic", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 30 }, + "id": 104, + "panels": [], + "title": "GPU", + "type": "row" + }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { @@ -126,8 +220,8 @@ data: "unit": "percent" } }, - "gridPos": { "h": 8, "w": 24, "x": 0, "y": 19 }, - "id": 8, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 31 }, + "id": 11, "options": { "legend": { "calcs": ["mean", "lastNotNull", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "frigate_gpu_usage_percent", "legendFormat": "GPU Usage", "refId": "A" }, @@ -136,10 +230,70 @@ data: "title": "GPU Overview", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 39 }, + "id": 105, + "panels": [], + "title": "Kubernetes", + "type": "row" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "bars", "fillOpacity": 80, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "short" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 40 }, + "id": 12, + "options": { "legend": { "calcs": ["lastNotNull"], "displayMode": "table", "placement": "right", "showLegend": true, "sortBy": "Last *", "sortDesc": true }, "tooltip": { "mode": "multi", "sort": "desc" } }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "count by (namespace) (kube_pod_info{cluster=\"ringtail\"})", "legendFormat": "{{namespace}}", "refId": "A" }], + "title": "Pods by Namespace", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } + } + }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 40 }, + "id": 13, + "options": { "colorMode": "background", "graphMode": "none", "justifyMode": "center", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "value" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "count(kube_pod_container_status_waiting_reason{cluster=\"ringtail\", reason=~\"ImagePullBackOff|ErrImagePull|CrashLoopBackOff|CreateContainerError|RunContainerError\"}) or vector(0)", "refId": "A" }], + "title": "Unhealthy Pods", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } } }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 40 }, + "id": 14, + "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "center", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "textMode": "value" }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "count(kube_deployment_created{cluster=\"ringtail\"})", "refId": "A" }], + "title": "Deployments", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 48 }, + "id": 106, + "panels": [], + "title": "Logs", + "type": "row" + }, { "datasource": { "type": "loki", "uid": "loki" }, - "gridPos": { "h": 8, "w": 24, "x": 0, "y": 27 }, - "id": 9, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 49 }, + "id": 15, "options": { "dedupStrategy": "none", "enableLogDetails": true, "prettifyLogMessage": false, "showCommonLabels": false, "showLabels": true, "showTime": true, "sortOrder": "Descending", "wrapLogMessage": false }, "targets": [{ "datasource": { "type": "loki", "uid": "loki" }, "expr": "{cluster=\"ringtail\"}", "refId": "A" }], "title": "Pod Logs", @@ -148,12 +302,12 @@ data: ], "refresh": "30s", "schemaVersion": 38, - "tags": ["ringtail", "k3s", "gpu"], + "tags": ["ringtail", "k3s", "gpu", "system"], "templating": { "list": [] }, "time": { "from": "now-6h", "to": "now" }, "timepicker": {}, "timezone": "browser", - "title": "Ringtail (k3s)", + "title": "Ringtail", "uid": "ringtail", "version": 1, "weekStart": ""