From 3337392b558c2db91be7dbea98eb557847558056 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Sun, 18 Jan 2026 11:53:04 -0800 Subject: [PATCH] Add Grafana dashboards and metrics collection for zot and minikube Phase 0 followup: - Enable zot native metrics endpoint (/metrics) - Add zot scraping to Alloy config - Create zot Grafana dashboard (status, requests, latency, memory) - Create minikube_metrics role (collects cluster health metrics) - Create minikube Grafana dashboard (status, pod/namespace counts, logs) - Update indri-services-check with minikube-metrics checks Co-Authored-By: Claude Opus 4.5 --- ansible/playbooks/indri.yml | 2 + ansible/roles/alloy/defaults/main.yml | 4 + ansible/roles/alloy/templates/config.alloy.j2 | 12 + .../grafana/files/dashboards/minikube.json | 449 ++++++++++++++++ .../roles/grafana/files/dashboards/zot.json | 500 ++++++++++++++++++ .../roles/minikube_metrics/defaults/main.yml | 5 + .../roles/minikube_metrics/handlers/main.yml | 6 + ansible/roles/minikube_metrics/tasks/main.yml | 43 ++ .../templates/minikube-metrics.plist.j2 | 21 + .../templates/minikube-metrics.sh.j2 | 57 ++ ansible/roles/zot/templates/config.json.j2 | 6 + mise-tasks/indri-services-check | 2 + 12 files changed, 1107 insertions(+) create mode 100644 ansible/roles/grafana/files/dashboards/minikube.json create mode 100644 ansible/roles/grafana/files/dashboards/zot.json create mode 100644 ansible/roles/minikube_metrics/defaults/main.yml create mode 100644 ansible/roles/minikube_metrics/handlers/main.yml create mode 100644 ansible/roles/minikube_metrics/tasks/main.yml create mode 100644 ansible/roles/minikube_metrics/templates/minikube-metrics.plist.j2 create mode 100644 ansible/roles/minikube_metrics/templates/minikube-metrics.sh.j2 diff --git a/ansible/playbooks/indri.yml b/ansible/playbooks/indri.yml index 63c6dea..c5a08f5 100644 --- a/ansible/playbooks/indri.yml +++ b/ansible/playbooks/indri.yml @@ -107,6 +107,8 @@ tags: podman - role: minikube tags: minikube + - 
role: minikube_metrics + tags: minikube_metrics - role: plex_metrics tags: plex_metrics - role: postgresql diff --git a/ansible/roles/alloy/defaults/main.yml b/ansible/roles/alloy/defaults/main.yml index 894eb85..117d703 100644 --- a/ansible/roles/alloy/defaults/main.yml +++ b/ansible/roles/alloy/defaults/main.yml @@ -81,6 +81,10 @@ alloy_plex_logs: # Enable log collection (requires Loki to be running) alloy_collect_logs: true +# Zot registry metrics collection +alloy_collect_zot: true +alloy_zot_metrics_url: "http://localhost:5050/metrics" + # PostgreSQL metrics collection alloy_collect_postgres: true alloy_postgres_host: localhost diff --git a/ansible/roles/alloy/templates/config.alloy.j2 b/ansible/roles/alloy/templates/config.alloy.j2 index e0c1cad..d6d2e75 100644 --- a/ansible/roles/alloy/templates/config.alloy.j2 +++ b/ansible/roles/alloy/templates/config.alloy.j2 @@ -54,6 +54,18 @@ prometheus.scrape "postgresql" { } {% endif %} +{% if alloy_collect_zot | default(false) %} +// ============== ZOT REGISTRY METRICS ============== + +// Scrape Zot's native metrics endpoint +prometheus.scrape "zot" { + targets = [{"__address__" = "{{ alloy_zot_metrics_url | urlsplit('netloc') }}"}] + metrics_path = "{{ alloy_zot_metrics_url | urlsplit('path') | default('/metrics', true) }}" + forward_to = [prometheus.relabel.instance.receiver] + scrape_interval = "{{ alloy_scrape_interval }}" +} +{% endif %} + {% if alloy_collect_logs %} // ============== LOG COLLECTION ============== diff --git a/ansible/roles/grafana/files/dashboards/minikube.json b/ansible/roles/grafana/files/dashboards/minikube.json new file mode 100644 index 0000000..484ff40 --- /dev/null +++ b/ansible/roles/grafana/files/dashboards/minikube.json @@ -0,0 +1,449 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [ + { + "options": { + "0": {
"color": "red", "index": 0, "text": "DOWN" } + }, + "type": "value" + }, + { + "options": { + "1": { "color": "green", "index": 1, "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "minikube_up", + "refId": "A" + } + ], + "title": "Minikube Status", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [ + { + "options": { + "0": { "color": "red", "index": 0, "text": "DOWN" } + }, + "type": "value" + }, + { + "options": { + "1": { "color": "green", "index": 1, "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "minikube_apiserver_up", + "refId": "A" + } + ], + "title": "API Server", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "minikube_node_count", + "refId": "A" + } + ], + "title": "Node Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "minikube_pod_count", + "refId": "A" + } + ], + "title": "Pod Count", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 }, + "id": 5, + "options": { + "colorMode":
"value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "minikube_namespace_count", + "refId": "A" + } + ], + "title": "Namespaces", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 }, + "id": 6, + "options": { + "legend": { + "calcs": ["lastNotNull"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "minikube_up", + "legendFormat": "Minikube", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "minikube_apiserver_up", + "legendFormat": "API Server", + "refId": "B" + } + ], + "title": "Cluster Health Over Time", + "type": "timeseries" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 }, + "id": 7, + "options": { + "legend": { + "calcs": ["lastNotNull", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "minikube_pod_count", + "legendFormat": "Pods", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "minikube_namespace_count", + "legendFormat": "Namespaces", + "refId": "B" + } + ], + "title": "Resource Counts Over Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "gridPos": { "h": 10, "w": 24, "x": 0, "y": 12 }, + "id": 8, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "loki", "uid": "loki" }, + "expr": 
"{host=\"indri\"} |= \"kube\"", + "refId": "A" + } + ], + "title": "Kubernetes Related Logs", + "type": "logs" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["minikube", "kubernetes", "k8s"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Minikube Kubernetes", + "uid": "minikube", + "version": 1, + "weekStart": "" +} diff --git a/ansible/roles/grafana/files/dashboards/zot.json b/ansible/roles/grafana/files/dashboards/zot.json new file mode 100644 index 0000000..0ee2908 --- /dev/null +++ b/ansible/roles/grafana/files/dashboards/zot.json @@ -0,0 +1,500 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [ + { + "options": { + "0": { "color": "red", "index": 0, "text": "DOWN" } + }, + "type": "value" + }, + { + "options": { + "1": { "color": "green", "index": 1, "text": "UP" } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "zot_up", + "refId": "A" + } + ], + "title": "Zot Status", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { +
"defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "go_goroutines{job=\"zot\"}", + "refId": "A" + } + ], + "title": "Goroutines", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 536870912 }, + { "color": "red", "value": 1073741824 } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "process_resident_memory_bytes{job=\"zot\"}", + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w":
4, "x": 12, "y": 0 }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum(rate(zot_http_requests_total{job=\"zot\"}[5m]))", + "refId": "A" + } + ], + "title": "Request Rate", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 }, + "id": 5, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum by (method) (rate(zot_http_requests_total{job=\"zot\"}[5m]))", + "legendFormat": "{{method}}", + "refId": "A" + } + ], + "title": "HTTP Requests by Method", + "type": "timeseries" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 }, + "id": 6, + "options": { + "legend": { + "calcs": ["mean", "max"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "sum by (code) (rate(zot_http_requests_total{job=\"zot\"}[5m]))", + "legendFormat": "{{code}}", + "refId": "A" + } + ], + "title": "HTTP Requests by Status Code", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { 
"type": "linear" }, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 }, + "id": 7, + "options": { + "legend": { + "calcs": ["mean", "p95"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.50, sum(rate(zot_http_request_duration_seconds_bucket{job=\"zot\"}[5m])) by (le))", + "legendFormat": "p50", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.95, sum(rate(zot_http_request_duration_seconds_bucket{job=\"zot\"}[5m])) by (le))", + "legendFormat": "p95", + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "histogram_quantile(0.99, sum(rate(zot_http_request_duration_seconds_bucket{job=\"zot\"}[5m])) by (le))", + "legendFormat": "p99", + "refId": "C" + } + ], + "title": "HTTP Request Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "never", + "spanNulls": false, + 
"stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 }, + "id": 8, + "options": { + "legend": { + "calcs": ["lastNotNull"], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "process_resident_memory_bytes{job=\"zot\"}", + "legendFormat": "Resident Memory", + "refId": "A" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "go_memstats_heap_alloc_bytes{job=\"zot\"}", + "legendFormat": "Heap Alloc", + "refId": "B" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "expr": "go_memstats_heap_inuse_bytes{job=\"zot\"}", + "legendFormat": "Heap In Use", + "refId": "C" + } + ], + "title": "Memory Over Time", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["zot", "registry", "oci"], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Zot Container Registry", + "uid": "zot", + "version": 1, + "weekStart": "" +} diff --git a/ansible/roles/minikube_metrics/defaults/main.yml b/ansible/roles/minikube_metrics/defaults/main.yml new file mode 100644 index 0000000..68fd672 --- /dev/null +++ b/ansible/roles/minikube_metrics/defaults/main.yml @@ -0,0 +1,5 @@ +--- +minikube_metrics_dir: /opt/homebrew/var/node_exporter/textfile +minikube_metrics_script: /Users/erichblume/bin/minikube-metrics +minikube_metrics_interval: 60 # seconds between metric collection +minikube_metrics_log_dir: /opt/homebrew/var/log diff --git a/ansible/roles/minikube_metrics/handlers/main.yml 
b/ansible/roles/minikube_metrics/handlers/main.yml new file mode 100644 index 0000000..595f838 --- /dev/null +++ b/ansible/roles/minikube_metrics/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: Reload minikube-metrics + ansible.builtin.shell: | + launchctl unload ~/Library/LaunchAgents/mcquack.eblume.minikube-metrics.plist 2>/dev/null || true + launchctl load ~/Library/LaunchAgents/mcquack.eblume.minikube-metrics.plist + changed_when: true diff --git a/ansible/roles/minikube_metrics/tasks/main.yml b/ansible/roles/minikube_metrics/tasks/main.yml new file mode 100644 index 0000000..da043ab --- /dev/null +++ b/ansible/roles/minikube_metrics/tasks/main.yml @@ -0,0 +1,43 @@ +--- +- name: Ensure metrics directory exists + ansible.builtin.file: + path: "{{ minikube_metrics_dir }}" + state: directory + mode: '0755' + +- name: Ensure log directory exists + ansible.builtin.file: + path: "{{ minikube_metrics_log_dir }}" + state: directory + mode: '0755' + +- name: Ensure bin directory exists + ansible.builtin.file: + path: "{{ minikube_metrics_script | dirname }}" + state: directory + mode: '0755' + +- name: Deploy minikube-metrics script + ansible.builtin.template: + src: minikube-metrics.sh.j2 + dest: "{{ minikube_metrics_script }}" + mode: '0755' + +- name: Deploy minikube-metrics LaunchAgent plist + ansible.builtin.template: + src: minikube-metrics.plist.j2 + dest: ~/Library/LaunchAgents/mcquack.eblume.minikube-metrics.plist + mode: '0644' + notify: Reload minikube-metrics + +- name: Check if minikube-metrics LaunchAgent is loaded + ansible.builtin.command: launchctl list mcquack.eblume.minikube-metrics + register: minikube_metrics_launchctl_check + changed_when: false + failed_when: false + +- name: Load minikube-metrics LaunchAgent if not loaded + ansible.builtin.command: launchctl load ~/Library/LaunchAgents/mcquack.eblume.minikube-metrics.plist + when: minikube_metrics_launchctl_check.rc != 0 + changed_when: true + failed_when: false diff --git 
a/ansible/roles/minikube_metrics/templates/minikube-metrics.plist.j2 b/ansible/roles/minikube_metrics/templates/minikube-metrics.plist.j2 new file mode 100644 index 0000000..4e751d7 --- /dev/null +++ b/ansible/roles/minikube_metrics/templates/minikube-metrics.plist.j2 @@ -0,0 +1,21 @@ + + + + + + Label + mcquack.eblume.minikube-metrics + ProgramArguments + + {{ minikube_metrics_script }} + + StartInterval + {{ minikube_metrics_interval }} + RunAtLoad + + StandardErrorPath + {{ minikube_metrics_log_dir }}/mcquack.minikube-metrics.err.log + StandardOutPath + {{ minikube_metrics_log_dir }}/mcquack.minikube-metrics.out.log + + diff --git a/ansible/roles/minikube_metrics/templates/minikube-metrics.sh.j2 b/ansible/roles/minikube_metrics/templates/minikube-metrics.sh.j2 new file mode 100644 index 0000000..3e3a526 --- /dev/null +++ b/ansible/roles/minikube_metrics/templates/minikube-metrics.sh.j2 @@ -0,0 +1,57 @@ +#!/bin/bash +# {{ ansible_managed }} +# Collects minikube/kubernetes metrics for node_exporter textfile collector + +set -euo pipefail + +OUTPUT_FILE="{{ minikube_metrics_dir }}/minikube.prom" +TEMP_FILE="${OUTPUT_FILE}.tmp" + +# Start output file +cat > "$TEMP_FILE" << 'HEADER' +# HELP minikube_up Minikube cluster is running +# TYPE minikube_up gauge +# HELP minikube_apiserver_up Kubernetes API server is responding +# TYPE minikube_apiserver_up gauge +# HELP minikube_node_count Number of nodes in the cluster +# TYPE minikube_node_count gauge +# HELP minikube_pod_count Number of pods in the cluster +# TYPE minikube_pod_count gauge +# HELP minikube_namespace_count Number of namespaces in the cluster +# TYPE minikube_namespace_count gauge +HEADER + +# Check if minikube is running (Go template braces are escaped so Jinja2 emits them verbatim) +if minikube status --format='{% raw %}{{.Host}}{% endraw %}' 2>/dev/null | grep -q "Running"; then + echo "minikube_up 1" >> "$TEMP_FILE" +else + echo "minikube_up 0" >> "$TEMP_FILE" + echo "minikube_apiserver_up 0" >> "$TEMP_FILE" + echo "minikube_node_count 0" >> "$TEMP_FILE" + echo "minikube_pod_count 0"
>> "$TEMP_FILE" + echo "minikube_namespace_count 0" >> "$TEMP_FILE" + mv "$TEMP_FILE" "$OUTPUT_FILE" + exit 0 +fi + +# Check API server health +if kubectl get --raw /healthz >/dev/null 2>&1; then + echo "minikube_apiserver_up 1" >> "$TEMP_FILE" +else + echo "minikube_apiserver_up 0" >> "$TEMP_FILE" +fi + +# Get node count (kubectl failure must not abort under set -e/pipefail; falls back to 0) +NODE_COUNT=$( (kubectl get nodes --no-headers 2>/dev/null || true) | wc -l | tr -d ' ') +echo "minikube_node_count ${NODE_COUNT:-0}" >> "$TEMP_FILE" + +# Get pod count (all namespaces) +POD_COUNT=$( (kubectl get pods -A --no-headers 2>/dev/null || true) | wc -l | tr -d ' ') +echo "minikube_pod_count ${POD_COUNT:-0}" >> "$TEMP_FILE" + +# Get namespace count +NS_COUNT=$( (kubectl get namespaces --no-headers 2>/dev/null || true) | wc -l | tr -d ' ') +echo "minikube_namespace_count ${NS_COUNT:-0}" >> "$TEMP_FILE" + +# Atomic move +mv "$TEMP_FILE" "$OUTPUT_FILE" diff --git a/ansible/roles/zot/templates/config.json.j2 b/ansible/roles/zot/templates/config.json.j2 index 7124dc2..3c5c668 100644 --- a/ansible/roles/zot/templates/config.json.j2 +++ b/ansible/roles/zot/templates/config.json.j2 @@ -14,6 +14,12 @@ "level": "info" }, "extensions": { + "metrics": { + "enable": true, + "prometheus": { + "path": "/metrics" + } + }, "sync": { "enable": true, "registries": [ diff --git a/mise-tasks/indri-services-check b/mise-tasks/indri-services-check index 73b52b2..07f9feb 100755 --- a/mise-tasks/indri-services-check +++ b/mise-tasks/indri-services-check @@ -55,6 +55,7 @@ check_service "postgresql" "ssh indri 'brew services list | grep postgresql | gr check_service "miniflux" "ssh indri 'brew services list | grep miniflux | grep started'" check_service "zot" "ssh indri 'launchctl list | grep mcquack.eblume.zot | grep -v \"^-\"'" check_service "zot-metrics" "ssh indri 'launchctl list | grep zot-metrics | grep -v \"^-\"'" +check_service "minikube-metrics" "ssh indri 'launchctl list | grep minikube-metrics | grep -v \"^-\"'" echo "" echo "HTTP endpoints (via Tailscale):" @@ -74,6 +75,7 @@
check_service "PostgreSQL" "ssh indri '/opt/homebrew/opt/postgresql@18/bin/pg_is # Zot registry (via Tailscale service) check_http "Zot Registry" "https://registry.tail8d86e.ts.net/v2/_catalog" check_service "Zot metrics file" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/zot.prom'" +check_service "Minikube metrics file" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/minikube.prom'" echo "" echo "Kubernetes cluster:"