K8s Migration Phase 0: Foundation Infrastructure (#26)
## Summary - Step 0.1: Update Pulumi ACLs with tag:registry - Step 0.3: Create Zot registry ansible role with mcquack LaunchAgent - Step 0.4: Add Zot to Tailscale Serve configuration - Step 0.5: Create Zot metrics role for Prometheus scraping - Step 0.6: Add Zot log collection to Alloy - Step 0.7: Update indri-services-check with zot checks - Step 0.8: Add podman role for container runtime - Step 0.9: Add minikube role for Kubernetes cluster - Step 0.10: Configure remote kubectl access with 1Password credentials ## Remaining Steps - [ ] Step 0.11: Add minikube to indri-services-check - [ ] Step 0.12: Create zettelkasten documentation - [ ] Step 0.13: Verify main playbook (already done - roles added) ## Deployment and Testing - [x] Zot registry deployed and accessible at https://registry.tail8d86e.ts.net - [x] Podman machine running on indri - [x] Minikube cluster running on indri - [x] kubectl access from gilbert working with 1Password credentials - [ ] indri-services-check passes all checks 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.tail8d86e.ts.net/eblume/blumeops/pulls/26
This commit is contained in:
parent
ee196b0c10
commit
19a82373d5
32 changed files with 1811 additions and 10 deletions
1
Brewfile
1
Brewfile
|
|
@ -1,3 +1,4 @@
|
|||
# CLI tools for blumeops management
|
||||
brew "bat" # Syntax-highlighted file concatenation
|
||||
brew "tea" # Gitea/Forgejo CLI for forge.tail8d86e.ts.net
|
||||
brew "podman" # Container CLI (uses VM on macOS, for building/pushing images)
|
||||
|
|
|
|||
|
|
@ -99,6 +99,16 @@
|
|||
tags: devpi
|
||||
- role: devpi_metrics
|
||||
tags: devpi_metrics
|
||||
- role: zot
|
||||
tags: zot
|
||||
- role: zot_metrics
|
||||
tags: zot_metrics
|
||||
- role: podman
|
||||
tags: podman
|
||||
- role: minikube
|
||||
tags: minikube
|
||||
- role: minikube_metrics
|
||||
tags: minikube_metrics
|
||||
- role: plex_metrics
|
||||
tags: plex_metrics
|
||||
- role: postgresql
|
||||
|
|
|
|||
|
|
@ -66,6 +66,12 @@ alloy_mcquack_logs:
|
|||
- path: /Users/erichblume/Library/Logs/mcquack.borgmatic.err.log
|
||||
service: borgmatic
|
||||
stream: stderr
|
||||
- path: /Users/erichblume/Library/Logs/mcquack.zot.out.log
|
||||
service: zot
|
||||
stream: stdout
|
||||
- path: /Users/erichblume/Library/Logs/mcquack.zot.err.log
|
||||
service: zot
|
||||
stream: stderr
|
||||
|
||||
alloy_plex_logs:
|
||||
- path: /Users/erichblume/Library/Logs/Plex Media Server/Plex Media Server.log
|
||||
|
|
@ -75,6 +81,10 @@ alloy_plex_logs:
|
|||
# Enable log collection (requires Loki to be running)
|
||||
alloy_collect_logs: true
|
||||
|
||||
# Zot registry metrics collection
|
||||
alloy_collect_zot: true
|
||||
alloy_zot_metrics_url: "http://localhost:5050/metrics"
|
||||
|
||||
# PostgreSQL metrics collection
|
||||
alloy_collect_postgres: true
|
||||
alloy_postgres_host: localhost
|
||||
|
|
|
|||
|
|
@ -54,6 +54,18 @@ prometheus.scrape "postgresql" {
|
|||
}
|
||||
{% endif %}
|
||||
|
||||
{% if alloy_collect_zot | default(false) %}
|
||||
// ============== ZOT REGISTRY METRICS ==============
|
||||
|
||||
// Scrape Zot's native metrics endpoint
|
||||
prometheus.scrape "zot" {
|
||||
targets = [{"__address__" = "{{ alloy_zot_metrics_url | default('http://localhost:5050/metrics') | urlsplit('netloc') }}"}]
|
||||
metrics_path = "{{ alloy_zot_metrics_url | default('http://localhost:5050/metrics') | urlsplit('path') }}"
|
||||
forward_to = [prometheus.relabel.instance.receiver]
|
||||
scrape_interval = "{{ alloy_scrape_interval }}"
|
||||
}
|
||||
{% endif %}
|
||||
|
||||
{% if alloy_collect_logs %}
|
||||
// ============== LOG COLLECTION ==============
|
||||
|
||||
|
|
|
|||
449
ansible/roles/grafana/files/dashboards/minikube.json
Normal file
449
ansible/roles/grafana/files/dashboards/minikube.json
Normal file
|
|
@ -0,0 +1,449 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": { "color": "red", "index": 0, "text": "DOWN" }
|
||||
},
|
||||
"type": "value"
|
||||
},
|
||||
{
|
||||
"options": {
|
||||
"1": { "color": "green", "index": 1, "text": "UP" }
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "red", "value": null },
|
||||
{ "color": "green", "value": 1 }
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 },
|
||||
"id": 1,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "minikube_up",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Minikube Status",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": { "color": "red", "index": 0, "text": "DOWN" }
|
||||
},
|
||||
"type": "value"
|
||||
},
|
||||
{
|
||||
"options": {
|
||||
"1": { "color": "green", "index": 1, "text": "UP" }
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "red", "value": null },
|
||||
{ "color": "green", "value": 1 }
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 },
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "minikube_apiserver_up",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "API Server",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "red", "value": null },
|
||||
{ "color": "green", "value": 1 }
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 },
|
||||
"id": 3,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "minikube_node_count",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Node Count",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [{ "color": "green", "value": null }]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 },
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "minikube_pod_count",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Pod Count",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [{ "color": "green", "value": null }]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 16, "y": 0 },
|
||||
"id": 5,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "minikube_namespace_count",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Namespaces",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [{ "color": "green", "value": null }]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "minikube_up",
|
||||
"legendFormat": "Minikube",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "minikube_apiserver_up",
|
||||
"legendFormat": "API Server",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Cluster Health Over Time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [{ "color": "green", "value": null }]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull", "max"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "minikube_pod_count",
|
||||
"legendFormat": "Pods",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "minikube_namespace_count",
|
||||
"legendFormat": "Namespaces",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Resource Counts Over Time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "loki"
|
||||
},
|
||||
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 12 },
|
||||
"id": 8,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": false,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": true,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": false
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "loki", "uid": "loki" },
|
||||
"expr": "{host=\"indri\"} |= \"minikube\" or {host=\"indri\"} |= \"kube\"",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Kubernetes Related Logs",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"tags": ["minikube", "kubernetes", "k8s"],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Minikube Kubernetes",
|
||||
"uid": "minikube",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
488
ansible/roles/grafana/files/dashboards/zot.json
Normal file
488
ansible/roles/grafana/files/dashboards/zot.json
Normal file
|
|
@ -0,0 +1,488 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": { "color": "red", "index": 0, "text": "DOWN" }
|
||||
},
|
||||
"type": "value"
|
||||
},
|
||||
{
|
||||
"options": {
|
||||
"1": { "color": "green", "index": 1, "text": "UP" }
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "red", "value": null },
|
||||
{ "color": "green", "value": 1 }
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 },
|
||||
"id": 1,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "zot_up",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Zot Status",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [{ "color": "green", "value": null }]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 },
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "go_goroutines{job=\"prometheus.scrape.zot\"}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Goroutines",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 536870912 },
|
||||
{ "color": "red", "value": 1073741824 }
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 },
|
||||
"id": 3,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(zot_repo_storage_bytes)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Storage",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [{ "color": "green", "value": null }]
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 4, "w": 4, "x": 12, "y": 0 },
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum(rate(zot_http_requests_total{job=\"prometheus.scrape.zot\"}[5m]))",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Request Rate",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [{ "color": "green", "value": null }]
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["mean", "max"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum by (method) (rate(zot_http_requests_total{job=\"prometheus.scrape.zot\"}[5m]))",
|
||||
"legendFormat": "{{method}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "HTTP Requests by Method",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [{ "color": "green", "value": null }]
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["mean", "max"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "sum by (code) (rate(zot_http_requests_total{job=\"prometheus.scrape.zot\"}[5m]))",
|
||||
"legendFormat": "{{code}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "HTTP Requests by Status Code",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [{ "color": "green", "value": null }]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 },
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["mean", "p95"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.50, sum(rate(zot_http_method_latency_seconds_bucket{job=\"prometheus.scrape.zot\"}[5m])) by (le))",
|
||||
"legendFormat": "p50",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.95, sum(rate(zot_http_method_latency_seconds_bucket{job=\"prometheus.scrape.zot\"}[5m])) by (le))",
|
||||
"legendFormat": "p95",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "histogram_quantile(0.99, sum(rate(zot_http_method_latency_seconds_bucket{job=\"prometheus.scrape.zot\"}[5m])) by (le))",
|
||||
"legendFormat": "p99",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "HTTP Request Latency",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": { "legend": false, "tooltip": false, "viz": false },
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": { "type": "linear" },
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": { "group": "A", "mode": "none" },
|
||||
"thresholdsStyle": { "mode": "off" }
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [{ "color": "green", "value": null }]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 },
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": { "mode": "multi", "sort": "desc" }
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "prometheus" },
|
||||
"expr": "zot_repo_storage_bytes",
|
||||
"legendFormat": "{{repo}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Storage by Repository",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"tags": ["zot", "registry", "oci"],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Zot Container Registry",
|
||||
"uid": "zot",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
14
ansible/roles/minikube/defaults/main.yml
Normal file
14
ansible/roles/minikube/defaults/main.yml
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
---
|
||||
# Minikube cluster configuration
|
||||
minikube_cpus: 4
|
||||
# Note: Must be less than podman machine memory (8192MB) to account for overhead
|
||||
minikube_memory: 7800
|
||||
minikube_disk_size: "200g"
|
||||
minikube_driver: podman
|
||||
minikube_container_runtime: cri-o
|
||||
|
||||
# Remote access configuration
|
||||
# These allow kubectl from other machines (e.g., gilbert) to connect
|
||||
minikube_apiserver_names:
|
||||
- indri
|
||||
minikube_listen_address: "0.0.0.0"
|
||||
9
ansible/roles/minikube/handlers/main.yml
Normal file
9
ansible/roles/minikube/handlers/main.yml
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
---
|
||||
# Minikube handlers
|
||||
# Note: Restarting minikube is a heavy operation and may require manual intervention
|
||||
|
||||
- name: Restart minikube
|
||||
ansible.builtin.shell: |
|
||||
minikube stop 2>/dev/null || true
|
||||
minikube start
|
||||
changed_when: true
|
||||
56
ansible/roles/minikube/tasks/main.yml
Normal file
56
ansible/roles/minikube/tasks/main.yml
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
---
|
||||
# Minikube installation and cluster setup for indri
|
||||
# Requires podman machine to be running (see podman role)
|
||||
#
|
||||
# NOTE: Similar to podman, minikube start may have issues when run via SSH.
|
||||
# If cluster fails to start, manually run on indri:
|
||||
# minikube start --driver=podman --container-runtime=cri-o \
|
||||
# --cpus=4 --memory=7800 --disk-size=200g \
|
||||
# --apiserver-names=indri --listen-address=0.0.0.0
|
||||
|
||||
- name: Install minikube via homebrew
|
||||
community.general.homebrew:
|
||||
name: minikube
|
||||
state: present
|
||||
|
||||
- name: Install kubectl via homebrew
|
||||
community.general.homebrew:
|
||||
name: kubectl
|
||||
state: present
|
||||
|
||||
- name: Check if minikube cluster exists
|
||||
ansible.builtin.command:
|
||||
cmd: minikube status --format={% raw %}'{{.Host}}'{% endraw %}
|
||||
register: minikube_status
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Start minikube cluster
|
||||
ansible.builtin.command:
|
||||
cmd: >
|
||||
minikube start
|
||||
--driver={{ minikube_driver }}
|
||||
--container-runtime={{ minikube_container_runtime }}
|
||||
--cpus={{ minikube_cpus }}
|
||||
--memory={{ minikube_memory }}
|
||||
--disk-size={{ minikube_disk_size }}
|
||||
{% for name in minikube_apiserver_names %}
|
||||
--apiserver-names={{ name }}
|
||||
{% endfor %}
|
||||
--listen-address={{ minikube_listen_address }}
|
||||
register: minikube_start
|
||||
changed_when: minikube_start.rc == 0
|
||||
failed_when: false # Don't fail - may need manual intervention like podman
|
||||
when: minikube_status.rc != 0 or 'Running' not in minikube_status.stdout
|
||||
|
||||
- name: Check minikube status after start attempt
|
||||
ansible.builtin.command:
|
||||
cmd: minikube status --format={% raw %}'{{.Host}}'{% endraw %}
|
||||
register: minikube_final_status
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Warn if minikube failed to start
|
||||
ansible.builtin.debug:
|
||||
msg: "WARNING: minikube may not have started properly. Run 'minikube start' manually on indri if needed. Status: {{ minikube_final_status.stdout | default('unknown') }}"
|
||||
when: minikube_final_status.rc != 0 or 'Running' not in minikube_final_status.stdout
|
||||
5
ansible/roles/minikube_metrics/defaults/main.yml
Normal file
5
ansible/roles/minikube_metrics/defaults/main.yml
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
minikube_metrics_dir: /opt/homebrew/var/node_exporter/textfile
|
||||
minikube_metrics_script: /Users/erichblume/bin/minikube-metrics
|
||||
minikube_metrics_interval: 60 # seconds between metric collection
|
||||
minikube_metrics_log_dir: /opt/homebrew/var/log
|
||||
6
ansible/roles/minikube_metrics/handlers/main.yml
Normal file
6
ansible/roles/minikube_metrics/handlers/main.yml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
- name: Reload minikube-metrics
|
||||
ansible.builtin.shell: |
|
||||
launchctl unload ~/Library/LaunchAgents/mcquack.eblume.minikube-metrics.plist 2>/dev/null || true
|
||||
launchctl load ~/Library/LaunchAgents/mcquack.eblume.minikube-metrics.plist
|
||||
changed_when: true
|
||||
43
ansible/roles/minikube_metrics/tasks/main.yml
Normal file
43
ansible/roles/minikube_metrics/tasks/main.yml
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
---
|
||||
- name: Ensure metrics directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ minikube_metrics_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Ensure log directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ minikube_metrics_log_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Ensure bin directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ minikube_metrics_script | dirname }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Deploy minikube-metrics script
|
||||
ansible.builtin.template:
|
||||
src: minikube-metrics.sh.j2
|
||||
dest: "{{ minikube_metrics_script }}"
|
||||
mode: '0755'
|
||||
|
||||
- name: Deploy minikube-metrics LaunchAgent plist
|
||||
ansible.builtin.template:
|
||||
src: minikube-metrics.plist.j2
|
||||
dest: ~/Library/LaunchAgents/mcquack.eblume.minikube-metrics.plist
|
||||
mode: '0644'
|
||||
notify: Reload minikube-metrics
|
||||
|
||||
- name: Check if minikube-metrics LaunchAgent is loaded
|
||||
ansible.builtin.command: launchctl list mcquack.eblume.minikube-metrics
|
||||
register: minikube_metrics_launchctl_check
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Load minikube-metrics LaunchAgent if not loaded
|
||||
ansible.builtin.command: launchctl load ~/Library/LaunchAgents/mcquack.eblume.minikube-metrics.plist
|
||||
when: minikube_metrics_launchctl_check.rc != 0
|
||||
changed_when: true
|
||||
failed_when: false
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- {{ ansible_managed }} -->
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>Label</key>
|
||||
<string>mcquack.eblume.minikube-metrics</string>
|
||||
<key>ProgramArguments</key>
|
||||
<array>
|
||||
<string>{{ minikube_metrics_script }}</string>
|
||||
</array>
|
||||
<key>StartInterval</key>
|
||||
<integer>{{ minikube_metrics_interval }}</integer>
|
||||
<key>RunAtLoad</key>
|
||||
<true/>
|
||||
<key>StandardErrorPath</key>
|
||||
<string>{{ minikube_metrics_log_dir }}/mcquack.minikube-metrics.err.log</string>
|
||||
<key>StandardOutPath</key>
|
||||
<string>{{ minikube_metrics_log_dir }}/mcquack.minikube-metrics.out.log</string>
|
||||
</dict>
|
||||
</plist>
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
#!/bin/bash
# {{ ansible_managed }}
# Collects minikube/kubernetes metrics for node_exporter textfile collector
#
# Writes a complete metric set to a temp file and renames it into place, so
# node_exporter never reads a partially written file.
#
# NOTE(review): this script is started by launchd; confirm that minikube and
# kubectl are on the PATH launchd provides, otherwise minikube_up will always
# be reported as 0.

set -euo pipefail

OUTPUT_FILE="{{ minikube_metrics_dir }}/minikube.prom"
TEMP_FILE="${OUTPUT_FILE}.tmp"

# Never leave a stale temp file behind if the script exits early.
# After a successful mv the temp file no longer exists and rm -f is a no-op.
trap 'rm -f "$TEMP_FILE"' EXIT

# Print the number of rows returned by `kubectl <args> --no-headers`,
# or 0 if kubectl fails or returns nothing.
#
# kubectl is deliberately run on its own rather than as the first stage of a
# pipeline: with `set -e` and `pipefail`, a failing kubectl inside
# `$(kubectl ... | wc -l)` would abort the whole script before the metrics
# file is published.
count_rows() {
    local listing
    if listing=$(kubectl "$@" --no-headers 2>/dev/null) && [ -n "$listing" ]; then
        printf '%s\n' "$listing" | wc -l | tr -d ' '
    else
        echo 0
    fi
}

# Start output file
cat > "$TEMP_FILE" << 'HEADER'
# HELP minikube_up Minikube cluster is running
# TYPE minikube_up gauge
# HELP minikube_apiserver_up Kubernetes API server is responding
# TYPE minikube_apiserver_up gauge
# HELP minikube_node_count Number of nodes in the cluster
# TYPE minikube_node_count gauge
# HELP minikube_pod_count Number of pods in the cluster
# TYPE minikube_pod_count gauge
# HELP minikube_namespace_count Number of namespaces in the cluster
# TYPE minikube_namespace_count gauge
HEADER

# Check if minikube is running
if minikube status --format='{% raw %}{{.Host}}{% endraw %}' 2>/dev/null | grep -q "Running"; then
    echo "minikube_up 1" >> "$TEMP_FILE"
else
    # Host is down: publish zeros for every metric and stop here.
    echo "minikube_up 0" >> "$TEMP_FILE"
    echo "minikube_apiserver_up 0" >> "$TEMP_FILE"
    echo "minikube_node_count 0" >> "$TEMP_FILE"
    echo "minikube_pod_count 0" >> "$TEMP_FILE"
    echo "minikube_namespace_count 0" >> "$TEMP_FILE"
    mv "$TEMP_FILE" "$OUTPUT_FILE"
    exit 0
fi

# Check API server health
if kubectl get --raw /healthz >/dev/null 2>&1; then
    echo "minikube_apiserver_up 1" >> "$TEMP_FILE"
else
    echo "minikube_apiserver_up 0" >> "$TEMP_FILE"
fi

# Get node count
NODE_COUNT=$(count_rows get nodes)
echo "minikube_node_count ${NODE_COUNT:-0}" >> "$TEMP_FILE"

# Get pod count (all namespaces)
POD_COUNT=$(count_rows get pods -A)
echo "minikube_pod_count ${POD_COUNT:-0}" >> "$TEMP_FILE"

# Get namespace count
NS_COUNT=$(count_rows get namespaces)
echo "minikube_namespace_count ${NS_COUNT:-0}" >> "$TEMP_FILE"

# Atomic move
mv "$TEMP_FILE" "$OUTPUT_FILE"
|
||||
3
ansible/roles/podman/handlers/main.yml
Normal file
3
ansible/roles/podman/handlers/main.yml
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
---
|
||||
# No handlers currently - podman machine start is unreliable via Ansible
|
||||
# See known issue in tasks/main.yml
|
||||
55
ansible/roles/podman/tasks/main.yml
Normal file
55
ansible/roles/podman/tasks/main.yml
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
---
|
||||
# Podman installation and machine setup for indri
|
||||
# Used as container runtime for minikube
|
||||
#
|
||||
# KNOWN ISSUE: podman machine init/start has reliability issues when run via
|
||||
# Ansible/SSH. The machine sometimes gets stuck in "Starting" state due to a
|
||||
# race condition (see https://github.com/containers/podman/issues/16945).
|
||||
# Additionally, Apple Hypervisor may require GUI session context.
|
||||
#
|
||||
# WORKAROUND: If the machine fails to start via Ansible, manually run on indri:
|
||||
# podman machine rm -f podman-machine-default
|
||||
# podman machine init --cpus 4 --memory 8192 --disk-size 220
|
||||
# podman machine start
|
||||
#
|
||||
# TODO: Investigate proper LaunchAgent or other solution for reliable automation.
|
||||
|
||||
- name: Install podman via homebrew
|
||||
community.general.homebrew:
|
||||
name: podman
|
||||
state: present
|
||||
|
||||
- name: Check if podman machine exists
|
||||
ansible.builtin.command:
|
||||
cmd: podman machine list --format json
|
||||
register: podman_machine_list
|
||||
changed_when: false
|
||||
|
||||
- name: Initialize podman machine (if not exists)
|
||||
ansible.builtin.command:
|
||||
cmd: podman machine init --cpus 4 --memory 8192 --disk-size 220
|
||||
register: podman_init
|
||||
changed_when: podman_init.rc == 0
|
||||
failed_when: podman_init.rc not in [0, 125] # 125 = already exists
|
||||
when: podman_machine_list.stdout == '[]'
|
||||
|
||||
- name: Check if podman machine is running
|
||||
ansible.builtin.command:
|
||||
cmd: podman machine list --format "{{ '{{' }}.Running{{ '}}' }}"
|
||||
register: podman_running
|
||||
changed_when: false
|
||||
|
||||
- name: Start podman machine (if stopped)
|
||||
ansible.builtin.command:
|
||||
cmd: podman machine start
|
||||
register: podman_start
|
||||
changed_when: "'started successfully' in podman_start.stdout"
|
||||
failed_when: false # Don't fail - see known issue above
|
||||
when: "'true' not in podman_running.stdout"
|
||||
|
||||
# Emit a manual-intervention hint when the machine was not running before this
# play and the start attempt did not report success.
# The second condition must be a bare Jinja expression: wrapping the
# `not in` test in quotes turns it into a non-empty string literal, which is
# always truthy and makes the warning fire even after a successful start.
- name: Warn if podman machine failed to start
  ansible.builtin.debug:
    msg: "WARNING: podman machine may not have started. Run 'podman machine start' manually on indri if needed."
  when:
    # Same guard as the start task, so podman_start is only read when that task ran.
    - "'true' not in podman_running.stdout"
    - podman_start.rc != 0 or 'started successfully' not in (podman_start.stdout | default(''))
|
||||
|
|
@ -35,3 +35,8 @@ tailscale_serve_services:
|
|||
https:
|
||||
port: 443
|
||||
upstream: http://localhost:8080
|
||||
|
||||
- name: svc:registry
|
||||
https:
|
||||
port: 443
|
||||
upstream: http://localhost:5050
|
||||
|
|
|
|||
16
ansible/roles/zot/defaults/main.yml
Normal file
16
ansible/roles/zot/defaults/main.yml
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
---
|
||||
zot_repo_dir: /Users/erichblume/code/3rd/zot
|
||||
zot_binary: "{{ zot_repo_dir }}/bin/zot-darwin-arm64"
|
||||
zot_data_dir: /Users/erichblume/zot
|
||||
zot_config_dir: /Users/erichblume/.config/zot
|
||||
zot_port: 5050
|
||||
zot_log_dir: /Users/erichblume/Library/Logs
|
||||
|
||||
# Pull-through cache registries (on-demand sync)
|
||||
zot_sync_registries:
|
||||
- name: docker.io
|
||||
url: https://registry-1.docker.io
|
||||
- name: ghcr.io
|
||||
url: https://ghcr.io
|
||||
- name: quay.io
|
||||
url: https://quay.io
|
||||
6
ansible/roles/zot/handlers/main.yml
Normal file
6
ansible/roles/zot/handlers/main.yml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
- name: Restart zot
|
||||
ansible.builtin.shell: |
|
||||
launchctl unload ~/Library/LaunchAgents/mcquack.eblume.zot.plist 2>/dev/null || true
|
||||
launchctl load ~/Library/LaunchAgents/mcquack.eblume.zot.plist
|
||||
changed_when: true
|
||||
66
ansible/roles/zot/tasks/main.yml
Normal file
66
ansible/roles/zot/tasks/main.yml
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
---
|
||||
# Note: Zot is built from source, not installed via homebrew.
|
||||
#
|
||||
# ONE-TIME SETUP (before running ansible):
|
||||
#
|
||||
# 1. Clone zot from forge mirror (use localhost:3001 - hairpinning doesn't work):
|
||||
# ssh indri 'git clone http://localhost:3001/eblume/zot.git ~/code/3rd/zot'
|
||||
#
|
||||
# 2. Set up Go via mise:
|
||||
# ssh indri 'cd ~/code/3rd/zot && mise use go@1.25'
|
||||
#
|
||||
# 3. Build (creates bin/zot-darwin-arm64):
|
||||
# ssh indri 'cd ~/code/3rd/zot && mise x -- make binary'
|
||||
#
|
||||
# 4. Run ansible to deploy config and LaunchAgent
|
||||
|
||||
- name: Verify zot binary exists
|
||||
ansible.builtin.stat:
|
||||
path: "{{ zot_binary }}"
|
||||
register: zot_binary_stat
|
||||
|
||||
- name: Fail if zot binary not found
|
||||
ansible.builtin.fail:
|
||||
msg: |
|
||||
Zot binary not found at {{ zot_binary }}.
|
||||
Please build from source first:
|
||||
ssh indri 'cd ~/code/3rd/zot && mise x -- make binary'
|
||||
when: not zot_binary_stat.stat.exists
|
||||
|
||||
- name: Ensure zot data directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ zot_data_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Ensure zot config directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ zot_config_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Deploy zot config
|
||||
ansible.builtin.template:
|
||||
src: config.json.j2
|
||||
dest: "{{ zot_config_dir }}/config.json"
|
||||
mode: '0644'
|
||||
notify: Restart zot
|
||||
|
||||
- name: Deploy zot LaunchAgent plist
|
||||
ansible.builtin.template:
|
||||
src: zot.plist.j2
|
||||
dest: ~/Library/LaunchAgents/mcquack.eblume.zot.plist
|
||||
mode: '0644'
|
||||
notify: Restart zot
|
||||
|
||||
- name: Check if zot LaunchAgent is loaded
|
||||
ansible.builtin.command: launchctl list mcquack.eblume.zot
|
||||
register: zot_launchctl_check
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Load zot LaunchAgent if not loaded
|
||||
ansible.builtin.command: launchctl load ~/Library/LaunchAgents/mcquack.eblume.zot.plist
|
||||
when: zot_launchctl_check.rc != 0
|
||||
changed_when: true
|
||||
failed_when: false
|
||||
47
ansible/roles/zot/templates/config.json.j2
Normal file
47
ansible/roles/zot/templates/config.json.j2
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
{
|
||||
"distSpecVersion": "1.1.0",
|
||||
"storage": {
|
||||
"rootDirectory": "{{ zot_data_dir }}",
|
||||
"gc": true,
|
||||
"gcDelay": "1h",
|
||||
"gcInterval": "24h"
|
||||
},
|
||||
"http": {
|
||||
"address": "0.0.0.0",
|
||||
"port": "{{ zot_port }}"
|
||||
},
|
||||
"log": {
|
||||
"level": "info"
|
||||
},
|
||||
"extensions": {
|
||||
"metrics": {
|
||||
"enable": true,
|
||||
"prometheus": {
|
||||
"path": "/metrics"
|
||||
}
|
||||
},
|
||||
"sync": {
|
||||
"enable": true,
|
||||
"registries": [
|
||||
{% for registry in zot_sync_registries %}
|
||||
{
|
||||
"urls": ["{{ registry.url }}"],
|
||||
"content": [{"prefix": "**", "destination": "/{{ registry.name }}"}],
|
||||
"onDemand": true,
|
||||
"tlsVerify": true
|
||||
}{% if not loop.last %},{% endif %}
|
||||
|
||||
{% endfor %}
|
||||
]
|
||||
},
|
||||
"search": {
|
||||
"enable": true,
|
||||
"cve": {
|
||||
"updateInterval": "24h"
|
||||
}
|
||||
},
|
||||
"ui": {
|
||||
"enable": true
|
||||
}
|
||||
}
|
||||
}
|
||||
24
ansible/roles/zot/templates/zot.plist.j2
Normal file
24
ansible/roles/zot/templates/zot.plist.j2
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- {{ ansible_managed }} -->
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>Label</key>
|
||||
<string>mcquack.eblume.zot</string>
|
||||
<key>ProgramArguments</key>
|
||||
<array>
|
||||
<!-- ABSOLUTE PATH to built binary in ~/code/3rd/zot -->
|
||||
<string>{{ zot_binary }}</string>
|
||||
<string>serve</string>
|
||||
<string>{{ zot_config_dir }}/config.json</string>
|
||||
</array>
|
||||
<key>RunAtLoad</key>
|
||||
<true/>
|
||||
<key>KeepAlive</key>
|
||||
<true/>
|
||||
<key>StandardOutPath</key>
|
||||
<string>{{ zot_log_dir }}/mcquack.zot.out.log</string>
|
||||
<key>StandardErrorPath</key>
|
||||
<string>{{ zot_log_dir }}/mcquack.zot.err.log</string>
|
||||
</dict>
|
||||
</plist>
|
||||
6
ansible/roles/zot_metrics/defaults/main.yml
Normal file
6
ansible/roles/zot_metrics/defaults/main.yml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
zot_metrics_url: http://localhost:5050/v2/_catalog
|
||||
zot_metrics_dir: /opt/homebrew/var/node_exporter/textfile
|
||||
zot_metrics_script: /Users/erichblume/bin/zot-metrics
|
||||
zot_metrics_interval: 60 # seconds between metric collection
|
||||
zot_metrics_log_dir: /opt/homebrew/var/log
|
||||
6
ansible/roles/zot_metrics/handlers/main.yml
Normal file
6
ansible/roles/zot_metrics/handlers/main.yml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
- name: Reload zot-metrics
|
||||
ansible.builtin.shell: |
|
||||
launchctl unload ~/Library/LaunchAgents/mcquack.eblume.zot-metrics.plist 2>/dev/null || true
|
||||
launchctl load ~/Library/LaunchAgents/mcquack.eblume.zot-metrics.plist
|
||||
changed_when: true
|
||||
43
ansible/roles/zot_metrics/tasks/main.yml
Normal file
43
ansible/roles/zot_metrics/tasks/main.yml
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
---
|
||||
- name: Ensure metrics directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ zot_metrics_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Ensure log directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ zot_metrics_log_dir }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Ensure bin directory exists
|
||||
ansible.builtin.file:
|
||||
path: "{{ zot_metrics_script | dirname }}"
|
||||
state: directory
|
||||
mode: '0755'
|
||||
|
||||
- name: Deploy zot-metrics script
|
||||
ansible.builtin.template:
|
||||
src: zot-metrics.sh.j2
|
||||
dest: "{{ zot_metrics_script }}"
|
||||
mode: '0755'
|
||||
|
||||
- name: Deploy zot-metrics LaunchAgent plist
|
||||
ansible.builtin.template:
|
||||
src: zot-metrics.plist.j2
|
||||
dest: ~/Library/LaunchAgents/mcquack.eblume.zot-metrics.plist
|
||||
mode: '0644'
|
||||
notify: Reload zot-metrics
|
||||
|
||||
- name: Check if zot-metrics LaunchAgent is loaded
|
||||
ansible.builtin.command: launchctl list mcquack.eblume.zot-metrics
|
||||
register: zot_metrics_launchctl_check
|
||||
changed_when: false
|
||||
failed_when: false
|
||||
|
||||
- name: Load zot-metrics LaunchAgent if not loaded
|
||||
ansible.builtin.command: launchctl load ~/Library/LaunchAgents/mcquack.eblume.zot-metrics.plist
|
||||
when: zot_metrics_launchctl_check.rc != 0
|
||||
changed_when: true
|
||||
failed_when: false
|
||||
21
ansible/roles/zot_metrics/templates/zot-metrics.plist.j2
Normal file
21
ansible/roles/zot_metrics/templates/zot-metrics.plist.j2
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- {{ ansible_managed }} -->
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>Label</key>
|
||||
<string>mcquack.eblume.zot-metrics</string>
|
||||
<key>ProgramArguments</key>
|
||||
<array>
|
||||
<string>{{ zot_metrics_script }}</string>
|
||||
</array>
|
||||
<key>StartInterval</key>
|
||||
<integer>{{ zot_metrics_interval }}</integer>
|
||||
<key>RunAtLoad</key>
|
||||
<true/>
|
||||
<key>StandardErrorPath</key>
|
||||
<string>{{ zot_metrics_log_dir }}/mcquack.zot-metrics.err.log</string>
|
||||
<key>StandardOutPath</key>
|
||||
<string>{{ zot_metrics_log_dir }}/mcquack.zot-metrics.out.log</string>
|
||||
</dict>
|
||||
</plist>
|
||||
25
ansible/roles/zot_metrics/templates/zot-metrics.sh.j2
Normal file
25
ansible/roles/zot_metrics/templates/zot-metrics.sh.j2
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
#!/bin/bash
# {{ ansible_managed }}
# Collects Zot registry metrics for node_exporter textfile collector
#
# Probes the registry URL once and publishes a single zot_up gauge. The metric
# file is written to a temp path and renamed into place so node_exporter never
# reads a partial file.

set -euo pipefail

METRICS_URL="{{ zot_metrics_url }}"
OUTPUT_FILE="{{ zot_metrics_dir }}/zot.prom"
TEMP_FILE="${OUTPUT_FILE}.tmp"

# Upper bound (seconds) for the whole probe. Without it, a registry that
# accepts the connection but never answers would block this run indefinitely
# and the previous zot_up value would stay in place.
PROBE_TIMEOUT=10

# Remove the temp file if the script exits before the final rename.
trap 'rm -f "$TEMP_FILE"' EXIT

# Start output file with header
cat > "$TEMP_FILE" << 'HEADER'
# HELP zot_up Zot registry is up and responding
# TYPE zot_up gauge
HEADER

# Check if zot is up (-f makes HTTP error statuses count as failure)
if curl -sf --max-time "$PROBE_TIMEOUT" "$METRICS_URL" > /dev/null 2>&1; then
    echo "zot_up 1" >> "$TEMP_FILE"
else
    echo "zot_up 0" >> "$TEMP_FILE"
fi

# Atomic move
mv "$TEMP_FILE" "$OUTPUT_FILE"
|
||||
31
bin/kubectl-credential-1password
Executable file
31
bin/kubectl-credential-1password
Executable file
|
|
@ -0,0 +1,31 @@
|
|||
#!/bin/bash
# kubectl exec credential plugin for 1Password
# Usage: kubectl-credential-1password <vault-id> <item-id> <cert-field> <key-field>
#
# Fetches client certificate and key from 1Password and outputs
# ExecCredential JSON for kubectl authentication.
#
# Exits non-zero, with a message on stderr, if arguments are missing, if `op`
# fails, or if either field comes back empty. Nothing is printed on stdout in
# those cases.

set -euo pipefail

# Fail with a usage message instead of bash's bare "unbound variable" error.
# Extra arguments are tolerated, as before.
if [ "$#" -lt 4 ]; then
    echo "Usage: $(basename "$0") <vault-id> <item-id> <cert-field> <key-field>" >&2
    exit 64
fi

VAULT_ID="$1"
ITEM_ID="$2"
CERT_FIELD="$3"
KEY_FIELD="$4"

# Print one field of the configured item, stripping the surrounding double
# quotes found on text-field output.
fetch_field() {
    op --vault "$VAULT_ID" item get "$ITEM_ID" --fields "$1" | sed 's/^"//; s/"$//'
}

# Fetch credentials from 1Password (strips surrounding quotes from text fields)
CLIENT_CERT=$(fetch_field "$CERT_FIELD")
CLIENT_KEY=$(fetch_field "$KEY_FIELD")

# Refuse to hand kubectl an empty credential; it would only surface later as
# an opaque TLS/authentication error.
if [ -z "$CLIENT_CERT" ] || [ -z "$CLIENT_KEY" ]; then
    echo "kubectl-credential-1password: empty certificate or key returned for item $ITEM_ID" >&2
    exit 1
fi

# Output ExecCredential JSON
# Note: jq is used to properly escape the PEM data for JSON
jq -n \
    --arg cert "$CLIENT_CERT" \
    --arg key "$CLIENT_KEY" \
    '{
        "apiVersion": "client.authentication.k8s.io/v1beta1",
        "kind": "ExecCredential",
        "status": {
            "clientCertificateData": $cert,
            "clientKeyData": $key
        }
    }'
|
||||
|
|
@ -53,15 +53,18 @@ check_service "forgejo" "ssh indri 'brew services list | grep forgejo | grep sta
|
|||
check_service "devpi" "ssh indri 'launchctl list | grep devpi | grep -v \"^-\"'"
|
||||
check_service "postgresql" "ssh indri 'brew services list | grep postgresql | grep started'"
|
||||
check_service "miniflux" "ssh indri 'brew services list | grep miniflux | grep started'"
|
||||
check_service "zot" "ssh indri 'launchctl list | grep mcquack.eblume.zot | grep -v \"^-\"'"
|
||||
check_service "zot-metrics" "ssh indri 'launchctl list | grep zot-metrics | grep -v \"^-\"'"
|
||||
check_service "minikube-metrics" "ssh indri 'launchctl list | grep minikube-metrics | grep -v \"^-\"'"
|
||||
|
||||
echo ""
|
||||
echo "HTTP endpoints (via Tailscale):"
|
||||
check_http "Loki" "http://indri:3100/ready"
|
||||
check_http "Prometheus" "http://indri:9090/-/healthy"
|
||||
check_http "Grafana" "http://indri:3000/api/health"
|
||||
check_http "Kiwix" "http://indri:5501/"
|
||||
check_http "Forgejo" "http://indri:3001/"
|
||||
check_http "Devpi" "http://indri:3141/+api"
|
||||
check_http "Grafana" "https://grafana.tail8d86e.ts.net/api/health"
|
||||
check_http "Kiwix" "https://kiwix.tail8d86e.ts.net/"
|
||||
check_http "Forgejo" "https://forge.tail8d86e.ts.net/"
|
||||
check_http "Devpi" "https://pypi.tail8d86e.ts.net/+api"
|
||||
check_http "Miniflux" "https://feed.tail8d86e.ts.net/healthcheck"
|
||||
# Transmission RPC is localhost-only by design, check via SSH
|
||||
check_service "Transmission RPC" "ssh indri 'curl -sf http://127.0.0.1:9091/transmission/rpc'"
|
||||
|
|
@ -69,6 +72,16 @@ check_service "Transmission RPC" "ssh indri 'curl -sf http://127.0.0.1:9091/tran
|
|||
check_service "Transmission metrics" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/transmission.prom'"
|
||||
# PostgreSQL uses TCP not HTTP, check via pg_isready
|
||||
check_service "PostgreSQL" "ssh indri '/opt/homebrew/opt/postgresql@18/bin/pg_isready -h localhost'"
|
||||
# Zot registry (via Tailscale service)
|
||||
check_http "Zot Registry" "https://registry.tail8d86e.ts.net/v2/_catalog"
|
||||
check_service "Zot metrics file" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/zot.prom'"
|
||||
check_service "Minikube metrics file" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/minikube.prom'"
|
||||
|
||||
echo ""
|
||||
echo "Kubernetes cluster:"
|
||||
check_service "minikube" "ssh indri 'minikube status --format={{.Host}} | grep -q Running'"
|
||||
check_service "k8s-apiserver (indri)" "ssh indri 'kubectl get --raw /healthz'"
|
||||
check_service "k8s-apiserver (remote)" "kubectl --kubeconfig=$HOME/.kube/minikube-indri/config.yml --context=minikube-indri get --raw /healthz"
|
||||
|
||||
echo ""
|
||||
if [ $FAILED -eq 0 ]; then
|
||||
|
|
|
|||
|
|
@ -130,6 +130,9 @@ mise run tailnet-preview # Review changes - should show new tag
|
|||
mise run tailnet-up # Apply changes
|
||||
```
|
||||
|
||||
**Implementation Details:**
|
||||
- Also need to add `"tag:registry"` to indri's tags in `pulumi/__main__.py` (the `DeviceTags` resource), not just define it in `policy.hujson`. The policy file defines the tag ownership rules, but the device tags are managed separately in the Python code.
|
||||
|
||||
---
|
||||
|
||||
### Step 0.2: Create Tailscale Services in Admin Console (MANUAL)
|
||||
|
|
@ -140,7 +143,9 @@ mise run tailnet-up # Apply changes
|
|||
2. Create service `registry` with:
|
||||
- Port: 443 (HTTPS)
|
||||
- Host: indri
|
||||
3. Apply tag `tag:registry` to indri if not already tagged
|
||||
|
||||
**Implementation Details:**
|
||||
- Tag is applied to indri via Pulumi in Step 0.1, not manually in admin console.
|
||||
|
||||
**Verification:**
|
||||
```bash
|
||||
|
|
@ -319,6 +324,10 @@ ssh indri 'curl -s http://localhost:5000/v2/_catalog'
|
|||
# Expected: {"repositories":["docker.io/library/alpine"]}
|
||||
```
|
||||
|
||||
**Implementation Details:**
|
||||
- Changed port from 5000 to 5050 because macOS ControlCenter (AirPlay Receiver) uses port 5000 by default.
|
||||
- Fixed sync config: use `"content": [{"prefix": "**", "destination": "/{{ registry.name }}"}]` instead of `"prefix": "{{ registry.name }}/**"`. The destination rewrites the local path, while prefix `**` matches all upstream repos.
|
||||
|
||||
---
|
||||
|
||||
### Step 0.4: Add Zot to Tailscale Serve
|
||||
|
|
@ -352,6 +361,11 @@ curl -s https://registry.tail8d86e.ts.net/v2/_catalog
|
|||
# Expected: {"repositories":["blumeops/test","docker.io/library/alpine"]}
|
||||
```
|
||||
|
||||
**Implementation Details:**
|
||||
- Changed upstream port from 5000 to 5050 (see Step 0.3 implementation details).
|
||||
- After running `tailscale serve`, the service must be approved in Tailscale admin console at https://login.tailscale.com/admin/services before it becomes accessible.
|
||||
- Podman needed on gilbert for testing - added to Brewfile. Requires `podman machine init && podman machine start` after install.
|
||||
|
||||
---
|
||||
|
||||
### Step 0.5: Create Zot Metrics Role
|
||||
|
|
@ -461,6 +475,9 @@ mise run indri-services-check
|
|||
# Zot metrics... OK
|
||||
```
|
||||
|
||||
**Implementation Details:**
|
||||
- Used Tailscale service URL (`https://registry.tail8d86e.ts.net/v2/_catalog`) instead of internal endpoint to verify full path works.
|
||||
|
||||
---
|
||||
|
||||
### Step 0.8: Install and Configure Podman on Indri
|
||||
|
|
@ -504,6 +521,17 @@ ssh indri 'podman info'
|
|||
ssh indri 'podman run --rm hello-world'
|
||||
```
|
||||
|
||||
**Implementation Details:**
|
||||
- **KNOWN ISSUE**: `podman machine init` and `podman machine start` have reliability issues when run via Ansible/SSH. The machine sometimes gets stuck in "Starting" state due to a race condition (see https://github.com/containers/podman/issues/16945). Apple Hypervisor may also require GUI session context.
|
||||
- **WORKAROUND**: If the machine fails to start via Ansible, manually run on indri:
|
||||
```bash
|
||||
podman machine rm -f podman-machine-default
|
||||
podman machine init --cpus 4 --memory 8192 --disk-size 220
|
||||
podman machine start
|
||||
```
|
||||
- LaunchAgent approach was attempted but didn't resolve the issue reliably.
|
||||
- TODO: Investigate proper automation solution for reliable podman machine management.
|
||||
|
||||
---
|
||||
|
||||
### Step 0.9: Install and Configure Minikube
|
||||
|
|
@ -570,6 +598,10 @@ ssh indri 'kubectl get nodes'
|
|||
# Expected: minikube Ready control-plane ...
|
||||
```
|
||||
|
||||
**Implementation Details:**
|
||||
- Changed `minikube_memory` from 8192 to 7800 because podman machine reports slightly less available memory (7908MB) due to VM overhead. Minikube rejects memory requests exceeding what podman reports.
|
||||
- Deployed with Kubernetes v1.34.0 and CRI-O 1.24.6.
|
||||
|
||||
---
|
||||
|
||||
### Step 0.10: Configure Kubeconfig on Gilbert
|
||||
|
|
@ -597,6 +629,93 @@ k9s # Should show the minikube cluster
|
|||
|
||||
The exact approach will be determined during implementation based on what works best with the podman driver.
|
||||
|
||||
**Implementation Details:**
|
||||
|
||||
Chose **Option 3: Recreate cluster with `--apiserver-names`** after researching alternatives:
|
||||
|
||||
1. **SSH tunneling** - Requires keeping a tunnel running or complex on-demand setup
|
||||
2. **SOCKS5 proxy with kubeconfig `proxy-url`** - Kubeconfig supports `proxy-url: socks5://localhost:1080` per-context, but still requires managing the proxy
|
||||
3. **`--apiserver-names` + `--listen-address`** - Native minikube support, cleanest solution
|
||||
|
||||
**Cluster Setup:** Recreated the minikube cluster with additional flags:
|
||||
```bash
|
||||
minikube delete
|
||||
minikube start \
|
||||
--driver=podman \
|
||||
--container-runtime=cri-o \
|
||||
--cpus=4 --memory=7800 --disk-size=200g \
|
||||
--apiserver-names=indri \
|
||||
--listen-address=0.0.0.0
|
||||
```
|
||||
|
||||
- `--apiserver-names=indri` adds "indri" to the API server certificate SAN
|
||||
- `--listen-address=0.0.0.0` tells podman to expose the API port on all interfaces
|
||||
- API server port is dynamic (check with `kubectl config view --minify -o jsonpath="{.clusters[0].cluster.server}"` on indri)
|
||||
|
||||
**Credential Management with 1Password:**
|
||||
|
||||
Rather than copying private keys between machines, credentials are stored in 1Password and fetched on-demand using kubectl's exec credential plugin. This mirrors the 1Password SSH agent pattern for biometric-protected key access.
|
||||
|
||||
1. **Store credentials in 1Password** (vault: `vg6xf6vvfmoh5hqjjhlhbeoaie`, item: `3jo4f2hnzvwfmamudfsbbbec7e`):
|
||||
- `client-cert` - Contents of `~/.minikube/profiles/minikube/client.crt` (text field)
|
||||
- `client-key` - Contents of `~/.minikube/profiles/minikube/client.key` (text field)
|
||||
- `ca-cert` - Contents of `~/.minikube/ca.crt` (text field, not secret but stored for convenience)
|
||||
|
||||
2. **Created credential helper script** at `bin/kubectl-credential-1password`:
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Fetches client cert/key from 1Password, outputs ExecCredential JSON
|
||||
# Usage: kubectl-credential-1password <vault-id> <item-id> <cert-field> <key-field>
|
||||
```
|
||||
Symlinked to `~/.local/bin/kubectl-credential-1password`
|
||||
|
||||
3. **Kubeconfig setup on gilbert:**
|
||||
```bash
|
||||
# Store CA cert locally (not secret - public key for server verification)
|
||||
mkdir -p ~/.kube/minikube-indri
|
||||
op --vault <vault> item get <item> --fields ca-cert | sed 's/^"//; s/"$//' > ~/.kube/minikube-indri/ca.crt
|
||||
|
||||
# Configure cluster
|
||||
kubectl config set-cluster minikube-indri \
|
||||
--server=https://indri:<port> \
|
||||
--certificate-authority=/Users/eblume/.kube/minikube-indri/ca.crt
|
||||
|
||||
# Configure credentials with exec plugin
|
||||
kubectl config set-credentials minikube-indri \
|
||||
--exec-api-version=client.authentication.k8s.io/v1beta1 \
|
||||
--exec-command=kubectl-credential-1password \
|
||||
--exec-arg=<vault-id> \
|
||||
--exec-arg=<item-id> \
|
||||
--exec-arg=client-cert \
|
||||
--exec-arg=client-key
|
||||
|
||||
# Create context
|
||||
kubectl config set-context minikube-indri \
|
||||
--cluster=minikube-indri \
|
||||
--user=minikube-indri
|
||||
```
|
||||
|
||||
4. **Usage:**
|
||||
```bash
|
||||
kubectl --context=minikube-indri get nodes
|
||||
# or
|
||||
kubectl config use-context minikube-indri
|
||||
kubectl get nodes
|
||||
```
|
||||
|
||||
**Security Notes:**
|
||||
- Client private key never stored on disk - fetched from 1Password on each kubectl command
|
||||
- CA cert stored on disk (not secret - it's a public key for server verification)
|
||||
- 1Password biometric/password prompt required for credential access
|
||||
- `op` output for text fields is wrapped in double quotes; the helper strips them with `sed 's/^"//; s/"$//'`
|
||||
|
||||
**References:**
|
||||
- [minikube start options](https://minikube.sigs.k8s.io/docs/commands/start/)
|
||||
- [Using kubectl via SSH Tunnel](https://blog.scottlowe.org/2020/06/16/using-kubectl-via-an-ssh-tunnel/)
|
||||
- [SOCKS5 Proxy Access to K8s API](https://kubernetes.ltd/docs/tasks/extend-kubernetes/socks5-proxy-access-api/)
|
||||
- [kubectl-tokensshtunnel](https://github.com/jordiprats/kubectl-tokensshtunnel)
|
||||
- [Securing kubectl config with 1Password](https://blog.mikael.green/post/1password-kubeconfig/)
|
||||
|
||||
---
|
||||
|
||||
### Step 0.11: Add Minikube to indri-services-check
|
||||
|
|
@ -623,6 +742,10 @@ mise run indri-services-check
|
|||
# k8s-apiserver... OK
|
||||
```
|
||||
|
||||
**Implementation Notes:**
|
||||
- Added a third check `k8s-apiserver (remote)` that verifies kubectl access from gilbert, not just via SSH to indri. This ensures the 1Password credential flow and remote API server access are working.
|
||||
- The remote check uses both `--kubeconfig` and `--context` flags explicitly since the script runs in bash (not fish) and doesn't inherit the KUBECONFIG environment variable from fish config.
|
||||
|
||||
---
|
||||
|
||||
### Step 0.12: Create Zettelkasten Documentation
|
||||
|
|
@ -631,6 +754,45 @@ mise run indri-services-check
|
|||
- `~/code/personal/zk/zot.md`
|
||||
- `~/code/personal/zk/minikube.md`
|
||||
|
||||
**Files to update:**
|
||||
- `~/code/personal/zk/1767747119-YCPO.md` (main blumeops card)
|
||||
|
||||
**Updates to main blumeops card:**
|
||||
|
||||
1. Add to **Device Tags** table:
|
||||
| `tag:registry` | indri | Container registry access |
|
||||
|
||||
2. Add to **Services** table:
|
||||
| **Registry** | https://registry.tail8d86e.ts.net | OCI container registry (Zot) | [[zot]] |
|
||||
| **Kubernetes** | https://indri:<port> | Minikube cluster | [[minikube]] |
|
||||
|
||||
3. Add to **Port Map (Indri)** table:
|
||||
| 5050 | Zot | HTTP | localhost | Container registry |
|
||||
| <dynamic> | K8s API | HTTPS | 0.0.0.0 | Minikube API server |
|
||||
|
||||
4. Add new section **Remote Kubernetes Access**:
|
||||
```markdown
|
||||
## Remote Kubernetes Access (from Gilbert)
|
||||
|
||||
The minikube cluster on indri is accessible from gilbert via direct connection.
|
||||
Cluster was created with `--apiserver-names=indri --listen-address=0.0.0.0`.
|
||||
|
||||
**Fish abbreviations** (in `~/.config/fish/config.fish`):
|
||||
- `ki` → `kubectl --context=minikube-indri`
|
||||
- `k9i` → `k9s --context=minikube-indri`
|
||||
- `k9` → `k9s`
|
||||
|
||||
```bash
|
||||
# Quick access via abbreviations
|
||||
ki get nodes
|
||||
k9i
|
||||
|
||||
# Or explicitly set context
|
||||
kubectl config use-context minikube-indri
|
||||
kubectl get nodes
|
||||
```
|
||||
```
|
||||
|
||||
**Template for zot.md:**
|
||||
```markdown
|
||||
---
|
||||
|
|
@ -651,7 +813,7 @@ Zot is an OCI-native container registry running on Indri, providing:
|
|||
## Service Details
|
||||
|
||||
- URL: https://registry.tail8d86e.ts.net
|
||||
- Local port: 5000
|
||||
- Local port: 5050
|
||||
- Data directory: ~/zot
|
||||
- Config: ~/.config/zot/config.json
|
||||
- Managed via: mcquack LaunchAgent
|
||||
|
|
@ -669,10 +831,10 @@ Zot is an OCI-native container registry running on Indri, providing:
|
|||
|
||||
\`\`\`bash
|
||||
# List all images
|
||||
curl -s http://localhost:5000/v2/_catalog | jq
|
||||
curl -s http://localhost:5050/v2/_catalog | jq
|
||||
|
||||
# Pull via cache (from indri or k8s)
|
||||
podman pull localhost:5000/docker.io/library/nginx:latest
|
||||
podman pull localhost:5050/docker.io/library/nginx:latest
|
||||
|
||||
# Build and push private image (from gilbert)
|
||||
podman build -t registry.tail8d86e.ts.net/blumeops/myapp:v1 .
|
||||
|
|
@ -691,6 +853,91 @@ tail -f ~/Library/Logs/mcquack.zot.err.log
|
|||
- Initial setup for k8s migration Phase 0
|
||||
```
|
||||
|
||||
**Template for minikube.md:**
|
||||
```markdown
|
||||
---
|
||||
id: minikube
|
||||
aliases:
|
||||
- minikube
|
||||
- kubernetes
|
||||
- k8s
|
||||
tags:
|
||||
- blumeops
|
||||
---
|
||||
|
||||
# Minikube Management Log
|
||||
|
||||
Minikube provides a single-node Kubernetes cluster on Indri for running containerized services.
|
||||
|
||||
## Cluster Details
|
||||
|
||||
- Driver: podman (rootless)
|
||||
- Container runtime: CRI-O
|
||||
- Kubernetes version: v1.34.0
|
||||
- Resources: 4 CPUs, 7800MB RAM, 200GB disk
|
||||
- API server: https://indri:<port> (accessible from gilbert via Tailscale)
|
||||
|
||||
## Remote Access from Gilbert
|
||||
|
||||
Cluster was created with `--apiserver-names=indri --listen-address=0.0.0.0` to allow remote kubectl access.
|
||||
|
||||
\`\`\`bash
|
||||
# Switch context
|
||||
kubectl config use-context minikube-indri
|
||||
|
||||
# Verify
|
||||
kubectl get nodes
|
||||
kubectl get namespaces
|
||||
|
||||
# Use k9s
|
||||
k9s --context minikube-indri
|
||||
\`\`\`
|
||||
|
||||
## Useful Commands (on indri)
|
||||
|
||||
\`\`\`bash
|
||||
# Cluster status
|
||||
minikube status
|
||||
|
||||
# Start/stop cluster
|
||||
minikube start
|
||||
minikube stop
|
||||
|
||||
# Access dashboard
|
||||
minikube dashboard
|
||||
|
||||
# SSH into node
|
||||
minikube ssh
|
||||
|
||||
# View logs
|
||||
minikube logs
|
||||
\`\`\`
|
||||
|
||||
## Podman Machine (prerequisite)
|
||||
|
||||
Minikube uses podman as its driver (the container runtime inside the node is CRI-O). The podman machine must be running:
|
||||
|
||||
\`\`\`bash
|
||||
# Check podman machine
|
||||
podman machine list
|
||||
|
||||
# Start if needed
|
||||
podman machine start
|
||||
\`\`\`
|
||||
|
||||
## Log
|
||||
|
||||
### [DATE]
|
||||
- Initial cluster setup for k8s migration Phase 0
|
||||
- Configured for remote access with --apiserver-names=indri
|
||||
```
|
||||
|
||||
**Implementation Notes:**
|
||||
- Created zot.md and minikube.md in ~/code/personal/zk/
|
||||
- Updated 1767747119-YCPO.md (main blumeops card) with all specified changes
|
||||
- Added 1Password credential plugin reference to minikube docs
|
||||
- K8s API port is 39535 (dynamically assigned by minikube, may change on cluster recreation)
|
||||
|
||||
---
|
||||
|
||||
### Step 0.13: Update Main Playbook
|
||||
|
|
@ -711,6 +958,10 @@ tail -f ~/Library/Logs/mcquack.zot.err.log
|
|||
tags: minikube
|
||||
```
|
||||
|
||||
**Implementation Notes:**
|
||||
- Roles were added incrementally during Steps 0.3, 0.5, 0.8, and 0.9
|
||||
- All four roles (zot, zot_metrics, podman, minikube) confirmed present in indri.yml
|
||||
|
||||
---
|
||||
|
||||
### Phase 0 Verification Checklist
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ indri_tags = tailscale.DeviceTags(
|
|||
"tag:loki",
|
||||
"tag:pg",
|
||||
"tag:feed",
|
||||
"tag:registry", # Zot container registry
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -101,6 +101,7 @@
|
|||
"tag:loki": ["autogroup:admin", "tag:blumeops"],
|
||||
"tag:pg": ["autogroup:admin", "tag:blumeops"],
|
||||
"tag:feed": ["autogroup:admin", "tag:blumeops"],
|
||||
"tag:registry": ["autogroup:admin", "tag:blumeops"],
|
||||
},
|
||||
|
||||
// ============== ACL Tests ==============
|
||||
|
|
@ -108,13 +109,13 @@
|
|||
// Erich can access everything
|
||||
{
|
||||
"src": "blume.erich@gmail.com",
|
||||
"accept": ["tag:grafana:443", "tag:kiwix:443", "tag:feed:443", "tag:loki:3100", "tag:pg:5432", "tag:homelab:22"],
|
||||
"accept": ["tag:grafana:443", "tag:kiwix:443", "tag:feed:443", "tag:loki:3100", "tag:pg:5432", "tag:homelab:22", "tag:registry:443"],
|
||||
},
|
||||
// Allison can access user services but NOT grafana, loki, or NAS
|
||||
{
|
||||
"src": "acmdavis@gmail.com",
|
||||
"accept": ["tag:kiwix:443", "tag:forge:443", "tag:feed:443", "tag:pg:5432"],
|
||||
"deny": ["tag:grafana:443", "tag:loki:3100", "tag:nas:445"],
|
||||
"deny": ["tag:grafana:443", "tag:loki:3100", "tag:nas:445", "tag:registry:443"],
|
||||
},
|
||||
// Homelab can reach homelab and NAS
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue