## Summary

C2 Mikado chain for deploying Authentik as the SSO identity provider, replacing Dex.

This PR will evolve over multiple sessions. Each iteration adds documentation (prerequisite cards) and eventually code as leaf nodes are resolved.

## Current Mikado State

- **Goal:** `deploy-authentik` (active)
- **Leaf prerequisites:**
  - `build-authentik-container` — Build Nix container image
  - `provision-authentik-database` — Create PostgreSQL database on CNPG cluster
  - `create-authentik-secrets` — Create 1Password item with credentials

## Process refinements

- Updated agent-change-process with lessons from first attempt: reset code before committing cards, open PRs early

## Test plan

- [ ] `mise run docs-mikado` shows correct dependency chain
- [ ] Leaf nodes can be worked independently
- [ ] Container builds on ringtail
- [ ] Authentik starts and reaches healthy state
- [ ] Forgejo OAuth2 connector works

Reviewed-on: https://forge.ops.eblu.me/eblume/blumeops/pulls/227
144 lines
6.9 KiB
Bash
Executable file
144 lines
6.9 KiB
Bash
Executable file
#!/usr/bin/env bash
#MISE description="Check that all services are online and responding"
# NOTE(review): the #MISE line above looks like task metadata consumed by
# mise; keep it byte-for-byte.

# Strict mode: abort on an unhandled command failure, on use of an unset
# variable, and when any stage of a pipeline fails.
set -euo pipefail

# Colors for output. The values keep a literal backslash; they only become
# escape characters when rendered by `echo -e` or inside a printf format
# string. They never change, so they are declared readonly.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly NC='\033[0m' # No Color

# Overall result flag: starts at 0 and is set to 1 by any failing check.
# It must stay writable.
FAILED=0
|
|
|
|
#######################################
# Run one health check and print a one-line verdict.
# Globals:
#   GREEN, RED, NC (read) - color sequences wrapped around the verdict
#   FAILED (written)      - set to 1 when the check fails
# Arguments:
#   $1 - label shown in the report
#   $2 - shell command string; run with eval so it may contain pipes,
#        redirections and nested quoting
# Outputs:
#   "<label>..." padded to 24 columns, then OK or FAILED, on stdout.
#   The command's own stdout and stderr are discarded.
# Returns:
#   0 whether the check passed or failed, so a failing check does not trip
#   `set -e` in the caller; failures are reported only through FAILED.
#######################################
check_service() {
  local name="$1"
  local check_cmd="$2"

  printf "%-24s " "$name..."
  # stdin is taken from /dev/null so that a probe (ssh in particular) can
  # neither block waiting for input nor consume input that belongs to the
  # caller, e.g. when this function is called inside a `while read` loop.
  if eval "$check_cmd" < /dev/null > /dev/null 2>&1; then
    echo -e "${GREEN}OK${NC}"
  else
    echo -e "${RED}FAILED${NC}"
    FAILED=1
  fi
}
|
|
|
|
#######################################
# Probe an HTTP(S) endpoint with curl and print a one-line verdict.
# Globals:
#   GREEN, RED, NC (read) - color sequences wrapped around the verdict
#   FAILED (written)      - set to 1 when the probe fails
# Arguments:
#   $1 - label shown in the report
#   $2 - URL to request
#   $3 - optional: total time limit in seconds passed to curl's --max-time
#        (default 5, the value every existing caller relies on)
# Outputs:
#   "<label>..." padded to 24 columns, then OK or FAILED, on stdout.
#   curl's own output is discarded.
# Returns:
#   0 whether the probe passed or failed; failures are reported only
#   through FAILED.
#######################################
check_http() {
  local name="$1"
  local url="$2"
  local max_time="${3:-5}"

  printf "%-24s " "$name..."
  # -s: no progress output; -f: exit non-zero on an HTTP error response
  # instead of treating the error page as success.
  if curl -sf --max-time "$max_time" "$url" > /dev/null 2>&1; then
    echo -e "${GREEN}OK${NC}"
  else
    echo -e "${RED}FAILED${NC}"
    FAILED=1
  fi
}
|
|
|
|
# Report banner, followed by a blank separator line.
printf '%s\n' "Checking services..." "====================" ""

# Services that run on indri itself; every probe goes through ssh.
echo "Local services on indri:"

# Forgejo passes when `brew services list` on indri has a line containing
# both "forgejo" and "started".
check_service "forgejo (brew)" "ssh indri 'brew services list | grep forgejo | grep started'"

# launchd jobs, one "<display name>=<launchd label>" pair per entry. A job
# passes when `launchctl list <label>` on indri prints at least one line that
# does not start with "-".
for launchd_job in \
  "alloy=mcquack.eblume.alloy" \
  "borgmatic=mcquack.eblume.borgmatic" \
  "borgmatic-metrics=mcquack.borgmatic-metrics" \
  "zot=mcquack.eblume.zot" \
  "zot-metrics=mcquack.zot-metrics" \
  "minikube-metrics=mcquack.minikube-metrics" \
  "jellyfin-metrics=mcquack.eblume.jellyfin-metrics"; do
  check_service "${launchd_job%%=*}" "ssh indri 'launchctl list ${launchd_job#*=} | grep -v \"^-\"'"
done
|
|
|
|
echo ""
echo "Metrics textfiles:"

# Each listed <name>.prom must exist as a regular file in the node_exporter
# textfile directory on indri. Only existence is tested, not content or age.
textfile_dir="/opt/homebrew/var/node_exporter/textfile"
for prom_name in borgmatic zot minikube jellyfin; do
  check_service "${prom_name}.prom" "ssh indri 'test -f ${textfile_dir}/${prom_name}.prom'"
done
|
|
|
|
echo ""
echo "Kubernetes cluster:"

# minikube host state and API server health, queried on indri itself.
check_service "minikube" "ssh indri 'minikube status --format={{.Host}} | grep -q Running'"
check_service "k8s-apiserver (indri)" "ssh indri 'kubectl get --raw /healthz'"

# The same health endpoint, queried from this machine through a dedicated
# kubeconfig. The command is single-quoted on purpose: $HOME is expanded only
# when check_service evals the string, and there it sits inside double
# quotes, so a home directory containing whitespace or glob characters stays
# a single --kubeconfig argument instead of being word-split.
# shellcheck disable=SC2016
remote_apiserver_probe='kubectl --kubeconfig="$HOME/.kube/minikube-indri/config.yml" --context=minikube-indri get --raw /healthz'
check_service "k8s-apiserver (remote)" "$remote_apiserver_probe"
|
|
|
|
echo ""
echo "HTTP endpoints (via Caddy):"

# One "<display name>|<URL>" entry per endpoint, probed in this order.
caddy_endpoints=(
  "Prometheus|https://prometheus.ops.eblu.me/-/healthy"
  "Loki|https://loki.ops.eblu.me/ready"
  "Grafana|https://grafana.ops.eblu.me/api/health"
  "ArgoCD|https://argocd.ops.eblu.me/healthz"
  "Forgejo|https://forge.ops.eblu.me/"
  "Zot Registry|https://registry.ops.eblu.me/v2/_catalog"
  "Kiwix|https://kiwix.ops.eblu.me/"
  "Miniflux|https://feed.ops.eblu.me/healthcheck"
  "TeslaMate|https://tesla.ops.eblu.me/"
  "Devpi|https://pypi.ops.eblu.me/+api"
  "Transmission|https://torrent.ops.eblu.me/"
  "Immich|https://photos.ops.eblu.me/"
  "Navidrome|https://dj.ops.eblu.me/"
  "CV|https://cv.ops.eblu.me/"
  "Ntfy|https://ntfy.ops.eblu.me/v1/health"
  "Authentik|https://authentik.ops.eblu.me/-/health/live/"
  "Frigate|https://nvr.ops.eblu.me/api/version"
)

for endpoint in "${caddy_endpoints[@]}"; do
  # Text before the first "|" is the label, everything after it is the URL.
  check_http "${endpoint%%|*}" "${endpoint#*|}"
done
|
|
|
|
echo ""
echo "Ringtail (NixOS):"

# Plain reachability; this is the only probe here with a connect timeout.
ringtail_ssh_probe="ssh -o ConnectTimeout=5 ringtail true"

# jq -e turns the value of .Self.Online into the exit status, so a false or
# missing value fails the check.
ringtail_tailscale_probe="ssh ringtail 'tailscale status --self --json' | jq -e '.Self.Online' > /dev/null"

# grep -w matches "Ready" only as a whole word. A plain `grep Ready` also
# matches the "NotReady" status and would report an unhealthy node as OK.
ringtail_k3s_probe="ssh ringtail 'KUBECONFIG=/etc/rancher/k3s/k3s.yaml k3s kubectl get nodes --no-headers | grep -qw Ready'"

# API server health as seen from this machine.
ringtail_apiserver_probe="kubectl --context=k3s-ringtail get --raw /healthz"

# systemd unit that check_service reports under the name "forgejo-runner".
ringtail_runner_probe="ssh ringtail 'systemctl is-active gitea-runner-nix_container_builder.service'"

check_service "ssh" "$ringtail_ssh_probe"
check_service "tailscale" "$ringtail_tailscale_probe"
check_service "k3s" "$ringtail_k3s_probe"
check_service "k3s-apiserver (remote)" "$ringtail_apiserver_probe"
check_service "forgejo-runner" "$ringtail_runner_probe"
|
|
|
|
echo ""
echo "Ringtail k3s pods:"

# Row format: "<display name> <namespace> <label selector>". A row passes
# when the phase of the first pod matching the selector contains "Running".
for pod_row in \
  "mosquitto mqtt app=mosquitto" \
  "ntfy ntfy app=ntfy" \
  "authentik authentik component=server" \
  "frigate frigate app=frigate" \
  "frigate-notify frigate app=frigate-notify" \
  "nvidia-device-plugin nvidia-device-plugin app=nvidia-device-plugin"; do
  read -r pod_label pod_namespace pod_selector <<< "$pod_row"
  check_service "$pod_label" "kubectl --context=k3s-ringtail -n ${pod_namespace} get pods -l ${pod_selector} -o jsonpath='{.items[0].status.phase}' | grep -q Running"
done
|
|
|
|
echo ""
echo "Public services (via Fly.io):"

# Parallel arrays: public_names[i] is the label for public_urls[i].
public_names=("Docs (public)" "CV (public)" "Fly.io healthz")
public_urls=(
  "https://docs.eblu.me/"
  "https://cv.eblu.me/"
  "https://blumeops-proxy.fly.dev/healthz"
)

for idx in "${!public_names[@]}"; do
  check_http "${public_names[$idx]}" "${public_urls[$idx]}"
done
|
|
|
|
echo ""
echo "Database:"

# Connection target handed to pg_isready; the check passes when pg_isready
# exits successfully for this host and port.
pg_host="pg.ops.eblu.me"
pg_port=5432
check_service "PostgreSQL (k8s)" "pg_isready -h ${pg_host} -p ${pg_port}"
|
|
|
|
echo ""
echo "Indri minikube pods:"

# Fragments shared by every probe in this section.
indri_ctx="--context=minikube-indri"
running_filter="grep -q Running"

# Pods addressed by their exact name in the monitoring namespace.
for pod_name in prometheus-0 loki-0; do
  check_service "$pod_name" "kubectl ${indri_ctx} -n monitoring get pod ${pod_name} -o jsonpath='{.status.phase}' | ${running_filter}"
done

# Row format: "<display name> <namespace> <label selector>"; the phase of the
# first pod matching the selector is inspected.
for workload in \
  "grafana monitoring app.kubernetes.io/name=grafana" \
  "miniflux miniflux app=miniflux" \
  "teslamate teslamate app=teslamate" \
  "blumeops-pg databases cnpg.io/cluster=blumeops-pg"; do
  read -r workload_name workload_ns workload_selector <<< "$workload"
  check_service "$workload_name" "kubectl ${indri_ctx} -n ${workload_ns} get pods -l ${workload_selector} -o jsonpath='{.items[0].status.phase}' | ${running_filter}"
done
|
|
|
|
echo ""
echo "ArgoCD app sync status:"

# One output row per Application: name, sync status, health status and
# target revision.
argocd_columns='NAME:.metadata.name,SYNC:.status.sync.status,HEALTH:.status.health.status,TARGET:.spec.source.targetRevision'

# The listing is captured before it is rendered so that a failed query is
# noticed. Feeding kubectl straight into the loop through process
# substitution would discard its exit status, and an unreachable cluster
# would show up as an empty table that still counts as healthy.
# kubectl's stderr stays suppressed to keep the report terse.
if argocd_rows=$(kubectl --context=minikube-indri get applications -n argocd --no-headers -o custom-columns="$argocd_columns" 2>/dev/null); then
  printf "%-20s %-12s %-12s %s\n" "NAME" "SYNC" "HEALTH" "TARGET"
  while read -r name sync health target; do
    # The here-string yields a single empty line when nothing was listed.
    [[ -n "$name" ]] || continue
    if [[ "$sync" == "Synced" ]]; then
      printf "%-20s ${GREEN}%-12s${NC} %-12s %s\n" "$name" "$sync" "$health" "$target"
    elif [[ "$sync" == "OutOfSync" ]]; then
      printf "%-20s ${RED}%-12s${NC} %-12s %s\n" "$name" "$sync" "$health" "$target"
      FAILED=1
    else
      # Any other sync status is shown uncolored and does not fail the run.
      printf "%-20s %-12s %-12s %s\n" "$name" "$sync" "$health" "$target"
    fi
  done <<< "$argocd_rows"
else
  echo -e "${RED}FAILED to list ArgoCD applications${NC}"
  FAILED=1
fi
|
|
|
|
echo ""

# Turn the accumulated FAILED flag into the closing message and the exit
# status of the script: 1 if any check failed, 0 otherwise.
if [[ "$FAILED" -ne 0 ]]; then
  echo -e "${RED}Some services failed health check${NC}"
  exit 1
fi

echo -e "${GREEN}All services healthy!${NC}"
exit 0
|