diff --git a/mise-tasks/indri-services-check b/mise-tasks/indri-services-check index dcda013..1019bee 100755 --- a/mise-tasks/indri-services-check +++ b/mise-tasks/indri-services-check @@ -14,7 +14,7 @@ check_service() { local name="$1" local check_cmd="$2" - printf "%-20s " "$name..." + printf "%-24s " "$name..." if eval "$check_cmd" > /dev/null 2>&1; then echo -e "${GREEN}OK${NC}" else @@ -27,7 +27,7 @@ check_http() { local name="$1" local url="$2" - printf "%-20s " "$name..." + printf "%-24s " "$name..." if curl -sf --max-time 5 "$url" > /dev/null 2>&1; then echo -e "${GREEN}OK${NC}" else @@ -40,39 +40,23 @@ echo "Checking indri services..." echo "==========================" echo "" -# Check via SSH that services are running on indri -echo "Local services (via launchctl/brew services):" -check_service "loki" "ssh indri 'brew services list | grep loki | grep started'" -check_service "alloy" "ssh indri 'brew services list | grep grafana-alloy | grep started'" -check_service "prometheus" "ssh indri 'brew services list | grep prometheus | grep started'" -check_service "grafana" "ssh indri 'brew services list | grep grafana | grep started'" -check_service "transmission" "ssh indri 'brew services list | grep transmission | grep started'" -check_service "transmission-metrics" "ssh indri 'launchctl list | grep transmission-metrics | grep -v \"^-\"'" -check_service "kiwix-serve" "ssh indri 'launchctl list | grep kiwix | grep -v \"^-\"'" -check_service "forgejo" "ssh indri 'brew services list | grep forgejo | grep started'" -check_service "devpi" "ssh indri 'launchctl list | grep devpi | grep -v \"^-\"'" -# NOTE: postgresql and miniflux moved to k8s - checked below -check_service "zot" "ssh indri 'launchctl list | grep mcquack.eblume.zot | grep -v \"^-\"'" -check_service "zot-metrics" "ssh indri 'launchctl list | grep zot-metrics | grep -v \"^-\"'" -check_service "minikube-metrics" "ssh indri 'launchctl list | grep minikube-metrics | grep -v \"^-\"'" +# Local services on indri +echo "Local services on indri:" +check_service "forgejo (brew)" "ssh indri 'brew services list | grep forgejo | grep started'" +check_service "alloy" "ssh indri 'launchctl list mcquack.eblume.alloy | grep -v \"^-\"'" +check_service "borgmatic" "ssh indri 'launchctl list mcquack.eblume.borgmatic | grep -v \"^-\"'" +check_service "borgmatic-metrics" "ssh indri 'launchctl list mcquack.borgmatic-metrics | grep -v \"^-\"'" +check_service "zot" "ssh indri 'launchctl list mcquack.eblume.zot | grep -v \"^-\"'" +check_service "zot-metrics" "ssh indri 'launchctl list mcquack.zot-metrics | grep -v \"^-\"'" +check_service "minikube-metrics" "ssh indri 'launchctl list mcquack.minikube-metrics | grep -v \"^-\"'" +check_service "plex-metrics" "ssh indri 'launchctl list mcquack.plex-metrics | grep -v \"^-\"'" echo "" -echo "HTTP endpoints (via Tailscale):" -check_http "Loki" "http://indri:3100/ready" -check_http "Prometheus" "http://indri:9090/-/healthy" -check_http "Grafana" "https://grafana.tail8d86e.ts.net/api/health" -check_http "Kiwix" "https://kiwix.tail8d86e.ts.net/" -check_http "Forgejo" "https://forge.tail8d86e.ts.net/" -check_http "Devpi" "https://pypi.tail8d86e.ts.net/+api" -check_http "Miniflux" "https://feed.tail8d86e.ts.net/healthcheck" -# Transmission RPC is localhost-only by design, check via SSH -check_service "Transmission RPC" "ssh indri 'curl -sf http://127.0.0.1:9091/transmission/rpc'" -# Check that transmission metrics are being collected -check_service "Transmission metrics" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/transmission.prom'" -# Zot registry (via Tailscale service) -check_http "Zot Registry" "https://registry.tail8d86e.ts.net/v2/_catalog" -check_service "Zot metrics file" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/zot.prom'" -check_service "Minikube metrics file" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/minikube.prom'" +echo "Metrics textfiles:" +check_service "borgmatic.prom" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/borgmatic.prom'" +check_service "zot.prom" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/zot.prom'" +check_service "minikube.prom" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/minikube.prom'" +check_service "plex.prom" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/plex.prom'" echo "" echo "Kubernetes cluster:" @@ -81,14 +65,29 @@ check_service "k8s-apiserver (indri)" "ssh indri 'kubectl get --raw /healthz'" check_service "k8s-apiserver (remote)" "kubectl --kubeconfig=$HOME/.kube/minikube-indri/config.yml --context=minikube-indri get --raw /healthz" echo "" -echo "Kubernetes workloads (via Tailscale):" +echo "HTTP endpoints (via Tailscale):" +check_http "Prometheus" "https://prometheus.tail8d86e.ts.net/-/healthy" +check_http "Loki" "https://loki.tail8d86e.ts.net/ready" +check_http "Grafana" "https://grafana.tail8d86e.ts.net/api/health" check_http "ArgoCD" "https://argocd.tail8d86e.ts.net/healthz" -# k8s PostgreSQL - check TCP connection (no auth needed for pg_isready) +check_http "Forgejo" "https://forge.tail8d86e.ts.net/" +check_http "Zot Registry" "https://registry.tail8d86e.ts.net/v2/_catalog" +check_http "Kiwix" "https://kiwix.tail8d86e.ts.net/" +check_http "Miniflux" "https://feed.tail8d86e.ts.net/healthcheck" +check_http "Devpi" "https://pypi.tail8d86e.ts.net/+api" +check_http "Transmission" "https://torrent.tail8d86e.ts.net/" + +echo "" +echo "Database:" check_service "PostgreSQL (k8s)" "pg_isready -h pg.tail8d86e.ts.net -p 5432" -# k8s miniflux pod -check_service "Miniflux pod" "kubectl --context=minikube-indri -n miniflux get pods -l app=miniflux -o jsonpath='{.items[0].status.phase}' | grep -q Running" -# ArgoCD apps sync status -check_service "ArgoCD apps synced" "kubectl --context=minikube-indri get applications -n argocd -o jsonpath='{.items[*].status.sync.status}' | grep -v OutOfSync" + +echo "" +echo "Kubernetes pods:" +check_service "prometheus-0" "kubectl --context=minikube-indri -n monitoring get pod prometheus-0 -o jsonpath='{.status.phase}' | grep -q Running" +check_service "loki-0" "kubectl --context=minikube-indri -n monitoring get pod loki-0 -o jsonpath='{.status.phase}' | grep -q Running" +check_service "grafana" "kubectl --context=minikube-indri -n monitoring get pods -l app.kubernetes.io/name=grafana -o jsonpath='{.items[0].status.phase}' | grep -q Running" +check_service "miniflux" "kubectl --context=minikube-indri -n miniflux get pods -l app=miniflux -o jsonpath='{.items[0].status.phase}' | grep -q Running" +check_service "blumeops-pg" "kubectl --context=minikube-indri -n databases get pods -l cnpg.io/cluster=blumeops-pg -o jsonpath='{.items[0].status.phase}' | grep -q Running" echo "" if [ $FAILED -eq 0 ]; then