blumeops/mise-tasks/indri-services-check
Erich Blume 66badfafd1
All checks were successful
Build Container / build (push) Successful in 13s
Migrate k8s services to Caddy (*.ops.eblu.me) (#59)
## Summary
- Add Caddy reverse proxy routes for all k8s services (grafana, argocd, prometheus, loki, miniflux, devpi, kiwix, torrent, teslamate)
- Add PostgreSQL via Caddy L4 TCP proxy on port 5432
- Caddy proxies to existing Tailscale endpoints - traffic stays local on indri
- Both `*.ops.eblu.me` and `*.tail8d86e.ts.net` URLs continue to work

## Updated References
- Alloy: prometheus/loki push endpoints → `*.ops.eblu.me`
- Borgmatic: PostgreSQL backup host → `pg.ops.eblu.me`
- Devpi: DEVPI_OUTSIDE_URL → `pypi.ops.eblu.me`
- indri-services-check: health check URLs
- CLAUDE.md: argocd login command

## Deployment and Testing
- [ ] Run `mise run provision-indri -- --tags caddy` to deploy new Caddy config
- [ ] Test HTTP services: `curl https://grafana.ops.eblu.me/api/health`
- [ ] Test PostgreSQL: `pg_isready -h pg.ops.eblu.me -p 5432`
- [ ] Run `mise run provision-indri -- --tags alloy` to update Alloy endpoints
- [ ] Run `mise run provision-indri -- --tags borgmatic` to update borgmatic
- [ ] Sync devpi in ArgoCD: `argocd app sync devpi`
- [ ] Re-login to ArgoCD: `argocd login argocd.ops.eblu.me ...`
- [ ] Run `mise run indri-services-check` to verify all services

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Reviewed-on: https://forge.ops.eblu.me/eblume/blumeops/pulls/59
2026-01-25 12:56:31 -08:00

115 lines
4.9 KiB
Bash
Executable file

#!/usr/bin/env bash
#MISE description="Check that all indri services are online and responding"
set -euo pipefail
# Overall result flag: starts at 0 and is set to 1 by any failing check.
FAILED=0
# ANSI colour sequences for the status column. They are stored with literal
# backslashes and only turned into escape characters when printed.
NC='\033[0m'       # reset / no colour
GREEN='\033[0;32m' # success
RED='\033[0;31m'   # failure
#######################################
# Run a shell command as a health probe and print a one-line verdict.
# Globals:
#   GREEN, RED, NC (read) - colour sequences for the verdict
#   FAILED (written)      - set to 1 when the probe exits non-zero
# Arguments:
#   $1 - label printed in the left-hand column
#   $2 - command string executed via eval; exit status 0 means healthy
# Outputs:
#   "<label>... OK" or "<label>... FAILED" on stdout; everything the probe
#   itself prints (stdout and stderr) is discarded.
#######################################
check_service() {
  local label="$1"
  local probe="$2"

  printf "%-24s " "$label..."
  # eval re-parses the string, so pipelines and quoting inside it work;
  # only pass trusted, hard-coded commands.
  if ! eval "$probe" > /dev/null 2>&1; then
    printf '%b\n' "${RED}FAILED${NC}"
    FAILED=1
    return 0
  fi
  printf '%b\n' "${GREEN}OK${NC}"
}
#######################################
# Probe an HTTP(S) URL with curl and print a one-line verdict.
# Globals:
#   GREEN, RED, NC (read) - colour sequences for the verdict
#   FAILED (written)      - set to 1 when curl exits non-zero
# Arguments:
#   $1 - label printed in the left-hand column
#   $2 - URL to request
# Outputs:
#   "<label>... OK" or "<label>... FAILED" on stdout; curl's own output is
#   discarded.
#######################################
check_http() {
  local label="$1"
  local endpoint="$2"

  printf "%-24s " "$label..."
  # --fail makes curl exit non-zero on HTTP error statuses; the whole
  # request is capped at 5 seconds.
  if ! curl --silent --fail --max-time 5 "$endpoint" > /dev/null 2>&1; then
    printf '%b\n' "${RED}FAILED${NC}"
    FAILED=1
    return 0
  fi
  printf '%b\n' "${GREEN}OK${NC}"
}
printf '%s\n' "Checking indri services..." "==========================" ""

# Local services on indri
echo "Local services on indri:"
check_service "forgejo (brew)" "ssh indri 'brew services list | grep forgejo | grep started'"

# launchd jobs, written as "display name=launchd label". Each check passes
# when `launchctl list <label>` on indri prints at least one line that does
# not start with "-".
for job in \
  "alloy=mcquack.eblume.alloy" \
  "borgmatic=mcquack.eblume.borgmatic" \
  "borgmatic-metrics=mcquack.borgmatic-metrics" \
  "zot=mcquack.eblume.zot" \
  "zot-metrics=mcquack.zot-metrics" \
  "minikube-metrics=mcquack.minikube-metrics" \
  "plex-metrics=mcquack.plex-metrics"
do
  # ${job%%=*} is the display name, ${job#*=} the launchd label.
  check_service "${job%%=*}" "ssh indri 'launchctl list ${job#*=} | grep -v \"^-\"'"
done
echo ""
echo "Metrics textfiles:"
# Each exporter is expected to have written <name>.prom into the
# node_exporter textfile directory on indri; only file existence is tested.
for exporter in borgmatic zot minikube plex; do
  check_service "${exporter}.prom" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/${exporter}.prom'"
done
echo ""
echo "Kubernetes cluster:"
# minikube host state and the API server health endpoint, queried on indri.
check_service "minikube" "ssh indri 'minikube status --format={{.Host}} | grep -q Running'"
check_service "k8s-apiserver (indri)" "ssh indri 'kubectl get --raw /healthz'"
# Same API server health endpoint, reached from this machine via the local
# kubeconfig. \$HOME is escaped so that it is expanded by the eval inside
# check_service, within double quotes; expanding it here would let a home
# directory containing spaces or shell metacharacters be re-parsed by eval.
check_service "k8s-apiserver (remote)" "kubectl --kubeconfig=\"\$HOME/.kube/minikube-indri/config.yml\" --context=minikube-indri get --raw /healthz"
echo ""
echo "HTTP endpoints (via Caddy):"
# One "label|url" pair per entry, probed in order.
http_endpoints=(
  "Prometheus|https://prometheus.ops.eblu.me/-/healthy"
  "Loki|https://loki.ops.eblu.me/ready"
  "Grafana|https://grafana.ops.eblu.me/api/health"
  "ArgoCD|https://argocd.ops.eblu.me/healthz"
  "Forgejo|https://forge.ops.eblu.me/"
  "Zot Registry|https://registry.ops.eblu.me/v2/_catalog"
  "Kiwix|https://kiwix.ops.eblu.me/"
  "Miniflux|https://feed.ops.eblu.me/healthcheck"
  "TeslaMate|https://tesla.ops.eblu.me/"
  "Devpi|https://pypi.ops.eblu.me/+api"
  "Transmission|https://torrent.ops.eblu.me/"
)
for endpoint in "${http_endpoints[@]}"; do
  # Text before the first "|" is the label, the remainder is the URL.
  check_http "${endpoint%%|*}" "${endpoint#*|}"
done
echo ""
echo "Database:"
check_service "PostgreSQL (k8s)" "pg_isready -h pg.ops.eblu.me -p 5432"

echo ""
echo "Kubernetes pods:"
# Pods in the monitoring namespace that are looked up by pod name.
for pod in prometheus-0 loki-0; do
  check_service "$pod" "kubectl --context=minikube-indri -n monitoring get pod $pod -o jsonpath='{.status.phase}' | grep -q Running"
done
# Pods looked up by label selector, as "label|namespace|selector". Only the
# first matching pod ({.items[0]}) is inspected.
for pod_spec in \
  "grafana|monitoring|app.kubernetes.io/name=grafana" \
  "miniflux|miniflux|app=miniflux" \
  "teslamate|teslamate|app=teslamate" \
  "blumeops-pg|databases|cnpg.io/cluster=blumeops-pg"
do
  IFS='|' read -r pod_label pod_ns pod_selector <<< "$pod_spec"
  check_service "$pod_label" "kubectl --context=minikube-indri -n $pod_ns get pods -l $pod_selector -o jsonpath='{.items[0].status.phase}' | grep -q Running"
done
#######################################
# Print one table row per ArgoCD Application and flag sync problems.
# Globals:
#   GREEN, RED, NC (read) - colour sequences for the SYNC column
#   FAILED (written)      - set to 1 when any application is OutOfSync, or
#                           when the application list cannot be retrieved
# Arguments:
#   None
# Outputs:
#   Rows of "NAME SYNC HEALTH TARGET" on stdout, or a single FAILED line
#   when kubectl exits non-zero.
# Returns:
#   0 always; problems are reported through FAILED.
#######################################
argocd_sync_report() {
  local listing name sync health target color reset

  # Capture the listing first: reading straight from a process substitution
  # hides kubectl's exit status, so an unreachable cluster would produce an
  # empty table and still count as healthy.
  if ! listing="$(kubectl --context=minikube-indri get applications -n argocd \
    --no-headers \
    -o custom-columns='NAME:.metadata.name,SYNC:.status.sync.status,HEALTH:.status.health.status,TARGET:.spec.source.targetRevision' \
    2>/dev/null)"; then
    printf '%b\n' "${RED}FAILED (could not list ArgoCD applications)${NC}"
    FAILED=1
    return 0
  fi

  while read -r name sync health target; do
    # A here-string of empty output still yields one blank line; skip it.
    [[ -n "$name" ]] || continue
    case "$sync" in
      Synced)
        color="$GREEN"
        reset="$NC"
        ;;
      OutOfSync)
        color="$RED"
        reset="$NC"
        FAILED=1
        ;;
      *)
        # Any other sync state is printed uncoloured and not treated as failure.
        color=""
        reset=""
        ;;
    esac
    # Colours go through %b arguments rather than the format string, so the
    # format stays constant and the escapes are still interpreted.
    printf "%-20s %b%-12s%b %-12s %s\n" "$name" "$color" "$sync" "$reset" "$health" "$target"
  done <<< "$listing"
}

echo ""
echo "ArgoCD app sync status:"
printf "%-20s %-12s %-12s %s\n" "NAME" "SYNC" "HEALTH" "TARGET"
argocd_sync_report
echo ""
# Final verdict: the exit status mirrors FAILED so callers can rely on the
# script's return code.
if [[ "$FAILED" != "0" ]]; then
  printf '%b\n' "${RED}Some services failed health check${NC}"
  exit 1
fi
printf '%b\n' "${GREEN}All services healthy!${NC}"
exit 0