blumeops/mise-tasks/indri-services-check
Erich Blume 19a82373d5 K8s Migration Phase 0: Foundation Infrastructure (#26)
## Summary
- Step 0.1: Update Pulumi ACLs with tag:registry
- Step 0.3: Create Zot registry ansible role with mcquack LaunchAgent
- Step 0.4: Add Zot to Tailscale Serve configuration
- Step 0.5: Create Zot metrics role for Prometheus scraping
- Step 0.6: Add Zot log collection to Alloy
- Step 0.7: Update indri-services-check with zot checks
- Step 0.8: Add podman role for container runtime
- Step 0.9: Add minikube role for Kubernetes cluster
- Step 0.10: Configure remote kubectl access with 1Password credentials

## Remaining Steps
- [ ] Step 0.11: Add minikube to indri-services-check
- [ ] Step 0.12: Create zettelkasten documentation
- [ ] Step 0.13: Verify main playbook (already done - roles added)

## Deployment and Testing
- [x] Zot registry deployed and accessible at https://registry.tail8d86e.ts.net
- [x] Podman machine running on indri
- [x] Minikube cluster running on indri
- [x] kubectl access from gilbert working with 1Password credentials
- [ ] indri-services-check passes all checks

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Reviewed-on: https://forge.tail8d86e.ts.net/eblume/blumeops/pulls/26
2026-01-18 12:06:28 -08:00

93 lines
3.9 KiB
Bash
Executable file

#!/usr/bin/env bash
#MISE description="Check that all indri services are online and responding"
# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# Colors for output. Stored as literal backslash escapes; they are turned into
# real escape characters only when printed. Constants, so marked readonly.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly NC='\033[0m' # No Color

# Overall result flag: stays 0 until a check fails, then set to 1.
# Deliberately NOT readonly, because the check helpers assign to it.
FAILED=0
#######################################
# Run a command string and print a one-line OK/FAILED status for it.
# Globals:
#   GREEN, RED, NC (read) - colour escapes wrapped around the status word
#   FAILED (written)      - set to 1 when the command fails
# Arguments:
#   $1 - label shown in the left-hand column (padded to 20 characters)
#   $2 - command string; executed with eval in the current shell, with
#        both stdout and stderr discarded
# Outputs:
#   "<label>... OK" or "<label>... FAILED" on stdout
#######################################
check_service() {
  local name="$1" check_cmd="$2"

  # Print the label first so it is visible while the command is running.
  printf '%-20s ' "${name}..."

  # eval is intentional: callers pass whole pipelines as a single string.
  if ! eval "$check_cmd" > /dev/null 2>&1; then
    printf '%b\n' "${RED}FAILED${NC}"
    FAILED=1
  else
    printf '%b\n' "${GREEN}OK${NC}"
  fi
}
#######################################
# Fetch a URL with curl and print a one-line OK/FAILED status for it.
# Globals:
#   GREEN, RED, NC (read) - colour escapes wrapped around the status word
#   FAILED (written)      - set to 1 when curl exits non-zero
# Arguments:
#   $1 - label shown in the left-hand column (padded to 20 characters)
#   $2 - URL to request
# Outputs:
#   "<label>... OK" or "<label>... FAILED" on stdout
#######################################
check_http() {
  local label="$1" endpoint="$2"
  # -s: silent, -f: non-zero exit on HTTP error responses,
  # --max-time 5: cap the whole request at five seconds.
  local -a curl_opts=(-sf --max-time 5)

  printf '%-20s ' "${label}..."

  if ! curl "${curl_opts[@]}" "$endpoint" > /dev/null 2>&1; then
    printf '%b\n' "${RED}FAILED${NC}"
    FAILED=1
  else
    printf '%b\n' "${GREEN}OK${NC}"
  fi
}
# ---------------------------------------------------------------------------
# Main check sequence. Each check prints its own OK/FAILED line; a failure
# sets FAILED=1 and the run continues, so every service is always reported.
# ---------------------------------------------------------------------------
echo "Checking indri services..."
echo "=========================="
echo ""

# Check via SSH that services are running on indri.
# brew-managed services must show "started" in `brew services list`;
# LaunchAgents must appear in `launchctl list` on a line that does not begin
# with "-" (that is what the `grep -v "^-"` filter removes).
echo "Local services (via launchctl/brew services):"
check_service "loki" "ssh indri 'brew services list | grep loki | grep started'"
check_service "alloy" "ssh indri 'brew services list | grep grafana-alloy | grep started'"
check_service "prometheus" "ssh indri 'brew services list | grep prometheus | grep started'"
# Anchor on the service-name column: a bare `grep grafana` also matches the
# "grafana-alloy" row, which would report grafana as OK while it is stopped.
check_service "grafana" "ssh indri 'brew services list | grep -E \"^grafana[[:space:]]\" | grep started'"
check_service "transmission" "ssh indri 'brew services list | grep transmission | grep started'"
check_service "transmission-metrics" "ssh indri 'launchctl list | grep transmission-metrics | grep -v \"^-\"'"
check_service "kiwix-serve" "ssh indri 'launchctl list | grep kiwix | grep -v \"^-\"'"
check_service "forgejo" "ssh indri 'brew services list | grep forgejo | grep started'"
check_service "devpi" "ssh indri 'launchctl list | grep devpi | grep -v \"^-\"'"
check_service "postgresql" "ssh indri 'brew services list | grep postgresql | grep started'"
check_service "miniflux" "ssh indri 'brew services list | grep miniflux | grep started'"
# NOTE(review): this pattern is unanchored; if the zot-metrics agent's label
# starts with "mcquack.eblume.zot" it would satisfy this check too - confirm.
check_service "zot" "ssh indri 'launchctl list | grep mcquack.eblume.zot | grep -v \"^-\"'"
check_service "zot-metrics" "ssh indri 'launchctl list | grep zot-metrics | grep -v \"^-\"'"
check_service "minikube-metrics" "ssh indri 'launchctl list | grep minikube-metrics | grep -v \"^-\"'"
echo ""

echo "HTTP endpoints (via Tailscale):"
check_http "Loki" "http://indri:3100/ready"
check_http "Prometheus" "http://indri:9090/-/healthy"
check_http "Grafana" "https://grafana.tail8d86e.ts.net/api/health"
check_http "Kiwix" "https://kiwix.tail8d86e.ts.net/"
check_http "Forgejo" "https://forge.tail8d86e.ts.net/"
check_http "Devpi" "https://pypi.tail8d86e.ts.net/+api"
check_http "Miniflux" "https://feed.tail8d86e.ts.net/healthcheck"
# Transmission RPC is localhost-only by design, check via SSH
check_service "Transmission RPC" "ssh indri 'curl -sf http://127.0.0.1:9091/transmission/rpc'"
# Check that transmission metrics are being collected
check_service "Transmission metrics" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/transmission.prom'"
# PostgreSQL uses TCP not HTTP, check via pg_isready
check_service "PostgreSQL" "ssh indri '/opt/homebrew/opt/postgresql@18/bin/pg_isready -h localhost'"
# Zot registry (via Tailscale service)
check_http "Zot Registry" "https://registry.tail8d86e.ts.net/v2/_catalog"
check_service "Zot metrics file" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/zot.prom'"
check_service "Minikube metrics file" "ssh indri 'test -f /opt/homebrew/var/node_exporter/textfile/minikube.prom'"
echo ""

echo "Kubernetes cluster:"
check_service "minikube" "ssh indri 'minikube status --format={{.Host}} | grep -q Running'"
check_service "k8s-apiserver (indri)" "ssh indri 'kubectl get --raw /healthz'"
# This one runs kubectl locally (no ssh). $HOME is expanded here and the
# resulting path is quoted inside the command string so that a home directory
# containing whitespace survives the eval in check_service.
check_service "k8s-apiserver (remote)" "kubectl --kubeconfig=\"$HOME/.kube/minikube-indri/config.yml\" --context=minikube-indri get --raw /healthz"
echo ""

# Exit 0 only if every check above passed.
if [[ "$FAILED" -eq 0 ]]; then
  echo -e "${GREEN}All services healthy!${NC}"
  exit 0
else
  echo -e "${RED}Some services failed health check${NC}"
  exit 1
fi