blumeops/mise-tasks/indri-services-check
Erich Blume 242c1880de Add Grafana Alloy and Loki for unified observability (#11)
## Summary
- Add Grafana Alloy to replace node_exporter for metrics collection
- Add Loki for log aggregation and storage
- Configure Alloy to collect logs from all services (grafana, forgejo, prometheus, tailscale, transmission, devpi, kiwix, borgmatic)
- Update Prometheus to accept metrics via remote_write
- Add Loki datasource to Grafana

## Test plan
- [ ] Run `mise run provision-indri -- --check --diff` to verify changes
- [ ] Apply with `mise run provision-indri`
- [ ] Verify services: `mise run indri-services-check`
- [ ] Check Grafana Explore with Loki datasource
- [ ] Query logs: `{service="grafana"}`
- [ ] Verify metrics still flowing to Prometheus dashboards

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Reviewed-on: https://forge.tail8d86e.ts.net/eblume/blumeops/pulls/11
2026-01-15 12:24:13 -08:00

76 lines
2.5 KiB
Bash
Executable file

#!/usr/bin/env bash
#MISE description="Check that all indri services are online and responding"
#
# Probes each indri service and reports OK/FAILED per service.

# Strict mode: abort on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -e -u -o pipefail

# Overall result flag; the check helpers set it to 1 on any failed probe.
FAILED=0

# ANSI colour sequences, kept as literal backslash escapes so that `echo -e`
# expands them at print time.
NC='\033[0m' # No Color (reset)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
#######################################
# Run an arbitrary shell command as a health probe and print its verdict.
# Globals:   GREEN, RED, NC (read); FAILED (set to 1 when the probe fails)
# Arguments: $1 - label shown in the left-hand column
#            $2 - command line, evaluated with eval; all of its output is
#                 discarded and only the exit status is used
# Outputs:   "<label>..." padded to 20 columns, then a coloured OK or FAILED
#######################################
check_service() {
  local label="$1"
  local probe="$2"
  local verdict="${GREEN}OK${NC}"

  printf "%-20s " "$label..."

  # Running the probe as an `if` condition keeps `set -e` from aborting the
  # script when it fails.
  if ! eval "$probe" > /dev/null 2>&1; then
    verdict="${RED}FAILED${NC}"
    FAILED=1
  fi

  echo -e "$verdict"
}
#######################################
# Probe a URL with curl and print its verdict.
# Globals:   GREEN, RED, NC (read); FAILED (set to 1 when the probe fails)
# Arguments: $1 - label shown in the left-hand column
#            $2 - URL to fetch
# Outputs:   "<label>..." padded to 20 columns, then a coloured OK or FAILED
#######################################
check_http() {
  local label="$1"
  local endpoint="$2"

  printf "%-20s " "$label..."

  # -s: silent, -f: fail on HTTP errors, --max-time: cap the request at 5 s.
  if ! curl -sf --max-time 5 "$endpoint" > /dev/null 2>&1; then
    echo -e "${RED}FAILED${NC}"
    FAILED=1
    return 0
  fi

  echo -e "${GREEN}OK${NC}"
}
echo "Checking indri services..."
echo "=========================="
echo ""

# Check via SSH that services are running on indri.
# ConnectTimeout bounds every probe so an unreachable host is reported as
# FAILED within seconds instead of stalling each check on the TCP timeout.
ssh_indri="ssh -o ConnectTimeout=5 indri"

echo "Local services (via launchctl/brew services):"
# brew-managed services: the row for the service must say "started".
check_service "loki" "$ssh_indri 'brew services list | grep loki | grep started'"
check_service "alloy" "$ssh_indri 'brew services list | grep grafana-alloy | grep started'"
check_service "prometheus" "$ssh_indri 'brew services list | grep prometheus | grep started'"
# "grafana" is a substring of "grafana-alloy"; drop that row first so a running
# Alloy cannot make a stopped Grafana look healthy.
check_service "grafana" "$ssh_indri 'brew services list | grep grafana | grep -v grafana-alloy | grep started'"
check_service "transmission" "$ssh_indri 'brew services list | grep transmission | grep started'"
# launchctl-managed services: rows starting with "-" are filtered out, so the
# probe only passes when some matching row does not start with "-"
# (presumably a "-" in that first column means the job is not running — verify
# against `launchctl list` output).
check_service "transmission-metrics" "$ssh_indri 'launchctl list | grep transmission-metrics | grep -v \"^-\"'"
check_service "kiwix-serve" "$ssh_indri 'launchctl list | grep kiwix | grep -v \"^-\"'"
check_service "forgejo" "$ssh_indri 'brew services list | grep forgejo | grep started'"
check_service "devpi" "$ssh_indri 'launchctl list | grep devpi | grep -v \"^-\"'"
echo ""
echo "HTTP endpoints (via Tailscale):"
check_http "Loki" "http://indri:3100/ready"
check_http "Prometheus" "http://indri:9090/-/healthy"
check_http "Grafana" "http://indri:3000/api/health"
check_http "Kiwix" "http://indri:5501/"
check_http "Forgejo" "http://indri:3001/"
check_http "Devpi" "http://indri:3141/+api"

# Transmission RPC is localhost-only by design, check via SSH.
# A request without an X-Transmission-Session-Id header is answered with
# HTTP 409 (the RPC's CSRF handshake), which `curl -f` reports as a failure.
# Inspect the status code instead and treat 200 or 409 as "daemon is
# answering". --max-time / ConnectTimeout keep a hung daemon or unreachable
# host from stalling the run.
check_service "Transmission RPC" "ssh -o ConnectTimeout=5 indri 'curl -s -o /dev/null --max-time 5 -w \"%{http_code}\" http://127.0.0.1:9091/transmission/rpc | grep -E \"^(200|409)\$\"'"

# Check that transmission metrics are being collected (the .prom file exists).
# NOTE(review): the path is still under node_exporter/; confirm this is the
# directory the current metrics collector actually reads.
check_service "Transmission metrics" "ssh -o ConnectTimeout=5 indri 'test -f /opt/homebrew/var/node_exporter/textfile/transmission.prom'"
echo ""
# Final verdict: FAILED is set to 1 by any failing probe above, and the exit
# status mirrors it so callers can act on the result.
if [ $FAILED -ne 0 ]; then
  echo -e "${RED}Some services failed health check${NC}"
  exit 1
fi

echo -e "${GREEN}All services healthy!${NC}"
exit 0