blumeops/mise-tasks/runner-logs

131 lines
4.1 KiB
Text
Raw Normal View History

#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = ["httpx>=0.28.1", "rich>=14.0.0", "typer>=0.24.0"]
# ///
#MISE description="Get logs for a Forgejo Actions workflow run (indri or ringtail runner)"
#USAGE arg "<runner>" help="Runner filter: indri, ringtail, or all"
#USAGE arg "[run_id]" help="Run ID to fetch logs for (omit to list recent runs)"
"""Fetch Forgejo Actions workflow logs from indri's log storage.
Both the indri k8s runner and ringtail nix-container-builder runner report
logs back to the Forgejo server on indri. This tool lists recent runs
(optionally filtered by runner) and fetches compressed logs by run ID.

Usage:
    mise run runner-logs all       # list recent runs from all runners
    mise run runner-logs ringtail  # list recent ringtail runs
    mise run runner-logs all 337   # fetch logs for run 337
"""
import subprocess
import sys
from typing import Annotated
import httpx
import typer
from rich.console import Console
from rich.table import Table
Expose Forgejo publicly at forge.eblu.me (#278) ## Summary Expose Forgejo publicly at `forge.eblu.me` via the Fly.io reverse proxy — the first dynamic, authenticated public-facing service. - **Forgejo hardening:** Domain changed to forge.eblu.me, SSH stays on forge.ops.eblu.me, reverse proxy trust headers configured, local registration locked to external-only (Authentik SSO) - **Tailscale Ingress:** ExternalName Service + Ingress in tailscale-operator creates forge.tail8d86e.ts.net endpoint - **Fly.io proxy:** nginx server block with rate-limited auth endpoints (3r/s), fail2ban with custom nginx-deny action, security headers, /swagger blocked, WebSocket support, 512m body limit - **Authentik:** OAuth callback updated to forge.eblu.me - **DNS/TLS:** CNAME record in Pulumi, cert in fly-setup - **Rename:** ~29 files updated from forge.ops.eblu.me to forge.eblu.me (HTTPS refs only; SSH, container builds, and Caddy table kept as-is) ## Deployment Order 1. `mise run provision-indri -- --tags forgejo` (config changes) 2. Verify forge.ops.eblu.me still works 3. `argocd app set tailscale-operator --revision feature/forge-public && argocd app sync tailscale-operator` 4. Verify `curl https://forge.tail8d86e.ts.net` 5. `cd fly && fly deploy` 6. Verify pre-DNS: `curl -H "Host: forge.eblu.me" https://blumeops-proxy.fly.dev/` 7. `fly certs add forge.eblu.me -a blumeops-proxy` 8. `argocd app set authentik --revision feature/forge-public && argocd app sync authentik` 9. `mise run dns-preview && mise run dns-up` 10. Full verification (see below) 11. Rehearse `mise run fly-shutoff` 12. 
After merge: reset ArgoCD revisions to main, re-sync ## Verification Checklist - [ ] forge.eblu.me loads, shows public repos - [ ] forge.ops.eblu.me still works from tailnet - [ ] SSH clone via forge.ops.eblu.me:2222 works - [ ] HTTPS clone via forge.eblu.me works - [ ] UI shows forge.eblu.me for HTTPS clone, forge.ops.eblu.me for SSH - [ ] /swagger returns 403 - [ ] Rapid login attempts trigger 429 rate limit - [ ] fail2ban bans after 5 failed logins in 10 minutes - [ ] ArgoCD can still sync (SSH unaffected) - [ ] `mise run fly-shutoff` stops all public traffic - [ ] `mise run services-check` passes Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/278
2026-03-03 08:40:41 -08:00
# Base URL of the Forgejo REST API that list_runs() queries.
FORGE_API = "https://forge.eblu.me/api/v1"
# "owner/name" path segment of the repository whose Actions runs are listed.
REPO = "eblume/blumeops"
# Directory on indri (reached over ssh by fetch_log) that holds this
# repository's compressed Actions logs.
ACTIONS_LOG_DIR = "/opt/homebrew/var/forgejo/data/actions_log/eblume/blumeops"
# Workflows using the ringtail nix-container-builder runner; everything else
# runs on the indri k8s runner.
RINGTAIL_WORKFLOWS = {"build-container-nix.yaml"}
# Typer application object; the command below is registered on it.
app = typer.Typer(add_completion=False)
def runner_for_workflow(workflow_id: str) -> str:
    """Return the runner name ("ringtail" or "indri") for *workflow_id*.

    Workflows listed in ``RINGTAIL_WORKFLOWS`` map to ``"ringtail"``; every
    other workflow ID (including the empty string) maps to ``"indri"``.
    """
    if workflow_id in RINGTAIL_WORKFLOWS:
        return "ringtail"
    return "indri"
def list_runs(runner: str, console: Console) -> None:
    """Print a table of recent Forgejo Actions runs, filtered by runner.

    Queries the repository's ``actions/tasks`` endpoint, inspects the first
    20 entries of the returned ``workflow_runs`` list and renders the ones
    whose runner (derived from ``workflow_id``) matches the filter.

    Args:
        runner: ``"indri"`` or ``"ringtail"`` to show only that runner's
            runs, or ``"all"`` to disable filtering.
        console: Rich console the table is printed to.

    Raises:
        httpx.HTTPStatusError: If the API responds with an error status.
        KeyError: If a run entry has no ``"id"`` field.
    """
    # Local import: rich is already a dependency of this script, and keeping
    # the import here makes the function self-contained.
    from rich.markup import escape

    resp = httpx.get(
        f"{FORGE_API}/repos/{REPO}/actions/tasks",
        timeout=15,
    )
    resp.raise_for_status()
    runs = resp.json().get("workflow_runs", [])

    table = Table(title=f"Recent runs (filter: {runner})")
    table.add_column("ID", style="cyan", no_wrap=True)
    table.add_column("Status")
    table.add_column("Runner")
    table.add_column("Name")
    table.add_column("Title")

    for run in runs[:20]:
        host = runner_for_workflow(run.get("workflow_id", ""))
        if runner != "all" and host != runner:
            continue
        status = run.get("status", "")
        style = "green" if status == "success" else "red" if status == "failure" else "yellow"
        table.add_row(
            str(run["id"]),
            f"[{style}]{status}[/{style}]",
            host,
            # Names and titles are free-form text from the API. Escape them
            # (after truncating) so sequences such as "[skip ci]" are shown
            # literally instead of being parsed as Rich markup tags.
            escape((run.get("name") or "")[:40]),
            escape((run.get("display_title") or "")[:30]),
        )
    console.print(table)
def fetch_log(run_id: int) -> None:
    """Write the decompressed log for *run_id* to stdout.

    The compressed log is read on indri over ssh and decompressed there with
    ``zstd``. On failure an error, the expected path, any remote error
    output, and the ten highest-numbered available log IDs are printed to
    stderr.

    Args:
        run_id: Numeric ID whose ``<id>.log.zst`` file should be fetched.

    Raises:
        typer.Exit: With exit code 1 when the log could not be retrieved.
    """
    # Forgejo (like Gitea) shards log files into directories named after the
    # low byte of the ID, i.e. "%02x" of (id % 256). Formatting the full ID
    # would yield e.g. "151" for 337, whereas the file lives under "51".
    hex_subdir = f"{run_id % 256:02x}"
    log_file = f"{ACTIONS_LOG_DIR}/{hex_subdir}/{run_id}.log.zst"

    # All logs live on indri (the Forgejo server) regardless of runner
    result = subprocess.run(
        ["ssh", "indri", f"test -f '{log_file}' && zstd -d -c '{log_file}'"],
        capture_output=True,
        text=True,
    )
    if result.returncode == 0:
        sys.stdout.write(result.stdout)
        return

    typer.echo(f"Error: Log file not found for run {run_id}", err=True)
    typer.echo(f"Expected path: {log_file}", err=True)
    # `test -f` fails silently, so anything on stderr means ssh or zstd itself
    # failed; surface it rather than hiding the real cause.
    remote_err = result.stderr.strip()
    if remote_err:
        typer.echo(f"Remote error output: {remote_err}", err=True)
    typer.echo("", err=True)
    typer.echo("Available logs:", err=True)
    avail = subprocess.run(
        [
            "ssh",
            "indri",
            f"find '{ACTIONS_LOG_DIR}' -name '*.log.zst' -exec basename {{}} .log.zst \\; | sort -n | tail -10",
        ],
        capture_output=True,
        text=True,
    )
    typer.echo(avail.stdout, err=True)
    raise typer.Exit(1)
@app.command()
def main(
    runner: Annotated[
        str,
        typer.Argument(help="Runner filter: indri, ringtail, or all"),
    ],
    run_id: Annotated[
        int | None,
        typer.Argument(help="Run ID to fetch logs for (omit to list recent runs)"),
    ] = None,
) -> None:
    """Get logs for a Forgejo Actions workflow run."""
    # NOTE: the docstring above doubles as the CLI help text, so it is kept
    # short; details live in these comments.
    if runner not in ("indri", "ringtail", "all"):
        # Diagnostics go to stderr, matching fetch_log, so that stdout only
        # ever carries the table or the log contents.
        typer.echo(
            f"Error: runner must be 'indri', 'ringtail', or 'all', got '{runner}'",
            err=True,
        )
        raise typer.Exit(1)

    if run_id is None:
        # No ID given: show the recent runs for the selected runner.
        list_runs(runner, Console())
    else:
        # The runner filter is validated above but not used for fetching;
        # the log is looked up by ID alone.
        fetch_log(run_id)
# Script entry point: hand control to the Typer application.
if __name__ == "__main__":
    app()