Merge pull request 'heph Authentik: grant offline_access scope (fixes spoke sync refresh-token 400)' (#371) from heph-offline-access into main

heph Authentik: grant offline_access scope (fixes spoke sync refresh-token 400)
The heph CLI requests scope "openid offline_access", but the Authentik heph OAuth2 provider only mapped openid/email/profile. Without the offline_access mapping the issued refresh token is bound to the login session rather than the 30-day refresh-token window; once the session lapses, hephd's refresh_token grant returns 400 Bad Request and spoke sync silently degrades (heph sync --status -> auth_failure: true). Add the built-in offline_access scope mapping to the provider's property_mappings and document the requirement in the service reference. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-06 18:29:47 -07:00 · 2026-06-06 18:07:13 -07:00 · 2026-06-05 08:22:46 -07:00 · 2026-06-05 07:40:51 -07:00 · 2026-06-05 07:30:31 -07:00 · 2026-06-05 06:46:58 -07:00
380 changed files with 10018 additions and 4312 deletions
--- a/.forgejo/workflows/build-blumeops.yaml
+++ b/.forgejo/workflows/build-blumeops.yaml
@ -178,10 +178,11 @@ jobs:

            echo "## Documentation"
            echo ""
-            echo "Download \`$TARBALL\` and configure the quartz container with:"
+            echo "Download \`$TARBALL\` directly, or bump \`docs_version\`"
+            echo "in \`ansible/roles/docs/defaults/main.yml\` and run:"
            echo ""
            echo "\`\`\`"
-            echo "DOCS_RELEASE_URL=https://forge.eblu.me/eblume/blumeops/releases/download/$VERSION/$TARBALL"
+            echo "mise run provision-indri -- --tags docs"
            echo "\`\`\`"
          } > /tmp/release_body.txt

@ -223,18 +224,16 @@ jobs:
          echo ""
          echo "Release created successfully!"

-      - name: Update docs deployment
+      - name: Bump docs_version in ansible role
        run: |
          VERSION="${{ steps.version.outputs.version }}"
-          TARBALL="docs-${VERSION}.tar.gz"
-          DEPLOYMENT_FILE="argocd/manifests/docs/deployment.yaml"
-          RELEASE_URL="https://forge.eblu.me/eblume/blumeops/releases/download/${VERSION}/${TARBALL}"
+          DEFAULTS_FILE="ansible/roles/docs/defaults/main.yml"

-          echo "Updating $DEPLOYMENT_FILE with new release URL..."
-          yq -i "(.spec.template.spec.containers[0].env[] | select(.name == \"DOCS_RELEASE_URL\")).value = \"${RELEASE_URL}\"" "$DEPLOYMENT_FILE"
+          echo "Bumping docs_version in $DEFAULTS_FILE to ${VERSION}..."
+          yq -i ".docs_version = \"${VERSION}\"" "$DEFAULTS_FILE"

-          echo "Updated deployment:"
-          grep -A1 "DOCS_RELEASE_URL" "$DEPLOYMENT_FILE"
+          echo "Updated defaults:"
+          grep -E "^docs_version:" "$DEFAULTS_FILE"

      - name: Commit release changes
        env:
@ -248,7 +247,7 @@ jobs:
          git config user.email "actions@forge.ops.eblu.me"

          # Stage deployment changes
-          git add argocd/manifests/docs/deployment.yaml
+          git add ansible/roles/docs/defaults/main.yml

          # Stage changelog changes if updated
          if [ "$CHANGELOG_UPDATED" = "true" ]; then
@ -270,34 +269,6 @@ jobs:
            echo "Changes committed and pushed"
          fi

-      - name: Deploy docs
-        env:
-          ARGOCD_AUTH_TOKEN: ${{ secrets.ARGOCD_AUTH_TOKEN }}
-        run: |
-          echo "Syncing docs app via ArgoCD..."
-
-          # Sync docs app (uses ARGOCD_AUTH_TOKEN env var for auth)
-          argocd app sync docs \
-            --server argocd.ops.eblu.me \
-            --grpc-web \
-            --prune
-
-          # Wait for sync to complete
-          argocd app wait docs \
-            --server argocd.ops.eblu.me \
-            --grpc-web \
-            --timeout 120
-
-          echo "Docs app synced successfully!"
-
-      - name: Purge Fly.io proxy cache
-        env:
-          FLY_API_TOKEN: ${{ secrets.FLY_DEPLOY_TOKEN }}
-        run: |
-          echo "Purging nginx cache on Fly.io proxy..."
-          fly ssh console -a blumeops-proxy -C "sh -c 'rm -rf /tmp/cache && nginx -s reload'"
-          echo "Cache purged"
-
      - name: Summary
        run: |
          VERSION="${{ steps.version.outputs.version }}"
@ -309,5 +280,12 @@ jobs:
          echo "Release URL:"
          echo "  https://forge.eblu.me/eblume/blumeops/releases/tag/$VERSION"
          echo ""
-          echo "Asset URL (for DOCS_RELEASE_URL ConfigMap):"
+          echo "Asset URL:"
          echo "  https://forge.eblu.me/eblume/blumeops/releases/download/$VERSION/$TARBALL"
+          echo ""
+          echo "To deploy on indri, run from gilbert:"
+          echo "  mise run provision-indri -- --tags docs"
+          echo ""
+          echo "Then purge the Fly.io proxy cache:"
+          echo "  fly ssh console -a blumeops-proxy -C \\"
+          echo "    \"sh -c 'rm -rf /tmp/cache && nginx -s reload'\""
--- a/.forgejo/workflows/build-container.yaml
+++ b/.forgejo/workflows/build-container.yaml
@ -1,14 +1,13 @@
 # Unified container build workflow
-# Triggers on pushes to main that modify containers/*, or via manual dispatch.
-# Detects which containers changed and routes to the correct runner:
-#   - Dockerfile containers build on k8s (indri) via Dagger
+# Manual dispatch only — use `mise run container-build-and-release <name>`.
+# Shared Dagger helpers (src/blumeops/) make path-based auto-triggers unreliable,
+# so all container builds are triggered explicitly.
+# Routes to the correct runner:
+#   - Dockerfile/Dagger containers build on k8s (indri) via Dagger
 #   - Nix containers build on nix-container-builder (ringtail) via nix-build + skopeo
 name: Build Container

 on:
-  push:
-    branches: [main]
-    paths: ['containers/**']
  workflow_dispatch:
    inputs:
      container:
@ -33,18 +32,11 @@ jobs:
          ref: ${{ inputs.ref || github.sha }}
          fetch-depth: 2

-      - name: Detect and classify changed containers
+      - name: Classify container build type
        id: classify
        run: |
-          if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
-            CHANGED='["${{ inputs.container }}"]'
-          else
-            CHANGED=$(git diff --name-only HEAD~1 HEAD -- containers/ \
-              | cut -d/ -f2 | sort -u \
-              | jq -R -s -c 'split("\n") | map(select(length > 0))')
-          fi
-
-          echo "Changed containers: $CHANGED"
+          CHANGED='["${{ inputs.container }}"]'
+          echo "Building container: $CHANGED"

          # Classify each container by build type (a container can appear in both)
          DAGGER='[]'
@ -74,9 +66,10 @@ jobs:
    if: needs.detect.outputs.dagger != '[]'
    runs-on: k8s
    env:
-      # Disable Python SDK OTLP metrics exporter — the Dagger engine's local
-      # OTLP endpoint returns 500s, causing ~9s retry cycles per minute.
-      OTEL_METRICS_EXPORTER: none
+      # Send Dagger OTLP telemetry to Tempo. Without a real backend the
+      # engine's internal proxy returns 500 on /v1/metrics, causing noisy
+      # retry warnings in every build.
+      OTEL_EXPORTER_OTLP_ENDPOINT: http://tempo.tracing.svc.cluster.local:4318
    strategy:
      matrix:
        container: ${{ fromJson(needs.detect.outputs.dagger) }}
--- a/.forgejo/workflows/cv-deploy.yaml
+++ b/.forgejo/workflows/cv-deploy.yaml
@ -1,12 +1,14 @@
 # CV Deploy Workflow
 #
-# Updates the CV deployment to a specific package version, commits
-# the change, and syncs via ArgoCD.
+# Bumps cv_version in ansible/roles/cv/defaults/main.yml and pushes the change.
+# Deployment to indri is manual (runner has no SSH access to indri):
+#   mise run provision-indri -- --tags cv
 #
 # Usage:
 #   1. Release a new CV package from the cv repo first
 #   2. Go to Actions > Deploy CV > Run workflow
 #   3. Enter the version to deploy, or leave as "latest"
+#   4. Run the command above on gilbert to apply

 name: Deploy CV

@ -60,18 +62,16 @@ jobs:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

-      - name: Update CV deployment
+      - name: Bump cv_version in ansible role
        run: |
          VERSION="${{ steps.version.outputs.version }}"
-          TARBALL="cv-${VERSION}.tar.gz"
-          DEPLOYMENT_FILE="argocd/manifests/cv/deployment.yaml"
-          RELEASE_URL="https://forge.eblu.me/api/packages/eblume/generic/cv/${VERSION}/${TARBALL}"
+          DEFAULTS_FILE="ansible/roles/cv/defaults/main.yml"

-          echo "Updating $DEPLOYMENT_FILE with CV_RELEASE_URL..."
-          yq -i "(.spec.template.spec.containers[0].env[] | select(.name == \"CV_RELEASE_URL\")).value = \"${RELEASE_URL}\"" "$DEPLOYMENT_FILE"
+          echo "Bumping cv_version in $DEFAULTS_FILE to ${VERSION}..."
+          yq -i ".cv_version = \"${VERSION}\"" "$DEFAULTS_FILE"

-          echo "Updated deployment:"
-          grep -A1 "CV_RELEASE_URL" "$DEPLOYMENT_FILE"
+          echo "Updated defaults:"
+          grep -E "^cv_version:" "$DEFAULTS_FILE"

      - name: Commit release changes
        env:
@ -82,7 +82,7 @@ jobs:
          git config user.name "Forgejo Actions"
          git config user.email "actions@forge.ops.eblu.me"

-          git add argocd/manifests/cv/deployment.yaml
+          git add ansible/roles/cv/defaults/main.yml

          if git diff --cached --quiet; then
            echo "No changes to commit (already at $VERSION)"
@ -94,38 +94,16 @@ jobs:
            echo "Changes committed and pushed"
          fi

-      - name: Deploy CV
-        env:
-          ARGOCD_AUTH_TOKEN: ${{ secrets.ARGOCD_AUTH_TOKEN }}
-        run: |
-          echo "Syncing CV app via ArgoCD..."
-
-          argocd app sync cv \
-            --server argocd.ops.eblu.me \
-            --grpc-web \
-            --prune
-
-          argocd app wait cv \
-            --server argocd.ops.eblu.me \
-            --grpc-web \
-            --timeout 120
-
-          echo "CV app synced successfully!"
-
-      - name: Purge Fly.io proxy cache
-        env:
-          FLY_API_TOKEN: ${{ secrets.FLY_DEPLOY_TOKEN }}
-        run: |
-          echo "Purging nginx cache on Fly.io proxy..."
-          fly ssh console -a blumeops-proxy -C "sh -c 'rm -rf /tmp/cache && nginx -s reload'"
-          echo "Cache purged"
-
      - name: Summary
        run: |
          VERSION="${{ steps.version.outputs.version }}"
          echo "================================================"
-          echo "CV Deployed: $VERSION"
+          echo "CV version bumped: $VERSION"
          echo "================================================"
          echo ""
-          echo "CV should now be live at:"
-          echo "  https://cv.ops.eblu.me/"
+          echo "To deploy on indri, run from gilbert:"
+          echo "  mise run provision-indri -- --tags cv"
+          echo ""
+          echo "Then purge the Fly.io proxy cache:"
+          echo "  fly ssh console -a blumeops-proxy -C \\"
+          echo "    \"sh -c 'rm -rf /tmp/cache && nginx -s reload'\""
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,6 @@
 .claude/settings.local.json
 .claude/agent-memory/
+.claude/scheduled_tasks.lock

 # Python
 __pycache__/
@ -12,3 +13,5 @@ __pycache__/

 # OS
 .DS_Store
+/**/__pycache__
+/.env
--- a/AGENTS.md
+++ b/AGENTS.md
@ -0,0 +1,171 @@
+# AGENTS.md
+
+Guidance for AI agents working in this repository. See also [[ai-assistance-guide]].
+
+## Overview
+
+blumeops is Erich Blume's GitOps repository for personal infrastructure, orchestrated via tailnet `tail8d86e.ts.net`.
+
+**CRITICAL: Public repo at github.com/eblume/blumeops - never commit secrets!**
+
+**Shell:** The user's interactive shell may differ from the current harness shell. Prefer repo-safe, non-interactive commands when possible, and match the user's shell conventions when giving interactive examples.
+
+## Rules
+
+1. **Always run `mise run ai-docs` at session start**
+    This will refresh your context with important information you will be assumed to know and follow.
+    **Read the full output** — never truncate, pipe to `head`/`tail`, or skip sections.
+    For problems with a large surface area, ask the user if `mise run ai-sources` should also be run — it concatenates all non-doc source files (~270K tokens) for deep codebase context.
+2. **Always use `--context=minikube-indri` with kubectl** (or `--context=k3s-ringtail` for ringtail services) - work contexts must never be touched
+    **NEVER run `minikube delete`** — it destroys all PVs, etcd, and cluster state. Use `minikube stop`/`minikube start` for restarts. If minikube is stuck, see [[restart-indri]]. Full rebuild from scratch requires the DR procedure in [[rebuild-minikube-cluster]].
+3. **Classify the change as C0/C1/C2 before starting** (see below) — this determines branching and PR requirements
+4. **Feature branches + PRs for C1/C2** - checkout main, pull, create branch, open PR via `tea pr create`. C0 goes direct to main.
+5. **Check PR comments with `mise run pr-comments <pr_number>`** before proceeding
+6. **Add changelog fragments (all change levels)** - `docs/changelog.d/<name>.<type>.md`
+    Types: `feature`, `bugfix`, `infra`, `doc`, `ai`, `misc`
+    Applies to C0, C1, and C2 whenever the change is user-visible or noteworthy.
+    - **C1/C2:** Use branch name: `<branch>.<type>.md`
+    - **C0:** Use orphan prefix: `+<descriptive-slug>.<type>.md` (avoids `main.*` collisions)
+7. **Test before applying** - dry runs (`--check --diff`), syntax checks, `ssh indri '...'`
+8. **Wait for user review before deploying** (C1/C2)
+9. **Never merge PRs or push to main without explicit request** (C0 commits to main are fine)
+10. **Verify deployments** - `mise run services-check`
+
+## Change Classification
+
+Before starting work, classify the change:
+
+| Class | Name | When to use | Key trait |
+|-------|------|-------------|-----------|
+| **C0** | Quick Fix | Small, low-risk, fix-forward safe | Direct to main, no PR |
+| **C1** | Human Review | Moderate complexity or risk | Feature branch + PR, docs-first |
+| **C2** | Mikado Chain | Multi-phase, multi-session, high complexity | Mikado Branch Invariant |
+
+**C0** — commit directly to main. No branch or PR needed. Fix forward if problems arise.
+
+**C1** — feature branch with early PR. Search related docs first, write documentation changes before code, deploy from the unmerged branch (ArgoCD `--revision`, Ansible from checkout). Upgrade to C2 if complexity spirals.
+
+**C2** — branch `mikado/<chain-stem>` governed by the Mikado Branch Invariant: all card commits first, then code progress, then card closures. Commits use `C2(<chain>): plan/impl/close/finalize` convention. Reset the branch when new prerequisites are discovered. Resume with `mise run docs-mikado --resume`.
+
+See [[agent-change-process]] for the full methodology.
+
+## Project Structure
+
+```
+./docs/                 # documentation (Diataxis, Quartz)
+./docs/changelog.d/     # towncrier fragments
+./.dagger/              # dagger pipelines
+./.forgejo/             # forgejo-runner actions and workflows
+./mise-tasks/           # scripts via `mise run`
+./ansible/playbooks/    # ansible (indri.yml primary)
+./ansible/roles/        # indri service roles
+./argocd/apps/          # ArgoCD Application definitions
+./argocd/manifests/     # k8s manifests per service
+./fly/                  # fly.io proxy for public routing
+./pulumi/               # Pulumi IaC (tailnet ACLs, dns, cloud)
+~/.config/{nvim,fish}   # user's shell config, managed by chezmoi
+~/code/personal/        # user's projects
+~/code/personal/zk      # user's zettelkasten (Obsidian-sync). Reference-data source; migrating into heph docs (hephaestus).
+~/code/3rd/             # mirrored external projects
+~/code/work             # FORBIDDEN
+```
+Other code paths will be listed via ai-docs; this is just an overview. When you
+encounter wiki-links (`[[like-this]]`), they refer to docs/ cards.
+
+## Service Deployment
+
+### Kubernetes (ArgoCD)
+
+Most services run in minikube on indri via ArgoCD (app-of-apps, manual sync). GPU workloads (Frigate, ntfy) run on ringtail's k3s cluster, also managed by ArgoCD.
+
+**PR workflow:**
+1. Create branch, modify `argocd/manifests/<service>/`
+2. Push. Sync 'apps' app if service definition changed (set --revision to branch).
+3. Test on branch: `argocd app set <service> --revision <branch> && argocd app sync <service>`
+4. After merge: `argocd app set <service> --revision main && argocd app sync <service>`
+
+**Commands:** `argocd app list|get|diff|sync <app>`
+
+**Login:** `argocd login argocd.ops.eblu.me --sso` (opens browser for Authentik SSO). Admin fallback for break-glass: `argocd login argocd.ops.eblu.me --username admin --password "$(op read 'op://vg6xf6vvfmoh5hqjjhlhbeoaie/srogeebssulhtb6tnqd7ls6qey/password')"`
+
+### Indri (Ansible)
+
+Native services: Forgejo, Zot, Caddy, Borgmatic, Alloy
+
+```fish
+mise run provision-indri                    # full
+mise run provision-indri -- --tags <role>   # specific
+mise run provision-indri -- --check --diff  # dry run
+```
+
+### Routing
+
+| Domain | Mechanism | Reachable from |
+|--------|-----------|----------------|
+| `*.eblu.me` | Fly.io proxy (Tailscale tunnel) | public internet |
+| `*.ops.eblu.me` | Caddy on indri | k8s pods, containers, tailnet |
+| `*.tail8d86e.ts.net` | Tailscale MagicDNS | tailnet clients only |
+
+Check tailscale serve: `ssh indri 'tailscale serve status --json'`
+
+## Container Releases
+
+```fish
+mise run container-list                       # show images/tags
+mise run container-release <name> <version>   # tag and build
+```
+The goal is to eventually use only locally built containers in all cases, with
+full supply chain control via forge.ops.eblu.me repositories, mirroring source
+from upstream.
+
+**After triggering a build** (manual dispatch or push to main), verify the
+workflow succeeded before proceeding:
+
+```fish
+mise run runner-logs                          # find the run number
+mise run runner-logs <run#>                   # see jobs in the run
+mise run runner-logs <run#> -j <N>            # fetch logs on failure
+```
+
+This also works for other forge repos (`--repo eblume/hermes`).
+
+## Third-Party Projects
+
+Ask user to mirror on forge first, then clone to `~/code/3rd/<project>/`.
+
+### Sporked Projects
+
+Some mirrored projects are "sporked" — a floating-branch soft-fork strategy
+where local patches are continuously rebased on top of upstream. See
+[[spork-strategy]] and [[create-a-spork]] for the full methodology.
+
+Sporked projects live in `~/code/3rd/<project>/` with three remotes:
+`origin` (eblume/ fork on forge), `mirror` (mirrors/ on forge), `upstream`
+(canonical). The `blumeops` branch is the default; `deploy` merges everything.
+
+Create a new spork: `mise run spork-create <mirror-name>`
+
+## Task Discovery
+
+BlumeOps tasks live in [hephaestus](https://github.com/eblume/hephaestus) (`heph`),
+the user's self-hosted context/task system. Fetch them with the CLI:
+
+```fish
+heph list --project Blumeops --json  # outstanding Blumeops tasks as JSON
+```
+
+(This replaced the retired `blumeops-tasks` mise task, which read from Todoist.)
+
+Most operational scripts are stored in `./mise-tasks/`. For scripts with any logic or
+complexity, use `uv run --script` scripts with explicit dependencies. Complex
+workflows with artifacts should become dagger pipelines. Mise tasks are for
+development processes and operations - tools for the user or the agent.
+
+## Credentials
+
+Root store is 1Password. Never grab directly - use existing patterns (ansible
+pre_tasks, external-secrets, scripts with `op` CLI). It's ok to use `op item
+get` without `--reveal` to explore what secrets are available, however.
+
+Prefer `op read "op://vault/item/field"` over `op item get --fields` to avoid
+quoting issues with multi-line values.
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -12,6 +12,343 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

 <!-- towncrier release notes start -->

+## [v1.17.0] - 2026-06-03
+
+### Features
+
+- Deploy the Adelaide / Heidi / Addie baby shower app — guest splash, raffle
+  picker, and prize assignment console — on ringtail k3s with `shower.eblu.me`
+  as the public entry and `shower.ops.eblu.me` as the tailnet admin host. App
+  source: [`adelaide-baby-shower-app`](https://forge.eblu.me/eblume/adelaide-baby-shower-app).
+- Deploy adelaide-baby-shower-app v1.1.0 to ringtail k3s. Replaces the
+  boolean lock with a four-phase `ShowerState` (`pre_event` → `party` →
+  `prizes_locked` → `event_locked`), adds an append-only "guest memories"
+  panel where guests can leave photos and comments for the baby, and
+  polishes the admin and QR views. Three Django migrations
+  (`0009_shower_phase`, `0010_guest_memories`, `0011_book_description`)
+  run automatically in the entrypoint against the SQLite PV. No config
+  or env-var changes.
+
+  Container build also gains a Forgejo-PyPI workaround: Forgejo's simple
+  index returns absolute file URLs hardcoded to the public ROOT_URL
+  (`forge.eblu.me`), which the Fly edge 403s on `/api/packages/*`. The
+  wheel and sdist are now both pulled via direct `fetchurl` against
+  `forge.ops.eblu.me` (tailnet-only) and the wheel is handed to pip as
+  a local path.
+- `review-compliance-reports` now also fetches and summarizes the weekly Prowler container-image and IaC scans (previously only the K8s CIS in-cluster scan was processed). For each scan it shows status counts, severity breakdown, week-over-week delta, and — for the high-volume image/IaC scans — top-N tables grouped by check ID and resource instead of per-finding listings.
+- `runner-logs` now authenticates with a Forgejo API token and auto-detects the repo from the git remote. Job logs are fetched via SSH to indri (reading Forgejo's on-disk zstd log files) instead of the web endpoint, which doesn't support token auth for private repos.
+
+### Bug Fixes
+
+- Fix nightly borgmatic backups failing for 2 days. The shower SQLite
+  dump hook referenced `kubectl --context=k3s-ringtail`, but indri's
+  kubeconfig deliberately doesn't carry the ringtail credentials. The
+  `before_backup` hook's failure aborted the entire run, taking out
+  *both* the local sifaka repo and the BorgBase offsite. Replaced
+  the inline-shell dump with a `~/bin/borgmatic-k8s-sqlite-dump`
+  helper deployed by the ansible role. Each dump entry now declares a
+  `target` of either `local:<context>` (mealie — kubectl uses indri's
+  kubeconfig) or `ssh:<user@host>` (shower — ssh into ringtail and
+  run `k3s kubectl` there, no indri-side kubeconfig needed; k3s.yaml
+  on ringtail is mode 644 so no sudo required). Bytes stream back via
+  `kubectl exec ... -- cat` rather than `kubectl cp`, since `kubectl
+  cp` requires `tar` inside the pod and nix-built images like shower
+  don't bundle it.
+- Shower app container now bakes the wheel + Python deps into the image
+  at build time via `buildPythonPackage` instead of pip-installing on
+  first boot. Boots are deterministic and don't depend on forge PyPI
+  being reachable from the pod. The `wheelHash` in
+  `containers/shower/default.nix` is the sha256 sourced from the
+  [forge PyPI simple index](https://forge.eblu.me/api/packages/eblume/pypi/simple/adelaide-baby-shower-app/);
+  bumping the version means bumping that hash too.
+
+  Borgmatic now covers the shower app: SQLite is dumped from the live
+  pod via `kubectl exec` (mirroring the existing mealie entry, with
+  `context: k3s-ringtail`), and the prize-photo media share is picked up
+  through `/Volumes/shower` (sifaka SMB mount on indri, same pattern as
+  `/Volumes/photos`).
+- Disabled adaptive sync (VRR) on ringtail's DP-1 output. The OMEN 27i IPS panel pumps brightness when its refresh rate swings into the low VRR range during low-framerate content (e.g. game cutscenes), producing a flicker that worsened over a session until a reboot. Pinning the panel to a fixed 165Hz eliminates it.
+- Fixed forge.eblu.me static assets (CSS, JS, images, fonts) not loading — the proxy's static asset cache block was missing the `Host` header, so Caddy couldn't route the requests.
+- Fixed homepage container EACCES on cold start: the nix-built image now chowns
+  `/app/config` to uid 1000 at build time via `fakeRootCommands`, matching the
+  behavior of the old Dockerfile. Without this, homepage couldn't seed missing
+  skeleton configs (proxmox.yaml etc.) or create `/app/config/logs`, crashing on
+  its first uncached request. Caught during the ringtail cutover.
+- Fixed sway keybindings on ringtail — the home-manager `keybindings` block was replacing the module's defaults entirely, leaving only explicit overrides (no workspace switching, focus, move, splits, resize mode, etc). Switched to `lib.mkOptionDefault` with `lib.mkForce` on the conflicting custom binds (`Mod+Return`, `Mod+d`, `Mod+space`, `Mod+l`) so defaults merge back in. Also added `Mod+F1` to show a filterable fuzzel list of current keybindings.
+
+  Fixed fuzzel config errors on launch — `border-radius` and `border-width` were under `[main]`, but fuzzel expects them as `radius`/`width` under a `[border]` section.
+- Pin the Quartz docs build to v4.5.2. The Dagger `build_docs` pipeline cloned Quartz from the default branch unpinned; Quartz v5.0.0 restructured its config layout (`.quartz/plugins`, `../quartz` imports) and broke the docs build against our existing `quartz.config.ts`/`quartz.layout.ts`.
+
+### Infrastructure
+
+- Wire the ringtail `blumeops-pg` cluster (which holds the wave-1-migrated
+  paperless + teslamate databases) into backups and Grafana. Adds a Tailscale
+  LoadBalancer Service (`blumeops-pg-ringtail.tail8d86e.ts.net`) and a Caddy L4
+  route (`pg.ops.eblu.me:5434`), then repoints borgmatic's `teslamate` +
+  `paperless` postgres dumps and the `mealie` SQLite dump at ringtail, and the
+  Grafana TeslaMate datasource at the ringtail DB. Closes the backup gap that
+  opened at cutover (the migrated live data was still being backed up from the
+  now-frozen minikube copies) and unblocks the wave-1 decommission.
+- Migrated homepage dashboard from minikube (indri/arm64) to k3s (ringtail/amd64).
+  The container is now built via nix (`containers/homepage/default.nix`), adapted
+  from nixpkgs `homepage-dashboard` with the upstream Next.js cache patches and
+  wrapped with `dockerTools.buildLayeredImage`. Autodiscovery shifts: services on
+  minikube (ArgoCD, Immich, Kiwix, Mealie, Miniflux, Grafana, Prometheus,
+  Navidrome, Paperless, TeslaMate, Transmission) become explicit static entries
+  in `services.yaml`; ringtail services (Authentik, Frigate/NVR, Ntfy, Ollama)
+  auto-populate via Ingress annotations.
+- Migrated CV (`cv.eblu.me`) and Docs (`docs.eblu.me`) from minikube Deployments to indri-native ansible roles. Caddy now serves the extracted release tarballs directly via a new `kind: static` service-block in the Caddy template — no daemon, no container — replacing the prior nginx-in-a-pod layer. Removes a network hop on every request and shrinks minikube's footprint. See [[cv-on-indri]] and [[docs-on-indri]]. Part of the broader minikube wind-down.
+- Migrated devpi (PyPI mirror at `pypi.ops.eblu.me`) from a minikube StatefulSet to a launchd-managed service on indri. devpi-server now runs in a uv-managed venv with pinned `devpi-server` and `devpi-web` versions, listens on `127.0.0.1:3141`, and is fronted by Caddy. The minikube StatefulSet was crash-looping under memory pressure (and breaking the Python toolchain everywhere); the new layout removes a layer of dependency on cluster health for critical-path tooling. See [[devpi-on-indri]].
+- Move the entire Immich stack — server, machine-learning, valkey,
+  and the PostgreSQL+VectorChord cluster — off `minikube-indri` and
+  onto `k3s-ringtail`. Postgres data migrated zero-loss via CNPG
+  `pg_basebackup` (replica catch-up then promote); row counts on
+  `asset`, `user`, `album`, `smart_search`, `activity`, `asset_face`
+  verified equal between source and replica before cutover. The ML
+  pod now uses ringtail's RTX 4080 via the nvidia-device-plugin
+  (time-slicing bumped 2 → 4 to share with frigate + ollama). Caddy
+  routing at `photos.ops.eblu.me` is unchanged (still
+  `photos.tail8d86e.ts.net`, the device just lives on ringtail now).
+  Borgmatic backups continue against the same `immich-pg` tailnet
+  hostname. First concrete chain in the broader indri-k8s
+  decommission effort.
+- Add local nix container build for `tailscale` (`containers/tailscale/default.nix`) so ringtail's tailscale-operator ProxyClass proxy pods pull from the forge mirror instead of `docker.io/tailscale/tailscale`. Pinned at v1.94.2 to match `service-versions.yaml`. Indri's tailscale-operator continues to use upstream during the k8s-to-ringtail migration.
+- Address the 6 critical Prowler IaC findings against `argocd/manifests/`. Prowler's IaC provider hardcodes `self._mutelist = None` and delegates filtering to Trivy, but doesn't plumb `--ignorefile` through — so the documented "use Trivy filtering" path is actually broken. Added a shim around `trivy` in the Prowler image that injects `--ignorefile $TRIVY_IGNOREFILE` for `trivy fs` invocations when the env var points at a real file. The IaC cronjob now mounts `mutelist/trivyignore.yaml` (Trivy's per-path schema) and sets the env var, muting the `external-secrets` and `kube-state-metrics` Secret-access findings (KSV-0041, KSV-0114). Separately, `grafana-clusterrole` is tightened to remove `secrets` access entirely: the dashboard sidecar already only consumes ConfigMap-labeled dashboards, so its `RESOURCE` env var is now `configmap` instead of `both`.
+- Pin ringtail's wired IP to `192.168.1.21` via NixOS scripted networking; NetworkManager no longer manages `enp5s0`. Removes DHCP lease renewal as a failure mode after a silent lease teardown took ringtail offline. Also explicitly enables `net.ipv4.ip_forward` (previously set implicitly by scripted-DHCP) so k3s pod networking and Tailscale routing continue to work with static networking.
+- Ripped out the compensating-controls (CC) framework: deleted `compensating-controls.yaml`, the `review-compensating-controls` mise task, and the associated how-to / explanation docs. Prowler and Kingfisher continue to run weekly and produce reports; the Prowler mutelist YAML files remain in place but no longer carry `CC: <id>` prefixes — each entry just keeps a free-form `Description` of why the finding is muted. The CC review cadence proved to be more overhead than this single-operator homelab needed.
+- Wire shower app for public exposure: fly nginx `shower.eblu.me` server
+  block as a guest-only surface — splash page, `/prizes/<token>/`, static
+  assets, media. Everything authenticated (`/admin/`, `/host/`,
+  `/accounts/`) returns 403 with a "tailnet only" pointer. Staff hit
+  `shower.ops.eblu.me` for the operator console + admin; the app's
+  v1.0.1 `DJANGO_PUBLIC_URL_BASE` setting makes QR codes generated on
+  the tailnet point back at the WAN host for guests. Plus a Caddy route
+  on indri, Pulumi Gandi CNAME, and a Grafana APM dashboard tracking
+  request rate, error rate, latency, bandwidth, and access logs.
+- Mirror Valkey 8.1 locally as `registry.ops.eblu.me/blumeops/valkey`. Replaces direct pulls of `docker.io/valkey/valkey:8.1-alpine` for paperless and immich sidecars. Built via native Dagger pipeline on Alpine 3.22. Stateless swap — no data migration. Authentik's nix-built Redis remains separate.
+- Add nix-built amd64 valkey for ringtail (`containers/valkey/default.nix`) so immich-ringtail can stop pulling the upstream multi-arch `docker.io/valkey/valkey` image. Existing `container.py` continues to build Alpine arm64 for paperless on indri. Both bump to valkey 8.1.7 (Alpine 3.22 8.1.7-r0 / nixpkgs 8.1.7).
+- Upgrade Grafana Alloy v1.14.0 → v1.16.0 across all four service deployments
+  (alloy-k8s, alloy-ringtail, alloy-tracing-ringtail on k8s; alloy native on
+  indri). Pulls in stable database observability (v1.15) and the OTel Collector
+  v0.147.0 bump. Container build also migrated from Dockerfile to native Dagger
+  `container.py` per the build-container-image migration playbook.
+- Upgraded Dagger from v0.20.1 to v0.20.6 (engine, CLI pin, and SDK regen) and migrated `runner-job-image` from a Debian-based Dockerfile to a native Dagger `container.py` on Alpine 3.23, reusing the shared `alpine_runtime` helper.
+- Decommission the wave-1 services on minikube-indri now that paperless,
+  teslamate, and mealie run on ringtail with their data backed up. Removes the
+  minikube `paperless`/`teslamate`/`mealie` manifest dirs + ArgoCD app
+  definitions (pruning the parked Deployments, Services, and the redundant
+  minikube mealie/paperless PVCs), and drops the `paperless`/`teslamate` roles
+  from the minikube `blumeops-pg` cluster. The `paperless` and `teslamate`
+  databases are dropped from indri's blumeops-pg as the finalization step.
+  miniflux + authentik remain on the minikube cluster (later waves).
+- Upgraded the k8s Forgejo runner to the v12.8 line, switched it from first-boot registration to declarative `server.connections` credentials from 1Password, and consolidated the supporting runner how-to documentation.
+- Move paperless, teslamate, and mealie off `minikube-indri` onto
+  `k3s-ringtail`, shedding ~1.1 GiB of resident load from the
+  OOM-thrashing 8 GiB minikube node (the kernel OOM killer had been
+  killing `kube-apiserver`/`dockerd`/argocd, flapping every
+  minikube-hosted service at once). paperless + teslamate databases
+  move into a fresh CNPG `blumeops-pg` cluster on ringtail via a cold
+  `pg_dump`/`pg_restore` from the quiesced source — row counts verified
+  equal before any routing flip; source DBs dropped only after the
+  ringtail side serves traffic. mealie's SQLite PVC is copied as-is.
+  paperless media stays on sifaka NFS. Downtime-tolerant cold cutover
+  (no streaming replication); rollback is repoint-and-scale-up with the
+  source untouched. Second chain in the indri-k8s decommission after
+  [[migrate-immich-to-ringtail]].
+- Recurring maintenance batch:
+
+  - Ringtail flake inputs refreshed (`disko`, `home-manager`, `nixpkgs`).
+  - Tooling deps bumped: prek hooks (trufflehog v3.95.3, kingfisher v1.101.0, ruff v0.15.14, `ansible-core` 2.21.0); fly proxy base images (nginx 1.30.1-alpine, alloy v1.16.1); `typer==0.26.2` in mise tasks.
+- Updated `nixos/ringtail/flake.lock` (weekly cadence): `disko`, `home-manager`, and `nixpkgs` inputs refreshed. `nixpkgs-services` skipped per overlay convention.
+- Reviewed `mealie` service version freshness; upstream is 5 minor versions ahead (v3.17.0 vs deployed v3.12.0). Marked reviewed; upgrade deferred.
+- Deploy shower v1.1.2 — bump container build to new app release.
+- Upgrade unpoller v2.34.0 → v3.2.0 and migrate container build from Dockerfile to native Dagger (container.py). v3.0.0 carries breaking UniFi API changes; v3.2.0 introduces a 60s background poll (cached scrapes) by default — set `interval = 0` in `up.conf` to restore on-demand polling.
+- Monthly tooling dependency refresh: prek hooks (trufflehog, kingfisher, ruff, shfmt, prettier, actionlint, ansible-lint), fly proxy base images (nginx 1.30.0, tailscale v1.94.2, alloy v1.16.0), normalize pyyaml lower bound in mise-tasks.
+- Add GE-Proton (`pkgs.proton-ge-bin`) to `programs.steam.extraCompatPackages`
+  on ringtail. Subnautica 2 hangs at Mercuna plugin init under Proton
+  Experimental + DXVK D3D12; GE-Proton is available as a Steam per-game
+  compatibility option to work around it.
+- Add `sn2-prelaunch` Steam launch wrapper on ringtail that removes
+  Subnautica 2's stale `Saved/running.dat` and `Saved/beforelobby.dat`
+  lockfiles before each launch. SN2 pops up an invisible (0×0-sized)
+  Error dialog when it detects an unclean exit, blocking GameThread
+  forever; this is observable only as a black screen with a spinning
+  loader. Use via Steam launch option: `sn2-prelaunch %command%`.
+- Add local nix container build for `frigate-notify` (`containers/frigate-notify/default.nix`) so the Frigate→ntfy bridge is rebuilt on ringtail from the forge mirror instead of pulled from `ghcr.io/0x2142/frigate-notify`.
+- Add resource limits to all ArgoCD pods to prevent unbounded resource consumption during node-wide pressure events.
+- Black-hole the `/mirrors/*` repositories at the Fly proxy edge (`return 403` → `forge.ops.eblu.me`). A surprise $29.60 Fly bill traced to ~1.24 TB/30d of egress on `forge.eblu.me`, 99.95% of all proxy egress — of which ~71% was AI scrapers (Meta `meta-externalagent`, OpenAI `GPTBot`, Amazonbot) crawling the near-infinite git-history URL space of the public mirror repos and timing out Forgejo in the process. Mirrors exist for supply-chain control and are consumed over the tailnet, so their public web UI had no legitimate audience. `robots.txt` already disallowed `/mirrors/`, but the offending agents ignore it. Tier-2 mitigations (user-agent denylist, Anubis proof-of-work gateway) are documented in `docs/explanation/ai-scraper-mitigation.md`.
+- Bump paperless and immich kustomizations to the main-SHA-built valkey tag (`v8.1.6-r0-fabca04`). Routine post-merge follow-up to keep production manifests pointing at images built from a commit on main.
+- Bump shower container to v1.1.1 (probe FOD hash).
+- Bumped shower app to v1.1.3 (wheel/sdist + FOD hashes probed on ringtail).
+- Cap systemd-coredump on ringtail (ProcessSizeMax/ExternalSizeMax 1G, MaxUse 2G) so multi-GB Wine/Proton game crash dumps no longer thrash the disk and lock up the desktop.
+- Deploy shower v1.1.1 to ringtail (kustomize newTag bump).
+- Deployed shower v1.1.3 to ringtail (image built and pushed from ringtail; runner bypassed due to indri overload).
+- Fix three follow-ups from the wave-1 decommission: grant the local
+  break-glass `admin` account ArgoCD admin rights (`g, admin, role:admin` —
+  previously only the Authentik `admins` group had access, so admin was
+  locked out whenever its token expired), and repoint the alloy blackbox
+  probe for teslamate from the deleted minikube service to
+  `https://tesla.ops.eblu.me/` (through Caddy over Tailscale). The orphaned
+  paperless/teslamate roles + ExternalSecrets left on the minikube
+  blumeops-pg are also cleaned up.
+- Moved the Immich blackbox health probe from indri's alloy to ringtail's alloy. After the immich migration to ringtail, the probe still targeted `immich-server.immich.svc.cluster.local` on indri's cluster where the service no longer exists, causing a persistent `ServiceProbeFailure` alert.
+- Pin shower v1.1.1 FOD outputHash (probed locally on ringtail).
+- Rebuild Prowler container against main HEAD (v5.23.0-495e45d) after merging the IaC mutelist Dockerfile changes.
+- Rebuild and retag alloy v1.16.0 container images from the main-branch SHA
+  following the squash-merge of #345, per the build-container-image
+  squash-merge convention. Both images (`registry.ops.eblu.me/blumeops/alloy`)
+  now reference `9564435` rather than the branch SHA `26a3ab5`, restoring
+  source traceability after branch cleanup.
+- Rebuild shower from the post-merge commit on main so the container's
+  SHA tag points at a commit that will still exist after the 30-day
+  branch-cleanup window. Functionally identical to the branch-tag image
+  already deployed, just preserves source traceability per
+  [[build-container-image#Squash-merge and container tags]].
+- Rebuild unpoller container from squashed main commit so the image SHA tag matches a commit in main's history (was tagged with the pre-squash branch SHA).
+- Rebuild valkey container from squashed main commit (both arm64 dagger and amd64 nix variants), and update paperless + immich-ringtail kustomizations to the main-SHA tags `v8.1.7-ecded30` and `v8.1.7-ecded30-nix`.
+- Retired the `blumeops-tasks` mise task (Todoist API) in favor of `heph list --project Blumeops --json` from the self-hosted [hephaestus](https://github.com/eblume/hephaestus) system. Updated docs to point task discovery and rotation reminders at heph, and noted that the `~/code/personal/zk` zettelkasten is migrating into heph docs.
+- Switch the Fly proxy deploy strategy from `bluegreen` to `immediate` in `fly/fly.toml`. With a single proxy machine, bluegreen offers little benefit — the green machine routinely failed to reach "started" inside Fly's default 5-minute deploy timeout (the cold-start sequence of `tailscaled` → `tailscale up` → wait-for-MagicDNS → nginx startup eats most of the budget), and the failed deploys would roll back. `immediate` replaces the machine in place with a brief downtime (~5–10s) but actually completes.
+- Switch the ringtail provisioning playbook's blumeops clone URL from `forge.eblu.me` (public, via Fly proxy) to `forge.ops.eblu.me` (tailnet, direct via Caddy on indri). Ringtail is always on the tailnet, so the WAN round-trip is pure overhead — it also made `provision-ringtail` brittle whenever the Fly proxy was slow or down.
+- Switched Grafana's deployment strategy from `RollingUpdate` to `Recreate`. With an RWO PVC holding the SQLite database and Bleve search index, `RollingUpdate` reliably crashloops the new pod on the index lock until rollout timeout. `Recreate` terminates the old pod first so the new one acquires the lock cleanly.
+- Update `tailscale-operator-ringtail` ProxyClass to reference the `0108b68` main-SHA build of the tailscale container. Routine post-merge cleanup so the deployed image traces to a commit that survives PR branch cleanup.
+- Update the ringtail NixOS flake lockfile (`nixos/ringtail/flake.lock`): bump
+  `nixpkgs` (b77b3de → 25f5383) and `disko` (5ba0c95 → 115e521) to latest.
+  `nixpkgs-services` was intentionally left pinned (skipped by the
+  `flake-update` pipeline). Routine recurring maintenance per [[manage-lockfile]].
+- Upgrade native macOS Alloy on indri to v1.16.0. Built on gilbert with Go
+  1.26.2 + CGO (required for the macOS native DNS resolver, which Tailscale
+  MagicDNS depends on), scp'd to `~/.local/bin/alloy` on indri, codesigned,
+  and the LaunchAgent reloaded. Completes the v1.16.0 fleet upgrade started
+  in #345 — all four Alloy services (alloy-k8s, alloy-ringtail,
+  alloy-tracing-ringtail, alloy ansible) now run v1.16.0.
+- Upgraded zot on indri from v2.1.15 to v2.1.16 (security fixes: TLS verification on metrics client, CORS Allow-Credentials suppression on wildcard origins, manifest/API-key body size limits).
+
+### Documentation
+
+- Reviewed `replicating-blumeops` tutorial: fixed "BluemeOps" typos (also in `contributing.md`) and added `last-reviewed` frontmatter.
+- Reviewed [[indri]] reference card: added `devpi`, `cv`, and `docs` to the native-services list; widened the k8s note to reflect the growing set of apps now on ringtail and the planned indri-minikube decommission; added CPU/RAM specs.
+- New how-to: rotate-fly-deploy-token. Documents the 75-day rotation cadence, why we use `org`-scoped tokens (silences the cosmetic metrics-token warning on `fly status` with marginal blast-radius cost given the single-app personal org), and the procedure for rotation + Forgejo Actions secret sync.
+- Add `docs/explanation/ai-scraper-mitigation.md` — the egress-cost / AI-crawler threat model for the public Fly proxy, the tiered mitigation plan (Tier 1: mirror black-hole, shipped; Tier 2: user-agent denylist + Anubis; Tier 3: Cloudflare, rejected on principle), and the data behind it.
+- Fix manage-forgejo-mirrors verify step — sync button is on the repo settings page ("Synchronize now"), not the main repo page.
+- Fixed the `op item edit` invocation in the [[zot]] API-key rotation procedure: the previous `pbpaste | op item edit ... "field[password]=-"` stdin syntax is rejected by op 2.34 as "invalid JSON" (recent op versions treat piped input as a full JSON template, not a single field value). Procedure now reads the clipboard into a local fish variable and passes it as an inline assignment.
+- Fixed the export-filename step in [[run-1password-backup]]: 1Password's desktop app names the export `1PasswordExport-<account-uuid>-<timestamp>.1pux` automatically rather than letting you save to a fixed name, so the procedure now points the task at that glob instead of pretending the default name is `1Password-export.1pux`.
+- Refresh the contributing tutorial: add `last-reviewed`, include the `.ai.md` changelog fragment type, and clarify that `prek` is pinned via `mise`.
+- Review and refresh the Navidrome reference card: add `last-reviewed`, correct the scanner env var name, document the current image/version, and record routing and runtime details from the manifests.
+- Review and refresh the Ollama reference card: add `last-reviewed`, bump the documented image tag to 0.20.4, and add the two `qwen3.5` models now declared in `models.txt`.
+- Reviewed [[1password]] reference card: added the `blumeops` vs `Personal` vault split, noted that `onepassword-connect` runs on both indri and ringtail (not just one cluster), and pulled the `op read` vs `op item get --fields` guidance up from agent memory into the card.
+- Reviewed `index.md`; added ringtail to the infrastructure overview and stamped `last-reviewed`.
+- Reviewed transmission card: corrected storage layout (`/config/` is emptyDir, watch dir disabled) and noted the Prometheus exporter sidecar.
+- rotate-fly-deploy-token: combine mint+store into one command with both fish and bash forms; document the `op item edit` "Password item requires ps value" validator gotcha and the placeholder-password workaround.
+
+### AI Assistance
+
+- Adopt `AGENTS.md` as the canonical agent instruction file, keep `CLAUDE.md` as a compatibility shim, and update docs to reference the neutral file and the correct agent-change-process path.
+- CLAUDE.md now imports AGENTS.md via `@AGENTS.md` instead of telling agents to go read it. Claude Code only auto-loads CLAUDE.md, so the prose shim was easy to skip; the import inlines AGENTS.md into the session prompt unconditionally.
+
+### Miscellaneous
+
+- Removed the dead minikube manifests, container builds, and tooling shims left behind after the cv + docs migration to indri-native (#342). Deletes `argocd/{apps,manifests}/{cv,docs}/`, `containers/{cv,quartz}/`, and the `quartz`→`docs` mapping in `mise-tasks/container-version-check`. Bumps `docs.current-version` to `v1.16.0` (the blumeops release tag) now that the legacy nginx-base version pin is gone.
+- Rebuild shower v1.1.0 container from main HEAD (`3c7967e`) and bump the
+  kustomization tag to `v1.1.0-3c7967e-nix`. The PR was squash-merged, so
+  the branch commit `444ff91` baked into the prior tag isn't reachable
+  from main's history. The new tag points at a commit that exists on
+  main; image content is byte-identical because the FOD output is content
+  addressed and the inputs didn't change.
+- Rebuild shower v1.1.2 from main HEAD (a33fa47) and retag — PR #358 was squash-merged so the branch SHA baked into the prior image tag isn't reachable from main. FOD is content-addressed, so image bytes are identical; only provenance changes.
+- Remove the duplicate Homepage tiles for Mealie, Paperless, Immich, and
+  TeslaMate. Homepage runs on ringtail and autodiscovers ringtail Ingresses via
+  `gethomepage.dev/*` annotations; once these services migrated to ringtail they
+  were discovered automatically, making their leftover static `services.yaml`
+  entries (needed only while they lived on minikube) redundant.
+- Removed the now-unused `containers/devpi/` Dagger build artifact. Devpi runs natively on indri via uv venv; the container image is no longer referenced anywhere. Doc examples in `docs/reference/tools/dagger.md` updated to use `miniflux` as the example container name.
+- `container-build-and-release` now prints the specific `mise run runner-logs <N>` command after dispatching, polling the Forgejo API to resolve the run number for the commit it just triggered.
+- `mise run runner-logs <run> -j <n>` now reports a clear error when the log file doesn't exist on indri (e.g. a runner crash that left `action_task.log_in_storage = 0`). Previously it printed only the header and exited 0, because `zstdcat` exits 0 with a "can't stat … -- ignored" stderr message and ssh+fish on indri swallows the remote exit code.
+
+
+## [v1.16.0] - 2026-04-18
+
+### Infrastructure
+
+- Route Fly.io proxy through Caddy on indri with direct WireGuard peering, reducing public-facing latency from 20+ seconds (DERP relay) to sub-second. Fixed Beyla eBPF tracing on ringtail (memlock rlimit + BPF permissions). Restored trace collection to Tempo.
+
+
+## [v1.15.7] - 2026-04-18
+
+### Bug Fixes
+
+- Fix borgmatic LaunchAgent failing silently due to macOS TCC permission dialogs. LaunchAgents now call borgmatic directly instead of routing through `mise x`, which triggered "wants to access Documents" dialogs that hung headless sessions. The ansible role now also manages borgmatic installation via `mise install`.
+
+### Infrastructure
+
+- Automate verification of Prowler MANUAL findings (kubelet file perms, kubelet config, etcd CA, RBAC cluster-admin) in `review-compliance-reports` and mute them with `node-config-automated-verification` compensating control.
+- Migrate transmission and transmission-exporter containers from Dockerfile to native Dagger builds (`container.py`). Updates base images to Alpine 3.23 and Python 3.14, pins uv to 0.11.6.
+- Switched Fly proxy to upstream keepalive pools, reducing forge.eblu.me latency from 35s+ p50 to sub-second. Added `mise run fly-reload` for DNS re-resolution without redeploy.
+- Upgrade Prowler from 5.22.0 to 5.23.0; remove init container workaround for broken `--registry` flag (upstream fix in PR #10470).
+- Added `robots.txt` to `forge.eblu.me` blocking crawlers from `/mirrors/` to reduce load from Facebook scraping.
+- Container builds are now manual-only via `mise run container-build-and-release`. Removed auto-trigger on push to main — shared Dagger helpers made path-based detection unreliable.
+- Migrate devpi container from Dockerfile to native Dagger build; bump devpi-server 6.19.1→6.19.3 and devpi-web 5.0.1→5.0.2.
+- Migrated kiwix-serve container from Dockerfile to native Dagger build, bumping Alpine base from 3.22 to 3.23.
+- Mitigated Forgejo archive endpoint DoS: redirect public archive requests to tailnet, expanded robots.txt, enabled archive cleanup cron, cached release downloads at proxy.
+- Refactored Dagger container pipelines: extended `go_build()` helper with `buildmode` and `extra_env` params, migrated miniflux and forgejo-runner to use it, and standardized all Alpine bases from 3.22 to 3.23.
+
+### Miscellaneous
+
+- Review compensating control `sso-gated-admin-tools`: tightened scope to ArgoCD only, removed Grafana reference.
+- container-build-and-release now verifies the commit exists on the remote before dispatching a build.
+
+
+## [v1.15.6] - 2026-04-14
+
+### Bug Fixes
+
+- Rotate ArgoCD workflow-bot token and admin password after DR rebuild invalidated signing keys, fixing build-blumeops workflow failures.
+
+
+## [v1.15.5] - 2026-04-14
+
+### Features
+
+- Deploy Paperless-ngx document management system at paperless.ops.eblu.me with OCR, Authentik SSO, and NFS storage on sifaka.
+- Add `ty` (Astral) Python typechecker to prek hooks, configured for Dagger SDK and container.py modules. Add `type: mise` to service-versions.yaml for tracking development tool versions (dagger, ansible-core, prek, pulumi, ty) through the standard service review process.
+- Upgrade grafana-sidecar from 1.28.0 to 2.6.0, adding health probes and porting build to native Dagger container.py.
+- Upgrade Navidrome to v0.61.1 — major artwork overhaul with per-disc cover art, rebuilt search engine (SQLite FTS5), server-managed transcoding, and WebP performance fix.
+- Add `mise run review-compliance-reports` task for weekly compliance report review with muted/unmuted distinction and week-over-week delta
+
+### Bug Fixes
+
+- Add paperless database to borgmatic backup configuration. Previously the only service DB not included in nightly pg_dump backups.
+- Fix Fly.io proxy rate limiting to key on real client IP instead of Fly's internal proxy IP, so crawlers no longer consume the shared rate limit bucket for all clients.
+- Fix UnPoller (UniFi) Grafana dashboards failing to load due to UID exceeding Grafana 12's 40-character limit.
+- Fix blumeops-tasks swallowing wiki-link brackets in task descriptions (rich markup escaping)
+- Fix dagger flake-update pipeline: replace nonexistent `--exclude` flag with dynamic input discovery
+- Fix services-check to display all firing alerts for a given alert name, not just the first one.
+- Pin Fly.io proxy Tailscale to v1.94.1 — the `:stable` tag pulled v1.96.5 which has a MagicDNS regression (SERVFAIL on tailnet names), breaking all public routing through forge.eblu.me, docs.eblu.me, and cv.eblu.me.
+- Rewrite `mise run runner-logs` CLI: list runs by run number (not task ID), drill into jobs per run, fetch logs via Forgejo web API instead of SSH+filesystem. Fixes broken log retrieval caused by incorrect hex path calculation and stale data directory. Added `--repo` to query any forge repo (e.g. sporks) and `--limit`/`-n` to control listing size (0 for all).
+- Route Dagger build telemetry to Tempo, fixing OTEL metrics exporter warnings.
+- Switch paperless redis sidecar from amd64-only nix-built `authentik-redis` image to upstream `valkey:8.1-alpine` (multi-arch). The nix image was previously running under QEMU emulation on arm64 minikube.
+
+### Infrastructure
+
+- Build forgejo-runner container locally via native Dagger pipeline instead of pulling from upstream.
+- Build kube-state-metrics container locally (Dockerfile + nix) from forge mirror, replacing upstream registry.k8s.io image on both indri and ringtail.
+- Upgrade miniflux from 2.2.17 to 2.2.19 and migrate from Dockerfile to native Dagger container.py build (second container after navidrome). Refactor `alpine_runtime()` with `create_user` parameter to support Alpine's built-in nobody user. Pin all mise.toml tool versions to explicit versions instead of "latest".
+- Migrate Dagger module from .dagger/ to repo root (src/blumeops/) and replace docker_build() with native Dagger pipelines for container builds. Navidrome is the first container migrated, with full build error visibility.
+- Migrate teslamate container build from legacy Dockerfile to native Dagger container.py.
+- Add seccomp RuntimeDefault profiles to alloy-k8s and immich pods, resolving 4 unmuted Prowler findings
+- Full DR recovery from power loss and minikube cluster rebuild. Validated bootstrap procedure, identified circular dependencies (forge.eblu.me, Zot/Authentik OIDC), Tailscale device name collision issues, and documented recovery steps for restart-indri.
+- Set Frigate preview quality to CRF 8 (from default 1) to reduce preview file sizes and improve review timeline loading over NFS.
+- Track Fly.io proxy component versions (Tailscale, nginx, Alloy) in service-versions.yaml with new `fly` service type.
+- Upgrade ArgoCD from v3.3.2 to v3.3.6 (bug-fix patches), SHA-pin install manifest
+- Upgrade authentik 2026.2.0 → 2026.2.2 (bug-fix patch release)
+- Upgrade ollama from 0.17.5 to 0.20.4 (adds Gemma 4 support, benchmark tooling, Apple Silicon perf improvements)
+
+### Documentation
+
+- Delete outdated install-dagger-on-nix-runner card; add service-versions reference card; clean up zot.md and review-services.md links.
+- Enhanced the adding-a-service tutorial with kustomization setup, corrected Tailscale ingress format, updated ArgoCD repoURL, and added a step for creating service reference cards.
+- Review gandi.md: add missing forge.eblu.me CNAME, fix program description, stamp review date.
+
+
 ## [v1.15.4] - 2026-04-06

 ### Infrastructure
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -1,153 +1 @@
-# CLAUDE.md
-
-Guidance for Claude Code working in this repository. See also [[ai-assistance-guide]].
-
-## Overview
-
-blumeops is Erich Blume's GitOps repository for personal infrastructure, orchestrated via tailnet `tail8d86e.ts.net`.
-
-**CRITICAL: Public repo at github.com/eblume/blumeops - never commit secrets!**
-
-**Shell:** The user's shell is **fish**. Use `$status` not `$?` for exit codes. Use fish syntax in interactive examples.
-
-## Rules
-
-1. **Always run `mise run ai-docs` at session start**
-    This will refresh your context with important information you will be assumed to know and follow.
-    **Read the full output** — never truncate, pipe to `head`/`tail`, or skip sections.
-    For problems with a large surface area, ask the user if `mise run ai-sources` should also be run — it concatenates all non-doc source files (~270K tokens) for deep codebase context.
-2. **Always use `--context=minikube-indri` with kubectl** (or `--context=k3s-ringtail` for ringtail services) - work contexts must never be touched
-3. **Classify the change as C0/C1/C2 before starting** (see below) — this determines branching and PR requirements
-4. **Feature branches + PRs for C1/C2** - checkout main, pull, create branch, open PR via `tea pr create`. C0 goes direct to main.
-5. **Check PR comments with `mise run pr-comments <pr_number>`** before proceeding
-6. **Add changelog fragments (all change levels)** - `docs/changelog.d/<name>.<type>.md`
-    Types: `feature`, `bugfix`, `infra`, `doc`, `ai`, `misc`
-    Applies to C0, C1, and C2 whenever the change is user-visible or noteworthy.
-    - **C1/C2:** Use branch name: `<branch>.<type>.md`
-    - **C0:** Use orphan prefix: `+<descriptive-slug>.<type>.md` (avoids `main.*` collisions)
-7. **Test before applying** - dry runs (`--check --diff`), syntax checks, `ssh indri '...'`
-8. **Wait for user review before deploying** (C1/C2)
-9. **Never merge PRs or push to main without explicit request** (C0 commits to main are fine)
-10. **Verify deployments** - `mise run services-check`
-
-## Change Classification
-
-Before starting work, classify the change:
-
-| Class | Name | When to use | Key trait |
-|-------|------|-------------|-----------|
-| **C0** | Quick Fix | Small, low-risk, fix-forward safe | Direct to main, no PR |
-| **C1** | Human Review | Moderate complexity or risk | Feature branch + PR, docs-first |
-| **C2** | Mikado Chain | Multi-phase, multi-session, high complexity | Mikado Branch Invariant |
-
-**C0** — commit directly to main. No branch or PR needed. Fix forward if problems arise.
-
-**C1** — feature branch with early PR. Search related docs first, write documentation changes before code, deploy from the unmerged branch (ArgoCD `--revision`, Ansible from checkout). Upgrade to C2 if complexity spirals.
-
-**C2** — branch `mikado/<chain-stem>` governed by the Mikado Branch Invariant: all card commits first, then code progress, then card closures. Commits use `C2(<chain>): plan/impl/close/finalize` convention. Reset the branch when new prerequisites are discovered. Resume with `mise run docs-mikado --resume`.
-
-See [[agent-change-process]] for the full methodology.
-
-## Project Structure
-
-```
-./docs/                 # documentation (Diataxis, Quartz)
-./docs/changelog.d/     # towncrier fragments
-./.dagger/              # dagger pipelines
-./.forgejo/             # forgejo-runner actions and workflows
-./mise-tasks/           # scripts via `mise run`
-./ansible/playbooks/    # ansible (indri.yml primary)
-./ansible/roles/        # indri service roles
-./argocd/apps/          # ArgoCD Application definitions
-./argocd/manifests/     # k8s manifests per service
-./fly/                  # fly.io proxy for public routing
-./pulumi/               # Pulumi IaC (tailnet ACLs, dns, cloud)
-~/.config/{nvim,fish}   # user's shell config, managed by chezmoi
-~/code/personal/        # user's projects
-~/code/personal/zk      # user's Obsidian-sync managed zettelkasten. Potential source for reference data.
-~/code/3rd/             # mirrored external projects
-~/code/work             # FORBIDDEN
-```
-Other code paths will be listed via ai-docs, this is just an overview. When you
-encounter wiki-links (`[[like-this]]`) it is referring to docs/ cards.
-
-## Service Deployment
-
-### Kubernetes (ArgoCD)
-
-Most services run in minikube on indri via ArgoCD (app-of-apps, manual sync). GPU workloads (Frigate, ntfy) run on ringtail's k3s cluster, also managed by ArgoCD.
-
-**PR workflow:**
-1. Create branch, modify `argocd/manifests/<service>/`
-2. Push. Sync 'apps' app if service definition changed (set --revision to branch).
-3. Test on branch: `argocd app set <service> --revision <branch> && argocd app sync <service>`
-4. After merge: `argocd app set <service> --revision main && argocd app sync <service>`
-
-**Commands:** `argocd app list|get|diff|sync <app>`
-
-**Login:** `argocd login argocd.ops.eblu.me --username admin --password "$(op read 'op://vg6xf6vvfmoh5hqjjhlhbeoaie/srogeebssulhtb6tnqd7ls6qey/password')"`
-
-### Indri (Ansible)
-
-Native services: Forgejo, Zot, Caddy, Borgmatic, Alloy
-
-```fish
-mise run provision-indri                    # full
-mise run provision-indri -- --tags <role>   # specific
-mise run provision-indri -- --check --diff  # dry run
-```
-
-### Routing
-
-| Domain | Mechanism | Reachable from |
-|--------|-----------|----------------|
-| `*.eblu.me` | Fly.io proxy (Tailscale tunnel) | public internet |
-| `*.ops.eblu.me` | Caddy on indri | k8s pods, containers, tailnet |
-| `*.tail8d86e.ts.net` | Tailscale MagicDNS | tailnet clients only |
-
-Check tailscale serve: `ssh indri 'tailscale serve status --json'`
-
-## Container Releases
-
-```fish
-mise run container-list                       # show images/tags
-mise run container-release <name> <version>   # tag and build
-```
-The goal is to eventually use only locally built containers in all cases, with
-full supply chain control via forge.ops.eblu.me repositories, mirroring source
-from upstream.
-
-## Third-Party Projects
-
-Ask user to mirror on forge first, then clone to `~/code/3rd/<project>/`.
-
-### Sporked Projects
-
-Some mirrored projects are "sporked" — a floating-branch soft-fork strategy
-where local patches are continuously rebased on top of upstream. See
-[[spork-strategy]] and [[create-a-spork]] for the full methodology.
-
-Sporked projects live in `~/code/3rd/<project>/` with three remotes:
-`origin` (eblume/ fork on forge), `mirror` (mirrors/ on forge), `upstream`
-(canonical). The `blumeops` branch is the default; `deploy` merges everything.
-
-Create a new spork: `mise run spork-create <mirror-name>`
-
-## Task Discovery
-
-```fish
-mise run blumeops-tasks  # fetch from Todoist, sorted by priority
-```
-Most tasks are stored in `./mise-tasks/`. For scripts with any logic or
-complexity, use uv run --script 's with explicit dependencies. Complex
-workflows with artifacts should become dagger pipelines. Mise tasks are for
-development processes and operations - tools for the user or the agent.
-
-## Credentials
-
-Root store is 1Password. Never grab directly - use existing patterns (ansible
-pre_tasks, external-secrets, scripts with `op` CLI). It's ok to use `op item
-get` without `--reveal` to explore what secrets are available, however.
-
-Prefer `op read "op://vault/item/field"` over `op item get --fields` to avoid
-quoting issues with multi-line values.
+@AGENTS.md
--- a/README.md
+++ b/README.md
@ -5,7 +5,7 @@ Tools and configuration for Erich Blume's personal infrastructure, orchestrated
 across a Tailscale tailnet.

 This is a homelab, but it's also a testing ground for AI-assisted
-infrastructure development. Much of this codebase was co-authored with [Claude
+infrastructure development. Much of this codebase was initially co-authored with [Claude
 Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview),
 and the repo places heavy emphasis on documentation, process, and change
 classification to make that collaboration work well. I don't know entirely how
@ -77,7 +77,7 @@ mise run container-list        # list tracked container images
 ## AI-assisted development

 This repo is designed to be worked on by both humans and AI agents. The
-[`CLAUDE.md`](CLAUDE.md) file provides instructions for Claude Code, and the
+[`AGENTS.md`](AGENTS.md) file provides shared instructions for agentic tools, and the
 [`docs/tutorials/ai-assistance-guide.md`](docs/tutorials/ai-assistance-guide.md)
 explains the full workflow.

@ -87,7 +87,7 @@ Changes are classified before starting work:
 - **C1** - feature branch + PR, documentation written before code
 - **C2** - multi-phase work using the Mikado method for dependency tracking

-See the [agent change process](docs/how-to/agent-change-process.md) for
+See the [agent change process](docs/explanation/agent-change-process.md) for
 details.

 ## License
--- a/ansible/playbooks/indri.yml
+++ b/ansible/playbooks/indri.yml
@ -212,6 +212,23 @@
      no_log: true
      tags: [forgejo_metrics]

+    # Devpi root password (PyPI mirror admin)
+    - name: Fetch devpi root password
+      ansible.builtin.command:
+        cmd: op read "op://vg6xf6vvfmoh5hqjjhlhbeoaie/kyhzfifryqnuk7jeyibmmjvxxm/add more/root password"
+      delegate_to: localhost
+      register: _devpi_root_password
+      changed_when: false
+      no_log: true
+      check_mode: false
+      tags: [devpi]
+
+    - name: Set devpi root password fact
+      ansible.builtin.set_fact:
+        devpi_root_password: "{{ _devpi_root_password.stdout }}"
+      no_log: true
+      tags: [devpi]
+
  roles:
    - role: alloy
      tags: alloy
@ -227,6 +244,8 @@
      tags: zot
    - role: zot_metrics
      tags: zot_metrics
+    - role: devpi
+      tags: devpi
    - role: minikube
      tags: minikube
    - role: minikube_metrics
@ -237,5 +256,11 @@
      tags: jellyfin_metrics
    - role: forgejo_metrics
      tags: forgejo_metrics
+    - role: cv
+      tags: cv
+    - role: docs
+      tags: docs
+    - role: heph
+      tags: heph
    - role: caddy
      tags: caddy
--- a/ansible/playbooks/ringtail.yml
+++ b/ansible/playbooks/ringtail.yml
@ -57,7 +57,7 @@
  tasks:
    - name: Ensure blumeops repo is present
      ansible.builtin.git:
-        repo: "https://forge.eblu.me/eblume/blumeops.git"
+        repo: "https://forge.ops.eblu.me/eblume/blumeops.git"
        dest: /etc/blumeops
        version: "{{ ringtail_commit | default('main') }}"
        force: true
--- a/ansible/roles/alloy/defaults/main.yml
+++ b/ansible/roles/alloy/defaults/main.yml
@ -101,6 +101,10 @@ alloy_op_vault: vg6xf6vvfmoh5hqjjhlhbeoaie
 alloy_op_postgres_item: guxu3j7ajhjyey6xxl2ovsl2ui
 alloy_op_postgres_field: alloy-user-pw

+# Forgejo metrics collection
+alloy_collect_forgejo: true
+alloy_forgejo_port: 3001
+
 # macOS power metrics collection (via powermetrics, requires root)
 alloy_collect_power_metrics: true
 alloy_power_metrics_script: /usr/local/bin/macos-power-metrics
--- a/ansible/roles/alloy/templates/config.alloy.j2
+++ b/ansible/roles/alloy/templates/config.alloy.j2
@ -74,6 +74,18 @@ prometheus.scrape "zot" {
 }
 {% endif %}

+{% if alloy_collect_forgejo | default(false) %}
+// ============== FORGEJO METRICS ==============
+
+// Scrape Forgejo's native metrics endpoint
+prometheus.scrape "forgejo" {
+  targets         = [{"__address__" = "localhost:{{ alloy_forgejo_port }}"}]
+  metrics_path    = "/metrics"
+  forward_to      = [prometheus.relabel.instance.receiver]
+  scrape_interval = "{{ alloy_scrape_interval }}"
+}
+{% endif %}
+
 {% if alloy_collect_logs %}
 // ============== LOG COLLECTION ==============

--- a/ansible/roles/borgmatic/defaults/main.yml
+++ b/ansible/roles/borgmatic/defaults/main.yml
@ -6,6 +6,16 @@ borgmatic_log_dir: /Users/erichblume/Library/Logs
 # Full path to borg binary since LaunchAgent doesn't have homebrew in PATH
 borgmatic_local_path: /opt/homebrew/bin/borg

+# Borgmatic version — keep in sync with mise.toml in the repo root.
+# Ansible installs this via `mise install` so indri doesn't need the repo cloned.
+borgmatic_version: "2.1.4"
+
+# Full path to borgmatic binary — called directly by LaunchAgents to avoid
+# routing through mise, which triggers macOS TCC permission dialogs for
+# protected folders (e.g. ~/Documents) that hang headless LaunchAgent sessions.
+# Uses mise's "latest" symlink so version bumps don't break the LaunchAgent path.
+borgmatic_bin: /Users/erichblume/.local/share/mise/installs/pipx-borgmatic/latest/bin/borgmatic
+
 # Schedule: runs daily at 2:00 AM
 borgmatic_schedule_hour: 2
 borgmatic_schedule_minute: 0
@ -17,6 +27,9 @@ borgmatic_source_directories:
  - /Users/erichblume/.config/borgmatic
  - /Users/erichblume/Documents
  - /Users/erichblume/.local/share/borgmatic/k8s-dumps
+  # Shower app prize-photo uploads (sifaka SMB mount). Mounted manually
+  # on indri via Finder — see docs/how-to/operations/shower-app.md.
+  - /Volumes/shower

 # Backup repositories
 borgmatic_repositories:
@ -43,7 +56,17 @@ borgmatic_k8s_sqlite_dumps:
    namespace: mealie
    label_selector: app=mealie
    db_path: /app/data/mealie.db
-    context: minikube
+    # migrated to ringtail (wave-1); ssh to ringtail and run k3s kubectl
+    # there, same as shower below.
+    target: ssh:eblume@ringtail
+  - name: shower
+    namespace: shower
+    label_selector: app=shower
+    db_path: /app/data/db.sqlite3
+    # ssh to ringtail and run k3s kubectl there — avoids needing a
+    # ringtail kubeconfig on indri. k3s.yaml on ringtail is
+    # world-readable (mode 644), so no sudo required.
+    target: ssh:eblume@ringtail

 # Exclude patterns
 borgmatic_exclude_patterns: []
@ -80,14 +103,19 @@ borgmatic_postgresql_databases:
    hostname: pg.ops.eblu.me
    port: 5432
    username: borgmatic
-  - name: teslamate
-    hostname: pg.ops.eblu.me
-    port: 5432
-    username: borgmatic
  - name: authentik
    hostname: pg.ops.eblu.me
    port: 5432
    username: borgmatic
+  # migrated to ringtail blumeops-pg (wave-1); port 5434 = Caddy L4 route
+  - name: teslamate
+    hostname: pg.ops.eblu.me
+    port: 5434
+    username: borgmatic
+  - name: paperless
+    hostname: pg.ops.eblu.me
+    port: 5434
+    username: borgmatic
  # immich-pg cluster (VectorChord) via Caddy L4 on port 5433
  - name: immich
    hostname: pg.ops.eblu.me
--- a/ansible/roles/borgmatic/tasks/main.yml
+++ b/ansible/roles/borgmatic/tasks/main.yml
@ -1,6 +1,11 @@
 ---
-# Note: borgmatic is installed via mise (pipx), not managed here.
-# This role manages the config file and scheduled LaunchAgent.
+# Borgmatic is installed via mise (pipx) and called directly by LaunchAgents.
+# This role manages installation, config, and the scheduled LaunchAgents.
+
+- name: Install borgmatic via mise
+  ansible.builtin.command: mise install pipx:borgmatic@{{ borgmatic_version }}
+  register: borgmatic_install
+  changed_when: "'installed' in borgmatic_install.stderr"

 - name: Ensure borgmatic config directory exists
  ansible.builtin.file:
@ -14,8 +19,10 @@
  ansible.builtin.copy:
    content: |
      # Managed by ansible (borgmatic role) - k8s PostgreSQL backup credentials
+      # 5432 = minikube blumeops-pg, 5433 = immich-pg, 5434 = ringtail blumeops-pg
      pg.ops.eblu.me:5432:*:borgmatic:{{ borgmatic_db_password }}
      pg.ops.eblu.me:5433:*:borgmatic:{{ borgmatic_db_password }}
+      pg.ops.eblu.me:5434:*:borgmatic:{{ borgmatic_db_password }}
    dest: ~/.pgpass
    mode: '0600'
  no_log: true
@ -44,6 +51,20 @@
    mode: '0700'
  when: borgmatic_k8s_sqlite_dumps | length > 0

+- name: Ensure ~/bin exists
+  ansible.builtin.file:
+    path: "{{ ansible_env.HOME }}/bin"
+    state: directory
+    mode: '0755'
+  when: borgmatic_k8s_sqlite_dumps | length > 0
+
+- name: Deploy k8s SQLite dump helper script
+  ansible.builtin.template:
+    src: k8s-sqlite-dump.sh.j2
+    dest: "{{ ansible_env.HOME }}/bin/borgmatic-k8s-sqlite-dump"
+    mode: '0755'
+  when: borgmatic_k8s_sqlite_dumps | length > 0
+
 - name: Deploy borgmatic configuration
  ansible.builtin.template:
    src: config.yaml.j2
--- a/ansible/roles/borgmatic/templates/borgmatic-photos.plist.j2
+++ b/ansible/roles/borgmatic/templates/borgmatic-photos.plist.j2
@ -14,10 +14,7 @@
 	</dict>
 	<key>ProgramArguments</key>
 	<array>
-		<string>/opt/homebrew/opt/mise/bin/mise</string>
-		<string>x</string>
-		<string>--</string>
-		<string>borgmatic</string>
+		<string>{{ borgmatic_bin }}</string>
 		<string>--config</string>
 		<string>{{ borgmatic_photos_config }}</string>
 		<string>create</string>
--- a/ansible/roles/borgmatic/templates/borgmatic.plist.j2
+++ b/ansible/roles/borgmatic/templates/borgmatic.plist.j2
@ -14,10 +14,7 @@
 	</dict>
 	<key>ProgramArguments</key>
 	<array>
-		<string>/opt/homebrew/opt/mise/bin/mise</string>
-		<string>x</string>
-		<string>--</string>
-		<string>borgmatic</string>
+		<string>{{ borgmatic_bin }}</string>
 		<string>--config</string>
 		<string>{{ borgmatic_config }}</string>
 		<string>create</string>
--- a/ansible/roles/borgmatic/templates/config.yaml.j2
+++ b/ansible/roles/borgmatic/templates/config.yaml.j2
@ -32,12 +32,20 @@ exclude_patterns:
 encryption_passcommand: {{ borgmatic_encryption_passcommand }}

 {% if borgmatic_k8s_sqlite_dumps %}
-# Pre-backup: dump SQLite databases from k8s pods
-# Uses sqlite3 .backup for a safe, consistent copy (no corruption from concurrent writes)
+# Pre-backup: dump SQLite databases from k8s pods.
+# Uses sqlite3.backup() for a safe, consistent copy.
+#
+# Quoting/escaping is delegated to ~/bin/borgmatic-k8s-sqlite-dump
+# (deployed by the borgmatic ansible role). Each entry's `target`
+# is either:
+#   - local:<context>  -> local kubectl with --context=<context>
+#   - ssh:<user@host>  -> ssh + k3s kubectl on the cluster host,
+#                         used for ringtail since indri's kubeconfig
+#                         deliberately doesn't carry that context.
 before_backup:
    - mkdir -p {{ borgmatic_k8s_dump_dir }}
 {% for db in borgmatic_k8s_sqlite_dumps %}
-    - /opt/homebrew/bin/kubectl --context={{ db.context }} exec -n {{ db.namespace }} deploy/{{ db.name }} -- python3 -c "import sqlite3; sqlite3.connect('{{ db.db_path }}').backup(sqlite3.connect('/tmp/{{ db.name }}-backup.db'))" && /opt/homebrew/bin/kubectl --context={{ db.context }} cp {{ db.namespace }}/$(/opt/homebrew/bin/kubectl --context={{ db.context }} get pod -n {{ db.namespace }} -l {{ db.label_selector }} -o jsonpath='{.items[0].metadata.name}'):/tmp/{{ db.name }}-backup.db {{ borgmatic_k8s_dump_dir }}/{{ db.name }}.db
+    - {{ ansible_env.HOME }}/bin/borgmatic-k8s-sqlite-dump {{ db.target }} {{ db.namespace }} {{ db.label_selector }} {{ db.db_path }} {{ db.name }} {{ borgmatic_k8s_dump_dir }}/{{ db.name }}.db
 {% endfor %}
 {% endif %}

--- a/ansible/roles/borgmatic/templates/k8s-sqlite-dump.sh.j2
+++ b/ansible/roles/borgmatic/templates/k8s-sqlite-dump.sh.j2
@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+# {{ ansible_managed }}
+#
+# Helper script invoked by borgmatic's before_backup hook to capture a
+# k8s pod's SQLite database. Keeps the borgmatic config readable by
+# pulling all the quoting out of YAML.
+#
+# Usage:
+#   borgmatic-k8s-sqlite-dump <target> <namespace> <selector> \
+#                             <db_path> <name> <dump_target>
+#
+# <target> is one of:
+#   local:<context>   - run local kubectl with --context=<context>
+#   ssh:<user@host>   - ssh to host and run k3s kubectl there
+#                       (no indri-side kubeconfig needed)
+#
+# <namespace>      - k8s namespace of the pod
+# <selector>       - label selector to find the pod (e.g. app=shower)
+# <db_path>        - absolute path inside the pod to the SQLite DB
+# <name>           - short name used for temp filenames
+# <dump_target>    - file on this host to receive the dump
+#
+# The dump is streamed into "<dump_target>.partial" and renamed over
+# <dump_target> only after every step has succeeded, so a failed run
+# (pod missing, ssh down, python error) keeps the previous good dump
+# instead of truncating it.
+set -euo pipefail
+
+target=${1:?missing target}
+namespace=${2:?missing namespace}
+selector=${3:?missing selector}
+db_path=${4:?missing db path}
+name=${5:?missing name}
+dump_target=${6:?missing dump target}
+
+# Stage the backup next to the source DB (a guaranteed-writable volume);
+# minimal nix images (e.g. mealie) have no /tmp.
+pod_tmp="$(dirname "$db_path")/.borgmatic-backup-${name}.db"
+
+python_backup='import sqlite3; sqlite3.connect("'"$db_path"'").backup(sqlite3.connect("'"$pod_tmp"'"))'
+
+# Host-side staging file: removed on every exit path; on success it has
+# already been renamed away, so the cleanup is a no-op.
+dump_tmp="${dump_target}.partial"
+trap 'rm -f "$dump_tmp"' EXIT
+
+mode=${target%%:*}
+ref=${target#*:}
+
+case "$mode" in
+    local)
+        # Pulls dump bytes out via "kubectl exec -- cat" rather than
+        # "kubectl cp", which would otherwise need tar inside the pod
+        # (nix-built images like shower don't bundle tar).
+        context=$ref
+        kubectl="/opt/homebrew/bin/kubectl --context=$context -n $namespace"
+        pod=$($kubectl get pod -l "$selector" \
+            -o jsonpath='{.items[0].metadata.name}')
+        $kubectl exec "$pod" -- python3 -c "$python_backup"
+        $kubectl exec "$pod" -- cat "$pod_tmp" > "$dump_tmp"
+        $kubectl exec "$pod" -- rm -f "$pod_tmp"
+        ;;
+    ssh)
+        host=$ref
+        # Force bash on the remote (user's login shell on ringtail is
+        # fish). Pipe the script via stdin to dodge nested quoting.
+        # The dump bytes come back over the ssh stdout stream — no
+        # intermediate scp, no tar requirement in the pod.
+        ssh "$host" bash <<EOF > "$dump_tmp"
+set -euo pipefail
+export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
+pod=\$(k3s kubectl -n "$namespace" get pod -l "$selector" -o jsonpath='{.items[0].metadata.name}')
+k3s kubectl -n "$namespace" exec "\$pod" -- python3 -c '$python_backup' 1>&2
+k3s kubectl -n "$namespace" exec "\$pod" -- cat "$pod_tmp"
+k3s kubectl -n "$namespace" exec "\$pod" -- rm -f "$pod_tmp" 1>&2
+EOF
+        ;;
+    *)
+        echo "borgmatic-k8s-sqlite-dump: unknown target mode: $mode" >&2
+        echo "  expected local:<context> or ssh:<user@host>" >&2
+        exit 1
+        ;;
+esac
+
+# Publish the finished dump atomically (same directory, so a rename).
+mv -f "$dump_tmp" "$dump_target"
--- a/ansible/roles/caddy/defaults/main.yml
+++ b/ansible/roles/caddy/defaults/main.yml
@ -51,7 +51,10 @@ caddy_services:
    backend: "https://feed.tail8d86e.ts.net"
  - name: devpi
    host: "pypi.{{ caddy_domain }}"
-    backend: "https://pypi.tail8d86e.ts.net"
+    backend: "http://localhost:3141"
+  - name: heph
+    host: "heph.{{ caddy_domain }}"
+    backend: "http://localhost:8787"  # hephaestus hub (server mode) + PWA shell
  - name: kiwix
    host: "kiwix.{{ caddy_domain }}"
    backend: "https://kiwix.tail8d86e.ts.net"
@ -72,10 +75,16 @@ caddy_services:
    backend: "https://go.tail8d86e.ts.net"
  - name: docs
    host: "docs.{{ caddy_domain }}"
-    backend: "https://docs.tail8d86e.ts.net"
+    kind: static
+    root: "{{ docs_content_dir }}"
+    try_html: true  # Quartz: path → path/ → path.html → 404.html
  - name: cv
    host: "cv.{{ caddy_domain }}"
-    backend: "https://cv.tail8d86e.ts.net"
+    kind: static
+    root: "{{ cv_content_dir }}"
+    download_paths:
+      - path: /resume.pdf
+        filename: erich-blume-resume.pdf
  - name: nvr
    host: "nvr.{{ caddy_domain }}"
    backend: "https://nvr.tail8d86e.ts.net"
@ -95,6 +104,9 @@ caddy_services:
  - name: paperless
    host: "paperless.{{ caddy_domain }}"
    backend: "https://paperless.tail8d86e.ts.net"
+  - name: shower
+    host: "shower.{{ caddy_domain }}"
+    backend: "https://shower.tail8d86e.ts.net"
  - name: sifaka
    host: "nas.{{ caddy_domain }}"
    backend: "http://sifaka:5000"
@ -108,6 +120,8 @@ caddy_tcp_services:
    backend: "pg.tail8d86e.ts.net:5432"  # PostgreSQL (blumeops-pg)
  - port: 5433
    backend: "immich-pg.tail8d86e.ts.net:5432"  # PostgreSQL (immich-pg)
+  - port: 5434
+    backend: "blumeops-pg-ringtail.tail8d86e.ts.net:5432"  # PostgreSQL (blumeops-pg on ringtail)
  - port: "{{ sifaka_node_exporter_port }}"
    backend: "sifaka:{{ sifaka_node_exporter_port }}"  # Sifaka node_exporter
  - port: "{{ sifaka_smartctl_exporter_port }}"
--- a/ansible/roles/caddy/templates/Caddyfile.j2
+++ b/ansible/roles/caddy/templates/Caddyfile.j2
@ -31,6 +31,25 @@
 {% for service in caddy_services %}
 	@{{ service.name }} host {{ service.host }}
 	handle @{{ service.name }} {
+{% if service.kind | default('proxy') == 'static' %}
+		root * {{ service.root }}
+		encode gzip
+		# Long-cache assets by file extension (assumed fingerprinted; the matcher itself does not check); everything else stays default.
+		@{{ service.name }}_assets path_regexp \.(css|js|png|jpg|jpeg|gif|ico|svg|woff|woff2)$
+		header @{{ service.name }}_assets Cache-Control "public, max-age=31536000, immutable"
+{% for dl in service.download_paths | default([]) %}
+		@{{ service.name }}_dl{{ loop.index }} path {{ dl.path }}
+		header @{{ service.name }}_dl{{ loop.index }} Content-Disposition `attachment; filename="{{ dl.filename }}"`
+{% endfor %}
+{% if service.try_html | default(false) %}
+		# Quartz clean URLs: path → path/ → path.html → /404.html (200).
+		# Caddy's handle_errors is a top-level directive and can't live in
+		# this nested handle, so the 404 page rides as the final try_files
+		# candidate (served with 200 — acceptable for a human-facing 404).
+		try_files {path} {path}/ {path}.html /404.html
+{% endif %}
+		file_server
+{% else %}
 {% if service.cache_policy | default('') == 'spa' %}
 		# SPA cache policy: hashed static assets are immutable, HTML must revalidate.
 		# Prevents stale HTML from referencing chunk hashes that no longer exist.
@ -47,6 +66,7 @@
 		}
 {% else %}
 		reverse_proxy {{ service.backend }}
+{% endif %}
 {% endif %}
 	}

--- a/ansible/roles/cv/defaults/main.yml
+++ b/ansible/roles/cv/defaults/main.yml
@ -0,0 +1,10 @@
+---
+# CV / resume static site (native, replaces minikube Deployment)
+# Caddy serves cv_content_dir directly via the static-kind service block.
+
+cv_version: "v1.0.3"
+cv_release_url: "https://forge.ops.eblu.me/api/packages/eblume/generic/cv/{{ cv_version }}/cv-{{ cv_version }}.tar.gz"
+
+cv_home: /Users/erichblume/blumeops/cv
+cv_content_dir: "{{ cv_home }}/content"
+cv_version_sentinel: "{{ cv_home }}/.installed-version"
--- a/ansible/roles/cv/tasks/main.yml
+++ b/ansible/roles/cv/tasks/main.yml
@ -0,0 +1,68 @@
+---
+# cv role — download and extract the CV release tarball into cv_content_dir.
+# Caddy serves the directory directly; there is no daemon to manage.
+#
+# Idempotency: a sentinel file records the installed cv_version. The
+# download/extract steps only run when the sentinel doesn't match cv_version.
+#
+# Ordering: the tarball is downloaded before the content dir is wiped, so a
+# failed download leaves the currently served site untouched.
+#
+# We use curl rather than ansible.builtin.get_url because the forge generic-
+# packages endpoint returns 405 on HEAD requests, which get_url issues before
+# downloading.
+
+- name: Ensure cv home exists
+  ansible.builtin.file:
+    path: "{{ cv_home }}"
+    state: directory
+    mode: '0755'
+
+- name: Read installed cv version sentinel
+  ansible.builtin.slurp:
+    src: "{{ cv_version_sentinel }}"
+  register: cv_installed_raw
+  failed_when: false
+  changed_when: false
+
+- name: Set installed cv version fact
+  ansible.builtin.set_fact:
+    cv_installed_version: >-
+      {{ (cv_installed_raw.content | b64decode).strip()
+         if (cv_installed_raw.content is defined) else '' }}
+
+- name: Download cv release tarball
+  ansible.builtin.shell:
+    cmd: >-
+      set -euo pipefail;
+      curl -fsSL {{ cv_release_url | quote }} -o {{ cv_home }}/cv.tar.gz
+    executable: /bin/bash
+  when: cv_installed_version != cv_version
+  changed_when: true
+
+- name: Recreate cv content dir
+  ansible.builtin.file:
+    path: "{{ cv_content_dir }}"
+    state: "{{ item }}"
+    mode: '0755'
+  loop:
+    - absent
+    - directory
+  when: cv_installed_version != cv_version
+
+- name: Extract cv release tarball
+  ansible.builtin.shell:
+    cmd: >-
+      set -euo pipefail;
+      tar -xzf {{ cv_home }}/cv.tar.gz -C {{ cv_content_dir }} &&
+      rm -f {{ cv_home }}/cv.tar.gz
+    executable: /bin/bash
+  when: cv_installed_version != cv_version
+  changed_when: true
+
+- name: Write cv version sentinel
+  ansible.builtin.copy:
+    content: "{{ cv_version }}\n"
+    dest: "{{ cv_version_sentinel }}"
+    mode: '0644'
+  when: cv_installed_version != cv_version
--- a/ansible/roles/devpi/defaults/main.yml
+++ b/ansible/roles/devpi/defaults/main.yml
@ -0,0 +1,21 @@
+---
+# devpi PyPI caching mirror (native launchd, replaces minikube StatefulSet)
+
+devpi_home: /Users/erichblume/devpi
+devpi_venv: "{{ devpi_home }}/venv"
+devpi_server_dir: "{{ devpi_home }}/server-dir"
+devpi_binary: "{{ devpi_venv }}/bin/devpi-server"
+devpi_init_binary: "{{ devpi_venv }}/bin/devpi-init"
+
+devpi_python_version: "3.12"
+devpi_server_version: "6.19.3"
+devpi_web_version: "5.0.2"
+
+devpi_host: 127.0.0.1
+devpi_port: 3141
+devpi_outside_url: "https://pypi.ops.eblu.me"
+
+devpi_log_dir: /Users/erichblume/Library/Logs
+
+# uv binary on indri — mise shim so version bumps via `mise upgrade uv` flow through transparently
+devpi_uv_binary: /Users/erichblume/.local/share/mise/shims/uv
--- a/ansible/roles/devpi/handlers/main.yml
+++ b/ansible/roles/devpi/handlers/main.yml
@ -0,0 +1,6 @@
+---
+- name: Restart devpi
+  ansible.builtin.shell: |
+    launchctl unload ~/Library/LaunchAgents/mcquack.eblume.devpi.plist 2>/dev/null || true
+    launchctl load ~/Library/LaunchAgents/mcquack.eblume.devpi.plist
+  changed_when: true
--- a/ansible/roles/devpi/tasks/main.yml
+++ b/ansible/roles/devpi/tasks/main.yml
@ -0,0 +1,71 @@
+---
+# devpi role — devpi-server in a uv-managed venv, run via LaunchAgent.
+# Replaces the prior minikube StatefulSet; see [[devpi-on-indri]].
+#
+# The root password is fetched in the indri.yml playbook pre_tasks and
+# exposed as `devpi_root_password`.
+
+- name: Ensure devpi home exists
+  ansible.builtin.file:
+    path: "{{ devpi_home }}"
+    state: directory
+    mode: '0755'
+
+- name: Ensure devpi server-dir exists
+  ansible.builtin.file:
+    path: "{{ devpi_server_dir }}"
+    state: directory
+    mode: '0700'
+
+- name: Create devpi venv if missing
+  ansible.builtin.command:
+    cmd: "{{ devpi_uv_binary }} venv --python {{ devpi_python_version }} {{ devpi_venv }}"
+    creates: "{{ devpi_venv }}/bin/python"
+
+- name: Install devpi-server and devpi-web into venv
+  # Always bootstrap from upstream PyPI — devpi is the index it would otherwise resolve through,
+  # and that's a circular dependency (devpi cannot install itself from itself).
+  ansible.builtin.command:
+    cmd: >-
+      {{ devpi_uv_binary }} pip install
+      --python {{ devpi_venv }}/bin/python
+      --index-url https://pypi.org/simple/
+      devpi-server=={{ devpi_server_version }}
+      devpi-web=={{ devpi_web_version }}
+  register: devpi_pip_install
+  # uv prints its "Installed N packages" / "Uninstalled N packages" summary on
+  # stderr rather than stdout, so inspect both streams. Checking stdout alone
+  # means a version bump never reports "changed" and never notifies the handler.
+  changed_when: >-
+    'Installed' in (devpi_pip_install.stdout ~ devpi_pip_install.stderr)
+    or 'Uninstalled' in (devpi_pip_install.stdout ~ devpi_pip_install.stderr)
+  notify: Restart devpi
+
+- name: Check if devpi server-dir is initialized
+  ansible.builtin.stat:
+    path: "{{ devpi_server_dir }}/.serverversion"
+  register: devpi_serverversion
+
+# One-time initialisation of the server-dir (skipped once .serverversion
+# exists). Arguments are passed as an argv list so the root password reaches
+# devpi-init as a single, verbatim argument: a `cmd` string is word-split, and
+# a password containing spaces or quote characters would break or alter the
+# command. no_log keeps the password out of task output.
+- name: Initialize devpi server-dir
+  ansible.builtin.command:
+    argv:
+      - "{{ devpi_init_binary }}"
+      - --serverdir
+      - "{{ devpi_server_dir }}"
+      - --root-passwd
+      - "{{ devpi_root_password }}"
+  when: not devpi_serverversion.stat.exists
+  changed_when: true
+  no_log: true
+
+- name: Deploy devpi LaunchAgent plist
+  ansible.builtin.template:
+    src: devpi.plist.j2
+    dest: ~/Library/LaunchAgents/mcquack.eblume.devpi.plist
+    mode: '0644'
+  notify: Restart devpi
+
+- name: Check if devpi LaunchAgent is loaded
+  ansible.builtin.command: launchctl list mcquack.eblume.devpi
+  register: devpi_launchctl_check
+  changed_when: false
+  failed_when: false
+
+- name: Load devpi LaunchAgent if not loaded
+  ansible.builtin.command: launchctl load ~/Library/LaunchAgents/mcquack.eblume.devpi.plist
+  when: devpi_launchctl_check.rc != 0
+  changed_when: true
+  failed_when: false
--- a/ansible/roles/devpi/templates/devpi.plist.j2
+++ b/ansible/roles/devpi/templates/devpi.plist.j2
@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- {{ ansible_managed }} -->
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>Label</key>
+	<string>mcquack.eblume.devpi</string>
+	<key>ProgramArguments</key>
+	<array>
+		<string>{{ devpi_binary }}</string>
+		<string>--serverdir</string>
+		<string>{{ devpi_server_dir }}</string>
+		<string>--host</string>
+		<string>{{ devpi_host }}</string>
+		<string>--port</string>
+		<string>{{ devpi_port }}</string>
+		<string>--outside-url</string>
+		<string>{{ devpi_outside_url }}</string>
+	</array>
+	<key>RunAtLoad</key>
+	<true/>
+	<key>KeepAlive</key>
+	<true/>
+	<key>EnvironmentVariables</key>
+	<dict>
+		<key>PATH</key>
+		<string>{{ devpi_venv }}/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin</string>
+	</dict>
+	<key>StandardOutPath</key>
+	<string>{{ devpi_log_dir }}/mcquack.devpi.out.log</string>
+	<key>StandardErrorPath</key>
+	<string>{{ devpi_log_dir }}/mcquack.devpi.err.log</string>
+</dict>
+</plist>
--- a/ansible/roles/docs/defaults/main.yml
+++ b/ansible/roles/docs/defaults/main.yml
@ -0,0 +1,10 @@
+---
+# Docs (Quartz-built static site) — replaces minikube Deployment.
+# Caddy serves docs_content_dir directly via the static-kind service block,
+# with Quartz-style try_files (path → path/ → path.html → 404).
+
+docs_version: "v1.17.0"
+docs_release_url: "https://forge.eblu.me/eblume/blumeops/releases/download/{{ docs_version }}/docs-{{ docs_version }}.tar.gz"
+docs_home: /Users/erichblume/blumeops/docs
+docs_content_dir: "{{ docs_home }}/content"
+docs_version_sentinel: "{{ docs_home }}/.installed-version"
--- a/ansible/roles/docs/tasks/main.yml
+++ b/ansible/roles/docs/tasks/main.yml
@ -0,0 +1,57 @@
+---
+# docs role — download and extract the Quartz-built docs tarball into
+# docs_content_dir. Caddy serves the directory directly with Quartz-style
+# try_files; there is no daemon to manage.
+#
+# Idempotency: a sentinel file records the installed docs_version. The
+# download/extract steps only run when the sentinel doesn't match docs_version.
+#
+# Mirrors the cv role's curl-based download for consistency, even though the
+# forge releases endpoint here does support HEAD.
+
+- name: Ensure docs home exists
+  ansible.builtin.file:
+    path: "{{ docs_home }}"
+    state: directory
+    mode: '0755'
+
+# Probe the sentinel file. On a first install it does not exist yet, so the
+# slurp failure is tolerated and the registered result simply has no `content`.
+- name: Read installed docs version sentinel
+  ansible.builtin.slurp:
+    src: "{{ docs_version_sentinel }}"
+  register: docs_installed_raw
+  changed_when: false
+  failed_when: false
+
+# Decode the base64 payload to a bare version string; an absent sentinel
+# yields '' so the comparison against docs_version below always differs.
+- name: Set installed docs version fact
+  ansible.builtin.set_fact:
+    docs_installed_version: "{{ docs_installed_raw.content | default('') | b64decode | trim }}"
+
+# Build the new content tree in a staging directory and only swap it into
+# docs_content_dir after both the download and the extraction have succeeded.
+# Deleting docs_content_dir up front would leave Caddy serving an empty
+# directory whenever curl or tar fails (bad version pin, forge outage, ...).
+- name: Download and extract docs release tarball
+  ansible.builtin.shell:
+    cmd: |
+      set -euo pipefail
+      staging={{ (docs_home ~ '/.content-staging') | quote }}
+      tarball={{ (docs_home ~ '/docs.tar.gz') | quote }}
+      # Remove the tarball and any half-built staging tree on every exit path.
+      trap 'rm -rf "$staging" "$tarball"' EXIT
+      rm -rf "$staging"
+      mkdir -m 0755 "$staging"
+      curl -fsSL {{ docs_release_url | quote }} -o "$tarball"
+      tar -xzf "$tarball" -C "$staging"
+      # Swap: the previous content stays in place until this point.
+      rm -rf {{ docs_content_dir | quote }}
+      mv "$staging" {{ docs_content_dir | quote }}
+    executable: /bin/bash
+  when: docs_installed_version != docs_version
+  changed_when: true
+
+- name: Write docs version sentinel
+  ansible.builtin.copy:
+    content: "{{ docs_version }}\n"
+    dest: "{{ docs_version_sentinel }}"
+    mode: '0644'
+  when: docs_installed_version != docs_version
--- a/ansible/roles/forgejo/templates/app.ini.j2
+++ b/ansible/roles/forgejo/templates/app.ini.j2
@ -61,6 +61,12 @@ MIN_INTERVAL = 10m
 [cron.update_checker]
 ENABLED = false

+[cron.archive_cleanup]
+ENABLED = true
+RUN_AT_START = true
+SCHEDULE = @midnight
+OLDER_THAN = 2h
+
 [session]
 PROVIDER = {{ forgejo_session_provider }}

@ -89,6 +95,11 @@ ACCOUNT_LINKING = login
 USERNAME = nickname
 REGISTER_EMAIL_CONFIRM = false

+[metrics]
+ENABLED = true
+ENABLED_ISSUE_BY_LABEL = false
+ENABLED_ISSUE_BY_REPOSITORY = false
+
 [actions]
 ENABLED = {{ forgejo_actions_enabled | lower }}
 DEFAULT_ACTIONS_URL = {{ forgejo_actions_default_url }}
--- a/ansible/roles/heph/defaults/main.yml
+++ b/ansible/roles/heph/defaults/main.yml
@ -0,0 +1,49 @@
+---
+# hephaestus hub — the canonical heph replica (server mode) on indri.
+# Other devices (e.g. gilbert) are spokes that sync against this hub.
+# See [[set-up-sync-hub]] and [[host-heph-pwa]] in the hephaestus repo.
+
+# Pinned release used for the initial `cargo install` and the PWA shell.
+# After bootstrap, hephd's own --self-update keeps the binary current; this
+# pin only governs the first install and the bundled PWA shell version.
+heph_version: v1.2.1
+
+# Anonymous public HTTPS clone — matches hephd's INSTALL_GIT_URL so the initial
+# install and unattended self-update build from the same source (no ssh-agent).
+heph_repo_url: https://forge.eblu.me/eblume/hephaestus.git
+
+heph_bin_dir: /Users/erichblume/.cargo/bin
+heph_binary: "{{ heph_bin_dir }}/hephd"
+
+# rustc/cargo here are rustup shims. The bare (non-mise) environment that the
+# launchagent and ansible run in falls back to rustup's *default* toolchain,
+# which can lag behind heph's rust-version floor (Cargo.toml: 1.89). Pin the
+# channel explicitly so both the bootstrap build and unattended self-update
+# always use a current toolchain regardless of the host's rustup default.
+heph_rust_toolchain: stable
+
+heph_data_dir: /Users/erichblume/.local/share/heph
+heph_db: "{{ heph_data_dir }}/heph.db"
+heph_socket: "{{ heph_data_dir }}/hephd.sock"
+heph_log_dir: /Users/erichblume/Library/Logs
+
+# Version-pinned source checkout; the PWA static shell is served directly from
+# its heph-pwa/ subdir (no copy), keeping shell and hub in lockstep at heph_version.
+heph_pwa_src_dir: /Users/erichblume/.cache/heph-pwa-src
+heph_web_root: "{{ heph_pwa_src_dir }}/heph-pwa"
+
+# Hub listens on all interfaces so tailnet spokes can reach it directly
+# (http://indri.tail8d86e.ts.net:8787) and Caddy can proxy heph.ops.eblu.me.
+# Access is gated by Authentik OIDC regardless — tailnet reachability is not
+# enough (this is the owner's most sensitive data).
+# heph_http_addr is derived from heph_port so the two values cannot drift apart.
+heph_port: 8787
+heph_http_addr: "0.0.0.0:{{ heph_port }}"
+heph_external_url: https://heph.ops.eblu.me
+
+# Authentik OIDC — issuer + audience together turn hub auth on. The audience is
+# the device-code client id (see argocd/manifests/authentik heph blueprint).
+heph_oidc_issuer: https://authentik.ops.eblu.me/application/o/heph/
+heph_oidc_audience: heph
+
+# Self-update poll interval (seconds). 10 minutes.
+heph_self_update_interval_secs: 600
--- a/ansible/roles/heph/handlers/main.yml
+++ b/ansible/roles/heph/handlers/main.yml
@ -0,0 +1,6 @@
+---
+- name: Restart heph
+  ansible.builtin.shell: |
+    launchctl unload ~/Library/LaunchAgents/mcquack.eblume.heph.plist 2>/dev/null || true
+    launchctl load ~/Library/LaunchAgents/mcquack.eblume.heph.plist
+  changed_when: true
--- a/ansible/roles/heph/tasks/main.yml
+++ b/ansible/roles/heph/tasks/main.yml
@ -0,0 +1,82 @@
+---
+# hephaestus hub (server mode) on indri.
+#
+# DATA SEEDING (one-time, Path A — do this BEFORE the first provision so the hub
+# adopts gilbert's existing data instead of being born empty):
+#
+#   1. On the seed device (gilbert):   heph daemon stop
+#   2. Copy its store to indri:         scp ~/.local/share/heph/heph.db \
+#                                           indri:~/.local/share/heph/heph.db
+#   3. On indri, give the hub its OWN device origin (keeps gilbert's owner_id +
+#      data; hephd regenerates a fresh origin on next start when it is missing):
+#        sqlite3 ~/.local/share/heph/heph.db "DELETE FROM meta WHERE key='origin';"
+#   4. Run this role (installs hephd, stages the PWA, loads the launchagent).
+#
+# hephd auto-creates an empty store on first start if none exists, so seeding is
+# optional — skip it only if you intend a fresh, empty hub.
+
+- name: Ensure heph data directory exists
+  ansible.builtin.file:
+    path: "{{ heph_data_dir }}"
+    state: directory
+    mode: '0700'
+
+- name: Check for installed hephd binary
+  ansible.builtin.stat:
+    path: "{{ heph_binary }}"
+  register: heph_binary_stat
+
+# Bootstrap install only when hephd is absent. Thereafter hephd's own
+# --self-update keeps it current; ansible must not fight (or downgrade) it.
+# This builds from source and can take several minutes on a cold cargo cache.
+- name: Bootstrap-install heph + hephd from the forge ({{ heph_version }})
+  ansible.builtin.command:
+    cmd: >-
+      {{ heph_bin_dir }}/cargo install --locked
+      --git {{ heph_repo_url }}
+      --tag {{ heph_version }}
+      heph hephd
+  environment:
+    PATH: "{{ heph_bin_dir }}:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin"
+    RUSTUP_TOOLCHAIN: "{{ heph_rust_toolchain }}"
+  when: not heph_binary_stat.stat.exists
+  changed_when: true
+  notify: Restart heph
+
+# Checkout provides the PWA shell at {{ heph_web_root }} (heph-pwa/ subdir),
+# served directly by hephd. Static files are read from disk per request, so a
+# version bump needs no restart; the service worker (CACHE = "heph-pwa-vN")
+# evicts stale assets on next load.
+- name: Ensure heph cache parent directory exists
+  ansible.builtin.file:
+    path: "{{ heph_pwa_src_dir | dirname }}"
+    state: directory
+    mode: '0755'
+
+- name: Stage heph-pwa source at {{ heph_version }}
+  ansible.builtin.git:
+    repo: "{{ heph_repo_url }}"
+    dest: "{{ heph_pwa_src_dir }}"
+    version: "{{ heph_version }}"
+    depth: 1
+    single_branch: true
+    force: true
+
+- name: Deploy heph LaunchAgent plist
+  ansible.builtin.template:
+    src: heph.plist.j2
+    dest: ~/Library/LaunchAgents/mcquack.eblume.heph.plist
+    mode: '0644'
+  notify: Restart heph
+
+- name: Check if heph LaunchAgent is loaded
+  ansible.builtin.command: launchctl list mcquack.eblume.heph
+  register: heph_launchctl_check
+  changed_when: false
+  failed_when: false
+
+- name: Load heph LaunchAgent if not loaded
+  ansible.builtin.command: launchctl load ~/Library/LaunchAgents/mcquack.eblume.heph.plist
+  when: heph_launchctl_check.rc != 0
+  changed_when: true
+  failed_when: false
--- a/ansible/roles/heph/templates/heph.plist.j2
+++ b/ansible/roles/heph/templates/heph.plist.j2
@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- {{ ansible_managed }} -->
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>Label</key>
+	<string>mcquack.eblume.heph</string>
+	<key>ProgramArguments</key>
+	<array>
+		<string>{{ heph_binary }}</string>
+		<string>--mode</string>
+		<string>server</string>
+		<string>--http-addr</string>
+		<string>{{ heph_http_addr }}</string>
+		<string>--db</string>
+		<string>{{ heph_db }}</string>
+		<string>--socket</string>
+		<string>{{ heph_socket }}</string>
+		<string>--web-root</string>
+		<string>{{ heph_web_root }}</string>
+		<string>--oidc-issuer</string>
+		<string>{{ heph_oidc_issuer }}</string>
+		<string>--oidc-audience</string>
+		<string>{{ heph_oidc_audience }}</string>
+		<string>--self-update</string>
+		<string>--self-update-interval-secs</string>
+		<string>{{ heph_self_update_interval_secs }}</string>
+	</array>
+	<key>RunAtLoad</key>
+	<true/>
+	<key>KeepAlive</key>
+	<true/>
+	<key>EnvironmentVariables</key>
+	<dict>
+		<!-- cargo + toolchain on PATH so --self-update can run `cargo install`. -->
+		<key>PATH</key>
+		<string>{{ heph_bin_dir }}:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin</string>
+		<key>HOME</key>
+		<string>/Users/erichblume</string>
+		<!-- Pin the rustup channel: the launchagent runs without mise, so a bare
+		     cargo shim would otherwise use rustup's (stale) default toolchain. -->
+		<key>RUSTUP_TOOLCHAIN</key>
+		<string>{{ heph_rust_toolchain }}</string>
+	</dict>
+	<key>StandardOutPath</key>
+	<string>{{ heph_log_dir }}/mcquack.heph.out.log</string>
+	<key>StandardErrorPath</key>
+	<string>{{ heph_log_dir }}/mcquack.heph.err.log</string>
+</dict>
+</plist>
--- a/argocd/apps/cloudnative-pg-ringtail.yaml
+++ b/argocd/apps/cloudnative-pg-ringtail.yaml
@ -0,0 +1,27 @@
+# CloudNativePG Operator for ringtail k3s cluster
+# Deploys the operator only; PostgreSQL clusters are created separately
+#
+# Sibling of cloudnative-pg.yaml (minikube). Same mirror, same release,
+# different destination. Both apps will coexist during the immich
+# migration; the minikube one is removed at the end of the broader
+# indri-k8s decommission.
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: cloudnative-pg-ringtail
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/mirrors/cloudnative-pg.git
+    targetRevision: v1.27.1
+    path: releases
+    directory:
+      include: 'cnpg-1.27.1.yaml'
+  destination:
+    server: https://ringtail.tail8d86e.ts.net:6443
+    namespace: cnpg-system
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
+      - ServerSideApply=true  # Required for large CRDs that exceed annotation size limit
--- a/argocd/apps/cv.yaml
+++ b/argocd/apps/cv.yaml
@ -1,18 +0,0 @@
---
-apiVersion: argoproj.io/v1alpha1
-kind: Application
-metadata:
-  name: cv
-  namespace: argocd
-spec:
-  project: default
-  source:
-    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
-    targetRevision: main
-    path: argocd/manifests/cv
-  destination:
-    server: https://kubernetes.default.svc
-    namespace: cv
-  syncPolicy:
-    syncOptions:
-      - CreateNamespace=true
--- a/argocd/apps/databases-ringtail.yaml
+++ b/argocd/apps/databases-ringtail.yaml
@ -0,0 +1,26 @@
+# Databases on ringtail k3s.
+#
+# Today: only immich-pg (CNPG Cluster) + its borgmatic ExternalSecret.
+# More databases may move here as the indri-k8s decommission proceeds.
+#
+# Prerequisites:
+# - cloudnative-pg-ringtail (operator must exist before the Cluster CR)
+# - external-secrets-ringtail + 1password-connect-ringtail (for the
+#   immich-pg-borgmatic ExternalSecret to sync)
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: databases-ringtail
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
+    targetRevision: main
+    path: argocd/manifests/databases-ringtail
+  destination:
+    server: https://ringtail.tail8d86e.ts.net:6443
+    namespace: databases
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
--- a/argocd/apps/devpi.yaml
+++ b/argocd/apps/devpi.yaml
@ -1,29 +0,0 @@
-# devpi PyPI Caching Proxy
-# Provides PyPI cache and private package hosting
-#
-# After first deployment, initialize devpi:
-#   kubectl -n devpi exec -it devpi-0 -- devpi-init --serverdir /devpi --root-passwd <password>
-#   kubectl -n devpi rollout restart statefulset devpi
-#
-# Then create user/index:
-#   uvx devpi use https://pypi.tail8d86e.ts.net
-#   uvx devpi login root
-#   uvx devpi user -c eblume email=blume.erich@gmail.com
-#   uvx devpi index -c eblume/dev bases=root/pypi
-apiVersion: argoproj.io/v1alpha1
-kind: Application
-metadata:
-  name: devpi
-  namespace: argocd
-spec:
-  project: default
-  source:
-    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
-    targetRevision: main
-    path: argocd/manifests/devpi
-  destination:
-    server: https://kubernetes.default.svc
-    namespace: devpi
-  syncPolicy:
-    syncOptions:
-      - CreateNamespace=true
--- a/argocd/apps/docs.yaml
+++ b/argocd/apps/docs.yaml
@ -1,18 +0,0 @@
---
-apiVersion: argoproj.io/v1alpha1
-kind: Application
-metadata:
-  name: docs
-  namespace: argocd
-spec:
-  project: default
-  source:
-    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
-    targetRevision: main
-    path: argocd/manifests/docs
-  destination:
-    server: https://kubernetes.default.svc
-    namespace: docs
-  syncPolicy:
-    syncOptions:
-      - CreateNamespace=true
--- a/argocd/apps/external-secrets-ringtail.yaml
+++ b/argocd/apps/external-secrets-ringtail.yaml
@ -15,7 +15,7 @@ spec:
  source:
    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
    targetRevision: main
-    path: argocd/manifests/external-secrets
+    path: argocd/manifests/external-secrets-ringtail
  destination:
    server: https://ringtail.tail8d86e.ts.net:6443
    namespace: external-secrets
--- a/argocd/apps/homepage.yaml
+++ b/argocd/apps/homepage.yaml
@ -14,7 +14,7 @@ spec:
    targetRevision: main
    path: argocd/manifests/homepage
  destination:
-    server: https://kubernetes.default.svc
+    server: https://ringtail.tail8d86e.ts.net:6443
    namespace: homepage
  syncPolicy:
    syncOptions:
--- a/argocd/apps/immich-ringtail.yaml
+++ b/argocd/apps/immich-ringtail.yaml
@ -0,0 +1,31 @@
+# Immich on ringtail k3s.
+#
+# Staging deployment; the minikube `immich` app remains in parallel
+# until cutover. See [[immich-cutover-and-decommission]] for the
+# routing flip + minikube cleanup.
+#
+# Prerequisites:
+# - cloudnative-pg-ringtail + databases-ringtail (postgres)
+# - 1password-connect-ringtail + external-secrets-ringtail (not used
+#   by this app today — immich-db Secret is created manually,
+#   matching the minikube pattern)
+# - The immich-db Secret in the immich namespace, holding the
+#   password for the `immich` postgres role (copied from the source
+#   immich-pg-app Secret at migration time).
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: immich-ringtail
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
+    targetRevision: main
+    path: argocd/manifests/immich-ringtail
+  destination:
+    server: https://ringtail.tail8d86e.ts.net:6443
+    namespace: immich
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
--- a/argocd/apps/immich.yaml
+++ b/argocd/apps/immich.yaml
@ -1,30 +0,0 @@
-# Immich - Self-hosted photo and video management
-# High-performance Google Photos/iCloud alternative with AI features
-#
-# Kustomize manifests in argocd/manifests/immich/
-# Components: server, machine-learning, valkey (Redis)
-#
-# Prerequisites:
-# 1. Create immich namespace and secrets:
-#    kubectl create namespace immich
-#    kubectl --context=minikube-indri create secret generic immich-db -n immich \
-#      --from-literal=password="$(kubectl --context=minikube-indri -n databases get secret immich-pg-app -o jsonpath='{.data.password}' | base64 -d)"
-# 2. Create immich-pg database and user (see immich-pg app)
-# 3. NFS share on sifaka at /volume1/photos with read/write for indri
-apiVersion: argoproj.io/v1alpha1
-kind: Application
-metadata:
-  name: immich
-  namespace: argocd
-spec:
-  project: default
-  source:
-    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
-    targetRevision: main
-    path: argocd/manifests/immich
-  destination:
-    server: https://kubernetes.default.svc
-    namespace: immich
-  syncPolicy:
-    syncOptions:
-      - CreateNamespace=true
--- a/argocd/apps/mealie-ringtail.yaml
+++ b/argocd/apps/mealie-ringtail.yaml
@ -0,0 +1,26 @@
+# Mealie on ringtail k3s.
+#
+# Wave-1 indri-k8s decommission. Staging deployment; the minikube `mealie`
+# app stays in parallel until cutover (copy SQLite PVC, drop the minikube
+# tailscale ingress, flip Caddy). See [[migrate-wave1-ringtail]].
+#
+# Prerequisites:
+# - external-secrets-ringtail (onepassword-blumeops ClusterSecretStore)
+# - mealie-data PVC contents copied from minikube at cutover
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: mealie-ringtail
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
+    targetRevision: main
+    path: argocd/manifests/mealie-ringtail
+  destination:
+    server: https://ringtail.tail8d86e.ts.net:6443
+    namespace: mealie
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
--- a/argocd/apps/mealie.yaml
+++ b/argocd/apps/mealie.yaml
@ -1,17 +0,0 @@
-apiVersion: argoproj.io/v1alpha1
-kind: Application
-metadata:
-  name: mealie
-  namespace: argocd
-spec:
-  project: default
-  source:
-    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
-    targetRevision: main
-    path: argocd/manifests/mealie
-  destination:
-    server: https://kubernetes.default.svc
-    namespace: mealie
-  syncPolicy:
-    syncOptions:
-      - CreateNamespace=true
--- a/argocd/apps/paperless-ringtail.yaml
+++ b/argocd/apps/paperless-ringtail.yaml
@ -0,0 +1,28 @@
+# Paperless-ngx on ringtail k3s.
+#
+# Wave-1 indri-k8s decommission. Staging deployment; the minikube
+# `paperless` app stays in parallel until cutover (drop the minikube
+# tailscale ingress to free the name, then flip Caddy). See
+# [[migrate-wave1-ringtail]].
+#
+# Prerequisites:
+# - databases-ringtail blumeops-pg (paperless database + role)
+# - external-secrets-ringtail (onepassword-blumeops ClusterSecretStore)
+# - sifaka NFS rule granting ringtail access to /volume1/paperless
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: paperless-ringtail
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
+    targetRevision: main
+    path: argocd/manifests/paperless-ringtail
+  destination:
+    server: https://ringtail.tail8d86e.ts.net:6443
+    namespace: paperless
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
--- a/argocd/apps/paperless.yaml
+++ b/argocd/apps/paperless.yaml
@ -1,17 +0,0 @@
-apiVersion: argoproj.io/v1alpha1
-kind: Application
-metadata:
-  name: paperless
-  namespace: argocd
-spec:
-  project: default
-  source:
-    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
-    targetRevision: main
-    path: argocd/manifests/paperless
-  destination:
-    server: https://kubernetes.default.svc
-    namespace: paperless
-  syncPolicy:
-    syncOptions:
-      - CreateNamespace=true
--- a/argocd/apps/shower.yaml
+++ b/argocd/apps/shower.yaml
@ -0,0 +1,20 @@
+# Adelaide / Heidi / Addie baby shower app — Django guest/raffle/prize system.
+# Public landing page at shower.eblu.me (via fly proxy), staff console + admin
+# at shower.ops.eblu.me (tailnet only). Built from forge PyPI wheel.
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: shower
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
+    targetRevision: main
+    path: argocd/manifests/shower
+  destination:
+    server: https://ringtail.tail8d86e.ts.net:6443
+    namespace: shower
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
--- a/argocd/apps/teslamate-ringtail.yaml
+++ b/argocd/apps/teslamate-ringtail.yaml
@ -0,0 +1,28 @@
+# TeslaMate on ringtail k3s.
+#
+# Wave-1 indri-k8s decommission. Staging deployment; the minikube
+# `teslamate` app stays in parallel until cutover (migrate the teslamate
+# database, drop the minikube tailscale ingress, flip Caddy). See
+# [[migrate-wave1-ringtail]].
+#
+# Prerequisites:
+# - databases-ringtail blumeops-pg (teslamate database + role; cube +
+#   earthdistance extensions created by superuser at cutover)
+# - external-secrets-ringtail (onepassword-blumeops ClusterSecretStore)
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: teslamate-ringtail
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
+    targetRevision: main
+    path: argocd/manifests/teslamate-ringtail
+  destination:
+    server: https://ringtail.tail8d86e.ts.net:6443
+    namespace: teslamate
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true
--- a/argocd/apps/teslamate.yaml
+++ b/argocd/apps/teslamate.yaml
@ -1,32 +0,0 @@
-# TeslaMate Tesla Data Logger
-# Requires: CloudNativePG PostgreSQL cluster and manual secret setup
-#
-# Before syncing, create the namespace and secrets:
-#   kubectl create namespace teslamate
-#   op inject -i argocd/manifests/databases/secret-teslamate.yaml.tpl | kubectl apply -f -
-#   op inject -i argocd/manifests/teslamate/secret-encryption-key.yaml.tpl | kubectl apply -f -
-#   op inject -i argocd/manifests/teslamate/secret-db.yaml.tpl | kubectl apply -f -
-#
-# Then create the database:
-#   PGPASSWORD=$(op read "op://blumeops/postgres/password") \
-#     psql -h pg.ops.eblu.me -U eblume -c "CREATE DATABASE teslamate OWNER teslamate;"
-#
-# After syncing, access the TeslaMate UI at https://tesla.tail8d86e.ts.net to complete
-# Tesla API authentication via OAuth flow.
-apiVersion: argoproj.io/v1alpha1
-kind: Application
-metadata:
-  name: teslamate
-  namespace: argocd
-spec:
-  project: default
-  source:
-    repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git
-    targetRevision: main
-    path: argocd/manifests/teslamate
-  destination:
-    server: https://kubernetes.default.svc
-    namespace: teslamate
-  syncPolicy:
-    syncOptions:
-      - CreateNamespace=true
--- a/argocd/manifests/alloy-k8s/config.alloy
+++ b/argocd/manifests/alloy-k8s/config.alloy
@ -159,8 +159,10 @@ prometheus.exporter.blackbox "services" {
  }

  target {
+    // devpi runs natively on indri (LaunchAgent), not in-cluster.
+    // We probe through Caddy (https://pypi.ops.eblu.me) which the cluster can reach via Tailscale.
    name    = "devpi"
-    address = "http://devpi.devpi.svc.cluster.local:3141/+api"
+    address = "https://pypi.ops.eblu.me/+api"
    module  = "http_2xx"
  }

@ -189,14 +191,9 @@ prometheus.exporter.blackbox "services" {
  }

  target {
+    // Migrated to ringtail (wave-1); probe through Caddy over Tailscale.
    name    = "teslamate"
-    address = "http://teslamate.teslamate.svc.cluster.local:4000/"
-    module  = "http_2xx"
-  }
-
-  target {
-    name    = "immich"
-    address = "http://immich-server.immich.svc.cluster.local:2283/api/server/ping"
+    address = "https://tesla.ops.eblu.me/"
    module  = "http_2xx"
  }

--- a/argocd/manifests/alloy-k8s/kustomization.yaml
+++ b/argocd/manifests/alloy-k8s/kustomization.yaml
@ -10,7 +10,7 @@ resources:

 images:
  - name: registry.ops.eblu.me/blumeops/alloy
-    newTag: v1.14.0-fd0bebb
+    newTag: v1.16.0-9564435

 configMapGenerator:
  - name: alloy-config
--- a/argocd/manifests/alloy-ringtail/config.alloy
+++ b/argocd/manifests/alloy-ringtail/config.alloy
@ -45,6 +45,26 @@ prometheus.scrape "kube_state_metrics" {
  forward_to      = [prometheus.remote_write.prometheus.receiver]
 }

+// ============== SERVICE HEALTH PROBES ==============
+
+// Blackbox-style HTTP probes for in-cluster services on ringtail
+prometheus.exporter.blackbox "services" {
+  config = "{ modules: { http_2xx: { prober: http, timeout: 5s } } }"
+
+  target {
+    name    = "immich"
+    address = "http://immich-server.immich.svc.cluster.local:2283/api/server/ping"
+    module  = "http_2xx"
+  }
+}
+
+// Scrape blackbox probe results
+prometheus.scrape "blackbox" {
+  targets         = prometheus.exporter.blackbox.services.targets
+  scrape_interval = "30s"
+  forward_to      = [prometheus.remote_write.prometheus.receiver]
+}
+
 // Push metrics to indri Prometheus
 prometheus.remote_write "prometheus" {
  external_labels = { cluster = "ringtail" }
--- a/argocd/manifests/alloy-ringtail/kustomization.yaml
+++ b/argocd/manifests/alloy-ringtail/kustomization.yaml
@ -10,7 +10,7 @@ resources:

 images:
  - name: registry.ops.eblu.me/blumeops/alloy
-    newTag: v1.14.0-fd0bebb-nix
+    newTag: v1.16.0-9564435-nix

 configMapGenerator:
  - name: alloy-config
--- a/argocd/manifests/alloy-tracing-ringtail/daemonset.yaml
+++ b/argocd/manifests/alloy-tracing-ringtail/daemonset.yaml
@ -46,6 +46,7 @@ spec:
              mountPath: /var/lib/alloy/data
          securityContext:
            privileged: true
+            runAsUser: 0
      tolerations:
        - operator: Exists
      volumes:
--- a/argocd/manifests/alloy-tracing-ringtail/kustomization.yaml
+++ b/argocd/manifests/alloy-tracing-ringtail/kustomization.yaml
@ -9,7 +9,7 @@ resources:

 images:
  - name: registry.ops.eblu.me/blumeops/alloy
-    newTag: v1.14.0-fd0bebb-nix
+    newTag: v1.16.0-9564435-nix

 configMapGenerator:
  - name: alloy-tracing-config
--- a/argocd/manifests/argocd/README.md
+++ b/argocd/manifests/argocd/README.md
@ -25,7 +25,7 @@ kubectl wait --for=condition=available deployment/argocd-server -n argocd --time
 kubectl -n argocd get secret argocd-initial-admin-secret -o jsonpath="{.data.password}" | base64 -d && echo

 # 5. Login and change password
-argocd login argocd.tail8d86e.ts.net --username admin --grpc-web
+argocd login argocd.tail8d86e.ts.net --username admin
 argocd account update-password

 # 6. Apply repo-creds-forge credential template for SSH access to all forge repos
@ -114,4 +114,4 @@ spec:
  Future improvement: integrate with a secrets operator (e.g., External Secrets).
 - The credential template (`repo-creds`) uses a URL prefix to match all repos on forge.
 - ArgoCD uses Tailscale Ingress with Let's Encrypt for TLS termination.
- The `--grpc-web` flag is required for CLI access through the Tailscale ingress.
+- After Authentik is up, prefer `argocd login argocd.ops.eblu.me --sso` over the admin password login above; admin is only needed during bootstrap or as break-glass.
--- a/argocd/manifests/argocd/argocd-cm-patch.yaml
+++ b/argocd/manifests/argocd/argocd-cm-patch.yaml
@ -16,7 +16,6 @@ data:
    name: Authentik
    issuer: https://authentik.ops.eblu.me/application/o/argocd/
    clientID: argocd
-    clientSecret: $argocd-oidc-authentik:client-secret
    requestedScopes:
      - openid
      - profile
--- a/argocd/manifests/argocd/argocd-rbac-cm-patch.yaml
+++ b/argocd/manifests/argocd/argocd-rbac-cm-patch.yaml
@ -2,6 +2,9 @@
 #
 # - workflow-bot: minimal CI/CD permissions (sync, get)
 # - admins: Authentik admins group mapped to ArgoCD admin role
+# - admin: local break-glass account — keeps ArgoCD admin rights for when
+#   Authentik SSO is unavailable (without this it has no permissions, since
+#   policy.default is unset)
 #
 apiVersion: v1
 kind: ConfigMap
@ -14,3 +17,4 @@ data:
    p, role:workflow-bot, applications, get, *, allow
    g, workflow-bot, role:workflow-bot
    g, admins, role:admin
+    g, admin, role:admin
--- a/argocd/manifests/argocd/argocd-resources-patch.yaml
+++ b/argocd/manifests/argocd/argocd-resources-patch.yaml
@ -0,0 +1,118 @@
+# Resource requests/limits for the ArgoCD workloads.
+#
+# One partial manifest per workload, listed under `patches:` in
+# kustomization.yaml. Each document names the workload (kind + metadata.name)
+# and the container whose `resources` it sets; nothing else is specified here.
+#
+# argocd-server: 50m/128Mi requested, capped at 500m/512Mi.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: argocd-server
+spec:
+  template:
+    spec:
+      containers:
+        - name: argocd-server
+          resources:
+            requests:
+              cpu: 50m
+              memory: 128Mi
+            limits:
+              cpu: 500m
+              memory: 512Mi
+---
+# argocd-repo-server: same budget as argocd-server.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: argocd-repo-server
+spec:
+  template:
+    spec:
+      containers:
+        - name: argocd-repo-server
+          resources:
+            requests:
+              cpu: 50m
+              memory: 128Mi
+            limits:
+              cpu: 500m
+              memory: 512Mi
+---
+# argocd-application-controller: a StatefulSet (not a Deployment) and the
+# largest budget in this file (100m/256Mi requested, 1 CPU / 1Gi limit).
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: argocd-application-controller
+spec:
+  template:
+    spec:
+      containers:
+        - name: argocd-application-controller
+          resources:
+            requests:
+              cpu: 100m
+              memory: 256Mi
+            limits:
+              cpu: "1"
+              memory: 1Gi
+---
+# argocd-applicationset-controller: small budget (25m/64Mi requested,
+# 250m/256Mi limit), shared by the remaining workloads below.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: argocd-applicationset-controller
+spec:
+  template:
+    spec:
+      containers:
+        - name: argocd-applicationset-controller
+          resources:
+            requests:
+              cpu: 25m
+              memory: 64Mi
+            limits:
+              cpu: 250m
+              memory: 256Mi
+---
+# argocd-dex-server: note the container is named `dex`, not after the
+# Deployment.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: argocd-dex-server
+spec:
+  template:
+    spec:
+      containers:
+        - name: dex
+          resources:
+            requests:
+              cpu: 25m
+              memory: 64Mi
+            limits:
+              cpu: 250m
+              memory: 256Mi
+---
+# argocd-redis: note the container is named `redis`, not after the Deployment.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: argocd-redis
+spec:
+  template:
+    spec:
+      containers:
+        - name: redis
+          resources:
+            requests:
+              cpu: 25m
+              memory: 64Mi
+            limits:
+              cpu: 250m
+              memory: 256Mi
+---
+# argocd-notifications-controller: small budget, as above.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: argocd-notifications-controller
+spec:
+  template:
+    spec:
+      containers:
+        - name: argocd-notifications-controller
+          resources:
+            requests:
+              cpu: 25m
+              memory: 64Mi
+            limits:
+              cpu: 250m
+              memory: 256Mi
--- a/argocd/manifests/argocd/external-secret-oidc-authentik.yaml
+++ b/argocd/manifests/argocd/external-secret-oidc-authentik.yaml
@ -1,31 +0,0 @@
-# ExternalSecret for ArgoCD OIDC client secret (Authentik)
-#
-# Referenced from argocd-cm as $argocd-oidc-authentik:client-secret
-# Must have app.kubernetes.io/part-of: argocd label for ArgoCD to read it
-#
---
-apiVersion: external-secrets.io/v1
-kind: ExternalSecret
-metadata:
-  name: argocd-oidc-authentik
-  namespace: argocd
-spec:
-  refreshInterval: 1h
-  secretStoreRef:
-    kind: ClusterSecretStore
-    name: onepassword-blumeops
-  target:
-    name: argocd-oidc-authentik
-    creationPolicy: Owner
-    template:
-      metadata:
-        labels:
-          app.kubernetes.io/part-of: argocd
-  data:
-    - secretKey: client-secret
-      remoteRef:
-        conversionStrategy: Default
-        decodingStrategy: None
-        key: "Authentik (blumeops)"
-        metadataPolicy: None
-        property: argocd-client-secret
--- a/argocd/manifests/argocd/kustomization.yaml
+++ b/argocd/manifests/argocd/kustomization.yaml
@ -9,10 +9,10 @@ resources:
  - https://raw.githubusercontent.com/argoproj/argo-cd/998fb59dc355653c0657908a6ea2f87136e022d1/manifests/install.yaml
  - ingress-tailscale.yaml
  - external-secret-repo-forge.yaml
-  - external-secret-oidc-authentik.yaml

 patches:
  - path: argocd-cmd-params-cm.yaml
  - path: argocd-ssh-known-hosts-cm.yaml
  - path: argocd-cm-patch.yaml
  - path: argocd-rbac-cm-patch.yaml
+  - path: argocd-resources-patch.yaml
--- a/argocd/manifests/authentik/configmap-blueprint.yaml
+++ b/argocd/manifests/authentik/configmap-blueprint.yaml
@ -262,14 +262,15 @@ data:
          name: ArgoCD
          authorization_flow: !Find [authentik_flows.flow, [slug, default-provider-authorization-implicit-consent]]
          invalidation_flow: !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]]
-          client_type: confidential
+          client_type: public
          client_id: argocd
-          client_secret: !Env AUTHENTIK_ARGOCD_CLIENT_SECRET
          redirect_uris:
            - matching_mode: strict
              url: https://argocd.ops.eblu.me/auth/callback
            - matching_mode: strict
              url: https://argocd.tail8d86e.ts.net/auth/callback
+            - matching_mode: strict
+              url: http://localhost:8085/auth/callback
          signing_key: !Find [authentik_crypto.certificatekeypair, [name, authentik Self-signed Certificate]]
          property_mappings:
            - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]]
@ -433,3 +434,93 @@ data:
          provider: !KeyOf mealie-provider
          meta_launch_url: https://meals.ops.eblu.me
          policy_engine_mode: all
+
+  heph.yaml: |
+    version: 1
+    metadata:
+      name: BlumeOps Heph SSO
+      labels:
+        blueprints.goauthentik.io/description: "Hephaestus hub OIDC (device-code) provider, application, and device-code flow"
+    entries:
+      # Device-code flow (RFC 8628). authentik ships no default for this, so we
+      # create one and bind it to the brand below. An empty stage_configuration
+      # flow is sufficient: the already-authenticated user just confirms the code.
+      - model: authentik_flows.flow
+        id: device-code-flow
+        identifiers:
+          slug: default-device-code-flow
+        attrs:
+          name: Device code flow
+          title: Device code flow
+          slug: default-device-code-flow
+          designation: stage_configuration
+          authentication: require_authenticated
+
+      # Enable the device-code grant globally by binding the flow to the default
+      # brand (domain authentik-default). Partial update — only sets this field.
+      - model: authentik_brands.brand
+        identifiers:
+          domain: authentik-default
+        attrs:
+          flow_device_code: !KeyOf device-code-flow
+
+      # OAuth2 provider for heph — PUBLIC client (device-code + PKCE, no secret).
+      # client_id doubles as the token audience the hub verifies (--oidc-audience heph),
+      # and the app slug 'heph' is the issuer path (/application/o/heph/).
+      - model: authentik_providers_oauth2.oauth2provider
+        id: heph-provider
+        identifiers:
+          name: Heph
+        attrs:
+          name: Heph
+          authorization_flow: !Find [authentik_flows.flow, [slug, default-provider-authorization-implicit-consent]]
+          invalidation_flow: !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]]
+          client_type: public
+          client_id: heph
+          # CLI/TUI use the device-code grant (no redirect). The heph-pwa browser
+          # login uses Authorization Code + PKCE, which DOES redirect back to the
+          # app's origin — register those here (Authentik also keys token-endpoint
+          # CORS off these origins). Trailing slash matters: the PWA's redirect_uri
+          # is its base dir, e.g. https://heph.ops.eblu.me/.
+          redirect_uris:
+            - matching_mode: strict
+              url: https://heph.ops.eblu.me/
+            - matching_mode: strict
+              url: http://localhost:8787/  # local dev (hephd --web-root)
+          signing_key: !Find [authentik_crypto.certificatekeypair, [name, authentik Self-signed Certificate]]
+          property_mappings:
+            - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]]
+            - !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]]
+            - !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]]
+            # offline_access: heph CLI requests "openid offline_access"; without
+            # this mapping the refresh token is session-bound and hephd's
+            # refresh_token grant 400s once the session lapses (spoke sync dies).
+            - !Find [authentik_providers_oauth2.scopemapping, [scope_name, offline_access]]
+          sub_mode: hashed_user_id
+          include_claims_in_id_token: true
+
+      # Heph application — linked to the OAuth2 provider
+      - model: authentik_core.application
+        id: heph-app
+        identifiers:
+          slug: heph
+        attrs:
+          name: Hephaestus
+          slug: heph
+          provider: !KeyOf heph-provider
+          meta_launch_url: https://heph.ops.eblu.me
+          policy_engine_mode: any
+
+      # Policy binding — restrict heph to admins group (single-owner, sensitive data)
+      - model: authentik_policies.policybinding
+        identifiers:
+          order: 0
+          target: !KeyOf heph-app
+          group: !Find [authentik_core.group, [name, admins]]
+        attrs:
+          target: !KeyOf heph-app
+          group: !Find [authentik_core.group, [name, admins]]
+          order: 0
+          enabled: true
+          negate: false
+          timeout: 30
--- a/argocd/manifests/authentik/deployment-worker.yaml
+++ b/argocd/manifests/authentik/deployment-worker.yaml
@ -75,11 +75,6 @@ spec:
                secretKeyRef:
                  name: authentik-config
                  key: jellyfin-client-secret
-            - name: AUTHENTIK_ARGOCD_CLIENT_SECRET
-              valueFrom:
-                secretKeyRef:
-                  name: authentik-config
-                  key: argocd-client-secret
            - name: AUTHENTIK_MEALIE_CLIENT_SECRET
              valueFrom:
                secretKeyRef:
--- a/argocd/manifests/authentik/external-secret.yaml
+++ b/argocd/manifests/authentik/external-secret.yaml
@ -53,10 +53,6 @@ spec:
      remoteRef:
        key: "Authentik (blumeops)"
        property: jellyfin-client-secret
-    - secretKey: argocd-client-secret
-      remoteRef:
-        key: "Authentik (blumeops)"
-        property: argocd-client-secret
    - secretKey: mealie-client-secret
      remoteRef:
        key: "Authentik (blumeops)"
--- a/argocd/manifests/cv/deployment.yaml
+++ b/argocd/manifests/cv/deployment.yaml
@ -1,51 +0,0 @@
---
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: cv
-  namespace: cv
-spec:
-  replicas: 2
-  strategy:
-    type: RollingUpdate
-    rollingUpdate:
-      maxUnavailable: 0
-      maxSurge: 1
-  selector:
-    matchLabels:
-      app: cv
-  template:
-    metadata:
-      labels:
-        app: cv
-    spec:
-      securityContext:
-        seccompProfile:
-          type: RuntimeDefault
-      containers:
-        - name: cv
-          image: registry.ops.eblu.me/blumeops/cv:kustomized
-          ports:
-            - containerPort: 80
-              name: http
-          env:
-            - name: CV_RELEASE_URL
-              value: "https://forge.eblu.me/api/packages/eblume/generic/cv/v1.0.3/cv-v1.0.3.tar.gz"
-          resources:
-            requests:
-              memory: "64Mi"
-              cpu: "10m"
-            limits:
-              memory: "128Mi"
-          livenessProbe:
-            httpGet:
-              path: /healthz
-              port: 80
-            initialDelaySeconds: 10
-            periodSeconds: 30
-          readinessProbe:
-            httpGet:
-              path: /healthz
-              port: 80
-            initialDelaySeconds: 5
-            periodSeconds: 10
--- a/argocd/manifests/cv/ingress-tailscale.yaml
+++ b/argocd/manifests/cv/ingress-tailscale.yaml
@ -1,27 +0,0 @@
---
-apiVersion: networking.k8s.io/v1
-kind: Ingress
-metadata:
-  name: cv-tailscale
-  namespace: cv
-  annotations:
-    tailscale.com/proxy-class: "default"
-    tailscale.com/proxy-group: "ingress"
-    tailscale.com/tags: "tag:k8s,tag:flyio-target"
-    gethomepage.dev/enabled: "true"
-    gethomepage.dev/name: "CV"
-    gethomepage.dev/group: "Services"
-    gethomepage.dev/icon: "mdi-file-document"
-    gethomepage.dev/description: "Resume / CV"
-    gethomepage.dev/href: "https://cv.eblu.me"
-    gethomepage.dev/pod-selector: "app=cv"
-spec:
-  ingressClassName: tailscale
-  defaultBackend:
-    service:
-      name: cv
-      port:
-        number: 80
-  tls:
-    - hosts:
-        - cv
--- a/argocd/manifests/cv/kustomization.yaml
+++ b/argocd/manifests/cv/kustomization.yaml
@ -1,12 +0,0 @@
---
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-namespace: cv
-resources:
-  - deployment.yaml
-  - service.yaml
-  - ingress-tailscale.yaml
-  - pdb.yaml
-images:
-  - name: registry.ops.eblu.me/blumeops/cv
-    newTag: v1.0.3-613f05d
--- a/argocd/manifests/cv/pdb.yaml
+++ b/argocd/manifests/cv/pdb.yaml
@ -1,10 +0,0 @@
---
-apiVersion: policy/v1
-kind: PodDisruptionBudget
-metadata:
-  name: cv
-spec:
-  minAvailable: 1
-  selector:
-    matchLabels:
-      app: cv
--- a/argocd/manifests/cv/service.yaml
+++ b/argocd/manifests/cv/service.yaml
@ -1,13 +0,0 @@
---
-apiVersion: v1
-kind: Service
-metadata:
-  name: cv
-  namespace: cv
-spec:
-  selector:
-    app: cv
-  ports:
-    - name: http
-      port: 80
-      targetPort: 80
--- a/argocd/manifests/databases-ringtail/blumeops-pg.yaml
+++ b/argocd/manifests/databases-ringtail/blumeops-pg.yaml
@ -0,0 +1,97 @@
+# PostgreSQL Cluster for blumeops services on ringtail k3s.
+#
+# Wave-1 indri-k8s decommission target (see [[migrate-wave1-ringtail]]).
+# Holds the paperless and teslamate databases migrated off the minikube
+# blumeops-pg via cold pg_dump/pg_restore at cutover. miniflux + authentik
+# stay where they are for now (later waves), so this cluster only carries
+# the wave-1 roles.
+#
+# Apps reach this in-cluster at blumeops-pg-rw.databases.svc.cluster.local
+# — the same name they used on minikube, so teslamate's DATABASE_HOST is
+# unchanged.
+#
+# Database creation is deferred to cutover, mirroring the minikube cluster
+# (where only the bootstrap database is declared and the rest were created
+# out-of-band):
+#   - paperless: the bootstrap database below (restored into at cutover).
+#   - teslamate: created at its cutover by the eblume superuser, because the
+#     dump's `earthdistance` extension is untrusted and CREATE EXTENSION
+#     needs superuser. (cube + earthdistance ownership then transferred to
+#     the teslamate role so it can ALTER EXTENSION UPDATE.)
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: blumeops-pg
+  namespace: databases
+spec:
+  instances: 1
+  imageName: ghcr.io/cloudnative-pg/postgresql:18.3
+
+  storage:
+    size: 10Gi
+    storageClass: local-path
+
+  bootstrap:
+    initdb:
+      database: paperless
+      owner: paperless
+
+  managed:
+    roles:
+      # eblume superuser for admin + privileged restore steps (extensions)
+      - name: eblume
+        login: true
+        superuser: true
+        createdb: true
+        createrole: true
+        connectionLimit: -1
+        ensure: present
+        inherit: true
+        passwordSecret:
+          name: blumeops-pg-eblume
+      # borgmatic read-only user for backups
+      - name: borgmatic
+        login: true
+        connectionLimit: -1
+        ensure: present
+        inherit: true
+        inRoles:
+          - pg_read_all_data
+        passwordSecret:
+          name: blumeops-pg-borgmatic
+      # paperless user (also the bootstrap database owner above; the
+      # managed role sets its password from the 1Password-backed secret)
+      - name: paperless
+        login: true
+        connectionLimit: -1
+        ensure: present
+        inherit: true
+        passwordSecret:
+          name: blumeops-pg-paperless
+      # teslamate user. Extension ownership (cube, earthdistance) is
+      # transferred to this role at cutover so it can ALTER EXTENSION UPDATE.
+      - name: teslamate
+        login: true
+        connectionLimit: -1
+        ensure: present
+        inherit: true
+        passwordSecret:
+          name: blumeops-pg-teslamate
+
+  resources:
+    requests:
+      memory: "256Mi"
+      cpu: "100m"
+    limits:
+      memory: "1Gi"
+      cpu: "500m"
+
+  postgresql:
+    parameters:
+      max_connections: "50"
+      shared_buffers: "128MB"
+      password_encryption: "scram-sha-256"
+    pg_hba:
+      # Password auth from anywhere; network security is via Tailscale.
+      - host all all 0.0.0.0/0 scram-sha-256
+      - host all all ::/0 scram-sha-256
--- a/argocd/manifests/databases-ringtail/external-secret-borgmatic.yaml
+++ b/argocd/manifests/databases-ringtail/external-secret-borgmatic.yaml
@ -1,13 +1,14 @@
-# ExternalSecret for borgmatic backup user password on immich-pg cluster
+# ExternalSecret for borgmatic backup user password
+#
+# Replaces the manual op inject workflow from secret-borgmatic.yaml.tpl
 #
-# Reuses the same 1Password item as blumeops-pg-borgmatic.
 # 1Password item: "borgmatic" in blumeops vault
 # Field: "db-password"
 #
 apiVersion: external-secrets.io/v1
 kind: ExternalSecret
 metadata:
-  name: immich-pg-borgmatic
+  name: blumeops-pg-borgmatic
  namespace: databases
 spec:
  refreshInterval: 1h
@ -15,7 +16,7 @@ spec:
    kind: ClusterSecretStore
    name: onepassword-blumeops
  target:
-    name: immich-pg-borgmatic
+    name: blumeops-pg-borgmatic
    creationPolicy: Owner
    template:
      type: kubernetes.io/basic-auth
--- a/argocd/manifests/databases-ringtail/external-secret-eblume.yaml
+++ b/argocd/manifests/databases-ringtail/external-secret-eblume.yaml
@ -0,0 +1,30 @@
+# ExternalSecret for eblume superuser password
+#
+# Replaces the manual op inject workflow from secret-eblume.yaml.tpl
+#
+# 1Password item: "postgres" in blumeops vault
+# Field: "password"
+#
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: blumeops-pg-eblume
+  namespace: databases
+spec:
+  refreshInterval: 1h
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: onepassword-blumeops
+  target:
+    # Secret name must match the `passwordSecret` of the managed `eblume` role
+    # on the blumeops-pg Cluster.
+    name: blumeops-pg-eblume
+    creationPolicy: Owner
+    template:
+      type: kubernetes.io/basic-auth
+      data:
+        username: eblume
+        # Filled from the `password` secretKey fetched under `data` below.
+        password: "{{ .password }}"
+  # Sequence indented to match the other ExternalSecrets in this directory.
+  data:
+    - secretKey: password
+      remoteRef:
+        key: postgres
+        property: password
--- a/argocd/manifests/databases-ringtail/external-secret-immich-borgmatic.yaml
+++ b/argocd/manifests/databases-ringtail/external-secret-immich-borgmatic.yaml
@ -0,0 +1,32 @@
+# ExternalSecret for borgmatic backup user password on immich-pg cluster
+# (ringtail k3s).
+#
+# Ported from argocd/manifests/databases/external-secret-immich-borgmatic.yaml,
+# which is no longer listed in the minikube databases kustomization.
+# The onepassword-blumeops ClusterSecretStore exists on ringtail via the
+# external-secrets-ringtail app.
+#
+# 1Password item: "borgmatic" in blumeops vault
+# Field: "db-password"
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: immich-pg-borgmatic
+  namespace: databases
+spec:
+  refreshInterval: 1h
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: onepassword-blumeops
+  target:
+    name: immich-pg-borgmatic
+    creationPolicy: Owner
+    template:
+      type: kubernetes.io/basic-auth
+      data:
+        username: borgmatic
+        password: "{{ .password }}"
+  data:
+    - secretKey: password
+      remoteRef:
+        key: borgmatic
+        property: db-password
--- a/argocd/manifests/databases-ringtail/external-secret-paperless.yaml
+++ b/argocd/manifests/databases-ringtail/external-secret-paperless.yaml
--- a/argocd/manifests/databases-ringtail/external-secret-teslamate.yaml
+++ b/argocd/manifests/databases-ringtail/external-secret-teslamate.yaml
--- a/argocd/manifests/databases-ringtail/immich-pg.yaml
+++ b/argocd/manifests/databases-ringtail/immich-pg.yaml
@ -0,0 +1,53 @@
+# PostgreSQL Cluster for Immich on ringtail k3s.
+#
+# Initially bootstrapped via CNPG pg_basebackup from the minikube
+# immich-pg cluster on 2026-05-13, then promoted to primary. The
+# externalClusters + bootstrap.pg_basebackup blocks have been pruned
+# from this manifest now that the migration is complete — leaving
+# them around is a footgun (re-enabling replica.enabled=true would
+# try to demote this cluster against a stale source). See
+# [[immich-pg-data-migration]] for the procedure used.
+apiVersion: postgresql.cnpg.io/v1
+kind: Cluster
+metadata:
+  name: immich-pg
+  namespace: databases
+spec:
+  instances: 1
+  # VectorChord image for PostgreSQL 17 with VectorChord 0.5.0
+  imageName: ghcr.io/tensorchord/cloudnative-vectorchord:17-0.5.0
+
+  storage:
+    size: 10Gi
+    storageClass: local-path
+
+  # Managed roles
+  # Note: connectionLimit, ensure, inherit are CNPG defaults added to prevent
+  # ArgoCD drift.
+  managed:
+    roles:
+      # borgmatic read-only user for backups (member of pg_read_all_data);
+      # password comes from the immich-pg-borgmatic secret.
+      - name: borgmatic
+        login: true
+        connectionLimit: -1
+        ensure: present
+        inherit: true
+        inRoles:
+          - pg_read_all_data
+        passwordSecret:
+          name: immich-pg-borgmatic
+
+  resources:
+    requests:
+      memory: "256Mi"
+      cpu: "100m"
+    limits:
+      memory: "1Gi"
+      cpu: "500m"
+
+  postgresql:
+    # VectorChord requires vchord.so in shared_preload_libraries
+    shared_preload_libraries:
+      - "vchord.so"
+    parameters:
+      max_connections: "50"
+      shared_buffers: "128MB"
+      password_encryption: "scram-sha-256"
+    pg_hba:
+      # Password (scram-sha-256) auth from any IPv4/IPv6 address
+      - host all all 0.0.0.0/0 scram-sha-256
+      - host all all ::/0 scram-sha-256
--- a/argocd/manifests/databases-ringtail/kustomization.yaml
+++ b/argocd/manifests/databases-ringtail/kustomization.yaml
@ -0,0 +1,16 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+namespace: databases
+
+resources:
+  - immich-pg.yaml
+  - external-secret-immich-borgmatic.yaml
+  - service-immich-pg-tailscale.yaml
+  # wave-1 indri-k8s decommission: blumeops-pg (paperless + teslamate)
+  - blumeops-pg.yaml
+  - service-blumeops-pg-tailscale.yaml
+  - external-secret-eblume.yaml
+  - external-secret-borgmatic.yaml
+  - external-secret-paperless.yaml
+  - external-secret-teslamate.yaml
--- a/argocd/manifests/databases-ringtail/service-blumeops-pg-tailscale.yaml
+++ b/argocd/manifests/databases-ringtail/service-blumeops-pg-tailscale.yaml
@ -0,0 +1,24 @@
+# Tailscale LoadBalancer for the ringtail blumeops-pg cluster.
+# Canonical hostname: blumeops-pg-ringtail.tail8d86e.ts.net (distinct from
+# the minikube blumeops-pg, which still owns pg.tail8d86e.ts.net until the
+# wave-1 decommission). Borgmatic on indri and the Grafana TeslaMate
+# datasource reach it via the Caddy L4 route pg.ops.eblu.me:5434.
+apiVersion: v1
+kind: Service
+metadata:
+  name: blumeops-pg-tailscale
+  namespace: databases
+  annotations:
+    # Tailscale device name for this LoadBalancer.
+    tailscale.com/hostname: "blumeops-pg-ringtail"
+    tailscale.com/proxy-class: "default"
+spec:
+  type: LoadBalancer
+  loadBalancerClass: tailscale
+  selector:
+    cnpg.io/cluster: blumeops-pg
+    # Route only to the current primary instance. Uses cnpg.io/instanceRole
+    # rather than the bare `role` pod label, which CloudNativePG has
+    # deprecated in favour of this one.
+    cnpg.io/instanceRole: primary
+  ports:
+    - name: postgresql
+      port: 5432
+      targetPort: 5432
+      protocol: TCP
--- a/argocd/manifests/databases-ringtail/service-immich-pg-tailscale.yaml
+++ b/argocd/manifests/databases-ringtail/service-immich-pg-tailscale.yaml
@ -1,6 +1,8 @@
-# Tailscale LoadBalancer for immich-pg PostgreSQL access
-# Canonical hostname: immich-pg.tail8d86e.ts.net
-# Caddy L4 proxies pg.ops.eblu.me:5433 → this service for borgmatic backups
+# Tailscale LoadBalancer for immich-pg PostgreSQL access on ringtail.
+# Canonical hostname: immich-pg.tail8d86e.ts.net (claimed from the
+# minikube side after the minikube service was removed during the
+# immich-to-ringtail migration). Borgmatic on indri uses this
+# hostname for nightly backups.
 apiVersion: v1
 kind: Service
 metadata:
--- a/argocd/manifests/databases/blumeops-pg.yaml
+++ b/argocd/manifests/databases/blumeops-pg.yaml
@ -44,18 +44,9 @@ spec:
          - pg_read_all_data
        passwordSecret:
          name: blumeops-pg-borgmatic
-      # teslamate user for TeslaMate Tesla data logger
-      # Superuser removed. Extension ownership (cube, earthdistance)
-      # transferred manually so teslamate can ALTER EXTENSION UPDATE.
-      # earthdistance is untrusted — DROP+CREATE needs temporary
-      # superuser escalation during upgrades.
-      - name: teslamate
-        login: true
-        connectionLimit: -1
-        ensure: present
-        inherit: true
-        passwordSecret:
-          name: blumeops-pg-teslamate
+      # teslamate + paperless roles removed: migrated to ringtail blumeops-pg
+      # (wave-1 decommission). Their databases were dropped from this cluster
+      # after the cutover was verified and backed up.
      # authentik user for Authentik identity provider (runs on ringtail)
      - name: authentik
        login: true
@ -65,14 +56,6 @@ spec:
        createdb: true
        passwordSecret:
          name: blumeops-pg-authentik
-      # paperless user for Paperless-ngx document management
-      - name: paperless
-        login: true
-        connectionLimit: -1
-        ensure: present
-        inherit: true
-        passwordSecret:
-          name: blumeops-pg-paperless

  # Resource limits for minikube environment
  resources:
--- a/argocd/manifests/databases/immich-pg.yaml
+++ b/argocd/manifests/databases/immich-pg.yaml
@ -1,69 +0,0 @@
-# PostgreSQL Cluster for Immich
-# Uses VectorChord (successor to pgvecto.rs) for AI-powered vector search
-# See: https://github.com/immich-app/immich/discussions/9060
-# Managed by CloudNativePG operator
-apiVersion: postgresql.cnpg.io/v1
-kind: Cluster
-metadata:
-  name: immich-pg
-  namespace: databases
-spec:
-  instances: 1
-  # VectorChord image for PostgreSQL 17 with VectorChord 0.5.0
-  # Immich v2.4.1 requires VectorChord >=0.3 <0.6
-  # See: https://github.com/tensorchord/VectorChord
-  imageName: ghcr.io/tensorchord/cloudnative-vectorchord:17-0.5.0
-
-  storage:
-    size: 10Gi
-    storageClass: standard
-
-  # Bootstrap creates initial database and owner
-  bootstrap:
-    initdb:
-      database: immich
-      owner: immich
-      postInitSQL:
-        # Extensions required by Immich
-        - CREATE EXTENSION IF NOT EXISTS vector;
-        - CREATE EXTENSION IF NOT EXISTS vchord CASCADE;
-        - CREATE EXTENSION IF NOT EXISTS cube CASCADE;
-        - CREATE EXTENSION IF NOT EXISTS earthdistance CASCADE;
-
-  # Managed roles
-  # Note: connectionLimit, ensure, inherit are CNPG defaults added to prevent ArgoCD drift
-  managed:
-    roles:
-      # borgmatic read-only user for backups
-      - name: borgmatic
-        login: true
-        connectionLimit: -1
-        ensure: present
-        inherit: true
-        inRoles:
-          - pg_read_all_data
-        passwordSecret:
-          name: immich-pg-borgmatic
-
-  # Resource limits for minikube environment
-  resources:
-    requests:
-      memory: "256Mi"
-      cpu: "100m"
-    limits:
-      memory: "1Gi"
-      cpu: "500m"
-
-  # PostgreSQL configuration
-  postgresql:
-    # VectorChord requires vchord.so in shared_preload_libraries
-    shared_preload_libraries:
-      - "vchord.so"
-    parameters:
-      max_connections: "50"
-      shared_buffers: "128MB"
-      password_encryption: "scram-sha-256"
-    pg_hba:
-      # Allow connections from k8s pods
-      - host all all 0.0.0.0/0 scram-sha-256
-      - host all all ::/0 scram-sha-256
--- a/argocd/manifests/databases/kustomization.yaml
+++ b/argocd/manifests/databases/kustomization.yaml
@ -5,13 +5,8 @@ namespace: databases

 resources:
  - blumeops-pg.yaml
-  - immich-pg.yaml
  - service-tailscale.yaml
-  - service-immich-pg-tailscale.yaml
  - service-metrics-tailscale.yaml
  - external-secret-eblume.yaml
  - external-secret-borgmatic.yaml
-  - external-secret-immich-borgmatic.yaml
-  - external-secret-teslamate.yaml
  - external-secret-authentik.yaml
-  - external-secret-paperless.yaml
--- a/argocd/manifests/devpi/README.md
+++ b/argocd/manifests/devpi/README.md
@ -1,72 +0,0 @@
-# devpi PyPI Caching Proxy
-
-devpi-server running in Kubernetes, providing:
- PyPI caching proxy at `root/pypi`
- Private package hosting at `eblume/dev`
-
-## Setup
-
-### 1. Create the root password secret
-
-```fish
-kubectl create namespace devpi
-op inject -i argocd/manifests/devpi/secret-root.yaml.tpl | kubectl apply -f -
-```
-
-### 2. Deploy via ArgoCD
-
-```fish
-argocd app sync apps
-argocd app sync devpi
-```
-
-The container will auto-initialize on first startup using the root password from the secret.
-
-### 3. Create user and index (first time only)
-
-After the pod is running:
-
-```fish
-# Login to devpi as root
-uvx --from devpi-client devpi use https://pypi.tail8d86e.ts.net
-uvx --from devpi-client devpi login root
-# Enter root password when prompted
-
-# Create eblume user (prompts for password - use the one from 1Password)
-uvx --from devpi-client devpi user -c eblume email=blume.erich@gmail.com
-
-# Create private index inheriting from PyPI
-uvx --from devpi-client devpi index -c eblume/dev bases=root/pypi
-```
-
-## Usage
-
-### As pip index (caching proxy)
-
-Configure `~/.config/pip/pip.conf`:
-
-```ini
-[global]
-index-url = https://pypi.tail8d86e.ts.net/root/pypi/+simple/
-trusted-host = pypi.tail8d86e.ts.net
-```
-
-### Upload private packages
-
-```fish
-cd ~/code/personal/your-package
-uv build
-uv publish --publish-url https://pypi.tail8d86e.ts.net/eblume/dev/
-```
-
-## URLs
-
- Web UI: https://pypi.tail8d86e.ts.net
- PyPI cache: https://pypi.tail8d86e.ts.net/root/pypi/+simple/
- Private index: https://pypi.tail8d86e.ts.net/eblume/dev/+simple/
-
-## Credentials
-
-Stored in 1Password vault `blumeops`, item `kyhzfifryqnuk7jeyibmmjvxxm`:
- `root password` - devpi root user
- `password` - eblume user password
--- a/argocd/manifests/devpi/external-secret.yaml
+++ b/argocd/manifests/devpi/external-secret.yaml
@@ -1,25 +0,0 @@
-# ExternalSecret for devpi root password
-#
-# Replaces the manual op inject workflow from secret-root.yaml.tpl
-#
-# 1Password item: "devpi" in blumeops vault
-# Field: "root password"
-#
-apiVersion: external-secrets.io/v1
-kind: ExternalSecret
-metadata:
-  name: devpi-root
-  namespace: devpi
-spec:
-  refreshInterval: 1h
-  secretStoreRef:
-    kind: ClusterSecretStore
-    name: onepassword-blumeops
-  target:
-    name: devpi-root
-    creationPolicy: Owner
-  data:
-  - secretKey: password
-    remoteRef:
-      key: devpi
-      property: root password
--- a/argocd/manifests/devpi/ingress-tailscale.yaml
+++ b/argocd/manifests/devpi/ingress-tailscale.yaml
@@ -1,25 +0,0 @@
-apiVersion: networking.k8s.io/v1
-kind: Ingress
-metadata:
-  name: devpi-tailscale
-  namespace: devpi
-  annotations:
-    tailscale.com/proxy-class: "default"
-    tailscale.com/proxy-group: "ingress"
-    gethomepage.dev/enabled: "true"
-    gethomepage.dev/name: "PyPI"
-    gethomepage.dev/group: "Infrastructure"
-    gethomepage.dev/icon: "pypi.png"
-    gethomepage.dev/description: "PyPI cache"
-    gethomepage.dev/href: "https://pypi.ops.eblu.me"
-    gethomepage.dev/pod-selector: "app=devpi"
-spec:
-  ingressClassName: tailscale
-  defaultBackend:
-    service:
-      name: devpi
-      port:
-        number: 3141
-  tls:
-    - hosts:
-        - pypi
--- a/argocd/manifests/devpi/kustomization.yaml
+++ b/argocd/manifests/devpi/kustomization.yaml
@@ -1,14 +0,0 @@
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-
-namespace: devpi
-
-resources:
-  - statefulset.yaml
-  - service.yaml
-  - ingress-tailscale.yaml
-  - external-secret.yaml
-
-images:
-  - name: registry.ops.eblu.me/blumeops/devpi
-    newTag: v6.19.1-613f05d
--- a/argocd/manifests/devpi/statefulset.yaml
+++ b/argocd/manifests/devpi/statefulset.yaml
@@ -1,64 +0,0 @@
-apiVersion: apps/v1
-kind: StatefulSet
-metadata:
-  name: devpi
-  namespace: devpi
-spec:
-  serviceName: devpi
-  replicas: 1
-  selector:
-    matchLabels:
-      app: devpi
-  template:
-    metadata:
-      labels:
-        app: devpi
-    spec:
-      securityContext:
-        fsGroup: 1000
-        seccompProfile:
-          type: RuntimeDefault
-      containers:
-        - name: devpi
-          image: registry.ops.eblu.me/blumeops/devpi:kustomized
-          env:
-            - name: DEVPI_ROOT_PASSWORD
-              valueFrom:
-                secretKeyRef:
-                  name: devpi-root
-                  key: password
-            - name: DEVPI_OUTSIDE_URL
-              value: "https://pypi.ops.eblu.me"
-          ports:
-            - containerPort: 3141
-              name: http
-          volumeMounts:
-            - name: data
-              mountPath: /devpi
-          resources:
-            requests:
-              memory: "256Mi"
-              cpu: "100m"
-            limits:
-              memory: "2Gi"  # High limit for initial PyPI index build, reclaimed after
-              cpu: "500m"
-          livenessProbe:
-            httpGet:
-              path: /+api
-              port: 3141
-            initialDelaySeconds: 30
-            periodSeconds: 30
-          readinessProbe:
-            httpGet:
-              path: /+api
-              port: 3141
-            initialDelaySeconds: 10
-            periodSeconds: 10
-  volumeClaimTemplates:
-    - metadata:
-        name: data
-      spec:
-        accessModes: ["ReadWriteOnce"]
-        resources:
-          requests:
-            storage: 50Gi
--- a/argocd/manifests/docs/deployment.yaml
+++ b/argocd/manifests/docs/deployment.yaml
@@ -1,51 +0,0 @@
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: docs
-  namespace: docs
-spec:
-  replicas: 2
-  strategy:
-    type: RollingUpdate
-    rollingUpdate:
-      maxUnavailable: 0
-      maxSurge: 1
-  selector:
-    matchLabels:
-      app: docs
-  template:
-    metadata:
-      labels:
-        app: docs
-    spec:
-      securityContext:
-        seccompProfile:
-          type: RuntimeDefault
-      containers:
-        - name: docs
-          image: registry.ops.eblu.me/blumeops/quartz:kustomized
-          ports:
-            - containerPort: 80
-              name: http
-          env:
-            - name: DOCS_RELEASE_URL
-              value: "https://forge.eblu.me/eblume/blumeops/releases/download/v1.15.4/docs-v1.15.4.tar.gz"
-          resources:
-            requests:
-              memory: "64Mi"
-              cpu: "10m"
-            limits:
-              memory: "128Mi"
-          livenessProbe:
-            httpGet:
-              path: /healthz
-              port: 80
-            initialDelaySeconds: 10
-            periodSeconds: 30
-          readinessProbe:
-            httpGet:
-              path: /healthz
-              port: 80
-            initialDelaySeconds: 5
-            periodSeconds: 10
--- a/argocd/manifests/docs/ingress-tailscale.yaml
+++ b/argocd/manifests/docs/ingress-tailscale.yaml
@@ -1,27 +0,0 @@
----
-apiVersion: networking.k8s.io/v1
-kind: Ingress
-metadata:
-  name: docs-tailscale
-  namespace: docs
-  annotations:
-    tailscale.com/proxy-class: "default"
-    tailscale.com/proxy-group: "ingress"
-    tailscale.com/tags: "tag:k8s,tag:flyio-target"
-    gethomepage.dev/enabled: "true"
-    gethomepage.dev/name: "Docs"
-    gethomepage.dev/group: "Services"
-    gethomepage.dev/icon: "mdi-book-open-page-variant"
-    gethomepage.dev/description: "BlumeOps Documentation"
-    gethomepage.dev/href: "https://docs.eblu.me"
-    gethomepage.dev/pod-selector: "app=docs"
-spec:
-  ingressClassName: tailscale
-  defaultBackend:
-    service:
-      name: docs
-      port:
-        number: 80
-  tls:
-    - hosts:
-        - docs
--- a/argocd/manifests/docs/kustomization.yaml
+++ b/argocd/manifests/docs/kustomization.yaml
@@ -1,12 +0,0 @@
----
-apiVersion: kustomize.config.k8s.io/v1beta1
-kind: Kustomization
-namespace: docs
-resources:
-  - deployment.yaml
-  - service.yaml
-  - ingress-tailscale.yaml
-  - pdb.yaml
-images:
-  - name: registry.ops.eblu.me/blumeops/quartz
-    newTag: v1.28.2-613f05d
--- a/argocd/manifests/docs/pdb.yaml
+++ b/argocd/manifests/docs/pdb.yaml
@@ -1,10 +0,0 @@
----
-apiVersion: policy/v1
-kind: PodDisruptionBudget
-metadata:
-  name: docs
-spec:
-  minAvailable: 1
-  selector:
-    matchLabels:
-      app: docs
--- a/argocd/manifests/docs/service.yaml
+++ b/argocd/manifests/docs/service.yaml
@@ -1,13 +0,0 @@
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: docs
-  namespace: docs
-spec:
-  selector:
-    app: docs
-  ports:
-    - name: http
-      port: 80
-      targetPort: 80
--- a/argocd/manifests/external-secrets-ringtail/kustomization.yaml
+++ b/argocd/manifests/external-secrets-ringtail/kustomization.yaml
@@ -0,0 +1,16 @@
+# Ringtail (amd64) overlay for external-secrets.
+#
+# Reuses the shared indri manifest as a base and only overrides the controller
+# image to the nix-built amd64 variant (`-nix` tag). The base sets the arm64
+# image (built via containers/external-secrets/container.py on indri's Dagger
+# runner); ringtail's k3s is amd64 and needs the image built by
+# containers/external-secrets/default.nix on the nix-container-builder.
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - ../external-secrets
+
+images:
+  - name: registry.ops.eblu.me/blumeops/external-secrets
+    newTag: v2.2.0-13895bb-nix
--- a/argocd/manifests/external-secrets/kustomization.yaml
+++ b/argocd/manifests/external-secrets/kustomization.yaml
@@ -12,4 +12,5 @@ resources:

 images:
  - name: ghcr.io/external-secrets/external-secrets
-    newTag: v2.2.0
+    newName: registry.ops.eblu.me/blumeops/external-secrets
+    newTag: v2.2.0-13895bb
--- a/argocd/manifests/forgejo-runner/config.yaml
+++ b/argocd/manifests/forgejo-runner/config.yaml
@@ -1,9 +1,8 @@
-# Reviewed against v12.7.3 defaults (2026-03-30)
+# Reviewed against v12.8.2 defaults (2026-04-20)
 log:
  level: info

 runner:
-  file: /data/.runner
  capacity: 2
  timeout: 3h
  shutdown_timeout: 3h
@@ -13,7 +12,15 @@ runner:
    TZ: America/Los_Angeles

 container:
-  # Job execution image is set via RUNNER_LABELS in deployment.yaml
  network: "host"
  # Connect to DinD sidecar via TCP (not socket)
  docker_host: tcp://127.0.0.1:2375
+
+server:
+  connections:
+    forgejo:
+      url: https://forge.ops.eblu.me/
+      uuid: ${FORGEJO_RUNNER_UUID}
+      token: ${FORGEJO_RUNNER_TOKEN}
+      labels:
+        - k8s:docker://registry.ops.eblu.me/blumeops/runner-job-image:v0.20.6-50f8c2a
--- a/argocd/manifests/forgejo-runner/deployment.yaml
+++ b/argocd/manifests/forgejo-runner/deployment.yaml
@@ -25,14 +25,6 @@ spec:
          env:
            - name: TZ
              value: America/Los_Angeles
-            - name: DOCKER_HOST
-              value: tcp://localhost:2375
-            - name: FORGEJO_URL
-              value: "https://forge.ops.eblu.me"
-            - name: RUNNER_NAME
-              value: "k8s-runner"
-            - name: RUNNER_LABELS
-              value: "k8s:docker://registry.ops.eblu.me/blumeops/runner-job-image:v0.20.1-24f7512"
          command:
            - /bin/sh
            - -c
@@ -44,19 +36,11 @@ spec:
              done
              echo "Docker daemon ready"

-              # Register if not already registered
-              if [ ! -f /data/.runner ]; then
-                echo "Registering runner..."
-                forgejo-runner register \
-                  --instance "$FORGEJO_URL" \
-                  --token "$RUNNER_TOKEN" \
-                  --name "$RUNNER_NAME" \
-                  --labels "$RUNNER_LABELS" \
-                  --no-interactive
-              fi
+              # Render config with credentials from ExternalSecret.
+              envsubst < /config/config.yaml > /tmp/config.yaml

              # Start daemon
-              exec forgejo-runner daemon --config /config/config.yaml
+              exec forgejo-runner daemon --config /tmp/config.yaml
          envFrom:
            - secretRef:
                name: forgejo-runner-env
--- a/argocd/manifests/forgejo-runner/external-secret.yaml
+++ b/argocd/manifests/forgejo-runner/external-secret.yaml
@@ -1,11 +1,7 @@
-# ExternalSecret for Forgejo Runner token
+# ExternalSecret for Forgejo Runner credentials
 #
 # 1Password item: "Forgejo Secrets" in blumeops vault
-# Field: runner_reg (runner registration token)
-#
-# Non-secret env vars (FORGEJO_URL, RUNNER_NAME, RUNNER_LABELS) live in the
-# deployment spec so that changes (e.g. image version bumps) trigger a rollout
-# automatically.
+# Fields: runner_k8s_uuid, runner_k8s_token
 #
 apiVersion: external-secrets.io/v1
 kind: ExternalSecret
@@ -21,7 +17,11 @@ spec:
    name: forgejo-runner-env
    creationPolicy: Owner
  data:
-  - secretKey: RUNNER_TOKEN
+  - secretKey: FORGEJO_RUNNER_UUID
    remoteRef:
      key: Forgejo Secrets
-      property: runner_reg
+      property: runner_k8s_uuid
+  - secretKey: FORGEJO_RUNNER_TOKEN
+    remoteRef:
+      key: Forgejo Secrets
+      property: runner_k8s_token
--- a/argocd/manifests/forgejo-runner/kustomization.yaml
+++ b/argocd/manifests/forgejo-runner/kustomization.yaml
@@ -10,7 +10,8 @@ resources:

 images:
  - name: code.forgejo.org/forgejo/runner
-    newTag: "12.7.3"
+    newName: registry.ops.eblu.me/blumeops/forgejo-runner
+    newTag: v12.8.2-1425bf1
  - name: docker
    newTag: 27-dind

--- a/argocd/manifests/frigate/deployment-notify.yaml
+++ b/argocd/manifests/frigate/deployment-notify.yaml
@@ -16,7 +16,7 @@ spec:
    spec:
      containers:
        - name: frigate-notify
-          image: ghcr.io/0x2142/frigate-notify:kustomized
+          image: registry.ops.eblu.me/blumeops/frigate-notify:kustomized
          env:
            - name: TZ
              value: America/Los_Angeles
--- a/argocd/manifests/frigate/kustomization.yaml
+++ b/argocd/manifests/frigate/kustomization.yaml
@@ -17,8 +17,8 @@ images:
    newTag: "1.37"
  - name: ghcr.io/blakeblackshear/frigate
    newTag: 0.17.1-tensorrt
-  - name: ghcr.io/0x2142/frigate-notify
-    newTag: v0.5.4
+  - name: registry.ops.eblu.me/blumeops/frigate-notify
+    newTag: v0.5.4-e928054-nix

 configMapGenerator:
  - name: frigate-config
--- a/Show more
+++ b/Show more