From bd53ad51f84f5d91cb67996303ef8fcaa2234fd4 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 27 May 2026 18:32:58 -0700 Subject: [PATCH 01/35] C1: update ringtail flake inputs (disko, home-manager, nixpkgs) Routine weekly flake update. Will deploy with `mise run provision-ringtail` after PR review. --- nixos/ringtail/flake.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/nixos/ringtail/flake.lock b/nixos/ringtail/flake.lock index 0f53d0e..0f0da7e 100644 --- a/nixos/ringtail/flake.lock +++ b/nixos/ringtail/flake.lock @@ -7,11 +7,11 @@ ] }, "locked": { - "lastModified": 1777713215, - "narHash": "sha256-8GzXDOXckDWwST8TY5DbwYFjdvQLlP7K9CLSVx6iTTo=", + "lastModified": 1779699611, + "narHash": "sha256-EcCaSTKnmg2o4wLKaN1aqQFomwyhO7ik0bX9COdyCas=", "owner": "nix-community", "repo": "disko", - "rev": "63b4e7e6cf75307c1d26ac3762b886b5b0247267", + "rev": "5ba0c9555c28685e57fa54c7a25e42c7efdbfc8d", "type": "github" }, "original": { @@ -27,11 +27,11 @@ ] }, "locked": { - "lastModified": 1778401693, - "narHash": "sha256-OVHdCqXXUF5UdGkH+FF2ZL06OLZjj2kvP2dIUmzVWoo=", + "lastModified": 1779506708, + "narHash": "sha256-QOD/CNm196nCJRheux/URi4/HE66fthdOMqCJoPP1Y0=", "owner": "nix-community", "repo": "home-manager", - "rev": "389b83002efc26f1145e89a6a8e6edc5a6435948", + "rev": "3ee51fbdac8c8bdfe1e7e1fcaba6520a563f394f", "type": "github" }, "original": { @@ -43,11 +43,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1778430510, - "narHash": "sha256-Ti+ZBvW6yrWWAg2szExVTwCd4qOJ3KlVr1tFHfyfi8Q=", + "lastModified": 1779467186, + "narHash": "sha256-nOesoDCiXcUftqbRBMz9tt4blI5PvljMWbm3kuCA+0s=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "8fd9daa3db09ced9700431c5b7ad0e8ba199b575", + "rev": "b77b3de8775677f84492abe84635f87b0e153f0f", "type": "github" }, "original": { From 568e355d108d8253628acb2f331ad6ead63c12bf Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 27 May 2026 18:40:05 -0700 Subject: [PATCH 02/35] 
=?UTF-8?q?C1:=20tooling=20deps=20bump=20=E2=80=94?= =?UTF-8?q?=20prek=20hooks,=20fly=20proxy,=20typer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Monthly tooling refresh per [[update-tooling-dependencies]]: - prek: trufflehog v3.95.3, kingfisher v1.101.0, ruff v0.15.14, ansible-core 2.21.0 - fly proxy: nginx 1.30.1-alpine, alloy v1.16.1 - mise-tasks: typer==0.26.2 across all scripts - tailscale held at v1.94.2 (v1.96.5+ MagicDNS regression) --- .../changelog.d/recurring-maintenance-2026-05-27.infra.md | 4 ++++ fly/Dockerfile | 8 ++++---- mise-tasks/branch-cleanup | 2 +- mise-tasks/container-build-and-release | 2 +- mise-tasks/container-list | 2 +- mise-tasks/container-version-check | 2 +- mise-tasks/dns-acme-cleanup | 2 +- mise-tasks/docs-mikado | 2 +- mise-tasks/docs-preview | 2 +- mise-tasks/docs-review | 2 +- mise-tasks/docs-review-stale | 2 +- mise-tasks/mikado-branch-invariant-check | 2 +- mise-tasks/op-backup | 2 +- mise-tasks/pr-comments | 2 +- mise-tasks/prune-ringtail-generations | 2 +- mise-tasks/review-compliance-reports | 2 +- mise-tasks/runner-logs | 2 +- mise-tasks/service-review | 2 +- mise-tasks/spork-create | 2 +- prek.toml | 8 ++++---- 20 files changed, 29 insertions(+), 25 deletions(-) create mode 100644 docs/changelog.d/recurring-maintenance-2026-05-27.infra.md diff --git a/docs/changelog.d/recurring-maintenance-2026-05-27.infra.md b/docs/changelog.d/recurring-maintenance-2026-05-27.infra.md new file mode 100644 index 0000000..f2d48ad --- /dev/null +++ b/docs/changelog.d/recurring-maintenance-2026-05-27.infra.md @@ -0,0 +1,4 @@ +Recurring maintenance batch: + +- Ringtail flake inputs refreshed (`disko`, `home-manager`, `nixpkgs`). +- Tooling deps bumped: prek hooks (trufflehog v3.95.3, kingfisher v1.101.0, ruff v0.15.14, `ansible-core` 2.21.0); fly proxy base images (nginx 1.30.1-alpine, alloy v1.16.1); `typer==0.26.2` in mise tasks. 
diff --git a/fly/Dockerfile b/fly/Dockerfile index eae8c35..d4e7a18 100644 --- a/fly/Dockerfile +++ b/fly/Dockerfile @@ -1,5 +1,5 @@ -# nginx 1.30.0-alpine -FROM nginx@sha256:0272e4604ed93c1792f03695a033a6e8546840f86e0de20a884bb17d2c924883 +# nginx 1.30.1-alpine +FROM nginx@sha256:c819f83c54b0361f5557601bf5eb4943d09360e7a7fdf426afc466570f45874d # Copy tailscale binaries from official image (v1.94.2) COPY --from=docker.io/tailscale/tailscale@sha256:95e528798bebe75f39b10e74e7051cf51188ee615934f232ba7ad06a3390ffa1 \ @@ -13,8 +13,8 @@ RUN mkdir -p /var/run/tailscale /var/lib/tailscale \ && apk add --no-cache fail2ban \ && rm -f /etc/fail2ban/jail.d/alpine-ssh.conf -# Copy Alloy binary from official image (v1.16.0, Ubuntu-based, needs libc6-compat) -COPY --from=docker.io/grafana/alloy@sha256:6e00cf7c5a692ff5f24844529416ed017d76fce922f8199004e73d5eca46b6b8 \ +# Copy Alloy binary from official image (v1.16.1, Ubuntu-based, needs libc6-compat) +COPY --from=docker.io/grafana/alloy@sha256:51aeb9d829239345070619dad3edd6873186f913c84f45b365b74574fcb38ec0 \ /bin/alloy /usr/local/bin/alloy RUN mkdir -p /var/log/nginx /etc/alloy /tmp/alloy-data diff --git a/mise-tasks/branch-cleanup b/mise-tasks/branch-cleanup index 575c9a1..a538880 100755 --- a/mise-tasks/branch-cleanup +++ b/mise-tasks/branch-cleanup @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Delete branches that have been merged into main (local and remote)" #MISE alias="bc" diff --git a/mise-tasks/container-build-and-release b/mise-tasks/container-build-and-release index ba569e7..85e6cb8 100755 --- a/mise-tasks/container-build-and-release +++ b/mise-tasks/container-build-and-release @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["typer==0.25.0", 
"httpx==0.28.1"] +# dependencies = ["typer==0.26.2", "httpx==0.28.1"] # /// #MISE description="Trigger container build workflows via Forgejo API" #USAGE arg "" help="Container name (directory under containers/)" diff --git a/mise-tasks/container-list b/mise-tasks/container-list index 26639f2..7dad346 100755 --- a/mise-tasks/container-list +++ b/mise-tasks/container-list @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="List available containers and their recent tags" #USAGE arg "[name]" help="Optional container name to filter output" diff --git a/mise-tasks/container-version-check b/mise-tasks/container-version-check index 4ebe3b6..06f96ae 100755 --- a/mise-tasks/container-version-check +++ b/mise-tasks/container-version-check @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Validate container version consistency across container.py, Dockerfiles, nix derivations, and service-versions.yaml" #USAGE flag "--all-files" help="Check all containers, not just changed ones" diff --git a/mise-tasks/dns-acme-cleanup b/mise-tasks/dns-acme-cleanup index 432a6ce..3a53b11 100755 --- a/mise-tasks/dns-acme-cleanup +++ b/mise-tasks/dns-acme-cleanup @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Delete orphaned ACME challenge TXT records in eblu.me" #USAGE flag "--dry-run" help="List orphans without deleting" diff --git a/mise-tasks/docs-mikado b/mise-tasks/docs-mikado index 
eea052f..c632e46 100755 --- a/mise-tasks/docs-mikado +++ b/mise-tasks/docs-mikado @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "pyyaml==6.0.3", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "pyyaml==6.0.3", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="View active Mikado dependency chains for C2 changes" #USAGE arg "[card]" help="Card stem to show chain for" diff --git a/mise-tasks/docs-preview b/mise-tasks/docs-preview index faa79af..9e0bd16 100755 --- a/mise-tasks/docs-preview +++ b/mise-tasks/docs-preview @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Build docs with Dagger and serve locally, opening to a specific card" #USAGE arg "" help="Card path relative to docs/, e.g. 
how-to/knowledgebase/review-documentation" diff --git a/mise-tasks/docs-review b/mise-tasks/docs-review index d07904d..12e301f 100755 --- a/mise-tasks/docs-review +++ b/mise-tasks/docs-review @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Review the most stale documentation card by last-reviewed date" #USAGE flag "--limit " default="15" help="Number of docs to show in the table" diff --git a/mise-tasks/docs-review-stale b/mise-tasks/docs-review-stale index 4449213..0c5490e 100755 --- a/mise-tasks/docs-review-stale +++ b/mise-tasks/docs-review-stale @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["rich==15.0.0", "typer==0.25.0"] +# dependencies = ["rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Report docs by git-last-modified date, highlighting stale ones" #USAGE flag "--threshold " default="180" help="Days before a doc is considered stale" diff --git a/mise-tasks/mikado-branch-invariant-check b/mise-tasks/mikado-branch-invariant-check index 1f0fbcf..3135bf2 100755 --- a/mise-tasks/mikado-branch-invariant-check +++ b/mise-tasks/mikado-branch-invariant-check @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["rich==15.0.0", "typer==0.25.0"] +# dependencies = ["rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Validate Mikado Branch Invariant on mikado/* branches" #USAGE arg "[commit_msg_file]" help="Commit message file (passed by commit-msg hook)" diff --git a/mise-tasks/op-backup b/mise-tasks/op-backup index 37a97a6..7db033b 100755 --- a/mise-tasks/op-backup +++ b/mise-tasks/op-backup @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["rich==15.0.0", 
"typer==0.25.0"] +# dependencies = ["rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Encrypt a 1Password .1pux export and send to indri for borgmatic" #USAGE arg "[export_path]" help="Path to .1pux export file (prompted if omitted)" diff --git a/mise-tasks/pr-comments b/mise-tasks/pr-comments index 7205617..39d7c9a 100755 --- a/mise-tasks/pr-comments +++ b/mise-tasks/pr-comments @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="List unresolved comments on a PR" #USAGE arg "" help="Pull request number" diff --git a/mise-tasks/prune-ringtail-generations b/mise-tasks/prune-ringtail-generations index 2b8e3f9..2ad8dc8 100755 --- a/mise-tasks/prune-ringtail-generations +++ b/mise-tasks/prune-ringtail-generations @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["rich==15.0.0", "typer==0.25.0"] +# dependencies = ["rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Prune old NixOS generations on ringtail, preserving rollback safety" #MISE alias="prg" diff --git a/mise-tasks/review-compliance-reports b/mise-tasks/review-compliance-reports index a9146c8..24d2afc 100755 --- a/mise-tasks/review-compliance-reports +++ b/mise-tasks/review-compliance-reports @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["rich==15.0.0", "typer==0.25.0", "pyyaml==6.0.3"] +# dependencies = ["rich==15.0.0", "typer==0.26.2", "pyyaml==6.0.3"] # /// #MISE description="Summarize the latest Prowler and Kingfisher compliance reports from sifaka" #USAGE flag "--full" help="Show all unmuted failures, not just new ones" diff --git a/mise-tasks/runner-logs b/mise-tasks/runner-logs index 9c988ee..3c5e8e3 100755 --- a/mise-tasks/runner-logs +++ b/mise-tasks/runner-logs @@ 
-1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="List recent Forgejo Actions runs or fetch logs for a specific job" #USAGE arg "[run_number]" help="Run number to show jobs for (omit to list recent runs)" diff --git a/mise-tasks/service-review b/mise-tasks/service-review index 2d50e0b..f83b104 100755 --- a/mise-tasks/service-review +++ b/mise-tasks/service-review @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Review the most stale service for version freshness" #USAGE flag "--limit " default="15" help="Number of services to show in the table" diff --git a/mise-tasks/spork-create b/mise-tasks/spork-create index 92f4e5c..3f18563 100755 --- a/mise-tasks/spork-create +++ b/mise-tasks/spork-create @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Create a spork (floating-branch soft-fork) of a mirrored upstream project" #USAGE arg "" help="Repository name in the mirrors/ org on forge (e.g. 
kingfisher)" diff --git a/prek.toml b/prek.toml index add7799..2c66b82 100644 --- a/prek.toml +++ b/prek.toml @@ -28,7 +28,7 @@ hooks = [{ id = "check-yaml", args = ["--unsafe"] }] # Secret detection (running both tools in parallel to compare coverage) [[repos]] repo = "https://github.com/trufflesecurity/trufflehog" -rev = "17456f8c7d042d8c82c9a8ca9e937231f9f42e26" # v3.95.2 +rev = "37b77001d0174ebec2fcca2bd83ff83a6d45a3ab" # v3.95.3 hooks = [ { id = "trufflehog", entry = "trufflehog git file://. --since-commit HEAD --no-verification --fail", stages = [ "pre-commit", @@ -38,7 +38,7 @@ hooks = [ [[repos]] repo = "https://github.com/mongodb/kingfisher" -rev = "9ddec4ab8b53653d4941e6b3fd4ff602ce91d81b" # v1.97.0 +rev = "6f560103cc6ea082ef4b80a9098e3f3111afb8bc" # v1.101.0 hooks = [ { id = "kingfisher", args = [ "scan", @@ -69,12 +69,12 @@ name = "ansible-lint" entry = "env ANSIBLE_ROLES_PATH=ansible/roles ansible-lint" language = "python" files = "^ansible/" -additional_dependencies = ["ansible-lint==26.4.0", "ansible-core==2.20.5"] +additional_dependencies = ["ansible-lint==26.4.0", "ansible-core==2.21.0"] # Python - ruff for linting and formatting [[repos]] repo = "https://github.com/astral-sh/ruff-pre-commit" -rev = "6fec9b7edb08fd9989088709d864a7826dc74e80" # v0.15.12 +rev = "0c7b6c989466a93942def1f84baf36ddfcd60c83" # v0.15.14 hooks = [{ id = "ruff", args = ["--fix"] }, { id = "ruff-format" }] # Python - ty type checker From bf72493e7f1256283b7931e3568a4d449cf13e24 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 27 May 2026 18:40:17 -0700 Subject: [PATCH 03/35] =?UTF-8?q?C1:=20doc=20review=20=E2=80=94=20refresh?= =?UTF-8?q?=20indri=20reference=20card?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Top stale doc per docs-review (never reviewed). 
Add devpi, cv, docs to native-services list (post-2026-04 migration to LaunchAgent/Caddy), widen k8s note to reflect the broader set of apps on ringtail and the planned indri-minikube decommission, add CPU/RAM specs. --- docs/changelog.d/recurring-maintenance-2026-05-27.doc.md | 1 + docs/reference/infrastructure/indri.md | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 docs/changelog.d/recurring-maintenance-2026-05-27.doc.md diff --git a/docs/changelog.d/recurring-maintenance-2026-05-27.doc.md b/docs/changelog.d/recurring-maintenance-2026-05-27.doc.md new file mode 100644 index 0000000..af30489 --- /dev/null +++ b/docs/changelog.d/recurring-maintenance-2026-05-27.doc.md @@ -0,0 +1 @@ +Reviewed [[indri]] reference card: added `devpi`, `cv`, and `docs` to the native-services list; widened the k8s note to reflect the growing set of apps now on ringtail and the planned indri-minikube decommission; added CPU/RAM specs. diff --git a/docs/reference/infrastructure/indri.md b/docs/reference/infrastructure/indri.md index cbb2a0f..67652ca 100644 --- a/docs/reference/infrastructure/indri.md +++ b/docs/reference/infrastructure/indri.md @@ -1,6 +1,7 @@ --- title: Indri -modified: 2026-02-19 +modified: 2026-05-27 +last-reviewed: 2026-05-27 tags: - infrastructure - host @@ -15,6 +16,7 @@ Primary BlumeOps server. Mac Mini M1 (2020). | Property | Value | |----------|-------| | **Model** | Mac mini M1, 2020 (Macmini9,1) | +| **CPU / RAM** | 8 cores / 16 GB | | **Storage** | 2TB internal SSD | | **macOS** | 15.7.3 (Sequoia) | | **Tailscale hostname** | `indri.tail8d86e.ts.net` | @@ -30,9 +32,12 @@ Primary BlumeOps server. Mac Mini M1 (2020). 
- [[borgmatic]] - Backup system - [[alloy|Alloy]] - Metrics/logs collector - [[caddy]] - Reverse proxy for `*.ops.eblu.me` +- [[devpi]] - PyPI mirror (LaunchAgent) +- [[cv]] - Static CV site, served by Caddy +- [[docs]] - Quartz-built docs site, served by Caddy **Kubernetes (via minikube):** -- [[apps|Most k8s applications]] (Frigate, ntfy migrated to [[ringtail]] k3s) +- [[apps|Most k8s applications]]. A growing set of apps (Authentik, Frigate, ntfy, Immich, Homepage, Shower, Kingfisher, alloy-ringtail) now run on [[ringtail]]'s k3s instead. Long-term plan is to decommission indri's minikube entirely. **GUI Applications (manual start required):** - Docker Desktop - Container runtime for minikube From c00d7db5079e78772e5e7e3780d7594baa009bd4 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 28 May 2026 06:01:57 -0700 Subject: [PATCH 04/35] Recurring maintenance batch (2026-05-27) (#360) Bundle of recurring overdue tasks: - Ringtail flake update - Security & compliance report review - Tooling deps bump (prek, fly, mise, forgejo workflows) - Top stale doc review - Top stale service review (if trivial) Larger items (service version bumps requiring upgrades, non-local container migration) split out as separate PRs. 
Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/360 --- .../recurring-maintenance-2026-05-27.doc.md | 1 + .../recurring-maintenance-2026-05-27.infra.md | 4 ++++ docs/reference/infrastructure/indri.md | 9 +++++++-- fly/Dockerfile | 8 ++++---- mise-tasks/branch-cleanup | 2 +- mise-tasks/container-build-and-release | 2 +- mise-tasks/container-list | 2 +- mise-tasks/container-version-check | 2 +- mise-tasks/dns-acme-cleanup | 2 +- mise-tasks/docs-mikado | 2 +- mise-tasks/docs-preview | 2 +- mise-tasks/docs-review | 2 +- mise-tasks/docs-review-stale | 2 +- mise-tasks/mikado-branch-invariant-check | 2 +- mise-tasks/op-backup | 2 +- mise-tasks/pr-comments | 2 +- mise-tasks/prune-ringtail-generations | 2 +- mise-tasks/review-compliance-reports | 2 +- mise-tasks/runner-logs | 2 +- mise-tasks/service-review | 2 +- mise-tasks/spork-create | 2 +- nixos/ringtail/flake.lock | 18 +++++++++--------- prek.toml | 8 ++++---- 23 files changed, 46 insertions(+), 36 deletions(-) create mode 100644 docs/changelog.d/recurring-maintenance-2026-05-27.doc.md create mode 100644 docs/changelog.d/recurring-maintenance-2026-05-27.infra.md diff --git a/docs/changelog.d/recurring-maintenance-2026-05-27.doc.md b/docs/changelog.d/recurring-maintenance-2026-05-27.doc.md new file mode 100644 index 0000000..af30489 --- /dev/null +++ b/docs/changelog.d/recurring-maintenance-2026-05-27.doc.md @@ -0,0 +1 @@ +Reviewed [[indri]] reference card: added `devpi`, `cv`, and `docs` to the native-services list; widened the k8s note to reflect the growing set of apps now on ringtail and the planned indri-minikube decommission; added CPU/RAM specs. 
diff --git a/docs/changelog.d/recurring-maintenance-2026-05-27.infra.md b/docs/changelog.d/recurring-maintenance-2026-05-27.infra.md new file mode 100644 index 0000000..f2d48ad --- /dev/null +++ b/docs/changelog.d/recurring-maintenance-2026-05-27.infra.md @@ -0,0 +1,4 @@ +Recurring maintenance batch: + +- Ringtail flake inputs refreshed (`disko`, `home-manager`, `nixpkgs`). +- Tooling deps bumped: prek hooks (trufflehog v3.95.3, kingfisher v1.101.0, ruff v0.15.14, `ansible-core` 2.21.0); fly proxy base images (nginx 1.30.1-alpine, alloy v1.16.1); `typer==0.26.2` in mise tasks. diff --git a/docs/reference/infrastructure/indri.md b/docs/reference/infrastructure/indri.md index cbb2a0f..67652ca 100644 --- a/docs/reference/infrastructure/indri.md +++ b/docs/reference/infrastructure/indri.md @@ -1,6 +1,7 @@ --- title: Indri -modified: 2026-02-19 +modified: 2026-05-27 +last-reviewed: 2026-05-27 tags: - infrastructure - host @@ -15,6 +16,7 @@ Primary BlumeOps server. Mac Mini M1 (2020). | Property | Value | |----------|-------| | **Model** | Mac mini M1, 2020 (Macmini9,1) | +| **CPU / RAM** | 8 cores / 16 GB | | **Storage** | 2TB internal SSD | | **macOS** | 15.7.3 (Sequoia) | | **Tailscale hostname** | `indri.tail8d86e.ts.net` | @@ -30,9 +32,12 @@ Primary BlumeOps server. Mac Mini M1 (2020). - [[borgmatic]] - Backup system - [[alloy|Alloy]] - Metrics/logs collector - [[caddy]] - Reverse proxy for `*.ops.eblu.me` +- [[devpi]] - PyPI mirror (LaunchAgent) +- [[cv]] - Static CV site, served by Caddy +- [[docs]] - Quartz-built docs site, served by Caddy **Kubernetes (via minikube):** -- [[apps|Most k8s applications]] (Frigate, ntfy migrated to [[ringtail]] k3s) +- [[apps|Most k8s applications]]. A growing set of apps (Authentik, Frigate, ntfy, Immich, Homepage, Shower, Kingfisher, alloy-ringtail) now run on [[ringtail]]'s k3s instead. Long-term plan is to decommission indri's minikube entirely. 
**GUI Applications (manual start required):** - Docker Desktop - Container runtime for minikube diff --git a/fly/Dockerfile b/fly/Dockerfile index eae8c35..d4e7a18 100644 --- a/fly/Dockerfile +++ b/fly/Dockerfile @@ -1,5 +1,5 @@ -# nginx 1.30.0-alpine -FROM nginx@sha256:0272e4604ed93c1792f03695a033a6e8546840f86e0de20a884bb17d2c924883 +# nginx 1.30.1-alpine +FROM nginx@sha256:c819f83c54b0361f5557601bf5eb4943d09360e7a7fdf426afc466570f45874d # Copy tailscale binaries from official image (v1.94.2) COPY --from=docker.io/tailscale/tailscale@sha256:95e528798bebe75f39b10e74e7051cf51188ee615934f232ba7ad06a3390ffa1 \ @@ -13,8 +13,8 @@ RUN mkdir -p /var/run/tailscale /var/lib/tailscale \ && apk add --no-cache fail2ban \ && rm -f /etc/fail2ban/jail.d/alpine-ssh.conf -# Copy Alloy binary from official image (v1.16.0, Ubuntu-based, needs libc6-compat) -COPY --from=docker.io/grafana/alloy@sha256:6e00cf7c5a692ff5f24844529416ed017d76fce922f8199004e73d5eca46b6b8 \ +# Copy Alloy binary from official image (v1.16.1, Ubuntu-based, needs libc6-compat) +COPY --from=docker.io/grafana/alloy@sha256:51aeb9d829239345070619dad3edd6873186f913c84f45b365b74574fcb38ec0 \ /bin/alloy /usr/local/bin/alloy RUN mkdir -p /var/log/nginx /etc/alloy /tmp/alloy-data diff --git a/mise-tasks/branch-cleanup b/mise-tasks/branch-cleanup index 575c9a1..a538880 100755 --- a/mise-tasks/branch-cleanup +++ b/mise-tasks/branch-cleanup @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Delete branches that have been merged into main (local and remote)" #MISE alias="bc" diff --git a/mise-tasks/container-build-and-release b/mise-tasks/container-build-and-release index ba569e7..85e6cb8 100755 --- a/mise-tasks/container-build-and-release +++ b/mise-tasks/container-build-and-release @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run 
--script # /// script # requires-python = ">=3.12" -# dependencies = ["typer==0.25.0", "httpx==0.28.1"] +# dependencies = ["typer==0.26.2", "httpx==0.28.1"] # /// #MISE description="Trigger container build workflows via Forgejo API" #USAGE arg "" help="Container name (directory under containers/)" diff --git a/mise-tasks/container-list b/mise-tasks/container-list index 26639f2..7dad346 100755 --- a/mise-tasks/container-list +++ b/mise-tasks/container-list @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="List available containers and their recent tags" #USAGE arg "[name]" help="Optional container name to filter output" diff --git a/mise-tasks/container-version-check b/mise-tasks/container-version-check index 4ebe3b6..06f96ae 100755 --- a/mise-tasks/container-version-check +++ b/mise-tasks/container-version-check @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Validate container version consistency across container.py, Dockerfiles, nix derivations, and service-versions.yaml" #USAGE flag "--all-files" help="Check all containers, not just changed ones" diff --git a/mise-tasks/dns-acme-cleanup b/mise-tasks/dns-acme-cleanup index 432a6ce..3a53b11 100755 --- a/mise-tasks/dns-acme-cleanup +++ b/mise-tasks/dns-acme-cleanup @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Delete orphaned ACME challenge TXT records in eblu.me" #USAGE flag "--dry-run" help="List orphans 
without deleting" diff --git a/mise-tasks/docs-mikado b/mise-tasks/docs-mikado index eea052f..c632e46 100755 --- a/mise-tasks/docs-mikado +++ b/mise-tasks/docs-mikado @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "pyyaml==6.0.3", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "pyyaml==6.0.3", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="View active Mikado dependency chains for C2 changes" #USAGE arg "[card]" help="Card stem to show chain for" diff --git a/mise-tasks/docs-preview b/mise-tasks/docs-preview index faa79af..9e0bd16 100755 --- a/mise-tasks/docs-preview +++ b/mise-tasks/docs-preview @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Build docs with Dagger and serve locally, opening to a specific card" #USAGE arg "" help="Card path relative to docs/, e.g. 
how-to/knowledgebase/review-documentation" diff --git a/mise-tasks/docs-review b/mise-tasks/docs-review index d07904d..12e301f 100755 --- a/mise-tasks/docs-review +++ b/mise-tasks/docs-review @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Review the most stale documentation card by last-reviewed date" #USAGE flag "--limit " default="15" help="Number of docs to show in the table" diff --git a/mise-tasks/docs-review-stale b/mise-tasks/docs-review-stale index 4449213..0c5490e 100755 --- a/mise-tasks/docs-review-stale +++ b/mise-tasks/docs-review-stale @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["rich==15.0.0", "typer==0.25.0"] +# dependencies = ["rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Report docs by git-last-modified date, highlighting stale ones" #USAGE flag "--threshold " default="180" help="Days before a doc is considered stale" diff --git a/mise-tasks/mikado-branch-invariant-check b/mise-tasks/mikado-branch-invariant-check index 1f0fbcf..3135bf2 100755 --- a/mise-tasks/mikado-branch-invariant-check +++ b/mise-tasks/mikado-branch-invariant-check @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["rich==15.0.0", "typer==0.25.0"] +# dependencies = ["rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Validate Mikado Branch Invariant on mikado/* branches" #USAGE arg "[commit_msg_file]" help="Commit message file (passed by commit-msg hook)" diff --git a/mise-tasks/op-backup b/mise-tasks/op-backup index 37a97a6..7db033b 100755 --- a/mise-tasks/op-backup +++ b/mise-tasks/op-backup @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["rich==15.0.0", 
"typer==0.25.0"] +# dependencies = ["rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Encrypt a 1Password .1pux export and send to indri for borgmatic" #USAGE arg "[export_path]" help="Path to .1pux export file (prompted if omitted)" diff --git a/mise-tasks/pr-comments b/mise-tasks/pr-comments index 7205617..39d7c9a 100755 --- a/mise-tasks/pr-comments +++ b/mise-tasks/pr-comments @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="List unresolved comments on a PR" #USAGE arg "" help="Pull request number" diff --git a/mise-tasks/prune-ringtail-generations b/mise-tasks/prune-ringtail-generations index 2b8e3f9..2ad8dc8 100755 --- a/mise-tasks/prune-ringtail-generations +++ b/mise-tasks/prune-ringtail-generations @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["rich==15.0.0", "typer==0.25.0"] +# dependencies = ["rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Prune old NixOS generations on ringtail, preserving rollback safety" #MISE alias="prg" diff --git a/mise-tasks/review-compliance-reports b/mise-tasks/review-compliance-reports index a9146c8..24d2afc 100755 --- a/mise-tasks/review-compliance-reports +++ b/mise-tasks/review-compliance-reports @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["rich==15.0.0", "typer==0.25.0", "pyyaml==6.0.3"] +# dependencies = ["rich==15.0.0", "typer==0.26.2", "pyyaml==6.0.3"] # /// #MISE description="Summarize the latest Prowler and Kingfisher compliance reports from sifaka" #USAGE flag "--full" help="Show all unmuted failures, not just new ones" diff --git a/mise-tasks/runner-logs b/mise-tasks/runner-logs index 9c988ee..3c5e8e3 100755 --- a/mise-tasks/runner-logs +++ b/mise-tasks/runner-logs @@ 
-1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="List recent Forgejo Actions runs or fetch logs for a specific job" #USAGE arg "[run_number]" help="Run number to show jobs for (omit to list recent runs)" diff --git a/mise-tasks/service-review b/mise-tasks/service-review index 2d50e0b..f83b104 100755 --- a/mise-tasks/service-review +++ b/mise-tasks/service-review @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["pyyaml==6.0.3", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Review the most stale service for version freshness" #USAGE flag "--limit " default="15" help="Number of services to show in the table" diff --git a/mise-tasks/spork-create b/mise-tasks/spork-create index 92f4e5c..3f18563 100755 --- a/mise-tasks/spork-create +++ b/mise-tasks/spork-create @@ -1,7 +1,7 @@ #!/usr/bin/env -S uv run --script # /// script # requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.25.0"] +# dependencies = ["httpx==0.28.1", "rich==15.0.0", "typer==0.26.2"] # /// #MISE description="Create a spork (floating-branch soft-fork) of a mirrored upstream project" #USAGE arg "" help="Repository name in the mirrors/ org on forge (e.g. 
kingfisher)" diff --git a/nixos/ringtail/flake.lock b/nixos/ringtail/flake.lock index 0f53d0e..0f0da7e 100644 --- a/nixos/ringtail/flake.lock +++ b/nixos/ringtail/flake.lock @@ -7,11 +7,11 @@ ] }, "locked": { - "lastModified": 1777713215, - "narHash": "sha256-8GzXDOXckDWwST8TY5DbwYFjdvQLlP7K9CLSVx6iTTo=", + "lastModified": 1779699611, + "narHash": "sha256-EcCaSTKnmg2o4wLKaN1aqQFomwyhO7ik0bX9COdyCas=", "owner": "nix-community", "repo": "disko", - "rev": "63b4e7e6cf75307c1d26ac3762b886b5b0247267", + "rev": "5ba0c9555c28685e57fa54c7a25e42c7efdbfc8d", "type": "github" }, "original": { @@ -27,11 +27,11 @@ ] }, "locked": { - "lastModified": 1778401693, - "narHash": "sha256-OVHdCqXXUF5UdGkH+FF2ZL06OLZjj2kvP2dIUmzVWoo=", + "lastModified": 1779506708, + "narHash": "sha256-QOD/CNm196nCJRheux/URi4/HE66fthdOMqCJoPP1Y0=", "owner": "nix-community", "repo": "home-manager", - "rev": "389b83002efc26f1145e89a6a8e6edc5a6435948", + "rev": "3ee51fbdac8c8bdfe1e7e1fcaba6520a563f394f", "type": "github" }, "original": { @@ -43,11 +43,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1778430510, - "narHash": "sha256-Ti+ZBvW6yrWWAg2szExVTwCd4qOJ3KlVr1tFHfyfi8Q=", + "lastModified": 1779467186, + "narHash": "sha256-nOesoDCiXcUftqbRBMz9tt4blI5PvljMWbm3kuCA+0s=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "8fd9daa3db09ced9700431c5b7ad0e8ba199b575", + "rev": "b77b3de8775677f84492abe84635f87b0e153f0f", "type": "github" }, "original": { diff --git a/prek.toml b/prek.toml index add7799..2c66b82 100644 --- a/prek.toml +++ b/prek.toml @@ -28,7 +28,7 @@ hooks = [{ id = "check-yaml", args = ["--unsafe"] }] # Secret detection (running both tools in parallel to compare coverage) [[repos]] repo = "https://github.com/trufflesecurity/trufflehog" -rev = "17456f8c7d042d8c82c9a8ca9e937231f9f42e26" # v3.95.2 +rev = "37b77001d0174ebec2fcca2bd83ff83a6d45a3ab" # v3.95.3 hooks = [ { id = "trufflehog", entry = "trufflehog git file://. 
--since-commit HEAD --no-verification --fail", stages = [ "pre-commit", @@ -38,7 +38,7 @@ hooks = [ [[repos]] repo = "https://github.com/mongodb/kingfisher" -rev = "9ddec4ab8b53653d4941e6b3fd4ff602ce91d81b" # v1.97.0 +rev = "6f560103cc6ea082ef4b80a9098e3f3111afb8bc" # v1.101.0 hooks = [ { id = "kingfisher", args = [ "scan", @@ -69,12 +69,12 @@ name = "ansible-lint" entry = "env ANSIBLE_ROLES_PATH=ansible/roles ansible-lint" language = "python" files = "^ansible/" -additional_dependencies = ["ansible-lint==26.4.0", "ansible-core==2.20.5"] +additional_dependencies = ["ansible-lint==26.4.0", "ansible-core==2.21.0"] # Python - ruff for linting and formatting [[repos]] repo = "https://github.com/astral-sh/ruff-pre-commit" -rev = "6fec9b7edb08fd9989088709d864a7826dc74e80" # v0.15.12 +rev = "0c7b6c989466a93942def1f84baf36ddfcd60c83" # v0.15.14 hooks = [{ id = "ruff", args = ["--fix"] }, { id = "ruff-format" }] # Python - ty type checker From 4e25180b0ae3ff212b7fc4d57d136f215a92c310 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 28 May 2026 07:13:13 -0700 Subject: [PATCH 05/35] C0: clone blumeops via tailnet on ringtail provision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Switch ringtail.yml from forge.eblu.me (Fly proxy, WAN) to forge.ops.eblu.me (Caddy on indri, tailnet). Ringtail is always on the tailnet — the WAN round-trip was overhead and made provision-ringtail fail any time Fly was slow or down. 
--- ansible/playbooks/ringtail.yml | 2 +- docs/changelog.d/+ringtail-clone-via-tailnet.infra.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 docs/changelog.d/+ringtail-clone-via-tailnet.infra.md diff --git a/ansible/playbooks/ringtail.yml b/ansible/playbooks/ringtail.yml index ee5604b..b05d67a 100644 --- a/ansible/playbooks/ringtail.yml +++ b/ansible/playbooks/ringtail.yml @@ -57,7 +57,7 @@ tasks: - name: Ensure blumeops repo is present ansible.builtin.git: - repo: "https://forge.eblu.me/eblume/blumeops.git" + repo: "https://forge.ops.eblu.me/eblume/blumeops.git" dest: /etc/blumeops version: "{{ ringtail_commit | default('main') }}" force: true diff --git a/docs/changelog.d/+ringtail-clone-via-tailnet.infra.md b/docs/changelog.d/+ringtail-clone-via-tailnet.infra.md new file mode 100644 index 0000000..d664163 --- /dev/null +++ b/docs/changelog.d/+ringtail-clone-via-tailnet.infra.md @@ -0,0 +1 @@ +Switch the ringtail provisioning playbook's blumeops clone URL from `forge.eblu.me` (public, via Fly proxy) to `forge.ops.eblu.me` (tailnet, direct via Caddy on indri). Ringtail is always on the tailnet, so the WAN round-trip is pure overhead — it also made `provision-ringtail` brittle whenever the Fly proxy was slow or down. From f6febb1f772e858a82d69e7baade4f526e550f97 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 28 May 2026 07:59:22 -0700 Subject: [PATCH 06/35] C0: switch fly proxy deploy strategy to immediate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bluegreen kept timing out — the new green machine couldn't reach "started" within Fly's 5-minute deploy budget. The cold-start sequence (tailscaled → tailscale up → wait-for-MagicDNS → nginx startup) eats most of that, leaving no headroom for healthcheck propagation. For a single-machine proxy, bluegreen offers little benefit anyway: no warm second instance, so trading 5-10s of downtime for predictable completion is the right call. 
--- docs/changelog.d/+fly-deploy-immediate-strategy.infra.md | 1 + fly/fly.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 docs/changelog.d/+fly-deploy-immediate-strategy.infra.md diff --git a/docs/changelog.d/+fly-deploy-immediate-strategy.infra.md b/docs/changelog.d/+fly-deploy-immediate-strategy.infra.md new file mode 100644 index 0000000..205bd6a --- /dev/null +++ b/docs/changelog.d/+fly-deploy-immediate-strategy.infra.md @@ -0,0 +1 @@ +Switch the Fly proxy deploy strategy from `bluegreen` to `immediate` in `fly/fly.toml`. With a single proxy machine, bluegreen offers little benefit — the green machine routinely failed to reach "started" inside Fly's default 5-minute deploy timeout (the cold-start sequence of `tailscaled` → `tailscale up` → wait-for-MagicDNS → nginx startup eats most of the budget), and the failed deploys would roll back. `immediate` replaces the machine in place with a brief downtime (~5–10s) but actually completes. diff --git a/fly/fly.toml b/fly/fly.toml index 11aac9c..6ccf29d 100644 --- a/fly/fly.toml +++ b/fly/fly.toml @@ -7,7 +7,7 @@ primary_region = "sjc" memory = "512mb" [deploy] -strategy = "bluegreen" +strategy = "immediate" [http_service] internal_port = 8080 From 4d1f4af25b9d2a55c1b0731e3a6b83259fc33dfa Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 28 May 2026 09:59:46 -0700 Subject: [PATCH 07/35] =?UTF-8?q?Upgrade=20unpoller=20v2.34.0=20=E2=86=92?= =?UTF-8?q?=20v3.2.0,=20migrate=20to=20container.py=20(#361)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Service Review pickup: unpoller (last reviewed 73 days ago). - Upgrades unpoller from v2.34.0 to v3.2.0 (major version bump). - Migrates the container build from a Dockerfile to a native Dagger pipeline (`containers/unpoller/container.py`) following the navidrome / miniflux pattern. - Refreshes `service-versions.yaml` (last-reviewed, current-version). 
## Breaking changes (upstream) - **v3.0.0** — UniFi network API shifts (later 10.x). Some metric / event / log names and labels may have changed. Worth a follow-up sweep of the unpoller Grafana dashboard for missing series. - **v3.2.0** — defaults to a 60s background poll feeding cached Prometheus scrapes (was on-demand poll per scrape). To restore previous behavior, set `interval = 0` in `up.conf`. Leaving the new default in this PR — every-15s scrapes will simply serve from cache, which is fine for our use. ## Build - Image: `registry.ops.eblu.me/blumeops/unpoller:v3.2.0-1b27242` - Built by build-container workflow run #559 from this branch. ## Test plan - [ ] `argocd app set unpoller --revision unpoller-v3 && argocd app sync unpoller` - [ ] Pod comes Ready - [ ] Verify metrics exported (`Site/Client/UAP/USG/USW` counts in logs, `unpoller_*` series in Prometheus) - [ ] Spot-check unpoller Grafana dashboard for missing series after the v3 API shift - [ ] After merge: `argocd app set unpoller --revision main && argocd app sync unpoller` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/361 --- argocd/manifests/unpoller/kustomization.yaml | 2 +- containers/unpoller/Dockerfile | 43 ---------------- containers/unpoller/container.py | 53 ++++++++++++++++++++ docs/changelog.d/unpoller-v3.infra.md | 1 + service-versions.yaml | 4 +- 5 files changed, 57 insertions(+), 46 deletions(-) delete mode 100644 containers/unpoller/Dockerfile create mode 100644 containers/unpoller/container.py create mode 100644 docs/changelog.d/unpoller-v3.infra.md diff --git a/argocd/manifests/unpoller/kustomization.yaml b/argocd/manifests/unpoller/kustomization.yaml index 5b7a9e2..d2c4e28 100644 --- a/argocd/manifests/unpoller/kustomization.yaml +++ b/argocd/manifests/unpoller/kustomization.yaml @@ -10,7 +10,7 @@ resources: images: - name: registry.ops.eblu.me/blumeops/unpoller - newTag: v2.34.0-613f05d + newTag: v3.2.0-1b27242 
configMapGenerator: - name: unpoller-config diff --git a/containers/unpoller/Dockerfile b/containers/unpoller/Dockerfile deleted file mode 100644 index 241b375..0000000 --- a/containers/unpoller/Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -# UnPoller — UniFi metrics exporter for Prometheus -# Two-stage build: Go compilation, then minimal Alpine runtime - -ARG CONTAINER_APP_VERSION=v2.34.0 - -FROM golang:alpine3.22 AS build - -ARG CONTAINER_APP_VERSION -RUN apk add --no-cache git - -RUN git clone --depth 1 --branch ${CONTAINER_APP_VERSION} \ - https://forge.ops.eblu.me/mirrors/unpoller.git /app - -WORKDIR /app - -ENV CGO_ENABLED=0 - -RUN go build -ldflags="-s -w \ - -X main.version=${CONTAINER_APP_VERSION} \ - -X main.builtBy=blumeops \ - -X golift.io/version.Version=${CONTAINER_APP_VERSION} \ - -X golift.io/version.Branch=HEAD \ - -X golift.io/version.BuildUser=blumeops \ - -X golift.io/version.Revision=blumeops-build" \ - -o /bin/unpoller . - -FROM alpine:3.22 - -ARG CONTAINER_APP_VERSION -LABEL org.opencontainers.image.title="UnPoller" -LABEL org.opencontainers.image.description="UniFi metrics exporter for Prometheus" -LABEL org.opencontainers.image.version="${CONTAINER_APP_VERSION}" -LABEL org.opencontainers.image.source="https://forge.eblu.me/eblume/blumeops" -LABEL org.opencontainers.image.vendor="blumeops" - -RUN apk add --no-cache ca-certificates tzdata - -COPY --from=build /bin/unpoller /usr/bin/unpoller - -EXPOSE 9130 -USER 65534:65534 -ENTRYPOINT ["/usr/bin/unpoller"] -CMD ["--config", "/etc/unpoller/up.conf"] diff --git a/containers/unpoller/container.py b/containers/unpoller/container.py new file mode 100644 index 0000000..bfc75ba --- /dev/null +++ b/containers/unpoller/container.py @@ -0,0 +1,53 @@ +"""UnPoller — UniFi metrics exporter for Prometheus. + +Two-stage build: Go backend, Alpine runtime. +Source cloned from forge mirror. 
+""" + +import dagger + +from blumeops.containers import ( + alpine_runtime, + clone_from_forge, + go_build, + oci_labels, +) + +VERSION = "v3.2.0" + + +async def build(src: dagger.Directory) -> dagger.Container: + source = clone_from_forge("unpoller", VERSION) + + backend = go_build( + source, + "/unpoller", + ldflags=( + f"-s -w " + f"-X main.version={VERSION} " + f"-X main.builtBy=blumeops " + f"-X golift.io/version.Version={VERSION} " + f"-X golift.io/version.Branch=HEAD " + f"-X golift.io/version.BuildUser=blumeops " + f"-X golift.io/version.Revision=blumeops-build" + ), + ) + + runtime = alpine_runtime( + extra_apk=["ca-certificates", "tzdata"], + create_user=False, + ) + runtime = oci_labels( + runtime, + title="UnPoller", + description="UniFi metrics exporter for Prometheus", + version=VERSION, + ) + return ( + runtime.with_file("/usr/bin/unpoller", backend.file("/unpoller")) + .with_exposed_port(9130) + .with_user("65534") + .with_default_args( + args=["/usr/bin/unpoller", "--config", "/etc/unpoller/up.conf"] + ) + ) diff --git a/docs/changelog.d/unpoller-v3.infra.md b/docs/changelog.d/unpoller-v3.infra.md new file mode 100644 index 0000000..fa6eaf9 --- /dev/null +++ b/docs/changelog.d/unpoller-v3.infra.md @@ -0,0 +1 @@ +Upgrade unpoller v2.34.0 → v3.2.0 and migrate container build from Dockerfile to native Dagger (container.py). v3.0.0 carries breaking UniFi API changes; v3.2.0 introduces a 60s background poll (cached scrapes) by default — set `interval = 0` in `up.conf` to restore on-demand polling. 
diff --git a/service-versions.yaml b/service-versions.yaml index 02f2979..63b0f15 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -345,8 +345,8 @@ services: - name: unpoller type: argocd - last-reviewed: 2026-03-16 - current-version: "v2.34.0" + last-reviewed: 2026-05-28 + current-version: "v3.2.0" upstream-source: https://github.com/unpoller/unpoller/releases notes: UniFi metrics exporter for Prometheus From e703d25efe2b2da12793a6c459bce95ecdc48435 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 28 May 2026 10:10:21 -0700 Subject: [PATCH 08/35] C0: rebuild unpoller container from squashed main commit Image was previously tagged with the unpoller-v3 branch SHA (1b27242), which doesn't exist in main's history after squash-merge. Rebuilt from the squashed commit so the tag references a reachable commit. Co-Authored-By: Claude Opus 4.7 (1M context) --- argocd/manifests/unpoller/kustomization.yaml | 2 +- docs/changelog.d/+unpoller-rebuild-on-main.infra.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 docs/changelog.d/+unpoller-rebuild-on-main.infra.md diff --git a/argocd/manifests/unpoller/kustomization.yaml b/argocd/manifests/unpoller/kustomization.yaml index d2c4e28..bf776bb 100644 --- a/argocd/manifests/unpoller/kustomization.yaml +++ b/argocd/manifests/unpoller/kustomization.yaml @@ -10,7 +10,7 @@ resources: images: - name: registry.ops.eblu.me/blumeops/unpoller - newTag: v3.2.0-1b27242 + newTag: v3.2.0-4d1f4af configMapGenerator: - name: unpoller-config diff --git a/docs/changelog.d/+unpoller-rebuild-on-main.infra.md b/docs/changelog.d/+unpoller-rebuild-on-main.infra.md new file mode 100644 index 0000000..60ae8fa --- /dev/null +++ b/docs/changelog.d/+unpoller-rebuild-on-main.infra.md @@ -0,0 +1 @@ +Rebuild unpoller container from squashed main commit so the image SHA tag matches a commit in main's history (was tagged with the pre-squash branch SHA). 
From 1ce381cb6e15ca1226feee1d6a0fa2c449f929b7 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 28 May 2026 14:36:33 -0700 Subject: [PATCH 09/35] C0: surface missing-log failures in runner-logs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `mise run runner-logs -j ` previously silently succeeded with no output when forgejo had no log for the task. Two layered causes: 1. zstdcat exits 0 even when the file is missing (writes "can't stat … -- ignored" to stderr). 2. ssh to indri runs fish, which silently drops the remote exit code so the subprocess returncode is always 0. Probe `test -f` over SSH and parse a stdout marker (EXISTS / MISSING) to detect the missing-log case, then report it explicitly with the indri path and a hint about action_task.log_in_storage = 0 so the operator knows where to look next. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../+runner-logs-missing-log.misc.md | 1 + mise-tasks/runner-logs | 25 ++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 docs/changelog.d/+runner-logs-missing-log.misc.md diff --git a/docs/changelog.d/+runner-logs-missing-log.misc.md b/docs/changelog.d/+runner-logs-missing-log.misc.md new file mode 100644 index 0000000..c06704a --- /dev/null +++ b/docs/changelog.d/+runner-logs-missing-log.misc.md @@ -0,0 +1 @@ +`mise run runner-logs -j ` now reports a clear error when the log file doesn't exist on indri (e.g. a runner crash that left `action_task.log_in_storage = 0`). Previously it printed only the header and exited 0, because `zstdcat` exits 0 with a "can't stat … -- ignored" stderr message and ssh+fish on indri swallows the remote exit code. 
diff --git a/mise-tasks/runner-logs b/mise-tasks/runner-logs index 3c5e8e3..0d3028b 100755 --- a/mise-tasks/runner-logs +++ b/mise-tasks/runner-logs @@ -229,12 +229,35 @@ def fetch_log(run_number: int, job_index: int, repo: str, token: str) -> None: hex_prefix = f"{task_id & 0xff:02x}" log_path = f"~/forgejo/data/actions_log/{repo}/{hex_prefix}/{task_id}.log.zst" + # indri's login shell (fish) silently swallows SSH exit codes, so we can't + # rely on returncode. zstdcat itself also exits 0 with a "can't stat ... + # -- ignored" stderr message when the file is missing. Detect missing logs + # by running `test -f` over SSH and parsing the marker line from stdout. + probe = subprocess.run( + ["ssh", "indri", f"test -f {log_path} && echo EXISTS || echo MISSING"], + capture_output=True, + text=True, + ) + marker = probe.stdout.strip().splitlines()[-1] if probe.stdout.strip() else "" + if marker != "EXISTS": + typer.echo( + f"Error: log not found for run #{run_number} job {job_index} (task {task_id})", + err=True, + ) + typer.echo(f"Path: indri:{log_path}", err=True) + typer.echo( + "The runner may have crashed before uploading its log buffer " + "(action_task.log_in_storage = 0).", + err=True, + ) + raise typer.Exit(1) + result = subprocess.run( ["ssh", "indri", f"zstdcat {log_path}"], capture_output=True, text=True, ) - if result.returncode != 0: + if result.returncode != 0 or not result.stdout: typer.echo( f"Error: could not read log for run #{run_number} job {job_index} (task {task_id})", err=True, From ecded3007368e094baebeed10fbf2a3fe49aed90 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 28 May 2026 14:51:09 -0700 Subject: [PATCH 10/35] Make valkey local on ringtail (nix amd64) + bump to 8.1.7 (#362) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Weekly "make one non-local container local" pickup: immich-ringtail still pulled `docker.io/valkey/valkey:8.1.6` because the existing 
`containers/valkey/container.py` build was arm64-only. - Adds `containers/valkey/default.nix` — nix-built amd64 valkey image, packaged by the ringtail nix-container-builder runner using `pkgs.dockerTools.buildLayeredImage`. Mirrors the existing `containers/authentik-redis/default.nix` pattern. - `containers/valkey/container.py` keeps building the Alpine arm64 image for paperless on indri. Bumped both builds to upstream valkey 8.1.7 (Alpine 3.22 now ships `8.1.7-r0`; nixpkgs has 8.1.7). - Splits `VERSION` (upstream app) from `ALPINE_PIN` (apk pin) in `container.py` so both build files can declare the same upstream version and pass `container-version-check`. - Updates `service-versions.yaml`: current-version 8.1.7, refreshed last-reviewed, upstream-source now points at the canonical valkey-io releases page. - Switches kustomizations: - `immich-ringtail/kustomization.yaml`: `docker.io/valkey/valkey:8.1.6` → `registry.ops.eblu.me/blumeops/valkey:v8.1.7-02859c5-nix`, comment updated. - `paperless/kustomization.yaml`: `v8.1.6-r0-fabca04` → `v8.1.7-02859c5`. 
## Build build-container run #563 — both jobs succeeded after a transient runner crash on the first dispatch (#562 build-nix), which surfaced two separate bugs that landed in a separate C0 on main: - `runner-logs` silently returned 0 with no output when the log file didn't exist on indri - `ssh indri` swallowing remote exit codes (fish login shell), which the wrapper now works around via a stdout marker ## Test plan - [ ] `argocd app set immich-ringtail --revision valkey-nix && argocd app sync immich-ringtail` - [ ] `argocd app set paperless --revision valkey-nix && argocd app sync paperless` - [ ] Both valkey pods come Ready and start serving on :6379 - [ ] Immich app + paperless can read/write their respective cache - [ ] After merge: rebuild from squashed main commit + update kustomization tags (squash-tag follow-up) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/362 --- .../immich-ringtail/kustomization.yaml | 9 +++--- argocd/manifests/paperless/kustomization.yaml | 2 +- containers/valkey/container.py | 15 +++++----- containers/valkey/default.nix | 30 +++++++++++++++++++ docs/changelog.d/valkey-nix.infra.md | 1 + service-versions.yaml | 15 +++++----- 6 files changed, 53 insertions(+), 19 deletions(-) create mode 100644 containers/valkey/default.nix create mode 100644 docs/changelog.d/valkey-nix.infra.md diff --git a/argocd/manifests/immich-ringtail/kustomization.yaml b/argocd/manifests/immich-ringtail/kustomization.yaml index c1f639e..7a97fef 100644 --- a/argocd/manifests/immich-ringtail/kustomization.yaml +++ b/argocd/manifests/immich-ringtail/kustomization.yaml @@ -21,8 +21,9 @@ images: - name: ghcr.io/immich-app/immich-machine-learning # CUDA variant of the same release — ringtail has an RTX 4080 newTag: v2.6.3-cuda - # Using upstream multi-arch valkey image directly; the - # registry.ops.eblu.me/blumeops/valkey mirror is arm64-only (built - # on indri) and would crashloop on ringtail. 
+ # amd64 valkey built via nix on the ringtail nix-container-builder + # (see containers/valkey/default.nix). The Alpine container.py build + # is arm64-only and serves paperless on indri. - name: docker.io/valkey/valkey - newTag: "8.1.6" + newName: registry.ops.eblu.me/blumeops/valkey + newTag: v8.1.7-02859c5-nix diff --git a/argocd/manifests/paperless/kustomization.yaml b/argocd/manifests/paperless/kustomization.yaml index 9c6a086..575dfb4 100644 --- a/argocd/manifests/paperless/kustomization.yaml +++ b/argocd/manifests/paperless/kustomization.yaml @@ -16,4 +16,4 @@ images: newTag: v2.20.13-07f52e9 - name: docker.io/library/redis newName: registry.ops.eblu.me/blumeops/valkey - newTag: v8.1.6-r0-fabca04 + newTag: v8.1.7-02859c5 diff --git a/containers/valkey/container.py b/containers/valkey/container.py index 5d150e7..34e8524 100644 --- a/containers/valkey/container.py +++ b/containers/valkey/container.py @@ -1,8 +1,8 @@ -"""Valkey — native Dagger build. +"""Valkey — native Dagger build (arm64, indri). Alpine 3.22 base with the `valkey` apk package (8.1.x — Redis-compatible). -Mirrors `docker.io/valkey/valkey:8.1-alpine`, used by paperless and immich -as a cache/queue sidecar. +Used by paperless (sidecar) on indri. immich on ringtail uses the +nix-built amd64 variant from `default.nix` in this directory. """ import dagger @@ -10,9 +10,10 @@ from dagger import dag from blumeops.containers import oci_labels -# Alpine 3.22 ships valkey 8.1.6-r0. Alpine 3.23 jumps to 9.0 — hold on 3.22 -# to keep this a 1:1 swap for the upstream `valkey:8.1-alpine` image. -VERSION = "8.1.6-r0" +# Alpine 3.22 currently ships valkey 8.1.7-r0. Alpine 3.23 jumps to 9.0 — +# hold on 3.22 to keep this aligned with the 8.1 line. 
+VERSION = "8.1.7" +ALPINE_PIN = "8.1.7-r0" ALPINE_BASE = "alpine:3.22" @@ -21,7 +22,7 @@ async def build(src: dagger.Directory) -> dagger.Container: ctr = ( dag.container() .from_(ALPINE_BASE) - .with_exec(["apk", "add", "--no-cache", f"valkey={VERSION}"]) + .with_exec(["apk", "add", "--no-cache", f"valkey={ALPINE_PIN}"]) .with_exec(["mkdir", "-p", "/data"]) .with_exec(["chown", "valkey:valkey", "/data"]) .with_workdir("/data") diff --git a/containers/valkey/default.nix b/containers/valkey/default.nix new file mode 100644 index 0000000..9cb1713 --- /dev/null +++ b/containers/valkey/default.nix @@ -0,0 +1,30 @@ +# Nix-built Valkey for ringtail (amd64) +# Companion to container.py (Alpine 3.22, arm64 on indri). +# Used by immich-ringtail which needs an amd64 image; paperless on indri +# continues to use the Alpine container.py build. +# +# The version assertion ensures nix-build fails if a flake.lock update +# changes the Valkey version — forcing an explicit version acknowledgment +# here and in service-versions.yaml (enforced by container-version-check). +{ pkgs ? import <nixpkgs> { } }: + +let + version = "8.1.7"; +in + +assert pkgs.valkey.version == version; + +pkgs.dockerTools.buildLayeredImage { + name = "blumeops/valkey"; + contents = [ + pkgs.valkey + ]; + + config = { + Entrypoint = [ "${pkgs.valkey}/bin/valkey-server" ]; + Cmd = [ "--bind" "0.0.0.0" "--protected-mode" "no" "--dir" "/data" ]; + ExposedPorts = { + "6379/tcp" = { }; + }; + }; +} diff --git a/docs/changelog.d/valkey-nix.infra.md b/docs/changelog.d/valkey-nix.infra.md new file mode 100644 index 0000000..e41eb63 --- /dev/null +++ b/docs/changelog.d/valkey-nix.infra.md @@ -0,0 +1 @@ +Add nix-built amd64 valkey for ringtail (`containers/valkey/default.nix`) so immich-ringtail can stop pulling the upstream multi-arch `docker.io/valkey/valkey` image. Existing `container.py` continues to build Alpine arm64 for paperless on indri. Both bump to valkey 8.1.7 (Alpine 3.22 8.1.7-r0 / nixpkgs 8.1.7).
diff --git a/service-versions.yaml b/service-versions.yaml index 63b0f15..5440f01 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -146,14 +146,15 @@ services: - name: valkey type: argocd - last-reviewed: 2026-05-01 - current-version: "8.1.6-r0" - upstream-source: https://pkgs.alpinelinux.org/package/v3.22/community/aarch64/valkey + last-reviewed: 2026-05-28 + current-version: "8.1.7" + upstream-source: https://github.com/valkey-io/valkey/releases notes: >- - Shared Alpine-built valkey image, used as a sidecar/cache by paperless - (sidecar) and immich (separate Deployment). Mirrors the upstream - docker.io/valkey/valkey:8.1-alpine. Pinned to Alpine 3.22 for valkey 8.1.x; - Alpine 3.23 jumps to 9.0. Distinct from authentik-redis (nix-built Redis + Dual-build valkey image: container.py builds Alpine 3.22 + apk valkey + (arm64, indri) for paperless; default.nix builds via nixpkgs (amd64, + ringtail) for immich-ringtail. Both track upstream valkey 8.1.x; Alpine + 3.22 currently ships 8.1.7-r0 and nixpkgs valkey is 8.1.7. Alpine 3.23 + jumps to 9.0. Distinct from authentik-redis (nix-built Redis 8.x) which has its own entry. - name: external-secrets From f588638331567d921e189cbff25db5425ccebaef Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 28 May 2026 14:53:21 -0700 Subject: [PATCH 11/35] C0: rebuild valkey from squashed main commit Image tags from PR #362 (v8.1.7-02859c5{,-nix}) referenced a branch SHA that no longer exists on main after squash-merge. Rebuilt both the dagger arm64 and nix amd64 variants from the squashed commit (ecded30) and updated paperless + immich-ringtail to the new tags. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- argocd/manifests/immich-ringtail/kustomization.yaml | 2 +- argocd/manifests/paperless/kustomization.yaml | 2 +- docs/changelog.d/+valkey-rebuild-on-main.infra.md | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 docs/changelog.d/+valkey-rebuild-on-main.infra.md diff --git a/argocd/manifests/immich-ringtail/kustomization.yaml b/argocd/manifests/immich-ringtail/kustomization.yaml index 7a97fef..2fa131c 100644 --- a/argocd/manifests/immich-ringtail/kustomization.yaml +++ b/argocd/manifests/immich-ringtail/kustomization.yaml @@ -26,4 +26,4 @@ images: # is arm64-only and serves paperless on indri. - name: docker.io/valkey/valkey newName: registry.ops.eblu.me/blumeops/valkey - newTag: v8.1.7-02859c5-nix + newTag: v8.1.7-ecded30-nix diff --git a/argocd/manifests/paperless/kustomization.yaml b/argocd/manifests/paperless/kustomization.yaml index 575dfb4..3cd0d74 100644 --- a/argocd/manifests/paperless/kustomization.yaml +++ b/argocd/manifests/paperless/kustomization.yaml @@ -16,4 +16,4 @@ images: newTag: v2.20.13-07f52e9 - name: docker.io/library/redis newName: registry.ops.eblu.me/blumeops/valkey - newTag: v8.1.7-02859c5 + newTag: v8.1.7-ecded30 diff --git a/docs/changelog.d/+valkey-rebuild-on-main.infra.md b/docs/changelog.d/+valkey-rebuild-on-main.infra.md new file mode 100644 index 0000000..c743e61 --- /dev/null +++ b/docs/changelog.d/+valkey-rebuild-on-main.infra.md @@ -0,0 +1 @@ +Rebuild valkey container from squashed main commit (both arm64 dagger and amd64 nix variants), and update paperless + immich-ringtail kustomizations to the main-SHA tags `v8.1.7-ecded30` and `v8.1.7-ecded30-nix`. 
From e0064de83d0d15a1f34f16146542a62817dca3ef Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 1 Jun 2026 15:52:09 -0700 Subject: [PATCH 12/35] C0: update ringtail flake inputs (nixpkgs, disko) Co-Authored-By: Claude Opus 4.8 (1M context) --- .../+ringtail-flake-update-2026-06-01.infra.md | 4 ++++ nixos/ringtail/flake.lock | 12 ++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 docs/changelog.d/+ringtail-flake-update-2026-06-01.infra.md diff --git a/docs/changelog.d/+ringtail-flake-update-2026-06-01.infra.md b/docs/changelog.d/+ringtail-flake-update-2026-06-01.infra.md new file mode 100644 index 0000000..dd488b6 --- /dev/null +++ b/docs/changelog.d/+ringtail-flake-update-2026-06-01.infra.md @@ -0,0 +1,4 @@ +Update the ringtail NixOS flake lockfile (`nixos/ringtail/flake.lock`): bump +`nixpkgs` (b77b3de → 25f5383) and `disko` (5ba0c95 → 115e521) to latest. +`nixpkgs-services` was intentionally left pinned (skipped by the +`flake-update` pipeline). Routine recurring maintenance per [[manage-lockfile]]. 
diff --git a/nixos/ringtail/flake.lock b/nixos/ringtail/flake.lock index 0f0da7e..bb60501 100644 --- a/nixos/ringtail/flake.lock +++ b/nixos/ringtail/flake.lock @@ -7,11 +7,11 @@ ] }, "locked": { - "lastModified": 1779699611, - "narHash": "sha256-EcCaSTKnmg2o4wLKaN1aqQFomwyhO7ik0bX9COdyCas=", + "lastModified": 1780290312, + "narHash": "sha256-eTAlX0CwgB84Ts3GaBd944A3DRXVMzgA0EqroZBISUo=", "owner": "nix-community", "repo": "disko", - "rev": "5ba0c9555c28685e57fa54c7a25e42c7efdbfc8d", + "rev": "115e5211780054d8a890b41f0b7734cafad54dfe", "type": "github" }, "original": { @@ -43,11 +43,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1779467186, - "narHash": "sha256-nOesoDCiXcUftqbRBMz9tt4blI5PvljMWbm3kuCA+0s=", + "lastModified": 1779796641, + "narHash": "sha256-ZsIrKmhp4vbBXoXXmR/tBXA/UCsAQiJL9vsgZEduhVY=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "b77b3de8775677f84492abe84635f87b0e153f0f", + "rev": "25f538306313eae3927264466c70d7001dcea1df", "type": "github" }, "original": { From a36a18aaa6714e187834edc09eb2fc565d0f5fbb Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 1 Jun 2026 20:52:20 -0700 Subject: [PATCH 13/35] C0: black-hole /mirrors/* at Fly edge + name-and-shame scrapers A $29.60 Fly bill traced to ~1.25 TB/30d egress on forge.eblu.me (99.95% of all proxy egress), ~71% of it AI scrapers (Meta meta-externalagent, OpenAI GPTBot, Amazonbot, Bytespider) crawling the public mirror repos' infinite git-history URL space and timing out Forgejo. robots.txt already disallowed /mirrors/ but those agents ignore it, so enforce at the edge: return 403 (^~ to beat the regex asset locations), served as a roll-of-dishonour page with an X-Naughty-Scrapers header. Mirrors stay reachable on the tailnet via forge.ops.eblu.me. Tier 2 (UA denylist + Anubis) and the Cloudflare rejection are documented in docs/explanation/ai-scraper-mitigation.md. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../+ai-scraper-mitigation-doc.doc.md | 1 + .../+forge-mirrors-blackhole.infra.md | 1 + docs/explanation/ai-scraper-mitigation.md | 201 ++++++++++++++++++ docs/tutorials/expose-service-publicly.md | 7 + fly/Dockerfile | 1 + fly/naughty.html | 64 ++++++ fly/nginx.conf | 27 +++ 7 files changed, 302 insertions(+) create mode 100644 docs/changelog.d/+ai-scraper-mitigation-doc.doc.md create mode 100644 docs/changelog.d/+forge-mirrors-blackhole.infra.md create mode 100644 docs/explanation/ai-scraper-mitigation.md create mode 100644 fly/naughty.html diff --git a/docs/changelog.d/+ai-scraper-mitigation-doc.doc.md b/docs/changelog.d/+ai-scraper-mitigation-doc.doc.md new file mode 100644 index 0000000..246fedb --- /dev/null +++ b/docs/changelog.d/+ai-scraper-mitigation-doc.doc.md @@ -0,0 +1 @@ +Add `docs/explanation/ai-scraper-mitigation.md` — the egress-cost / AI-crawler threat model for the public Fly proxy, the tiered mitigation plan (Tier 1: mirror black-hole, shipped; Tier 2: user-agent denylist + Anubis; Tier 3: Cloudflare, rejected on principle), and the data behind it. diff --git a/docs/changelog.d/+forge-mirrors-blackhole.infra.md b/docs/changelog.d/+forge-mirrors-blackhole.infra.md new file mode 100644 index 0000000..29a5e6a --- /dev/null +++ b/docs/changelog.d/+forge-mirrors-blackhole.infra.md @@ -0,0 +1 @@ +Black-hole the `/mirrors/*` repositories at the Fly proxy edge (`return 403` → `forge.ops.eblu.me`). A surprise $29.60 Fly bill traced to ~1.25 TB/30d of egress on `forge.eblu.me`, 99.95% of all proxy egress — of which ~71% was AI scrapers (Meta `meta-externalagent`, OpenAI `GPTBot`, Amazonbot) crawling the near-infinite git-history URL space of the public mirror repos and timing out Forgejo in the process. Mirrors exist for supply-chain control and are consumed over the tailnet, so their public web UI had no legitimate audience. `robots.txt` already disallowed `/mirrors/`, but the offending agents ignore it.
Tier-2 mitigations (user-agent denylist, Anubis proof-of-work gateway) are documented in `docs/explanation/ai-scraper-mitigation.md`. diff --git a/docs/explanation/ai-scraper-mitigation.md b/docs/explanation/ai-scraper-mitigation.md new file mode 100644 index 0000000..fe4ba3d --- /dev/null +++ b/docs/explanation/ai-scraper-mitigation.md @@ -0,0 +1,201 @@ +--- +title: AI Scraper Mitigation +modified: 2026-06-01 +last-reviewed: 2026-06-01 +tags: + - explanation + - fly-io + - forgejo + - security + - networking +--- + +# AI Scraper Mitigation on the Public Proxy + +> **Note:** This article was drafted by AI and reviewed by Erich. I plan to rewrite all explanatory content in my own words — these serve as placeholders to establish the documentation structure. + +How BlumeOps keeps AI crawlers from running up the [[expose-service-publicly|Fly.io proxy]] egress bill and DoS-ing [[forgejo|Forgejo]] on [[indri]]. + +## The incident + +A $29.60 Fly.io invoice arrived, most of it a single line: + +``` +Bandwidth: Egress (iad) — 958,524,714,138 bytes — $19.17 +``` + +The `iad` (Ashburn) region is a red herring: the proxy machine runs in `sjc`, +but Fly bills egress at the edge PoP nearest the *client*, so `iad` just means +"the traffic went to clients on the US East Coast." + +Tracing it through the nginx access logs (shipped to Loki via [[alloy|Alloy]]): + +| Signal | Value | +|--------|-------| +| Total proxy egress (30d) | ~1.25 TB | +| Share that was `forge.eblu.me` | **99.95%** | +| Share of forge egress that was `/mirrors/*` | **~71%** | +| Share that was declared AI bots | **~85%+** | +| Top offenders | Meta `meta-externalagent` (66% of bytes), OpenAI `GPTBot` (16%), Amazonbot, Bytespider | +| Forgejo `5xx` (upstream timeouts) | tens of thousands/day, spiking to 112k | + +The crawlers were walking [[forgejo|Forgejo]]'s git-history browse endpoints — +`src/commit/`, `commits/`, `blame/`, `raw/commit/`, plus `.patch`/`.diff` +and `?page=N` pagination. 
That URL space is effectively **infinite**: every +file × every commit × every page, multiplied across every mirrored repo. A +crawler that follows links never finishes, and every page is a cache `MISS` +that both tunnels to indri *and* bills as egress. + +Two distinct harms, not one: + +1. **Cost** — ~1.25 TB/mo of egress on a free-tier-ish proxy. +2. **Availability** — the crawl alone generates ~400–530k requests/day, + enough to time out Forgejo regardless of how much RAM [[indri]] has. Moving + egress elsewhere would *not* fix this; the crawl has to be throttled at the + source. + +`robots.txt` already `Disallow`s `/mirrors/`, `/user/`, and archive/download +paths — but **`meta-externalagent` and `GPTBot` ignore it.** For these agents, +`robots.txt` is a dead letter, which is why edge enforcement is required. + +## The tiered plan + +### Tier 1 — Black-hole `/mirrors/*` (shipped) + +The mirror repositories (`tailscale`, `prometheus`, `mealie`, `paperless-ngx`, +…) are mirrors of *already-public upstreams*, kept for supply-chain control +(see [[spork-strategy]] and the container/mirror story in [[why-gitops]]). They +are consumed by CI, gilbert, and other tailnet clients over +`forge.ops.eblu.me`. Their web UI on the public internet served **no +legitimate audience** — only scrapers. So the proxy now returns `403` for +anything under `/mirrors/`, pointing humans at the tailnet host: + +```nginx +location ^~ /mirrors/ { + return 403 "Mirror repositories are tailnet-only — use forge.ops.eblu.me.\n"; +} +``` + +The `^~` modifier matters: without it, the regex `location` blocks for static +assets (`*.css`, `*.js`, release downloads) would match first and leak content +under `/mirrors/`. `^~` tells nginx to stop at the prefix match and skip the +regex round. + +This is config, not bot-fighting — we simply stopped serving an infinite +tarpit to the world. 
It removes ~71% of forge egress and a large share of the +upstream timeouts, with zero impact on any human or tailnet consumer. It +mirrors the existing tailnet-only blocks for `/api/packages/` and `/swagger`. + +The `403` is also a small act of public shaming. Blocked requests are served a +"roll of dishonour" page (`fly/naughty.html`, status kept at `403` via +`error_page 403 /naughty.html`) that names the offending operators and their +share of the stolen bytes, and every response carries an `X-Naughty-Scrapers` +header: + +``` +X-Naughty-Scrapers: OpenAI/GPTBot, Meta/meta-externalagent, Amazonbot, ByteDance/Bytespider — robots.txt ignorers +``` + +Petty? A little. But it costs nothing, documents *why* the block exists for the +next person who hits it, and the page is a few KB versus the megabytes of git +HTML the crawlers were taking. + +**Trade-off accepted:** mirror release-artifact downloads over WAN now also +`403`. Legitimate consumers already pull these over the tailnet, and the public +exposure was the same crawl liability, so this is intentional. + +### Tier 2 — Defend the repos that *stay* public (planned) + +`/eblume/*` is intentionally public (a public profile is a feature). But the +same git-history endpoints are still a tarpit there, just lower-volume. Two +layers, in increasing order of effort and effectiveness: + +#### 2a. User-agent denylist (cheap, evadable) + +Block the declared AI crawlers at the edge regardless of path: + +```nginx +# Illustrative — not yet deployed. +map $http_user_agent $is_ai_bot { + default 0; + "~*meta-externalagent" 1; + "~*GPTBot" 1; + "~*ClaudeBot" 1; + "~*Amazonbot" 1; + "~*Bytespider" 1; + "~*SemrushBot" 1; +} +# in the forge.eblu.me server block: +if ($is_ai_bot) { return 403; } +``` + +This catches ~85% of *current* traffic for a few lines of config. It is +trivially evadable — a scraper need only spoof a browser UA — so it is a +speed-bump, not a wall. 
Keep `robots.txt` too: well-behaved crawlers +(Googlebot, Bingbot) do honor it, and it documents intent. + +#### 2b. Anubis proof-of-work gateway (the real wall) + +[Anubis](https://github.com/TecharoHQ/anubis) is a Go reverse proxy that +weighs each request with a browser-based proof-of-work challenge before passing +it upstream. It was written for *exactly this scenario* — its author built it +after Amazon's scraper took down their Git server — and is widely deployed in +front of Forgejo/Gitea (Codeberg, the UN, etc.). Headless scrapers that can't +run the challenge JS never reach the application; humans clear it once and +proceed. + +Why it fits BlumeOps better than the alternatives: + +- **It attacks cost *and* availability at once.** Bots receive a few-KB + challenge page instead of MB of git HTML (egress collapses) and never reach + Forgejo (timeouts collapse). No other single lever does both. +- **It stays in-house.** No third party terminates our TLS or sees our + traffic. + +Placement options: + +| Where | Pros | Cons | +|-------|------|------| +| On [[indri]], between [[caddy|Caddy]] and Forgejo | Protects every path and every entry (WAN *and* tailnet); one config | Adds a hop and a service to the indri critical path; the challenge page still tunnels back through Fly for WAN clients (small egress) | +| On the Fly proxy machine, in front of nginx | Challenge served at the edge — bots never even tunnel to indri | Fly VM is small (512 MB); another moving part in the boot sequence alongside `tailscaled`/nginx/`fail2ban`/Alloy | + +Leaning toward Caddy-side on indri for simplicity and uniform coverage, but +this is the open design question for Tier 2. Anubis is MIT-licensed and the +author has signalled a future move to an `equi-x`-based challenge, so pin a +version and track upstream. 
+ +### Tier 3 — Move egress off Fly entirely (rejected) + +A Cloudflare Tunnel (`cloudflared` on indri → Cloudflare +edge) would make this a non-problem on the cost axis: Cloudflare does not meter +proxied bandwidth, and it bundles free AI-bot mitigation (Bot Fight Mode, the +"block AI scrapers" toggle, Managed Challenge, AI Labyrinth). One move would +zero the egress bill and add bot defense. + +**We are not doing this, on principle.** Cloudflare is a solid platform and a +defensible engineering choice — but it already sits in front of an enormous +fraction of the modern web, and routing BlumeOps through it would add one more +site to the pile of the internet that one company can see and gate. BlumeOps +deliberately keeps its own backbone +([[expose-service-publicly|Fly + Tailscale + Caddy]], DNS at [[gandi|Gandi]] — see the "no Cloudflare dependency" line in +that doc). This is a values decision, not a technical one: we would rather pay +a few dollars and run our own mitigation than centralize on Cloudflare. + +It is also worth noting that **Tier 3 would not, by itself, fix the upstream +timeouts** — free egress just means we'd stop *caring* that bots crawl, while +they continued to hammer Forgejo. Crawl mitigation (Tier 1 + Tier 2) is +required regardless of where egress is billed. 
+ +## Summary + +| Tier | Lever | Cost | Availability | Status | +|------|-------|------|--------------|--------| +| 1 | Black-hole `/mirrors/*` at edge | −~71% | big drop | **shipped** | +| 2a | UA denylist on remaining repos | −most of the rest | further drop | planned | +| 2b | Anubis PoW gateway | −near-total | near-total | planned | +| 3 | Cloudflare Tunnel | −total | needs 2b anyway | **rejected (principle)** | + +The guiding insight: the cheapest, lowest-risk mitigation is to **not serve an +infinite-URL surface that has no human audience.** Everything past Tier 1 is +about defending the surface we *do* want public, in-house, without ceding +control of our traffic to a third party. diff --git a/docs/tutorials/expose-service-publicly.md b/docs/tutorials/expose-service-publicly.md index 886cad4..65af611 100644 --- a/docs/tutorials/expose-service-publicly.md +++ b/docs/tutorials/expose-service-publicly.md @@ -376,6 +376,13 @@ Mitigations for dynamic services: - fail2ban on indri (see below) can block IPs showing abuse patterns - The break-glass shutoff remains the last resort +The most acute version of this in practice has been **AI scrapers**, which +ignore `robots.txt` and crawl dynamic services (notably [[forgejo|Forgejo]]'s +infinite git-history URL space) into both a surprise egress bill and an +effective L7 DoS. See [[ai-scraper-mitigation]] for the incident, the tiered +defense (mirror black-hole, user-agent denylist, Anubis proof-of-work), and +why a Cloudflare Tunnel is *not* the chosen answer here. 
+ If a publicly exposed dynamic service attracts targeted attacks or the home network bandwidth is impacted, consider migrating to Cloudflare Tunnel for enterprise-grade DDoS protection (requires DNS migration; diff --git a/fly/Dockerfile b/fly/Dockerfile index d4e7a18..406c849 100644 --- a/fly/Dockerfile +++ b/fly/Dockerfile @@ -25,6 +25,7 @@ COPY fail2ban/action.d/nginx-deny.conf /etc/fail2ban/action.d/nginx-deny.conf COPY nginx.conf /etc/nginx/nginx.conf COPY error.html /usr/share/nginx/html/error.html +COPY naughty.html /usr/share/nginx/html/naughty.html COPY alloy.river /etc/alloy/config.alloy COPY start.sh /start.sh RUN chmod +x /start.sh diff --git a/fly/naughty.html b/fly/naughty.html new file mode 100644 index 0000000..d899171 --- /dev/null +++ b/fly/naughty.html @@ -0,0 +1,64 @@ + + + + + + + 403 · Roll of Dishonour + + + +
+

🪤 403 — you walked into the scraper trap

+

These are mirror repositories. They are tailnet-only.

+ +

+ This path used to serve the web UI for mirrors of public upstream + projects. It exists for supply-chain control, not for crawling. A + robots.txt politely disallowed /mirrors/. + A pack of AI scrapers ignored it, walked the infinite git-history URL + space, and ran up ~1.25 TB of egress and a real + money bill in a single month — while timing out the server for everyone + else. +

+ +

So /mirrors/ is closed at the edge now. Roll of dishonour, + by share of the bytes they stole:

+ + + + + + + + + +
OperatorUser-Agent
Metameta-externalagent
OpenAIGPTBot
AmazonAmazonbot
ByteDanceBytespider
+ +

+ If you are a human who actually wanted these mirrors, they are reachable + from the tailnet at forge.ops.eblu.me. If you are a crawler: + read the robots.txt next time. We left you a header, too. +

+ +
GNU Terry Pratchett
+
+ + diff --git a/fly/nginx.conf b/fly/nginx.conf index 570e6c9..ec35774 100644 --- a/fly/nginx.conf +++ b/fly/nginx.conf @@ -215,6 +215,33 @@ http { return 403 "API documentation is only available at forge.ops.eblu.me (tailnet).\n"; } + # Black-hole the mirror repositories on WAN. These are mirrors of + # already-public upstreams (tailscale, prometheus, mealie, …) kept + # for supply-chain control; CI, gilbert, and tailnet clients consume + # them via forge.ops.eblu.me. Their web UI served no public purpose + # but AI scrapers, which crawled the near-infinite git-history URL + # space (src/commit, commits, blame, raw) and drove ~70% of Fly + # egress (1.25 TB/30d → a surprise bill) plus enough upstream load to + # time out Forgejo. robots.txt already Disallows /mirrors/, but + # meta-externalagent and GPTBot ignore it — so enforce at the edge. + # `^~` makes this win over the regex locations below (e.g. *.css), so + # static assets under /mirrors/ can't leak through. We also name and + # shame: blocked requests get a "roll of dishonour" page (403 status + # preserved) and an X-Naughty-Scrapers header. See + # docs/explanation/ai-scraper-mitigation.md. + location ^~ /mirrors/ { + error_page 403 /naughty.html; + return 403; + } + + # Roll of dishonour — served on the /mirrors/ 403, status kept at 403. + location = /naughty.html { + internal; + root /usr/share/nginx/html; + add_header X-Naughty-Scrapers "OpenAI/GPTBot, Meta/meta-externalagent, Amazonbot, ByteDance/Bytespider — robots.txt ignorers" always; + add_header X-Clacks-Overhead "GNU Terry Pratchett" always; + } + # Redirect archive endpoints to tailnet — archive requests generate full # git bundles on demand. Unauthenticated crawlers hitting unique commit # SHAs cause unbounded CPU and disk usage (DoS vector). 
Legitimate users From 40bd92982015582cb7aa2680c6dc8412706498fb Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 1 Jun 2026 20:55:05 -0700 Subject: [PATCH 14/35] C0: remove visible GNU Terry Pratchett from naughty.html body MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GNU lives in the overhead — the X-Clacks-Overhead header — never on the visible page. Keep the header, drop the footer. Co-Authored-By: Claude Opus 4.8 (1M context) --- fly/naughty.html | 3 --- 1 file changed, 3 deletions(-) diff --git a/fly/naughty.html b/fly/naughty.html index d899171..b6eada8 100644 --- a/fly/naughty.html +++ b/fly/naughty.html @@ -21,7 +21,6 @@ td.share { color: #f2c14e; text-align: right; font-variant-numeric: tabular-nums; } .name { color: #e8867a; } a { color: #7fb3d5; } - footer { margin-top: 2rem; color: #5c574f; font-size: .85rem; } @@ -57,8 +56,6 @@ from the tailnet at forge.ops.eblu.me. If you are a crawler: read the robots.txt next time. We left you a header, too.

- -
GNU Terry Pratchett
From fcac8e5a7290bac54b25f82895c8120ef81367ff Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 3 Jun 2026 10:34:00 -0700 Subject: [PATCH 15/35] =?UTF-8?q?Wave=201=20indri=E2=86=92ringtail=20migra?= =?UTF-8?q?tion:=20paperless,=20teslamate,=20mealie=20(#363)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrate paperless, teslamate, and mealie off the OOM-saturated minikube-indri node onto ringtail k3s, shedding ~1.1 GiB of resident load. Second chain in the indri-k8s decommission after immich. **Containers ported to Nix (default.nix), build-verified on ringtail:** - paperless → wraps nixpkgs paperless-ngx 2.20.15 (pinned unstable); runs as web/worker/beat/consumer - mealie → wraps nixpkgs mealie 3.16.0 (forward 4-minor bump, breaking-change reviewed); single gunicorn, SQLite - teslamate → from-scratch beamPackages mixRelease (not in nixpkgs); erlang_27+elixir_1_18, npm assets, ex_cldr locales pre-fetched **Data:** cold downtime-tolerant cutover. paperless+teslamate postgres dump/restore from quiesced source into a new ringtail blumeops-pg CNPG cluster; mealie SQLite PVC copied. Source DBs untouched until verified (rollback = repoint). **Also:** ringtail blumeops-pg cluster + ExternalSecrets scaffold; fixes pre-existing shower version-check drift. Runbook: docs/how-to/ringtail/migrate-wave1-ringtail.md. Deploy-from-branch + cutover happens before merge; container images rebuilt from main after merge. 
Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/363 --- argocd/apps/mealie-ringtail.yaml | 26 +++ argocd/apps/paperless-ringtail.yaml | 28 +++ argocd/apps/teslamate-ringtail.yaml | 28 +++ .../databases-ringtail/blumeops-pg.yaml | 97 +++++++++ .../external-secret-borgmatic.yaml | 30 +++ .../external-secret-eblume.yaml | 30 +++ .../external-secret-paperless.yaml | 28 +++ .../external-secret-teslamate.yaml | 30 +++ .../databases-ringtail/kustomization.yaml | 6 + .../manifests/mealie-ringtail/deployment.yaml | 102 +++++++++ .../mealie-ringtail/external-secret.yaml | 23 ++ .../ingress-tailscale.yaml | 0 .../mealie-ringtail/kustomization.yaml | 15 ++ argocd/manifests/mealie-ringtail/pvc.yaml | 14 ++ argocd/manifests/mealie-ringtail/service.yaml | 13 ++ argocd/manifests/mealie/deployment.yaml | 4 +- argocd/manifests/mealie/kustomization.yaml | 2 +- .../paperless-ringtail/deployment.yaml | 201 ++++++++++++++++++ .../paperless-ringtail/external-secret.yaml | 31 +++ .../ingress-tailscale.yaml | 0 .../paperless-ringtail/kustomization.yaml | 21 ++ .../manifests/paperless-ringtail/pv-nfs.yaml | 22 ++ argocd/manifests/paperless-ringtail/pvc.yaml | 15 ++ .../manifests/paperless-ringtail/service.yaml | 13 ++ argocd/manifests/paperless/deployment.yaml | 5 +- argocd/manifests/paperless/kustomization.yaml | 2 +- .../teslamate-ringtail/deployment.yaml | 72 +++++++ .../external-secret-db.yaml | 25 +++ .../external-secret-encryption-key.yaml | 27 +++ .../ingress-tailscale.yaml | 0 .../teslamate-ringtail/kustomization.yaml | 15 ++ .../manifests/teslamate-ringtail/service.yaml | 12 ++ argocd/manifests/teslamate/deployment.yaml | 5 +- argocd/manifests/teslamate/kustomization.yaml | 2 +- containers/mealie/Dockerfile | 145 ------------- containers/mealie/default.nix | 65 ++++++ containers/paperless/Dockerfile | 156 -------------- containers/paperless/default.nix | 77 +++++++ containers/teslamate/container.py | 104 --------- containers/teslamate/default.nix | 122 +++++++++++ 
containers/teslamate/entrypoint.sh | 23 -- .../migrate-wave1-ringtail.infra.md | 13 ++ .../immich/migrate-immich-to-ringtail.md | 2 + .../how-to/ringtail/migrate-wave1-ringtail.md | 176 +++++++++++++++ service-versions.yaml | 40 +++- 45 files changed, 1422 insertions(+), 445 deletions(-) create mode 100644 argocd/apps/mealie-ringtail.yaml create mode 100644 argocd/apps/paperless-ringtail.yaml create mode 100644 argocd/apps/teslamate-ringtail.yaml create mode 100644 argocd/manifests/databases-ringtail/blumeops-pg.yaml create mode 100644 argocd/manifests/databases-ringtail/external-secret-borgmatic.yaml create mode 100644 argocd/manifests/databases-ringtail/external-secret-eblume.yaml create mode 100644 argocd/manifests/databases-ringtail/external-secret-paperless.yaml create mode 100644 argocd/manifests/databases-ringtail/external-secret-teslamate.yaml create mode 100644 argocd/manifests/mealie-ringtail/deployment.yaml create mode 100644 argocd/manifests/mealie-ringtail/external-secret.yaml rename argocd/manifests/{mealie => mealie-ringtail}/ingress-tailscale.yaml (100%) create mode 100644 argocd/manifests/mealie-ringtail/kustomization.yaml create mode 100644 argocd/manifests/mealie-ringtail/pvc.yaml create mode 100644 argocd/manifests/mealie-ringtail/service.yaml create mode 100644 argocd/manifests/paperless-ringtail/deployment.yaml create mode 100644 argocd/manifests/paperless-ringtail/external-secret.yaml rename argocd/manifests/{paperless => paperless-ringtail}/ingress-tailscale.yaml (100%) create mode 100644 argocd/manifests/paperless-ringtail/kustomization.yaml create mode 100644 argocd/manifests/paperless-ringtail/pv-nfs.yaml create mode 100644 argocd/manifests/paperless-ringtail/pvc.yaml create mode 100644 argocd/manifests/paperless-ringtail/service.yaml create mode 100644 argocd/manifests/teslamate-ringtail/deployment.yaml create mode 100644 argocd/manifests/teslamate-ringtail/external-secret-db.yaml create mode 100644 
argocd/manifests/teslamate-ringtail/external-secret-encryption-key.yaml rename argocd/manifests/{teslamate => teslamate-ringtail}/ingress-tailscale.yaml (100%) create mode 100644 argocd/manifests/teslamate-ringtail/kustomization.yaml create mode 100644 argocd/manifests/teslamate-ringtail/service.yaml delete mode 100644 containers/mealie/Dockerfile create mode 100644 containers/mealie/default.nix delete mode 100644 containers/paperless/Dockerfile create mode 100644 containers/paperless/default.nix delete mode 100644 containers/teslamate/container.py create mode 100644 containers/teslamate/default.nix delete mode 100644 containers/teslamate/entrypoint.sh create mode 100644 docs/changelog.d/migrate-wave1-ringtail.infra.md create mode 100644 docs/how-to/ringtail/migrate-wave1-ringtail.md diff --git a/argocd/apps/mealie-ringtail.yaml b/argocd/apps/mealie-ringtail.yaml new file mode 100644 index 0000000..2f014a9 --- /dev/null +++ b/argocd/apps/mealie-ringtail.yaml @@ -0,0 +1,26 @@ +# Mealie on ringtail k3s. +# +# Wave-1 indri-k8s decommission. Staging deployment; the minikube `mealie` +# app stays in parallel until cutover (copy SQLite PVC, drop the minikube +# tailscale ingress, flip Caddy). See [[migrate-wave1-ringtail]]. 
+# +# Prerequisites: +# - external-secrets-ringtail (onepassword-blumeops ClusterSecretStore) +# - mealie-data PVC contents copied from minikube at cutover +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: mealie-ringtail + namespace: argocd +spec: + project: default + source: + repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git + targetRevision: main + path: argocd/manifests/mealie-ringtail + destination: + server: https://ringtail.tail8d86e.ts.net:6443 + namespace: mealie + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/argocd/apps/paperless-ringtail.yaml b/argocd/apps/paperless-ringtail.yaml new file mode 100644 index 0000000..bec98e9 --- /dev/null +++ b/argocd/apps/paperless-ringtail.yaml @@ -0,0 +1,28 @@ +# Paperless-ngx on ringtail k3s. +# +# Wave-1 indri-k8s decommission. Staging deployment; the minikube +# `paperless` app stays in parallel until cutover (drop the minikube +# tailscale ingress to free the name, then flip Caddy). See +# [[migrate-wave1-ringtail]]. +# +# Prerequisites: +# - databases-ringtail blumeops-pg (paperless database + role) +# - external-secrets-ringtail (onepassword-blumeops ClusterSecretStore) +# - sifaka NFS rule granting ringtail access to /volume1/paperless +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: paperless-ringtail + namespace: argocd +spec: + project: default + source: + repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git + targetRevision: main + path: argocd/manifests/paperless-ringtail + destination: + server: https://ringtail.tail8d86e.ts.net:6443 + namespace: paperless + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/argocd/apps/teslamate-ringtail.yaml b/argocd/apps/teslamate-ringtail.yaml new file mode 100644 index 0000000..b7b3491 --- /dev/null +++ b/argocd/apps/teslamate-ringtail.yaml @@ -0,0 +1,28 @@ +# TeslaMate on ringtail k3s. +# +# Wave-1 indri-k8s decommission. 
Staging deployment; the minikube +# `teslamate` app stays in parallel until cutover (migrate the teslamate +# database, drop the minikube tailscale ingress, flip Caddy). See +# [[migrate-wave1-ringtail]]. +# +# Prerequisites: +# - databases-ringtail blumeops-pg (teslamate database + role; cube + +# earthdistance extensions created by superuser at cutover) +# - external-secrets-ringtail (onepassword-blumeops ClusterSecretStore) +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: teslamate-ringtail + namespace: argocd +spec: + project: default + source: + repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git + targetRevision: main + path: argocd/manifests/teslamate-ringtail + destination: + server: https://ringtail.tail8d86e.ts.net:6443 + namespace: teslamate + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/argocd/manifests/databases-ringtail/blumeops-pg.yaml b/argocd/manifests/databases-ringtail/blumeops-pg.yaml new file mode 100644 index 0000000..3a37249 --- /dev/null +++ b/argocd/manifests/databases-ringtail/blumeops-pg.yaml @@ -0,0 +1,97 @@ +# PostgreSQL Cluster for blumeops services on ringtail k3s. +# +# Wave-1 indri-k8s decommission target (see [[migrate-wave1-ringtail]]). +# Holds the paperless and teslamate databases migrated off the minikube +# blumeops-pg via cold pg_dump/pg_restore at cutover. miniflux + authentik +# stay where they are for now (later waves), so this cluster only carries +# the wave-1 roles. +# +# Apps reach this in-cluster at blumeops-pg-rw.databases.svc.cluster.local +# — the same name they used on minikube, so teslamate's DATABASE_HOST is +# unchanged. +# +# Database creation is deferred to cutover, mirroring the minikube cluster +# (where only the bootstrap database is declared and the rest were created +# out-of-band): +# - paperless: the bootstrap database below (restored into at cutover). 
+# - teslamate: created at its cutover by the eblume superuser, because the +# dump's `earthdistance` extension is untrusted and CREATE EXTENSION +# needs superuser. (cube + earthdistance ownership then transferred to +# the teslamate role so it can ALTER EXTENSION UPDATE.) +apiVersion: postgresql.cnpg.io/v1 +kind: Cluster +metadata: + name: blumeops-pg + namespace: databases +spec: + instances: 1 + imageName: ghcr.io/cloudnative-pg/postgresql:18.3 + + storage: + size: 10Gi + storageClass: local-path + + bootstrap: + initdb: + database: paperless + owner: paperless + + managed: + roles: + # eblume superuser for admin + privileged restore steps (extensions) + - name: eblume + login: true + superuser: true + createdb: true + createrole: true + connectionLimit: -1 + ensure: present + inherit: true + passwordSecret: + name: blumeops-pg-eblume + # borgmatic read-only user for backups + - name: borgmatic + login: true + connectionLimit: -1 + ensure: present + inherit: true + inRoles: + - pg_read_all_data + passwordSecret: + name: blumeops-pg-borgmatic + # paperless user (also the bootstrap database owner above; the + # managed role sets its password from the 1Password-backed secret) + - name: paperless + login: true + connectionLimit: -1 + ensure: present + inherit: true + passwordSecret: + name: blumeops-pg-paperless + # teslamate user. Extension ownership (cube, earthdistance) is + # transferred to this role at cutover so it can ALTER EXTENSION UPDATE. + - name: teslamate + login: true + connectionLimit: -1 + ensure: present + inherit: true + passwordSecret: + name: blumeops-pg-teslamate + + resources: + requests: + memory: "256Mi" + cpu: "100m" + limits: + memory: "1Gi" + cpu: "500m" + + postgresql: + parameters: + max_connections: "50" + shared_buffers: "128MB" + password_encryption: "scram-sha-256" + pg_hba: + # Password auth from anywhere; network security is via Tailscale. 
+ - host all all 0.0.0.0/0 scram-sha-256 + - host all all ::/0 scram-sha-256 diff --git a/argocd/manifests/databases-ringtail/external-secret-borgmatic.yaml b/argocd/manifests/databases-ringtail/external-secret-borgmatic.yaml new file mode 100644 index 0000000..ee600e3 --- /dev/null +++ b/argocd/manifests/databases-ringtail/external-secret-borgmatic.yaml @@ -0,0 +1,30 @@ +# ExternalSecret for borgmatic backup user password +# +# Replaces the manual op inject workflow from secret-borgmatic.yaml.tpl +# +# 1Password item: "borgmatic" in blumeops vault +# Field: "db-password" +# +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: blumeops-pg-borgmatic + namespace: databases +spec: + refreshInterval: 1h + secretStoreRef: + kind: ClusterSecretStore + name: onepassword-blumeops + target: + name: blumeops-pg-borgmatic + creationPolicy: Owner + template: + type: kubernetes.io/basic-auth + data: + username: borgmatic + password: "{{ .password }}" + data: + - secretKey: password + remoteRef: + key: borgmatic + property: db-password diff --git a/argocd/manifests/databases-ringtail/external-secret-eblume.yaml b/argocd/manifests/databases-ringtail/external-secret-eblume.yaml new file mode 100644 index 0000000..a324c7d --- /dev/null +++ b/argocd/manifests/databases-ringtail/external-secret-eblume.yaml @@ -0,0 +1,30 @@ +# ExternalSecret for eblume superuser password +# +# Replaces the manual op inject workflow from secret-eblume.yaml.tpl +# +# 1Password item: "postgres" in blumeops vault +# Field: "password" +# +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: blumeops-pg-eblume + namespace: databases +spec: + refreshInterval: 1h + secretStoreRef: + kind: ClusterSecretStore + name: onepassword-blumeops + target: + name: blumeops-pg-eblume + creationPolicy: Owner + template: + type: kubernetes.io/basic-auth + data: + username: eblume + password: "{{ .password }}" + data: + - secretKey: password + remoteRef: + key: postgres + 
property: password diff --git a/argocd/manifests/databases-ringtail/external-secret-paperless.yaml b/argocd/manifests/databases-ringtail/external-secret-paperless.yaml new file mode 100644 index 0000000..e5742be --- /dev/null +++ b/argocd/manifests/databases-ringtail/external-secret-paperless.yaml @@ -0,0 +1,28 @@ +# ExternalSecret for Paperless database user password +# +# 1Password item: "Paperless (blumeops)" in blumeops vault +# Field: "postgresql-password" +# +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: blumeops-pg-paperless + namespace: databases +spec: + refreshInterval: 1h + secretStoreRef: + kind: ClusterSecretStore + name: onepassword-blumeops + target: + name: blumeops-pg-paperless + creationPolicy: Owner + template: + type: kubernetes.io/basic-auth + data: + username: paperless + password: "{{ .password }}" + data: + - secretKey: password + remoteRef: + key: Paperless (blumeops) + property: postgresql-password diff --git a/argocd/manifests/databases-ringtail/external-secret-teslamate.yaml b/argocd/manifests/databases-ringtail/external-secret-teslamate.yaml new file mode 100644 index 0000000..0c52e0b --- /dev/null +++ b/argocd/manifests/databases-ringtail/external-secret-teslamate.yaml @@ -0,0 +1,30 @@ +# ExternalSecret for TeslaMate database user password +# +# Replaces the manual op inject workflow from secret-teslamate.yaml.tpl +# +# 1Password item: "TeslaMate" in blumeops vault +# Field: "db_password" +# +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: blumeops-pg-teslamate + namespace: databases +spec: + refreshInterval: 1h + secretStoreRef: + kind: ClusterSecretStore + name: onepassword-blumeops + target: + name: blumeops-pg-teslamate + creationPolicy: Owner + template: + type: kubernetes.io/basic-auth + data: + username: teslamate + password: "{{ .password }}" + data: + - secretKey: password + remoteRef: + key: TeslaMate + property: db_password diff --git 
a/argocd/manifests/databases-ringtail/kustomization.yaml b/argocd/manifests/databases-ringtail/kustomization.yaml index 971e2d4..2bc2af3 100644 --- a/argocd/manifests/databases-ringtail/kustomization.yaml +++ b/argocd/manifests/databases-ringtail/kustomization.yaml @@ -7,3 +7,9 @@ resources: - immich-pg.yaml - external-secret-immich-borgmatic.yaml - service-immich-pg-tailscale.yaml + # wave-1 indri-k8s decommission: blumeops-pg (paperless + teslamate) + - blumeops-pg.yaml + - external-secret-eblume.yaml + - external-secret-borgmatic.yaml + - external-secret-paperless.yaml + - external-secret-teslamate.yaml diff --git a/argocd/manifests/mealie-ringtail/deployment.yaml b/argocd/manifests/mealie-ringtail/deployment.yaml new file mode 100644 index 0000000..10d06ab --- /dev/null +++ b/argocd/manifests/mealie-ringtail/deployment.yaml @@ -0,0 +1,102 @@ +# Mealie on ringtail k3s — Nix image. +# +# Single gunicorn process (the Nix image's default `mealie-run` entrypoint +# runs init_db then gunicorn), serving the prebuilt frontend. DB is SQLite +# on the mealie-data PVC; its contents are copied from the minikube PVC at +# cutover. See [[migrate-wave1-ringtail]]. 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: mealie + namespace: mealie +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: mealie + template: + metadata: + labels: + app: mealie + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: mealie + image: registry.ops.eblu.me/blumeops/mealie:kustomized + ports: + - containerPort: 9000 + env: + - name: BASE_URL + value: "https://meals.ops.eblu.me" + - name: ALLOW_SIGNUP + value: "false" + - name: TZ + value: "America/Los_Angeles" + - name: MAX_WORKERS + value: "1" + - name: WEB_CONCURRENCY + value: "1" + # OIDC — Authentik (client secret supplied via mealie-secrets) + - name: OIDC_AUTH_ENABLED + value: "true" + - name: OIDC_CONFIGURATION_URL + value: "https://authentik.ops.eblu.me/application/o/mealie/.well-known/openid-configuration" + - name: OIDC_CLIENT_ID + value: "mealie" + - name: OIDC_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: mealie-secrets + key: oidc-client-secret + - name: OIDC_AUTO_REDIRECT + value: "false" + - name: OIDC_PROVIDER_NAME + value: "Authentik" + - name: OIDC_ADMIN_GROUP + value: "admins" + - name: OIDC_SIGNUP_ENABLED + value: "true" + - name: OIDC_USER_CLAIM + value: "email" + # OpenAI — recipe parsing, image OCR, ingredient extraction + - name: OPENAI_API_KEY + valueFrom: + secretKeyRef: + name: mealie-secrets + key: openai-api-key + - name: OPENAI_MODEL + value: "gpt-4o" + - name: OPENAI_REQUEST_TIMEOUT + value: "120" + - name: OPENAI_WORKERS + value: "1" + volumeMounts: + - name: data + mountPath: /app/data + resources: + requests: + memory: "128Mi" + cpu: "50m" + limits: + memory: "1000Mi" + cpu: "500m" + livenessProbe: + httpGet: + path: /api/app/about + port: 9000 + initialDelaySeconds: 30 + periodSeconds: 30 + readinessProbe: + httpGet: + path: /api/app/about + port: 9000 + initialDelaySeconds: 10 + periodSeconds: 10 + volumes: + - name: data + persistentVolumeClaim: + claimName: mealie-data diff --git
a/argocd/manifests/mealie-ringtail/external-secret.yaml b/argocd/manifests/mealie-ringtail/external-secret.yaml new file mode 100644 index 0000000..99c2793 --- /dev/null +++ b/argocd/manifests/mealie-ringtail/external-secret.yaml @@ -0,0 +1,23 @@ +--- +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: mealie-secrets + namespace: mealie +spec: + refreshInterval: 1h + secretStoreRef: + kind: ClusterSecretStore + name: onepassword-blumeops + target: + name: mealie-secrets + creationPolicy: Owner + data: + - secretKey: oidc-client-secret + remoteRef: + key: "Authentik (blumeops)" + property: mealie-client-secret + - secretKey: openai-api-key + remoteRef: + key: "openai (blumeops)" + property: credential diff --git a/argocd/manifests/mealie/ingress-tailscale.yaml b/argocd/manifests/mealie-ringtail/ingress-tailscale.yaml similarity index 100% rename from argocd/manifests/mealie/ingress-tailscale.yaml rename to argocd/manifests/mealie-ringtail/ingress-tailscale.yaml diff --git a/argocd/manifests/mealie-ringtail/kustomization.yaml b/argocd/manifests/mealie-ringtail/kustomization.yaml new file mode 100644 index 0000000..8428042 --- /dev/null +++ b/argocd/manifests/mealie-ringtail/kustomization.yaml @@ -0,0 +1,15 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: mealie + +resources: + - deployment.yaml + - service.yaml + - pvc.yaml + - ingress-tailscale.yaml + - external-secret.yaml + +images: + - name: registry.ops.eblu.me/blumeops/mealie + newTag: v3.16.0-1d4cbbf-nix diff --git a/argocd/manifests/mealie-ringtail/pvc.yaml b/argocd/manifests/mealie-ringtail/pvc.yaml new file mode 100644 index 0000000..89c38ef --- /dev/null +++ b/argocd/manifests/mealie-ringtail/pvc.yaml @@ -0,0 +1,14 @@ +# SQLite data volume for Mealie on ringtail. Contents copied from the +# minikube mealie-data PVC at cutover (recipes, meal plans, uploaded media). 
+apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: mealie-data + namespace: mealie +spec: + accessModes: + - ReadWriteOnce + storageClassName: local-path + resources: + requests: + storage: 2Gi diff --git a/argocd/manifests/mealie-ringtail/service.yaml b/argocd/manifests/mealie-ringtail/service.yaml new file mode 100644 index 0000000..4162b96 --- /dev/null +++ b/argocd/manifests/mealie-ringtail/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: mealie + namespace: mealie +spec: + selector: + app: mealie + ports: + - name: http + port: 9000 + targetPort: 9000 + protocol: TCP diff --git a/argocd/manifests/mealie/deployment.yaml b/argocd/manifests/mealie/deployment.yaml index bdcf91e..7cdd275 100644 --- a/argocd/manifests/mealie/deployment.yaml +++ b/argocd/manifests/mealie/deployment.yaml @@ -4,7 +4,9 @@ metadata: name: mealie namespace: mealie spec: - replicas: 1 + # Migrated to ringtail (mealie-ringtail). Scaled to 0; SQLite PVC retained + # for rollback until the decommission PR. See [[migrate-wave1-ringtail]]. + replicas: 0 selector: matchLabels: app: mealie diff --git a/argocd/manifests/mealie/kustomization.yaml b/argocd/manifests/mealie/kustomization.yaml index fb0713b..02563f4 100644 --- a/argocd/manifests/mealie/kustomization.yaml +++ b/argocd/manifests/mealie/kustomization.yaml @@ -7,7 +7,7 @@ resources: - deployment.yaml - service.yaml - pvc.yaml - - ingress-tailscale.yaml + # ingress removed: name 'meals' handed off to mealie-ringtail at cutover - external-secret.yaml images: diff --git a/argocd/manifests/paperless-ringtail/deployment.yaml b/argocd/manifests/paperless-ringtail/deployment.yaml new file mode 100644 index 0000000..de4f456 --- /dev/null +++ b/argocd/manifests/paperless-ringtail/deployment.yaml @@ -0,0 +1,201 @@ +# Paperless-ngx on ringtail k3s — Nix image, multi-process. +# +# The upstream s6 image ran web + worker + scheduler + consumer (and DB +# migrations) in one container. 
The Nix image (containers/paperless/ +# default.nix) ships the binaries but no supervisor, so we run those as +# four containers in one pod, sharing the local data/consume dirs +# (emptyDir) and the NFS media volume; redis is colocated so +# PAPERLESS_REDIS=localhost works for all. A migrate initContainer runs +# DB migrations once before the app containers start. +# +# DB points in-cluster at the ringtail blumeops-pg (was pg.ops.eblu.me on +# indri). PAPERLESS_{DATA_DIR,MEDIA_ROOT,CONSUMPTION_DIR} are set +# explicitly because the Nix package does not default to the upstream +# /usr/src/paperless paths. +apiVersion: apps/v1 +kind: Deployment +metadata: + name: paperless + namespace: paperless +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: paperless + template: + metadata: + labels: + app: paperless + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + initContainers: + # redis as a native sidecar (restartPolicy: Always): starts before + # the migrate init and stays running for the app containers, so all + # of them reach PAPERLESS_REDIS=localhost:6379. 
+ - name: redis + image: docker.io/library/redis:kustomized + restartPolicy: Always + ports: + - containerPort: 6379 + volumeMounts: + - name: redis-data + mountPath: /data + resources: + requests: + memory: "32Mi" + cpu: "10m" + limits: + memory: "128Mi" + - name: migrate + image: registry.ops.eblu.me/blumeops/paperless:kustomized + command: ["paperless-ngx", "migrate", "--no-input"] + env: &paperless-env + - name: PAPERLESS_URL + value: "https://paperless.ops.eblu.me" + - name: PAPERLESS_REDIS + value: "redis://localhost:6379" + - name: PAPERLESS_DBHOST + value: "blumeops-pg-rw.databases.svc.cluster.local" + - name: PAPERLESS_DBPORT + value: "5432" + - name: PAPERLESS_DBNAME + value: "paperless" + - name: PAPERLESS_DBUSER + value: "paperless" + - name: PAPERLESS_DBPASS + valueFrom: + secretKeyRef: + name: paperless-secrets + key: db-password + # Explicit port to override the k8s-injected PAPERLESS_PORT + # (service named 'paperless' would set PAPERLESS_PORT=tcp://...) + - name: PAPERLESS_PORT + value: "8000" + - name: PAPERLESS_DATA_DIR + value: "/usr/src/paperless/data" + - name: PAPERLESS_MEDIA_ROOT + value: "/usr/src/paperless/media" + - name: PAPERLESS_CONSUMPTION_DIR + value: "/usr/src/paperless/consume" + - name: PAPERLESS_SECRET_KEY + valueFrom: + secretKeyRef: + name: paperless-secrets + key: secret-key + - name: PAPERLESS_TIME_ZONE + value: "America/Los_Angeles" + - name: PAPERLESS_OCR_LANGUAGE + value: "eng" + - name: PAPERLESS_TASK_WORKERS + value: "1" + - name: PAPERLESS_ADMIN_USER + value: "eblume" + - name: PAPERLESS_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: paperless-secrets + key: admin-password + - name: PAPERLESS_ADMIN_MAIL + value: "blume.erich@gmail.com" + - name: PAPERLESS_APPS + value: "allauth.socialaccount.providers.openid_connect" + - name: PAPERLESS_SOCIALACCOUNT_PROVIDERS + valueFrom: + secretKeyRef: + name: paperless-secrets + key: socialaccount-providers + - name: PAPERLESS_SOCIALACCOUNT_ALLOW_SIGNUPS + value: "true" + - 
name: PAPERLESS_SOCIAL_AUTO_SIGNUP + value: "true" + - name: PAPERLESS_ACCOUNT_ALLOW_SIGNUPS + value: "false" + - name: PAPERLESS_REDIRECT_LOGIN_TO_SSO + value: "false" + volumeMounts: &paperless-mounts + - name: data + mountPath: /usr/src/paperless/data + - name: media + mountPath: /usr/src/paperless/media + - name: consume + mountPath: /usr/src/paperless/consume + containers: + - name: web + image: registry.ops.eblu.me/blumeops/paperless:kustomized + ports: + - containerPort: 8000 + name: http + env: *paperless-env + volumeMounts: *paperless-mounts + resources: + requests: + memory: "256Mi" + cpu: "100m" + limits: + memory: "1Gi" + cpu: "1000m" + livenessProbe: + httpGet: + path: / + port: 8000 + initialDelaySeconds: 60 + periodSeconds: 30 + readinessProbe: + httpGet: + path: / + port: 8000 + initialDelaySeconds: 30 + periodSeconds: 10 + + - name: worker + image: registry.ops.eblu.me/blumeops/paperless:kustomized + command: ["celery", "--app", "paperless", "worker", "--loglevel", "INFO"] + env: *paperless-env + volumeMounts: *paperless-mounts + resources: + requests: + memory: "256Mi" + cpu: "100m" + limits: + memory: "1Gi" + cpu: "1000m" + + - name: beat + image: registry.ops.eblu.me/blumeops/paperless:kustomized + command: ["celery", "--app", "paperless", "beat", "--loglevel", "INFO"] + env: *paperless-env + volumeMounts: *paperless-mounts + resources: + requests: + memory: "64Mi" + cpu: "20m" + limits: + memory: "256Mi" + + - name: consumer + image: registry.ops.eblu.me/blumeops/paperless:kustomized + command: ["paperless-ngx", "document_consumer"] + env: *paperless-env + volumeMounts: *paperless-mounts + resources: + requests: + memory: "128Mi" + cpu: "50m" + limits: + memory: "512Mi" + + volumes: + - name: data + emptyDir: {} + - name: media + persistentVolumeClaim: + claimName: paperless-media + - name: consume + emptyDir: {} + - name: redis-data + emptyDir: + sizeLimit: 1Gi diff --git a/argocd/manifests/paperless-ringtail/external-secret.yaml 
b/argocd/manifests/paperless-ringtail/external-secret.yaml new file mode 100644 index 0000000..750b7c5 --- /dev/null +++ b/argocd/manifests/paperless-ringtail/external-secret.yaml @@ -0,0 +1,31 @@ +--- +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: paperless-secrets + namespace: paperless +spec: + refreshInterval: 1h + secretStoreRef: + kind: ClusterSecretStore + name: onepassword-blumeops + target: + name: paperless-secrets + creationPolicy: Owner + data: + - secretKey: db-password + remoteRef: + key: "Paperless (blumeops)" + property: postgresql-password + - secretKey: secret-key + remoteRef: + key: "Paperless (blumeops)" + property: secret-key + - secretKey: admin-password + remoteRef: + key: "Paperless (blumeops)" + property: admin-password + - secretKey: socialaccount-providers + remoteRef: + key: "Paperless (blumeops)" + property: socialaccount-providers diff --git a/argocd/manifests/paperless/ingress-tailscale.yaml b/argocd/manifests/paperless-ringtail/ingress-tailscale.yaml similarity index 100% rename from argocd/manifests/paperless/ingress-tailscale.yaml rename to argocd/manifests/paperless-ringtail/ingress-tailscale.yaml diff --git a/argocd/manifests/paperless-ringtail/kustomization.yaml b/argocd/manifests/paperless-ringtail/kustomization.yaml new file mode 100644 index 0000000..0a691e0 --- /dev/null +++ b/argocd/manifests/paperless-ringtail/kustomization.yaml @@ -0,0 +1,21 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: paperless + +resources: + - deployment.yaml + - service.yaml + - pv-nfs.yaml + - pvc.yaml + - ingress-tailscale.yaml + - external-secret.yaml + +images: + - name: registry.ops.eblu.me/blumeops/paperless + newTag: v2.20.15-1d4cbbf-nix + # amd64 valkey built via nix (the v8.1.7-ecded30 tag without -nix is the + # arm64 Alpine build for indri and fails on ringtail with exec format error) + - name: docker.io/library/redis + newName: registry.ops.eblu.me/blumeops/valkey + 
newTag: v8.1.7-ecded30-nix diff --git a/argocd/manifests/paperless-ringtail/pv-nfs.yaml b/argocd/manifests/paperless-ringtail/pv-nfs.yaml new file mode 100644 index 0000000..2990d1a --- /dev/null +++ b/argocd/manifests/paperless-ringtail/pv-nfs.yaml @@ -0,0 +1,22 @@ +# NFS PersistentVolume for the Paperless document library, mounted from +# ringtail. Same sifaka export (/volume1/paperless) as the minikube PV, +# but a distinct PV name so both clusters can declare it during the +# parallel-run before cutover. +# +# Prerequisite: sifaka must have an NFS rule granting ringtail Read/Write +# (Squash=No mapping) on the paperless share — the same step done for +# immich. See [[sifaka-nfs-from-ringtail]]. +apiVersion: v1 +kind: PersistentVolume +metadata: + name: paperless-media-nfs-pv-ringtail +spec: + capacity: + storage: 500Gi + accessModes: + - ReadWriteMany + persistentVolumeReclaimPolicy: Retain + storageClassName: "" + nfs: + server: sifaka + path: /volume1/paperless diff --git a/argocd/manifests/paperless-ringtail/pvc.yaml b/argocd/manifests/paperless-ringtail/pvc.yaml new file mode 100644 index 0000000..8b44660 --- /dev/null +++ b/argocd/manifests/paperless-ringtail/pvc.yaml @@ -0,0 +1,15 @@ +# PersistentVolumeClaim for the Paperless document library on ringtail. +# Binds the NFS PV for sifaka:/volume1/paperless. 
+apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: paperless-media + namespace: paperless +spec: + accessModes: + - ReadWriteMany + storageClassName: "" + volumeName: paperless-media-nfs-pv-ringtail + resources: + requests: + storage: 500Gi diff --git a/argocd/manifests/paperless-ringtail/service.yaml b/argocd/manifests/paperless-ringtail/service.yaml new file mode 100644 index 0000000..cff2972 --- /dev/null +++ b/argocd/manifests/paperless-ringtail/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: paperless + namespace: paperless +spec: + selector: + app: paperless + ports: + - name: http + port: 8000 + targetPort: 8000 + protocol: TCP diff --git a/argocd/manifests/paperless/deployment.yaml b/argocd/manifests/paperless/deployment.yaml index cc2c013..1730486 100644 --- a/argocd/manifests/paperless/deployment.yaml +++ b/argocd/manifests/paperless/deployment.yaml @@ -4,7 +4,10 @@ metadata: name: paperless namespace: paperless spec: - replicas: 1 + # Migrated to ringtail (paperless-ringtail). Scaled to 0 to prevent + # double-writing the now-ringtail-owned database; manifest retained for + # rollback until the decommission PR. See [[migrate-wave1-ringtail]]. 
+ replicas: 0 selector: matchLabels: app: paperless diff --git a/argocd/manifests/paperless/kustomization.yaml b/argocd/manifests/paperless/kustomization.yaml index 3cd0d74..a92a769 100644 --- a/argocd/manifests/paperless/kustomization.yaml +++ b/argocd/manifests/paperless/kustomization.yaml @@ -8,7 +8,7 @@ resources: - service.yaml - pv-nfs.yaml - pvc.yaml - - ingress-tailscale.yaml + # ingress removed: name 'paperless' handed off to paperless-ringtail at cutover - external-secret.yaml images: diff --git a/argocd/manifests/teslamate-ringtail/deployment.yaml b/argocd/manifests/teslamate-ringtail/deployment.yaml new file mode 100644 index 0000000..cf8cc73 --- /dev/null +++ b/argocd/manifests/teslamate-ringtail/deployment.yaml @@ -0,0 +1,72 @@ +# TeslaMate on ringtail k3s — Nix image. +# +# The Nix image's Entrypoint waits for postgres, runs migrations +# (TeslaMate.Release.migrate), then starts the release — so no command +# override is needed. Stateless; all data lives in the teslamate database +# on the ringtail blumeops-pg (DATABASE_HOST already an in-cluster name, +# unchanged from minikube). See [[migrate-wave1-ringtail]]. 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: teslamate + namespace: teslamate +spec: + replicas: 1 + selector: + matchLabels: + app: teslamate + template: + metadata: + labels: + app: teslamate + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: teslamate + image: registry.ops.eblu.me/blumeops/teslamate:kustomized + ports: + - containerPort: 4000 + env: + - name: DATABASE_USER + value: "teslamate" + - name: DATABASE_PASS + valueFrom: + secretKeyRef: + name: teslamate-db + key: password + - name: DATABASE_NAME + value: "teslamate" + - name: DATABASE_HOST + value: "blumeops-pg-rw.databases.svc.cluster.local" + - name: ENCRYPTION_KEY + valueFrom: + secretKeyRef: + name: teslamate-encryption + key: key + - name: DISABLE_MQTT + value: "true" + - name: CHECK_ORIGIN + value: "false" + - name: TZ + value: "America/Los_Angeles" + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" + livenessProbe: + httpGet: + path: / + port: 4000 + initialDelaySeconds: 30 + periodSeconds: 30 + readinessProbe: + httpGet: + path: / + port: 4000 + initialDelaySeconds: 10 + periodSeconds: 10 diff --git a/argocd/manifests/teslamate-ringtail/external-secret-db.yaml b/argocd/manifests/teslamate-ringtail/external-secret-db.yaml new file mode 100644 index 0000000..11eeec6 --- /dev/null +++ b/argocd/manifests/teslamate-ringtail/external-secret-db.yaml @@ -0,0 +1,25 @@ +# ExternalSecret for TeslaMate database password +# +# Replaces the manual op inject workflow from secret-db.yaml.tpl +# +# 1Password item: "TeslaMate" in blumeops vault +# Field: "db_password" +# +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: teslamate-db + namespace: teslamate +spec: + refreshInterval: 1h + secretStoreRef: + kind: ClusterSecretStore + name: onepassword-blumeops + target: + name: teslamate-db + creationPolicy: Owner + data: + - secretKey: password + remoteRef: + key: TeslaMate + property: 
db_password diff --git a/argocd/manifests/teslamate-ringtail/external-secret-encryption-key.yaml b/argocd/manifests/teslamate-ringtail/external-secret-encryption-key.yaml new file mode 100644 index 0000000..96938bf --- /dev/null +++ b/argocd/manifests/teslamate-ringtail/external-secret-encryption-key.yaml @@ -0,0 +1,27 @@ +# ExternalSecret for TeslaMate encryption key +# +# Replaces the manual op inject workflow from secret-encryption-key.yaml.tpl +# +# 1Password item: "TeslaMate" in blumeops vault +# Field: "api_enc_key" +# +# This key encrypts Tesla API tokens at rest in the database. +# +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: teslamate-encryption + namespace: teslamate +spec: + refreshInterval: 1h + secretStoreRef: + kind: ClusterSecretStore + name: onepassword-blumeops + target: + name: teslamate-encryption + creationPolicy: Owner + data: + - secretKey: key + remoteRef: + key: TeslaMate + property: api_enc_key diff --git a/argocd/manifests/teslamate/ingress-tailscale.yaml b/argocd/manifests/teslamate-ringtail/ingress-tailscale.yaml similarity index 100% rename from argocd/manifests/teslamate/ingress-tailscale.yaml rename to argocd/manifests/teslamate-ringtail/ingress-tailscale.yaml diff --git a/argocd/manifests/teslamate-ringtail/kustomization.yaml b/argocd/manifests/teslamate-ringtail/kustomization.yaml new file mode 100644 index 0000000..f31fe09 --- /dev/null +++ b/argocd/manifests/teslamate-ringtail/kustomization.yaml @@ -0,0 +1,15 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: teslamate + +resources: + - deployment.yaml + - service.yaml + - ingress-tailscale.yaml + - external-secret-db.yaml + - external-secret-encryption-key.yaml + +images: + - name: registry.ops.eblu.me/blumeops/teslamate + newTag: v3.0.0-191be1b-nix diff --git a/argocd/manifests/teslamate-ringtail/service.yaml b/argocd/manifests/teslamate-ringtail/service.yaml new file mode 100644 index 0000000..b04f45e --- /dev/null 
+++ b/argocd/manifests/teslamate-ringtail/service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: teslamate + namespace: teslamate +spec: + selector: + app: teslamate + ports: + - port: 4000 + targetPort: 4000 + type: ClusterIP diff --git a/argocd/manifests/teslamate/deployment.yaml b/argocd/manifests/teslamate/deployment.yaml index 42859a7..cf7f9bb 100644 --- a/argocd/manifests/teslamate/deployment.yaml +++ b/argocd/manifests/teslamate/deployment.yaml @@ -4,7 +4,10 @@ metadata: name: teslamate namespace: teslamate spec: - replicas: 1 + # Migrated to ringtail (teslamate-ringtail). Scaled to 0 to prevent + # double-writing the now-ringtail-owned database; manifest retained for + # rollback until the decommission PR. See [[migrate-wave1-ringtail]]. + replicas: 0 selector: matchLabels: app: teslamate diff --git a/argocd/manifests/teslamate/kustomization.yaml b/argocd/manifests/teslamate/kustomization.yaml index a00586f..be9d39d 100644 --- a/argocd/manifests/teslamate/kustomization.yaml +++ b/argocd/manifests/teslamate/kustomization.yaml @@ -6,7 +6,7 @@ namespace: teslamate resources: - deployment.yaml - service.yaml - - ingress-tailscale.yaml + # ingress removed: name 'tesla' handed off to teslamate-ringtail at cutover - external-secret-db.yaml - external-secret-encryption-key.yaml diff --git a/containers/mealie/Dockerfile b/containers/mealie/Dockerfile deleted file mode 100644 index 8df38bf..0000000 --- a/containers/mealie/Dockerfile +++ /dev/null @@ -1,145 +0,0 @@ -# Mealie — self-hosted recipe manager -# Built from source via forge mirror of mealie-recipes/mealie -# Based on upstream docker/Dockerfile (multi-stage: Node frontend + Python backend) - -ARG CONTAINER_APP_VERSION=v3.12.0 - -############################################### -# Frontend Build -############################################### -FROM node:24-slim AS frontend-builder - -ARG CONTAINER_APP_VERSION -RUN apt-get update && apt-get install --no-install-recommends -y git 
ca-certificates && rm -rf /var/lib/apt/lists/* - -RUN git clone --depth 1 --branch ${CONTAINER_APP_VERSION} \ - https://forge.ops.eblu.me/mirrors/mealie.git /src - -WORKDIR /src/frontend - -RUN yarn install \ - --prefer-offline \ - --frozen-lockfile \ - --non-interactive \ - --production=false \ - --network-timeout 1000000 - -RUN yarn generate - -############################################### -# Python Base -############################################### -FROM python:3.12-slim AS python-base - -ENV MEALIE_HOME="/app" -ENV PYTHONUNBUFFERED=1 \ - PYTHONDONTWRITEBYTECODE=1 \ - PIP_NO_CACHE_DIR=off \ - PIP_DISABLE_PIP_VERSION_CHECK=on \ - PIP_DEFAULT_TIMEOUT=100 \ - VENV_PATH="/opt/mealie" - -ENV PATH="$VENV_PATH/bin:$PATH" - -RUN useradd -u 911 -U -d $MEALIE_HOME -s /bin/bash abc \ - && usermod -G users abc \ - && mkdir $MEALIE_HOME - -############################################### -# Backend Package Build -############################################### -FROM python-base AS backend-builder - -ARG CONTAINER_APP_VERSION -RUN apt-get update \ - && apt-get install --no-install-recommends -y curl git ca-certificates \ - && rm -rf /var/lib/apt/lists/* - -RUN pip install uv - -RUN git clone --depth 1 --branch ${CONTAINER_APP_VERSION} \ - https://forge.ops.eblu.me/mirrors/mealie.git /src - -WORKDIR /src - -COPY --from=frontend-builder /src/frontend/dist ./mealie/frontend - -RUN uv build --out-dir dist - -RUN uv export --no-editable --no-emit-project --extra pgsql --format requirements-txt --output-file dist/requirements.txt \ - && MEALIE_VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml', 'rb'))['project']['version'])") \ - && echo "mealie[pgsql]==${MEALIE_VERSION} \\" >> dist/requirements.txt \ - && pip hash dist/mealie-${MEALIE_VERSION}-py3-none-any.whl | tail -n1 | tr -d '\n' >> dist/requirements.txt \ - && echo " \\" >> dist/requirements.txt \ - && pip hash dist/mealie-${MEALIE_VERSION}.tar.gz | tail -n1 >> dist/requirements.txt - 
-############################################### -# Python Venv Build -############################################### -FROM python-base AS venv-builder - -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - build-essential \ - libpq-dev \ - libwebp-dev \ - ffmpeg \ - libsasl2-dev libldap2-dev libssl-dev \ - gnupg gnupg2 gnupg1 \ - && rm -rf /var/lib/apt/lists/* - -RUN python3 -m venv --upgrade-deps $VENV_PATH - -COPY --from=backend-builder /src/dist /dist - -RUN . $VENV_PATH/bin/activate \ - && pip install --require-hashes -r /dist/requirements.txt --find-links /dist - -############################################### -# Production Image -############################################### -FROM python-base AS production - -ENV PRODUCTION=true -ENV TESTING=false - -RUN apt-get update \ - && apt-get install --no-install-recommends -y \ - curl \ - ffmpeg \ - gosu \ - iproute2 \ - libldap-common \ - libldap2 \ - && rm -rf /var/lib/apt/lists/* - -RUN mkdir -p /run/secrets - -COPY --from=venv-builder $VENV_PATH $VENV_PATH - -ENV NLTK_DATA="/nltk_data/" -RUN mkdir -p $NLTK_DATA -RUN python -m nltk.downloader -d $NLTK_DATA averaged_perceptron_tagger_eng - -VOLUME ["$MEALIE_HOME/data/"] -ENV APP_PORT=9000 - -EXPOSE ${APP_PORT} - -COPY --from=backend-builder /src/docker/healthcheck.sh $MEALIE_HOME/healthcheck.sh -RUN chmod +x $MEALIE_HOME/healthcheck.sh -HEALTHCHECK CMD $MEALIE_HOME/healthcheck.sh - -ENV HOST=0.0.0.0 - -COPY --from=backend-builder /src/docker/entry.sh $MEALIE_HOME/run.sh -RUN chmod +x $MEALIE_HOME/run.sh - -ARG CONTAINER_APP_VERSION -LABEL org.opencontainers.image.title="Mealie" -LABEL org.opencontainers.image.description="Self-hosted recipe manager" -LABEL org.opencontainers.image.version="${CONTAINER_APP_VERSION}" -LABEL org.opencontainers.image.source="https://forge.eblu.me/eblume/blumeops" -LABEL org.opencontainers.image.vendor="blumeops" - -ENTRYPOINT ["/app/run.sh"] diff --git a/containers/mealie/default.nix 
b/containers/mealie/default.nix new file mode 100644 index 0000000..fdb1430 --- /dev/null +++ b/containers/mealie/default.nix @@ -0,0 +1,65 @@ +# Nix-built Mealie for ringtail (amd64). +# +# Replaces the from-source Dockerfile build (Node frontend + Python venv) +# with nixpkgs' mealie, which ships a single `mealie` gunicorn entrypoint +# serving the prebuilt frontend + backend — so this is a clean single- +# process wrap (unlike paperless, which is multi-process). +# +# Mealie stores its DB as SQLite under DATA_DIR (the mealie-data PVC at +# /app/data); there is no postgres. The run wrapper mirrors the nixpkgs +# mealie NixOS module: run `libexec/init_db` (Alembic migrations) first, +# then exec gunicorn. +# +# Self-pins nixos-unstable: stable nixpkgs lags at 3.9.2, unstable carries +# 3.16.0. This is a forward 4-minor bump from the v3.12.0 Dockerfile build +# (the deferred upgrade) — mealie auto-migrates the SQLite DB forward on +# startup via init_db; the source PVC is retained for rollback. The version +# assertion makes nix-build fail if a pin bump changes the version. +let + nixpkgs = fetchTarball { + url = "https://github.com/NixOS/nixpkgs/archive/331800de5053fcebacf6813adb5db9c9dca22a0c.tar.gz"; + sha256 = "1p54fm6dkbq62kpi55cr4wyx7b1nsajpsnjgs64cmp073fwi15f7"; + }; + pkgs = import nixpkgs { system = "x86_64-linux"; }; + + version = "3.16.0"; + + app = pkgs.mealie; + + # Mirror the NixOS module's mealie service: init_db (Alembic) then + # gunicorn bound to the app port. DATA_DIR/env come from the image + + # k8s manifest. 
+ mealie-run = pkgs.writeShellScriptBin "mealie-run" '' + set -e + ${app}/libexec/init_db + exec ${pkgs.lib.getExe app} -b 0.0.0.0:9000 + ''; +in + +assert app.version == version; + +pkgs.dockerTools.buildLayeredImage { + name = "blumeops/mealie"; + + contents = [ + app + mealie-run + pkgs.bashInteractive + pkgs.coreutils + pkgs.cacert + pkgs.tzdata + ]; + + config = { + Cmd = [ "${mealie-run}/bin/mealie-run" ]; + Env = [ + "DATA_DIR=/app/data" + "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" + "PYTHONUNBUFFERED=1" + "PRODUCTION=true" + ]; + ExposedPorts = { + "9000/tcp" = { }; + }; + }; +} diff --git a/containers/paperless/Dockerfile b/containers/paperless/Dockerfile deleted file mode 100644 index a7b4e65..0000000 --- a/containers/paperless/Dockerfile +++ /dev/null @@ -1,156 +0,0 @@ -# syntax=docker/dockerfile:1 -# Paperless-ngx — self-hosted document management -# Built from source via forge mirror of paperless-ngx/paperless-ngx -# Closely follows upstream Dockerfile structure with git clone instead of COPY - -ARG CONTAINER_APP_VERSION=v2.20.13 - -############################################### -# Stage 1: Clone source (reused by later stages) -############################################### -FROM docker.io/library/alpine:3.22 AS source - -ARG CONTAINER_APP_VERSION -RUN apk add --no-cache git -RUN git clone --depth 1 --branch ${CONTAINER_APP_VERSION} \ - https://forge.ops.eblu.me/mirrors/paperless-ngx.git /src - -############################################### -# Stage 2: Compile frontend -############################################### -FROM --platform=$BUILDPLATFORM docker.io/node:20-trixie-slim AS compile-frontend - -COPY --from=source /src/src-ui /src/src-ui -WORKDIR /src/src-ui - -RUN set -eux \ - && npm update -g pnpm \ - && npm install -g corepack@latest \ - && corepack enable \ - && pnpm install - -RUN set -eux \ - && ./node_modules/.bin/ng build --configuration production - -############################################### -# Stage 3: 
s6-overlay base -############################################### -FROM ghcr.io/astral-sh/uv:0.9.15-python3.12-trixie-slim AS s6-overlay-base - -WORKDIR /usr/src/s6 - -ENV S6_BEHAVIOUR_IF_STAGE2_FAILS=2 \ - S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0 \ - S6_VERBOSITY=1 \ - PATH=/command:$PATH - -ARG TARGETARCH -ARG TARGETVARIANT -ARG S6_OVERLAY_VERSION=3.2.1.0 - -RUN set -eux \ - && apt-get update \ - && apt-get install --yes --quiet --no-install-recommends curl xz-utils \ - && S6_ARCH="" \ - && if [ "${TARGETARCH}${TARGETVARIANT}" = "amd64" ]; then S6_ARCH="x86_64"; \ - elif [ "${TARGETARCH}${TARGETVARIANT}" = "arm64" ]; then S6_ARCH="aarch64"; fi \ - && if [ -z "${S6_ARCH}" ]; then echo "Error: Cannot determine arch"; exit 1; fi \ - && curl --fail --silent --show-error --location --remote-name-all --parallel \ - "https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-noarch.tar.xz" \ - "https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-noarch.tar.xz.sha256" \ - "https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-${S6_ARCH}.tar.xz" \ - "https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-${S6_ARCH}.tar.xz.sha256" \ - && sha256sum --check ./*.sha256 \ - && tar --directory / -Jxpf s6-overlay-noarch.tar.xz \ - && tar --directory / -Jxpf s6-overlay-${S6_ARCH}.tar.xz \ - && rm ./*.tar.xz ./*.sha256 \ - && apt-get --yes purge curl xz-utils \ - && apt-get --yes autoremove --purge \ - && rm -rf /var/lib/apt/lists/* - -# Copy rootfs (s6 service definitions, init scripts) -COPY --from=source /src/docker/rootfs / - -############################################### -# Stage 4: Main application -############################################### -FROM s6-overlay-base AS main-app - -ARG CONTAINER_APP_VERSION -ARG DEBIAN_FRONTEND=noninteractive -ARG TARGETARCH -ARG JBIG2ENC_VERSION=0.30 - -ENV 
PYTHONDONTWRITEBYTECODE=1 \ - PYTHONUNBUFFERED=1 \ - PYTHONWARNINGS="ignore:::django.http.response:517" \ - PNGX_CONTAINERIZED=1 \ - UV_LINK_MODE=copy \ - UV_CACHE_DIR=/cache/uv/ - -# Runtime packages -RUN set -eux \ - && apt-get update \ - && apt-get install --yes --quiet --no-install-recommends \ - curl gosu tzdata fonts-liberation gettext ghostscript gnupg \ - icc-profiles-free imagemagick postgresql-client \ - tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-fra \ - tesseract-ocr-ita tesseract-ocr-spa unpaper pngquant jbig2dec \ - libxml2 libxslt1.1 qpdf file libmagic1 media-types zlib1g \ - libzbar0 poppler-utils \ - && curl --fail --silent --show-error --location --remote-name-all \ - "https://github.com/paperless-ngx/builder/releases/download/jbig2enc-trixie-v${JBIG2ENC_VERSION}/jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb" \ - && dpkg --install ./jbig2enc_${JBIG2ENC_VERSION}-1_${TARGETARCH}.deb \ - && cp /etc/ImageMagick-6/paperless-policy.xml /etc/ImageMagick-6/policy.xml \ - && rm --force *.deb \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /usr/src/paperless/src/ - -# Python dependencies -COPY --from=source /src/pyproject.toml /src/uv.lock /usr/src/paperless/src/ - -RUN --mount=type=cache,target=${UV_CACHE_DIR},id=python-cache \ - set -eux \ - && apt-get update \ - && apt-get install --yes --quiet --no-install-recommends \ - build-essential default-libmysqlclient-dev pkg-config \ - && uv export --quiet --no-dev --all-extras --format requirements-txt --output-file requirements.txt \ - && uv pip install --system --no-python-downloads --python-preference system --requirements requirements.txt \ - && python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" snowball_data \ - && python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" stopwords \ - && python3 -W ignore::RuntimeWarning -m nltk.downloader -d "/usr/share/nltk_data" punkt_tab \ - && apt-get --yes purge build-essential 
default-libmysqlclient-dev pkg-config \ - && apt-get --yes autoremove --purge \ - && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - -# Copy backend source -COPY --from=source /src/src ./ - -# Copy compiled frontend -COPY --from=compile-frontend /src/src/documents/static/frontend/ ./documents/static/frontend/ - -# Create user and finalize -RUN set -eux \ - && addgroup --gid 1000 paperless \ - && useradd --uid 1000 --gid paperless --home-dir /usr/src/paperless paperless \ - && mkdir -p /usr/src/paperless/data /usr/src/paperless/media \ - /usr/src/paperless/consume /usr/src/paperless/export \ - && chown -R paperless:paperless /usr/src/paperless \ - && s6-setuidgid paperless python3 manage.py collectstatic --clear --no-input --link \ - && s6-setuidgid paperless python3 manage.py compilemessages - -VOLUME ["/usr/src/paperless/data", "/usr/src/paperless/media", \ - "/usr/src/paperless/consume", "/usr/src/paperless/export"] - -ENTRYPOINT ["/init"] -EXPOSE 8000 - -HEALTHCHECK --interval=30s --timeout=10s --retries=5 \ - CMD [ "curl", "-fs", "-S", "-L", "--max-time", "2", "http://localhost:8000" ] - -LABEL org.opencontainers.image.title="Paperless-ngx" -LABEL org.opencontainers.image.description="Self-hosted document management system" -LABEL org.opencontainers.image.version="${CONTAINER_APP_VERSION}" -LABEL org.opencontainers.image.source="https://forge.eblu.me/eblume/blumeops" -LABEL org.opencontainers.image.vendor="blumeops" diff --git a/containers/paperless/default.nix b/containers/paperless/default.nix new file mode 100644 index 0000000..734d909 --- /dev/null +++ b/containers/paperless/default.nix @@ -0,0 +1,77 @@ +# Nix-built Paperless-ngx for ringtail (amd64). +# +# Replaces the from-source Dockerfile build (s6-overlay) with nixpkgs' +# paperless-ngx, which already bundles the full OCR/imaging closure +# (tesseract, ghostscript, imagemagick, qpdf, poppler, jbig2enc) and the +# NLTK data via wrappers — so the image stays lean. 
+# +# Unlike the upstream s6 image, this image does NOT run all processes +# itself. Paperless is multi-process; on ringtail it runs as four +# containers sharing this one image, each with a different command: +# web -> paperless-web (granian, the wrapper below) +# worker -> celery --app paperless worker +# beat -> celery --app paperless beat +# consumer -> paperless-ngx document_consumer +# plus a redis/valkey sidecar. The PYTHONPATH/granian invocation mirrors +# the nixpkgs paperless NixOS module's paperless-web service exactly. +# +# Self-pins nixos-unstable: stable nixpkgs lags at 2.19.6, while unstable +# carries 2.20.15 — a same-minor forward patch bump from the previous +# Dockerfile build (v2.20.13). The version assertion makes nix-build fail +# if a pin bump changes the version, forcing an explicit acknowledgment +# here and in service-versions.yaml (enforced by container-version-check). +let + nixpkgs = fetchTarball { + url = "https://github.com/NixOS/nixpkgs/archive/331800de5053fcebacf6813adb5db9c9dca22a0c.tar.gz"; + sha256 = "1p54fm6dkbq62kpi55cr4wyx7b1nsajpsnjgs64cmp073fwi15f7"; + }; + pkgs = import nixpkgs { system = "x86_64-linux"; }; + + version = "2.20.15"; + + app = pkgs.paperless-ngx; + + # Mirror the NixOS module's paperless-web service: granian serving the + # ASGI app with the package's propagated deps + src on PYTHONPATH. 
+ pythonPath = + "${app.python.pkgs.makePythonPath app.propagatedBuildInputs}:${app}/lib/paperless-ngx/src"; + + paperless-web = pkgs.writeShellScriptBin "paperless-web" '' + export PYTHONPATH="${pythonPath}" + export PAPERLESS_NLTK_DIR="${app.nltkDataDir}" + exec ${app.python.pkgs.granian}/bin/granian \ + --interface asginl --ws \ + --host 0.0.0.0 --port 8000 \ + "paperless.asgi:application" + ''; +in + +assert app.version == version; + +pkgs.dockerTools.buildLayeredImage { + name = "blumeops/paperless"; + + contents = [ + app + paperless-web + pkgs.bashInteractive + pkgs.coreutils + pkgs.cacert + pkgs.tzdata + ]; + + config = { + # Default command is the web server; worker/beat/consumer containers + # override `command` in their k8s manifests. + Cmd = [ "${paperless-web}/bin/paperless-web" ]; + Env = [ + "PAPERLESS_NLTK_DIR=${app.nltkDataDir}" + "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" + "PYTHONUNBUFFERED=1" + "PNGX_CONTAINERIZED=1" + ]; + ExposedPorts = { + "8000/tcp" = { }; + }; + }; +} diff --git a/containers/teslamate/container.py b/containers/teslamate/container.py deleted file mode 100644 index 519d77d..0000000 --- a/containers/teslamate/container.py +++ /dev/null @@ -1,104 +0,0 @@ -"""TeslaMate — Tesla data logger. - -Two-stage build: Elixir+Node (builder), Debian slim (runtime). -Source cloned from forge mirror. 
-""" - -import dagger -from dagger import dag - -from blumeops.containers import clone_from_forge, oci_labels - -VERSION = "v3.0.0" - - -async def build(src: dagger.Directory) -> dagger.Container: - source = clone_from_forge("teslamate", VERSION) - - # Stage 1: Build Elixir release with Node.js assets - builder = ( - dag.container() - .from_("elixir:1.19.5-otp-26") - .with_exec( - [ - "bash", - "-c", - "apt-get update" - " && apt-get install -y ca-certificates curl gnupg git zstd brotli" - " && mkdir -p /etc/apt/keyrings" - " && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" - " | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg" - ' && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg]' - ' https://deb.nodesource.com/node_22.x nodistro main"' - " > /etc/apt/sources.list.d/nodesource.list" - " && apt-get update" - " && apt-get install -y nodejs" - " && apt-get clean" - " && rm -rf /var/lib/apt/lists/*", - ] - ) - .with_exec(["mix", "local.rebar", "--force"]) - .with_exec(["mix", "local.hex", "--force"]) - .with_directory("/opt/app", source) - .with_workdir("/opt/app") - .with_env_variable("MIX_ENV", "prod") - .with_exec(["mix", "deps.get", "--only", "prod"]) - .with_exec(["mix", "deps.compile"]) - .with_exec( - [ - "npm", - "ci", - "--prefix", - "./assets", - "--progress=false", - "--no-audit", - "--loglevel=error", - ] - ) - .with_exec(["mix", "assets.deploy"]) - .with_exec(["mix", "compile"]) - .with_exec( - ["bash", "-c", "SKIP_LOCALE_DOWNLOAD=true mix release --path /opt/built"] - ) - ) - - # Stage 2: Debian slim runtime - entrypoint = src.file("containers/teslamate/entrypoint.sh") - - runtime = ( - dag.container() - .from_("debian:trixie-slim") - .with_exec( - [ - "bash", - "-c", - "apt-get update && apt-get install -y --no-install-recommends" - " libodbc2 libsctp1 libssl3t64 libstdc++6" - " netcat-openbsd tini tzdata" - " && apt-get clean" - " && rm -rf /var/lib/apt/lists/*" - " && groupadd --gid 10001 --system nonroot" - " && 
useradd --uid 10000 --system --gid nonroot" - " --home-dir /home/nonroot --shell /sbin/nologin nonroot", - ] - ) - ) - runtime = oci_labels( - runtime, - title="TeslaMate", - description="Tesla data logger and visualization", - version=VERSION, - ) - return ( - runtime.with_env_variable("LANG", "C.UTF-8") - .with_env_variable("SRTM_CACHE", "/opt/app/.srtm_cache") - .with_env_variable("HOME", "/opt/app") - .with_workdir("/opt/app") - .with_directory("/opt/app", builder.directory("/opt/built"), owner="nonroot") - .with_exec(["mkdir", "-p", "/opt/app/.srtm_cache"]) - .with_file("/entrypoint.sh", entrypoint, permissions=0o555, owner="nonroot") - .with_user("nonroot") - .with_exposed_port(4000) - .with_entrypoint(["tini", "--", "/bin/dash", "/entrypoint.sh"]) - .with_default_args(args=["bin/teslamate", "start"]) - ) diff --git a/containers/teslamate/default.nix b/containers/teslamate/default.nix new file mode 100644 index 0000000..e126561 --- /dev/null +++ b/containers/teslamate/default.nix @@ -0,0 +1,122 @@ +# Nix-built TeslaMate for ringtail (amd64). +# +# Replaces the Dagger container.py (Elixir+Node builder -> Debian slim). +# TeslaMate is NOT in nixpkgs, so this is a from-scratch beamPackages +# mixRelease: an Elixir/Phoenix release with npm-built assets. +# +# Pinned to the same nixos-unstable rev as paperless/mealie for a +# consistent toolchain. The BEAM combo is pinned to erlang_27 + elixir_1_18 +# (teslamate requires elixir ~> 1.17; upstream's image uses OTP 26, so we +# stay off the default OTP 28 which elixir 1.18 does not target). +# +# Source comes from the forge mirror (supply-chain control), pinned by the +# v3.0.0 tag's commit so builtins.fetchGit needs no hash. 
+let + nixpkgs = fetchTarball { + url = "https://github.com/NixOS/nixpkgs/archive/331800de5053fcebacf6813adb5db9c9dca22a0c.tar.gz"; + sha256 = "1p54fm6dkbq62kpi55cr4wyx7b1nsajpsnjgs64cmp073fwi15f7"; + }; + pkgs = import nixpkgs { system = "x86_64-linux"; }; + lib = pkgs.lib; + + version = "3.0.0"; + + beamPackages = pkgs.beam.packages.erlang_27; + elixir = beamPackages.elixir_1_18; + + src = builtins.fetchGit { + url = "https://forge.ops.eblu.me/mirrors/teslamate.git"; + ref = "refs/tags/v${version}"; + rev = "3281154d42330786a182c1bbe094ecda0b1c5578"; + }; + + # ex_cldr downloads locale JSON from GitHub at compile time, which the + # build sandbox blocks. teslamate's cldr.ex reads the data dir from the + # LOCALES env var; point it at the pre-fetched elixir-cldr data so no + # download is attempted (with SKIP_LOCALE_DOWNLOAD=true disabling the + # forced refresh). CLDR data version matches the compile-time errors. + cldrData = pkgs.fetchFromGitHub { + owner = "elixir-cldr"; + repo = "cldr"; + rev = "v2.46.0"; + sha256 = "1iwzk9dc754l72vpf8vsisdjncnjx26pz509552b6vnm49xbxyji"; + }; + + teslamate = beamPackages.mixRelease { + pname = "teslamate"; + inherit version src elixir; + + # Keep the build-generated Erlang cookie in the release. mixRelease + # strips it by default (expecting RELEASE_COOKIE at runtime), but the + # start script reads releases/COOKIE. teslamate is single-node (no + # distributed Erlang exposed), so a baked-in cookie is fine. + removeCookie = false; + + mixFodDeps = beamPackages.fetchMixDeps { + pname = "mix-deps-teslamate"; + inherit src version elixir; + hash = "sha256-DDrREiM1BIMgD2qFPTK8QyjOYlnfE3XlnaH/jk7G2go="; + }; + + # Frontend assets. esbuild + sass are devDeps and the esbuild platform + # binary is an optional dep, so npm ci must include both. We run npm ci + # here (not a separate derivation) because assets/package.json has + # file:../deps/phoenix references that only resolve once mixFodDeps has + # populated deps/. 
npmConfigHook wires up the offline cache from npmDeps; + # then `node scripts/build.js` (custom esbuild) + `mix phx.digest`. + nativeBuildInputs = [ pkgs.nodejs pkgs.npmHooks.npmConfigHook ]; + npmDeps = pkgs.fetchNpmDeps { + name = "teslamate-npm-deps"; + src = src + "/assets"; + hash = "sha256-XyiaUkT/c4rZnNxmxhVLb+vEXnc64A1hjOrnR5fhaEk="; + }; + npmRoot = "assets"; + + preBuild = '' + export SKIP_LOCALE_DOWNLOAD=true + export LOCALES=${cldrData}/priv/cldr + ( cd assets && npm ci --include=dev --include=optional && node scripts/build.js ) + mix phx.digest --no-deps-check + ''; + }; +in + +pkgs.dockerTools.buildLayeredImage { + name = "blumeops/teslamate"; + + contents = [ + teslamate + pkgs.bashInteractive + pkgs.coreutils + pkgs.dash + pkgs.netcat-openbsd + pkgs.cacert + pkgs.tzdata + ]; + + config = { + # Mirror entrypoint.sh: wait for postgres, run migrations, then start. + Entrypoint = [ + "${pkgs.dash}/bin/dash" + "-c" + '' + : "''${DATABASE_HOST:=127.0.0.1}" + : "''${DATABASE_PORT:=5432}" + while ! ${pkgs.netcat-openbsd}/bin/nc -z "$DATABASE_HOST" "$DATABASE_PORT" 2>/dev/null; do + echo "waiting for postgres at $DATABASE_HOST:$DATABASE_PORT"; sleep 1 + done + ${teslamate}/bin/teslamate eval "TeslaMate.Release.migrate" + exec ${teslamate}/bin/teslamate start + '' + ]; + Env = [ + "HOME=/opt/app" + "SRTM_CACHE=/opt/app/.srtm_cache" + "LANG=C.UTF-8" + "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" + ]; + ExposedPorts = { + "4000/tcp" = { }; + }; + }; +} diff --git a/containers/teslamate/entrypoint.sh b/containers/teslamate/entrypoint.sh deleted file mode 100644 index f66117e..0000000 --- a/containers/teslamate/entrypoint.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env dash -set -e - -: "${DATABASE_HOST:="127.0.0.1"}" -: "${DATABASE_PORT:=5432}" -: "${ULIMIT_MAX_NOFILE:=65536}" - -# prevent memory bloat in some misconfigured versions of Docker/containerd -# where the nofiles limit is very large. 0 means don't set it. 
-if test "${ULIMIT_MAX_NOFILE}" != 0 && test "$(ulimit -n)" -gt "${ULIMIT_MAX_NOFILE}"; then - ulimit -n "${ULIMIT_MAX_NOFILE}" -fi - -# wait until Postgres is ready -while ! nc -z "${DATABASE_HOST}" "${DATABASE_PORT}" 2>/dev/null; do - echo waiting for postgres at "${DATABASE_HOST}":"${DATABASE_PORT}" - sleep 1s -done - -# apply migrations -bin/teslamate eval "TeslaMate.Release.migrate" - -exec "$@" diff --git a/docs/changelog.d/migrate-wave1-ringtail.infra.md b/docs/changelog.d/migrate-wave1-ringtail.infra.md new file mode 100644 index 0000000..c44263a --- /dev/null +++ b/docs/changelog.d/migrate-wave1-ringtail.infra.md @@ -0,0 +1,13 @@ +Move paperless, teslamate, and mealie off `minikube-indri` onto +`k3s-ringtail`, shedding ~1.1 GiB of resident load from the +OOM-thrashing 8 GiB minikube node (the kernel OOM killer had been +killing `kube-apiserver`/`dockerd`/argocd, flapping every +minikube-hosted service at once). paperless + teslamate databases +move into a fresh CNPG `blumeops-pg` cluster on ringtail via a cold +`pg_dump`/`pg_restore` from the quiesced source — row counts verified +equal before any routing flip; source DBs dropped only after the +ringtail side serves traffic. mealie's SQLite PVC is copied as-is. +paperless media stays on sifaka NFS. Downtime-tolerant cold cutover +(no streaming replication); rollback is repoint-and-scale-up with the +source untouched. Second chain in the indri-k8s decommission after +[[migrate-immich-to-ringtail]]. diff --git a/docs/how-to/immich/migrate-immich-to-ringtail.md b/docs/how-to/immich/migrate-immich-to-ringtail.md index cd23384..e654b62 100644 --- a/docs/how-to/immich/migrate-immich-to-ringtail.md +++ b/docs/how-to/immich/migrate-immich-to-ringtail.md @@ -122,6 +122,8 @@ file). 
## Related +- [[migrate-wave1-ringtail]] — the next chain in the indri-k8s + decommission: paperless, teslamate, and mealie - [[shower-on-ringtail]] — a previous migration to ringtail (simpler: no upstream cluster, SQLite, no GPU) - [[connect-to-postgres]] — getting a psql session against CNPG diff --git a/docs/how-to/ringtail/migrate-wave1-ringtail.md b/docs/how-to/ringtail/migrate-wave1-ringtail.md new file mode 100644 index 0000000..ffb8cdc --- /dev/null +++ b/docs/how-to/ringtail/migrate-wave1-ringtail.md @@ -0,0 +1,176 @@ +--- +title: Migrate Wave 1 (paperless, teslamate, mealie) to Ringtail +modified: 2026-06-03 +last-reviewed: 2026-06-03 +tags: + - how-to + - operations + - ringtail + - migration +--- + +# Migrate Wave 1 to Ringtail + +Move paperless, teslamate, and mealie off `minikube-indri` and onto +`k3s-ringtail`. This is the load-shedding response to minikube going +OOM: the kernel OOM killer was thrashing the 8 GiB node — killing +`kube-apiserver`, `dockerd`, and the argocd application-controller — +which made every minikube-hosted service probe-flap at once. These +three app pods are ~1.1 GiB resident combined and are the heaviest +non-observability tenants left on minikube. Following +[[migrate-immich-to-ringtail]], the first chain in the indri-k8s +decommission. + +## End state + +- `paperless`, `teslamate`, and `mealie` run on ringtail k3s in their + own namespaces, off minikube entirely. +- A CNPG `blumeops-pg` Cluster runs in a `databases` namespace on + ringtail (PostgreSQL, owned by ringtail's `cnpg-system` operator), + holding the `paperless` and `teslamate` databases. Apps reach it + in-cluster via `blumeops-pg-rw.databases.svc.cluster.local`. +- mealie keeps its SQLite database; its 2 GiB `mealie-data` PVC is + copied to a ringtail PVC. +- paperless media still lives on [[sifaka]] via NFS (RWX, 500 GiB), + mounted from ringtail pods. teslamate has no file state. 
+- Routing: `paperless.ops.eblu.me`, `teslamate.ops.eblu.me`, and + `mealie.ops.eblu.me` (Caddy on indri) proxy to Tailscale + ProxyGroup ingresses on ringtail. Service names are unchanged. +- The minikube manifests and the `paperless`/`teslamate`/`mealie` + databases inside indri's `blumeops-pg` are removed only after + cutover is verified. + +## Non-goals + +- Migrating the rest of `blumeops-pg` (e.g. miniflux) — that is a + later wave. This chain moves only the paperless + teslamate + databases out; the source cluster on indri stays up for the others. +- Version bumps or config changes. Lift-and-shift only. +- Public (Fly) exposure changes. These stay tailnet-only. +- The observability stack (prometheus/loki/tempo/grafana) — deferred; + it carries 50 GiB of local TSDB and is the riskiest move. + +## Critical constraint: no data loss + +**Downtime is acceptable — data loss is not.** We can take each +service fully offline for its cutover, which removes the entire +class of streaming-replication and double-writer hazards. The cold +dump is taken from a *quiesced* source, so it is internally +consistent. + +Data surfaces: + +1. **paperless postgres** — document metadata, tags, correspondents, + the search index state. The document *files* are on NFS and never + move, but losing the DB means files-without-index. This is the + surface to protect most carefully. +2. **teslamate postgres** — drive/charge history. Re-derivable only + from Tesla's API for a limited window; treat as unrecoverable. +3. **mealie SQLite** — recipes, meal plans. On the `mealie-data` PVC. + +The source databases on indri are **never dropped until the ringtail +side is verified and serving**. Rollback is "repoint and scale back +up," not "restore from backup." [[borgmatic]] remains the backstop. 
+ +## Why a fresh CNPG cluster (not cross-cluster pg) + +indri's `blumeops-pg` is already exposed tailnet-wide at +`pg.ops.eblu.me` (Caddy L4), so we *could* leave the DBs on indri and +just move the app pods. We are not, because: + +- The goal is to retire minikube — keeping pg there blocks it and + leaves a cross-host runtime dependency (ringtail apps SPOF on + indri's pg over the tailnet). +- CNPG is the same operator on both clusters; a Cluster CR on ringtail + is mechanically equivalent to the one on minikube. +- Naming the ringtail cluster `blumeops-pg` in `databases` lets apps + use the same in-cluster DNS they would on indri. + +## Cold-cutover procedure (per service) + +Do these one service at a time. paperless first (heaviest, highest +data-sensitivity), then teslamate, then mealie. + +### 0. Prerequisites (once, before any service) + +- Confirm ringtail's `cnpg-system` operator and `databases` namespace + are healthy (immich-pg already runs there). +- Confirm ringtail pods can reach indri's `pg.ops.eblu.me:5432` (used + only to pull the dump) and the sifaka NFS export for paperless + media. See [[sifaka-nfs-from-ringtail]]. +- Define the ringtail `blumeops-pg` CNPG Cluster manifest (model on + `databases-ringtail/immich-pg.yaml`) and its ExternalSecrets for + the per-app roles. Sync it; let it come up empty and healthy. + +### 1. Quiesce the source + +```fish +kubectl --context=minikube-indri -n <app> scale deploy/<app> --replicas=0 +# confirm 0 running, DB now has no writers +``` + +### 2. Dump from indri, restore to ringtail (postgres apps) + +```fish +# dump the single app DB from the quiesced source +kubectl --context=minikube-indri -n databases exec blumeops-pg-1 -- \ + pg_dump -Fc -d <db> > /tmp/<db>.dump + +# restore into the ringtail cluster +kubectl --context=k3s-ringtail -n databases exec -i blumeops-pg-1 -- \ + pg_restore --no-owner --role=<role> -d <db> < /tmp/<db>.dump +``` + +For **mealie** (SQLite) instead: copy the `mealie-data` PVC contents +to the ringtail PVC (e.g.
a one-shot rsync pod mounting both, or +`kubectl cp` via a helper pod). Verify the `.db` file size and that +mealie boots read-only against it. + +### 3. Verify the restore (before any routing flips) + +- Row counts match source for the key tables, scripted: + - paperless: `documents_document`, `documents_tag`, + `documents_correspondent`, `auth_user`. + - teslamate: `cars`, `drives`, `charging_processes`, `positions`. +- `pg_dump --schema-only --no-owner` diff between source and dest is + empty modulo CNPG-managed roles. +- Boot the app against the ringtail DB on its tailnet name *before* + Caddy is flipped, and smoke-test (paperless: documents list + + search; teslamate: dashboard loads recent drives; mealie: recipes + list). + +### 4. Release the service name + +```fish +# delete the minikube tailscale ingress so ringtail can claim the name +kubectl --context=minikube-indri -n <app> delete ingress <app>-tailscale +``` + +### 5. Bring up on ringtail + +- Apply the ringtail manifests (new ArgoCD app `<app>-ringtail`, + `destination.server` = `https://ringtail.tail8d86e.ts.net:6443`). + App points at `blumeops-pg-rw.databases.svc.cluster.local`. +- Sync; wait for healthy + the ProxyGroup ingress to get its name. + +### 6. Flip routing + +- Repoint the Caddy `<app>.ops.eblu.me` upstream at the ringtail + ProxyGroup ingress (provision-indri, caddy role). +- `mise run services-check` — confirm the service flips from FIRING + to OK and no neighbours regressed. + +### 7. Decommission the source (only after verification) + +- Remove the minikube manifests for the app. +- Drop the app DB from indri's `blumeops-pg` (paperless/teslamate) + **last**, once the ringtail side has served real traffic. + +## Rollback + +If a cutover fails verification at any step before §7: + +- Re-create the minikube tailscale ingress (if §4 ran). +- Scale the minikube app back to `1`. +- Repoint Caddy back to the minikube ingress. +- The source DB was never modified or dropped. Document the failure.
diff --git a/service-versions.yaml b/service-versions.yaml index 5440f01..699f89c 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -47,7 +47,7 @@ services: - name: shower type: argocd last-reviewed: 2026-05-15 - current-version: "1.1.2" + current-version: "1.1.3" upstream-source: https://forge.eblu.me/eblume/adelaide-baby-shower-app notes: | Django app for Adelaide / Heidi / Addie's baby shower. Wheel @@ -222,9 +222,17 @@ services: - name: teslamate type: argocd - last-reviewed: 2026-04-14 + last-reviewed: "2026-06-03" current-version: "v3.0.0" upstream-source: https://github.com/teslamate-org/teslamate/releases + notes: >- + Tesla data logger. Container ported from Dagger (container.py) to Nix + (containers/teslamate/default.nix) — a from-scratch beamPackages + mixRelease (Elixir/Phoenix release with npm-built assets), since + teslamate is not in nixpkgs. Pins erlang_27 + elixir_1_18 from the + shared nixos-unstable rev; assets via in-release npm ci + esbuild; + ex_cldr locale data pre-fetched (LOCALES env) to avoid sandbox + downloads. Version unchanged (v3.0.0). Build verified on ringtail. - name: transmission type: argocd @@ -328,21 +336,31 @@ services: - name: mealie type: argocd - last-reviewed: 2026-05-11 - current-version: "v3.12.0" + last-reviewed: "2026-06-03" + current-version: "v3.16.0" upstream-source: https://github.com/mealie-recipes/mealie/releases notes: >- - Recipe manager; built from source via forge mirror. - Upstream is at v3.17.0 as of 2026-05-11 (5 minor versions ahead). - Container/manifest still pinned to v3.12.0 — upgrade deferred to a - separate task (build new image, review changelog for breaking changes). + Recipe manager. Container ported from Dockerfile to Nix + (containers/mealie/default.nix wraps nixpkgs mealie from a pinned + nixos-unstable; single gunicorn process, SQLite on the mealie-data + PVC). Bumped v3.12.0 -> v3.16.0 as part of the port (the deferred + upgrade). 
Breaking-change review v3.13-v3.16: no schema breaking + changes, SQLite auto-migrates forward via init_db; notable items are + minor (OIDC missing-claims log -> DEBUG, NLP parser uses user-defined + units, Nuxt 3->4 frontend, new Announcements feature, path-traversal + patches). Source PVC retained for rollback. Build verified on ringtail. - name: paperless type: argocd - last-reviewed: "2026-04-08" - current-version: "v2.20.13" + last-reviewed: "2026-06-03" + current-version: "v2.20.15" upstream-source: https://github.com/paperless-ngx/paperless-ngx/releases - notes: Document management; built from source via forge mirror + notes: >- + Document management. Container ported from Dockerfile to Nix + (containers/paperless/default.nix wraps nixpkgs paperless-ngx from a + pinned nixos-unstable). Runs as web/worker/beat/consumer containers on + ringtail (multi-process; no s6). Bumped v2.20.13 -> v2.20.15 (the + unstable package version, same-minor patch) as part of the port. - name: unpoller type: argocd From 92b54e7ba9a41b461a423cfdd5a53278a7e4ac40 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 3 Jun 2026 10:36:15 -0700 Subject: [PATCH 16/35] C0: ringtail wave-1 images rebuilt from main (fcac8e5-nix tags) Post-merge rebuild of paperless/mealie/teslamate Nix images at the main merge commit, replacing the feature-branch -nix tags. Image content is identical; only the commit-sha suffix changes. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/manifests/mealie-ringtail/kustomization.yaml | 2 +- argocd/manifests/paperless-ringtail/kustomization.yaml | 2 +- argocd/manifests/teslamate-ringtail/kustomization.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/argocd/manifests/mealie-ringtail/kustomization.yaml b/argocd/manifests/mealie-ringtail/kustomization.yaml index 8428042..2b6a7ef 100644 --- a/argocd/manifests/mealie-ringtail/kustomization.yaml +++ b/argocd/manifests/mealie-ringtail/kustomization.yaml @@ -12,4 +12,4 @@ resources: images: - name: registry.ops.eblu.me/blumeops/mealie - newTag: v3.16.0-1d4cbbf-nix + newTag: v3.16.0-fcac8e5-nix diff --git a/argocd/manifests/paperless-ringtail/kustomization.yaml b/argocd/manifests/paperless-ringtail/kustomization.yaml index 0a691e0..41665b8 100644 --- a/argocd/manifests/paperless-ringtail/kustomization.yaml +++ b/argocd/manifests/paperless-ringtail/kustomization.yaml @@ -13,7 +13,7 @@ resources: images: - name: registry.ops.eblu.me/blumeops/paperless - newTag: v2.20.15-1d4cbbf-nix + newTag: v2.20.15-fcac8e5-nix # amd64 valkey built via nix (the v8.1.7-ecded30 tag without -nix is the # arm64 Alpine build for indri and fails on ringtail with exec format error) - name: docker.io/library/redis diff --git a/argocd/manifests/teslamate-ringtail/kustomization.yaml b/argocd/manifests/teslamate-ringtail/kustomization.yaml index f31fe09..acb623e 100644 --- a/argocd/manifests/teslamate-ringtail/kustomization.yaml +++ b/argocd/manifests/teslamate-ringtail/kustomization.yaml @@ -12,4 +12,4 @@ resources: images: - name: registry.ops.eblu.me/blumeops/teslamate - newTag: v3.0.0-191be1b-nix + newTag: v3.0.0-fcac8e5-nix From e0057b46e4c7266fc4c01db7a88af69ae65ff655 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 3 Jun 2026 12:25:30 -0700 Subject: [PATCH 17/35] Wire ringtail blumeops-pg into backups + Grafana (#364) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Prereq for the wave-1 decommission. The cutover moved paperless+teslamate (postgres) and mealie (SQLite) to ringtail, but borgmatic and the Grafana TeslaMate datasource still pointed at the minikube copies — the migrated live data was unbacked since cutover, and dropping the minikube DBs would break the TeslaMate dashboards. - Tailscale Service `blumeops-pg-ringtail` + Caddy L4 route `pg.ops.eblu.me:5434` - borgmatic: teslamate + paperless postgres → :5434; mealie SQLite → ssh:eblume@ringtail - Grafana TeslaMate datasource → pg.ops.eblu.me:5434 Deploy: sync databases-ringtail (tailscale svc) + grafana from branch; provision-indri --tags caddy,borgmatic; verify a backup run + dashboards. Unblocks the decommission PR. Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/364 --- ansible/roles/borgmatic/defaults/main.yml | 16 +++++++------ ansible/roles/borgmatic/tasks/main.yml | 2 ++ .../borgmatic/templates/k8s-sqlite-dump.sh.j2 | 4 +++- ansible/roles/caddy/defaults/main.yml | 2 ++ .../databases-ringtail/kustomization.yaml | 1 + .../service-blumeops-pg-tailscale.yaml | 24 +++++++++++++++++++ argocd/manifests/grafana/datasources.yaml | 4 +++- .../mealie-ringtail/kustomization.yaml | 2 +- containers/mealie/default.nix | 4 ++++ ...ckup-grafana-ringtail-blumeops-pg.infra.md | 8 +++++++ 10 files changed, 57 insertions(+), 10 deletions(-) create mode 100644 argocd/manifests/databases-ringtail/service-blumeops-pg-tailscale.yaml create mode 100644 docs/changelog.d/backup-grafana-ringtail-blumeops-pg.infra.md diff --git a/ansible/roles/borgmatic/defaults/main.yml b/ansible/roles/borgmatic/defaults/main.yml index 3a89a09..a743161 100644 --- a/ansible/roles/borgmatic/defaults/main.yml +++ b/ansible/roles/borgmatic/defaults/main.yml @@ -56,8 +56,9 @@ borgmatic_k8s_sqlite_dumps: namespace: mealie label_selector: app=mealie db_path: /app/data/mealie.db - # local kubectl, --context=minikube (indri's only configured ctx) - target: local:minikube + 
# migrated to ringtail (wave-1); ssh to ringtail and run k3s kubectl + # there, same as shower below. + target: ssh:eblume@ringtail - name: shower namespace: shower label_selector: app=shower @@ -102,17 +103,18 @@ borgmatic_postgresql_databases: hostname: pg.ops.eblu.me port: 5432 username: borgmatic - - name: teslamate - hostname: pg.ops.eblu.me - port: 5432 - username: borgmatic - name: authentik hostname: pg.ops.eblu.me port: 5432 username: borgmatic + # migrated to ringtail blumeops-pg (wave-1); port 5434 = Caddy L4 route + - name: teslamate + hostname: pg.ops.eblu.me + port: 5434 + username: borgmatic - name: paperless hostname: pg.ops.eblu.me - port: 5432 + port: 5434 username: borgmatic # immich-pg cluster (VectorChord) via Caddy L4 on port 5433 - name: immich diff --git a/ansible/roles/borgmatic/tasks/main.yml b/ansible/roles/borgmatic/tasks/main.yml index 4ac242c..36d3bb6 100644 --- a/ansible/roles/borgmatic/tasks/main.yml +++ b/ansible/roles/borgmatic/tasks/main.yml @@ -19,8 +19,10 @@ ansible.builtin.copy: content: | # Managed by ansible (borgmatic role) - k8s PostgreSQL backup credentials + # 5432 = minikube blumeops-pg, 5433 = immich-pg, 5434 = ringtail blumeops-pg pg.ops.eblu.me:5432:*:borgmatic:{{ borgmatic_db_password }} pg.ops.eblu.me:5433:*:borgmatic:{{ borgmatic_db_password }} + pg.ops.eblu.me:5434:*:borgmatic:{{ borgmatic_db_password }} dest: ~/.pgpass mode: '0600' no_log: true diff --git a/ansible/roles/borgmatic/templates/k8s-sqlite-dump.sh.j2 b/ansible/roles/borgmatic/templates/k8s-sqlite-dump.sh.j2 index 323e717..9cc24da 100644 --- a/ansible/roles/borgmatic/templates/k8s-sqlite-dump.sh.j2 +++ b/ansible/roles/borgmatic/templates/k8s-sqlite-dump.sh.j2 @@ -28,7 +28,9 @@ db_path=${4:?missing db path} name=${5:?missing name} dump_target=${6:?missing dump target} -pod_tmp="/tmp/${name}-backup.db" +# Stage the backup next to the source DB (a guaranteed-writable volume); +# minimal nix images (e.g. mealie) have no /tmp. 
+pod_tmp="$(dirname "$db_path")/.borgmatic-backup-${name}.db" python_backup='import sqlite3; sqlite3.connect("'"$db_path"'").backup(sqlite3.connect("'"$pod_tmp"'"))' diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml index da6f3f9..363d09e 100644 --- a/ansible/roles/caddy/defaults/main.yml +++ b/ansible/roles/caddy/defaults/main.yml @@ -117,6 +117,8 @@ caddy_tcp_services: backend: "pg.tail8d86e.ts.net:5432" # PostgreSQL (blumeops-pg) - port: 5433 backend: "immich-pg.tail8d86e.ts.net:5432" # PostgreSQL (immich-pg) + - port: 5434 + backend: "blumeops-pg-ringtail.tail8d86e.ts.net:5432" # PostgreSQL (blumeops-pg on ringtail) - port: "{{ sifaka_node_exporter_port }}" backend: "sifaka:{{ sifaka_node_exporter_port }}" # Sifaka node_exporter - port: "{{ sifaka_smartctl_exporter_port }}" diff --git a/argocd/manifests/databases-ringtail/kustomization.yaml b/argocd/manifests/databases-ringtail/kustomization.yaml index 2bc2af3..143345c 100644 --- a/argocd/manifests/databases-ringtail/kustomization.yaml +++ b/argocd/manifests/databases-ringtail/kustomization.yaml @@ -9,6 +9,7 @@ resources: - service-immich-pg-tailscale.yaml # wave-1 indri-k8s decommission: blumeops-pg (paperless + teslamate) - blumeops-pg.yaml + - service-blumeops-pg-tailscale.yaml - external-secret-eblume.yaml - external-secret-borgmatic.yaml - external-secret-paperless.yaml diff --git a/argocd/manifests/databases-ringtail/service-blumeops-pg-tailscale.yaml b/argocd/manifests/databases-ringtail/service-blumeops-pg-tailscale.yaml new file mode 100644 index 0000000..f7ca5ef --- /dev/null +++ b/argocd/manifests/databases-ringtail/service-blumeops-pg-tailscale.yaml @@ -0,0 +1,24 @@ +# Tailscale LoadBalancer for the ringtail blumeops-pg cluster. +# Canonical hostname: blumeops-pg-ringtail.tail8d86e.ts.net (distinct from +# the minikube blumeops-pg, which still owns pg.tail8d86e.ts.net until the +# wave-1 decommission). 
Borgmatic on indri and the Grafana TeslaMate +# datasource reach it via the Caddy L4 route pg.ops.eblu.me:5434. +apiVersion: v1 +kind: Service +metadata: + name: blumeops-pg-tailscale + namespace: databases + annotations: + tailscale.com/hostname: "blumeops-pg-ringtail" + tailscale.com/proxy-class: "default" +spec: + type: LoadBalancer + loadBalancerClass: tailscale + selector: + cnpg.io/cluster: blumeops-pg + role: primary + ports: + - name: postgresql + port: 5432 + targetPort: 5432 + protocol: TCP diff --git a/argocd/manifests/grafana/datasources.yaml b/argocd/manifests/grafana/datasources.yaml index 5a3d0f3..64ed2bf 100644 --- a/argocd/manifests/grafana/datasources.yaml +++ b/argocd/manifests/grafana/datasources.yaml @@ -63,5 +63,7 @@ datasources: password: $TESLAMATE_DB_PASSWORD type: postgres uid: TeslaMate - url: blumeops-pg-rw.databases.svc.cluster.local:5432 + # teslamate DB migrated to ringtail blumeops-pg (wave-1); reached via the + # Caddy L4 route on indri (pg.ops.eblu.me:5434 -> blumeops-pg-ringtail). + url: pg.ops.eblu.me:5434 user: teslamate diff --git a/argocd/manifests/mealie-ringtail/kustomization.yaml b/argocd/manifests/mealie-ringtail/kustomization.yaml index 2b6a7ef..7679032 100644 --- a/argocd/manifests/mealie-ringtail/kustomization.yaml +++ b/argocd/manifests/mealie-ringtail/kustomization.yaml @@ -12,4 +12,4 @@ resources: images: - name: registry.ops.eblu.me/blumeops/mealie - newTag: v3.16.0-fcac8e5-nix + newTag: v3.16.0-22cfd86-nix diff --git a/containers/mealie/default.nix b/containers/mealie/default.nix index fdb1430..e55efe3 100644 --- a/containers/mealie/default.nix +++ b/containers/mealie/default.nix @@ -48,6 +48,10 @@ pkgs.dockerTools.buildLayeredImage { pkgs.coreutils pkgs.cacert pkgs.tzdata + # python3 (stdlib sqlite3) for the borgmatic k8s-sqlite-dump helper, + # which runs `python3 -c "...sqlite3...backup..."` inside the pod. + # Same nixpkgs python mealie is built against, so ~no added closure. 
+ pkgs.python3 ]; config = { diff --git a/docs/changelog.d/backup-grafana-ringtail-blumeops-pg.infra.md b/docs/changelog.d/backup-grafana-ringtail-blumeops-pg.infra.md new file mode 100644 index 0000000..33b041f --- /dev/null +++ b/docs/changelog.d/backup-grafana-ringtail-blumeops-pg.infra.md @@ -0,0 +1,8 @@ +Wire the ringtail `blumeops-pg` cluster (which holds the wave-1-migrated +paperless + teslamate databases) into backups and Grafana. Adds a Tailscale +LoadBalancer Service (`blumeops-pg-ringtail.tail8d86e.ts.net`) and a Caddy L4 +route (`pg.ops.eblu.me:5434`), then repoints borgmatic's `teslamate` + +`paperless` postgres dumps and the `mealie` SQLite dump at ringtail, and the +Grafana TeslaMate datasource at the ringtail DB. Closes the backup gap that +opened at cutover (the migrated live data was still being backed up from the +now-frozen minikube copies) and unblocks the wave-1 decommission. From 44798a6429adea3822041755af5ddd22ac149b98 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 3 Jun 2026 12:26:55 -0700 Subject: [PATCH 18/35] C0: mealie-ringtail image rebuilt from main (e0057b4-nix) Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/manifests/mealie-ringtail/kustomization.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argocd/manifests/mealie-ringtail/kustomization.yaml b/argocd/manifests/mealie-ringtail/kustomization.yaml index 7679032..ad65785 100644 --- a/argocd/manifests/mealie-ringtail/kustomization.yaml +++ b/argocd/manifests/mealie-ringtail/kustomization.yaml @@ -12,4 +12,4 @@ resources: images: - name: registry.ops.eblu.me/blumeops/mealie - newTag: v3.16.0-22cfd86-nix + newTag: v3.16.0-e0057b4-nix From 46f00021781e835fddc80de06588fb4ae87d5f5f Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 3 Jun 2026 12:36:06 -0700 Subject: [PATCH 19/35] Decommission wave-1 minikube services (paperless, teslamate, mealie) (#365) Final step of the wave-1 indri-k8s migration. 
paperless, teslamate, mealie run on ringtail with data migrated, verified, and backed up (local + BorgBase offsite via PR #364). - Remove minikube paperless/teslamate/mealie manifest dirs + ArgoCD app defs (prunes the parked Deployments/Services + redundant minikube mealie/paperless PVCs) - Drop paperless/teslamate roles + ExternalSecrets from the minikube blumeops-pg cluster - miniflux + authentik stay on minikube (later waves) Finalization after merge: sync apps + databases to prune, then DROP DATABASE paperless/teslamate on indri's blumeops-pg (fresh safety dump taken first). Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/365 --- argocd/apps/mealie.yaml | 17 --- argocd/apps/paperless.yaml | 17 --- argocd/apps/teslamate.yaml | 32 ----- argocd/manifests/databases/blumeops-pg.yaml | 23 +-- .../databases/external-secret-paperless.yaml | 28 ---- .../databases/external-secret-teslamate.yaml | 30 ---- argocd/manifests/databases/kustomization.yaml | 2 - argocd/manifests/mealie/deployment.yaml | 96 ------------- argocd/manifests/mealie/external-secret.yaml | 23 --- argocd/manifests/mealie/kustomization.yaml | 15 -- argocd/manifests/mealie/pvc.yaml | 13 -- argocd/manifests/mealie/service.yaml | 13 -- argocd/manifests/paperless/deployment.yaml | 133 ------------------ .../manifests/paperless/external-secret.yaml | 31 ---- argocd/manifests/paperless/kustomization.yaml | 19 --- argocd/manifests/paperless/pv-nfs.yaml | 22 --- argocd/manifests/paperless/pvc.yaml | 15 -- argocd/manifests/paperless/service.yaml | 13 -- argocd/manifests/teslamate/README.md | 69 --------- argocd/manifests/teslamate/deployment.yaml | 68 --------- .../teslamate/external-secret-db.yaml | 25 ---- .../external-secret-encryption-key.yaml | 27 ---- argocd/manifests/teslamate/kustomization.yaml | 15 -- argocd/manifests/teslamate/service.yaml | 12 -- .../decommission-wave1-minikube.infra.md | 8 ++ 25 files changed, 11 insertions(+), 755 deletions(-) delete mode 100644 argocd/apps/mealie.yaml 
delete mode 100644 argocd/apps/paperless.yaml delete mode 100644 argocd/apps/teslamate.yaml delete mode 100644 argocd/manifests/databases/external-secret-paperless.yaml delete mode 100644 argocd/manifests/databases/external-secret-teslamate.yaml delete mode 100644 argocd/manifests/mealie/deployment.yaml delete mode 100644 argocd/manifests/mealie/external-secret.yaml delete mode 100644 argocd/manifests/mealie/kustomization.yaml delete mode 100644 argocd/manifests/mealie/pvc.yaml delete mode 100644 argocd/manifests/mealie/service.yaml delete mode 100644 argocd/manifests/paperless/deployment.yaml delete mode 100644 argocd/manifests/paperless/external-secret.yaml delete mode 100644 argocd/manifests/paperless/kustomization.yaml delete mode 100644 argocd/manifests/paperless/pv-nfs.yaml delete mode 100644 argocd/manifests/paperless/pvc.yaml delete mode 100644 argocd/manifests/paperless/service.yaml delete mode 100644 argocd/manifests/teslamate/README.md delete mode 100644 argocd/manifests/teslamate/deployment.yaml delete mode 100644 argocd/manifests/teslamate/external-secret-db.yaml delete mode 100644 argocd/manifests/teslamate/external-secret-encryption-key.yaml delete mode 100644 argocd/manifests/teslamate/kustomization.yaml delete mode 100644 argocd/manifests/teslamate/service.yaml create mode 100644 docs/changelog.d/decommission-wave1-minikube.infra.md diff --git a/argocd/apps/mealie.yaml b/argocd/apps/mealie.yaml deleted file mode 100644 index af33469..0000000 --- a/argocd/apps/mealie.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: mealie - namespace: argocd -spec: - project: default - source: - repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git - targetRevision: main - path: argocd/manifests/mealie - destination: - server: https://kubernetes.default.svc - namespace: mealie - syncPolicy: - syncOptions: - - CreateNamespace=true diff --git a/argocd/apps/paperless.yaml 
b/argocd/apps/paperless.yaml deleted file mode 100644 index 88437eb..0000000 --- a/argocd/apps/paperless.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: paperless - namespace: argocd -spec: - project: default - source: - repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git - targetRevision: main - path: argocd/manifests/paperless - destination: - server: https://kubernetes.default.svc - namespace: paperless - syncPolicy: - syncOptions: - - CreateNamespace=true diff --git a/argocd/apps/teslamate.yaml b/argocd/apps/teslamate.yaml deleted file mode 100644 index 60247da..0000000 --- a/argocd/apps/teslamate.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# TeslaMate Tesla Data Logger -# Requires: CloudNativePG PostgreSQL cluster and manual secret setup -# -# Before syncing, create the namespace and secrets: -# kubectl create namespace teslamate -# op inject -i argocd/manifests/databases/secret-teslamate.yaml.tpl | kubectl apply -f - -# op inject -i argocd/manifests/teslamate/secret-encryption-key.yaml.tpl | kubectl apply -f - -# op inject -i argocd/manifests/teslamate/secret-db.yaml.tpl | kubectl apply -f - -# -# Then create the database: -# PGPASSWORD=$(op read "op://blumeops/postgres/password") \ -# psql -h pg.ops.eblu.me -U eblume -c "CREATE DATABASE teslamate OWNER teslamate;" -# -# After syncing, access the TeslaMate UI at https://tesla.tail8d86e.ts.net to complete -# Tesla API authentication via OAuth flow. 
-apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: teslamate - namespace: argocd -spec: - project: default - source: - repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git - targetRevision: main - path: argocd/manifests/teslamate - destination: - server: https://kubernetes.default.svc - namespace: teslamate - syncPolicy: - syncOptions: - - CreateNamespace=true diff --git a/argocd/manifests/databases/blumeops-pg.yaml b/argocd/manifests/databases/blumeops-pg.yaml index 58c771a..37aef23 100644 --- a/argocd/manifests/databases/blumeops-pg.yaml +++ b/argocd/manifests/databases/blumeops-pg.yaml @@ -44,18 +44,9 @@ spec: - pg_read_all_data passwordSecret: name: blumeops-pg-borgmatic - # teslamate user for TeslaMate Tesla data logger - # Superuser removed. Extension ownership (cube, earthdistance) - # transferred manually so teslamate can ALTER EXTENSION UPDATE. - # earthdistance is untrusted — DROP+CREATE needs temporary - # superuser escalation during upgrades. - - name: teslamate - login: true - connectionLimit: -1 - ensure: present - inherit: true - passwordSecret: - name: blumeops-pg-teslamate + # teslamate + paperless roles removed: migrated to ringtail blumeops-pg + # (wave-1 decommission). Their databases were dropped from this cluster + # after the cutover was verified and backed up. 
# authentik user for Authentik identity provider (runs on ringtail) - name: authentik login: true @@ -65,14 +56,6 @@ spec: createdb: true passwordSecret: name: blumeops-pg-authentik - # paperless user for Paperless-ngx document management - - name: paperless - login: true - connectionLimit: -1 - ensure: present - inherit: true - passwordSecret: - name: blumeops-pg-paperless # Resource limits for minikube environment resources: diff --git a/argocd/manifests/databases/external-secret-paperless.yaml b/argocd/manifests/databases/external-secret-paperless.yaml deleted file mode 100644 index e5742be..0000000 --- a/argocd/manifests/databases/external-secret-paperless.yaml +++ /dev/null @@ -1,28 +0,0 @@ -# ExternalSecret for Paperless database user password -# -# 1Password item: "Paperless (blumeops)" in blumeops vault -# Field: "postgresql-password" -# -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: blumeops-pg-paperless - namespace: databases -spec: - refreshInterval: 1h - secretStoreRef: - kind: ClusterSecretStore - name: onepassword-blumeops - target: - name: blumeops-pg-paperless - creationPolicy: Owner - template: - type: kubernetes.io/basic-auth - data: - username: paperless - password: "{{ .password }}" - data: - - secretKey: password - remoteRef: - key: Paperless (blumeops) - property: postgresql-password diff --git a/argocd/manifests/databases/external-secret-teslamate.yaml b/argocd/manifests/databases/external-secret-teslamate.yaml deleted file mode 100644 index 0c52e0b..0000000 --- a/argocd/manifests/databases/external-secret-teslamate.yaml +++ /dev/null @@ -1,30 +0,0 @@ -# ExternalSecret for TeslaMate database user password -# -# Replaces the manual op inject workflow from secret-teslamate.yaml.tpl -# -# 1Password item: "TeslaMate" in blumeops vault -# Field: "db_password" -# -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: blumeops-pg-teslamate - namespace: databases -spec: - refreshInterval: 1h - 
secretStoreRef: - kind: ClusterSecretStore - name: onepassword-blumeops - target: - name: blumeops-pg-teslamate - creationPolicy: Owner - template: - type: kubernetes.io/basic-auth - data: - username: teslamate - password: "{{ .password }}" - data: - - secretKey: password - remoteRef: - key: TeslaMate - property: db_password diff --git a/argocd/manifests/databases/kustomization.yaml b/argocd/manifests/databases/kustomization.yaml index 692285a..0393757 100644 --- a/argocd/manifests/databases/kustomization.yaml +++ b/argocd/manifests/databases/kustomization.yaml @@ -9,6 +9,4 @@ resources: - service-metrics-tailscale.yaml - external-secret-eblume.yaml - external-secret-borgmatic.yaml - - external-secret-teslamate.yaml - external-secret-authentik.yaml - - external-secret-paperless.yaml diff --git a/argocd/manifests/mealie/deployment.yaml b/argocd/manifests/mealie/deployment.yaml deleted file mode 100644 index 7cdd275..0000000 --- a/argocd/manifests/mealie/deployment.yaml +++ /dev/null @@ -1,96 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: mealie - namespace: mealie -spec: - # Migrated to ringtail (mealie-ringtail). Scaled to 0; SQLite PVC retained - # for rollback until the decommission PR. See [[migrate-wave1-ringtail]]. 
- replicas: 0 - selector: - matchLabels: - app: mealie - template: - metadata: - labels: - app: mealie - spec: - securityContext: - seccompProfile: - type: RuntimeDefault - containers: - - name: mealie - image: registry.ops.eblu.me/blumeops/mealie:kustomized - ports: - - containerPort: 9000 - env: - - name: BASE_URL - value: "https://meals.ops.eblu.me" - - name: ALLOW_SIGNUP - value: "false" - - name: TZ - value: "America/Los_Angeles" - - name: MAX_WORKERS - value: "1" - - name: WEB_CONCURRENCY - value: "1" - # OIDC — Authentik (public client, PKCE) - - name: OIDC_AUTH_ENABLED - value: "true" - - name: OIDC_CONFIGURATION_URL - value: "https://authentik.ops.eblu.me/application/o/mealie/.well-known/openid-configuration" - - name: OIDC_CLIENT_ID - value: "mealie" - - name: OIDC_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: mealie-secrets - key: oidc-client-secret - - name: OIDC_AUTO_REDIRECT - value: "false" - - name: OIDC_PROVIDER_NAME - value: "Authentik" - - name: OIDC_ADMIN_GROUP - value: "admins" - - name: OIDC_SIGNUP_ENABLED - value: "true" - - name: OIDC_USER_CLAIM - value: "email" - # OpenAI — recipe parsing, image OCR, ingredient extraction - - name: OPENAI_API_KEY - valueFrom: - secretKeyRef: - name: mealie-secrets - key: openai-api-key - - name: OPENAI_MODEL - value: "gpt-4o" - - name: OPENAI_REQUEST_TIMEOUT - value: "120" - - name: OPENAI_WORKERS - value: "1" - volumeMounts: - - name: data - mountPath: /app/data - resources: - requests: - memory: "128Mi" - cpu: "50m" - limits: - memory: "1000Mi" - cpu: "500m" - livenessProbe: - httpGet: - path: /api/app/about - port: 9000 - initialDelaySeconds: 30 - periodSeconds: 30 - readinessProbe: - httpGet: - path: /api/app/about - port: 9000 - initialDelaySeconds: 10 - periodSeconds: 10 - volumes: - - name: data - persistentVolumeClaim: - claimName: mealie-data diff --git a/argocd/manifests/mealie/external-secret.yaml b/argocd/manifests/mealie/external-secret.yaml deleted file mode 100644 index 99c2793..0000000 
--- a/argocd/manifests/mealie/external-secret.yaml +++ /dev/null @@ -1,23 +0,0 @@ ---- -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: mealie-secrets - namespace: mealie -spec: - refreshInterval: 1h - secretStoreRef: - kind: ClusterSecretStore - name: onepassword-blumeops - target: - name: mealie-secrets - creationPolicy: Owner - data: - - secretKey: oidc-client-secret - remoteRef: - key: "Authentik (blumeops)" - property: mealie-client-secret - - secretKey: openai-api-key - remoteRef: - key: "openai (blumeops)" - property: credential diff --git a/argocd/manifests/mealie/kustomization.yaml b/argocd/manifests/mealie/kustomization.yaml deleted file mode 100644 index 02563f4..0000000 --- a/argocd/manifests/mealie/kustomization.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -namespace: mealie - -resources: - - deployment.yaml - - service.yaml - - pvc.yaml - # ingress removed: name 'meals' handed off to mealie-ringtail at cutover - - external-secret.yaml - -images: - - name: registry.ops.eblu.me/blumeops/mealie - newTag: v3.12.0-613f05d diff --git a/argocd/manifests/mealie/pvc.yaml b/argocd/manifests/mealie/pvc.yaml deleted file mode 100644 index f473e07..0000000 --- a/argocd/manifests/mealie/pvc.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: mealie-data - namespace: mealie -spec: - accessModes: - - ReadWriteOnce - storageClassName: standard - resources: - requests: - storage: 2Gi diff --git a/argocd/manifests/mealie/service.yaml b/argocd/manifests/mealie/service.yaml deleted file mode 100644 index 4162b96..0000000 --- a/argocd/manifests/mealie/service.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: mealie - namespace: mealie -spec: - selector: - app: mealie - ports: - - name: http - port: 9000 - targetPort: 9000 - protocol: TCP diff --git a/argocd/manifests/paperless/deployment.yaml 
b/argocd/manifests/paperless/deployment.yaml deleted file mode 100644 index 1730486..0000000 --- a/argocd/manifests/paperless/deployment.yaml +++ /dev/null @@ -1,133 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: paperless - namespace: paperless -spec: - # Migrated to ringtail (paperless-ringtail). Scaled to 0 to prevent - # double-writing the now-ringtail-owned database; manifest retained for - # rollback until the decommission PR. See [[migrate-wave1-ringtail]]. - replicas: 0 - selector: - matchLabels: - app: paperless - template: - metadata: - labels: - app: paperless - spec: - securityContext: - seccompProfile: - type: RuntimeDefault - containers: - - name: paperless - image: registry.ops.eblu.me/blumeops/paperless:kustomized - ports: - - containerPort: 8000 - name: http - env: - - name: PAPERLESS_URL - value: "https://paperless.ops.eblu.me" - - name: PAPERLESS_REDIS - value: "redis://localhost:6379" - - name: PAPERLESS_DBHOST - value: "pg.ops.eblu.me" - - name: PAPERLESS_DBPORT - value: "5432" - - name: PAPERLESS_DBNAME - value: "paperless" - # Explicit port to override k8s-injected PAPERLESS_PORT env var - # (k8s sets PAPERLESS_PORT=tcp://... 
for a service named 'paperless') - - name: PAPERLESS_PORT - value: "8000" - - name: PAPERLESS_DBUSER - value: "paperless" - - name: PAPERLESS_DBPASS - valueFrom: - secretKeyRef: - name: paperless-secrets - key: db-password - - name: PAPERLESS_SECRET_KEY - valueFrom: - secretKeyRef: - name: paperless-secrets - key: secret-key - - name: PAPERLESS_TIME_ZONE - value: "America/Los_Angeles" - - name: PAPERLESS_OCR_LANGUAGE - value: "eng" - - name: PAPERLESS_TASK_WORKERS - value: "1" - # Admin account (created on first startup) - - name: PAPERLESS_ADMIN_USER - value: "eblume" - - name: PAPERLESS_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: paperless-secrets - key: admin-password - - name: PAPERLESS_ADMIN_MAIL - value: "blume.erich@gmail.com" - # OIDC via Authentik - # Full JSON blob pulled from 1Password (includes client secret) - - name: PAPERLESS_APPS - value: "allauth.socialaccount.providers.openid_connect" - - name: PAPERLESS_SOCIALACCOUNT_PROVIDERS - valueFrom: - secretKeyRef: - name: paperless-secrets - key: socialaccount-providers - - name: PAPERLESS_SOCIALACCOUNT_ALLOW_SIGNUPS - value: "true" - - name: PAPERLESS_SOCIAL_AUTO_SIGNUP - value: "true" - - name: PAPERLESS_ACCOUNT_ALLOW_SIGNUPS - value: "false" - - name: PAPERLESS_REDIRECT_LOGIN_TO_SSO - value: "false" - volumeMounts: - - name: data - mountPath: /usr/src/paperless/data - - name: media - mountPath: /usr/src/paperless/media - - name: consume - mountPath: /usr/src/paperless/consume - resources: - requests: - memory: "256Mi" - cpu: "100m" - limits: - memory: "2Gi" - cpu: "1000m" - livenessProbe: - httpGet: - path: / - port: 8000 - initialDelaySeconds: 60 - periodSeconds: 30 - readinessProbe: - httpGet: - path: / - port: 8000 - initialDelaySeconds: 30 - periodSeconds: 10 - - - name: redis - image: docker.io/library/redis:kustomized - ports: - - containerPort: 6379 - resources: - requests: - memory: "32Mi" - cpu: "10m" - limits: - memory: "128Mi" - - volumes: - - name: data - emptyDir: {} - - name: 
media - persistentVolumeClaim: - claimName: paperless-media - - name: consume - emptyDir: {} diff --git a/argocd/manifests/paperless/external-secret.yaml b/argocd/manifests/paperless/external-secret.yaml deleted file mode 100644 index 750b7c5..0000000 --- a/argocd/manifests/paperless/external-secret.yaml +++ /dev/null @@ -1,31 +0,0 @@ ---- -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: paperless-secrets - namespace: paperless -spec: - refreshInterval: 1h - secretStoreRef: - kind: ClusterSecretStore - name: onepassword-blumeops - target: - name: paperless-secrets - creationPolicy: Owner - data: - - secretKey: db-password - remoteRef: - key: "Paperless (blumeops)" - property: postgresql-password - - secretKey: secret-key - remoteRef: - key: "Paperless (blumeops)" - property: secret-key - - secretKey: admin-password - remoteRef: - key: "Paperless (blumeops)" - property: admin-password - - secretKey: socialaccount-providers - remoteRef: - key: "Paperless (blumeops)" - property: socialaccount-providers diff --git a/argocd/manifests/paperless/kustomization.yaml b/argocd/manifests/paperless/kustomization.yaml deleted file mode 100644 index a92a769..0000000 --- a/argocd/manifests/paperless/kustomization.yaml +++ /dev/null @@ -1,19 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -namespace: paperless - -resources: - - deployment.yaml - - service.yaml - - pv-nfs.yaml - - pvc.yaml - # ingress removed: name 'paperless' handed off to paperless-ringtail at cutover - - external-secret.yaml - -images: - - name: registry.ops.eblu.me/blumeops/paperless - newTag: v2.20.13-07f52e9 - - name: docker.io/library/redis - newName: registry.ops.eblu.me/blumeops/valkey - newTag: v8.1.7-ecded30 diff --git a/argocd/manifests/paperless/pv-nfs.yaml b/argocd/manifests/paperless/pv-nfs.yaml deleted file mode 100644 index 8ee7526..0000000 --- a/argocd/manifests/paperless/pv-nfs.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# NFS PersistentVolume for 
Paperless document library -# Requires: NFS share on sifaka at /volume1/paperless with NFS permissions for indri -# -# To create on Synology: -# 1. Control Panel > Shared Folder > Create -# 2. Name: paperless, Location: Volume 1 -# 3. Control Panel > File Services > NFS > NFS Rules -# 4. Add rule for "paperless" share: Hostname=indri, Privilege=Read/Write, Squash=No mapping -apiVersion: v1 -kind: PersistentVolume -metadata: - name: paperless-media-nfs-pv -spec: - capacity: - storage: 500Gi - accessModes: - - ReadWriteMany - persistentVolumeReclaimPolicy: Retain - storageClassName: "" - nfs: - server: sifaka - path: /volume1/paperless diff --git a/argocd/manifests/paperless/pvc.yaml b/argocd/manifests/paperless/pvc.yaml deleted file mode 100644 index 4365c9f..0000000 --- a/argocd/manifests/paperless/pvc.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# PersistentVolumeClaim for Paperless document library -# Binds to the NFS PV for sifaka:/volume1/paperless -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: paperless-media - namespace: paperless -spec: - accessModes: - - ReadWriteMany - storageClassName: "" - volumeName: paperless-media-nfs-pv - resources: - requests: - storage: 500Gi diff --git a/argocd/manifests/paperless/service.yaml b/argocd/manifests/paperless/service.yaml deleted file mode 100644 index cff2972..0000000 --- a/argocd/manifests/paperless/service.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: paperless - namespace: paperless -spec: - selector: - app: paperless - ports: - - name: http - port: 8000 - targetPort: 8000 - protocol: TCP diff --git a/argocd/manifests/teslamate/README.md b/argocd/manifests/teslamate/README.md deleted file mode 100644 index 7e1f9fc..0000000 --- a/argocd/manifests/teslamate/README.md +++ /dev/null @@ -1,69 +0,0 @@ -# TeslaMate - -TeslaMate is a self-hosted Tesla data logger that collects and visualizes vehicle data. - -## Prerequisites - -### 1. 
Create 1Password Secrets - -Create two items in the blumeops 1Password vault: - -1. **TeslaMate DB Password** - - Generate a secure password for the teslamate PostgreSQL user - - Add a field named `password` with the generated value - -2. **TeslaMate Encryption Key** - - Generate with: `openssl rand -base64 32` - - Add a field named `key` with the generated value - - This encrypts Tesla API tokens at rest in the database - -### 2. Apply Kubernetes Secrets - -```bash -# Create namespace -kubectl create namespace teslamate - -# Apply database user secret (for CNPG) -op inject -i argocd/manifests/databases/secret-teslamate.yaml.tpl | kubectl apply -f - - -# Apply teslamate secrets -op inject -i argocd/manifests/teslamate/secret-encryption-key.yaml.tpl | kubectl apply -f - -op inject -i argocd/manifests/teslamate/secret-db.yaml.tpl | kubectl apply -f - -``` - -### 3. Create Database - -After the teslamate user exists in PostgreSQL (sync blumeops-pg first): - -```bash -PGPASSWORD=$(op read "op://blumeops/postgres/password") \ - psql -h pg.ops.eblu.me -U eblume -c "CREATE DATABASE teslamate OWNER teslamate;" -``` - -## Deployment - -```bash -# Sync ArgoCD apps -argocd app sync apps -argocd app sync blumeops-pg teslamate grafana grafana-config -``` - -## Tesla API Setup - -1. Access TeslaMate UI at https://tesla.tail8d86e.ts.net -2. Click "Sign in with Tesla" -3. Complete OAuth flow in browser -4. Tokens are encrypted and stored in database -5. Verify vehicle appears and data collection starts - -## Grafana Dashboards - -TeslaMate dashboards are available in Grafana at https://grafana.tail8d86e.ts.net - -They use the "TeslaMate" PostgreSQL datasource (not Prometheus). 
- -## Notes - -- MQTT is disabled (can be enabled later for Home Assistant integration) -- Timezone is set to America/Los_Angeles -- Encryption key protects Tesla API tokens at rest diff --git a/argocd/manifests/teslamate/deployment.yaml b/argocd/manifests/teslamate/deployment.yaml deleted file mode 100644 index cf7f9bb..0000000 --- a/argocd/manifests/teslamate/deployment.yaml +++ /dev/null @@ -1,68 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: teslamate - namespace: teslamate -spec: - # Migrated to ringtail (teslamate-ringtail). Scaled to 0 to prevent - # double-writing the now-ringtail-owned database; manifest retained for - # rollback until the decommission PR. See [[migrate-wave1-ringtail]]. - replicas: 0 - selector: - matchLabels: - app: teslamate - template: - metadata: - labels: - app: teslamate - spec: - securityContext: - seccompProfile: - type: RuntimeDefault - containers: - - name: teslamate - image: registry.ops.eblu.me/blumeops/teslamate:kustomized - ports: - - containerPort: 4000 - env: - - name: DATABASE_USER - value: "teslamate" - - name: DATABASE_PASS - valueFrom: - secretKeyRef: - name: teslamate-db - key: password - - name: DATABASE_NAME - value: "teslamate" - - name: DATABASE_HOST - value: "blumeops-pg-rw.databases.svc.cluster.local" - - name: ENCRYPTION_KEY - valueFrom: - secretKeyRef: - name: teslamate-encryption - key: key - - name: DISABLE_MQTT - value: "true" - - name: CHECK_ORIGIN - value: "false" - - name: TZ - value: "America/Los_Angeles" - resources: - requests: - memory: "128Mi" - cpu: "100m" - limits: - memory: "512Mi" - cpu: "500m" - livenessProbe: - httpGet: - path: / - port: 4000 - initialDelaySeconds: 30 - periodSeconds: 30 - readinessProbe: - httpGet: - path: / - port: 4000 - initialDelaySeconds: 10 - periodSeconds: 10 diff --git a/argocd/manifests/teslamate/external-secret-db.yaml b/argocd/manifests/teslamate/external-secret-db.yaml deleted file mode 100644 index 11eeec6..0000000 --- 
a/argocd/manifests/teslamate/external-secret-db.yaml +++ /dev/null @@ -1,25 +0,0 @@ -# ExternalSecret for TeslaMate database password -# -# Replaces the manual op inject workflow from secret-db.yaml.tpl -# -# 1Password item: "TeslaMate" in blumeops vault -# Field: "db_password" -# -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: teslamate-db - namespace: teslamate -spec: - refreshInterval: 1h - secretStoreRef: - kind: ClusterSecretStore - name: onepassword-blumeops - target: - name: teslamate-db - creationPolicy: Owner - data: - - secretKey: password - remoteRef: - key: TeslaMate - property: db_password diff --git a/argocd/manifests/teslamate/external-secret-encryption-key.yaml b/argocd/manifests/teslamate/external-secret-encryption-key.yaml deleted file mode 100644 index 96938bf..0000000 --- a/argocd/manifests/teslamate/external-secret-encryption-key.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# ExternalSecret for TeslaMate encryption key -# -# Replaces the manual op inject workflow from secret-encryption-key.yaml.tpl -# -# 1Password item: "TeslaMate" in blumeops vault -# Field: "api_enc_key" -# -# This key encrypts Tesla API tokens at rest in the database. 
-# -apiVersion: external-secrets.io/v1 -kind: ExternalSecret -metadata: - name: teslamate-encryption - namespace: teslamate -spec: - refreshInterval: 1h - secretStoreRef: - kind: ClusterSecretStore - name: onepassword-blumeops - target: - name: teslamate-encryption - creationPolicy: Owner - data: - - secretKey: key - remoteRef: - key: TeslaMate - property: api_enc_key diff --git a/argocd/manifests/teslamate/kustomization.yaml b/argocd/manifests/teslamate/kustomization.yaml deleted file mode 100644 index be9d39d..0000000 --- a/argocd/manifests/teslamate/kustomization.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -namespace: teslamate - -resources: - - deployment.yaml - - service.yaml - # ingress removed: name 'tesla' handed off to teslamate-ringtail at cutover - - external-secret-db.yaml - - external-secret-encryption-key.yaml - -images: - - name: registry.ops.eblu.me/blumeops/teslamate - newTag: v3.0.0-08c698e diff --git a/argocd/manifests/teslamate/service.yaml b/argocd/manifests/teslamate/service.yaml deleted file mode 100644 index b04f45e..0000000 --- a/argocd/manifests/teslamate/service.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: teslamate - namespace: teslamate -spec: - selector: - app: teslamate - ports: - - port: 4000 - targetPort: 4000 - type: ClusterIP diff --git a/docs/changelog.d/decommission-wave1-minikube.infra.md b/docs/changelog.d/decommission-wave1-minikube.infra.md new file mode 100644 index 0000000..63b3ab5 --- /dev/null +++ b/docs/changelog.d/decommission-wave1-minikube.infra.md @@ -0,0 +1,8 @@ +Decommission the wave-1 services on minikube-indri now that paperless, +teslamate, and mealie run on ringtail with their data backed up. 
Removes the +minikube `paperless`/`teslamate`/`mealie` manifest dirs + ArgoCD app +definitions (pruning the parked Deployments, Services, and the redundant +minikube mealie/paperless PVCs), and drops the `paperless`/`teslamate` roles +from the minikube `blumeops-pg` cluster. The `paperless` and `teslamate` +databases are dropped from indri's blumeops-pg as the finalization step. +miniflux + authentik remain on the minikube cluster (later waves). From eaa899cfc65fd5d704c88e39771bc293765b181d Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 3 Jun 2026 13:02:05 -0700 Subject: [PATCH 20/35] C0: wave-1 decommission follow-ups (argocd admin RBAC, teslamate probe) - argocd: grant local break-glass admin the admin role (g, admin, role:admin); previously only the Authentik admins group had access, locking out admin once its token expired (policy.default is unset). - alloy-k8s: repoint the teslamate blackbox probe from the deleted minikube service to https://tesla.ops.eblu.me/ (Caddy over Tailscale), like immich. Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/manifests/alloy-k8s/config.alloy | 3 ++- argocd/manifests/argocd/argocd-rbac-cm-patch.yaml | 4 ++++ docs/changelog.d/+wave1-decommission-followups.infra.md | 8 ++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 docs/changelog.d/+wave1-decommission-followups.infra.md diff --git a/argocd/manifests/alloy-k8s/config.alloy b/argocd/manifests/alloy-k8s/config.alloy index 5a0a8f9..2940b0b 100644 --- a/argocd/manifests/alloy-k8s/config.alloy +++ b/argocd/manifests/alloy-k8s/config.alloy @@ -191,8 +191,9 @@ prometheus.exporter.blackbox "services" { } target { + // Migrated to ringtail (wave-1); probe through Caddy over Tailscale. 
name = "teslamate" - address = "http://teslamate.teslamate.svc.cluster.local:4000/" + address = "https://tesla.ops.eblu.me/" module = "http_2xx" } diff --git a/argocd/manifests/argocd/argocd-rbac-cm-patch.yaml b/argocd/manifests/argocd/argocd-rbac-cm-patch.yaml index c2ea095..4914587 100644 --- a/argocd/manifests/argocd/argocd-rbac-cm-patch.yaml +++ b/argocd/manifests/argocd/argocd-rbac-cm-patch.yaml @@ -2,6 +2,9 @@ # # - workflow-bot: minimal CI/CD permissions (sync, get) # - admins: Authentik admins group mapped to ArgoCD admin role +# - admin: local break-glass account — keeps ArgoCD admin rights for when +# Authentik SSO is unavailable (without this it has no permissions, since +# policy.default is unset) # apiVersion: v1 kind: ConfigMap @@ -14,3 +17,4 @@ data: p, role:workflow-bot, applications, get, *, allow g, workflow-bot, role:workflow-bot g, admins, role:admin + g, admin, role:admin diff --git a/docs/changelog.d/+wave1-decommission-followups.infra.md b/docs/changelog.d/+wave1-decommission-followups.infra.md new file mode 100644 index 0000000..7b54d52 --- /dev/null +++ b/docs/changelog.d/+wave1-decommission-followups.infra.md @@ -0,0 +1,8 @@ +Fix three follow-ups from the wave-1 decommission: grant the local +break-glass `admin` account ArgoCD admin rights (`g, admin, role:admin` — +previously only the Authentik `admins` group had access, so admin was +locked out whenever its token expired), and repoint the alloy blackbox +probe for teslamate from the deleted minikube service to +`https://tesla.ops.eblu.me/` (through Caddy over Tailscale). The orphaned +paperless/teslamate roles + ExternalSecrets left on the minikube +blumeops-pg are also cleaned up. 
From 308c8e3dad287b2de98891681db4c254ef1c181a Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 3 Jun 2026 15:31:59 -0700 Subject: [PATCH 21/35] C0: drop duplicate Homepage static entries for ringtail-migrated services Mealie, Paperless, Immich, TeslaMate are now autodiscovered from their ringtail Ingress gethomepage.dev annotations; the static services.yaml entries (from when they were on minikube, which homepage-on-ringtail can't autodiscover) were duplicating them. Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/manifests/homepage/services.yaml | 16 ---------------- .../changelog.d/+homepage-dedup-migrated.misc.md | 5 +++++ 2 files changed, 5 insertions(+), 16 deletions(-) create mode 100644 docs/changelog.d/+homepage-dedup-migrated.misc.md diff --git a/argocd/manifests/homepage/services.yaml b/argocd/manifests/homepage/services.yaml index d552ff2..cc1adf4 100644 --- a/argocd/manifests/homepage/services.yaml +++ b/argocd/manifests/homepage/services.yaml @@ -71,10 +71,6 @@ enableBlocks: true enableNowPlaying: false fields: ["movies", "series", "episodes"] - - Mealie: - href: https://meals.ops.eblu.me - icon: mealie.png - description: Recipe manager - DJ: href: https://dj.ops.eblu.me icon: navidrome.png @@ -85,15 +81,7 @@ user: "{{HOMEPAGE_VAR_NAVIDROME_USER}}" token: "{{HOMEPAGE_VAR_NAVIDROME_TOKEN}}" salt: "{{HOMEPAGE_VAR_NAVIDROME_SALT}}" - - Paperless: - href: https://paperless.ops.eblu.me - icon: paperless-ngx.png - description: Document management - Content: - - Immich: - href: https://photos.ops.eblu.me - icon: immich.png - description: Photo management - Kiwix: href: https://kiwix.ops.eblu.me icon: kiwix.png @@ -138,10 +126,6 @@ href: https://docs.eblu.me icon: mdi-book-open-page-variant description: BlumeOps Documentation - - TeslaMate: - href: https://tesla.ops.eblu.me - icon: teslamate.png - description: Tesla data logger - Transmission: href: https://torrent.ops.eblu.me icon: transmission.png diff --git 
a/docs/changelog.d/+homepage-dedup-migrated.misc.md b/docs/changelog.d/+homepage-dedup-migrated.misc.md new file mode 100644 index 0000000..9efc5ba --- /dev/null +++ b/docs/changelog.d/+homepage-dedup-migrated.misc.md @@ -0,0 +1,5 @@ +Remove the duplicate Homepage tiles for Mealie, Paperless, Immich, and +TeslaMate. Homepage runs on ringtail and autodiscovers ringtail Ingresses via +`gethomepage.dev/*` annotations; once these services migrated to ringtail they +were discovered automatically, making their leftover static `services.yaml` +entries (needed only while they lived on minikube) redundant. From 214871458478a6b9aaa6dcc1b5aabab1336e8c7c Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 3 Jun 2026 21:32:10 -0700 Subject: [PATCH 22/35] C0: retire Todoist blumeops-tasks; point task discovery at heph Replace the Todoist-backed blumeops-tasks mise task with `heph list --project Blumeops --json` (hephaestus, now at v1 prototype on gilbert). Update task-discovery, rotation-reminder, and zk references across docs; note the zk zettelkasten is migrating into heph docs. Co-Authored-By: Claude Opus 4.8 (1M context) --- AGENTS.md | 12 +- .../+blumeops-tasks-due-recurrence.feature.md | 1 - .../+retire-todoist-for-heph.infra.md | 1 + .../configuration/rotate-fly-deploy-token.md | 2 +- docs/how-to/configuration/rotate-gandi-pat.md | 2 +- docs/reference/services/borgmatic.md | 2 +- docs/reference/storage/backups.md | 2 +- docs/reference/tools/mise-tasks.md | 1 - docs/tutorials/ai-assistance-guide.md | 3 +- mise-tasks/blumeops-tasks | 216 ------------------ 10 files changed, 16 insertions(+), 226 deletions(-) delete mode 100644 docs/changelog.d/+blumeops-tasks-due-recurrence.feature.md create mode 100644 docs/changelog.d/+retire-todoist-for-heph.infra.md delete mode 100755 mise-tasks/blumeops-tasks diff --git a/AGENTS.md b/AGENTS.md index 9e7350d..c64af40 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -65,7 +65,7 @@ See [[agent-change-process]] for the full methodology. 
./pulumi/ # Pulumi IaC (tailnet ACLs, dns, cloud) ~/.config/{nvim,fish} # user's shell config, managed by chezmoi ~/code/personal/ # user's projects -~/code/personal/zk # user's Obsidian-sync managed zettelkasten. Potential source for reference data. +~/code/personal/zk # user's zettelkasten (Obsidian-sync). Reference-data source; migrating into heph docs (hephaestus). ~/code/3rd/ # mirrored external projects ~/code/work # FORBIDDEN ``` @@ -147,10 +147,16 @@ Create a new spork: `mise run spork-create ` ## Task Discovery +BlumeOps tasks live in [hephaestus](https://github.com/eblume/hephaestus) (`heph`), +the user's self-hosted context/task system. Fetch them with the CLI: + ```fish -mise run blumeops-tasks # fetch from Todoist, sorted by priority +heph list --project Blumeops --json # outstanding Blumeops tasks as JSON ``` -Most tasks are stored in `./mise-tasks/`. For scripts with any logic or + +(This replaced the retired `blumeops-tasks` mise task, which read from Todoist.) + +Most operational scripts are stored in `./mise-tasks/`. For scripts with any logic or complexity, use uv run --script 's with explicit dependencies. Complex workflows with artifacts should become dagger pipelines. Mise tasks are for development processes and operations - tools for the user or the agent. diff --git a/docs/changelog.d/+blumeops-tasks-due-recurrence.feature.md b/docs/changelog.d/+blumeops-tasks-due-recurrence.feature.md deleted file mode 100644 index 83072dd..0000000 --- a/docs/changelog.d/+blumeops-tasks-due-recurrence.feature.md +++ /dev/null @@ -1 +0,0 @@ -`blumeops-tasks` now annotates each task with a human-readable due offset (`5d overdue` / `due in 2d` / `due today`) and a `↻ ` marker for recurring tasks, and sorts by overdue-ness (most overdue first, no-due-date last) with priority as tiebreaker. 
diff --git a/docs/changelog.d/+retire-todoist-for-heph.infra.md b/docs/changelog.d/+retire-todoist-for-heph.infra.md new file mode 100644 index 0000000..f6284d0 --- /dev/null +++ b/docs/changelog.d/+retire-todoist-for-heph.infra.md @@ -0,0 +1 @@ +Retired the `blumeops-tasks` mise task (Todoist API) in favor of `heph list --project Blumeops --json` from the self-hosted [hephaestus](https://github.com/eblume/hephaestus) system. Updated docs to point task discovery and rotation reminders at heph, and noted that the `~/code/personal/zk` zettelkasten is migrating into heph docs. diff --git a/docs/how-to/configuration/rotate-fly-deploy-token.md b/docs/how-to/configuration/rotate-fly-deploy-token.md index 5863f54..9abe5f0 100644 --- a/docs/how-to/configuration/rotate-fly-deploy-token.md +++ b/docs/how-to/configuration/rotate-fly-deploy-token.md @@ -14,7 +14,7 @@ How to rotate the Fly.io API token used to deploy [[flyio-proxy]]. The token liv ## When to rotate -- Every 75 days (Todoist recurring task) +- Every 75 days (heph recurring task) - After any compromise / accidental disclosure - If `fly deploy` starts returning auth errors diff --git a/docs/how-to/configuration/rotate-gandi-pat.md b/docs/how-to/configuration/rotate-gandi-pat.md index 94a0b4e..5ce6f81 100644 --- a/docs/how-to/configuration/rotate-gandi-pat.md +++ b/docs/how-to/configuration/rotate-gandi-pat.md @@ -14,7 +14,7 @@ How to rotate the Gandi Personal Access Token. **One PAT** is shared by [[caddy] ## When to rotate -- Every 60 days (Todoist recurring task) +- Every 60 days (heph recurring task) - After any compromise / accidental disclosure - Whenever Gandi starts rejecting the PAT (see [Debugging](#debugging)) diff --git a/docs/reference/services/borgmatic.md b/docs/reference/services/borgmatic.md index fea4551..37f1a60 100644 --- a/docs/reference/services/borgmatic.md +++ b/docs/reference/services/borgmatic.md @@ -25,7 +25,7 @@ Daily backup system using Borg backup, running on indri. 
## What Gets Backed Up **Directories:** -- `~/code/personal/zk` - Zettelkasten +- `~/code/personal/zk` - Zettelkasten (migrating into heph docs; see [hephaestus](https://github.com/eblume/hephaestus)) - `/opt/homebrew/var/forgejo` - Git forge data - `~/.config/borgmatic` - Borgmatic config - `~/Documents` - Personal documents diff --git a/docs/reference/storage/backups.md b/docs/reference/storage/backups.md index 14dbcea..2dfbae4 100644 --- a/docs/reference/storage/backups.md +++ b/docs/reference/storage/backups.md @@ -22,7 +22,7 @@ Daily automated backups from [[indri]] to [[sifaka|Sifaka]] NAS. | Path | Description | Priority | |------|-------------|----------| -| `~/code/personal/zk` | Zettelkasten notes | Critical | +| `~/code/personal/zk` | Zettelkasten notes (migrating into heph docs) | Critical | | `/opt/homebrew/var/forgejo` | Git repositories | Critical | | `~/.config/borgmatic` | Backup config | High | | `~/Documents` | Personal documents (includes [[1password]] encrypted export) | High | diff --git a/docs/reference/tools/mise-tasks.md b/docs/reference/tools/mise-tasks.md index 4ec3438..b614cb1 100644 --- a/docs/reference/tools/mise-tasks.md +++ b/docs/reference/tools/mise-tasks.md @@ -69,7 +69,6 @@ Run `mise tasks --sort name` for the live list with descriptions. |------|-------------| | `services-check` | Check all services are online and responding | | `service-review` | Review the most stale service for version freshness | -| `blumeops-tasks` | List tasks from Todoist sorted by priority | | `op-backup` | Encrypt 1Password export and send to indri for borgmatic | ## Infrastructure Setup diff --git a/docs/tutorials/ai-assistance-guide.md b/docs/tutorials/ai-assistance-guide.md index 3ee1ffa..4f0c595 100644 --- a/docs/tutorials/ai-assistance-guide.md +++ b/docs/tutorials/ai-assistance-guide.md @@ -98,7 +98,6 @@ BlumeOps operations are driven by mise tasks. 
Run `mise tasks` to list all avail | `provision-indri` | Deploy changes to [[indri]]-hosted services via Ansible | | `services-check` | After deployments - verify all services are healthy | | `pr-comments` | Check unresolved PR comments during review | -| `blumeops-tasks` | Find pending tasks from Todoist | | `container-list` | View available container images and tags | | `container-build-and-release` | Trigger container build workflows | | `dns-preview` | Preview DNS changes before applying | @@ -111,6 +110,8 @@ BlumeOps operations are driven by mise tasks. Run `mise tasks` to list all avail | `docs-review` | Review the most stale doc by last-reviewed date | | `runner-logs` | View Forgejo workflow logs (indri or ringtail runner) | +For task discovery, BlumeOps tasks live in [hephaestus](https://github.com/eblume/hephaestus) (`heph`), not Todoist. List outstanding work with `heph list --project Blumeops --json`. + For ArgoCD operations, use the `argocd` CLI directly: - `argocd app diff ` - Preview changes - `argocd app sync ` - Deploy changes diff --git a/mise-tasks/blumeops-tasks b/mise-tasks/blumeops-tasks deleted file mode 100755 index 035aa3b..0000000 --- a/mise-tasks/blumeops-tasks +++ /dev/null @@ -1,216 +0,0 @@ -#!/usr/bin/env -S uv run --script -# /// script -# requires-python = ">=3.12" -# dependencies = ["httpx==0.28.1", "rich==15.0.0"] -# /// -#MISE description="List Blumeops tasks from Todoist sorted by priority" -"""Fetch and display Blumeops tasks from Todoist, sorted by priority. - -This script is specific to Erich Blume's personal development workflow and -is not intended for general use. It requires: - - - A 1Password CLI (`op`) configured with access to the author's vault - - A Todoist account with a project named "Blumeops" - -The script fetches tasks and displays them sorted by a custom priority order: -p1 (urgent), p2 (high), p4 (normal/default), p3 (backlog). 
The p3-last ordering -reflects a deliberate choice to treat p3 as "backlog" rather than moderate -priority. - -Usage: mise run blumeops-tasks -""" - -import subprocess -import sys -from datetime import date - -import httpx -from rich.console import Console -from rich.markup import escape -from rich.text import Text - -TODOIST_API_BASE = "https://api.todoist.com/api/v1" -PROJECT_NAME = "Blumeops" - -# Priority mapping: Todoist API uses 1=normal(p4), 2=moderate(p3), 3=high(p2), 4=urgent(p1) -# User wants order: p1, p2, p4, p3 (p3 is backlog, goes last) -PRIORITY_LABELS = {4: "p1", 3: "p2", 1: "p4", 2: "p3"} -PRIORITY_SORT_ORDER = {4: 1, 3: 2, 1: 3, 2: 4} # Lower = earlier - - -def get_todoist_token() -> str: - """Retrieve Todoist API token from 1Password.""" - result = subprocess.run( - ["op", "read", "op://vg6xf6vvfmoh5hqjjhlhbeoaie/c53h3xnmswhvexa5mntoyvhgpm/credential"], - capture_output=True, - text=True, - ) - if result.returncode != 0: - raise RuntimeError(f"Failed to get Todoist token from 1Password: {result.stderr}") - return result.stdout.strip() - - -def get_project_id(client: httpx.Client, project_name: str) -> str: - """Find project ID by name.""" - cursor = None - while True: - params = {} - if cursor: - params["cursor"] = cursor - response = client.get(f"{TODOIST_API_BASE}/projects", params=params) - response.raise_for_status() - data = response.json() - for project in data.get("results", data if isinstance(data, list) else []): - if project["name"] == project_name: - return project["id"] - cursor = data.get("next_cursor") if isinstance(data, dict) else None - if not cursor: - break - - raise RuntimeError(f"Project '{project_name}' not found in Todoist") - - -def get_tasks(client: httpx.Client, project_id: str) -> list[dict]: - """Get all tasks for a project.""" - tasks = [] - cursor = None - while True: - params = {"project_id": project_id} - if cursor: - params["cursor"] = cursor - response = client.get(f"{TODOIST_API_BASE}/tasks", params=params) - 
response.raise_for_status() - data = response.json() - tasks.extend(data.get("results", data if isinstance(data, list) else [])) - cursor = data.get("next_cursor") if isinstance(data, dict) else None - if not cursor: - break - return tasks - - -def is_due(task: dict) -> bool: - """Check if a task should be displayed based on its due date. - - Tasks without a due date are always shown. Tasks with a due date - are only shown when the date is today or in the past. - """ - due = task.get("due") - if due is None: - return True - due_date = date.fromisoformat(due["date"][:10]) - return due_date <= date.today() - - -def days_until_due(task: dict) -> int | None: - """Return signed days offset from today, or None if no due date. - - Negative = days remaining before due (e.g. -2 = due in 2 days). - Positive = days past due (overdue). Zero = due today. - """ - due = task.get("due") - if due is None: - return None - due_date = date.fromisoformat(due["date"][:10]) - return (date.today() - due_date).days - - -def recurrence_string(task: dict) -> str | None: - """Return the Todoist natural-language recurrence string, or None. - - Todoist's REST API doesn't expose RFC 5545 RRULE; the natural-language - `due.string` (e.g. "every monday", "every 2 weeks") is the terse form. - """ - due = task.get("due") - if due is None or not due.get("is_recurring"): - return None - return due.get("string") - - -def sort_tasks(tasks: list[dict]) -> list[dict]: - """Sort by overdue-ness, then priority. - - Most overdue first (largest +N); tasks with no due date come last. - Within a given day, tiebreaker is the custom priority order p1, p2, p4, p3. 
- """ - - def key(task: dict) -> tuple[int, int, int]: - days = days_until_due(task) - no_due = 1 if days is None else 0 - days_key = -(days if days is not None else 0) # descending - return (no_due, days_key, PRIORITY_SORT_ORDER.get(task["priority"], 5)) - - return sorted(tasks, key=key) - - -def main() -> int: - console = Console() - - # Get API token - try: - token = get_todoist_token() - except RuntimeError as e: - console.print(f"[red]Error:[/red] {e}") - return 1 - - # Create HTTP client with auth header - with httpx.Client(headers={"Authorization": f"Bearer {token}"}) as client: - # Find project - try: - project_id = get_project_id(client, PROJECT_NAME) - except RuntimeError as e: - console.print(f"[red]Error:[/red] {e}") - return 1 - - # Get, filter, and sort tasks - tasks = get_tasks(client, project_id) - tasks = [t for t in tasks if is_due(t)] - sorted_tasks = sort_tasks(tasks) - - if not sorted_tasks: - console.print("No tasks found in Blumeops project") - return 0 - - # Display tasks - console.print(f"[bold]Blumeops Tasks[/bold] ({len(sorted_tasks)} tasks)") - console.print("=" * 40) - console.print() - - for task in sorted_tasks: - priority = task["priority"] - label = PRIORITY_LABELS.get(priority, "p?") - content = task["content"] - description = task.get("description", "") - - # Header line with priority and content - header = Text() - header.append(f"[{label}]", style="bold") - header.append(f" {content}") - - meta = [] - days = days_until_due(task) - if days is not None: - if days == 0: - meta.append("due today") - elif days > 0: - meta.append(f"{days}d overdue") - else: - meta.append(f"due in {-days}d") - recurrence = recurrence_string(task) - if recurrence: - meta.append(f"↻ {recurrence}") - if meta: - header.append(f" ({', '.join(meta)})", style="dim") - console.print(header) - - # Description indented (escape rich markup to preserve brackets) - if description: - for line in description.split("\n"): - console.print(f" {escape(line)}", 
style="dim") - - console.print() - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) From 29e0f012cd43d7185ed37a0a037695c6b52abc03 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Wed, 3 Jun 2026 21:39:41 -0700 Subject: [PATCH 23/35] C0: pin Quartz docs build to v4.5.2 (v5.0.0 broke build) The Dagger build_docs pipeline cloned Quartz from the default branch unpinned. Quartz v5.0.0 restructured its config layout (.quartz/plugins, ../quartz imports), breaking the docs build against our existing quartz.config.ts / quartz.layout.ts. Pin the clone to the last v4 release (v4.5.2) to restore known-good behavior. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/changelog.d/+pin-quartz-v4.bugfix.md | 1 + src/blumeops/main.py | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 docs/changelog.d/+pin-quartz-v4.bugfix.md diff --git a/docs/changelog.d/+pin-quartz-v4.bugfix.md b/docs/changelog.d/+pin-quartz-v4.bugfix.md new file mode 100644 index 0000000..e073bbb --- /dev/null +++ b/docs/changelog.d/+pin-quartz-v4.bugfix.md @@ -0,0 +1 @@ +Pin the Quartz docs build to v4.5.2. The Dagger `build_docs` pipeline cloned Quartz from the default branch unpinned; Quartz v5.0.0 restructured its config layout (`.quartz/plugins`, `../quartz` imports) and broke the docs build against our existing `quartz.config.ts`/`quartz.layout.ts`. diff --git a/src/blumeops/main.py b/src/blumeops/main.py index 94b932b..9bbd12f 100644 --- a/src/blumeops/main.py +++ b/src/blumeops/main.py @@ -80,6 +80,10 @@ class Blumeops: "git", "clone", "--depth=1", + # Pin to last v4 release. v5.0.0 restructured config + # layout (.quartz/plugins, ../quartz imports) and breaks + # our quartz.config.ts/quartz.layout.ts. See changelog. 
+ "--branch=v4.5.2", "https://github.com/jackyzha0/quartz.git", "/tmp/quartz", ] From 8f72f04d5cf5c507d0a9e8163d07d666975b53b7 Mon Sep 17 00:00:00 2001 From: Forgejo Actions Date: Wed, 3 Jun 2026 21:52:22 -0700 Subject: [PATCH 24/35] Update docs release to v1.17.0 - Built changelog from towncrier fragments [skip ci] --- CHANGELOG.md | 253 ++++++++++++++++++ ansible/roles/docs/defaults/main.yml | 3 +- .../+1password-backup-doc-export-name.doc.md | 1 - .../+agent-file-neutralization.ai.md | 1 - .../+ai-scraper-mitigation-doc.doc.md | 1 - .../+alloy-main-sha-rebuild.infra.md | 5 - .../+alloy-native-macos-v1.16.0.infra.md | 6 - .../+argocd-resource-limits.infra.md | 1 - .../+claude-md-import-agents.ai.md | 1 - ...ontainer-build-suggest-runner-logs.misc.md | 1 - .../+fix-forge-static-assets.bugfix.md | 1 - .../+fly-deploy-immediate-strategy.infra.md | 1 - .../+forge-mirrors-blackhole.infra.md | 1 - .../+frigate-notify-local.infra.md | 1 - .../+grafana-recreate-strategy.infra.md | 1 - .../+homepage-config-perms-fix.bugfix.md | 5 - .../+homepage-dedup-migrated.misc.md | 5 - .../+immich-probe-ringtail.infra.md | 1 - ...anage-forgejo-mirrors-sync-location.doc.md | 1 - docs/changelog.d/+pin-quartz-v4.bugfix.md | 1 - .../+prowler-rebuild-on-main.infra.md | 1 - .../+remove-devpi-container-build.misc.md | 1 - .../+retire-todoist-for-heph.infra.md | 1 - docs/changelog.d/+review-1password-doc.doc.md | 1 - .../+review-compliance-image-iac.feature.md | 1 - .../+review-contributing-doc.doc.md | 1 - docs/changelog.d/+review-index-doc.doc.md | 1 - docs/changelog.d/+review-navidrome-doc.doc.md | 1 - docs/changelog.d/+review-ollama-doc.doc.md | 1 - .../+ringtail-clone-via-tailnet.infra.md | 1 - .../+ringtail-coredump-size-cap.infra.md | 1 - ...+ringtail-flake-update-2026-06-01.infra.md | 4 - docs/changelog.d/+ringtail-proton-ge.infra.md | 4 - .../+ringtail-sn2-prelaunch.infra.md | 6 - .../+ringtail-sway-fuzzel.bugfix.md | 3 - .../+ringtail-vrr-flicker.bugfix.md | 1 - 
...ate-fly-deploy-token-shell-examples.doc.md | 1 - docs/changelog.d/+runner-logs-auth.feature.md | 1 - .../+runner-logs-missing-log.misc.md | 1 - .../changelog.d/+shower-1.1.1-deploy.infra.md | 1 - .../+shower-1.1.1-fod-pin.infra.md | 1 - docs/changelog.d/+shower-1.1.1.infra.md | 1 - .../changelog.d/+shower-1.1.3-deploy.infra.md | 1 - docs/changelog.d/+shower-1.1.3.infra.md | 1 - .../+shower-main-sha-rebuild.infra.md | 5 - .../+shower-rebuild-from-main-sha.misc.md | 6 - ...hower-v1.1.2-rebuild-from-main-sha.misc.md | 1 - .../+tailscale-main-sha-rebuild.infra.md | 1 - .../+transmission-doc-review.doc.md | 1 - .../+unpoller-rebuild-on-main.infra.md | 1 - .../+valkey-main-tag-bump.infra.md | 1 - .../+valkey-rebuild-on-main.infra.md | 1 - .../+wave1-decommission-followups.infra.md | 8 - .../+zot-ci-rotation-op-syntax.doc.md | 1 - docs/changelog.d/+zot-v2.1.16.infra.md | 1 - docs/changelog.d/alloy-v1.16.0.infra.md | 5 - ...ckup-grafana-ringtail-blumeops-pg.infra.md | 8 - ...cleanup-cv-docs-minikube-artifacts.misc.md | 1 - ...dagger-0-20-6-runner-image-alpine.infra.md | 1 - .../decommission-wave1-minikube.infra.md | 8 - .../doc-review-replicating-blumeops.doc.md | 1 - .../fix-borgmatic-shower-via-ssh.bugfix.md | 14 - ...o-runner-v12-8-server-connections.infra.md | 1 - .../changelog.d/homepage-to-ringtail.infra.md | 8 - .../migrate-cv-docs-to-indri.infra.md | 1 - .../migrate-devpi-to-indri.infra.md | 1 - .../migrate-immich-to-ringtail.infra.md | 13 - .../migrate-wave1-ringtail.infra.md | 13 - .../mirror-tailscale-container.infra.md | 1 - .../changelog.d/prowler-iac-mutelist.infra.md | 1 - .../recurring-maintenance-2026-05-27.doc.md | 1 - .../recurring-maintenance-2026-05-27.infra.md | 4 - .../review-ringtail-flake-2026-05-11.infra.md | 1 - docs/changelog.d/ringtail-static-ip.infra.md | 1 - .../rip-out-compensating-controls.infra.md | 1 - .../service-review-mealie-2026-05-11.infra.md | 1 - docs/changelog.d/shower-app-deploy.bugfix.md | 13 - 
docs/changelog.d/shower-app-deploy.feature.md | 4 - docs/changelog.d/shower-app-deploy.infra.md | 9 - docs/changelog.d/shower-v1.1.0.feature.md | 15 -- docs/changelog.d/shower-v1.1.2.infra.md | 1 - docs/changelog.d/unpoller-v3.infra.md | 1 - .../update-tooling-deps-2026-04.doc.md | 1 - .../update-tooling-deps-2026-04.infra.md | 1 - docs/changelog.d/valkey-mirror.infra.md | 1 - docs/changelog.d/valkey-nix.infra.md | 1 - 86 files changed, 254 insertions(+), 234 deletions(-) delete mode 100644 docs/changelog.d/+1password-backup-doc-export-name.doc.md delete mode 100644 docs/changelog.d/+agent-file-neutralization.ai.md delete mode 100644 docs/changelog.d/+ai-scraper-mitigation-doc.doc.md delete mode 100644 docs/changelog.d/+alloy-main-sha-rebuild.infra.md delete mode 100644 docs/changelog.d/+alloy-native-macos-v1.16.0.infra.md delete mode 100644 docs/changelog.d/+argocd-resource-limits.infra.md delete mode 100644 docs/changelog.d/+claude-md-import-agents.ai.md delete mode 100644 docs/changelog.d/+container-build-suggest-runner-logs.misc.md delete mode 100644 docs/changelog.d/+fix-forge-static-assets.bugfix.md delete mode 100644 docs/changelog.d/+fly-deploy-immediate-strategy.infra.md delete mode 100644 docs/changelog.d/+forge-mirrors-blackhole.infra.md delete mode 100644 docs/changelog.d/+frigate-notify-local.infra.md delete mode 100644 docs/changelog.d/+grafana-recreate-strategy.infra.md delete mode 100644 docs/changelog.d/+homepage-config-perms-fix.bugfix.md delete mode 100644 docs/changelog.d/+homepage-dedup-migrated.misc.md delete mode 100644 docs/changelog.d/+immich-probe-ringtail.infra.md delete mode 100644 docs/changelog.d/+manage-forgejo-mirrors-sync-location.doc.md delete mode 100644 docs/changelog.d/+pin-quartz-v4.bugfix.md delete mode 100644 docs/changelog.d/+prowler-rebuild-on-main.infra.md delete mode 100644 docs/changelog.d/+remove-devpi-container-build.misc.md delete mode 100644 docs/changelog.d/+retire-todoist-for-heph.infra.md delete mode 100644 
docs/changelog.d/+review-1password-doc.doc.md delete mode 100644 docs/changelog.d/+review-compliance-image-iac.feature.md delete mode 100644 docs/changelog.d/+review-contributing-doc.doc.md delete mode 100644 docs/changelog.d/+review-index-doc.doc.md delete mode 100644 docs/changelog.d/+review-navidrome-doc.doc.md delete mode 100644 docs/changelog.d/+review-ollama-doc.doc.md delete mode 100644 docs/changelog.d/+ringtail-clone-via-tailnet.infra.md delete mode 100644 docs/changelog.d/+ringtail-coredump-size-cap.infra.md delete mode 100644 docs/changelog.d/+ringtail-flake-update-2026-06-01.infra.md delete mode 100644 docs/changelog.d/+ringtail-proton-ge.infra.md delete mode 100644 docs/changelog.d/+ringtail-sn2-prelaunch.infra.md delete mode 100644 docs/changelog.d/+ringtail-sway-fuzzel.bugfix.md delete mode 100644 docs/changelog.d/+ringtail-vrr-flicker.bugfix.md delete mode 100644 docs/changelog.d/+rotate-fly-deploy-token-shell-examples.doc.md delete mode 100644 docs/changelog.d/+runner-logs-auth.feature.md delete mode 100644 docs/changelog.d/+runner-logs-missing-log.misc.md delete mode 100644 docs/changelog.d/+shower-1.1.1-deploy.infra.md delete mode 100644 docs/changelog.d/+shower-1.1.1-fod-pin.infra.md delete mode 100644 docs/changelog.d/+shower-1.1.1.infra.md delete mode 100644 docs/changelog.d/+shower-1.1.3-deploy.infra.md delete mode 100644 docs/changelog.d/+shower-1.1.3.infra.md delete mode 100644 docs/changelog.d/+shower-main-sha-rebuild.infra.md delete mode 100644 docs/changelog.d/+shower-rebuild-from-main-sha.misc.md delete mode 100644 docs/changelog.d/+shower-v1.1.2-rebuild-from-main-sha.misc.md delete mode 100644 docs/changelog.d/+tailscale-main-sha-rebuild.infra.md delete mode 100644 docs/changelog.d/+transmission-doc-review.doc.md delete mode 100644 docs/changelog.d/+unpoller-rebuild-on-main.infra.md delete mode 100644 docs/changelog.d/+valkey-main-tag-bump.infra.md delete mode 100644 docs/changelog.d/+valkey-rebuild-on-main.infra.md delete mode 100644 
docs/changelog.d/+wave1-decommission-followups.infra.md delete mode 100644 docs/changelog.d/+zot-ci-rotation-op-syntax.doc.md delete mode 100644 docs/changelog.d/+zot-v2.1.16.infra.md delete mode 100644 docs/changelog.d/alloy-v1.16.0.infra.md delete mode 100644 docs/changelog.d/backup-grafana-ringtail-blumeops-pg.infra.md delete mode 100644 docs/changelog.d/cleanup-cv-docs-minikube-artifacts.misc.md delete mode 100644 docs/changelog.d/dagger-0-20-6-runner-image-alpine.infra.md delete mode 100644 docs/changelog.d/decommission-wave1-minikube.infra.md delete mode 100644 docs/changelog.d/doc-review-replicating-blumeops.doc.md delete mode 100644 docs/changelog.d/fix-borgmatic-shower-via-ssh.bugfix.md delete mode 100644 docs/changelog.d/forgejo-runner-v12-8-server-connections.infra.md delete mode 100644 docs/changelog.d/homepage-to-ringtail.infra.md delete mode 100644 docs/changelog.d/migrate-cv-docs-to-indri.infra.md delete mode 100644 docs/changelog.d/migrate-devpi-to-indri.infra.md delete mode 100644 docs/changelog.d/migrate-immich-to-ringtail.infra.md delete mode 100644 docs/changelog.d/migrate-wave1-ringtail.infra.md delete mode 100644 docs/changelog.d/mirror-tailscale-container.infra.md delete mode 100644 docs/changelog.d/prowler-iac-mutelist.infra.md delete mode 100644 docs/changelog.d/recurring-maintenance-2026-05-27.doc.md delete mode 100644 docs/changelog.d/recurring-maintenance-2026-05-27.infra.md delete mode 100644 docs/changelog.d/review-ringtail-flake-2026-05-11.infra.md delete mode 100644 docs/changelog.d/ringtail-static-ip.infra.md delete mode 100644 docs/changelog.d/rip-out-compensating-controls.infra.md delete mode 100644 docs/changelog.d/service-review-mealie-2026-05-11.infra.md delete mode 100644 docs/changelog.d/shower-app-deploy.bugfix.md delete mode 100644 docs/changelog.d/shower-app-deploy.feature.md delete mode 100644 docs/changelog.d/shower-app-deploy.infra.md delete mode 100644 docs/changelog.d/shower-v1.1.0.feature.md delete mode 100644 
docs/changelog.d/shower-v1.1.2.infra.md delete mode 100644 docs/changelog.d/unpoller-v3.infra.md delete mode 100644 docs/changelog.d/update-tooling-deps-2026-04.doc.md delete mode 100644 docs/changelog.d/update-tooling-deps-2026-04.infra.md delete mode 100644 docs/changelog.d/valkey-mirror.infra.md delete mode 100644 docs/changelog.d/valkey-nix.infra.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ae5f8e..0499154 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,259 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [v1.17.0] - 2026-06-03 + +### Features + +- Deploy the Adelaide / Heidi / Addie baby shower app — guest splash, raffle + picker, and prize assignment console — on ringtail k3s with `shower.eblu.me` + as the public entry and `shower.ops.eblu.me` as the tailnet admin host. App + source: [`adelaide-baby-shower-app`](https://forge.eblu.me/eblume/adelaide-baby-shower-app). +- Deploy adelaide-baby-shower-app v1.1.0 to ringtail k3s. Replaces the + boolean lock with a four-phase `ShowerState` (`pre_event` → `party` → + `prizes_locked` → `event_locked`), adds an append-only "guest memories" + panel where guests can leave photos and comments for the baby, and + polishes the admin and QR views. Three Django migrations + (`0009_shower_phase`, `0010_guest_memories`, `0011_book_description`) + run automatically in the entrypoint against the SQLite PV. No config + or env-var changes. + + Container build also gains a Forgejo-PyPI workaround: Forgejo's simple + index returns absolute file URLs hardcoded to the public ROOT_URL + (`forge.eblu.me`), which the Fly edge 403s on `/api/packages/*`. The + wheel and sdist are now both pulled via direct `fetchurl` against + `forge.ops.eblu.me` (tailnet-only) and the wheel is handed to pip as + a local path. +- `review-compliance-reports` now also fetches and summarizes the weekly Prowler container-image and IaC scans (previously only the K8s CIS in-cluster scan was processed). 
For each scan it shows status counts, severity breakdown, week-over-week delta, and — for the high-volume image/IaC scans — top-N tables grouped by check ID and resource instead of per-finding listings. +- runner-logs now authenticates with Forgejo API token and auto-detects the repo from git remote. Job logs are fetched via SSH to indri (reading Forgejo's on-disk zstd log files) instead of the web endpoint, which doesn't support token auth for private repos. + +### Bug Fixes + +- Fix nightly borgmatic backups failing for 2 days. The shower SQLite + dump hook referenced `kubectl --context=k3s-ringtail`, but indri's + kubeconfig deliberately doesn't carry the ringtail credentials. The + `before_backup` hook's failure aborted the entire run, taking out + *both* the local sifaka repo and the BorgBase offsite. Replaced + the inline-shell dump with a `~/bin/borgmatic-k8s-sqlite-dump` + helper deployed by the ansible role. Each dump entry now declares a + `target` of either `local:` (mealie — kubectl uses indri's + kubeconfig) or `ssh:` (shower — ssh into ringtail and + run `k3s kubectl` there, no indri-side kubeconfig needed; k3s.yaml + on ringtail is mode 644 so no sudo required). Bytes stream back via + `kubectl exec ... -- cat` rather than `kubectl cp`, since `kubectl + cp` requires `tar` inside the pod and nix-built images like shower + don't bundle it. +- Shower app container now bakes the wheel + Python deps into the image + at build time via `buildPythonPackage` instead of pip-installing on + first boot. Boots are deterministic and don't depend on forge PyPI + being reachable from the pod. The `wheelHash` in + `containers/shower/default.nix` is the sha256 sourced from the + [forge PyPI simple index](https://forge.eblu.me/api/packages/eblume/pypi/simple/adelaide-baby-shower-app/); + bumping the version means bumping that hash too. 
+ + Borgmatic now covers the shower app: SQLite is dumped from the live + pod via `kubectl exec` (mirroring the existing mealie entry, with + `context: k3s-ringtail`), and the prize-photo media share is picked up + through `/Volumes/shower` (sifaka SMB mount on indri, same pattern as + `/Volumes/photos`). +- Disabled adaptive sync (VRR) on ringtail's DP-1 output. The OMEN 27i IPS panel pumps brightness when its refresh rate swings into the low VRR range during low-framerate content (e.g. game cutscenes), producing a flicker that worsened over a session until a reboot. Pinning the panel to a fixed 165Hz eliminates it. +- Fixed forge.eblu.me static assets (CSS, JS, images, fonts) not loading — the proxy's static asset cache block was missing the `Host` header, so Caddy couldn't route the requests. +- Fixed homepage container EACCES on cold start: the nix-built image now chowns + `/app/config` to uid 1000 at build time via `fakeRootCommands`, matching the + behavior of the old Dockerfile. Without this, homepage couldn't seed missing + skeleton configs (proxmox.yaml etc.) or create `/app/config/logs`, crashing on + its first uncached request. Caught during the ringtail cutover. +- Fixed sway keybindings on ringtail — the home-manager `keybindings` block was replacing the module's defaults entirely, leaving only explicit overrides (no workspace switching, focus, move, splits, resize mode, etc). Switched to `lib.mkOptionDefault` with `lib.mkForce` on the conflicting custom binds (`Mod+Return`, `Mod+d`, `Mod+space`, `Mod+l`) so defaults merge back in. Also added `Mod+F1` to show a filterable fuzzel list of current keybindings. + + Fixed fuzzel config errors on launch — `border-radius` and `border-width` were under `[main]`, but fuzzel expects them as `radius`/`width` under a `[border]` section. +- Pin the Quartz docs build to v4.5.2. 
The Dagger `build_docs` pipeline cloned Quartz from the default branch unpinned; Quartz v5.0.0 restructured its config layout (`.quartz/plugins`, `../quartz` imports) and broke the docs build against our existing `quartz.config.ts`/`quartz.layout.ts`. + +### Infrastructure + +- Wire the ringtail `blumeops-pg` cluster (which holds the wave-1-migrated + paperless + teslamate databases) into backups and Grafana. Adds a Tailscale + LoadBalancer Service (`blumeops-pg-ringtail.tail8d86e.ts.net`) and a Caddy L4 + route (`pg.ops.eblu.me:5434`), then repoints borgmatic's `teslamate` + + `paperless` postgres dumps and the `mealie` SQLite dump at ringtail, and the + Grafana TeslaMate datasource at the ringtail DB. Closes the backup gap that + opened at cutover (the migrated live data was still being backed up from the + now-frozen minikube copies) and unblocks the wave-1 decommission. +- Migrated homepage dashboard from minikube (indri/arm64) to k3s (ringtail/amd64). + The container is now built via nix (`containers/homepage/default.nix`), adapted + from nixpkgs `homepage-dashboard` with the upstream Next.js cache patches and + wrapped with `dockerTools.buildLayeredImage`. Autodiscovery shifts: services on + minikube (ArgoCD, Immich, Kiwix, Mealie, Miniflux, Grafana, Prometheus, + Navidrome, Paperless, TeslaMate, Transmission) become explicit static entries + in `services.yaml`; ringtail services (Authentik, Frigate/NVR, Ntfy, Ollama) + auto-populate via Ingress annotations. +- Migrated CV (`cv.eblu.me`) and Docs (`docs.eblu.me`) from minikube Deployments to indri-native ansible roles. Caddy now serves the extracted release tarballs directly via a new `kind: static` service-block in the Caddy template — no daemon, no container — replacing the prior nginx-in-a-pod layer. Removes a network hop on every request and shrinks minikube's footprint. See [[cv-on-indri]] and [[docs-on-indri]]. Part of the broader minikube wind-down. 
+- Migrated devpi (PyPI mirror at `pypi.ops.eblu.me`) from a minikube StatefulSet to a launchd-managed service on indri. devpi-server now runs in a uv-managed venv with pinned `devpi-server` and `devpi-web` versions, listens on `127.0.0.1:3141`, and is fronted by Caddy. The minikube StatefulSet was crash-looping under memory pressure (and breaking the Python toolchain everywhere); the new layout removes a layer of dependency on cluster health for critical-path tooling. See [[devpi-on-indri]]. +- Move the entire Immich stack — server, machine-learning, valkey, + and the PostgreSQL+VectorChord cluster — off `minikube-indri` and + onto `k3s-ringtail`. Postgres data migrated zero-loss via CNPG + `pg_basebackup` (replica catch-up then promote); row counts on + `asset`, `user`, `album`, `smart_search`, `activity`, `asset_face` + verified equal between source and replica before cutover. The ML + pod now uses ringtail's RTX 4080 via the nvidia-device-plugin + (time-slicing bumped 2 → 4 to share with frigate + ollama). Caddy + routing at `photos.ops.eblu.me` is unchanged (still + `photos.tail8d86e.ts.net`, the device just lives on ringtail now). + Borgmatic backups continue against the same `immich-pg` tailnet + hostname. First concrete chain in the broader indri-k8s + decommission effort. +- Add local nix container build for `tailscale` (`containers/tailscale/default.nix`) so ringtail's tailscale-operator ProxyClass proxy pods pull from the forge mirror instead of `docker.io/tailscale/tailscale`. Pinned at v1.94.2 to match `service-versions.yaml`. Indri's tailscale-operator continues to use upstream during the k8s-to-ringtail migration. +- Address the 6 critical Prowler IaC findings against `argocd/manifests/`. Prowler's IaC provider hardcodes `self._mutelist = None` and delegates filtering to Trivy, but doesn't plumb `--ignorefile` through — so the documented "use Trivy filtering" path is actually broken. 
Added a shim around `trivy` in the Prowler image that injects `--ignorefile $TRIVY_IGNOREFILE` for `trivy fs` invocations when the env var points at a real file. The IaC cronjob now mounts `mutelist/trivyignore.yaml` (Trivy's per-path schema) and sets the env var, muting the `external-secrets` and `kube-state-metrics` Secret-access findings (KSV-0041, KSV-0114). Separately, `grafana-clusterrole` is tightened to remove `secrets` access entirely: the dashboard sidecar already only consumes ConfigMap-labeled dashboards, so its `RESOURCE` env var is now `configmap` instead of `both`. +- Pin ringtail's wired IP to `192.168.1.21` via NixOS scripted networking; NetworkManager no longer manages `enp5s0`. Removes DHCP lease renewal as a failure mode after a silent lease teardown took ringtail offline. Also explicitly enables `net.ipv4.ip_forward` (previously set implicitly by scripted-DHCP) so k3s pod networking and Tailscale routing continue to work with static networking. +- Ripped out the compensating-controls (CC) framework: deleted `compensating-controls.yaml`, the `review-compensating-controls` mise task, and the associated how-to / explanation docs. Prowler and Kingfisher continue to run weekly and produce reports; the Prowler mutelist YAML files remain in place but no longer carry `CC: ` prefixes — each entry just keeps a free-form `Description` of why the finding is muted. The CC review cadence proved to be more overhead than this single-operator homelab needed. +- Wire shower app for public exposure: fly nginx `shower.eblu.me` server + block as a guest-only surface — splash page, `/prizes//`, static + assets, media. Everything authenticated (`/admin/`, `/host/`, + `/accounts/`) returns 403 with a "tailnet only" pointer. Staff hit + `shower.ops.eblu.me` for the operator console + admin; the app's + v1.0.1 `DJANGO_PUBLIC_URL_BASE` setting makes QR codes generated on + the tailnet point back at the WAN host for guests. 
Plus a Caddy route + on indri, Pulumi Gandi CNAME, and a Grafana APM dashboard tracking + request rate, error rate, latency, bandwidth, and access logs. +- Mirror Valkey 8.1 locally as `registry.ops.eblu.me/blumeops/valkey`. Replaces direct pulls of `docker.io/valkey/valkey:8.1-alpine` for paperless and immich sidecars. Built via native Dagger pipeline on Alpine 3.22. Stateless swap — no data migration. Authentik's nix-built Redis remains separate. +- Add nix-built amd64 valkey for ringtail (`containers/valkey/default.nix`) so immich-ringtail can stop pulling the upstream multi-arch `docker.io/valkey/valkey` image. Existing `container.py` continues to build Alpine arm64 for paperless on indri. Both bump to valkey 8.1.7 (Alpine 3.22 8.1.7-r0 / nixpkgs 8.1.7). +- Upgrade Grafana Alloy v1.14.0 → v1.16.0 across all four service deployments + (alloy-k8s, alloy-ringtail, alloy-tracing-ringtail on k8s; alloy native on + indri). Pulls in stable database observability (v1.15) and the OTel Collector + v0.147.0 bump. Container build also migrated from Dockerfile to native Dagger + `container.py` per the build-container-image migration playbook. +- Upgraded Dagger from v0.20.1 to v0.20.6 (engine, CLI pin, and SDK regen) and migrated `runner-job-image` from a Debian-based Dockerfile to a native Dagger `container.py` on Alpine 3.23, reusing the shared `alpine_runtime` helper. +- Decommission the wave-1 services on minikube-indri now that paperless, + teslamate, and mealie run on ringtail with their data backed up. Removes the + minikube `paperless`/`teslamate`/`mealie` manifest dirs + ArgoCD app + definitions (pruning the parked Deployments, Services, and the redundant + minikube mealie/paperless PVCs), and drops the `paperless`/`teslamate` roles + from the minikube `blumeops-pg` cluster. The `paperless` and `teslamate` + databases are dropped from indri's blumeops-pg as the finalization step. + miniflux + authentik remain on the minikube cluster (later waves). 
+- Upgraded the k8s Forgejo runner to the v12.8 line, switched it from first-boot registration to declarative `server.connections` credentials from 1Password, and consolidated the supporting runner how-to documentation. +- Move paperless, teslamate, and mealie off `minikube-indri` onto + `k3s-ringtail`, shedding ~1.1 GiB of resident load from the + OOM-thrashing 8 GiB minikube node (the kernel OOM killer had been + killing `kube-apiserver`/`dockerd`/argocd, flapping every + minikube-hosted service at once). paperless + teslamate databases + move into a fresh CNPG `blumeops-pg` cluster on ringtail via a cold + `pg_dump`/`pg_restore` from the quiesced source — row counts verified + equal before any routing flip; source DBs dropped only after the + ringtail side serves traffic. mealie's SQLite PVC is copied as-is. + paperless media stays on sifaka NFS. Downtime-tolerant cold cutover + (no streaming replication); rollback is repoint-and-scale-up with the + source untouched. Second chain in the indri-k8s decommission after + [[migrate-immich-to-ringtail]]. +- Recurring maintenance batch: + + - Ringtail flake inputs refreshed (`disko`, `home-manager`, `nixpkgs`). + - Tooling deps bumped: prek hooks (trufflehog v3.95.3, kingfisher v1.101.0, ruff v0.15.14, `ansible-core` 2.21.0); fly proxy base images (nginx 1.30.1-alpine, alloy v1.16.1); `typer==0.26.2` in mise tasks. +- Updated `nixos/ringtail/flake.lock` (weekly cadence): `disko`, `home-manager`, and `nixpkgs` inputs refreshed. `nixpkgs-services` skipped per overlay convention. +- Reviewed `mealie` service version freshness; upstream is 5 minor versions ahead (v3.17.0 vs deployed v3.12.0). Marked reviewed; upgrade deferred. +- Deploy shower v1.1.2 — bump container build to new app release. +- Upgrade unpoller v2.34.0 → v3.2.0 and migrate container build from Dockerfile to native Dagger (container.py). 
v3.0.0 carries breaking UniFi API changes; v3.2.0 introduces a 60s background poll (cached scrapes) by default — set `interval = 0` in `up.conf` to restore on-demand polling. +- Monthly tooling dependency refresh: prek hooks (trufflehog, kingfisher, ruff, shfmt, prettier, actionlint, ansible-lint), fly proxy base images (nginx 1.30.0, tailscale v1.94.2, alloy v1.16.0), normalize pyyaml lower bound in mise-tasks. +- Add GE-Proton (`pkgs.proton-ge-bin`) to `programs.steam.extraCompatPackages` + on ringtail. Subnautica 2 hangs at Mercuna plugin init under Proton + Experimental + DXVK D3D12; GE-Proton is available as a Steam per-game + compatibility option to work around it. +- Add `sn2-prelaunch` Steam launch wrapper on ringtail that removes + Subnautica 2's stale `Saved/running.dat` and `Saved/beforelobby.dat` + lockfiles before each launch. SN2 pops up an invisible (0×0-sized) + Error dialog when it detects an unclean exit, blocking GameThread + forever; this is observable only as a black screen with a spinning + loader. Use via Steam launch option: `sn2-prelaunch %command%`. +- Add local nix container build for `frigate-notify` (`containers/frigate-notify/default.nix`) so the Frigate→ntfy bridge is rebuilt on ringtail from the forge mirror instead of pulled from `ghcr.io/0x2142/frigate-notify`. +- Add resource limits to all ArgoCD pods to prevent unbounded resource consumption during node-wide pressure events. +- Black-hole the `/mirrors/*` repositories at the Fly proxy edge (`return 403` → `forge.ops.eblu.me`). A surprise $29.60 Fly bill traced to ~1.24 TB/30d of egress on `forge.eblu.me`, 99.95% of all proxy egress — of which ~71% was AI scrapers (Meta `meta-externalagent`, OpenAI `GPTBot`, Amazonbot) crawling the near-infinite git-history URL space of the public mirror repos and timing out Forgejo in the process. Mirrors exist for supply-chain control and are consumed over the tailnet, so their public web UI had no legitimate audience. 
`robots.txt` already disallowed `/mirrors/`, but the offending agents ignore it. Tier-2 mitigations (user-agent denylist, Anubis proof-of-work gateway) are documented in `docs/explanation/ai-scraper-mitigation.md`. +- Bump paperless and immich kustomizations to the main-SHA-built valkey tag (`v8.1.6-r0-fabca04`). Routine post-merge follow-up to keep production manifests pointing at images built from a commit on main. +- Bump shower container to v1.1.1 (probe FOD hash). +- Bumped shower app to v1.1.3 (wheel/sdist + FOD hashes probed on ringtail). +- Cap systemd-coredump on ringtail (ProcessSizeMax/ExternalSizeMax 1G, MaxUse 2G) so multi-GB Wine/Proton game crash dumps no longer thrash the disk and lock up the desktop. +- Deploy shower v1.1.1 to ringtail (kustomize newTag bump). +- Deployed shower v1.1.3 to ringtail (image built and pushed from ringtail; runner bypassed due to indri overload). +- Fix three follow-ups from the wave-1 decommission: grant the local + break-glass `admin` account ArgoCD admin rights (`g, admin, role:admin` — + previously only the Authentik `admins` group had access, so admin was + locked out whenever its token expired), and repoint the alloy blackbox + probe for teslamate from the deleted minikube service to + `https://tesla.ops.eblu.me/` (through Caddy over Tailscale). The orphaned + paperless/teslamate roles + ExternalSecrets left on the minikube + blumeops-pg are also cleaned up. +- Moved the Immich blackbox health probe from indri's alloy to ringtail's alloy. After the immich migration to ringtail, the probe still targeted `immich-server.immich.svc.cluster.local` on indri's cluster where the service no longer exists, causing a persistent `ServiceProbeFailure` alert. +- Pin shower v1.1.1 FOD outputHash (probed locally on ringtail). +- Rebuild Prowler container against main HEAD (v5.23.0-495e45d) after merging the IaC mutelist Dockerfile changes. 
+- Rebuild and retag alloy v1.16.0 container images from the main-branch SHA + following the squash-merge of #345, per the build-container-image + squash-merge convention. Both images (`registry.ops.eblu.me/blumeops/alloy`) + now reference `9564435` rather than the branch SHA `26a3ab5`, restoring + source traceability after branch cleanup. +- Rebuild shower from the post-merge commit on main so the container's + SHA tag points at a commit that will still exist after the 30-day + branch-cleanup window. Functionally identical to the branch-tag image + already deployed, just preserves source traceability per + [[build-container-image#Squash-merge and container tags]]. +- Rebuild unpoller container from squashed main commit so the image SHA tag matches a commit in main's history (was tagged with the pre-squash branch SHA). +- Rebuild valkey container from squashed main commit (both arm64 dagger and amd64 nix variants), and update paperless + immich-ringtail kustomizations to the main-SHA tags `v8.1.7-ecded30` and `v8.1.7-ecded30-nix`. +- Retired the `blumeops-tasks` mise task (Todoist API) in favor of `heph list --project Blumeops --json` from the self-hosted [hephaestus](https://github.com/eblume/hephaestus) system. Updated docs to point task discovery and rotation reminders at heph, and noted that the `~/code/personal/zk` zettelkasten is migrating into heph docs. +- Switch the Fly proxy deploy strategy from `bluegreen` to `immediate` in `fly/fly.toml`. With a single proxy machine, bluegreen offers little benefit — the green machine routinely failed to reach "started" inside Fly's default 5-minute deploy timeout (the cold-start sequence of `tailscaled` → `tailscale up` → wait-for-MagicDNS → nginx startup eats most of the budget), and the failed deploys would roll back. `immediate` replaces the machine in place with a brief downtime (~5–10s) but actually completes. 
+- Switch the ringtail provisioning playbook's blumeops clone URL from `forge.eblu.me` (public, via Fly proxy) to `forge.ops.eblu.me` (tailnet, direct via Caddy on indri). Ringtail is always on the tailnet, so the WAN round-trip is pure overhead — it also made `provision-ringtail` brittle whenever the Fly proxy was slow or down. +- Switched Grafana's deployment strategy from `RollingUpdate` to `Recreate`. With an RWO PVC holding the SQLite database and Bleve search index, `RollingUpdate` reliably crashloops the new pod on the index lock until rollout timeout. `Recreate` terminates the old pod first so the new one acquires the lock cleanly. +- Update `tailscale-operator-ringtail` ProxyClass to reference the `0108b68` main-SHA build of the tailscale container. Routine post-merge cleanup so the deployed image traces to a commit that survives PR branch cleanup. +- Update the ringtail NixOS flake lockfile (`nixos/ringtail/flake.lock`): bump + `nixpkgs` (b77b3de → 25f5383) and `disko` (5ba0c95 → 115e521) to latest. + `nixpkgs-services` was intentionally left pinned (skipped by the + `flake-update` pipeline). Routine recurring maintenance per [[manage-lockfile]]. +- Upgrade native macOS Alloy on indri to v1.16.0. Built on gilbert with Go + 1.26.2 + CGO (required for the macOS native DNS resolver, which Tailscale + MagicDNS depends on), scp'd to `~/.local/bin/alloy` on indri, codesigned, + and the LaunchAgent reloaded. Completes the v1.16.0 fleet upgrade started + in #345 — all four Alloy services (alloy-k8s, alloy-ringtail, + alloy-tracing-ringtail, alloy ansible) now run v1.16.0. +- Upgraded zot on indri from v2.1.15 to v2.1.16 (security fixes: TLS verification on metrics client, CORS Allow-Credentials suppression on wildcard origins, manifest/API-key body size limits). + +### Documentation + +- Reviewed `replicating-blumeops` tutorial: fixed "BluemeOps" typos (also in `contributing.md`) and added `last-reviewed` frontmatter. 
+- Reviewed [[indri]] reference card: added `devpi`, `cv`, and `docs` to the native-services list; widened the k8s note to reflect the growing set of apps now on ringtail and the planned indri-minikube decommission; added CPU/RAM specs. +- New how-to: rotate-fly-deploy-token. Documents the 75-day rotation cadence, why we use `org`-scoped tokens (silences the cosmetic metrics-token warning on `fly status` with marginal blast-radius cost given the single-app personal org), and the procedure for rotation + Forgejo Actions secret sync. +- Add `docs/explanation/ai-scraper-mitigation.md` — the egress-cost / AI-crawler threat model for the public Fly proxy, the tiered mitigation plan (Tier 1: mirror black-hole, shipped; Tier 2: user-agent denylist + Anubis; Tier 3: Cloudflare, rejected on principle), and the data behind it. +- Fix manage-forgejo-mirrors verify step — sync button is on the repo settings page ("Synchronize now"), not the main repo page. +- Fixed the `op item edit` invocation in the [[zot]] API-key rotation procedure: the previous `pbpaste | op item edit ... "field[password]=-"` stdin syntax is rejected by op 2.34 as "invalid JSON" (recent op versions treat piped input as a full JSON template, not a single field value). Procedure now reads the clipboard into a local fish variable and passes it as an inline assignment. +- Fixed the export-filename step in [[run-1password-backup]]: 1Password's desktop app names the export `1PasswordExport-<account-id>-<timestamp>.1pux` automatically rather than letting you save to a fixed name, so the procedure now points the task at that glob instead of pretending the default name is `1Password-export.1pux`. +- Refresh the contributing tutorial: add `last-reviewed`, include the `.ai.md` changelog fragment type, and clarify that `prek` is pinned via `mise`.
+- Review and refresh the Navidrome reference card: add `last-reviewed`, correct the scanner env var name, document the current image/version, and record routing and runtime details from the manifests. +- Review and refresh the Ollama reference card: add `last-reviewed`, bump the documented image tag to 0.20.4, and add the two `qwen3.5` models now declared in `models.txt`. +- Reviewed [[1password]] reference card: added the `blumeops` vs `Personal` vault split, noted that `onepassword-connect` runs on both indri and ringtail (not just one cluster), and pulled the `op read` vs `op item get --fields` guidance up from agent memory into the card. +- Reviewed `index.md`; added ringtail to the infrastructure overview and stamped `last-reviewed`. +- Reviewed transmission card: corrected storage layout (`/config/` is emptyDir, watch dir disabled) and noted the Prometheus exporter sidecar. +- rotate-fly-deploy-token: combine mint+store into one command with both fish and bash forms; document the `op item edit` "Password item requires ps value" validator gotcha and the placeholder-password workaround. + +### AI Assistance + +- Adopt `AGENTS.md` as the canonical agent instruction file, keep `CLAUDE.md` as a compatibility shim, and update docs to reference the neutral file and the correct agent-change-process path. +- CLAUDE.md now imports AGENTS.md via `@AGENTS.md` instead of telling agents to go read it. Claude Code only auto-loads CLAUDE.md, so the prose shim was easy to skip; the import inlines AGENTS.md into the session prompt unconditionally. + +### Miscellaneous + +- Removed the dead minikube manifests, container builds, and tooling shims left behind after the cv + docs migration to indri-native (#342). Deletes `argocd/{apps,manifests}/{cv,docs}/`, `containers/{cv,quartz}/`, and the `quartz`→`docs` mapping in `mise-tasks/container-version-check`. Bumps `docs.current-version` to `v1.16.0` (the blumeops release tag) now that the legacy nginx-base version pin is gone. 
+- Rebuild shower v1.1.0 container from main HEAD (`3c7967e`) and bump the + kustomization tag to `v1.1.0-3c7967e-nix`. The PR was squash-merged, so + the branch commit `444ff91` baked into the prior tag isn't reachable + from main's history. The new tag points at a commit that exists on + main; image content is byte-identical because the FOD output is content + addressed and the inputs didn't change. +- Rebuild shower v1.1.2 from main HEAD (a33fa47) and retag — PR #358 was squash-merged so the branch SHA baked into the prior image tag isn't reachable from main. FOD is content-addressed, so image bytes are identical; only provenance changes. +- Remove the duplicate Homepage tiles for Mealie, Paperless, Immich, and + TeslaMate. Homepage runs on ringtail and autodiscovers ringtail Ingresses via + `gethomepage.dev/*` annotations; once these services migrated to ringtail they + were discovered automatically, making their leftover static `services.yaml` + entries (needed only while they lived on minikube) redundant. +- Removed the now-unused `containers/devpi/` Dagger build artifact. Devpi runs natively on indri via uv venv; the container image is no longer referenced anywhere. Doc examples in `docs/reference/tools/dagger.md` updated to use `miniflux` as the example container name. +- `container-build-and-release` now prints the specific `mise run runner-logs <run-number>` command after dispatching, polling the Forgejo API to resolve the run number for the commit it just triggered. +- `mise run runner-logs -j <job-id>` now reports a clear error when the log file doesn't exist on indri (e.g. a runner crash that left `action_task.log_in_storage = 0`). Previously it printed only the header and exited 0, because `zstdcat` exits 0 with a "can't stat … -- ignored" stderr message and ssh+fish on indri swallows the remote exit code.
+ + ## [v1.16.0] - 2026-04-18 ### Infrastructure diff --git a/ansible/roles/docs/defaults/main.yml b/ansible/roles/docs/defaults/main.yml index f09221b..a5a1a8a 100644 --- a/ansible/roles/docs/defaults/main.yml +++ b/ansible/roles/docs/defaults/main.yml @@ -3,9 +3,8 @@ # Caddy serves docs_content_dir directly via the static-kind service block, # with Quartz-style try_files (path → path/ → path.html → 404). -docs_version: "v1.16.0" +docs_version: "v1.17.0" docs_release_url: "https://forge.eblu.me/eblume/blumeops/releases/download/{{ docs_version }}/docs-{{ docs_version }}.tar.gz" - docs_home: /Users/erichblume/blumeops/docs docs_content_dir: "{{ docs_home }}/content" docs_version_sentinel: "{{ docs_home }}/.installed-version" diff --git a/docs/changelog.d/+1password-backup-doc-export-name.doc.md b/docs/changelog.d/+1password-backup-doc-export-name.doc.md deleted file mode 100644 index 6c4d262..0000000 --- a/docs/changelog.d/+1password-backup-doc-export-name.doc.md +++ /dev/null @@ -1 +0,0 @@ -Fixed the export-filename step in [[run-1password-backup]]: 1Password's desktop app names the export `1PasswordExport--.1pux` automatically rather than letting you save to a fixed name, so the procedure now points the task at that glob instead of pretending the default name is `1Password-export.1pux`. diff --git a/docs/changelog.d/+agent-file-neutralization.ai.md b/docs/changelog.d/+agent-file-neutralization.ai.md deleted file mode 100644 index da16fba..0000000 --- a/docs/changelog.d/+agent-file-neutralization.ai.md +++ /dev/null @@ -1 +0,0 @@ -Adopt `AGENTS.md` as the canonical agent instruction file, keep `CLAUDE.md` as a compatibility shim, and update docs to reference the neutral file and the correct agent-change-process path. 
diff --git a/docs/changelog.d/+ai-scraper-mitigation-doc.doc.md b/docs/changelog.d/+ai-scraper-mitigation-doc.doc.md deleted file mode 100644 index 246fedb..0000000 --- a/docs/changelog.d/+ai-scraper-mitigation-doc.doc.md +++ /dev/null @@ -1 +0,0 @@ -Add `docs/explanation/ai-scraper-mitigation.md` — the egress-cost / AI-crawler threat model for the public Fly proxy, the tiered mitigation plan (Tier 1: mirror black-hole, shipped; Tier 2: user-agent denylist + Anubis; Tier 3: Cloudflare, rejected on principle), and the data behind it. diff --git a/docs/changelog.d/+alloy-main-sha-rebuild.infra.md b/docs/changelog.d/+alloy-main-sha-rebuild.infra.md deleted file mode 100644 index 42a7b37..0000000 --- a/docs/changelog.d/+alloy-main-sha-rebuild.infra.md +++ /dev/null @@ -1,5 +0,0 @@ -Rebuild and retag alloy v1.16.0 container images from the main-branch SHA -following the squash-merge of #345, per the build-container-image -squash-merge convention. Both images (`registry.ops.eblu.me/blumeops/alloy`) -now reference `9564435` rather than the branch SHA `26a3ab5`, restoring -source traceability after branch cleanup. diff --git a/docs/changelog.d/+alloy-native-macos-v1.16.0.infra.md b/docs/changelog.d/+alloy-native-macos-v1.16.0.infra.md deleted file mode 100644 index 471990f..0000000 --- a/docs/changelog.d/+alloy-native-macos-v1.16.0.infra.md +++ /dev/null @@ -1,6 +0,0 @@ -Upgrade native macOS Alloy on indri to v1.16.0. Built on gilbert with Go -1.26.2 + CGO (required for the macOS native DNS resolver, which Tailscale -MagicDNS depends on), scp'd to `~/.local/bin/alloy` on indri, codesigned, -and the LaunchAgent reloaded. Completes the v1.16.0 fleet upgrade started -in #345 — all four Alloy services (alloy-k8s, alloy-ringtail, -alloy-tracing-ringtail, alloy ansible) now run v1.16.0. 
diff --git a/docs/changelog.d/+argocd-resource-limits.infra.md b/docs/changelog.d/+argocd-resource-limits.infra.md deleted file mode 100644 index ba24a5a..0000000 --- a/docs/changelog.d/+argocd-resource-limits.infra.md +++ /dev/null @@ -1 +0,0 @@ -Add resource limits to all ArgoCD pods to prevent unbounded resource consumption during node-wide pressure events. diff --git a/docs/changelog.d/+claude-md-import-agents.ai.md b/docs/changelog.d/+claude-md-import-agents.ai.md deleted file mode 100644 index f63231e..0000000 --- a/docs/changelog.d/+claude-md-import-agents.ai.md +++ /dev/null @@ -1 +0,0 @@ -CLAUDE.md now imports AGENTS.md via `@AGENTS.md` instead of telling agents to go read it. Claude Code only auto-loads CLAUDE.md, so the prose shim was easy to skip; the import inlines AGENTS.md into the session prompt unconditionally. diff --git a/docs/changelog.d/+container-build-suggest-runner-logs.misc.md b/docs/changelog.d/+container-build-suggest-runner-logs.misc.md deleted file mode 100644 index d10ea51..0000000 --- a/docs/changelog.d/+container-build-suggest-runner-logs.misc.md +++ /dev/null @@ -1 +0,0 @@ -`container-build-and-release` now prints the specific `mise run runner-logs ` command after dispatching, polling the Forgejo API to resolve the run number for the commit it just triggered. diff --git a/docs/changelog.d/+fix-forge-static-assets.bugfix.md b/docs/changelog.d/+fix-forge-static-assets.bugfix.md deleted file mode 100644 index de0517e..0000000 --- a/docs/changelog.d/+fix-forge-static-assets.bugfix.md +++ /dev/null @@ -1 +0,0 @@ -Fixed forge.eblu.me static assets (CSS, JS, images, fonts) not loading — the proxy's static asset cache block was missing the `Host` header, so Caddy couldn't route the requests. 
diff --git a/docs/changelog.d/+fly-deploy-immediate-strategy.infra.md b/docs/changelog.d/+fly-deploy-immediate-strategy.infra.md deleted file mode 100644 index 205bd6a..0000000 --- a/docs/changelog.d/+fly-deploy-immediate-strategy.infra.md +++ /dev/null @@ -1 +0,0 @@ -Switch the Fly proxy deploy strategy from `bluegreen` to `immediate` in `fly/fly.toml`. With a single proxy machine, bluegreen offers little benefit — the green machine routinely failed to reach "started" inside Fly's default 5-minute deploy timeout (the cold-start sequence of `tailscaled` → `tailscale up` → wait-for-MagicDNS → nginx startup eats most of the budget), and the failed deploys would roll back. `immediate` replaces the machine in place with a brief downtime (~5–10s) but actually completes. diff --git a/docs/changelog.d/+forge-mirrors-blackhole.infra.md b/docs/changelog.d/+forge-mirrors-blackhole.infra.md deleted file mode 100644 index 29a5e6a..0000000 --- a/docs/changelog.d/+forge-mirrors-blackhole.infra.md +++ /dev/null @@ -1 +0,0 @@ -Black-hole the `/mirrors/*` repositories at the Fly proxy edge (`return 403` → `forge.ops.eblu.me`). A surprise $29.60 Fly bill traced to ~1.24 TB/30d of egress on `forge.eblu.me`, 99.95% of all proxy egress — of which ~71% was AI scrapers (Meta `meta-externalagent`, OpenAI `GPTBot`, Amazonbot) crawling the near-infinite git-history URL space of the public mirror repos and timing out Forgejo in the process. Mirrors exist for supply-chain control and are consumed over the tailnet, so their public web UI had no legitimate audience. `robots.txt` already disallowed `/mirrors/`, but the offending agents ignore it. Tier-2 mitigations (user-agent denylist, Anubis proof-of-work gateway) are documented in `docs/explanation/ai-scraper-mitigation.md`. 
diff --git a/docs/changelog.d/+frigate-notify-local.infra.md b/docs/changelog.d/+frigate-notify-local.infra.md deleted file mode 100644 index 120f915..0000000 --- a/docs/changelog.d/+frigate-notify-local.infra.md +++ /dev/null @@ -1 +0,0 @@ -Add local nix container build for `frigate-notify` (`containers/frigate-notify/default.nix`) so the Frigate→ntfy bridge is rebuilt on ringtail from the forge mirror instead of pulled from `ghcr.io/0x2142/frigate-notify`. diff --git a/docs/changelog.d/+grafana-recreate-strategy.infra.md b/docs/changelog.d/+grafana-recreate-strategy.infra.md deleted file mode 100644 index 3662e10..0000000 --- a/docs/changelog.d/+grafana-recreate-strategy.infra.md +++ /dev/null @@ -1 +0,0 @@ -Switched Grafana's deployment strategy from `RollingUpdate` to `Recreate`. With an RWO PVC holding the SQLite database and Bleve search index, `RollingUpdate` reliably crashloops the new pod on the index lock until rollout timeout. `Recreate` terminates the old pod first so the new one acquires the lock cleanly. diff --git a/docs/changelog.d/+homepage-config-perms-fix.bugfix.md b/docs/changelog.d/+homepage-config-perms-fix.bugfix.md deleted file mode 100644 index 20e1135..0000000 --- a/docs/changelog.d/+homepage-config-perms-fix.bugfix.md +++ /dev/null @@ -1,5 +0,0 @@ -Fixed homepage container EACCES on cold start: the nix-built image now chowns -`/app/config` to uid 1000 at build time via `fakeRootCommands`, matching the -behavior of the old Dockerfile. Without this, homepage couldn't seed missing -skeleton configs (proxmox.yaml etc.) or create `/app/config/logs`, crashing on -its first uncached request. Caught during the ringtail cutover. 
diff --git a/docs/changelog.d/+homepage-dedup-migrated.misc.md b/docs/changelog.d/+homepage-dedup-migrated.misc.md deleted file mode 100644 index 9efc5ba..0000000 --- a/docs/changelog.d/+homepage-dedup-migrated.misc.md +++ /dev/null @@ -1,5 +0,0 @@ -Remove the duplicate Homepage tiles for Mealie, Paperless, Immich, and -TeslaMate. Homepage runs on ringtail and autodiscovers ringtail Ingresses via -`gethomepage.dev/*` annotations; once these services migrated to ringtail they -were discovered automatically, making their leftover static `services.yaml` -entries (needed only while they lived on minikube) redundant. diff --git a/docs/changelog.d/+immich-probe-ringtail.infra.md b/docs/changelog.d/+immich-probe-ringtail.infra.md deleted file mode 100644 index f2d3dee..0000000 --- a/docs/changelog.d/+immich-probe-ringtail.infra.md +++ /dev/null @@ -1 +0,0 @@ -Moved the Immich blackbox health probe from indri's alloy to ringtail's alloy. After the immich migration to ringtail, the probe still targeted `immich-server.immich.svc.cluster.local` on indri's cluster where the service no longer exists, causing a persistent `ServiceProbeFailure` alert. diff --git a/docs/changelog.d/+manage-forgejo-mirrors-sync-location.doc.md b/docs/changelog.d/+manage-forgejo-mirrors-sync-location.doc.md deleted file mode 100644 index f71fc81..0000000 --- a/docs/changelog.d/+manage-forgejo-mirrors-sync-location.doc.md +++ /dev/null @@ -1 +0,0 @@ -Fix manage-forgejo-mirrors verify step — sync button is on the repo settings page ("Synchronize now"), not the main repo page. diff --git a/docs/changelog.d/+pin-quartz-v4.bugfix.md b/docs/changelog.d/+pin-quartz-v4.bugfix.md deleted file mode 100644 index e073bbb..0000000 --- a/docs/changelog.d/+pin-quartz-v4.bugfix.md +++ /dev/null @@ -1 +0,0 @@ -Pin the Quartz docs build to v4.5.2. 
The Dagger `build_docs` pipeline cloned Quartz from the default branch unpinned; Quartz v5.0.0 restructured its config layout (`.quartz/plugins`, `../quartz` imports) and broke the docs build against our existing `quartz.config.ts`/`quartz.layout.ts`. diff --git a/docs/changelog.d/+prowler-rebuild-on-main.infra.md b/docs/changelog.d/+prowler-rebuild-on-main.infra.md deleted file mode 100644 index 107b687..0000000 --- a/docs/changelog.d/+prowler-rebuild-on-main.infra.md +++ /dev/null @@ -1 +0,0 @@ -Rebuild Prowler container against main HEAD (v5.23.0-495e45d) after merging the IaC mutelist Dockerfile changes. diff --git a/docs/changelog.d/+remove-devpi-container-build.misc.md b/docs/changelog.d/+remove-devpi-container-build.misc.md deleted file mode 100644 index 8ebec54..0000000 --- a/docs/changelog.d/+remove-devpi-container-build.misc.md +++ /dev/null @@ -1 +0,0 @@ -Removed the now-unused `containers/devpi/` Dagger build artifact. Devpi runs natively on indri via uv venv; the container image is no longer referenced anywhere. Doc examples in `docs/reference/tools/dagger.md` updated to use `miniflux` as the example container name. diff --git a/docs/changelog.d/+retire-todoist-for-heph.infra.md b/docs/changelog.d/+retire-todoist-for-heph.infra.md deleted file mode 100644 index f6284d0..0000000 --- a/docs/changelog.d/+retire-todoist-for-heph.infra.md +++ /dev/null @@ -1 +0,0 @@ -Retired the `blumeops-tasks` mise task (Todoist API) in favor of `heph list --project Blumeops --json` from the self-hosted [hephaestus](https://github.com/eblume/hephaestus) system. Updated docs to point task discovery and rotation reminders at heph, and noted that the `~/code/personal/zk` zettelkasten is migrating into heph docs. 
diff --git a/docs/changelog.d/+review-1password-doc.doc.md b/docs/changelog.d/+review-1password-doc.doc.md deleted file mode 100644 index bba9591..0000000 --- a/docs/changelog.d/+review-1password-doc.doc.md +++ /dev/null @@ -1 +0,0 @@ -Reviewed [[1password]] reference card: added the `blumeops` vs `Personal` vault split, noted that `onepassword-connect` runs on both indri and ringtail (not just one cluster), and pulled the `op read` vs `op item get --fields` guidance up from agent memory into the card. diff --git a/docs/changelog.d/+review-compliance-image-iac.feature.md b/docs/changelog.d/+review-compliance-image-iac.feature.md deleted file mode 100644 index 1125359..0000000 --- a/docs/changelog.d/+review-compliance-image-iac.feature.md +++ /dev/null @@ -1 +0,0 @@ -`review-compliance-reports` now also fetches and summarizes the weekly Prowler container-image and IaC scans (previously only the K8s CIS in-cluster scan was processed). For each scan it shows status counts, severity breakdown, week-over-week delta, and — for the high-volume image/IaC scans — top-N tables grouped by check ID and resource instead of per-finding listings. diff --git a/docs/changelog.d/+review-contributing-doc.doc.md b/docs/changelog.d/+review-contributing-doc.doc.md deleted file mode 100644 index c394a01..0000000 --- a/docs/changelog.d/+review-contributing-doc.doc.md +++ /dev/null @@ -1 +0,0 @@ -Refresh the contributing tutorial: add `last-reviewed`, include the `.ai.md` changelog fragment type, and clarify that `prek` is pinned via `mise`. diff --git a/docs/changelog.d/+review-index-doc.doc.md b/docs/changelog.d/+review-index-doc.doc.md deleted file mode 100644 index 7016a7a..0000000 --- a/docs/changelog.d/+review-index-doc.doc.md +++ /dev/null @@ -1 +0,0 @@ -Reviewed `index.md`; added ringtail to the infrastructure overview and stamped `last-reviewed`. 
diff --git a/docs/changelog.d/+review-navidrome-doc.doc.md b/docs/changelog.d/+review-navidrome-doc.doc.md deleted file mode 100644 index fbe5e79..0000000 --- a/docs/changelog.d/+review-navidrome-doc.doc.md +++ /dev/null @@ -1 +0,0 @@ -Review and refresh the Navidrome reference card: add `last-reviewed`, correct the scanner env var name, document the current image/version, and record routing and runtime details from the manifests. diff --git a/docs/changelog.d/+review-ollama-doc.doc.md b/docs/changelog.d/+review-ollama-doc.doc.md deleted file mode 100644 index 05ef23e..0000000 --- a/docs/changelog.d/+review-ollama-doc.doc.md +++ /dev/null @@ -1 +0,0 @@ -Review and refresh the Ollama reference card: add `last-reviewed`, bump the documented image tag to 0.20.4, and add the two `qwen3.5` models now declared in `models.txt`. diff --git a/docs/changelog.d/+ringtail-clone-via-tailnet.infra.md b/docs/changelog.d/+ringtail-clone-via-tailnet.infra.md deleted file mode 100644 index d664163..0000000 --- a/docs/changelog.d/+ringtail-clone-via-tailnet.infra.md +++ /dev/null @@ -1 +0,0 @@ -Switch the ringtail provisioning playbook's blumeops clone URL from `forge.eblu.me` (public, via Fly proxy) to `forge.ops.eblu.me` (tailnet, direct via Caddy on indri). Ringtail is always on the tailnet, so the WAN round-trip is pure overhead — it also made `provision-ringtail` brittle whenever the Fly proxy was slow or down. diff --git a/docs/changelog.d/+ringtail-coredump-size-cap.infra.md b/docs/changelog.d/+ringtail-coredump-size-cap.infra.md deleted file mode 100644 index 824b2df..0000000 --- a/docs/changelog.d/+ringtail-coredump-size-cap.infra.md +++ /dev/null @@ -1 +0,0 @@ -Cap systemd-coredump on ringtail (ProcessSizeMax/ExternalSizeMax 1G, MaxUse 2G) so multi-GB Wine/Proton game crash dumps no longer thrash the disk and lock up the desktop. 
diff --git a/docs/changelog.d/+ringtail-flake-update-2026-06-01.infra.md b/docs/changelog.d/+ringtail-flake-update-2026-06-01.infra.md deleted file mode 100644 index dd488b6..0000000 --- a/docs/changelog.d/+ringtail-flake-update-2026-06-01.infra.md +++ /dev/null @@ -1,4 +0,0 @@ -Update the ringtail NixOS flake lockfile (`nixos/ringtail/flake.lock`): bump -`nixpkgs` (b77b3de → 25f5383) and `disko` (5ba0c95 → 115e521) to latest. -`nixpkgs-services` was intentionally left pinned (skipped by the -`flake-update` pipeline). Routine recurring maintenance per [[manage-lockfile]]. diff --git a/docs/changelog.d/+ringtail-proton-ge.infra.md b/docs/changelog.d/+ringtail-proton-ge.infra.md deleted file mode 100644 index 0d8bc04..0000000 --- a/docs/changelog.d/+ringtail-proton-ge.infra.md +++ /dev/null @@ -1,4 +0,0 @@ -Add GE-Proton (`pkgs.proton-ge-bin`) to `programs.steam.extraCompatPackages` -on ringtail. Subnautica 2 hangs at Mercuna plugin init under Proton -Experimental + DXVK D3D12; GE-Proton is available as a Steam per-game -compatibility option to work around it. diff --git a/docs/changelog.d/+ringtail-sn2-prelaunch.infra.md b/docs/changelog.d/+ringtail-sn2-prelaunch.infra.md deleted file mode 100644 index f9c68e2..0000000 --- a/docs/changelog.d/+ringtail-sn2-prelaunch.infra.md +++ /dev/null @@ -1,6 +0,0 @@ -Add `sn2-prelaunch` Steam launch wrapper on ringtail that removes -Subnautica 2's stale `Saved/running.dat` and `Saved/beforelobby.dat` -lockfiles before each launch. SN2 pops up an invisible (0×0-sized) -Error dialog when it detects an unclean exit, blocking GameThread -forever; this is observable only as a black screen with a spinning -loader. Use via Steam launch option: `sn2-prelaunch %command%`. 
diff --git a/docs/changelog.d/+ringtail-sway-fuzzel.bugfix.md b/docs/changelog.d/+ringtail-sway-fuzzel.bugfix.md deleted file mode 100644 index 6801040..0000000 --- a/docs/changelog.d/+ringtail-sway-fuzzel.bugfix.md +++ /dev/null @@ -1,3 +0,0 @@ -Fixed sway keybindings on ringtail — the home-manager `keybindings` block was replacing the module's defaults entirely, leaving only explicit overrides (no workspace switching, focus, move, splits, resize mode, etc). Switched to `lib.mkOptionDefault` with `lib.mkForce` on the conflicting custom binds (`Mod+Return`, `Mod+d`, `Mod+space`, `Mod+l`) so defaults merge back in. Also added `Mod+F1` to show a filterable fuzzel list of current keybindings. - -Fixed fuzzel config errors on launch — `border-radius` and `border-width` were under `[main]`, but fuzzel expects them as `radius`/`width` under a `[border]` section. diff --git a/docs/changelog.d/+ringtail-vrr-flicker.bugfix.md b/docs/changelog.d/+ringtail-vrr-flicker.bugfix.md deleted file mode 100644 index cb23344..0000000 --- a/docs/changelog.d/+ringtail-vrr-flicker.bugfix.md +++ /dev/null @@ -1 +0,0 @@ -Disabled adaptive sync (VRR) on ringtail's DP-1 output. The OMEN 27i IPS panel pumps brightness when its refresh rate swings into the low VRR range during low-framerate content (e.g. game cutscenes), producing a flicker that worsened over a session until a reboot. Pinning the panel to a fixed 165Hz eliminates it. diff --git a/docs/changelog.d/+rotate-fly-deploy-token-shell-examples.doc.md b/docs/changelog.d/+rotate-fly-deploy-token-shell-examples.doc.md deleted file mode 100644 index 24ffcb9..0000000 --- a/docs/changelog.d/+rotate-fly-deploy-token-shell-examples.doc.md +++ /dev/null @@ -1 +0,0 @@ -rotate-fly-deploy-token: combine mint+store into one command with both fish and bash forms; document the `op item edit` "Password item requires ps value" validator gotcha and the placeholder-password workaround. 
diff --git a/docs/changelog.d/+runner-logs-auth.feature.md b/docs/changelog.d/+runner-logs-auth.feature.md deleted file mode 100644 index 9ee6fa1..0000000 --- a/docs/changelog.d/+runner-logs-auth.feature.md +++ /dev/null @@ -1 +0,0 @@ -runner-logs now authenticates with Forgejo API token and auto-detects the repo from git remote. Job logs are fetched via SSH to indri (reading Forgejo's on-disk zstd log files) instead of the web endpoint, which doesn't support token auth for private repos. diff --git a/docs/changelog.d/+runner-logs-missing-log.misc.md b/docs/changelog.d/+runner-logs-missing-log.misc.md deleted file mode 100644 index c06704a..0000000 --- a/docs/changelog.d/+runner-logs-missing-log.misc.md +++ /dev/null @@ -1 +0,0 @@ -`mise run runner-logs -j ` now reports a clear error when the log file doesn't exist on indri (e.g. a runner crash that left `action_task.log_in_storage = 0`). Previously it printed only the header and exited 0, because `zstdcat` exits 0 with a "can't stat … -- ignored" stderr message and ssh+fish on indri swallows the remote exit code. diff --git a/docs/changelog.d/+shower-1.1.1-deploy.infra.md b/docs/changelog.d/+shower-1.1.1-deploy.infra.md deleted file mode 100644 index 61244ac..0000000 --- a/docs/changelog.d/+shower-1.1.1-deploy.infra.md +++ /dev/null @@ -1 +0,0 @@ -Deploy shower v1.1.1 to ringtail (kustomize newTag bump). diff --git a/docs/changelog.d/+shower-1.1.1-fod-pin.infra.md b/docs/changelog.d/+shower-1.1.1-fod-pin.infra.md deleted file mode 100644 index a19b578..0000000 --- a/docs/changelog.d/+shower-1.1.1-fod-pin.infra.md +++ /dev/null @@ -1 +0,0 @@ -Pin shower v1.1.1 FOD outputHash (probed locally on ringtail). diff --git a/docs/changelog.d/+shower-1.1.1.infra.md b/docs/changelog.d/+shower-1.1.1.infra.md deleted file mode 100644 index eb9476c..0000000 --- a/docs/changelog.d/+shower-1.1.1.infra.md +++ /dev/null @@ -1 +0,0 @@ -Bump shower container to v1.1.1 (probe FOD hash). 
diff --git a/docs/changelog.d/+shower-1.1.3-deploy.infra.md b/docs/changelog.d/+shower-1.1.3-deploy.infra.md deleted file mode 100644 index 833fac6..0000000 --- a/docs/changelog.d/+shower-1.1.3-deploy.infra.md +++ /dev/null @@ -1 +0,0 @@ -Deployed shower v1.1.3 to ringtail (image built and pushed from ringtail; runner bypassed due to indri overload). diff --git a/docs/changelog.d/+shower-1.1.3.infra.md b/docs/changelog.d/+shower-1.1.3.infra.md deleted file mode 100644 index 33ee49d..0000000 --- a/docs/changelog.d/+shower-1.1.3.infra.md +++ /dev/null @@ -1 +0,0 @@ -Bumped shower app to v1.1.3 (wheel/sdist + FOD hashes probed on ringtail). diff --git a/docs/changelog.d/+shower-main-sha-rebuild.infra.md b/docs/changelog.d/+shower-main-sha-rebuild.infra.md deleted file mode 100644 index f1751b5..0000000 --- a/docs/changelog.d/+shower-main-sha-rebuild.infra.md +++ /dev/null @@ -1,5 +0,0 @@ -Rebuild shower from the post-merge commit on main so the container's -SHA tag points at a commit that will still exist after the 30-day -branch-cleanup window. Functionally identical to the branch-tag image -already deployed, just preserves source traceability per -[[build-container-image#Squash-merge and container tags]]. diff --git a/docs/changelog.d/+shower-rebuild-from-main-sha.misc.md b/docs/changelog.d/+shower-rebuild-from-main-sha.misc.md deleted file mode 100644 index a9495cd..0000000 --- a/docs/changelog.d/+shower-rebuild-from-main-sha.misc.md +++ /dev/null @@ -1,6 +0,0 @@ -Rebuild shower v1.1.0 container from main HEAD (`3c7967e`) and bump the -kustomization tag to `v1.1.0-3c7967e-nix`. The PR was squash-merged, so -the branch commit `444ff91` baked into the prior tag isn't reachable -from main's history. The new tag points at a commit that exists on -main; image content is byte-identical because the FOD output is content -addressed and the inputs didn't change. 
diff --git a/docs/changelog.d/+shower-v1.1.2-rebuild-from-main-sha.misc.md b/docs/changelog.d/+shower-v1.1.2-rebuild-from-main-sha.misc.md deleted file mode 100644 index 9355a54..0000000 --- a/docs/changelog.d/+shower-v1.1.2-rebuild-from-main-sha.misc.md +++ /dev/null @@ -1 +0,0 @@ -Rebuild shower v1.1.2 from main HEAD (a33fa47) and retag — PR #358 was squash-merged so the branch SHA baked into the prior image tag isn't reachable from main. FOD is content-addressed, so image bytes are identical; only provenance changes. diff --git a/docs/changelog.d/+tailscale-main-sha-rebuild.infra.md b/docs/changelog.d/+tailscale-main-sha-rebuild.infra.md deleted file mode 100644 index 24bb81c..0000000 --- a/docs/changelog.d/+tailscale-main-sha-rebuild.infra.md +++ /dev/null @@ -1 +0,0 @@ -Update `tailscale-operator-ringtail` ProxyClass to reference the `0108b68` main-SHA build of the tailscale container. Routine post-merge cleanup so the deployed image traces to a commit that survives PR branch cleanup. diff --git a/docs/changelog.d/+transmission-doc-review.doc.md b/docs/changelog.d/+transmission-doc-review.doc.md deleted file mode 100644 index 418504f..0000000 --- a/docs/changelog.d/+transmission-doc-review.doc.md +++ /dev/null @@ -1 +0,0 @@ -Reviewed transmission card: corrected storage layout (`/config/` is emptyDir, watch dir disabled) and noted the Prometheus exporter sidecar. diff --git a/docs/changelog.d/+unpoller-rebuild-on-main.infra.md b/docs/changelog.d/+unpoller-rebuild-on-main.infra.md deleted file mode 100644 index 60ae8fa..0000000 --- a/docs/changelog.d/+unpoller-rebuild-on-main.infra.md +++ /dev/null @@ -1 +0,0 @@ -Rebuild unpoller container from squashed main commit so the image SHA tag matches a commit in main's history (was tagged with the pre-squash branch SHA). 
diff --git a/docs/changelog.d/+valkey-main-tag-bump.infra.md b/docs/changelog.d/+valkey-main-tag-bump.infra.md deleted file mode 100644 index cd19f60..0000000 --- a/docs/changelog.d/+valkey-main-tag-bump.infra.md +++ /dev/null @@ -1 +0,0 @@ -Bump paperless and immich kustomizations to the main-SHA-built valkey tag (`v8.1.6-r0-fabca04`). Routine post-merge follow-up to keep production manifests pointing at images built from a commit on main. diff --git a/docs/changelog.d/+valkey-rebuild-on-main.infra.md b/docs/changelog.d/+valkey-rebuild-on-main.infra.md deleted file mode 100644 index c743e61..0000000 --- a/docs/changelog.d/+valkey-rebuild-on-main.infra.md +++ /dev/null @@ -1 +0,0 @@ -Rebuild valkey container from squashed main commit (both arm64 dagger and amd64 nix variants), and update paperless + immich-ringtail kustomizations to the main-SHA tags `v8.1.7-ecded30` and `v8.1.7-ecded30-nix`. diff --git a/docs/changelog.d/+wave1-decommission-followups.infra.md b/docs/changelog.d/+wave1-decommission-followups.infra.md deleted file mode 100644 index 7b54d52..0000000 --- a/docs/changelog.d/+wave1-decommission-followups.infra.md +++ /dev/null @@ -1,8 +0,0 @@ -Fix three follow-ups from the wave-1 decommission: grant the local -break-glass `admin` account ArgoCD admin rights (`g, admin, role:admin` — -previously only the Authentik `admins` group had access, so admin was -locked out whenever its token expired), and repoint the alloy blackbox -probe for teslamate from the deleted minikube service to -`https://tesla.ops.eblu.me/` (through Caddy over Tailscale). The orphaned -paperless/teslamate roles + ExternalSecrets left on the minikube -blumeops-pg are also cleaned up. 
diff --git a/docs/changelog.d/+zot-ci-rotation-op-syntax.doc.md b/docs/changelog.d/+zot-ci-rotation-op-syntax.doc.md deleted file mode 100644 index ec8834f..0000000 --- a/docs/changelog.d/+zot-ci-rotation-op-syntax.doc.md +++ /dev/null @@ -1 +0,0 @@ -Fixed the `op item edit` invocation in the [[zot]] API-key rotation procedure: the previous `pbpaste | op item edit ... "field[password]=-"` stdin syntax is rejected by op 2.34 as "invalid JSON" (recent op versions treat piped input as a full JSON template, not a single field value). Procedure now reads the clipboard into a local fish variable and passes it as an inline assignment. diff --git a/docs/changelog.d/+zot-v2.1.16.infra.md b/docs/changelog.d/+zot-v2.1.16.infra.md deleted file mode 100644 index f007164..0000000 --- a/docs/changelog.d/+zot-v2.1.16.infra.md +++ /dev/null @@ -1 +0,0 @@ -Upgraded zot on indri from v2.1.15 to v2.1.16 (security fixes: TLS verification on metrics client, CORS Allow-Credentials suppression on wildcard origins, manifest/API-key body size limits). diff --git a/docs/changelog.d/alloy-v1.16.0.infra.md b/docs/changelog.d/alloy-v1.16.0.infra.md deleted file mode 100644 index cd9a1ef..0000000 --- a/docs/changelog.d/alloy-v1.16.0.infra.md +++ /dev/null @@ -1,5 +0,0 @@ -Upgrade Grafana Alloy v1.14.0 → v1.16.0 across all four service deployments -(alloy-k8s, alloy-ringtail, alloy-tracing-ringtail on k8s; alloy native on -indri). Pulls in stable database observability (v1.15) and the OTel Collector -v0.147.0 bump. Container build also migrated from Dockerfile to native Dagger -`container.py` per the build-container-image migration playbook. 
diff --git a/docs/changelog.d/backup-grafana-ringtail-blumeops-pg.infra.md b/docs/changelog.d/backup-grafana-ringtail-blumeops-pg.infra.md deleted file mode 100644 index 33b041f..0000000 --- a/docs/changelog.d/backup-grafana-ringtail-blumeops-pg.infra.md +++ /dev/null @@ -1,8 +0,0 @@ -Wire the ringtail `blumeops-pg` cluster (which holds the wave-1-migrated -paperless + teslamate databases) into backups and Grafana. Adds a Tailscale -LoadBalancer Service (`blumeops-pg-ringtail.tail8d86e.ts.net`) and a Caddy L4 -route (`pg.ops.eblu.me:5434`), then repoints borgmatic's `teslamate` + -`paperless` postgres dumps and the `mealie` SQLite dump at ringtail, and the -Grafana TeslaMate datasource at the ringtail DB. Closes the backup gap that -opened at cutover (the migrated live data was still being backed up from the -now-frozen minikube copies) and unblocks the wave-1 decommission. diff --git a/docs/changelog.d/cleanup-cv-docs-minikube-artifacts.misc.md b/docs/changelog.d/cleanup-cv-docs-minikube-artifacts.misc.md deleted file mode 100644 index 79a81cf..0000000 --- a/docs/changelog.d/cleanup-cv-docs-minikube-artifacts.misc.md +++ /dev/null @@ -1 +0,0 @@ -Removed the dead minikube manifests, container builds, and tooling shims left behind after the cv + docs migration to indri-native (#342). Deletes `argocd/{apps,manifests}/{cv,docs}/`, `containers/{cv,quartz}/`, and the `quartz`→`docs` mapping in `mise-tasks/container-version-check`. Bumps `docs.current-version` to `v1.16.0` (the blumeops release tag) now that the legacy nginx-base version pin is gone. 
diff --git a/docs/changelog.d/dagger-0-20-6-runner-image-alpine.infra.md b/docs/changelog.d/dagger-0-20-6-runner-image-alpine.infra.md deleted file mode 100644 index 35f77c2..0000000 --- a/docs/changelog.d/dagger-0-20-6-runner-image-alpine.infra.md +++ /dev/null @@ -1 +0,0 @@ -Upgraded Dagger from v0.20.1 to v0.20.6 (engine, CLI pin, and SDK regen) and migrated `runner-job-image` from a Debian-based Dockerfile to a native Dagger `container.py` on Alpine 3.23, reusing the shared `alpine_runtime` helper. diff --git a/docs/changelog.d/decommission-wave1-minikube.infra.md b/docs/changelog.d/decommission-wave1-minikube.infra.md deleted file mode 100644 index 63b3ab5..0000000 --- a/docs/changelog.d/decommission-wave1-minikube.infra.md +++ /dev/null @@ -1,8 +0,0 @@ -Decommission the wave-1 services on minikube-indri now that paperless, -teslamate, and mealie run on ringtail with their data backed up. Removes the -minikube `paperless`/`teslamate`/`mealie` manifest dirs + ArgoCD app -definitions (pruning the parked Deployments, Services, and the redundant -minikube mealie/paperless PVCs), and drops the `paperless`/`teslamate` roles -from the minikube `blumeops-pg` cluster. The `paperless` and `teslamate` -databases are dropped from indri's blumeops-pg as the finalization step. -miniflux + authentik remain on the minikube cluster (later waves). diff --git a/docs/changelog.d/doc-review-replicating-blumeops.doc.md b/docs/changelog.d/doc-review-replicating-blumeops.doc.md deleted file mode 100644 index e9e6d0f..0000000 --- a/docs/changelog.d/doc-review-replicating-blumeops.doc.md +++ /dev/null @@ -1 +0,0 @@ -Reviewed `replicating-blumeops` tutorial: fixed "BluemeOps" typos (also in `contributing.md`) and added `last-reviewed` frontmatter. 
diff --git a/docs/changelog.d/fix-borgmatic-shower-via-ssh.bugfix.md b/docs/changelog.d/fix-borgmatic-shower-via-ssh.bugfix.md deleted file mode 100644 index e18272c..0000000 --- a/docs/changelog.d/fix-borgmatic-shower-via-ssh.bugfix.md +++ /dev/null @@ -1,14 +0,0 @@ -Fix nightly borgmatic backups failing for 2 days. The shower SQLite -dump hook referenced `kubectl --context=k3s-ringtail`, but indri's -kubeconfig deliberately doesn't carry the ringtail credentials. The -`before_backup` hook's failure aborted the entire run, taking out -*both* the local sifaka repo and the BorgBase offsite. Replaced -the inline-shell dump with a `~/bin/borgmatic-k8s-sqlite-dump` -helper deployed by the ansible role. Each dump entry now declares a -`target` of either `local:` (mealie — kubectl uses indri's -kubeconfig) or `ssh:` (shower — ssh into ringtail and -run `k3s kubectl` there, no indri-side kubeconfig needed; k3s.yaml -on ringtail is mode 644 so no sudo required). Bytes stream back via -`kubectl exec ... -- cat` rather than `kubectl cp`, since `kubectl -cp` requires `tar` inside the pod and nix-built images like shower -don't bundle it. diff --git a/docs/changelog.d/forgejo-runner-v12-8-server-connections.infra.md b/docs/changelog.d/forgejo-runner-v12-8-server-connections.infra.md deleted file mode 100644 index cc35684..0000000 --- a/docs/changelog.d/forgejo-runner-v12-8-server-connections.infra.md +++ /dev/null @@ -1 +0,0 @@ -Upgraded the k8s Forgejo runner to the v12.8 line, switched it from first-boot registration to declarative `server.connections` credentials from 1Password, and consolidated the supporting runner how-to documentation. diff --git a/docs/changelog.d/homepage-to-ringtail.infra.md b/docs/changelog.d/homepage-to-ringtail.infra.md deleted file mode 100644 index 1e3e795..0000000 --- a/docs/changelog.d/homepage-to-ringtail.infra.md +++ /dev/null @@ -1,8 +0,0 @@ -Migrated homepage dashboard from minikube (indri/arm64) to k3s (ringtail/amd64). 
-The container is now built via nix (`containers/homepage/default.nix`), adapted -from nixpkgs `homepage-dashboard` with the upstream Next.js cache patches and -wrapped with `dockerTools.buildLayeredImage`. Autodiscovery shifts: services on -minikube (ArgoCD, Immich, Kiwix, Mealie, Miniflux, Grafana, Prometheus, -Navidrome, Paperless, TeslaMate, Transmission) become explicit static entries -in `services.yaml`; ringtail services (Authentik, Frigate/NVR, Ntfy, Ollama) -auto-populate via Ingress annotations. diff --git a/docs/changelog.d/migrate-cv-docs-to-indri.infra.md b/docs/changelog.d/migrate-cv-docs-to-indri.infra.md deleted file mode 100644 index 608a6b9..0000000 --- a/docs/changelog.d/migrate-cv-docs-to-indri.infra.md +++ /dev/null @@ -1 +0,0 @@ -Migrated CV (`cv.eblu.me`) and Docs (`docs.eblu.me`) from minikube Deployments to indri-native ansible roles. Caddy now serves the extracted release tarballs directly via a new `kind: static` service-block in the Caddy template — no daemon, no container — replacing the prior nginx-in-a-pod layer. Removes a network hop on every request and shrinks minikube's footprint. See [[cv-on-indri]] and [[docs-on-indri]]. Part of the broader minikube wind-down. diff --git a/docs/changelog.d/migrate-devpi-to-indri.infra.md b/docs/changelog.d/migrate-devpi-to-indri.infra.md deleted file mode 100644 index 418db70..0000000 --- a/docs/changelog.d/migrate-devpi-to-indri.infra.md +++ /dev/null @@ -1 +0,0 @@ -Migrated devpi (PyPI mirror at `pypi.ops.eblu.me`) from a minikube StatefulSet to a launchd-managed service on indri. devpi-server now runs in a uv-managed venv with pinned `devpi-server` and `devpi-web` versions, listens on `127.0.0.1:3141`, and is fronted by Caddy. The minikube StatefulSet was crash-looping under memory pressure (and breaking the Python toolchain everywhere); the new layout removes a layer of dependency on cluster health for critical-path tooling. See [[devpi-on-indri]]. 
diff --git a/docs/changelog.d/migrate-immich-to-ringtail.infra.md b/docs/changelog.d/migrate-immich-to-ringtail.infra.md deleted file mode 100644 index b47742f..0000000 --- a/docs/changelog.d/migrate-immich-to-ringtail.infra.md +++ /dev/null @@ -1,13 +0,0 @@ -Move the entire Immich stack — server, machine-learning, valkey, -and the PostgreSQL+VectorChord cluster — off `minikube-indri` and -onto `k3s-ringtail`. Postgres data migrated zero-loss via CNPG -`pg_basebackup` (replica catch-up then promote); row counts on -`asset`, `user`, `album`, `smart_search`, `activity`, `asset_face` -verified equal between source and replica before cutover. The ML -pod now uses ringtail's RTX 4080 via the nvidia-device-plugin -(time-slicing bumped 2 → 4 to share with frigate + ollama). Caddy -routing at `photos.ops.eblu.me` is unchanged (still -`photos.tail8d86e.ts.net`, the device just lives on ringtail now). -Borgmatic backups continue against the same `immich-pg` tailnet -hostname. First concrete chain in the broader indri-k8s -decommission effort. diff --git a/docs/changelog.d/migrate-wave1-ringtail.infra.md b/docs/changelog.d/migrate-wave1-ringtail.infra.md deleted file mode 100644 index c44263a..0000000 --- a/docs/changelog.d/migrate-wave1-ringtail.infra.md +++ /dev/null @@ -1,13 +0,0 @@ -Move paperless, teslamate, and mealie off `minikube-indri` onto -`k3s-ringtail`, shedding ~1.1 GiB of resident load from the -OOM-thrashing 8 GiB minikube node (the kernel OOM killer had been -killing `kube-apiserver`/`dockerd`/argocd, flapping every -minikube-hosted service at once). paperless + teslamate databases -move into a fresh CNPG `blumeops-pg` cluster on ringtail via a cold -`pg_dump`/`pg_restore` from the quiesced source — row counts verified -equal before any routing flip; source DBs dropped only after the -ringtail side serves traffic. mealie's SQLite PVC is copied as-is. -paperless media stays on sifaka NFS. 
Downtime-tolerant cold cutover -(no streaming replication); rollback is repoint-and-scale-up with the -source untouched. Second chain in the indri-k8s decommission after -[[migrate-immich-to-ringtail]]. diff --git a/docs/changelog.d/mirror-tailscale-container.infra.md b/docs/changelog.d/mirror-tailscale-container.infra.md deleted file mode 100644 index 54ca3ba..0000000 --- a/docs/changelog.d/mirror-tailscale-container.infra.md +++ /dev/null @@ -1 +0,0 @@ -Add local nix container build for `tailscale` (`containers/tailscale/default.nix`) so ringtail's tailscale-operator ProxyClass proxy pods pull from the forge mirror instead of `docker.io/tailscale/tailscale`. Pinned at v1.94.2 to match `service-versions.yaml`. Indri's tailscale-operator continues to use upstream during the k8s-to-ringtail migration. diff --git a/docs/changelog.d/prowler-iac-mutelist.infra.md b/docs/changelog.d/prowler-iac-mutelist.infra.md deleted file mode 100644 index 077cfa8..0000000 --- a/docs/changelog.d/prowler-iac-mutelist.infra.md +++ /dev/null @@ -1 +0,0 @@ -Address the 6 critical Prowler IaC findings against `argocd/manifests/`. Prowler's IaC provider hardcodes `self._mutelist = None` and delegates filtering to Trivy, but doesn't plumb `--ignorefile` through — so the documented "use Trivy filtering" path is actually broken. Added a shim around `trivy` in the Prowler image that injects `--ignorefile $TRIVY_IGNOREFILE` for `trivy fs` invocations when the env var points at a real file. The IaC cronjob now mounts `mutelist/trivyignore.yaml` (Trivy's per-path schema) and sets the env var, muting the `external-secrets` and `kube-state-metrics` Secret-access findings (KSV-0041, KSV-0114). Separately, `grafana-clusterrole` is tightened to remove `secrets` access entirely: the dashboard sidecar already only consumes ConfigMap-labeled dashboards, so its `RESOURCE` env var is now `configmap` instead of `both`. 
diff --git a/docs/changelog.d/recurring-maintenance-2026-05-27.doc.md b/docs/changelog.d/recurring-maintenance-2026-05-27.doc.md deleted file mode 100644 index af30489..0000000 --- a/docs/changelog.d/recurring-maintenance-2026-05-27.doc.md +++ /dev/null @@ -1 +0,0 @@ -Reviewed [[indri]] reference card: added `devpi`, `cv`, and `docs` to the native-services list; widened the k8s note to reflect the growing set of apps now on ringtail and the planned indri-minikube decommission; added CPU/RAM specs. diff --git a/docs/changelog.d/recurring-maintenance-2026-05-27.infra.md b/docs/changelog.d/recurring-maintenance-2026-05-27.infra.md deleted file mode 100644 index f2d48ad..0000000 --- a/docs/changelog.d/recurring-maintenance-2026-05-27.infra.md +++ /dev/null @@ -1,4 +0,0 @@ -Recurring maintenance batch: - -- Ringtail flake inputs refreshed (`disko`, `home-manager`, `nixpkgs`). -- Tooling deps bumped: prek hooks (trufflehog v3.95.3, kingfisher v1.101.0, ruff v0.15.14, `ansible-core` 2.21.0); fly proxy base images (nginx 1.30.1-alpine, alloy v1.16.1); `typer==0.26.2` in mise tasks. diff --git a/docs/changelog.d/review-ringtail-flake-2026-05-11.infra.md b/docs/changelog.d/review-ringtail-flake-2026-05-11.infra.md deleted file mode 100644 index f39f9f4..0000000 --- a/docs/changelog.d/review-ringtail-flake-2026-05-11.infra.md +++ /dev/null @@ -1 +0,0 @@ -Updated `nixos/ringtail/flake.lock` (weekly cadence): `disko`, `home-manager`, and `nixpkgs` inputs refreshed. `nixpkgs-services` skipped per overlay convention. diff --git a/docs/changelog.d/ringtail-static-ip.infra.md b/docs/changelog.d/ringtail-static-ip.infra.md deleted file mode 100644 index 8474b0a..0000000 --- a/docs/changelog.d/ringtail-static-ip.infra.md +++ /dev/null @@ -1 +0,0 @@ -Pin ringtail's wired IP to `192.168.1.21` via NixOS scripted networking; NetworkManager no longer manages `enp5s0`. Removes DHCP lease renewal as a failure mode after a silent lease teardown took ringtail offline. 
Also explicitly enables `net.ipv4.ip_forward` (previously set implicitly by scripted-DHCP) so k3s pod networking and Tailscale routing continue to work with static networking. diff --git a/docs/changelog.d/rip-out-compensating-controls.infra.md b/docs/changelog.d/rip-out-compensating-controls.infra.md deleted file mode 100644 index d41fd1a..0000000 --- a/docs/changelog.d/rip-out-compensating-controls.infra.md +++ /dev/null @@ -1 +0,0 @@ -Ripped out the compensating-controls (CC) framework: deleted `compensating-controls.yaml`, the `review-compensating-controls` mise task, and the associated how-to / explanation docs. Prowler and Kingfisher continue to run weekly and produce reports; the Prowler mutelist YAML files remain in place but no longer carry `CC: ` prefixes — each entry just keeps a free-form `Description` of why the finding is muted. The CC review cadence proved to be more overhead than this single-operator homelab needed. diff --git a/docs/changelog.d/service-review-mealie-2026-05-11.infra.md b/docs/changelog.d/service-review-mealie-2026-05-11.infra.md deleted file mode 100644 index 074cd21..0000000 --- a/docs/changelog.d/service-review-mealie-2026-05-11.infra.md +++ /dev/null @@ -1 +0,0 @@ -Reviewed `mealie` service version freshness; upstream is 5 minor versions ahead (v3.17.0 vs deployed v3.12.0). Marked reviewed; upgrade deferred. diff --git a/docs/changelog.d/shower-app-deploy.bugfix.md b/docs/changelog.d/shower-app-deploy.bugfix.md deleted file mode 100644 index 91d2b3b..0000000 --- a/docs/changelog.d/shower-app-deploy.bugfix.md +++ /dev/null @@ -1,13 +0,0 @@ -Shower app container now bakes the wheel + Python deps into the image -at build time via `buildPythonPackage` instead of pip-installing on -first boot. Boots are deterministic and don't depend on forge PyPI -being reachable from the pod. 
The `wheelHash` in -`containers/shower/default.nix` is the sha256 sourced from the -[forge PyPI simple index](https://forge.eblu.me/api/packages/eblume/pypi/simple/adelaide-baby-shower-app/); -bumping the version means bumping that hash too. - -Borgmatic now covers the shower app: SQLite is dumped from the live -pod via `kubectl exec` (mirroring the existing mealie entry, with -`context: k3s-ringtail`), and the prize-photo media share is picked up -through `/Volumes/shower` (sifaka SMB mount on indri, same pattern as -`/Volumes/photos`). diff --git a/docs/changelog.d/shower-app-deploy.feature.md b/docs/changelog.d/shower-app-deploy.feature.md deleted file mode 100644 index 96218be..0000000 --- a/docs/changelog.d/shower-app-deploy.feature.md +++ /dev/null @@ -1,4 +0,0 @@ -Deploy the Adelaide / Heidi / Addie baby shower app — guest splash, raffle -picker, and prize assignment console — on ringtail k3s with `shower.eblu.me` -as the public entry and `shower.ops.eblu.me` as the tailnet admin host. App -source: [`adelaide-baby-shower-app`](https://forge.eblu.me/eblume/adelaide-baby-shower-app). diff --git a/docs/changelog.d/shower-app-deploy.infra.md b/docs/changelog.d/shower-app-deploy.infra.md deleted file mode 100644 index 157a068..0000000 --- a/docs/changelog.d/shower-app-deploy.infra.md +++ /dev/null @@ -1,9 +0,0 @@ -Wire shower app for public exposure: fly nginx `shower.eblu.me` server -block as a guest-only surface — splash page, `/prizes//`, static -assets, media. Everything authenticated (`/admin/`, `/host/`, -`/accounts/`) returns 403 with a "tailnet only" pointer. Staff hit -`shower.ops.eblu.me` for the operator console + admin; the app's -v1.0.1 `DJANGO_PUBLIC_URL_BASE` setting makes QR codes generated on -the tailnet point back at the WAN host for guests. Plus a Caddy route -on indri, Pulumi Gandi CNAME, and a Grafana APM dashboard tracking -request rate, error rate, latency, bandwidth, and access logs. 
diff --git a/docs/changelog.d/shower-v1.1.0.feature.md b/docs/changelog.d/shower-v1.1.0.feature.md deleted file mode 100644 index d2c3400..0000000 --- a/docs/changelog.d/shower-v1.1.0.feature.md +++ /dev/null @@ -1,15 +0,0 @@ -Deploy adelaide-baby-shower-app v1.1.0 to ringtail k3s. Replaces the -boolean lock with a four-phase `ShowerState` (`pre_event` → `party` → -`prizes_locked` → `event_locked`), adds an append-only "guest memories" -panel where guests can leave photos and comments for the baby, and -polishes the admin and QR views. Three Django migrations -(`0009_shower_phase`, `0010_guest_memories`, `0011_book_description`) -run automatically in the entrypoint against the SQLite PV. No config -or env-var changes. - -Container build also gains a Forgejo-PyPI workaround: Forgejo's simple -index returns absolute file URLs hardcoded to the public ROOT_URL -(`forge.eblu.me`), which the Fly edge 403s on `/api/packages/*`. The -wheel and sdist are now both pulled via direct `fetchurl` against -`forge.ops.eblu.me` (tailnet-only) and the wheel is handed to pip as -a local path. diff --git a/docs/changelog.d/shower-v1.1.2.infra.md b/docs/changelog.d/shower-v1.1.2.infra.md deleted file mode 100644 index aa2db0d..0000000 --- a/docs/changelog.d/shower-v1.1.2.infra.md +++ /dev/null @@ -1 +0,0 @@ -Deploy shower v1.1.2 — bump container build to new app release. diff --git a/docs/changelog.d/unpoller-v3.infra.md b/docs/changelog.d/unpoller-v3.infra.md deleted file mode 100644 index fa6eaf9..0000000 --- a/docs/changelog.d/unpoller-v3.infra.md +++ /dev/null @@ -1 +0,0 @@ -Upgrade unpoller v2.34.0 → v3.2.0 and migrate container build from Dockerfile to native Dagger (container.py). v3.0.0 carries breaking UniFi API changes; v3.2.0 introduces a 60s background poll (cached scrapes) by default — set `interval = 0` in `up.conf` to restore on-demand polling. 
diff --git a/docs/changelog.d/update-tooling-deps-2026-04.doc.md b/docs/changelog.d/update-tooling-deps-2026-04.doc.md deleted file mode 100644 index 141e975..0000000 --- a/docs/changelog.d/update-tooling-deps-2026-04.doc.md +++ /dev/null @@ -1 +0,0 @@ -New how-to: rotate-fly-deploy-token. Documents the 75-day rotation cadence, why we use `org`-scoped tokens (silences the cosmetic metrics-token warning on `fly status` with marginal blast-radius cost given the single-app personal org), and the procedure for rotation + Forgejo Actions secret sync. diff --git a/docs/changelog.d/update-tooling-deps-2026-04.infra.md b/docs/changelog.d/update-tooling-deps-2026-04.infra.md deleted file mode 100644 index 4731eca..0000000 --- a/docs/changelog.d/update-tooling-deps-2026-04.infra.md +++ /dev/null @@ -1 +0,0 @@ -Monthly tooling dependency refresh: prek hooks (trufflehog, kingfisher, ruff, shfmt, prettier, actionlint, ansible-lint), fly proxy base images (nginx 1.30.0, tailscale v1.94.2, alloy v1.16.0), normalize pyyaml lower bound in mise-tasks. diff --git a/docs/changelog.d/valkey-mirror.infra.md b/docs/changelog.d/valkey-mirror.infra.md deleted file mode 100644 index 06f8d98..0000000 --- a/docs/changelog.d/valkey-mirror.infra.md +++ /dev/null @@ -1 +0,0 @@ -Mirror Valkey 8.1 locally as `registry.ops.eblu.me/blumeops/valkey`. Replaces direct pulls of `docker.io/valkey/valkey:8.1-alpine` for paperless and immich sidecars. Built via native Dagger pipeline on Alpine 3.22. Stateless swap — no data migration. Authentik's nix-built Redis remains separate. diff --git a/docs/changelog.d/valkey-nix.infra.md b/docs/changelog.d/valkey-nix.infra.md deleted file mode 100644 index e41eb63..0000000 --- a/docs/changelog.d/valkey-nix.infra.md +++ /dev/null @@ -1 +0,0 @@ -Add nix-built amd64 valkey for ringtail (`containers/valkey/default.nix`) so immich-ringtail can stop pulling the upstream multi-arch `docker.io/valkey/valkey` image. 
Existing `container.py` continues to build Alpine arm64 for paperless on indri. Both bump to valkey 8.1.7 (Alpine 3.22 8.1.7-r0 / nixpkgs 8.1.7). From 02ea1cc72af43928e9105479ded91da9b51ca18a Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 12:39:50 -0700 Subject: [PATCH 25/35] C0: point tailscale-operator base mirror fetch at tailnet forge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The public forge.eblu.me now black-holes /mirrors/ at the Fly edge (AI-scraper mitigation), so the in-cluster ArgoCD repo-server got a 403 fetching the upstream operator manifest — leaving tailscale-operator and tailscale-operator-ringtail in Unknown sync. Use forge.ops.eblu.me. Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/manifests/tailscale-operator-base/kustomization.yaml | 5 ++++- .../+tailscale-operator-mirror-tailnet-url.bugfix.md | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 docs/changelog.d/+tailscale-operator-mirror-tailnet-url.bugfix.md diff --git a/argocd/manifests/tailscale-operator-base/kustomization.yaml b/argocd/manifests/tailscale-operator-base/kustomization.yaml index 4519af6..9d117ef 100644 --- a/argocd/manifests/tailscale-operator-base/kustomization.yaml +++ b/argocd/manifests/tailscale-operator-base/kustomization.yaml @@ -6,8 +6,11 @@ namespace: tailscale # Upstream Tailscale operator manifest from forge mirror. # To upgrade: update the ref in the URL AND the newTag below. +# Must use the tailnet host forge.ops.eblu.me — the public forge.eblu.me +# black-holes /mirrors/ at the Fly edge (AI-scraper mitigation), which the +# in-cluster ArgoCD repo-server would otherwise hit and fail with a 403. 
resources: - - https://forge.eblu.me/mirrors/tailscale/raw/tag/v1.94.2/cmd/k8s-operator/deploy/manifests/operator.yaml + - https://forge.ops.eblu.me/mirrors/tailscale/raw/tag/v1.94.2/cmd/k8s-operator/deploy/manifests/operator.yaml - proxyclass.yaml - dnsconfig.yaml diff --git a/docs/changelog.d/+tailscale-operator-mirror-tailnet-url.bugfix.md b/docs/changelog.d/+tailscale-operator-mirror-tailnet-url.bugfix.md new file mode 100644 index 0000000..cc29cf7 --- /dev/null +++ b/docs/changelog.d/+tailscale-operator-mirror-tailnet-url.bugfix.md @@ -0,0 +1 @@ +Fixed the `tailscale-operator` and `tailscale-operator-ringtail` ArgoCD apps showing `Unknown` sync status. Their shared base kustomization fetched the upstream operator manifest from the public `forge.eblu.me/mirrors/...`, which the AI-scraper mitigation now black-holes (403). Pointed the remote resource at the tailnet host `forge.ops.eblu.me` instead, which the in-cluster repo-server can reach. From bb55fa95667903e1b38c084a46690e7da61eef0d Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:37:02 -0700 Subject: [PATCH 26/35] Recurring review sweep: 4 doc cards + nvidia-device-plugin v0.19.2 (#366) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Knocks out the two daily recurring review tasks (doc review + service review) in one PR. ## Doc review (4 never-reviewed reference cards, `last-reviewed: 2026-06-04`) - **cluster.md** — Kubernetes version v1.34.0 → **v1.35.0**; refreshed the stale ringtail workload list and noted the in-progress minikube→k3s migration (points to `[[ringtail]]` as the canonical list). - **ntfy.md / tempo.md / alloy.md** — corrected image references: these are now **locally-built `registry.ops.eblu.me/blumeops/*` nix containers** (ntfy v2.19.2, tempo v2.10.3, alloy-k8s v1.16.0), not upstream Docker Hub. Fly.io alloy binary bumped to v1.16.1. ## Service review - **nvidia-device-plugin** (ringtail GPU): v0.19.0 → **v0.19.2**. 
Upstream patch releases — CDI/Tegra fixes + dependency bumps, no breaking changes for our manifest-based CDI + RuntimeClass setup (the service-account change in the notes is helm-only). ## Not in this PR (need container rebuilds, deferred) The other stale services are locally-built nix images, so upgrading them is a forge-runner rebuild rather than a clean tag bump — left untouched (not date-bumped, so they resurface): **prometheus** (v3.10.0→v3.12.0), **loki** (3.6.7→3.7.2), **kube-state-metrics**, **homepage**. Happy to do these as a follow-up rebuild PR. ## Deploy / verify Not yet deployed — `nvidia-device-plugin` still points at `main`. After review: ``` argocd app set nvidia-device-plugin --revision reviews-jun4 && argocd app sync nvidia-device-plugin # after merge: argocd app set nvidia-device-plugin --revision main && argocd app sync nvidia-device-plugin ``` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/366 --- argocd/manifests/nvidia-device-plugin/kustomization.yaml | 2 +- docs/changelog.d/reviews-jun4.doc.md | 1 + docs/changelog.d/reviews-jun4.infra.md | 1 + docs/reference/kubernetes/cluster.md | 9 ++++++--- docs/reference/services/alloy.md | 7 ++++--- docs/reference/services/ntfy.md | 5 +++-- docs/reference/services/tempo.md | 5 +++-- service-versions.yaml | 4 ++-- 8 files changed, 21 insertions(+), 13 deletions(-) create mode 100644 docs/changelog.d/reviews-jun4.doc.md create mode 100644 docs/changelog.d/reviews-jun4.infra.md diff --git a/argocd/manifests/nvidia-device-plugin/kustomization.yaml b/argocd/manifests/nvidia-device-plugin/kustomization.yaml index a46edf6..f5a33ae 100644 --- a/argocd/manifests/nvidia-device-plugin/kustomization.yaml +++ b/argocd/manifests/nvidia-device-plugin/kustomization.yaml @@ -10,4 +10,4 @@ resources: images: - name: nvcr.io/nvidia/k8s-device-plugin - newTag: v0.19.0 + newTag: v0.19.2 diff --git a/docs/changelog.d/reviews-jun4.doc.md 
b/docs/changelog.d/reviews-jun4.doc.md new file mode 100644 index 0000000..f1aeaa8 --- /dev/null +++ b/docs/changelog.d/reviews-jun4.doc.md @@ -0,0 +1 @@ +Reviewed four never-reviewed reference cards (`cluster`, `ntfy`, `tempo`, `alloy`) and corrected drift: minikube is now Kubernetes v1.35.0; ntfy, tempo, and alloy-k8s images are now locally-built `registry.ops.eblu.me/blumeops/*` nix containers (v2.19.2, v2.10.3, v1.16.0) rather than upstream Docker Hub; the Fly.io alloy binary is v1.16.1; and the ringtail workload list reflects the in-progress minikube→k3s migration. diff --git a/docs/changelog.d/reviews-jun4.infra.md b/docs/changelog.d/reviews-jun4.infra.md new file mode 100644 index 0000000..c128e70 --- /dev/null +++ b/docs/changelog.d/reviews-jun4.infra.md @@ -0,0 +1 @@ +Upgraded the nvidia-device-plugin on ringtail from v0.19.0 to v0.19.2 (upstream patch release: CDI/Tegra fixes and dependency bumps, no breaking changes for our manifest-based CDI + RuntimeClass setup). diff --git a/docs/reference/kubernetes/cluster.md b/docs/reference/kubernetes/cluster.md index 9b632bd..07c14af 100644 --- a/docs/reference/kubernetes/cluster.md +++ b/docs/reference/kubernetes/cluster.md @@ -1,6 +1,7 @@ --- title: Cluster -modified: 2026-02-19 +modified: 2026-06-04 +last-reviewed: 2026-06-04 tags: - kubernetes --- @@ -15,7 +16,7 @@ BlumeOps runs two Kubernetes clusters: a Minikube cluster on [[indri]] (most ser |----------|-------| | **Driver** | docker | | **Container Runtime** | docker | -| **Kubernetes Version** | v1.34.0 | +| **Kubernetes Version** | v1.35.0 | | **CPUs** | 6 | | **Memory** | 11GB | | **Disk** | 200GB | @@ -41,7 +42,9 @@ Single-node k3s cluster for workloads requiring amd64 or GPU access. 
See [[ringt |----------|-------| | **Context** | `k3s-ringtail` | | **API Server** | `https://ringtail.tail8d86e.ts.net:6443` | -| **Workloads** | Frigate (GPU), ntfy, frigate-notify, nvidia-device-plugin | +| **Workloads** | GPU workloads (Frigate, Ollama), notifications (ntfy, frigate-notify), [[authentik]], and services migrated off indri minikube (Immich, Mealie, Paperless, TeslaMate). See [[ringtail]] for the authoritative list. | + +Services are being progressively migrated from indri's minikube to ringtail's k3s; the split above reflects an in-progress state, not a fixed boundary. ## Related diff --git a/docs/reference/services/alloy.md b/docs/reference/services/alloy.md index d781f2f..97d1e77 100644 --- a/docs/reference/services/alloy.md +++ b/docs/reference/services/alloy.md @@ -1,6 +1,7 @@ --- title: Alloy -modified: 2026-03-13 +modified: 2026-06-04 +last-reviewed: 2026-06-04 tags: - service - observability @@ -20,10 +21,10 @@ Unified observability collector for metrics and logs with three deployments: | **Indri Binary** | `~/.local/bin/alloy` | | **Indri Config** | `~/.config/grafana-alloy/config.alloy` | | **K8s Namespace** | `alloy` | -| **K8s Image** | `grafana/alloy:v1.14.0` | +| **K8s Image** | `registry.ops.eblu.me/blumeops/alloy:v1.16.0-9564435` (locally built) | | **ArgoCD App** | `alloy-k8s` | | **Fly.io Config** | `fly/alloy.river` | -| **Fly.io Image** | `grafana/alloy:v1.5.1` (binary copied into nginx container) | +| **Fly.io Image** | `grafana/alloy:v1.16.1` (binary copied into nginx container, sha-pinned) | ## Metrics Collected diff --git a/docs/reference/services/ntfy.md b/docs/reference/services/ntfy.md index b549a6d..1bf45af 100644 --- a/docs/reference/services/ntfy.md +++ b/docs/reference/services/ntfy.md @@ -1,6 +1,7 @@ --- title: Ntfy -modified: 2026-02-17 +modified: 2026-06-04 +last-reviewed: 2026-06-04 tags: - service - notifications @@ -17,7 +18,7 @@ Self-hosted push notification service. 
Ntfy receives HTTP POST messages and deli | **URL** | https://ntfy.ops.eblu.me | | **Tailscale URL** | https://ntfy.tail8d86e.ts.net | | **Namespace** | `ntfy` | -| **Image** | `binwiederhier/ntfy:v2.17.0` | +| **Image** | `registry.ops.eblu.me/blumeops/ntfy:v2.19.2-fd0bebb-nix` (locally built) | | **Upstream** | https://github.com/binwiederhier/ntfy | | **Manifests** | `argocd/manifests/ntfy/` | diff --git a/docs/reference/services/tempo.md b/docs/reference/services/tempo.md index 771b97f..5eb5d87 100644 --- a/docs/reference/services/tempo.md +++ b/docs/reference/services/tempo.md @@ -1,6 +1,7 @@ --- title: Tempo -modified: 2026-03-05 +modified: 2026-06-04 +last-reviewed: 2026-06-04 tags: - service - observability @@ -18,7 +19,7 @@ Distributed tracing backend for BlumeOps infrastructure. Receives traces via OTL | **Tailscale URL** | https://tempo.tail8d86e.ts.net | | **OTLP Endpoint** | https://tempo-otlp.tail8d86e.ts.net | | **Namespace** | `monitoring` | -| **Image** | `grafana/tempo:2.10.1` | +| **Image** | `registry.ops.eblu.me/blumeops/tempo:v2.10.3-75f9ba4` (locally built) | | **Storage** | 10Gi PVC (local filesystem) | | **Retention** | 7 days | diff --git a/service-versions.yaml b/service-versions.yaml index 699f89c..11ec9f9 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -56,8 +56,8 @@ services: - name: nvidia-device-plugin type: argocd - last-reviewed: 2026-03-27 - current-version: "v0.19.0" + last-reviewed: 2026-06-04 + current-version: "v0.19.2" upstream-source: https://github.com/NVIDIA/k8s-device-plugin/releases notes: DaemonSet + RuntimeClass on ringtail for GPU workloads From 0e70a1b5242183170a5d7d8ac96ee864063f65bb Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 14:55:55 -0700 Subject: [PATCH 27/35] Localize external-secrets container (native container.py build) (#367) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Knocks out the weekly "pick one non-local container and 
make it local" task by moving **external-secrets** off `ghcr.io` onto a locally-built image, under our own supply-chain control. Doubles as its overdue service review. ## What changed - **`containers/external-secrets/container.py`** (new) — native Dagger build (the Dockerfile→container.py migration pattern). Clones the forge mirror at `v2.2.0` and builds the single `all_providers` static Go binary, faithful to upstream's `make build` (CGO off, no version ldflags upstream). ENTRYPOINT is `/bin/external-secrets` so the controller/webhook/cert-controller Deployments select their role via `args:` exactly as before. - **`argocd/manifests/external-secrets/kustomization.yaml`** — image swapped to `registry.ops.eblu.me/blumeops/external-secrets:v2.2.0-2985007`. **Like-for-like (v2.2.0)**, not an upgrade. - **`service-versions.yaml`** — marked reviewed (2026-06-04), noted the local build. ## Build Built on the indri forge runner (run #579, ~4 min) → pushed to Zot. Image config verified: `Entrypoint=/bin/external-secrets`, `User=65534`, version label `v2.2.0`. ## Deployed from branch & verified - All 3 pods (controller / webhook / cert-controller) rolled to the local image, `1/1 Running` - Controller + webhook logs clean (no errors; webhook serving TLS) - **End-to-end secret fetch proven:** force-synced `monitoring/grafana-admin` → `refreshTime` advanced to now, `Ready=True` - All 10 ExternalSecrets cluster-wide remain `SecretSynced=True` — no collateral damage - App `Healthy` ## Post-merge `external-secrets` currently points at this branch (so `apps` reads OutOfSync — expected). 
After merge: ``` argocd app set external-secrets --revision main && argocd app sync external-secrets ``` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/367 --- .../external-secrets/kustomization.yaml | 3 +- containers/external-secrets/container.py | 51 +++++++++++++++++++ .../local-external-secrets.infra.md | 1 + service-versions.yaml | 7 ++- 4 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 containers/external-secrets/container.py create mode 100644 docs/changelog.d/local-external-secrets.infra.md diff --git a/argocd/manifests/external-secrets/kustomization.yaml b/argocd/manifests/external-secrets/kustomization.yaml index 574aaa7..c25a7d5 100644 --- a/argocd/manifests/external-secrets/kustomization.yaml +++ b/argocd/manifests/external-secrets/kustomization.yaml @@ -12,4 +12,5 @@ resources: images: - name: ghcr.io/external-secrets/external-secrets - newTag: v2.2.0 + newName: registry.ops.eblu.me/blumeops/external-secrets + newTag: v2.2.0-2985007 diff --git a/containers/external-secrets/container.py b/containers/external-secrets/container.py new file mode 100644 index 0000000..6be5765 --- /dev/null +++ b/containers/external-secrets/container.py @@ -0,0 +1,51 @@ +"""External Secrets Operator — native Dagger build. + +Two-stage build: Go binary (all providers), Alpine runtime. +Source cloned from forge mirror. + +A single binary serves as the controller, webhook, and cert-controller; the +Deployments select the role via a subcommand passed in `args:`, so the image +ENTRYPOINT must be the binary itself (matching upstream's distroless image). 
+""" + +import dagger + +from blumeops.containers import ( + alpine_runtime, + clone_from_forge, + go_build, + oci_labels, +) + +VERSION = "v2.2.0" + + +async def build(src: dagger.Directory) -> dagger.Container: + source = clone_from_forge("external-secrets", VERSION) + + # Upstream `make build` compiles every secret provider into a single + # static binary (`-tags all_providers`, CGO disabled). Mirror that so the + # local image is functionally identical to ghcr.io/.../external-secrets. + backend = go_build( + source, + "/external-secrets", + tags="all_providers", + ) + + runtime = alpine_runtime( + extra_apk=["ca-certificates"], + create_user=False, + ) + runtime = oci_labels( + runtime, + title="External Secrets Operator", + description=( + "Kubernetes operator that integrates external secret management systems" + ), + version=VERSION, + ) + return ( + runtime.with_file("/bin/external-secrets", backend.file("/external-secrets")) + .with_user("65534") + .with_entrypoint(["/bin/external-secrets"]) + ) diff --git a/docs/changelog.d/local-external-secrets.infra.md b/docs/changelog.d/local-external-secrets.infra.md new file mode 100644 index 0000000..13cbb05 --- /dev/null +++ b/docs/changelog.d/local-external-secrets.infra.md @@ -0,0 +1 @@ +Localized the external-secrets controller image. It now builds from the forge mirror via a native Dagger `container.py` (single `all_providers` static Go binary, faithful to upstream's `make build`) and is served from `registry.ops.eblu.me/blumeops/external-secrets` instead of `ghcr.io`, bringing another platform component under local supply-chain control. 
diff --git a/service-versions.yaml b/service-versions.yaml index 11ec9f9..cc9dc9e 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -159,10 +159,13 @@ services: - name: external-secrets type: argocd - last-reviewed: 2026-03-25 + last-reviewed: 2026-06-04 current-version: "v2.2.0" upstream-source: https://github.com/external-secrets/external-secrets/releases - notes: Static kustomize manifests rendered from upstream Helm chart + notes: >- + Static kustomize manifests rendered from upstream Helm chart. Controller + image is locally built from the forge mirror via containers/external-secrets/container.py + (single all_providers static Go binary). - name: 1password-connect type: argocd From 30c82079b9dbb8e2492586d979cd4ec5b04cd08d Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 14:59:17 -0700 Subject: [PATCH 28/35] C0: rebuild external-secrets image off main (v2.2.0-0e70a1b) Repoint to the main-branch-built image so the deployed tag traces to a main commit rather than the merged feature branch. Same v2.2.0 source, stable provenance. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/manifests/external-secrets/kustomization.yaml | 2 +- docs/changelog.d/+external-secrets-main-sha-rebuild.infra.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 docs/changelog.d/+external-secrets-main-sha-rebuild.infra.md diff --git a/argocd/manifests/external-secrets/kustomization.yaml b/argocd/manifests/external-secrets/kustomization.yaml index c25a7d5..8b1aea5 100644 --- a/argocd/manifests/external-secrets/kustomization.yaml +++ b/argocd/manifests/external-secrets/kustomization.yaml @@ -13,4 +13,4 @@ resources: images: - name: ghcr.io/external-secrets/external-secrets newName: registry.ops.eblu.me/blumeops/external-secrets - newTag: v2.2.0-2985007 + newTag: v2.2.0-0e70a1b diff --git a/docs/changelog.d/+external-secrets-main-sha-rebuild.infra.md b/docs/changelog.d/+external-secrets-main-sha-rebuild.infra.md new file mode 100644 index 0000000..2e931d4 --- /dev/null +++ b/docs/changelog.d/+external-secrets-main-sha-rebuild.infra.md @@ -0,0 +1 @@ +Rebuilt the locally-built external-secrets image from the `main` branch so the deployed tag (`v2.2.0-0e70a1b`) traces to a `main` commit rather than the now-merged feature branch, giving a stable provenance reference. From 13895bb04a5afcbb723d7ab3355d228431d76a5d Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 15:37:42 -0700 Subject: [PATCH 29/35] Localize external-secrets on ringtail (amd64 nix build) (#368) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #367. That PR localized external-secrets but the Dagger build (on indri's Apple Silicon runner) only produces an **arm64** image — and external-secrets also runs on **ringtail (amd64)** via the same shared manifest. This completes the localization so both clusters run the local binary on their native arch. 
## Approach (matches the kube-state-metrics dual-build pattern) - **`containers/external-secrets/default.nix`** (new) — builds the **amd64** image on ringtail's nix-container-builder. `buildGoModule` with Go 1.26 (v2.2.0 requires ≥1.26.1; nixpkgs default is 1.25.x) and `-tags all_providers`, faithful to upstream. Same v2.2.0 source from the forge mirror. - **`argocd/manifests/external-secrets-ringtail/`** (new) — thin kustomize overlay that reuses the shared indri manifest as a base and overrides **only** the image to the `-nix` (amd64) tag. No manifest duplication. - **`argocd/apps/external-secrets-ringtail.yaml`** — repointed at the new overlay. Result: indri → `v2.2.0-…` (arm64, Dagger), ringtail → `v2.2.0-…-nix` (amd64, nix). ## Build Run #581 built both arches at the branch commit. Verified the nix image is `linux/amd64`, entrypoint = the binary, user 65534. ## Deployed from branch & verified on ringtail (k3s, amd64) - All 3 pods rolled to the nix amd64 image, `1/1 Running` (no exec-format error → arch correct) - Controller logs clean - **Live secret fetch proven:** force-synced `homepage/homepage-grafana` → `refreshTime` advanced, `Ready=True` - **All 20** ringtail ExternalSecrets remain `SecretSynced=True` ## Post-merge The `external-secrets-ringtail` app is temporarily pointed at this branch + overlay path (apps app left on `main`, manual-sync, untouched). After merge: ``` argocd app sync apps # picks up the new Application path on main argocd app set external-secrets-ringtail --revision main && argocd app sync external-secrets-ringtail ``` I'll also rebuild off `main` so both clusters land on stable main-sha tags (as done for indri in #367). 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/368 --- argocd/apps/external-secrets-ringtail.yaml | 2 +- .../kustomization.yaml | 16 ++++++ containers/external-secrets/default.nix | 56 +++++++++++++++++++ .../external-secrets-ringtail-nix.infra.md | 1 + 4 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 argocd/manifests/external-secrets-ringtail/kustomization.yaml create mode 100644 containers/external-secrets/default.nix create mode 100644 docs/changelog.d/external-secrets-ringtail-nix.infra.md diff --git a/argocd/apps/external-secrets-ringtail.yaml b/argocd/apps/external-secrets-ringtail.yaml index e2f5898..0bb8bd7 100644 --- a/argocd/apps/external-secrets-ringtail.yaml +++ b/argocd/apps/external-secrets-ringtail.yaml @@ -15,7 +15,7 @@ spec: source: repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git targetRevision: main - path: argocd/manifests/external-secrets + path: argocd/manifests/external-secrets-ringtail destination: server: https://ringtail.tail8d86e.ts.net:6443 namespace: external-secrets diff --git a/argocd/manifests/external-secrets-ringtail/kustomization.yaml b/argocd/manifests/external-secrets-ringtail/kustomization.yaml new file mode 100644 index 0000000..05b6b54 --- /dev/null +++ b/argocd/manifests/external-secrets-ringtail/kustomization.yaml @@ -0,0 +1,16 @@ +# Ringtail (amd64) overlay for external-secrets. +# +# Reuses the shared indri manifest as a base and only overrides the controller +# image to the nix-built amd64 variant (`-nix` tag). The base sets the arm64 +# image (built via containers/external-secrets/container.py on indri's Dagger +# runner); ringtail's k3s is amd64 and needs the image built by +# containers/external-secrets/default.nix on the nix-container-builder. 
+apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../external-secrets + +images: + - name: registry.ops.eblu.me/blumeops/external-secrets + newTag: v2.2.0-59dace8-nix diff --git a/containers/external-secrets/default.nix b/containers/external-secrets/default.nix new file mode 100644 index 0000000..eabe03d --- /dev/null +++ b/containers/external-secrets/default.nix @@ -0,0 +1,56 @@ +# Nix-built External Secrets Operator (amd64, for ringtail k3s). +# Builds v2.2.0 from the forge mirror with all secret providers compiled in, +# faithful to upstream's `make build` (-tags all_providers). The container.py +# sibling builds the arm64 image for indri's minikube; this default.nix builds +# the amd64 image on ringtail's nix-container-builder. +{ pkgs ? import <nixpkgs> { } }: + +let + version = "2.2.0"; + + src = pkgs.fetchgit { + url = "https://forge.ops.eblu.me/mirrors/external-secrets.git"; + rev = "v${version}"; + hash = "sha256-eAocOAp5s4CFRrpKfQr2lf3Ji+6nQQ1A5/eTw5B7v9U="; + }; + + # external-secrets v2.2.0 requires Go >= 1.26.1; nixpkgs default go is 1.25.x. + external-secrets = (pkgs.buildGoModule.override { go = pkgs.go_1_26; }) { + inherit src version; + pname = "external-secrets"; + vendorHash = "sha256-0xuBK3fjAplPLAElHvKB6d+2lDz+De/s91fV4dPZwjE="; + + doCheck = false; + + subPackages = [ "." 
]; + + tags = [ "all_providers" ]; + + ldflags = [ "-s" "-w" ]; + + meta = with pkgs.lib; { + description = "Kubernetes operator that integrates external secret management systems"; + homepage = "https://github.com/external-secrets/external-secrets"; + license = licenses.asl20; + mainProgram = "external-secrets"; + }; + }; +in + +pkgs.dockerTools.buildLayeredImage { + name = "blumeops/external-secrets"; + contents = [ + external-secrets + pkgs.cacert + pkgs.tzdata + ]; + + config = { + Entrypoint = [ "${external-secrets}/bin/external-secrets" ]; + Env = [ + "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" + "TZDIR=${pkgs.tzdata}/share/zoneinfo" + ]; + User = "65534"; + }; +} diff --git a/docs/changelog.d/external-secrets-ringtail-nix.infra.md b/docs/changelog.d/external-secrets-ringtail-nix.infra.md new file mode 100644 index 0000000..9ce3f85 --- /dev/null +++ b/docs/changelog.d/external-secrets-ringtail-nix.infra.md @@ -0,0 +1 @@ +Completed the external-secrets localization for the ringtail (amd64) cluster. The indri Dagger build (`container.py`) only produces an arm64 image; added `containers/external-secrets/default.nix` to build the amd64 variant on ringtail's nix-container-builder, and gave `external-secrets-ringtail` a thin kustomize overlay that reuses the shared manifest and points at the `-nix` image. Both clusters now run the locally-built external-secrets binary on their native architecture. From f6c926f1f594a0ee019bca5d31cdcc4225f6d6cf Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 16:19:20 -0700 Subject: [PATCH 30/35] C0: rebuild external-secrets off main, repoint both clusters to stable tags indri -> v2.2.0-13895bb (arm64), ringtail -> v2.2.0-13895bb-nix (amd64). Both deployed images now trace to main commit 13895bb instead of earlier branch builds. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/manifests/external-secrets-ringtail/kustomization.yaml | 2 +- argocd/manifests/external-secrets/kustomization.yaml | 2 +- docs/changelog.d/+external-secrets-stable-main-sha.infra.md | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 docs/changelog.d/+external-secrets-stable-main-sha.infra.md diff --git a/argocd/manifests/external-secrets-ringtail/kustomization.yaml b/argocd/manifests/external-secrets-ringtail/kustomization.yaml index 05b6b54..9fd4e2f 100644 --- a/argocd/manifests/external-secrets-ringtail/kustomization.yaml +++ b/argocd/manifests/external-secrets-ringtail/kustomization.yaml @@ -13,4 +13,4 @@ resources: images: - name: registry.ops.eblu.me/blumeops/external-secrets - newTag: v2.2.0-59dace8-nix + newTag: v2.2.0-13895bb-nix diff --git a/argocd/manifests/external-secrets/kustomization.yaml b/argocd/manifests/external-secrets/kustomization.yaml index 8b1aea5..639db66 100644 --- a/argocd/manifests/external-secrets/kustomization.yaml +++ b/argocd/manifests/external-secrets/kustomization.yaml @@ -13,4 +13,4 @@ resources: images: - name: ghcr.io/external-secrets/external-secrets newName: registry.ops.eblu.me/blumeops/external-secrets - newTag: v2.2.0-0e70a1b + newTag: v2.2.0-13895bb diff --git a/docs/changelog.d/+external-secrets-stable-main-sha.infra.md b/docs/changelog.d/+external-secrets-stable-main-sha.infra.md new file mode 100644 index 0000000..fbe3c21 --- /dev/null +++ b/docs/changelog.d/+external-secrets-stable-main-sha.infra.md @@ -0,0 +1 @@ +Rebuilt the external-secrets images off `main` and repointed both clusters to the stable main-sha tags (`v2.2.0-13895bb` arm64 / `v2.2.0-13895bb-nix` amd64), so the deployed images on indri and ringtail trace to the same `main` commit rather than earlier feature-branch builds. 
From a2f1e062243a47c7c68b5a57617f14102b798503 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Fri, 5 Jun 2026 06:46:58 -0700 Subject: [PATCH 31/35] Add hephaestus sync hub to indri (launchagent, PWA, device-code OIDC) (#369) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes indri the canonical **heph** hub for the hub-and-spoke task/context system, deployed as a self-updating LaunchAgent managed by Ansible. Other devices (gilbert) attach as offline-capable spokes. ## What's here - **`ansible/roles/heph`** (tag `heph`) — bootstrap `cargo install hephd` (only if absent; `--self-update` keeps it current after), version-pinned `heph-pwa` checkout served via `--web-root`, launchagent `mcquack.eblume.heph`: ``` hephd --mode server --http-addr 0.0.0.0:8787 --db … --web-root … --oidc-issuer …/o/heph/ --oidc-audience heph --self-update --self-update-interval-secs 600 ``` `~/.cargo/bin` is on the agent `PATH` so self-update's `cargo install` works. - **Caddy** — `heph.ops.eblu.me → localhost:8787` (TLS for the PWA secure context). - **Authentik** — new `heph` **public device-code** OIDC app + `default-device-code-flow` bound to the default brand's `flow_device_code` (verified live: brand `authentik-default`, field currently unset → additive). - **Docs** — `services/hephaestus.md` (Path-A seeding runbook + spoke caveat), `indri.md`, changelog fragment. ## Three features requested - **Autoupdate** — 10-min interval (`--self-update-interval-secs 600`). - **PWA** — `--web-root` (confirmed shipped in v1.2.0). - **Spoke** — gilbert reconfig documented (post-merge step). ## Deploy plan (not done yet — awaiting review) 1. Seed from gilbert (Path A): `heph daemon stop` → copy `heph.db` → `DELETE FROM meta WHERE key='origin'`. 2. Sync Authentik `apps`/blueprint; verify blueprint status via API (not just logs). 3. `provision-indri --tags heph,caddy` from this branch. 4. Point gilbert at the hub + `heph auth login`. 
## Known follow-ups (heph-side, tracked in the Hephaestus project) - `heph daemon` can't bake hub/spoke config or pass `--self-update-interval-secs` → worked around by the ansible plist. - Path-A seeding lacks a clean `hephd --owner-id`/seed command → manual `meta.origin` reset for now. - Self-update moves hephd ahead of the ansible-pinned PWA shell over time (drift; tolerated by the SW cache, revisit on next release). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/369 --- ansible/playbooks/indri.yml | 2 + ansible/roles/caddy/defaults/main.yml | 3 + ansible/roles/heph/defaults/main.yml | 49 +++++++ ansible/roles/heph/handlers/main.yml | 6 + ansible/roles/heph/tasks/main.yml | 82 +++++++++++ ansible/roles/heph/templates/heph.plist.j2 | 50 +++++++ .../authentik/configmap-blueprint.yaml | 79 +++++++++++ docs/changelog.d/heph-indri-hub.infra.md | 1 + docs/reference/infrastructure/indri.md | 1 + docs/reference/services/hephaestus.md | 130 ++++++++++++++++++ 10 files changed, 403 insertions(+) create mode 100644 ansible/roles/heph/defaults/main.yml create mode 100644 ansible/roles/heph/handlers/main.yml create mode 100644 ansible/roles/heph/tasks/main.yml create mode 100644 ansible/roles/heph/templates/heph.plist.j2 create mode 100644 docs/changelog.d/heph-indri-hub.infra.md create mode 100644 docs/reference/services/hephaestus.md diff --git a/ansible/playbooks/indri.yml b/ansible/playbooks/indri.yml index ddb57f8..1e33bb1 100644 --- a/ansible/playbooks/indri.yml +++ b/ansible/playbooks/indri.yml @@ -260,5 +260,7 @@ tags: cv - role: docs tags: docs + - role: heph + tags: heph - role: caddy tags: caddy diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml index 363d09e..e6d7385 100644 --- a/ansible/roles/caddy/defaults/main.yml +++ b/ansible/roles/caddy/defaults/main.yml @@ -52,6 +52,9 @@ caddy_services: - name: devpi host: "pypi.{{ caddy_domain }}" backend: 
"http://localhost:3141" + - name: heph + host: "heph.{{ caddy_domain }}" + backend: "http://localhost:8787" # hephaestus hub (server mode) + PWA shell - name: kiwix host: "kiwix.{{ caddy_domain }}" backend: "https://kiwix.tail8d86e.ts.net" diff --git a/ansible/roles/heph/defaults/main.yml b/ansible/roles/heph/defaults/main.yml new file mode 100644 index 0000000..e5eea36 --- /dev/null +++ b/ansible/roles/heph/defaults/main.yml @@ -0,0 +1,49 @@ +--- +# hephaestus hub — the canonical heph replica (server mode) on indri. +# Other devices (e.g. gilbert) are spokes that sync against this hub. +# See [[set-up-sync-hub]] and [[host-heph-pwa]] in the hephaestus repo. + +# Pinned release used for the initial `cargo install` and the PWA shell. +# After bootstrap, hephd's own --self-update keeps the binary current; this +# pin only governs the first install and the bundled PWA shell version. +heph_version: v1.2.0 + +# Anonymous public HTTPS clone — matches hephd's INSTALL_GIT_URL so the initial +# install and unattended self-update build from the same source (no ssh-agent). +heph_repo_url: https://forge.eblu.me/eblume/hephaestus.git + +heph_bin_dir: /Users/erichblume/.cargo/bin +heph_binary: "{{ heph_bin_dir }}/hephd" + +# rustc/cargo here are rustup shims. The bare (non-mise) environment that the +# launchagent and ansible run in falls back to rustup's *default* toolchain, +# which can lag behind heph's rust-version floor (Cargo.toml: 1.89). Pin the +# channel explicitly so both the bootstrap build and unattended self-update +# always use a current toolchain regardless of the host's rustup default. +heph_rust_toolchain: stable + +heph_data_dir: /Users/erichblume/.local/share/heph +heph_db: "{{ heph_data_dir }}/heph.db" +heph_socket: "{{ heph_data_dir }}/hephd.sock" +heph_log_dir: /Users/erichblume/Library/Logs + +# Version-pinned source checkout; the PWA static shell is served directly from +# its heph-pwa/ subdir (no copy), keeping shell and hub in lockstep at heph_version. 
+heph_pwa_src_dir: /Users/erichblume/.cache/heph-pwa-src +heph_web_root: "{{ heph_pwa_src_dir }}/heph-pwa" + +# Hub listens on all interfaces so tailnet spokes can reach it directly +# (http://indri.tail8d86e.ts.net:8787) and Caddy can proxy heph.ops.eblu.me. +# Access is gated by Authentik OIDC regardless — tailnet reachability is not +# enough (this is the owner's most sensitive data). +heph_http_addr: 0.0.0.0:8787 +heph_port: 8787 +heph_external_url: https://heph.ops.eblu.me + +# Authentik OIDC — issuer + audience together turn hub auth on. The audience is +# the device-code client id (see argocd/manifests/authentik heph blueprint). +heph_oidc_issuer: https://authentik.ops.eblu.me/application/o/heph/ +heph_oidc_audience: heph + +# Self-update poll interval (seconds). 10 minutes. +heph_self_update_interval_secs: 600 diff --git a/ansible/roles/heph/handlers/main.yml b/ansible/roles/heph/handlers/main.yml new file mode 100644 index 0000000..92fe9d7 --- /dev/null +++ b/ansible/roles/heph/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: Restart heph + ansible.builtin.shell: | + launchctl unload ~/Library/LaunchAgents/mcquack.eblume.heph.plist 2>/dev/null || true + launchctl load ~/Library/LaunchAgents/mcquack.eblume.heph.plist + changed_when: true diff --git a/ansible/roles/heph/tasks/main.yml b/ansible/roles/heph/tasks/main.yml new file mode 100644 index 0000000..7a45fe3 --- /dev/null +++ b/ansible/roles/heph/tasks/main.yml @@ -0,0 +1,82 @@ +--- +# hephaestus hub (server mode) on indri. +# +# DATA SEEDING (one-time, Path A — do this BEFORE the first provision so the hub +# adopts gilbert's existing data instead of being born empty): +# +# 1. On the seed device (gilbert): heph daemon stop +# 2. Copy its store to indri: scp ~/.local/share/heph/heph.db \ +# indri:~/.local/share/heph/heph.db +# 3. 
On indri, give the hub its OWN device origin (keeps gilbert's owner_id + +# data; hephd regenerates a fresh origin on next start when it is missing): +# sqlite3 ~/.local/share/heph/heph.db "DELETE FROM meta WHERE key='origin';" +# 4. Run this role (installs hephd, stages the PWA, loads the launchagent). +# +# hephd auto-creates an empty store on first start if none exists, so seeding is +# optional — skip it only if you intend a fresh, empty hub. + +- name: Ensure heph data directory exists + ansible.builtin.file: + path: "{{ heph_data_dir }}" + state: directory + mode: '0700' + +- name: Check for installed hephd binary + ansible.builtin.stat: + path: "{{ heph_binary }}" + register: heph_binary_stat + +# Bootstrap install only when hephd is absent. Thereafter hephd's own +# --self-update keeps it current; ansible must not fight (or downgrade) it. +# This builds from source and can take several minutes on a cold cargo cache. +- name: Bootstrap-install heph + hephd from the forge ({{ heph_version }}) + ansible.builtin.command: + cmd: >- + {{ heph_bin_dir }}/cargo install --locked + --git {{ heph_repo_url }} + --tag {{ heph_version }} + heph hephd + environment: + PATH: "{{ heph_bin_dir }}:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin" + RUSTUP_TOOLCHAIN: "{{ heph_rust_toolchain }}" + when: not heph_binary_stat.stat.exists + changed_when: true + notify: Restart heph + +# Checkout provides the PWA shell at {{ heph_web_root }} (heph-pwa/ subdir), +# served directly by hephd. Static files are read from disk per request, so a +# version bump needs no restart; the service worker (CACHE = "heph-pwa-vN") +# evicts stale assets on next load. 
+- name: Ensure heph cache parent directory exists + ansible.builtin.file: + path: "{{ heph_pwa_src_dir | dirname }}" + state: directory + mode: '0755' + +- name: Stage heph-pwa source at {{ heph_version }} + ansible.builtin.git: + repo: "{{ heph_repo_url }}" + dest: "{{ heph_pwa_src_dir }}" + version: "{{ heph_version }}" + depth: 1 + single_branch: true + force: true + +- name: Deploy heph LaunchAgent plist + ansible.builtin.template: + src: heph.plist.j2 + dest: ~/Library/LaunchAgents/mcquack.eblume.heph.plist + mode: '0644' + notify: Restart heph + +- name: Check if heph LaunchAgent is loaded + ansible.builtin.command: launchctl list mcquack.eblume.heph + register: heph_launchctl_check + changed_when: false + failed_when: false + +- name: Load heph LaunchAgent if not loaded + ansible.builtin.command: launchctl load ~/Library/LaunchAgents/mcquack.eblume.heph.plist + when: heph_launchctl_check.rc != 0 + changed_when: true + failed_when: false diff --git a/ansible/roles/heph/templates/heph.plist.j2 b/ansible/roles/heph/templates/heph.plist.j2 new file mode 100644 index 0000000..19a2367 --- /dev/null +++ b/ansible/roles/heph/templates/heph.plist.j2 @@ -0,0 +1,50 @@ + + + + + + Label + mcquack.eblume.heph + ProgramArguments + + {{ heph_binary }} + --mode + server + --http-addr + {{ heph_http_addr }} + --db + {{ heph_db }} + --socket + {{ heph_socket }} + --web-root + {{ heph_web_root }} + --oidc-issuer + {{ heph_oidc_issuer }} + --oidc-audience + {{ heph_oidc_audience }} + --self-update + --self-update-interval-secs + {{ heph_self_update_interval_secs }} + + RunAtLoad + + KeepAlive + + EnvironmentVariables + + + PATH + {{ heph_bin_dir }}:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin + HOME + /Users/erichblume + + RUSTUP_TOOLCHAIN + {{ heph_rust_toolchain }} + + StandardOutPath + {{ heph_log_dir }}/mcquack.heph.out.log + StandardErrorPath + {{ heph_log_dir }}/mcquack.heph.err.log + + diff --git a/argocd/manifests/authentik/configmap-blueprint.yaml 
b/argocd/manifests/authentik/configmap-blueprint.yaml index fcbb99b..56d9110 100644 --- a/argocd/manifests/authentik/configmap-blueprint.yaml +++ b/argocd/manifests/authentik/configmap-blueprint.yaml @@ -434,3 +434,82 @@ data: provider: !KeyOf mealie-provider meta_launch_url: https://meals.ops.eblu.me policy_engine_mode: all + + heph.yaml: | + version: 1 + metadata: + name: BlumeOps Heph SSO + labels: + blueprints.goauthentik.io/description: "Hephaestus hub OIDC (device-code) provider, application, and device-code flow" + entries: + # Device-code flow (RFC 8628). authentik ships no default for this, so we + # create one and bind it to the brand below. An empty stage_configuration + # flow is sufficient: the already-authenticated user just confirms the code. + - model: authentik_flows.flow + id: device-code-flow + identifiers: + slug: default-device-code-flow + attrs: + name: Device code flow + title: Device code flow + slug: default-device-code-flow + designation: stage_configuration + authentication: require_authenticated + + # Enable the device-code grant globally by binding the flow to the default + # brand (domain authentik-default). Partial update — only sets this field. + - model: authentik_brands.brand + identifiers: + domain: authentik-default + attrs: + flow_device_code: !KeyOf device-code-flow + + # OAuth2 provider for heph — PUBLIC client (device-code + PKCE, no secret). + # client_id doubles as the token audience the hub verifies (--oidc-audience heph), + # and the app slug 'heph' is the issuer path (/application/o/heph/). 
+ - model: authentik_providers_oauth2.oauth2provider + id: heph-provider + identifiers: + name: Heph + attrs: + name: Heph + authorization_flow: !Find [authentik_flows.flow, [slug, default-provider-authorization-implicit-consent]] + invalidation_flow: !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]] + client_type: public + client_id: heph + # Device-code (RFC 8628) + PKCE use no redirect, but the provider + # serializer requires the field — an empty list satisfies it. + redirect_uris: [] + signing_key: !Find [authentik_crypto.certificatekeypair, [name, authentik Self-signed Certificate]] + property_mappings: + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]] + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]] + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]] + sub_mode: hashed_user_id + include_claims_in_id_token: true + + # Heph application — linked to the OAuth2 provider + - model: authentik_core.application + id: heph-app + identifiers: + slug: heph + attrs: + name: Hephaestus + slug: heph + provider: !KeyOf heph-provider + meta_launch_url: https://heph.ops.eblu.me + policy_engine_mode: any + + # Policy binding — restrict heph to admins group (single-owner, sensitive data) + - model: authentik_policies.policybinding + identifiers: + order: 0 + target: !KeyOf heph-app + group: !Find [authentik_core.group, [name, admins]] + attrs: + target: !KeyOf heph-app + group: !Find [authentik_core.group, [name, admins]] + order: 0 + enabled: true + negate: false + timeout: 30 diff --git a/docs/changelog.d/heph-indri-hub.infra.md b/docs/changelog.d/heph-indri-hub.infra.md new file mode 100644 index 0000000..6761cb7 --- /dev/null +++ b/docs/changelog.d/heph-indri-hub.infra.md @@ -0,0 +1 @@ +Added the [[hephaestus]] (`heph`) sync hub to indri as a self-updating LaunchAgent managed by Ansible (`ansible/roles/heph`, tag `heph`). 
The hub runs `hephd --mode server` behind `heph.ops.eblu.me` (Caddy TLS), with self-update on a 10-minute interval and the heph-pwa mobile shell served from `--web-root`. Access is gated by a new Authentik device-code (RFC 8628) OIDC application. Indri is now the canonical hub; other devices (e.g. gilbert) attach as offline-capable spokes. The hub's store was seeded from gilbert via the data-safe Path A bring-up (copy store, reset `meta.origin`). diff --git a/docs/reference/infrastructure/indri.md b/docs/reference/infrastructure/indri.md index 67652ca..8364ba0 100644 --- a/docs/reference/infrastructure/indri.md +++ b/docs/reference/infrastructure/indri.md @@ -33,6 +33,7 @@ Primary BlumeOps server. Mac Mini M1 (2020). - [[alloy|Alloy]] - Metrics/logs collector - [[caddy]] - Reverse proxy for `*.ops.eblu.me` - [[devpi]] - PyPI mirror (LaunchAgent) +- [[hephaestus]] - heph task/context sync hub (LaunchAgent, self-updating) - [[cv]] - Static CV site, served by Caddy - [[docs]] - Quartz-built docs site, served by Caddy diff --git a/docs/reference/services/hephaestus.md b/docs/reference/services/hephaestus.md new file mode 100644 index 0000000..1754ea0 --- /dev/null +++ b/docs/reference/services/hephaestus.md @@ -0,0 +1,130 @@ +--- +title: Hephaestus +modified: 2026-06-04 +last-reviewed: 2026-06-04 +tags: + - service + - hephaestus +--- + +# Hephaestus + +[hephaestus](https://github.com/eblume/hephaestus) (`heph`) is the user's +self-hosted task + context/knowledge system. It is **hub-and-spoke**: each device +runs a full local SQLite replica (`hephd --mode local`) and background-syncs +against one canonical **hub**. Indri runs that hub. 
+ +## Quick Reference + +| Property | Value | +|----------|-------| +| **PWA URL** | https://heph.ops.eblu.me (browser PWA, Caddy TLS) | +| **Spoke sync URL** | http://indri.tail8d86e.ts.net:8787 (direct, tailnet) | +| **Local Port** | 8787 (`hephd --mode server`, bound `0.0.0.0`) | +| **Binary** | `~/.cargo/bin/hephd` (self-updating) | +| **Data** | `~/.local/share/heph/heph.db` | +| **PWA shell** | `~/.cache/heph-pwa-src/heph-pwa` | +| **Logs** | `~/Library/Logs/mcquack.heph.{out,err}.log` | +| **LaunchAgent** | `mcquack.eblume.heph` | +| **Ansible role** | `ansible/roles/heph` (tag `heph`) | + +## What runs on indri + +The launchagent runs the hub in server mode with three features enabled: + +``` +hephd --mode server --http-addr 0.0.0.0:8787 --db ~/.local/share/heph/heph.db + --web-root ~/.cache/heph-pwa-src/heph-pwa + --oidc-issuer https://authentik.ops.eblu.me/application/o/heph/ + --oidc-audience heph + --self-update --self-update-interval-secs 600 +``` + +- **Server mode** exposes the HTTP sync endpoint (`/rpc`, `/sync/*`) that spokes + reconcile their op-log against. +- **Self-update** (10-minute poll) rebuilds `hephd` from the forge when a newer + release tag appears (`cargo install --git https://forge.eblu.me/eblume/hephaestus.git`). + Indri's Rust toolchain (`~/.cargo/bin`) is on the agent's `PATH` for this, and + the plist pins `RUSTUP_TOOLCHAIN=stable` — the + launchagent runs without mise, so a bare `cargo` shim would otherwise fall back + to rustup's *default* toolchain, which can lag behind heph's `rust-version` floor + (1.89) and silently fail the build. +- **PWA** (`--web-root`) serves the [heph-pwa] mobile shell; Caddy terminates TLS + at `heph.ops.eblu.me` so the PWA runs in a secure context (service worker, + install-to-home-screen, voice capture). 
+ +[heph-pwa]: https://github.com/eblume/hephaestus + +The hub binds `0.0.0.0` so tailnet spokes can also sync directly +(`http://indri.tail8d86e.ts.net:8787`); access is gated by Authentik OIDC either +way — tailnet reachability alone is not enough. + +## Authentication (Authentik OIDC, device-code) + +The hub verifies an OIDC bearer token on every sync. The `heph` application is a +**public** OAuth2 client using the **device-code flow** (RFC 8628), provisioned +in the [[authentik]] blueprint (`argocd/manifests/authentik/configmap-blueprint.yaml`): + +- Issuer: `https://authentik.ops.eblu.me/application/o/heph/` +- Audience / client id: `heph` +- Restricted to the `admins` group (single-owner, sensitive data). + +Because no Authentik instance ships a device-code flow by default, the blueprint +also creates `default-device-code-flow` and binds it to the default brand's +`flow_device_code`. Devices obtain a token with `heph auth login`; the PWA +currently takes a pasted token (in-app device-code login is upstream follow-up). + +## Data seeding (Path A, one-time) + +The hub was seeded from the existing `gilbert` device so no task history was +lost. heph's data-safe bring-up ("Path A") has the hub **adopt the device's +identity** rather than rewriting the device: + +1. Quiesce the seed device: `heph daemon stop` (on gilbert). +2. Copy its store to indri: `scp ~/.local/share/heph/heph.db indri:~/.local/share/heph/heph.db`. +3. Give the hub its **own device origin** (keeps gilbert's `owner_id` + data; + `hephd` regenerates a fresh `origin` on next start when it is missing): + ```fish + ssh indri "sqlite3 ~/.local/share/heph/heph.db \"DELETE FROM meta WHERE key='origin';\"" + ``` +4. `mise run provision-indri -- --tags heph` (installs hephd, stages the PWA, + loads the launchagent → hub starts on the seeded store). + +Only `meta.origin` changes; `owner_id`, nodes, op-log, and links are copied +untouched. 
A clean `hephd --owner-id` / seed command is tracked upstream as +hephaestus follow-up — until then this manual reset is the documented path. + +## Connecting a spoke (e.g. gilbert) + +A device joins by running its local daemon with the hub URL + OIDC client and +logging in once: + +```bash +hephd --mode local --hub-url http://indri.tail8d86e.ts.net:8787 \ + --oidc-issuer https://authentik.ops.eblu.me/application/o/heph/ \ + --oidc-client-id heph +heph auth login --hub-url http://indri.tail8d86e.ts.net:8787 \ + --issuer https://authentik.ops.eblu.me/application/o/heph/ --client-id heph +``` + +> **Use the direct `http://…:8787` tailnet URL for sync, not the Caddy HTTPS +> URL.** hephd's sync client is plain-HTTP-only; pointing `--hub-url` at +> `https://heph.ops.eblu.me` fails with a confusing `error sending request` +> (the HTTP connector rejects the `https` scheme before connecting). Tailscale +> encrypts the transport, and the OIDC bearer token still gates every request. +> `heph.ops.eblu.me` (Caddy TLS) exists only for the browser PWA, which needs a +> secure context. The cached token is keyed by the exact `--hub-url`, so use the +> same value for `hephd` and `heph auth login`. + +> **Caveat:** `heph daemon` cannot yet bake hub/spoke flags into the generated +> launchd plist (upstream gap). On a spoke whose plist is managed by `heph +> daemon`, the hub/OIDC flags must be hand-added — and a later `heph daemon +> start/restart` will regenerate the plist and drop them. Avoid `heph daemon` +> subcommands on a configured spoke until that gap is closed; reload via +> `launchctl` instead. 
+ +## Related + +- [[indri]] — host +- [[authentik]] — OIDC provider +- [[caddy]] — TLS termination for `heph.ops.eblu.me` From 6576880b0e8e80cd88452add47627c3b4e6d6435 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Fri, 5 Jun 2026 07:30:31 -0700 Subject: [PATCH 32/35] heph Authentik: register heph-pwa redirect URIs (PKCE login) (#370) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the heph-pwa redirect URIs to the Authentik `heph` OAuth2 provider so the new browser **Login with Authentik** flow (Authorization Code + PKCE, hephaestus PR #9) can redirect back and exchange the code: - `https://heph.ops.eblu.me/` (the PWA origin) - `http://localhost:8787/` (local dev: `hephd --web-root`) Authentik also keys token-endpoint CORS off these origins, so they're required for the browser token exchange. Additive (the provider was `redirect_uris: []`); harmless until the PWA feature deploys. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/370 --- argocd/manifests/authentik/configmap-blueprint.yaml | 13 ++++++++++--- docs/changelog.d/heph-pwa-redirect-uris.infra.md | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 docs/changelog.d/heph-pwa-redirect-uris.infra.md diff --git a/argocd/manifests/authentik/configmap-blueprint.yaml b/argocd/manifests/authentik/configmap-blueprint.yaml index 56d9110..9da2f70 100644 --- a/argocd/manifests/authentik/configmap-blueprint.yaml +++ b/argocd/manifests/authentik/configmap-blueprint.yaml @@ -477,9 +477,16 @@ data: invalidation_flow: !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]] client_type: public client_id: heph - # Device-code (RFC 8628) + PKCE use no redirect, but the provider - # serializer requires the field — an empty list satisfies it. - redirect_uris: [] + # CLI/TUI use the device-code grant (no redirect). 
The heph-pwa browser + # login uses Authorization Code + PKCE, which DOES redirect back to the + # app's origin — register those here (Authentik also keys token-endpoint + # CORS off these origins). Trailing slash matters: the PWA's redirect_uri + # is its base dir, e.g. https://heph.ops.eblu.me/. + redirect_uris: + - matching_mode: strict + url: https://heph.ops.eblu.me/ + - matching_mode: strict + url: http://localhost:8787/ # local dev (hephd --web-root) signing_key: !Find [authentik_crypto.certificatekeypair, [name, authentik Self-signed Certificate]] property_mappings: - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]] diff --git a/docs/changelog.d/heph-pwa-redirect-uris.infra.md b/docs/changelog.d/heph-pwa-redirect-uris.infra.md new file mode 100644 index 0000000..f887eed --- /dev/null +++ b/docs/changelog.d/heph-pwa-redirect-uris.infra.md @@ -0,0 +1 @@ +Registered the heph-pwa redirect URIs (`https://heph.ops.eblu.me/`, plus `http://localhost:8787/` for dev) on the Authentik `heph` OAuth2 provider, enabling the PWA's new Authorization Code + PKCE "Login with Authentik" flow (and the token-endpoint CORS it needs). Pairs with hephaestus PR #9. From 3abe80523a0b402c40a0bd3d825e5d81b87939d8 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Fri, 5 Jun 2026 07:40:51 -0700 Subject: [PATCH 33/35] C0: bump indri heph hub to v1.2.1 (PWA Authentik login + /config) Co-Authored-By: Claude Opus 4.8 (1M context) --- ansible/roles/heph/defaults/main.yml | 2 +- docs/changelog.d/+heph-hub-v1.2.1.infra.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 docs/changelog.d/+heph-hub-v1.2.1.infra.md diff --git a/ansible/roles/heph/defaults/main.yml b/ansible/roles/heph/defaults/main.yml index e5eea36..88d2240 100644 --- a/ansible/roles/heph/defaults/main.yml +++ b/ansible/roles/heph/defaults/main.yml @@ -6,7 +6,7 @@ # Pinned release used for the initial `cargo install` and the PWA shell. 
# After bootstrap, hephd's own --self-update keeps the binary current; this # pin only governs the first install and the bundled PWA shell version. -heph_version: v1.2.0 +heph_version: v1.2.1 # Anonymous public HTTPS clone — matches hephd's INSTALL_GIT_URL so the initial # install and unattended self-update build from the same source (no ssh-agent). diff --git a/docs/changelog.d/+heph-hub-v1.2.1.infra.md b/docs/changelog.d/+heph-hub-v1.2.1.infra.md new file mode 100644 index 0000000..c203323 --- /dev/null +++ b/docs/changelog.d/+heph-hub-v1.2.1.infra.md @@ -0,0 +1 @@ +Bumped the indri heph hub to v1.2.1, which adds the hub `GET /config` endpoint and ships the heph-pwa **Login with Authentik** flow (Authorization Code + PKCE). Pairs with the Authentik `heph` provider redirect URIs registered earlier. From cf63fcb5b5cf379700efe3ce0986b18ec4d76625 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Fri, 5 Jun 2026 08:22:46 -0700 Subject: [PATCH 34/35] C0: track heph in service-versions (self-updating; note drift task) Co-Authored-By: Claude Opus 4.8 (1M context) --- service-versions.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/service-versions.yaml b/service-versions.yaml index cc9dc9e..866c687 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -414,6 +414,23 @@ services: upstream-source: https://github.com/caddyserver/caddy/releases notes: Built from source with Gandi DNS and Layer 4 plugins + - name: heph + type: ansible + last-reviewed: 2026-06-05 + current-version: "v1.2.1" + upstream-source: https://forge.eblu.me/eblume/hephaestus/releases + notes: >- + hephaestus task/context sync hub on indri (server-mode launchagent, + ansible/roles/heph; cargo-built from the forge). SELF-UPDATING: hephd + polls the forge for newer releases every 10 min and rebuilds + restarts + itself, so the running version drifts AHEAD of the ansible heph_version + pin. 
current-version here is the last observed/deployed tag, not a hard + pin — confirm the hub is up by checking that `curl https://heph.ops.eblu.me/config` + responds, then read the live version from the hub log's `current=` line. Reconciling this + self-update vs IaC-pin drift is tracked in the heph "Hephaestus" project: + "Reconcile hephd self-update with ansible-pinned version (drift on indri + hub)" (node 01KTBXWT6XTHNDH92CVJY88E5K). + - name: borgmatic type: ansible last-reviewed: 2026-04-15 From 50a36ff93a9d1c697c976a1db498bc5633f2cd7c Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Sat, 6 Jun 2026 18:07:13 -0700 Subject: [PATCH 35/35] heph Authentik: grant offline_access scope (fixes spoke sync refresh-token 400) The heph CLI requests scope "openid offline_access", but the Authentik heph OAuth2 provider only mapped openid/email/profile. Without the offline_access mapping the issued refresh token is bound to the login session rather than the 30-day refresh-token window; once the session lapses, hephd's refresh_token grant returns 400 Bad Request and spoke sync silently degrades (heph sync --status -> auth_failure: true). Add the built-in offline_access scope mapping to the provider's property_mappings and document the requirement in the service reference.
Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/manifests/authentik/configmap-blueprint.yaml | 4 ++++ docs/changelog.d/heph-offline-access.bugfix.md | 1 + docs/reference/services/hephaestus.md | 11 +++++++++++ 3 files changed, 16 insertions(+) create mode 100644 docs/changelog.d/heph-offline-access.bugfix.md diff --git a/argocd/manifests/authentik/configmap-blueprint.yaml b/argocd/manifests/authentik/configmap-blueprint.yaml index 9da2f70..cc97dea 100644 --- a/argocd/manifests/authentik/configmap-blueprint.yaml +++ b/argocd/manifests/authentik/configmap-blueprint.yaml @@ -492,6 +492,10 @@ data: - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]] - !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]] - !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]] + # offline_access: heph CLI requests "openid offline_access"; without + # this mapping the refresh token is session-bound and hephd's + # refresh_token grant 400s once the session lapses (spoke sync dies). + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, offline_access]] sub_mode: hashed_user_id include_claims_in_id_token: true diff --git a/docs/changelog.d/heph-offline-access.bugfix.md b/docs/changelog.d/heph-offline-access.bugfix.md new file mode 100644 index 0000000..e9721bc --- /dev/null +++ b/docs/changelog.d/heph-offline-access.bugfix.md @@ -0,0 +1 @@ +Granted the `offline_access` scope on the Authentik `heph` OAuth2 provider so hephaestus spokes receive a durable 30-day refresh token. Previously the refresh token was session-bound, so spoke sync would silently fail with a `400 Bad Request` on the `refresh_token` grant once the Authentik session lapsed. 
diff --git a/docs/reference/services/hephaestus.md b/docs/reference/services/hephaestus.md index 1754ea0..7abc35b 100644 --- a/docs/reference/services/hephaestus.md +++ b/docs/reference/services/hephaestus.md @@ -68,6 +68,17 @@ in the [[authentik]] blueprint (`argocd/manifests/authentik/configmap-blueprint. - Issuer: `https://authentik.ops.eblu.me/application/o/heph/` - Audience / client id: `heph` - Restricted to the `admins` group (single-owner, sensitive data). +- Scope mappings: `openid`, `email`, `profile`, **`offline_access`**. + +> **`offline_access` is required for durable sync.** The `heph` CLI requests +> `scope = "openid offline_access"`, and a refresh token is only issued for the +> 30-day refresh-token window when the provider actually grants `offline_access`. +> Without that scope mapping the refresh token is bound to the login **session**; +> once the session lapses, hephd's `refresh_token` grant returns `400 Bad +> Request`, the bearer can't be refreshed, and spoke sync silently degrades +> (`heph sync --status` → `auth_failure: true`). `heph auth login` papers over it +> until the next session expiry. Keep `offline_access` in the provider's +> `property_mappings`. Because no Authentik instance ships a device-code flow by default, the blueprint also creates `default-device-code-flow` and binds it to the default brand's