From 2985007430e0bd2da852b79cbcb9c32f6c3504bf Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 14:37:52 -0700 Subject: [PATCH 01/11] Add native Dagger build for external-secrets (container.py) Localizes external-secrets off ghcr.io: clones the forge mirror at v2.2.0 and builds the single all_providers static Go binary, faithful to upstream's `make build`. ENTRYPOINT is the binary so the controller/webhook/cert-controller Deployments can select their role via args. Co-Authored-By: Claude Opus 4.8 (1M context) --- containers/external-secrets/container.py | 51 ++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 containers/external-secrets/container.py diff --git a/containers/external-secrets/container.py b/containers/external-secrets/container.py new file mode 100644 index 0000000..6be5765 --- /dev/null +++ b/containers/external-secrets/container.py @@ -0,0 +1,51 @@ +"""External Secrets Operator — native Dagger build. + +Two-stage build: Go binary (all providers), Alpine runtime. +Source cloned from forge mirror. + +A single binary serves as the controller, webhook, and cert-controller; the +Deployments select the role via a subcommand passed in `args:`, so the image +ENTRYPOINT must be the binary itself (matching upstream's distroless image). +""" + +import dagger + +from blumeops.containers import ( + alpine_runtime, + clone_from_forge, + go_build, + oci_labels, +) + +VERSION = "v2.2.0" + + +async def build(src: dagger.Directory) -> dagger.Container: + source = clone_from_forge("external-secrets", VERSION) + + # Upstream `make build` compiles every secret provider into a single + # static binary (`-tags all_providers`, CGO disabled). Mirror that so the + # local image is functionally identical to ghcr.io/.../external-secrets. 
+ backend = go_build( + source, + "/external-secrets", + tags="all_providers", + ) + + runtime = alpine_runtime( + extra_apk=["ca-certificates"], + create_user=False, + ) + runtime = oci_labels( + runtime, + title="External Secrets Operator", + description=( + "Kubernetes operator that integrates external secret management systems" + ), + version=VERSION, + ) + return ( + runtime.with_file("/bin/external-secrets", backend.file("/external-secrets")) + .with_user("65534") + .with_entrypoint(["/bin/external-secrets"]) + ) From adc24358f479f6ed447e45a78dd1e1e6e967d593 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 14:48:08 -0700 Subject: [PATCH 02/11] Deploy external-secrets from local registry image Swap the controller/webhook/cert-controller image from ghcr.io to the locally built registry.ops.eblu.me/blumeops/external-secrets:v2.2.0-2985007. Like-for-like (v2.2.0); mark service reviewed. Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/manifests/external-secrets/kustomization.yaml | 3 ++- docs/changelog.d/local-external-secrets.infra.md | 1 + service-versions.yaml | 7 +++++-- 3 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 docs/changelog.d/local-external-secrets.infra.md diff --git a/argocd/manifests/external-secrets/kustomization.yaml b/argocd/manifests/external-secrets/kustomization.yaml index 574aaa7..c25a7d5 100644 --- a/argocd/manifests/external-secrets/kustomization.yaml +++ b/argocd/manifests/external-secrets/kustomization.yaml @@ -12,4 +12,5 @@ resources: images: - name: ghcr.io/external-secrets/external-secrets - newTag: v2.2.0 + newName: registry.ops.eblu.me/blumeops/external-secrets + newTag: v2.2.0-2985007 diff --git a/docs/changelog.d/local-external-secrets.infra.md b/docs/changelog.d/local-external-secrets.infra.md new file mode 100644 index 0000000..13cbb05 --- /dev/null +++ b/docs/changelog.d/local-external-secrets.infra.md @@ -0,0 +1 @@ +Localized the external-secrets controller image. 
It now builds from the forge mirror via a native Dagger `container.py` (single `all_providers` static Go binary, faithful to upstream's `make build`) and is served from `registry.ops.eblu.me/blumeops/external-secrets` instead of `ghcr.io`, bringing another platform component under local supply-chain control. diff --git a/service-versions.yaml b/service-versions.yaml index 11ec9f9..cc9dc9e 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -159,10 +159,13 @@ services: - name: external-secrets type: argocd - last-reviewed: 2026-03-25 + last-reviewed: 2026-06-04 current-version: "v2.2.0" upstream-source: https://github.com/external-secrets/external-secrets/releases - notes: Static kustomize manifests rendered from upstream Helm chart + notes: >- + Static kustomize manifests rendered from upstream Helm chart. Controller + image is locally built from the forge mirror via containers/external-secrets/container.py + (single all_providers static Go binary). - name: 1password-connect type: argocd From 0e70a1b5242183170a5d7d8ac96ee864063f65bb Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 14:55:55 -0700 Subject: [PATCH 03/11] Localize external-secrets container (native container.py build) (#367) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Knocks out the weekly "pick one non-local container and make it local" task by moving **external-secrets** off `ghcr.io` onto a locally-built image, under our own supply-chain control. Doubles as its overdue service review. ## What changed - **`containers/external-secrets/container.py`** (new) — native Dagger build (the Dockerfile→container.py migration pattern). Clones the forge mirror at `v2.2.0` and builds the single `all_providers` static Go binary, faithful to upstream's `make build` (CGO off, no version ldflags upstream). 
ENTRYPOINT is `/bin/external-secrets` so the controller/webhook/cert-controller Deployments select their role via `args:` exactly as before. - **`argocd/manifests/external-secrets/kustomization.yaml`** — image swapped to `registry.ops.eblu.me/blumeops/external-secrets:v2.2.0-2985007`. **Like-for-like (v2.2.0)**, not an upgrade. - **`service-versions.yaml`** — marked reviewed (2026-06-04), noted the local build. ## Build Built on the indri forge runner (run #579, ~4 min) → pushed to Zot. Image config verified: `Entrypoint=/bin/external-secrets`, `User=65534`, version label `v2.2.0`. ## Deployed from branch & verified - All 3 pods (controller / webhook / cert-controller) rolled to the local image, `1/1 Running` - Controller + webhook logs clean (no errors; webhook serving TLS) - **End-to-end secret fetch proven:** force-synced `monitoring/grafana-admin` → `refreshTime` advanced to now, `Ready=True` - All 10 ExternalSecrets cluster-wide remain `SecretSynced=True` — no collateral damage - App `Healthy` ## Post-merge `external-secrets` currently points at this branch (so `apps` reads OutOfSync — expected). 
After merge: ``` argocd app set external-secrets --revision main && argocd app sync external-secrets ``` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/367 --- .../external-secrets/kustomization.yaml | 3 +- containers/external-secrets/container.py | 51 +++++++++++++++++++ .../local-external-secrets.infra.md | 1 + service-versions.yaml | 7 ++- 4 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 containers/external-secrets/container.py create mode 100644 docs/changelog.d/local-external-secrets.infra.md diff --git a/argocd/manifests/external-secrets/kustomization.yaml b/argocd/manifests/external-secrets/kustomization.yaml index 574aaa7..c25a7d5 100644 --- a/argocd/manifests/external-secrets/kustomization.yaml +++ b/argocd/manifests/external-secrets/kustomization.yaml @@ -12,4 +12,5 @@ resources: images: - name: ghcr.io/external-secrets/external-secrets - newTag: v2.2.0 + newName: registry.ops.eblu.me/blumeops/external-secrets + newTag: v2.2.0-2985007 diff --git a/containers/external-secrets/container.py b/containers/external-secrets/container.py new file mode 100644 index 0000000..6be5765 --- /dev/null +++ b/containers/external-secrets/container.py @@ -0,0 +1,51 @@ +"""External Secrets Operator — native Dagger build. + +Two-stage build: Go binary (all providers), Alpine runtime. +Source cloned from forge mirror. + +A single binary serves as the controller, webhook, and cert-controller; the +Deployments select the role via a subcommand passed in `args:`, so the image +ENTRYPOINT must be the binary itself (matching upstream's distroless image). 
+""" + +import dagger + +from blumeops.containers import ( + alpine_runtime, + clone_from_forge, + go_build, + oci_labels, +) + +VERSION = "v2.2.0" + + +async def build(src: dagger.Directory) -> dagger.Container: + source = clone_from_forge("external-secrets", VERSION) + + # Upstream `make build` compiles every secret provider into a single + # static binary (`-tags all_providers`, CGO disabled). Mirror that so the + # local image is functionally identical to ghcr.io/.../external-secrets. + backend = go_build( + source, + "/external-secrets", + tags="all_providers", + ) + + runtime = alpine_runtime( + extra_apk=["ca-certificates"], + create_user=False, + ) + runtime = oci_labels( + runtime, + title="External Secrets Operator", + description=( + "Kubernetes operator that integrates external secret management systems" + ), + version=VERSION, + ) + return ( + runtime.with_file("/bin/external-secrets", backend.file("/external-secrets")) + .with_user("65534") + .with_entrypoint(["/bin/external-secrets"]) + ) diff --git a/docs/changelog.d/local-external-secrets.infra.md b/docs/changelog.d/local-external-secrets.infra.md new file mode 100644 index 0000000..13cbb05 --- /dev/null +++ b/docs/changelog.d/local-external-secrets.infra.md @@ -0,0 +1 @@ +Localized the external-secrets controller image. It now builds from the forge mirror via a native Dagger `container.py` (single `all_providers` static Go binary, faithful to upstream's `make build`) and is served from `registry.ops.eblu.me/blumeops/external-secrets` instead of `ghcr.io`, bringing another platform component under local supply-chain control. 
diff --git a/service-versions.yaml b/service-versions.yaml index 11ec9f9..cc9dc9e 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -159,10 +159,13 @@ services: - name: external-secrets type: argocd - last-reviewed: 2026-03-25 + last-reviewed: 2026-06-04 current-version: "v2.2.0" upstream-source: https://github.com/external-secrets/external-secrets/releases - notes: Static kustomize manifests rendered from upstream Helm chart + notes: >- + Static kustomize manifests rendered from upstream Helm chart. Controller + image is locally built from the forge mirror via containers/external-secrets/container.py + (single all_providers static Go binary). - name: 1password-connect type: argocd From 30c82079b9dbb8e2492586d979cd4ec5b04cd08d Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 14:59:17 -0700 Subject: [PATCH 04/11] C0: rebuild external-secrets image off main (v2.2.0-0e70a1b) Repoint to the main-branch-built image so the deployed tag traces to a main commit rather than the merged feature branch. Same v2.2.0 source, stable provenance. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/manifests/external-secrets/kustomization.yaml | 2 +- docs/changelog.d/+external-secrets-main-sha-rebuild.infra.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 docs/changelog.d/+external-secrets-main-sha-rebuild.infra.md diff --git a/argocd/manifests/external-secrets/kustomization.yaml b/argocd/manifests/external-secrets/kustomization.yaml index c25a7d5..8b1aea5 100644 --- a/argocd/manifests/external-secrets/kustomization.yaml +++ b/argocd/manifests/external-secrets/kustomization.yaml @@ -13,4 +13,4 @@ resources: images: - name: ghcr.io/external-secrets/external-secrets newName: registry.ops.eblu.me/blumeops/external-secrets - newTag: v2.2.0-2985007 + newTag: v2.2.0-0e70a1b diff --git a/docs/changelog.d/+external-secrets-main-sha-rebuild.infra.md b/docs/changelog.d/+external-secrets-main-sha-rebuild.infra.md new file mode 100644 index 0000000..2e931d4 --- /dev/null +++ b/docs/changelog.d/+external-secrets-main-sha-rebuild.infra.md @@ -0,0 +1 @@ +Rebuilt the locally-built external-secrets image from the `main` branch so the deployed tag (`v2.2.0-0e70a1b`) traces to a `main` commit rather than the now-merged feature branch, giving a stable provenance reference. From 13895bb04a5afcbb723d7ab3355d228431d76a5d Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 15:37:42 -0700 Subject: [PATCH 05/11] Localize external-secrets on ringtail (amd64 nix build) (#368) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #367. That PR localized external-secrets but the Dagger build (on indri's Apple Silicon runner) only produces an **arm64** image — and external-secrets also runs on **ringtail (amd64)** via the same shared manifest. This completes the localization so both clusters run the local binary on their native arch. 
## Approach (matches the kube-state-metrics dual-build pattern) - **`containers/external-secrets/default.nix`** (new) — builds the **amd64** image on ringtail's nix-container-builder. `buildGoModule` with Go 1.26 (v2.2.0 requires ≥1.26.1; nixpkgs default is 1.25.x) and `-tags all_providers`, faithful to upstream. Same v2.2.0 source from the forge mirror. - **`argocd/manifests/external-secrets-ringtail/`** (new) — thin kustomize overlay that reuses the shared indri manifest as a base and overrides **only** the image to the `-nix` (amd64) tag. No manifest duplication. - **`argocd/apps/external-secrets-ringtail.yaml`** — repointed at the new overlay. Result: indri → `v2.2.0-…` (arm64, Dagger), ringtail → `v2.2.0-…-nix` (amd64, nix). ## Build Run #581 built both arches at the branch commit. Verified the nix image is `linux/amd64`, entrypoint = the binary, user 65534. ## Deployed from branch & verified on ringtail (k3s, amd64) - All 3 pods rolled to the nix amd64 image, `1/1 Running` (no exec-format error → arch correct) - Controller logs clean - **Live secret fetch proven:** force-synced `homepage/homepage-grafana` → `refreshTime` advanced, `Ready=True` - **All 20** ringtail ExternalSecrets remain `SecretSynced=True` ## Post-merge The `external-secrets-ringtail` app is temporarily pointed at this branch + overlay path (apps app left on `main`, manual-sync, untouched). After merge: ``` argocd app sync apps # picks up the new Application path on main argocd app set external-secrets-ringtail --revision main && argocd app sync external-secrets-ringtail ``` I'll also rebuild off `main` so both clusters land on stable main-sha tags (as done for indri in #367). 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/368 --- argocd/apps/external-secrets-ringtail.yaml | 2 +- .../kustomization.yaml | 16 ++++++ containers/external-secrets/default.nix | 56 +++++++++++++++++++ .../external-secrets-ringtail-nix.infra.md | 1 + 4 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 argocd/manifests/external-secrets-ringtail/kustomization.yaml create mode 100644 containers/external-secrets/default.nix create mode 100644 docs/changelog.d/external-secrets-ringtail-nix.infra.md diff --git a/argocd/apps/external-secrets-ringtail.yaml b/argocd/apps/external-secrets-ringtail.yaml index e2f5898..0bb8bd7 100644 --- a/argocd/apps/external-secrets-ringtail.yaml +++ b/argocd/apps/external-secrets-ringtail.yaml @@ -15,7 +15,7 @@ spec: source: repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git targetRevision: main - path: argocd/manifests/external-secrets + path: argocd/manifests/external-secrets-ringtail destination: server: https://ringtail.tail8d86e.ts.net:6443 namespace: external-secrets diff --git a/argocd/manifests/external-secrets-ringtail/kustomization.yaml b/argocd/manifests/external-secrets-ringtail/kustomization.yaml new file mode 100644 index 0000000..05b6b54 --- /dev/null +++ b/argocd/manifests/external-secrets-ringtail/kustomization.yaml @@ -0,0 +1,16 @@ +# Ringtail (amd64) overlay for external-secrets. +# +# Reuses the shared indri manifest as a base and only overrides the controller +# image to the nix-built amd64 variant (`-nix` tag). The base sets the arm64 +# image (built via containers/external-secrets/container.py on indri's Dagger +# runner); ringtail's k3s is amd64 and needs the image built by +# containers/external-secrets/default.nix on the nix-container-builder. 
+apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - ../external-secrets + +images: + - name: registry.ops.eblu.me/blumeops/external-secrets + newTag: v2.2.0-59dace8-nix diff --git a/containers/external-secrets/default.nix b/containers/external-secrets/default.nix new file mode 100644 index 0000000..eabe03d --- /dev/null +++ b/containers/external-secrets/default.nix @@ -0,0 +1,56 @@ +# Nix-built External Secrets Operator (amd64, for ringtail k3s). +# Builds v2.2.0 from the forge mirror with all secret providers compiled in, +# faithful to upstream's `make build` (-tags all_providers). The container.py +# sibling builds the arm64 image for indri's minikube; this default.nix builds +# the amd64 image on ringtail's nix-container-builder. +{ pkgs ? import <nixpkgs> { } }: + +let + version = "2.2.0"; + + src = pkgs.fetchgit { + url = "https://forge.ops.eblu.me/mirrors/external-secrets.git"; + rev = "v${version}"; + hash = "sha256-eAocOAp5s4CFRrpKfQr2lf3Ji+6nQQ1A5/eTw5B7v9U="; + }; + + # external-secrets v2.2.0 requires Go >= 1.26.1; nixpkgs default go is 1.25.x. + external-secrets = (pkgs.buildGoModule.override { go = pkgs.go_1_26; }) { + inherit src version; + pname = "external-secrets"; + vendorHash = "sha256-0xuBK3fjAplPLAElHvKB6d+2lDz+De/s91fV4dPZwjE="; + + doCheck = false; + + subPackages = [ "."
]; + + tags = [ "all_providers" ]; + + ldflags = [ "-s" "-w" ]; + + meta = with pkgs.lib; { + description = "Kubernetes operator that integrates external secret management systems"; + homepage = "https://github.com/external-secrets/external-secrets"; + license = licenses.asl20; + mainProgram = "external-secrets"; + }; + }; +in + +pkgs.dockerTools.buildLayeredImage { + name = "blumeops/external-secrets"; + contents = [ + external-secrets + pkgs.cacert + pkgs.tzdata + ]; + + config = { + Entrypoint = [ "${external-secrets}/bin/external-secrets" ]; + Env = [ + "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" + "TZDIR=${pkgs.tzdata}/share/zoneinfo" + ]; + User = "65534"; + }; +} diff --git a/docs/changelog.d/external-secrets-ringtail-nix.infra.md b/docs/changelog.d/external-secrets-ringtail-nix.infra.md new file mode 100644 index 0000000..9ce3f85 --- /dev/null +++ b/docs/changelog.d/external-secrets-ringtail-nix.infra.md @@ -0,0 +1 @@ +Completed the external-secrets localization for the ringtail (amd64) cluster. The indri Dagger build (`container.py`) only produces an arm64 image; added `containers/external-secrets/default.nix` to build the amd64 variant on ringtail's nix-container-builder, and gave `external-secrets-ringtail` a thin kustomize overlay that reuses the shared manifest and points at the `-nix` image. Both clusters now run the locally-built external-secrets binary on their native architecture. From f6c926f1f594a0ee019bca5d31cdcc4225f6d6cf Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 16:19:20 -0700 Subject: [PATCH 06/11] C0: rebuild external-secrets off main, repoint both clusters to stable tags indri -> v2.2.0-13895bb (arm64), ringtail -> v2.2.0-13895bb-nix (amd64). Both deployed images now trace to main commit 13895bb instead of earlier branch builds. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/manifests/external-secrets-ringtail/kustomization.yaml | 2 +- argocd/manifests/external-secrets/kustomization.yaml | 2 +- docs/changelog.d/+external-secrets-stable-main-sha.infra.md | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 docs/changelog.d/+external-secrets-stable-main-sha.infra.md diff --git a/argocd/manifests/external-secrets-ringtail/kustomization.yaml b/argocd/manifests/external-secrets-ringtail/kustomization.yaml index 05b6b54..9fd4e2f 100644 --- a/argocd/manifests/external-secrets-ringtail/kustomization.yaml +++ b/argocd/manifests/external-secrets-ringtail/kustomization.yaml @@ -13,4 +13,4 @@ resources: images: - name: registry.ops.eblu.me/blumeops/external-secrets - newTag: v2.2.0-59dace8-nix + newTag: v2.2.0-13895bb-nix diff --git a/argocd/manifests/external-secrets/kustomization.yaml b/argocd/manifests/external-secrets/kustomization.yaml index 8b1aea5..639db66 100644 --- a/argocd/manifests/external-secrets/kustomization.yaml +++ b/argocd/manifests/external-secrets/kustomization.yaml @@ -13,4 +13,4 @@ resources: images: - name: ghcr.io/external-secrets/external-secrets newName: registry.ops.eblu.me/blumeops/external-secrets - newTag: v2.2.0-0e70a1b + newTag: v2.2.0-13895bb diff --git a/docs/changelog.d/+external-secrets-stable-main-sha.infra.md b/docs/changelog.d/+external-secrets-stable-main-sha.infra.md new file mode 100644 index 0000000..fbe3c21 --- /dev/null +++ b/docs/changelog.d/+external-secrets-stable-main-sha.infra.md @@ -0,0 +1 @@ +Rebuilt the external-secrets images off `main` and repointed both clusters to the stable main-sha tags (`v2.2.0-13895bb` arm64 / `v2.2.0-13895bb-nix` amd64), so the deployed images on indri and ringtail trace to the same `main` commit rather than earlier feature-branch builds. 
From a2f1e062243a47c7c68b5a57617f14102b798503 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Fri, 5 Jun 2026 06:46:58 -0700 Subject: [PATCH 07/11] Add hephaestus sync hub to indri (launchagent, PWA, device-code OIDC) (#369) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes indri the canonical **heph** hub for the hub-and-spoke task/context system, deployed as a self-updating LaunchAgent managed by Ansible. Other devices (gilbert) attach as offline-capable spokes. ## What's here - **`ansible/roles/heph`** (tag `heph`) — bootstrap `cargo install hephd` (only if absent; `--self-update` keeps it current after), version-pinned `heph-pwa` checkout served via `--web-root`, launchagent `mcquack.eblume.heph`: ``` hephd --mode server --http-addr 0.0.0.0:8787 --db … --web-root … --oidc-issuer …/o/heph/ --oidc-audience heph --self-update --self-update-interval-secs 600 ``` `~/.cargo/bin` is on the agent `PATH` so self-update's `cargo install` works. - **Caddy** — `heph.ops.eblu.me → localhost:8787` (TLS for the PWA secure context). - **Authentik** — new `heph` **public device-code** OIDC app + `default-device-code-flow` bound to the default brand's `flow_device_code` (verified live: brand `authentik-default`, field currently unset → additive). - **Docs** — `services/hephaestus.md` (Path-A seeding runbook + spoke caveat), `indri.md`, changelog fragment. ## Three features requested - **Autoupdate** — 10-min interval (`--self-update-interval-secs 600`). - **PWA** — `--web-root` (confirmed shipped in v1.2.0). - **Spoke** — gilbert reconfig documented (post-merge step). ## Deploy plan (not done yet — awaiting review) 1. Seed from gilbert (Path A): `heph daemon stop` → copy `heph.db` → `DELETE FROM meta WHERE key='origin'`. 2. Sync Authentik `apps`/blueprint; verify blueprint status via API (not just logs). 3. `provision-indri --tags heph,caddy` from this branch. 4. Point gilbert at the hub + `heph auth login`. 
## Known follow-ups (heph-side, tracked in the Hephaestus project) - `heph daemon` can't bake hub/spoke config or pass `--self-update-interval-secs` → worked around by the ansible plist. - Path-A seeding lacks a clean `hephd --owner-id`/seed command → manual `meta.origin` reset for now. - Self-update moves hephd ahead of the ansible-pinned PWA shell over time (drift; tolerated by the SW cache, revisit on next release). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/369 --- ansible/playbooks/indri.yml | 2 + ansible/roles/caddy/defaults/main.yml | 3 + ansible/roles/heph/defaults/main.yml | 49 +++++++ ansible/roles/heph/handlers/main.yml | 6 + ansible/roles/heph/tasks/main.yml | 82 +++++++++++ ansible/roles/heph/templates/heph.plist.j2 | 50 +++++++ .../authentik/configmap-blueprint.yaml | 79 +++++++++++ docs/changelog.d/heph-indri-hub.infra.md | 1 + docs/reference/infrastructure/indri.md | 1 + docs/reference/services/hephaestus.md | 130 ++++++++++++++++++ 10 files changed, 403 insertions(+) create mode 100644 ansible/roles/heph/defaults/main.yml create mode 100644 ansible/roles/heph/handlers/main.yml create mode 100644 ansible/roles/heph/tasks/main.yml create mode 100644 ansible/roles/heph/templates/heph.plist.j2 create mode 100644 docs/changelog.d/heph-indri-hub.infra.md create mode 100644 docs/reference/services/hephaestus.md diff --git a/ansible/playbooks/indri.yml b/ansible/playbooks/indri.yml index ddb57f8..1e33bb1 100644 --- a/ansible/playbooks/indri.yml +++ b/ansible/playbooks/indri.yml @@ -260,5 +260,7 @@ tags: cv - role: docs tags: docs + - role: heph + tags: heph - role: caddy tags: caddy diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml index 363d09e..e6d7385 100644 --- a/ansible/roles/caddy/defaults/main.yml +++ b/ansible/roles/caddy/defaults/main.yml @@ -52,6 +52,9 @@ caddy_services: - name: devpi host: "pypi.{{ caddy_domain }}" backend: 
"http://localhost:3141" + - name: heph + host: "heph.{{ caddy_domain }}" + backend: "http://localhost:8787" # hephaestus hub (server mode) + PWA shell - name: kiwix host: "kiwix.{{ caddy_domain }}" backend: "https://kiwix.tail8d86e.ts.net" diff --git a/ansible/roles/heph/defaults/main.yml b/ansible/roles/heph/defaults/main.yml new file mode 100644 index 0000000..e5eea36 --- /dev/null +++ b/ansible/roles/heph/defaults/main.yml @@ -0,0 +1,49 @@ +--- +# hephaestus hub — the canonical heph replica (server mode) on indri. +# Other devices (e.g. gilbert) are spokes that sync against this hub. +# See [[set-up-sync-hub]] and [[host-heph-pwa]] in the hephaestus repo. + +# Pinned release used for the initial `cargo install` and the PWA shell. +# After bootstrap, hephd's own --self-update keeps the binary current; this +# pin only governs the first install and the bundled PWA shell version. +heph_version: v1.2.0 + +# Anonymous public HTTPS clone — matches hephd's INSTALL_GIT_URL so the initial +# install and unattended self-update build from the same source (no ssh-agent). +heph_repo_url: https://forge.eblu.me/eblume/hephaestus.git + +heph_bin_dir: /Users/erichblume/.cargo/bin +heph_binary: "{{ heph_bin_dir }}/hephd" + +# rustc/cargo here are rustup shims. The bare (non-mise) environment that the +# launchagent and ansible run in falls back to rustup's *default* toolchain, +# which can lag behind heph's rust-version floor (Cargo.toml: 1.89). Pin the +# channel explicitly so both the bootstrap build and unattended self-update +# always use a current toolchain regardless of the host's rustup default. +heph_rust_toolchain: stable + +heph_data_dir: /Users/erichblume/.local/share/heph +heph_db: "{{ heph_data_dir }}/heph.db" +heph_socket: "{{ heph_data_dir }}/hephd.sock" +heph_log_dir: /Users/erichblume/Library/Logs + +# Version-pinned source checkout; the PWA static shell is served directly from +# its heph-pwa/ subdir (no copy), keeping shell and hub in lockstep at heph_version. 
+heph_pwa_src_dir: /Users/erichblume/.cache/heph-pwa-src +heph_web_root: "{{ heph_pwa_src_dir }}/heph-pwa" + +# Hub listens on all interfaces so tailnet spokes can reach it directly +# (http://indri.tail8d86e.ts.net:8787) and Caddy can proxy heph.ops.eblu.me. +# Access is gated by Authentik OIDC regardless — tailnet reachability is not +# enough (this is the owner's most sensitive data). +heph_http_addr: 0.0.0.0:8787 +heph_port: 8787 +heph_external_url: https://heph.ops.eblu.me + +# Authentik OIDC — issuer + audience together turn hub auth on. The audience is +# the device-code client id (see argocd/manifests/authentik heph blueprint). +heph_oidc_issuer: https://authentik.ops.eblu.me/application/o/heph/ +heph_oidc_audience: heph + +# Self-update poll interval (seconds). 10 minutes. +heph_self_update_interval_secs: 600 diff --git a/ansible/roles/heph/handlers/main.yml b/ansible/roles/heph/handlers/main.yml new file mode 100644 index 0000000..92fe9d7 --- /dev/null +++ b/ansible/roles/heph/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: Restart heph + ansible.builtin.shell: | + launchctl unload ~/Library/LaunchAgents/mcquack.eblume.heph.plist 2>/dev/null || true + launchctl load ~/Library/LaunchAgents/mcquack.eblume.heph.plist + changed_when: true diff --git a/ansible/roles/heph/tasks/main.yml b/ansible/roles/heph/tasks/main.yml new file mode 100644 index 0000000..7a45fe3 --- /dev/null +++ b/ansible/roles/heph/tasks/main.yml @@ -0,0 +1,82 @@ +--- +# hephaestus hub (server mode) on indri. +# +# DATA SEEDING (one-time, Path A — do this BEFORE the first provision so the hub +# adopts gilbert's existing data instead of being born empty): +# +# 1. On the seed device (gilbert): heph daemon stop +# 2. Copy its store to indri: scp ~/.local/share/heph/heph.db \ +# indri:~/.local/share/heph/heph.db +# 3. 
On indri, give the hub its OWN device origin (keeps gilbert's owner_id + +# data; hephd regenerates a fresh origin on next start when it is missing): +# sqlite3 ~/.local/share/heph/heph.db "DELETE FROM meta WHERE key='origin';" +# 4. Run this role (installs hephd, stages the PWA, loads the launchagent). +# +# hephd auto-creates an empty store on first start if none exists, so seeding is +# optional — skip it only if you intend a fresh, empty hub. + +- name: Ensure heph data directory exists + ansible.builtin.file: + path: "{{ heph_data_dir }}" + state: directory + mode: '0700' + +- name: Check for installed hephd binary + ansible.builtin.stat: + path: "{{ heph_binary }}" + register: heph_binary_stat + +# Bootstrap install only when hephd is absent. Thereafter hephd's own +# --self-update keeps it current; ansible must not fight (or downgrade) it. +# This builds from source and can take several minutes on a cold cargo cache. +- name: Bootstrap-install heph + hephd from the forge ({{ heph_version }}) + ansible.builtin.command: + cmd: >- + {{ heph_bin_dir }}/cargo install --locked + --git {{ heph_repo_url }} + --tag {{ heph_version }} + heph hephd + environment: + PATH: "{{ heph_bin_dir }}:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin" + RUSTUP_TOOLCHAIN: "{{ heph_rust_toolchain }}" + when: not heph_binary_stat.stat.exists + changed_when: true + notify: Restart heph + +# Checkout provides the PWA shell at {{ heph_web_root }} (heph-pwa/ subdir), +# served directly by hephd. Static files are read from disk per request, so a +# version bump needs no restart; the service worker (CACHE = "heph-pwa-vN") +# evicts stale assets on next load. 
+- name: Ensure heph cache parent directory exists + ansible.builtin.file: + path: "{{ heph_pwa_src_dir | dirname }}" + state: directory + mode: '0755' + +- name: Stage heph-pwa source at {{ heph_version }} + ansible.builtin.git: + repo: "{{ heph_repo_url }}" + dest: "{{ heph_pwa_src_dir }}" + version: "{{ heph_version }}" + depth: 1 + single_branch: true + force: true + +- name: Deploy heph LaunchAgent plist + ansible.builtin.template: + src: heph.plist.j2 + dest: ~/Library/LaunchAgents/mcquack.eblume.heph.plist + mode: '0644' + notify: Restart heph + +- name: Check if heph LaunchAgent is loaded + ansible.builtin.command: launchctl list mcquack.eblume.heph + register: heph_launchctl_check + changed_when: false + failed_when: false + +- name: Load heph LaunchAgent if not loaded + ansible.builtin.command: launchctl load ~/Library/LaunchAgents/mcquack.eblume.heph.plist + when: heph_launchctl_check.rc != 0 + changed_when: true + failed_when: false diff --git a/ansible/roles/heph/templates/heph.plist.j2 b/ansible/roles/heph/templates/heph.plist.j2 new file mode 100644 index 0000000..19a2367 --- /dev/null +++ b/ansible/roles/heph/templates/heph.plist.j2 @@ -0,0 +1,50 @@ + + + + + + Label + mcquack.eblume.heph + ProgramArguments + + {{ heph_binary }} + --mode + server + --http-addr + {{ heph_http_addr }} + --db + {{ heph_db }} + --socket + {{ heph_socket }} + --web-root + {{ heph_web_root }} + --oidc-issuer + {{ heph_oidc_issuer }} + --oidc-audience + {{ heph_oidc_audience }} + --self-update + --self-update-interval-secs + {{ heph_self_update_interval_secs }} + + RunAtLoad + + KeepAlive + + EnvironmentVariables + + + PATH + {{ heph_bin_dir }}:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin + HOME + /Users/erichblume + + RUSTUP_TOOLCHAIN + {{ heph_rust_toolchain }} + + StandardOutPath + {{ heph_log_dir }}/mcquack.heph.out.log + StandardErrorPath + {{ heph_log_dir }}/mcquack.heph.err.log + + diff --git a/argocd/manifests/authentik/configmap-blueprint.yaml 
b/argocd/manifests/authentik/configmap-blueprint.yaml index fcbb99b..56d9110 100644 --- a/argocd/manifests/authentik/configmap-blueprint.yaml +++ b/argocd/manifests/authentik/configmap-blueprint.yaml @@ -434,3 +434,82 @@ data: provider: !KeyOf mealie-provider meta_launch_url: https://meals.ops.eblu.me policy_engine_mode: all + + heph.yaml: | + version: 1 + metadata: + name: BlumeOps Heph SSO + labels: + blueprints.goauthentik.io/description: "Hephaestus hub OIDC (device-code) provider, application, and device-code flow" + entries: + # Device-code flow (RFC 8628). authentik ships no default for this, so we + # create one and bind it to the brand below. An empty stage_configuration + # flow is sufficient: the already-authenticated user just confirms the code. + - model: authentik_flows.flow + id: device-code-flow + identifiers: + slug: default-device-code-flow + attrs: + name: Device code flow + title: Device code flow + slug: default-device-code-flow + designation: stage_configuration + authentication: require_authenticated + + # Enable the device-code grant globally by binding the flow to the default + # brand (domain authentik-default). Partial update — only sets this field. + - model: authentik_brands.brand + identifiers: + domain: authentik-default + attrs: + flow_device_code: !KeyOf device-code-flow + + # OAuth2 provider for heph — PUBLIC client (device-code + PKCE, no secret). + # client_id doubles as the token audience the hub verifies (--oidc-audience heph), + # and the app slug 'heph' is the issuer path (/application/o/heph/). 
+ - model: authentik_providers_oauth2.oauth2provider + id: heph-provider + identifiers: + name: Heph + attrs: + name: Heph + authorization_flow: !Find [authentik_flows.flow, [slug, default-provider-authorization-implicit-consent]] + invalidation_flow: !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]] + client_type: public + client_id: heph + # Device-code (RFC 8628) + PKCE use no redirect, but the provider + # serializer requires the field — an empty list satisfies it. + redirect_uris: [] + signing_key: !Find [authentik_crypto.certificatekeypair, [name, authentik Self-signed Certificate]] + property_mappings: + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]] + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]] + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]] + sub_mode: hashed_user_id + include_claims_in_id_token: true + + # Heph application — linked to the OAuth2 provider + - model: authentik_core.application + id: heph-app + identifiers: + slug: heph + attrs: + name: Hephaestus + slug: heph + provider: !KeyOf heph-provider + meta_launch_url: https://heph.ops.eblu.me + policy_engine_mode: any + + # Policy binding — restrict heph to admins group (single-owner, sensitive data) + - model: authentik_policies.policybinding + identifiers: + order: 0 + target: !KeyOf heph-app + group: !Find [authentik_core.group, [name, admins]] + attrs: + target: !KeyOf heph-app + group: !Find [authentik_core.group, [name, admins]] + order: 0 + enabled: true + negate: false + timeout: 30 diff --git a/docs/changelog.d/heph-indri-hub.infra.md b/docs/changelog.d/heph-indri-hub.infra.md new file mode 100644 index 0000000..6761cb7 --- /dev/null +++ b/docs/changelog.d/heph-indri-hub.infra.md @@ -0,0 +1 @@ +Added the [[hephaestus]] (`heph`) sync hub to indri as a self-updating LaunchAgent managed by Ansible (`ansible/roles/heph`, tag `heph`). 
The hub runs `hephd --mode server` behind `heph.ops.eblu.me` (Caddy TLS), with self-update on a 10-minute interval and the heph-pwa mobile shell served from `--web-root`. Access is gated by a new Authentik device-code (RFC 8628) OIDC application. Indri is now the canonical hub; other devices (e.g. gilbert) attach as offline-capable spokes. The hub's store was seeded from gilbert via the data-safe Path A bring-up (copy store, reset `meta.origin`). diff --git a/docs/reference/infrastructure/indri.md b/docs/reference/infrastructure/indri.md index 67652ca..8364ba0 100644 --- a/docs/reference/infrastructure/indri.md +++ b/docs/reference/infrastructure/indri.md @@ -33,6 +33,7 @@ Primary BlumeOps server. Mac Mini M1 (2020). - [[alloy|Alloy]] - Metrics/logs collector - [[caddy]] - Reverse proxy for `*.ops.eblu.me` - [[devpi]] - PyPI mirror (LaunchAgent) +- [[hephaestus]] - heph task/context sync hub (LaunchAgent, self-updating) - [[cv]] - Static CV site, served by Caddy - [[docs]] - Quartz-built docs site, served by Caddy diff --git a/docs/reference/services/hephaestus.md b/docs/reference/services/hephaestus.md new file mode 100644 index 0000000..1754ea0 --- /dev/null +++ b/docs/reference/services/hephaestus.md @@ -0,0 +1,130 @@ +--- +title: Hephaestus +modified: 2026-06-04 +last-reviewed: 2026-06-04 +tags: + - service + - hephaestus +--- + +# Hephaestus + +[hephaestus](https://github.com/eblume/hephaestus) (`heph`) is the user's +self-hosted task + context/knowledge system. It is **hub-and-spoke**: each device +runs a full local SQLite replica (`hephd --mode local`) and background-syncs +against one canonical **hub**. Indri runs that hub. 
+ +## Quick Reference + +| Property | Value | +|----------|-------| +| **PWA URL** | https://heph.ops.eblu.me (browser PWA, Caddy TLS) | +| **Spoke sync URL** | http://indri.tail8d86e.ts.net:8787 (direct, tailnet) | +| **Local Port** | 8787 (`hephd --mode server`, bound `0.0.0.0`) | +| **Binary** | `~/.cargo/bin/hephd` (self-updating) | +| **Data** | `~/.local/share/heph/heph.db` | +| **PWA shell** | `~/.local/share/heph/web` | +| **Logs** | `~/Library/Logs/mcquack.heph.{out,err}.log` | +| **LaunchAgent** | `mcquack.eblume.heph` | +| **Ansible role** | `ansible/roles/heph` (tag `heph`) | + +## What runs on indri + +The launchagent runs the hub in server mode with three features enabled: + +``` +hephd --mode server --http-addr 0.0.0.0:8787 --db ~/.local/share/heph/heph.db + --web-root ~/.local/share/heph/web + --oidc-issuer https://authentik.ops.eblu.me/application/o/heph/ + --oidc-audience heph + --self-update --self-update-interval-secs 600 +``` + +- **Server mode** exposes the HTTP sync endpoint (`/rpc`, `/sync/*`) that spokes + reconcile their op-log against. +- **Self-update** (10-minute poll) rebuilds `hephd` from the forge when a newer + release tag appears (`cargo install --git https://forge.eblu.me/eblume/hephaestus.git`). + Indri's Rust toolchain (`~/.cargo/bin`) is on the agent's `PATH` for this, and + the plist pins `RUSTUP_TOOLCHAIN=stable` — the + launchagent runs without mise, so a bare `cargo` shim would otherwise fall back + to rustup's *default* toolchain, which can lag behind heph's `rust-version` floor + (1.89) and silently fail the build. +- **PWA** (`--web-root`) serves the [heph-pwa] mobile shell; Caddy terminates TLS + at `heph.ops.eblu.me` so the PWA runs in a secure context (service worker, + install-to-home-screen, voice capture). 
+ +[heph-pwa]: https://github.com/eblume/hephaestus + +The hub binds `0.0.0.0` so tailnet spokes can also sync directly +(`http://indri.tail8d86e.ts.net:8787`); access is gated by Authentik OIDC either +way — tailnet reachability alone is not enough. + +## Authentication (Authentik OIDC, device-code) + +The hub verifies an OIDC bearer token on every sync. The `heph` application is a +**public** OAuth2 client using the **device-code flow** (RFC 8628), provisioned +in the [[authentik]] blueprint (`argocd/manifests/authentik/configmap-blueprint.yaml`): + +- Issuer: `https://authentik.ops.eblu.me/application/o/heph/` +- Audience / client id: `heph` +- Restricted to the `admins` group (single-owner, sensitive data). + +Because no Authentik instance ships a device-code flow by default, the blueprint +also creates `default-device-code-flow` and binds it to the default brand's +`flow_device_code`. Devices obtain a token with `heph auth login`; the PWA +currently takes a pasted token (in-app device-code login is upstream follow-up). + +## Data seeding (Path A, one-time) + +The hub was seeded from the existing `gilbert` device so no task history was +lost. heph's data-safe bring-up ("Path A") has the hub **adopt the device's +identity** rather than rewriting the device: + +1. Quiesce the seed device: `heph daemon stop` (on gilbert). +2. Copy its store to indri: `scp ~/.local/share/heph/heph.db indri:~/.local/share/heph/heph.db`. +3. Give the hub its **own device origin** (keeps gilbert's `owner_id` + data; + `hephd` regenerates a fresh `origin` on next start when it is missing): + ```fish + ssh indri "sqlite3 ~/.local/share/heph/heph.db \"DELETE FROM meta WHERE key='origin';\"" + ``` +4. `mise run provision-indri -- --tags heph` (installs hephd, stages the PWA, + loads the launchagent → hub starts on the seeded store). + +Only `meta.origin` changes; `owner_id`, nodes, op-log, and links are copied +untouched. 
A clean `hephd --owner-id` / seed command is tracked upstream as +hephaestus follow-up — until then this manual reset is the documented path. + +## Connecting a spoke (e.g. gilbert) + +A device joins by running its local daemon with the hub URL + OIDC client and +logging in once: + +```bash +hephd --mode local --hub-url http://indri.tail8d86e.ts.net:8787 \ + --oidc-issuer https://authentik.ops.eblu.me/application/o/heph/ \ + --oidc-client-id heph +heph auth login --hub-url http://indri.tail8d86e.ts.net:8787 \ + --issuer https://authentik.ops.eblu.me/application/o/heph/ --client-id heph +``` + +> **Use the direct `http://…:8787` tailnet URL for sync, not the Caddy HTTPS +> URL.** hephd's sync client is plain-HTTP-only; pointing `--hub-url` at +> `https://heph.ops.eblu.me` fails with a confusing `error sending request` +> (the HTTP connector rejects the `https` scheme before connecting). Tailscale +> encrypts the transport, and the OIDC bearer token still gates every request. +> `heph.ops.eblu.me` (Caddy TLS) exists only for the browser PWA, which needs a +> secure context. The cached token is keyed by the exact `--hub-url`, so use the +> same value for `hephd` and `heph auth login`. + +> **Caveat:** `heph daemon` cannot yet bake hub/spoke flags into the generated +> launchd plist (upstream gap). On a spoke whose plist is managed by `heph +> daemon`, the hub/OIDC flags must be hand-added — and a later `heph daemon +> start/restart` will regenerate the plist and drop them. Avoid `heph daemon` +> subcommands on a configured spoke until that gap is closed; reload via +> `launchctl` instead. 
+ +## Related + +- [[indri]] — host +- [[authentik]] — OIDC provider +- [[caddy]] — TLS termination for `heph.ops.eblu.me` From 6576880b0e8e80cd88452add47627c3b4e6d6435 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Fri, 5 Jun 2026 07:30:31 -0700 Subject: [PATCH 08/11] heph Authentik: register heph-pwa redirect URIs (PKCE login) (#370) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the heph-pwa redirect URIs to the Authentik `heph` OAuth2 provider so the new browser **Login with Authentik** flow (Authorization Code + PKCE, hephaestus PR #9) can redirect back and exchange the code: - `https://heph.ops.eblu.me/` (the PWA origin) - `http://localhost:8787/` (local dev: `hephd --web-root`) Authentik also keys token-endpoint CORS off these origins, so they're required for the browser token exchange. Additive (the provider was `redirect_uris: []`); harmless until the PWA feature deploys. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.eblu.me/eblume/blumeops/pulls/370 --- argocd/manifests/authentik/configmap-blueprint.yaml | 13 ++++++++++--- docs/changelog.d/heph-pwa-redirect-uris.infra.md | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 docs/changelog.d/heph-pwa-redirect-uris.infra.md diff --git a/argocd/manifests/authentik/configmap-blueprint.yaml b/argocd/manifests/authentik/configmap-blueprint.yaml index 56d9110..9da2f70 100644 --- a/argocd/manifests/authentik/configmap-blueprint.yaml +++ b/argocd/manifests/authentik/configmap-blueprint.yaml @@ -477,9 +477,16 @@ data: invalidation_flow: !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]] client_type: public client_id: heph - # Device-code (RFC 8628) + PKCE use no redirect, but the provider - # serializer requires the field — an empty list satisfies it. - redirect_uris: [] + # CLI/TUI use the device-code grant (no redirect). 
The heph-pwa browser + # login uses Authorization Code + PKCE, which DOES redirect back to the + # app's origin — register those here (Authentik also keys token-endpoint + # CORS off these origins). Trailing slash matters: the PWA's redirect_uri + # is its base dir, e.g. https://heph.ops.eblu.me/. + redirect_uris: + - matching_mode: strict + url: https://heph.ops.eblu.me/ + - matching_mode: strict + url: http://localhost:8787/ # local dev (hephd --web-root) signing_key: !Find [authentik_crypto.certificatekeypair, [name, authentik Self-signed Certificate]] property_mappings: - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]] diff --git a/docs/changelog.d/heph-pwa-redirect-uris.infra.md b/docs/changelog.d/heph-pwa-redirect-uris.infra.md new file mode 100644 index 0000000..f887eed --- /dev/null +++ b/docs/changelog.d/heph-pwa-redirect-uris.infra.md @@ -0,0 +1 @@ +Registered the heph-pwa redirect URIs (`https://heph.ops.eblu.me/`, plus `http://localhost:8787/` for dev) on the Authentik `heph` OAuth2 provider, enabling the PWA's new Authorization Code + PKCE "Login with Authentik" flow (and the token-endpoint CORS it needs). Pairs with hephaestus PR #9. From 3abe80523a0b402c40a0bd3d825e5d81b87939d8 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Fri, 5 Jun 2026 07:40:51 -0700 Subject: [PATCH 09/11] C0: bump indri heph hub to v1.2.1 (PWA Authentik login + /config) Co-Authored-By: Claude Opus 4.8 (1M context) --- ansible/roles/heph/defaults/main.yml | 2 +- docs/changelog.d/+heph-hub-v1.2.1.infra.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 docs/changelog.d/+heph-hub-v1.2.1.infra.md diff --git a/ansible/roles/heph/defaults/main.yml b/ansible/roles/heph/defaults/main.yml index e5eea36..88d2240 100644 --- a/ansible/roles/heph/defaults/main.yml +++ b/ansible/roles/heph/defaults/main.yml @@ -6,7 +6,7 @@ # Pinned release used for the initial `cargo install` and the PWA shell. 
# After bootstrap, hephd's own --self-update keeps the binary current; this # pin only governs the first install and the bundled PWA shell version. -heph_version: v1.2.0 +heph_version: v1.2.1 # Anonymous public HTTPS clone — matches hephd's INSTALL_GIT_URL so the initial # install and unattended self-update build from the same source (no ssh-agent). diff --git a/docs/changelog.d/+heph-hub-v1.2.1.infra.md b/docs/changelog.d/+heph-hub-v1.2.1.infra.md new file mode 100644 index 0000000..c203323 --- /dev/null +++ b/docs/changelog.d/+heph-hub-v1.2.1.infra.md @@ -0,0 +1 @@ +Bumped the indri heph hub to v1.2.1, which adds the hub `GET /config` endpoint and ships the heph-pwa **Login with Authentik** flow (Authorization Code + PKCE). Pairs with the Authentik `heph` provider redirect URIs registered earlier. From cf63fcb5b5cf379700efe3ce0986b18ec4d76625 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Fri, 5 Jun 2026 08:22:46 -0700 Subject: [PATCH 10/11] C0: track heph in service-versions (self-updating; note drift task) Co-Authored-By: Claude Opus 4.8 (1M context) --- service-versions.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/service-versions.yaml b/service-versions.yaml index cc9dc9e..866c687 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -414,6 +414,23 @@ services: upstream-source: https://github.com/caddyserver/caddy/releases notes: Built from source with Gandi DNS and Layer 4 plugins + - name: heph + type: ansible + last-reviewed: 2026-06-05 + current-version: "v1.2.1" + upstream-source: https://forge.eblu.me/eblume/hephaestus/releases + notes: >- + hephaestus task/context sync hub on indri (server-mode launchagent, + ansible/roles/heph; cargo-built from the forge). SELF-UPDATING: hephd + polls the forge for newer releases every 10 min and rebuilds + restarts + itself, so the running version drifts AHEAD of the ansible heph_version + pin. 
current-version here is the last observed/deployed tag, not a hard + pin — confirm the hub is up by checking that `curl https://heph.ops.eblu.me/config` + is served, and read the live version from the hub log's `current=` line. Reconciling this + self-update vs IaC-pin drift is tracked in the heph "Hephaestus" project: + "Reconcile hephd self-update with ansible-pinned version (drift on indri + hub)" (node 01KTBXWT6XTHNDH92CVJY88E5K). + + - name: borgmatic type: ansible last-reviewed: 2026-04-15 From 50a36ff93a9d1c697c976a1db498bc5633f2cd7c Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Sat, 6 Jun 2026 18:07:13 -0700 Subject: [PATCH 11/11] heph Authentik: grant offline_access scope (fixes spoke sync refresh-token 400) The heph CLI requests scope "openid offline_access", but the Authentik heph OAuth2 provider only mapped openid/email/profile. Without the offline_access mapping the issued refresh token is bound to the login session rather than the 30-day refresh-token window; once the session lapses, hephd's refresh_token grant returns 400 Bad Request and spoke sync silently degrades (heph sync --status -> auth_failure: true). Add the built-in offline_access scope mapping to the provider's property_mappings and document the requirement in the service reference.
Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/manifests/authentik/configmap-blueprint.yaml | 4 ++++ docs/changelog.d/heph-offline-access.bugfix.md | 1 + docs/reference/services/hephaestus.md | 11 +++++++++++ 3 files changed, 16 insertions(+) create mode 100644 docs/changelog.d/heph-offline-access.bugfix.md diff --git a/argocd/manifests/authentik/configmap-blueprint.yaml b/argocd/manifests/authentik/configmap-blueprint.yaml index 9da2f70..cc97dea 100644 --- a/argocd/manifests/authentik/configmap-blueprint.yaml +++ b/argocd/manifests/authentik/configmap-blueprint.yaml @@ -492,6 +492,10 @@ data: - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]] - !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]] - !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]] + # offline_access: heph CLI requests "openid offline_access"; without + # this mapping the refresh token is session-bound and hephd's + # refresh_token grant 400s once the session lapses (spoke sync dies). + - !Find [authentik_providers_oauth2.scopemapping, [scope_name, offline_access]] sub_mode: hashed_user_id include_claims_in_id_token: true diff --git a/docs/changelog.d/heph-offline-access.bugfix.md b/docs/changelog.d/heph-offline-access.bugfix.md new file mode 100644 index 0000000..e9721bc --- /dev/null +++ b/docs/changelog.d/heph-offline-access.bugfix.md @@ -0,0 +1 @@ +Granted the `offline_access` scope on the Authentik `heph` OAuth2 provider so hephaestus spokes receive a durable 30-day refresh token. Previously the refresh token was session-bound, so spoke sync would silently fail with a `400 Bad Request` on the `refresh_token` grant once the Authentik session lapsed. 
diff --git a/docs/reference/services/hephaestus.md b/docs/reference/services/hephaestus.md index 1754ea0..7abc35b 100644 --- a/docs/reference/services/hephaestus.md +++ b/docs/reference/services/hephaestus.md @@ -68,6 +68,17 @@ in the [[authentik]] blueprint (`argocd/manifests/authentik/configmap-blueprint. - Issuer: `https://authentik.ops.eblu.me/application/o/heph/` - Audience / client id: `heph` - Restricted to the `admins` group (single-owner, sensitive data). +- Scope mappings: `openid`, `email`, `profile`, **`offline_access`**. + +> **`offline_access` is required for durable sync.** The `heph` CLI requests +> `scope = "openid offline_access"`, and a refresh token is only issued for the +> 30-day refresh-token window when the provider actually grants `offline_access`. +> Without that scope mapping the refresh token is bound to the login **session**; +> once the session lapses, hephd's `refresh_token` grant returns `400 Bad +> Request`, the bearer can't be refreshed, and spoke sync silently degrades +> (`heph sync --status` → `auth_failure: true`). `heph auth login` papers over it +> until the next session expiry. Keep `offline_access` in the provider's +> `property_mappings`. Because no Authentik instance ships a device-code flow by default, the blueprint also creates `default-device-code-flow` and binds it to the default brand's