diff --git a/ansible/playbooks/indri.yml b/ansible/playbooks/indri.yml
index ddb57f8..1e33bb1 100644
--- a/ansible/playbooks/indri.yml
+++ b/ansible/playbooks/indri.yml
@@ -260,5 +260,7 @@
tags: cv
- role: docs
tags: docs
+ - role: heph
+ tags: heph
- role: caddy
tags: caddy
diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml
index 363d09e..e6d7385 100644
--- a/ansible/roles/caddy/defaults/main.yml
+++ b/ansible/roles/caddy/defaults/main.yml
@@ -52,6 +52,9 @@ caddy_services:
- name: devpi
host: "pypi.{{ caddy_domain }}"
backend: "http://localhost:3141"
+ - name: heph
+ host: "heph.{{ caddy_domain }}"
+ backend: "http://localhost:8787" # hephaestus hub (server mode) + PWA shell
- name: kiwix
host: "kiwix.{{ caddy_domain }}"
backend: "https://kiwix.tail8d86e.ts.net"
diff --git a/ansible/roles/heph/defaults/main.yml b/ansible/roles/heph/defaults/main.yml
new file mode 100644
index 0000000..88d2240
--- /dev/null
+++ b/ansible/roles/heph/defaults/main.yml
@@ -0,0 +1,49 @@
+---
+# hephaestus hub — the canonical heph replica (server mode) on indri.
+# Other devices (e.g. gilbert) are spokes that sync against this hub.
+# See [[set-up-sync-hub]] and [[host-heph-pwa]] in the hephaestus repo.
+
+# Pinned release used for the initial `cargo install` and the PWA shell.
+# After bootstrap, hephd's own --self-update keeps the binary current; this
+# pin only governs the first install and the bundled PWA shell version.
+heph_version: v1.2.1
+
+# Anonymous public HTTPS clone — matches hephd's INSTALL_GIT_URL so the initial
+# install and unattended self-update build from the same source (no ssh-agent).
+heph_repo_url: https://forge.eblu.me/eblume/hephaestus.git
+
+heph_bin_dir: /Users/erichblume/.cargo/bin
+heph_binary: "{{ heph_bin_dir }}/hephd"
+
+# rustc/cargo here are rustup shims. The bare (non-mise) environment that the
+# launchagent and ansible run in falls back to rustup's *default* toolchain,
+# which can lag behind heph's rust-version floor (Cargo.toml: 1.89). Pin the
+# channel explicitly so both the bootstrap build and unattended self-update
+# always use a current toolchain regardless of the host's rustup default.
+heph_rust_toolchain: stable
+
+heph_data_dir: /Users/erichblume/.local/share/heph
+heph_db: "{{ heph_data_dir }}/heph.db"
+heph_socket: "{{ heph_data_dir }}/hephd.sock"
+heph_log_dir: /Users/erichblume/Library/Logs
+
+# Source checkout pinned to heph_version; hephd serves the PWA shell directly from
+# its heph-pwa/ subdir (no copy); hephd self-updates, the shell moves only with the pin.
+heph_pwa_src_dir: /Users/erichblume/.cache/heph-pwa-src
+heph_web_root: "{{ heph_pwa_src_dir }}/heph-pwa"
+
+# Hub listens on all interfaces so tailnet spokes can reach it directly
+# (http://indri.tail8d86e.ts.net:8787) and Caddy can proxy heph.ops.eblu.me.
+# Access is gated by Authentik OIDC regardless — tailnet reachability is not
+# enough (this is the owner's most sensitive data).
+heph_port: 8787
+heph_http_addr: "0.0.0.0:{{ heph_port }}"  # derived so the port is set in one place
+heph_external_url: https://heph.ops.eblu.me
+
+# Authentik OIDC — issuer + audience together turn hub auth on. The audience is
+# the device-code client id (see argocd/manifests/authentik heph blueprint).
+heph_oidc_issuer: https://authentik.ops.eblu.me/application/o/heph/
+heph_oidc_audience: heph
+
+# Self-update poll interval (seconds). 10 minutes.
+heph_self_update_interval_secs: 600
diff --git a/ansible/roles/heph/handlers/main.yml b/ansible/roles/heph/handlers/main.yml
new file mode 100644
index 0000000..92fe9d7
--- /dev/null
+++ b/ansible/roles/heph/handlers/main.yml
@@ -0,0 +1,6 @@
+---
+- name: Restart heph
+ ansible.builtin.shell: |
+ launchctl unload ~/Library/LaunchAgents/mcquack.eblume.heph.plist 2>/dev/null || true
+ launchctl load ~/Library/LaunchAgents/mcquack.eblume.heph.plist
+ changed_when: true
diff --git a/ansible/roles/heph/tasks/main.yml b/ansible/roles/heph/tasks/main.yml
new file mode 100644
index 0000000..7a45fe3
--- /dev/null
+++ b/ansible/roles/heph/tasks/main.yml
@@ -0,0 +1,82 @@
+---
+# hephaestus hub (server mode) on indri.
+#
+# DATA SEEDING (one-time, Path A — do this BEFORE the first provision so the hub
+# adopts gilbert's existing data instead of being born empty):
+#
+# 1. On the seed device (gilbert): heph daemon stop
+# 2. Copy its store to indri: scp ~/.local/share/heph/heph.db \
+# indri:~/.local/share/heph/heph.db
+# 3. On indri, give the hub its OWN device origin (keeps gilbert's owner_id +
+# data; hephd regenerates a fresh origin on next start when it is missing):
+# sqlite3 ~/.local/share/heph/heph.db "DELETE FROM meta WHERE key='origin';"
+# 4. Run this role (installs hephd, stages the PWA, loads the launchagent).
+#
+# hephd auto-creates an empty store on first start if none exists, so seeding is
+# optional — skip it only if you intend a fresh, empty hub.
+
+- name: Ensure heph data directory exists
+ ansible.builtin.file:
+ path: "{{ heph_data_dir }}"
+ state: directory
+ mode: '0700'
+
+- name: Check for installed hephd binary
+ ansible.builtin.stat:
+ path: "{{ heph_binary }}"
+ register: heph_binary_stat
+
+# Bootstrap install only when hephd is absent. Thereafter hephd's own
+# --self-update keeps it current; ansible must not fight (or downgrade) it.
+# This builds from source and can take several minutes on a cold cargo cache.
+- name: Bootstrap-install heph + hephd from the forge ({{ heph_version }})
+ ansible.builtin.command:
+ cmd: >-
+ {{ heph_bin_dir }}/cargo install --locked
+ --git {{ heph_repo_url }}
+ --tag {{ heph_version }}
+ heph hephd
+ environment:
+ PATH: "{{ heph_bin_dir }}:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin"
+ RUSTUP_TOOLCHAIN: "{{ heph_rust_toolchain }}"
+ when: not heph_binary_stat.stat.exists
+ changed_when: true
+ notify: Restart heph
+
+# Checkout provides the PWA shell at {{ heph_web_root }} (heph-pwa/ subdir),
+# served directly by hephd. Static files are read from disk per request, so a
+# version bump needs no restart; the service worker (CACHE = "heph-pwa-vN")
+# evicts stale assets on next load.
+- name: Ensure heph cache parent directory exists
+ ansible.builtin.file:
+ path: "{{ heph_pwa_src_dir | dirname }}"
+ state: directory
+ mode: '0755'
+
+- name: Stage heph-pwa source at {{ heph_version }}
+ ansible.builtin.git:
+ repo: "{{ heph_repo_url }}"
+ dest: "{{ heph_pwa_src_dir }}"
+ version: "{{ heph_version }}"
+ depth: 1
+ single_branch: true
+ force: true
+
+- name: Deploy heph LaunchAgent plist
+ ansible.builtin.template:
+ src: heph.plist.j2
+ dest: ~/Library/LaunchAgents/mcquack.eblume.heph.plist
+ mode: '0644'
+ notify: Restart heph
+
+- name: Check if heph LaunchAgent is loaded
+ ansible.builtin.command: launchctl list mcquack.eblume.heph
+ register: heph_launchctl_check
+ changed_when: false
+ failed_when: false
+
+- name: Load heph LaunchAgent if not loaded
+ ansible.builtin.command: launchctl load ~/Library/LaunchAgents/mcquack.eblume.heph.plist
+ when: heph_launchctl_check.rc != 0
+ changed_when: true
+ failed_when: false
diff --git a/ansible/roles/heph/templates/heph.plist.j2 b/ansible/roles/heph/templates/heph.plist.j2
new file mode 100644
index 0000000..19a2367
--- /dev/null
+++ b/ansible/roles/heph/templates/heph.plist.j2
@@ -0,0 +1,50 @@
+
+
+
+
+
+ Label
+ mcquack.eblume.heph
+ ProgramArguments
+
+ {{ heph_binary }}
+ --mode
+ server
+ --http-addr
+ {{ heph_http_addr }}
+ --db
+ {{ heph_db }}
+ --socket
+ {{ heph_socket }}
+ --web-root
+ {{ heph_web_root }}
+ --oidc-issuer
+ {{ heph_oidc_issuer }}
+ --oidc-audience
+ {{ heph_oidc_audience }}
+ --self-update
+ --self-update-interval-secs
+ {{ heph_self_update_interval_secs }}
+
+ RunAtLoad
+
+ KeepAlive
+
+ EnvironmentVariables
+
+
+ PATH
+ {{ heph_bin_dir }}:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin
+ HOME
+ /Users/erichblume
+
+ RUSTUP_TOOLCHAIN
+ {{ heph_rust_toolchain }}
+
+ StandardOutPath
+ {{ heph_log_dir }}/mcquack.heph.out.log
+ StandardErrorPath
+ {{ heph_log_dir }}/mcquack.heph.err.log
+
+
diff --git a/argocd/manifests/authentik/configmap-blueprint.yaml b/argocd/manifests/authentik/configmap-blueprint.yaml
index fcbb99b..cc97dea 100644
--- a/argocd/manifests/authentik/configmap-blueprint.yaml
+++ b/argocd/manifests/authentik/configmap-blueprint.yaml
@@ -434,3 +434,93 @@ data:
provider: !KeyOf mealie-provider
meta_launch_url: https://meals.ops.eblu.me
policy_engine_mode: all
+
+ heph.yaml: |
+ version: 1
+ metadata:
+ name: BlumeOps Heph SSO
+ labels:
+ blueprints.goauthentik.io/description: "Hephaestus hub OIDC (device-code) provider, application, and device-code flow"
+ entries:
+ # Device-code flow (RFC 8628). authentik ships no default for this, so we
+ # create one and bind it to the brand below. An empty stage_configuration
+ # flow is sufficient: the already-authenticated user just confirms the code.
+ - model: authentik_flows.flow
+ id: device-code-flow
+ identifiers:
+ slug: default-device-code-flow
+ attrs:
+ name: Device code flow
+ title: Device code flow
+ slug: default-device-code-flow
+ designation: stage_configuration
+ authentication: require_authenticated
+
+ # Enable the device-code grant globally by binding the flow to the default
+ # brand (domain authentik-default). Partial update — only sets this field.
+ - model: authentik_brands.brand
+ identifiers:
+ domain: authentik-default
+ attrs:
+ flow_device_code: !KeyOf device-code-flow
+
+ # OAuth2 provider for heph — PUBLIC client (device-code + PKCE, no secret).
+ # client_id doubles as the token audience the hub verifies (--oidc-audience heph),
+ # and the app slug 'heph' is the issuer path (/application/o/heph/).
+ - model: authentik_providers_oauth2.oauth2provider
+ id: heph-provider
+ identifiers:
+ name: Heph
+ attrs:
+ name: Heph
+ authorization_flow: !Find [authentik_flows.flow, [slug, default-provider-authorization-implicit-consent]]
+ invalidation_flow: !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]]
+ client_type: public
+ client_id: heph
+ # CLI/TUI use the device-code grant (no redirect). The heph-pwa browser
+ # login uses Authorization Code + PKCE, which DOES redirect back to the
+ # app's origin — register those here (Authentik also keys token-endpoint
+ # CORS off these origins). Trailing slash matters: the PWA's redirect_uri
+ # is its base dir, e.g. https://heph.ops.eblu.me/.
+ redirect_uris:
+ - matching_mode: strict
+ url: https://heph.ops.eblu.me/
+ - matching_mode: strict
+ url: http://localhost:8787/ # local dev (hephd --web-root)
+ signing_key: !Find [authentik_crypto.certificatekeypair, [name, authentik Self-signed Certificate]]
+ property_mappings:
+ - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]]
+ - !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]]
+ - !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]]
+ # offline_access: heph CLI requests "openid offline_access"; without
+ # this mapping the refresh token is session-bound and hephd's
+ # refresh_token grant 400s once the session lapses (spoke sync dies).
+ - !Find [authentik_providers_oauth2.scopemapping, [scope_name, offline_access]]
+ sub_mode: hashed_user_id
+ include_claims_in_id_token: true
+
+ # Heph application — linked to the OAuth2 provider
+ - model: authentik_core.application
+ id: heph-app
+ identifiers:
+ slug: heph
+ attrs:
+ name: Hephaestus
+ slug: heph
+ provider: !KeyOf heph-provider
+ meta_launch_url: https://heph.ops.eblu.me
+ policy_engine_mode: any
+
+ # Policy binding — restrict heph to admins group (single-owner, sensitive data)
+ - model: authentik_policies.policybinding
+ identifiers:
+ order: 0
+ target: !KeyOf heph-app
+ group: !Find [authentik_core.group, [name, admins]]
+ attrs:
+ target: !KeyOf heph-app
+ group: !Find [authentik_core.group, [name, admins]]
+ order: 0
+ enabled: true
+ negate: false
+ timeout: 30
diff --git a/argocd/manifests/external-secrets-ringtail/kustomization.yaml b/argocd/manifests/external-secrets-ringtail/kustomization.yaml
index 05b6b54..9fd4e2f 100644
--- a/argocd/manifests/external-secrets-ringtail/kustomization.yaml
+++ b/argocd/manifests/external-secrets-ringtail/kustomization.yaml
@@ -13,4 +13,4 @@ resources:
images:
- name: registry.ops.eblu.me/blumeops/external-secrets
- newTag: v2.2.0-59dace8-nix
+ newTag: v2.2.0-13895bb-nix
diff --git a/argocd/manifests/external-secrets/kustomization.yaml b/argocd/manifests/external-secrets/kustomization.yaml
index 8b1aea5..639db66 100644
--- a/argocd/manifests/external-secrets/kustomization.yaml
+++ b/argocd/manifests/external-secrets/kustomization.yaml
@@ -13,4 +13,4 @@ resources:
images:
- name: ghcr.io/external-secrets/external-secrets
newName: registry.ops.eblu.me/blumeops/external-secrets
- newTag: v2.2.0-0e70a1b
+ newTag: v2.2.0-13895bb
diff --git a/argocd/manifests/prowler/cronjob-iac-scan.yaml b/argocd/manifests/prowler/cronjob-iac-scan.yaml
deleted file mode 100644
index c1303a5..0000000
--- a/argocd/manifests/prowler/cronjob-iac-scan.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
----
-apiVersion: batch/v1
-kind: CronJob
-metadata:
- name: prowler-iac-scan
- namespace: prowler
-spec:
- schedule: "0 2 * * 6" # Saturday 2am
- concurrencyPolicy: Forbid
- jobTemplate:
- spec:
- ttlSecondsAfterFinished: 604800 # Auto-delete after 7 days
- template:
- spec:
- securityContext:
- seccompProfile:
- type: RuntimeDefault
- containers:
- - name: prowler
- image: registry.ops.eblu.me/blumeops/prowler:kustomized
- command: ["/bin/sh", "-c"]
- # Prowler's --mutelist-file is a no-op for the IaC provider
- # (it delegates to Trivy). The Prowler image's trivy shim
- # injects --ignorefile $TRIVY_IGNOREFILE when set; see
- # containers/prowler/Dockerfile.
- env:
- - name: TRIVY_IGNOREFILE
- value: /mutelist/trivyignore.yaml
- args:
- - |
- DATEDIR=/reports/prowler-iac/$(date +%Y-%m-%d)
- mkdir -p "$DATEDIR"
- prowler iac \
- --scan-repository-url https://forge.ops.eblu.me/eblume/blumeops.git \
- -z \
- --output-formats html csv json-ocsf \
- --output-directory "$DATEDIR"
- volumeMounts:
- - name: reports
- mountPath: /reports
- - name: mutelist
- mountPath: /mutelist
- readOnly: true
- restartPolicy: OnFailure
- volumes:
- - name: reports
- persistentVolumeClaim:
- claimName: prowler-reports
- - name: mutelist
- configMap:
- name: prowler-mutelist
- items:
- - key: trivyignore.yaml
- path: trivyignore.yaml
diff --git a/argocd/manifests/prowler/cronjob-image-scan.yaml b/argocd/manifests/prowler/cronjob-image-scan.yaml
deleted file mode 100644
index b779d08..0000000
--- a/argocd/manifests/prowler/cronjob-image-scan.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
----
-apiVersion: batch/v1
-kind: CronJob
-metadata:
- name: prowler-image-scan
- namespace: prowler
-spec:
- schedule: "0 3 * * 6" # Saturday 3am
- concurrencyPolicy: Forbid
- jobTemplate:
- spec:
- ttlSecondsAfterFinished: 604800 # Auto-delete after 7 days
- template:
- spec:
- securityContext:
- seccompProfile:
- type: RuntimeDefault
- containers:
- - name: prowler
- image: registry.ops.eblu.me/blumeops/prowler:kustomized
- command: ["/bin/sh", "-c"]
- args:
- - |
- DATEDIR=/reports/prowler-images/$(date +%Y-%m-%d)
- mkdir -p "$DATEDIR"
- prowler image \
- --registry https://registry.ops.eblu.me \
- --image-filter "^blumeops/" \
- -z \
- --output-formats html csv json-ocsf \
- --output-directory "$DATEDIR"
- volumeMounts:
- - name: reports
- mountPath: /reports
- restartPolicy: OnFailure
- volumes:
- - name: reports
- persistentVolumeClaim:
- claimName: prowler-reports
diff --git a/argocd/manifests/prowler/kustomization.yaml b/argocd/manifests/prowler/kustomization.yaml
index 1d92a6b..38295a3 100644
--- a/argocd/manifests/prowler/kustomization.yaml
+++ b/argocd/manifests/prowler/kustomization.yaml
@@ -10,8 +10,6 @@ resources:
- pv-nfs.yaml
- pvc.yaml
- cronjob.yaml
- - cronjob-image-scan.yaml
- - cronjob-iac-scan.yaml
configMapGenerator:
- name: prowler-mutelist
@@ -23,7 +21,6 @@ configMapGenerator:
- mutelist/core-pod-security.yaml
- mutelist/manual-node-checks.yaml
- mutelist/rbac.yaml
- - mutelist/trivyignore.yaml
images:
- name: registry.ops.eblu.me/blumeops/prowler
diff --git a/argocd/manifests/prowler/mutelist/trivyignore.yaml b/argocd/manifests/prowler/mutelist/trivyignore.yaml
deleted file mode 100644
index 87af966..0000000
--- a/argocd/manifests/prowler/mutelist/trivyignore.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-# Trivy ignorefile for Prowler IaC scan.
-#
-# Prowler's `--mutelist-file` flag is a no-op for the IaC provider
-# (iac_provider.py sets self._mutelist = None and delegates to Trivy).
-# Trivy in turn does not auto-discover this YAML form from cwd, so the
-# Prowler image ships a shim wrapper around `trivy` that injects
-# --ignorefile $TRIVY_IGNOREFILE when the env var is set. The cronjob
-# mounts this file and sets TRIVY_IGNOREFILE accordingly.
-#
-# Schema: https://trivy.dev/latest/docs/configuration/filtering/
-# IDs use the hyphenated form Trivy displays (KSV-0041, not KSV0041).
-misconfigurations:
- - id: KSV-0041
- paths:
- - "argocd/manifests/external-secrets/rbac.yaml"
- statement: >-
- external-secrets-operator's entire function is to read and
- synthesize Secret objects; ClusterRole over secrets is its
- purpose. Both the controller and cert-controller are
- upstream-defined.
- - id: KSV-0041
- paths:
- - "argocd/manifests/kube-state-metrics/rbac.yaml"
- - "argocd/manifests/kube-state-metrics-ringtail/rbac.yaml"
- statement: >-
- KSM exposes only Secret metadata (name, namespace, type, labels),
- never the data field. list/watch on secrets is required for
- kube_secret_info / kube_secret_labels metrics.
- - id: KSV-0114
- paths:
- - "argocd/manifests/external-secrets/rbac.yaml"
- statement: >-
- cert-controller manages the external-secrets validating webhook
- configurations to inject its own rotating CA bundle. RBAC is
- scoped to two named webhooks (secretstore-validate,
- externalsecret-validate) via resourceNames; KSV-0114 doesn't see
- the resourceNames restriction so reports the full ClusterRole.
diff --git a/docs/changelog.d/+external-secrets-stable-main-sha.infra.md b/docs/changelog.d/+external-secrets-stable-main-sha.infra.md
new file mode 100644
index 0000000..fbe3c21
--- /dev/null
+++ b/docs/changelog.d/+external-secrets-stable-main-sha.infra.md
@@ -0,0 +1 @@
+Rebuilt the external-secrets images off `main` and repointed both clusters to the stable main-sha tags (`v2.2.0-13895bb` arm64 / `v2.2.0-13895bb-nix` amd64), so the deployed images on indri and ringtail trace to the same `main` commit rather than earlier feature-branch builds.
diff --git a/docs/changelog.d/+heph-hub-v1.2.1.infra.md b/docs/changelog.d/+heph-hub-v1.2.1.infra.md
new file mode 100644
index 0000000..c203323
--- /dev/null
+++ b/docs/changelog.d/+heph-hub-v1.2.1.infra.md
@@ -0,0 +1 @@
+Bumped the indri heph hub to v1.2.1, which adds the hub `GET /config` endpoint and ships the heph-pwa **Login with Authentik** flow (Authorization Code + PKCE). Pairs with the Authentik `heph` provider redirect URIs registered earlier.
diff --git a/docs/changelog.d/+jellyfin-10-11-11.bugfix.md b/docs/changelog.d/+jellyfin-10-11-11.bugfix.md
new file mode 100644
index 0000000..779a042
--- /dev/null
+++ b/docs/changelog.d/+jellyfin-10-11-11.bugfix.md
@@ -0,0 +1 @@
+Upgraded Jellyfin on indri from 10.11.6 to 10.11.11, picking up the security fixes in 10.11.7 (disclosed CVEs/GHSAs, flagged "upgrade immediately") and 10.11.10 (three further GHSAs). Noted the recurring gotcha in the service-versions tracking: after a `brew upgrade --cask jellyfin`, the re-quarantined `.app` makes the launchd-spawned process hang silently until the Gatekeeper first-launch dialog is approved on indri's GUI console — removing the quarantine xattr over SSH is blocked by macOS TCC.
diff --git a/docs/changelog.d/+ringtail-flake-update.infra.md b/docs/changelog.d/+ringtail-flake-update.infra.md
new file mode 100644
index 0000000..1d806df
--- /dev/null
+++ b/docs/changelog.d/+ringtail-flake-update.infra.md
@@ -0,0 +1 @@
+Updated ringtail NixOS flake inputs (nixpkgs `nixos-25.11`, disko) to latest via `dagger call flake-update`.
diff --git a/docs/changelog.d/+tailscale-operator-doc-review.doc.md b/docs/changelog.d/+tailscale-operator-doc-review.doc.md
new file mode 100644
index 0000000..8f7d5a3
--- /dev/null
+++ b/docs/changelog.d/+tailscale-operator-doc-review.doc.md
@@ -0,0 +1 @@
+Reviewed the tailscale-operator reference card: documented the dual indri/ringtail deployment, corrected the ArgoCD apps list, pinned the upstream version, and added the ProxyGroup Ingress `host:` caveat.
diff --git a/docs/changelog.d/heph-indri-hub.infra.md b/docs/changelog.d/heph-indri-hub.infra.md
new file mode 100644
index 0000000..6761cb7
--- /dev/null
+++ b/docs/changelog.d/heph-indri-hub.infra.md
@@ -0,0 +1 @@
+Added the [[hephaestus]] (`heph`) sync hub to indri as a self-updating LaunchAgent managed by Ansible (`ansible/roles/heph`, tag `heph`). The hub runs `hephd --mode server` behind `heph.ops.eblu.me` (Caddy TLS), with self-update on a 10-minute interval and the heph-pwa mobile shell served from `--web-root`. Access is gated by a new Authentik device-code (RFC 8628) OIDC application. Indri is now the canonical hub; other devices (e.g. gilbert) attach as offline-capable spokes. The hub's store was seeded from gilbert via the data-safe Path A bring-up (copy store, reset `meta.origin`).
diff --git a/docs/changelog.d/heph-offline-access.bugfix.md b/docs/changelog.d/heph-offline-access.bugfix.md
new file mode 100644
index 0000000..e9721bc
--- /dev/null
+++ b/docs/changelog.d/heph-offline-access.bugfix.md
@@ -0,0 +1 @@
+Granted the `offline_access` scope on the Authentik `heph` OAuth2 provider so hephaestus spokes receive a durable 30-day refresh token. Previously the refresh token was session-bound, so spoke sync would silently fail with a `400 Bad Request` on the `refresh_token` grant once the Authentik session lapsed.
diff --git a/docs/changelog.d/heph-pwa-redirect-uris.infra.md b/docs/changelog.d/heph-pwa-redirect-uris.infra.md
new file mode 100644
index 0000000..f887eed
--- /dev/null
+++ b/docs/changelog.d/heph-pwa-redirect-uris.infra.md
@@ -0,0 +1 @@
+Registered the heph-pwa redirect URIs (`https://heph.ops.eblu.me/`, plus `http://localhost:8787/` for dev) on the Authentik `heph` OAuth2 provider, enabling the PWA's new Authorization Code + PKCE "Login with Authentik" flow (and the token-endpoint CORS it needs). Pairs with hephaestus PR #9.
diff --git a/docs/changelog.d/retire-prowler-image-iac-scans.infra.md b/docs/changelog.d/retire-prowler-image-iac-scans.infra.md
new file mode 100644
index 0000000..9afd261
--- /dev/null
+++ b/docs/changelog.d/retire-prowler-image-iac-scans.infra.md
@@ -0,0 +1 @@
+Retired the Prowler container-image CVE scan and IaC scan, keeping only the K8s CIS benchmark scan. The two retired scans generated tens of thousands of un-actioned, un-muted findings every week (~20,000 image findings and growing, mostly unpatchable upstream-image CVEs; ~650 systemic Trivy KSV pod-security warnings) — the weekly `mise run review-compliance-reports` re-surfaced them all as "action needed" though none were ever triaged. The K8s CIS scan is fully mutelisted and runs clean, so it stays. Removed the two CronJobs, the now-unused `trivyignore.yaml` mutelist, and the grouped-findings rendering in the review tool that existed solely for the high-volume scans.
diff --git a/docs/how-to/operations/deploy-prowler.md b/docs/how-to/operations/deploy-prowler.md
index 75dced2..1475680 100644
--- a/docs/how-to/operations/deploy-prowler.md
+++ b/docs/how-to/operations/deploy-prowler.md
@@ -1,6 +1,6 @@
---
title: Deploy Prowler CIS Scanner
-modified: 2026-03-24
+modified: 2026-06-08
last-reviewed: 2026-03-24
tags:
- how-to
@@ -11,7 +11,20 @@ tags:
# Deploy Prowler CIS Scanner
-Prowler runs weekly CIS Kubernetes Benchmark scans against minikube-indri and writes HTML/CSV/JSON reports to the NFS share on sifaka.
+Prowler runs a weekly CIS Kubernetes Benchmark scan against minikube-indri and writes HTML/CSV/JSON reports to the NFS share on sifaka.
+
+## Why only the K8s CIS scan
+
+Prowler originally ran three CronJobs: K8s CIS, container-image CVE scanning, and IaC scanning. The image and IaC scans were **retired in 2026-06**.
+
+Both were pure toil with no realized value:
+
+- **Image scan** produced ~20,000 unmuted findings per run and growing, none ever triaged or muted. They were overwhelmingly CVEs in *upstream* base images we don't control and can't patch, and the job re-scanned every historical tag still in the registry, multiplying the count.
+- **IaC scan** produced ~650 Trivy KSV findings (`runAsNonRoot`, `readOnlyRootFilesystem`, drop-capabilities, …) against our own manifests — real but systemic, homelab-acceptable, and likewise never muted, so the weekly review re-surfaced all of them indefinitely.
+
+The K8s CIS scan, by contrast, is fully mutelisted and runs clean (0 unmuted findings week over week), so it stays. The guiding principle matches [[ai-scraper-mitigation]]: don't keep generating a firehose of output that has no audience. If image-CVE signal is wanted later, the right shape is critical-severity-only, currently-deployed-tags-only, alert-on-new — a rebuild, not a revival (tracked as the "Trivy for image/IaC scanning" task).
+
+Note that the K8s CIS scan itself is tied to minikube-indri, which is slated for retirement. Moving it to ringtail's k3s as-is would lose most of its coverage: only ~22 of the ~70 checks produce results there (k3s has no static pods). Re-pointing a lean posture check at ringtail is tracked separately ("prowler scan against ringtail").
## What it checks
@@ -33,38 +46,6 @@ Prowler's Kubernetes provider runs ~70 checks from the CIS Kubernetes Benchmark
**k3s note:** k3s embeds the control plane in a single binary — no static pods exist. Only core + RBAC checks (~22 of 70) produce results. Consider `kube-bench` for k3s control plane checks.
-### Image vulnerability scanning (Saturday 3am)
-
-Prowler's image provider scans all `blumeops/*` container images in `registry.ops.eblu.me` for:
-
-- **CVEs** — known vulnerabilities from NVD, Alpine SecDB, Debian Security Tracker, and other sources
-- **Embedded secrets** — credentials or API keys baked into image layers
-- **Misconfigurations** — Dockerfile best practices (running as root, missing HEALTHCHECK, etc.)
-
-Uses Trivy under the hood. Reports are written to `sifaka:/volume1/reports/prowler-images/`.
-
-To run an ad-hoc image scan:
-
-```fish
-kubectl create job --from=cronjob/prowler-image-scan prowler-image-manual -n prowler --context=minikube-indri
-```
-
-### IaC scanning (Saturday 2am)
-
-Prowler's IaC provider scans the blumeops repository (cloned at scan time) for misconfigurations in:
-
-- **Dockerfiles** — running as root, using `latest` tags, missing `HEALTHCHECK`
-- **Kubernetes manifests** — missing resource limits, privileged containers, insecure settings
-- **Other IaC files** — Terraform, CloudFormation, etc. if present
-
-Uses Trivy under the hood. Reports are written to `sifaka:/volume1/reports/prowler-iac/`.
-
-To run an ad-hoc IaC scan:
-
-```fish
-kubectl create job --from=cronjob/prowler-iac-scan prowler-iac-manual -n prowler --context=minikube-indri
-```
-
## Reports
Reports are written to `sifaka:/volume1/reports/prowler/` with timestamped filenames. See [[read-compliance-reports]] for how to access and interpret them.
diff --git a/docs/how-to/operations/read-compliance-reports.md b/docs/how-to/operations/read-compliance-reports.md
index e676ad5..2990026 100644
--- a/docs/how-to/operations/read-compliance-reports.md
+++ b/docs/how-to/operations/read-compliance-reports.md
@@ -1,6 +1,6 @@
---
title: Read Compliance Reports
-modified: 2026-04-06
+modified: 2026-06-08
last-reviewed: 2026-04-06
tags:
- how-to
@@ -27,8 +27,13 @@ Reports are stored on sifaka at `/volume1/reports/`. Each scanner writes to its
| Scanner | Path | Schedule |
|---------|------|----------|
| [[prowler]] K8s CIS | `sifaka:/volume1/reports/prowler/` | Weekly (Sunday 3am) |
-| [[prowler]] Image | `sifaka:/volume1/reports/prowler-images/` | Weekly (Saturday 3am) |
-| [[prowler]] IaC | `sifaka:/volume1/reports/prowler-iac/` | Weekly (Saturday 2am) |
+
+> **Retired (2026-06):** the Prowler **image** (`prowler-images/`) and **IaC**
+> (`prowler-iac/`) scans were retired. They produced tens of thousands of
+> un-actioned, un-muted findings every week — mostly unpatchable upstream-image
+> CVEs and systemic pod-security KSV warnings — and nobody triaged them. See
+> [[deploy-prowler#Why only the K8s CIS scan]] for the rationale. Their stale
+> report directories may linger on sifaka until manually removed.
Copy reports to your local machine (remember `scp -O` for sifaka):
diff --git a/docs/reference/infrastructure/indri.md b/docs/reference/infrastructure/indri.md
index 67652ca..8364ba0 100644
--- a/docs/reference/infrastructure/indri.md
+++ b/docs/reference/infrastructure/indri.md
@@ -33,6 +33,7 @@ Primary BlumeOps server. Mac Mini M1 (2020).
- [[alloy|Alloy]] - Metrics/logs collector
- [[caddy]] - Reverse proxy for `*.ops.eblu.me`
- [[devpi]] - PyPI mirror (LaunchAgent)
+- [[hephaestus]] - heph task/context sync hub (LaunchAgent, self-updating)
- [[cv]] - Static CV site, served by Caddy
- [[docs]] - Quartz-built docs site, served by Caddy
diff --git a/docs/reference/kubernetes/tailscale-operator.md b/docs/reference/kubernetes/tailscale-operator.md
index c102e02..174b347 100644
--- a/docs/reference/kubernetes/tailscale-operator.md
+++ b/docs/reference/kubernetes/tailscale-operator.md
@@ -1,6 +1,7 @@
---
title: Tailscale Operator
-modified: 2026-02-08
+modified: 2026-06-08
+last-reviewed: 2026-06-08
tags:
- kubernetes
- tailscale
@@ -15,8 +16,16 @@ The Tailscale operator enables Kubernetes services to be exposed directly on the
| Property | Value |
|----------|-------|
| **Namespace** | `tailscale` |
-| **Upstream** | `mirrors/tailscale` on forge (static manifest) |
-| **ArgoCD Apps** | `tailscale-operator-base` (upstream), `tailscale-operator` (config) |
+| **Upstream** | `mirrors/tailscale` on forge (static manifest, pinned `v1.94.2`) |
+| **ArgoCD Apps** | `tailscale-operator` (indri/minikube), `tailscale-operator-ringtail` (ringtail/k3s) |
+
+The operator runs on **both** clusters — indri's minikube and ringtail's k3s.
+Both apps layer on the shared `tailscale-operator-base` kustomize directory
+(operator manifest, `ProxyClass`, `dnsconfig`); each cluster supplies its own
+`ProxyGroup` (indri: 2 replicas, ringtail: 1) and OAuth `ExternalSecret`. The
+ringtail overlay additionally rewrites the proxy image to a locally nix-built
+mirror. See [[ringtail]] and [[migrate-wave1-ringtail]] for the ongoing
+migration of k8s workloads onto ringtail.
## How It Works
@@ -27,7 +36,13 @@ Ingresses use a shared ProxyGroup (`ingress`) rather than per-service Tailscale
3. Service becomes accessible at `.tail8d86e.ts.net`
4. TLS is handled automatically via Tailscale
-Tailnet clients must have `--accept-routes` enabled to route to VIP addresses.
+Two requirements for VIP routing to work:
+
+1. Tailnet clients must have `--accept-routes` enabled to route to VIP addresses.
+2. Ingress rules must **not** pin a specific hostname in `host:`. The ProxyGroup
+ proxy receives the FQDN as the `Host` header (e.g.
+ `prometheus.tail8d86e.ts.net`), which won't match a short name. Use
+ `host: "*"` or omit `host:` entirely.
Services can be individually tagged (e.g., `tag:flyio-target`) via Ingress annotations to control which ACL grants apply. See [[expose-service-publicly]] for the tagging workflow.
diff --git a/docs/reference/operations/security.md b/docs/reference/operations/security.md
index 11c4df9..86b3d3b 100644
--- a/docs/reference/operations/security.md
+++ b/docs/reference/operations/security.md
@@ -1,6 +1,6 @@
---
title: Security & Compliance
-modified: 2026-03-24
+modified: 2026-06-08
last-reviewed: 2026-03-24
tags:
- operations
@@ -21,7 +21,7 @@ Security posture and compliance scanning for BlumeOps infrastructure.
## Scanning tools
-- [[prowler]] — CIS Kubernetes Benchmark scanner (weekly CronJob)
+- [[prowler]] — CIS Kubernetes Benchmark scanner (weekly CronJob). The container-image CVE scan and IaC scan were retired in 2026-06 (un-actioned noise — see [[deploy-prowler#Why only the K8s CIS scan]]); only the K8s CIS scan remains.
- [[deploy-prowler]] — deployment and ad-hoc scan how-to
- [[read-compliance-reports]] — accessing and interpreting reports
- [[kingfisher]] — Secret detection and live validation for Forgejo repos (weekly CronJob + prek hook)
@@ -52,5 +52,5 @@ Suppressed findings are kept in Prowler mutelist YAML under `argocd/manifests/pr
- No SOC 2 compliance mapping for Kubernetes (Prowler only maps SOC 2 for AWS/Azure/GCP)
- k3s control plane checks produce no results (embedded binary, no static pods) — consider kube-bench
-- Container image scanning covers `blumeops/*` images only — upstream images (ollama, immich, etc.) are not scanned
-- IaC scanning covers the blumeops repo only — no scanning of third-party Helm charts or vendored manifests
+- No container-image CVE scanning (the Prowler image scan was retired 2026-06 as un-actioned noise). If reintroduced, scope it to critical-severity findings on currently-deployed tags, and alert only on new findings
+- No automated IaC misconfiguration scanning (the Prowler IaC scan was retired 2026-06). Manifest pod-security hardening is now an accept-and-document decision rather than a weekly report
diff --git a/docs/reference/services/hephaestus.md b/docs/reference/services/hephaestus.md
new file mode 100644
index 0000000..7abc35b
--- /dev/null
+++ b/docs/reference/services/hephaestus.md
@@ -0,0 +1,141 @@
+---
+title: Hephaestus
+modified: 2026-06-04
+last-reviewed: 2026-06-04
+tags:
+ - service
+ - hephaestus
+---
+
+# Hephaestus
+
+[hephaestus](https://github.com/eblume/hephaestus) (`heph`) is the user's
+self-hosted task + context/knowledge system. It is **hub-and-spoke**: each device
+runs a full local SQLite replica (`hephd --mode local`) and background-syncs
+against one canonical **hub**. Indri runs that hub.
+
+## Quick Reference
+
+| Property | Value |
+|----------|-------|
+| **PWA URL** | https://heph.ops.eblu.me (browser PWA, Caddy TLS) |
+| **Spoke sync URL** | http://indri.tail8d86e.ts.net:8787 (direct, tailnet) |
+| **Local Port** | 8787 (`hephd --mode server`, bound `0.0.0.0`) |
+| **Binary** | `~/.cargo/bin/hephd` (self-updating) |
+| **Data** | `~/.local/share/heph/heph.db` |
+| **PWA shell** | `~/.local/share/heph/web` |
+| **Logs** | `~/Library/Logs/mcquack.heph.{out,err}.log` |
+| **LaunchAgent** | `mcquack.eblume.heph` |
+| **Ansible role** | `ansible/roles/heph` (tag `heph`) |
+
+## What runs on indri
+
+The launchagent runs the hub in server mode with three features enabled:
+
+```
+hephd --mode server --http-addr 0.0.0.0:8787 --db ~/.local/share/heph/heph.db
+ --web-root ~/.local/share/heph/web
+ --oidc-issuer https://authentik.ops.eblu.me/application/o/heph/
+ --oidc-audience heph
+ --self-update --self-update-interval-secs 600
+```
+
+- **Server mode** exposes the HTTP sync endpoint (`/rpc`, `/sync/*`) that spokes
+ reconcile their op-log against.
+- **Self-update** (10-minute poll) rebuilds `hephd` from the forge when a newer
+ release tag appears (`cargo install --git https://forge.eblu.me/eblume/hephaestus.git`).
+ Indri's Rust toolchain (`~/.cargo/bin`) is on the agent's `PATH` for this, and
+ the plist pins `RUSTUP_TOOLCHAIN=stable` — the
+ launchagent runs without mise, so a bare `cargo` shim would otherwise fall back
+ to rustup's *default* toolchain, which can lag behind heph's `rust-version` floor
+ (1.89) and silently fail the build.
+- **PWA** (`--web-root`) serves the [heph-pwa] mobile shell; Caddy terminates TLS
+ at `heph.ops.eblu.me` so the PWA runs in a secure context (service worker,
+ install-to-home-screen, voice capture).
+
+[heph-pwa]: https://github.com/eblume/hephaestus
+
+The hub binds `0.0.0.0` so tailnet spokes can also sync directly
+(`http://indri.tail8d86e.ts.net:8787`); access is gated by Authentik OIDC either
+way — tailnet reachability alone is not enough.
+
+## Authentication (Authentik OIDC, device-code)
+
+The hub verifies an OIDC bearer token on every sync. The `heph` application is a
+**public** OAuth2 client using the **device-code flow** (RFC 8628), provisioned
+in the [[authentik]] blueprint (`argocd/manifests/authentik/configmap-blueprint.yaml`):
+
+- Issuer: `https://authentik.ops.eblu.me/application/o/heph/`
+- Audience / client id: `heph`
+- Restricted to the `admins` group (single-owner, sensitive data).
+- Scope mappings: `openid`, `email`, `profile`, **`offline_access`**.
+
+> **`offline_access` is required for durable sync.** The `heph` CLI requests
+> `scope = "openid offline_access"`, and a refresh token valid for the full
+> 30-day refresh-token window is only issued when the provider actually grants `offline_access`.
+> Without that scope mapping the refresh token is bound to the login **session**;
+> once the session lapses, hephd's `refresh_token` grant returns `400 Bad
+> Request`, the bearer can't be refreshed, and spoke sync silently degrades
+> (`heph sync --status` → `auth_failure: true`). `heph auth login` papers over it
+> until the next session expiry. Keep `offline_access` in the provider's
+> `property_mappings`.
+
+Because Authentik does not ship a device-code flow by default, the blueprint
+also creates `default-device-code-flow` and binds it to the default brand's
+`flow_device_code`. Devices obtain a token with `heph auth login`; the PWA
+currently takes a pasted token (in-app device-code login is an upstream follow-up).
+
+## Data seeding (Path A, one-time)
+
+The hub was seeded from the existing `gilbert` device so no task history was
+lost. heph's data-safe bring-up ("Path A") has the hub **adopt the device's
+identity** rather than rewriting the device:
+
+1. Quiesce the seed device: `heph daemon stop` (on gilbert).
+2. Copy its store to indri: `scp ~/.local/share/heph/heph.db indri:~/.local/share/heph/heph.db`.
+3. Give the hub its **own device origin** (keeps gilbert's `owner_id` + data;
+ `hephd` regenerates a fresh `origin` on next start when it is missing):
+ ```fish
+ ssh indri "sqlite3 ~/.local/share/heph/heph.db \"DELETE FROM meta WHERE key='origin';\""
+ ```
+4. `mise run provision-indri -- --tags heph` (installs hephd, stages the PWA,
+ loads the launchagent → hub starts on the seeded store).
+
+Only `meta.origin` changes; `owner_id`, nodes, op-log, and links are copied
+untouched. A clean `hephd --owner-id` / seed command is tracked upstream as a
+hephaestus follow-up — until then this manual reset is the documented path.
+
+## Connecting a spoke (e.g. gilbert)
+
+A device joins by running its local daemon with the hub URL + OIDC client and
+logging in once:
+
+```bash
+hephd --mode local --hub-url http://indri.tail8d86e.ts.net:8787 \
+ --oidc-issuer https://authentik.ops.eblu.me/application/o/heph/ \
+ --oidc-client-id heph
+heph auth login --hub-url http://indri.tail8d86e.ts.net:8787 \
+ --issuer https://authentik.ops.eblu.me/application/o/heph/ --client-id heph
+```
+
+> **Use the direct `http://…:8787` tailnet URL for sync, not the Caddy HTTPS
+> URL.** hephd's sync client is plain-HTTP-only; pointing `--hub-url` at
+> `https://heph.ops.eblu.me` fails with a confusing `error sending request`
+> (the HTTP connector rejects the `https` scheme before connecting). Tailscale
+> encrypts the transport, and the OIDC bearer token still gates every request.
+> `heph.ops.eblu.me` (Caddy TLS) exists only for the browser PWA, which needs a
+> secure context. The cached token is keyed by the exact `--hub-url`, so use the
+> same value for `hephd` and `heph auth login`.
+
+> **Caveat:** `heph daemon` cannot yet bake hub/spoke flags into the generated
+> launchd plist (upstream gap). On a spoke whose plist is managed by `heph
+> daemon`, the hub/OIDC flags must be hand-added — and a later `heph daemon
+> start/restart` will regenerate the plist and drop them. Avoid `heph daemon`
+> subcommands on a configured spoke until that gap is closed; reload via
+> `launchctl` instead.
+
+## Related
+
+- [[indri]] — host
+- [[authentik]] — OIDC provider
+- [[caddy]] — TLS termination for `heph.ops.eblu.me`
diff --git a/docs/reference/services/jellyfin.md b/docs/reference/services/jellyfin.md
index bbdfafd..c7b3074 100644
--- a/docs/reference/services/jellyfin.md
+++ b/docs/reference/services/jellyfin.md
@@ -1,7 +1,7 @@
---
title: Jellyfin
-modified: 2026-02-07
-last-reviewed: 2026-03-23
+modified: 2026-06-08
+last-reviewed: 2026-06-08
tags:
- service
- media
@@ -41,6 +41,24 @@ Dashboard > Playback:
2. Allow hardware encoding: Enabled
3. VPP Tone mapping: Enabled
+## Upgrades
+
+Installed via Homebrew cask (`state: present`, unpinned), so the Ansible role
+won't bump an already-installed cask. To upgrade, run on indri:
+
+```bash
+brew upgrade --cask jellyfin
+```
+
+**Gatekeeper gotcha:** a cask upgrade replaces `/Applications/Jellyfin.app` and
+re-applies the `com.apple.quarantine` xattr. When launchd respawns the service,
+the new binary hangs silently — process alive but ~0 CPU, no logs, no listening
+socket — because Gatekeeper is holding the first launch pending approval.
+Removing the xattr over SSH fails (`xattr -dr com.apple.quarantine ...` →
+"Operation not permitted", blocked by macOS TCC). Approve the first-launch
+dialog on indri's GUI console (or run the `xattr` removal from a local Terminal
+with Full Disk Access), then reload the LaunchAgent.
+
## Observability
- Metrics: `jellyfin_metrics` ansible role
diff --git a/docs/reference/services/prowler.md b/docs/reference/services/prowler.md
index f45955f..9f7e4b3 100644
--- a/docs/reference/services/prowler.md
+++ b/docs/reference/services/prowler.md
@@ -1,6 +1,6 @@
---
title: Prowler
-modified: 2026-03-24
+modified: 2026-06-08
last-reviewed: 2026-03-24
tags:
- service
@@ -17,20 +17,20 @@ CIS Kubernetes Benchmark scanner for compliance posture reporting.
|----------|-------|
| **Namespace** | `prowler` |
| **Image** | `registry.ops.eblu.me/blumeops/prowler` (see `argocd/manifests/prowler/kustomization.yaml` for current tag) |
-| **Schedule** | K8s CIS: Sunday 3am / Image: Saturday 3am / IaC: Saturday 2am |
-| **Reports** | `sifaka:/volume1/reports/prowler/`, `prowler-images/`, `prowler-iac/` (NFS) |
+| **Schedule** | K8s CIS: Sunday 3am |
+| **Reports** | `sifaka:/volume1/reports/prowler/` (NFS) |
| **Manifests** | `argocd/manifests/prowler/` |
## What it does
-Runs Prowler 5 as two CronJobs:
+Runs Prowler 5 as a single CronJob:
- **K8s CIS scan** (Sunday) — CIS Kubernetes Benchmark v1.11 checks across pod security, RBAC, apiserver, etcd, kubelet, controller-manager, and scheduler
-- **Image scan** (Saturday) — CVE, secret, and misconfiguration scanning of all `blumeops/*` container images in the registry via Trivy
-- **IaC scan** (Saturday) — static analysis of Dockerfiles, K8s manifests, and other IaC files in the repo via Trivy
Reports are written in HTML, CSV, and JSON-OCSF to the NFS share on sifaka.
+The **image** and **IaC** scans (formerly Saturday CronJobs) were retired in 2026-06 — they generated tens of thousands of un-actioned findings weekly. See [[deploy-prowler#Why only the K8s CIS scan]].
+
## See also
- [[security]] — security & compliance posture overview
diff --git a/mise-tasks/review-compliance-reports b/mise-tasks/review-compliance-reports
index 24d2afc..f2a0a54 100755
--- a/mise-tasks/review-compliance-reports
+++ b/mise-tasks/review-compliance-reports
@@ -10,19 +10,19 @@
Covers:
- Prowler K8s CIS (in-cluster): per-finding detail
- - Prowler container image scans: grouped by check + resource
- - Prowler IaC manifest scans: grouped by check + resource
- Kingfisher secret scanning: TODO — pending upstream JSON/CSV output
support (currently HTML-only; contribute from spork)
-For each Prowler scan, copies the two most recent CSV reports, parses
+The Prowler container-image CVE scan and IaC scan were retired in 2026-06
+(see docs/how-to/operations/deploy-prowler.md) — they produced tens of
+thousands of un-actioned findings weekly. Only the K8s CIS scan remains.
+
+For the Prowler scan, copies the two most recent CSV reports, parses
them, and displays:
1. Overall status (pass/fail/manual/muted counts)
2. Unmuted failures by severity
3. Delta from the previous report (new vs resolved)
- 4. Actionable unmuted failures (per-finding for in-cluster; grouped
- by check ID and resource for image/IaC because they have far too
- many findings to list individually)
+ 4. Actionable unmuted failures (per-finding detail)
This is the primary tool for the weekly compliance report review.
"""
@@ -39,11 +39,9 @@ from rich.console import Console
from rich.panel import Panel
from rich.table import Table
-PROWLER_SCANS: list[tuple[str, str, bool]] = [
- # (label, sifaka base path, group_findings)
- ("K8s CIS (In-Cluster)", "/volume1/reports/prowler", False),
- ("Container Images", "/volume1/reports/prowler-images", True),
- ("IaC (manifests)", "/volume1/reports/prowler-iac", True),
+PROWLER_SCANS: list[tuple[str, str]] = [
+ # (label, sifaka base path)
+ ("K8s CIS (In-Cluster)", "/volume1/reports/prowler"),
]
console = Console()
@@ -334,14 +332,8 @@ def summarize_report(
tmpdir: str,
*,
show_muted: bool = False,
- group_findings: bool = False,
) -> None:
- """Fetch and summarize the latest Prowler report under `base`.
-
- When `group_findings` is True, top-N CHECK_ID and RESOURCE_NAME tables
- are shown instead of a per-finding detail table — appropriate for
- image and IaC scans that produce thousands of findings.
- """
+ """Fetch and summarize the latest Prowler report under `base`."""
console.rule(f"[bold]{label}[/bold]")
csvs = list_reports(base)
if not csvs:
@@ -458,36 +450,29 @@ def summarize_report(
)
console.print()
- # For grouped scans the new/resolved listings are too noisy
- # (potentially thousands of lines). Skip the listings; the count
- # is in the panel above and detail is in the grouped tables.
- if not group_findings:
- if new_keys:
- console.print("[bold red]New Unmuted Failures:[/bold red]")
- for k in sorted(new_keys):
- r = curr_keys[k]
- console.print(
- f" [{r['SEVERITY']}] {r['CHECK_ID']}: "
- f"{r['STATUS_EXTENDED'][:120]}"
- )
- console.print()
+ if new_keys:
+ console.print("[bold red]New Unmuted Failures:[/bold red]")
+ for k in sorted(new_keys):
+ r = curr_keys[k]
+ console.print(
+ f" [{r['SEVERITY']}] {r['CHECK_ID']}: "
+ f"{r['STATUS_EXTENDED'][:120]}"
+ )
+ console.print()
- if resolved_keys:
- console.print("[bold green]Resolved:[/bold green]")
- for k in sorted(resolved_keys):
- r = prev_keys[k]
- console.print(
- f" [dim][{r['SEVERITY']}] {r['CHECK_ID']}: "
- f"{r['STATUS_EXTENDED'][:120]}[/dim]"
- )
- console.print()
+ if resolved_keys:
+ console.print("[bold green]Resolved:[/bold green]")
+ for k in sorted(resolved_keys):
+ r = prev_keys[k]
+ console.print(
+ f" [dim][{r['SEVERITY']}] {r['CHECK_ID']}: "
+ f"{r['STATUS_EXTENDED'][:120]}[/dim]"
+ )
+ console.print()
- # --- Unmuted failure details (grouped or per-finding) ---
+ # --- Unmuted failure details ---
if latest["unmuted"]:
- if group_findings:
- _print_grouped_findings(latest["unmuted"])
- else:
- _print_findings_detail(latest["unmuted"])
+ _print_findings_detail(latest["unmuted"])
# --- Muted findings summary ---
if show_muted and latest["muted"]:
@@ -566,75 +551,6 @@ def _print_findings_detail(unmuted: list[dict]) -> None:
console.print()
-def _worst_severity(rows: list[dict]) -> str:
- """Return the most severe severity label across `rows`."""
- if not rows:
- return ""
- return min(
- (r["SEVERITY"] for r in rows),
- key=lambda s: severity_sort({"SEVERITY": s}),
- )
-
-
-def _print_grouped_findings(unmuted: list[dict], top_n: int = 15) -> None:
- """Top-N tables grouped by CHECK_ID and RESOURCE_NAME.
-
- Used for image and IaC scans where per-finding tables would be too
- large to be useful. Shows count and worst severity for each group.
- """
- by_check: dict[str, list[dict]] = {}
- by_resource: dict[str, list[dict]] = {}
- for r in unmuted:
- by_check.setdefault(r["CHECK_ID"], []).append(r)
- by_resource.setdefault(r.get("RESOURCE_NAME", "") or "(no resource)", []).append(r)
-
- check_table = Table(
- show_header=True,
- header_style="bold",
- title=f"Top {top_n} Checks by Unmuted Finding Count",
- )
- check_table.add_column("Worst Sev")
- check_table.add_column("Check ID")
- check_table.add_column("Count", justify="right")
-
- for check, rows in sorted(
- by_check.items(), key=lambda kv: -len(kv[1])
- )[:top_n]:
- worst = _worst_severity(rows)
- style = _sev_style(worst)
- check_table.add_row(
- f"[{style}]{worst}[/{style}]" if style else worst,
- check,
- str(len(rows)),
- )
-
- console.print(check_table)
- console.print()
-
- res_table = Table(
- show_header=True,
- header_style="bold",
- title=f"Top {top_n} Resources by Unmuted Finding Count",
- )
- res_table.add_column("Worst Sev")
- res_table.add_column("Resource")
- res_table.add_column("Count", justify="right")
-
- for resource, rows in sorted(
- by_resource.items(), key=lambda kv: -len(kv[1])
- )[:top_n]:
- worst = _worst_severity(rows)
- style = _sev_style(worst)
- res_table.add_row(
- f"[{style}]{worst}[/{style}]" if style else worst,
- resource[:80],
- str(len(rows)),
- )
-
- console.print(res_table)
- console.print()
-
-
def main(
full: Annotated[
bool, typer.Option(help="(reserved) currently a no-op; all unmuted failures already shown")
@@ -646,13 +562,12 @@ def main(
del full # historical flag, kept for backwards compatibility
with tempfile.TemporaryDirectory() as tmpdir:
- for label, base, group in PROWLER_SCANS:
+ for label, base in PROWLER_SCANS:
summarize_report(
label,
base,
tmpdir,
show_muted=show_muted,
- group_findings=group,
)
# --- Node-level MANUAL check verification ---
diff --git a/nixos/ringtail/flake.lock b/nixos/ringtail/flake.lock
index bb60501..340bd9d 100644
--- a/nixos/ringtail/flake.lock
+++ b/nixos/ringtail/flake.lock
@@ -7,11 +7,11 @@
]
},
"locked": {
- "lastModified": 1780290312,
- "narHash": "sha256-eTAlX0CwgB84Ts3GaBd944A3DRXVMzgA0EqroZBISUo=",
+ "lastModified": 1780894562,
+ "narHash": "sha256-c3430xwxwhHipl3jigUGMMBfpaMylDqytW/kdmB3ZGs=",
"owner": "nix-community",
"repo": "disko",
- "rev": "115e5211780054d8a890b41f0b7734cafad54dfe",
+ "rev": "24fed06cac83bcc44ac8efbb57cab1a82fa0bedc",
"type": "github"
},
"original": {
@@ -43,11 +43,11 @@
},
"nixpkgs": {
"locked": {
- "lastModified": 1779796641,
- "narHash": "sha256-ZsIrKmhp4vbBXoXXmR/tBXA/UCsAQiJL9vsgZEduhVY=",
+ "lastModified": 1780511130,
+ "narHash": "sha256-2v9lT4ya59Lh1FqPeLnz1MoX9y/wz2huqfe9RtQZITk=",
"owner": "NixOS",
"repo": "nixpkgs",
- "rev": "25f538306313eae3927264466c70d7001dcea1df",
+ "rev": "535f3e6942cb1cead3929c604320d3db54b542b9",
"type": "github"
},
"original": {
diff --git a/service-versions.yaml b/service-versions.yaml
index cc9dc9e..419d129 100644
--- a/service-versions.yaml
+++ b/service-versions.yaml
@@ -414,6 +414,23 @@ services:
upstream-source: https://github.com/caddyserver/caddy/releases
notes: Built from source with Gandi DNS and Layer 4 plugins
+ - name: heph
+ type: ansible
+ last-reviewed: 2026-06-05
+ current-version: "v1.2.1"
+ upstream-source: https://forge.eblu.me/eblume/hephaestus/releases
+ notes: >-
+ hephaestus task/context sync hub on indri (server-mode launchagent,
+ ansible/roles/heph; cargo-built from the forge). SELF-UPDATING: hephd
+ polls the forge for newer releases every 10 min and rebuilds + restarts
+ itself, so the running version drifts AHEAD of the ansible heph_version
+ pin. current-version here is the last observed/deployed tag, not a hard
+ pin — check the hub is up (`curl https://heph.ops.eblu.me/config` responds)
+ and read the live version from the hub log's `current=` line. Reconciling this
+ self-update vs IaC-pin drift is tracked in the heph "Hephaestus" project:
+ "Reconcile hephd self-update with ansible-pinned version (drift on indri
+ hub)" (node 01KTBXWT6XTHNDH92CVJY88E5K).
+
- name: borgmatic
type: ansible
last-reviewed: 2026-04-15
@@ -423,9 +440,15 @@ services:
- name: jellyfin
type: ansible
- last-reviewed: 2026-03-17
- current-version: "10.11.6"
+ last-reviewed: 2026-06-08
+ current-version: "10.11.11"
upstream-source: https://github.com/jellyfin/jellyfin/releases
+ notes: >-
+ Homebrew cask (state: present, unpinned). Upgrade with
+ `brew upgrade --cask jellyfin` on indri. After upgrade the .app is
+ re-quarantined; launchd-spawned launch hangs silently until the
+ Gatekeeper first-launch dialog is approved on indri's GUI console
+ (xattr removal over SSH is blocked by TCC).
- name: automounter
type: ansible