diff --git a/ansible/playbooks/indri.yml b/ansible/playbooks/indri.yml index 1e33bb1..ddb57f8 100644 --- a/ansible/playbooks/indri.yml +++ b/ansible/playbooks/indri.yml @@ -260,7 +260,5 @@ tags: cv - role: docs tags: docs - - role: heph - tags: heph - role: caddy tags: caddy diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml index e6d7385..363d09e 100644 --- a/ansible/roles/caddy/defaults/main.yml +++ b/ansible/roles/caddy/defaults/main.yml @@ -52,9 +52,6 @@ caddy_services: - name: devpi host: "pypi.{{ caddy_domain }}" backend: "http://localhost:3141" - - name: heph - host: "heph.{{ caddy_domain }}" - backend: "http://localhost:8787" # hephaestus hub (server mode) + PWA shell - name: kiwix host: "kiwix.{{ caddy_domain }}" backend: "https://kiwix.tail8d86e.ts.net" diff --git a/ansible/roles/heph/defaults/main.yml b/ansible/roles/heph/defaults/main.yml deleted file mode 100644 index 88d2240..0000000 --- a/ansible/roles/heph/defaults/main.yml +++ /dev/null @@ -1,49 +0,0 @@ ---- -# hephaestus hub — the canonical heph replica (server mode) on indri. -# Other devices (e.g. gilbert) are spokes that sync against this hub. -# See [[set-up-sync-hub]] and [[host-heph-pwa]] in the hephaestus repo. - -# Pinned release used for the initial `cargo install` and the PWA shell. -# After bootstrap, hephd's own --self-update keeps the binary current; this -# pin only governs the first install and the bundled PWA shell version. -heph_version: v1.2.1 - -# Anonymous public HTTPS clone — matches hephd's INSTALL_GIT_URL so the initial -# install and unattended self-update build from the same source (no ssh-agent). -heph_repo_url: https://forge.eblu.me/eblume/hephaestus.git - -heph_bin_dir: /Users/erichblume/.cargo/bin -heph_binary: "{{ heph_bin_dir }}/hephd" - -# rustc/cargo here are rustup shims. 
The bare (non-mise) environment that the -# launchagent and ansible run in falls back to rustup's *default* toolchain, -# which can lag behind heph's rust-version floor (Cargo.toml: 1.89). Pin the -# channel explicitly so both the bootstrap build and unattended self-update -# always use a current toolchain regardless of the host's rustup default. -heph_rust_toolchain: stable - -heph_data_dir: /Users/erichblume/.local/share/heph -heph_db: "{{ heph_data_dir }}/heph.db" -heph_socket: "{{ heph_data_dir }}/hephd.sock" -heph_log_dir: /Users/erichblume/Library/Logs - -# Version-pinned source checkout; the PWA static shell is served directly from -# its heph-pwa/ subdir (no copy), keeping shell and hub in lockstep at heph_version. -heph_pwa_src_dir: /Users/erichblume/.cache/heph-pwa-src -heph_web_root: "{{ heph_pwa_src_dir }}/heph-pwa" - -# Hub listens on all interfaces so tailnet spokes can reach it directly -# (http://indri.tail8d86e.ts.net:8787) and Caddy can proxy heph.ops.eblu.me. -# Access is gated by Authentik OIDC regardless — tailnet reachability is not -# enough (this is the owner's most sensitive data). -heph_http_addr: 0.0.0.0:8787 -heph_port: 8787 -heph_external_url: https://heph.ops.eblu.me - -# Authentik OIDC — issuer + audience together turn hub auth on. The audience is -# the device-code client id (see argocd/manifests/authentik heph blueprint). -heph_oidc_issuer: https://authentik.ops.eblu.me/application/o/heph/ -heph_oidc_audience: heph - -# Self-update poll interval (seconds). 10 minutes. 
-heph_self_update_interval_secs: 600 diff --git a/ansible/roles/heph/handlers/main.yml b/ansible/roles/heph/handlers/main.yml deleted file mode 100644 index 92fe9d7..0000000 --- a/ansible/roles/heph/handlers/main.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- -- name: Restart heph - ansible.builtin.shell: | - launchctl unload ~/Library/LaunchAgents/mcquack.eblume.heph.plist 2>/dev/null || true - launchctl load ~/Library/LaunchAgents/mcquack.eblume.heph.plist - changed_when: true diff --git a/ansible/roles/heph/tasks/main.yml b/ansible/roles/heph/tasks/main.yml deleted file mode 100644 index 7a45fe3..0000000 --- a/ansible/roles/heph/tasks/main.yml +++ /dev/null @@ -1,82 +0,0 @@ ---- -# hephaestus hub (server mode) on indri. -# -# DATA SEEDING (one-time, Path A — do this BEFORE the first provision so the hub -# adopts gilbert's existing data instead of being born empty): -# -# 1. On the seed device (gilbert): heph daemon stop -# 2. Copy its store to indri: scp ~/.local/share/heph/heph.db \ -# indri:~/.local/share/heph/heph.db -# 3. On indri, give the hub its OWN device origin (keeps gilbert's owner_id + -# data; hephd regenerates a fresh origin on next start when it is missing): -# sqlite3 ~/.local/share/heph/heph.db "DELETE FROM meta WHERE key='origin';" -# 4. Run this role (installs hephd, stages the PWA, loads the launchagent). -# -# hephd auto-creates an empty store on first start if none exists, so seeding is -# optional — skip it only if you intend a fresh, empty hub. - -- name: Ensure heph data directory exists - ansible.builtin.file: - path: "{{ heph_data_dir }}" - state: directory - mode: '0700' - -- name: Check for installed hephd binary - ansible.builtin.stat: - path: "{{ heph_binary }}" - register: heph_binary_stat - -# Bootstrap install only when hephd is absent. Thereafter hephd's own -# --self-update keeps it current; ansible must not fight (or downgrade) it. -# This builds from source and can take several minutes on a cold cargo cache. 
-- name: Bootstrap-install heph + hephd from the forge ({{ heph_version }}) - ansible.builtin.command: - cmd: >- - {{ heph_bin_dir }}/cargo install --locked - --git {{ heph_repo_url }} - --tag {{ heph_version }} - heph hephd - environment: - PATH: "{{ heph_bin_dir }}:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin" - RUSTUP_TOOLCHAIN: "{{ heph_rust_toolchain }}" - when: not heph_binary_stat.stat.exists - changed_when: true - notify: Restart heph - -# Checkout provides the PWA shell at {{ heph_web_root }} (heph-pwa/ subdir), -# served directly by hephd. Static files are read from disk per request, so a -# version bump needs no restart; the service worker (CACHE = "heph-pwa-vN") -# evicts stale assets on next load. -- name: Ensure heph cache parent directory exists - ansible.builtin.file: - path: "{{ heph_pwa_src_dir | dirname }}" - state: directory - mode: '0755' - -- name: Stage heph-pwa source at {{ heph_version }} - ansible.builtin.git: - repo: "{{ heph_repo_url }}" - dest: "{{ heph_pwa_src_dir }}" - version: "{{ heph_version }}" - depth: 1 - single_branch: true - force: true - -- name: Deploy heph LaunchAgent plist - ansible.builtin.template: - src: heph.plist.j2 - dest: ~/Library/LaunchAgents/mcquack.eblume.heph.plist - mode: '0644' - notify: Restart heph - -- name: Check if heph LaunchAgent is loaded - ansible.builtin.command: launchctl list mcquack.eblume.heph - register: heph_launchctl_check - changed_when: false - failed_when: false - -- name: Load heph LaunchAgent if not loaded - ansible.builtin.command: launchctl load ~/Library/LaunchAgents/mcquack.eblume.heph.plist - when: heph_launchctl_check.rc != 0 - changed_when: true - failed_when: false diff --git a/ansible/roles/heph/templates/heph.plist.j2 b/ansible/roles/heph/templates/heph.plist.j2 deleted file mode 100644 index 19a2367..0000000 --- a/ansible/roles/heph/templates/heph.plist.j2 +++ /dev/null @@ -1,50 +0,0 @@ - - - - - - Label - mcquack.eblume.heph - ProgramArguments - - {{ heph_binary }} - --mode 
- server - --http-addr - {{ heph_http_addr }} - --db - {{ heph_db }} - --socket - {{ heph_socket }} - --web-root - {{ heph_web_root }} - --oidc-issuer - {{ heph_oidc_issuer }} - --oidc-audience - {{ heph_oidc_audience }} - --self-update - --self-update-interval-secs - {{ heph_self_update_interval_secs }} - - RunAtLoad - - KeepAlive - - EnvironmentVariables - - - PATH - {{ heph_bin_dir }}:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin - HOME - /Users/erichblume - - RUSTUP_TOOLCHAIN - {{ heph_rust_toolchain }} - - StandardOutPath - {{ heph_log_dir }}/mcquack.heph.out.log - StandardErrorPath - {{ heph_log_dir }}/mcquack.heph.err.log - - diff --git a/argocd/manifests/authentik/configmap-blueprint.yaml b/argocd/manifests/authentik/configmap-blueprint.yaml index cc97dea..fcbb99b 100644 --- a/argocd/manifests/authentik/configmap-blueprint.yaml +++ b/argocd/manifests/authentik/configmap-blueprint.yaml @@ -434,93 +434,3 @@ data: provider: !KeyOf mealie-provider meta_launch_url: https://meals.ops.eblu.me policy_engine_mode: all - - heph.yaml: | - version: 1 - metadata: - name: BlumeOps Heph SSO - labels: - blueprints.goauthentik.io/description: "Hephaestus hub OIDC (device-code) provider, application, and device-code flow" - entries: - # Device-code flow (RFC 8628). authentik ships no default for this, so we - # create one and bind it to the brand below. An empty stage_configuration - # flow is sufficient: the already-authenticated user just confirms the code. - - model: authentik_flows.flow - id: device-code-flow - identifiers: - slug: default-device-code-flow - attrs: - name: Device code flow - title: Device code flow - slug: default-device-code-flow - designation: stage_configuration - authentication: require_authenticated - - # Enable the device-code grant globally by binding the flow to the default - # brand (domain authentik-default). Partial update — only sets this field. 
- - model: authentik_brands.brand - identifiers: - domain: authentik-default - attrs: - flow_device_code: !KeyOf device-code-flow - - # OAuth2 provider for heph — PUBLIC client (device-code + PKCE, no secret). - # client_id doubles as the token audience the hub verifies (--oidc-audience heph), - # and the app slug 'heph' is the issuer path (/application/o/heph/). - - model: authentik_providers_oauth2.oauth2provider - id: heph-provider - identifiers: - name: Heph - attrs: - name: Heph - authorization_flow: !Find [authentik_flows.flow, [slug, default-provider-authorization-implicit-consent]] - invalidation_flow: !Find [authentik_flows.flow, [slug, default-provider-invalidation-flow]] - client_type: public - client_id: heph - # CLI/TUI use the device-code grant (no redirect). The heph-pwa browser - # login uses Authorization Code + PKCE, which DOES redirect back to the - # app's origin — register those here (Authentik also keys token-endpoint - # CORS off these origins). Trailing slash matters: the PWA's redirect_uri - # is its base dir, e.g. https://heph.ops.eblu.me/. - redirect_uris: - - matching_mode: strict - url: https://heph.ops.eblu.me/ - - matching_mode: strict - url: http://localhost:8787/ # local dev (hephd --web-root) - signing_key: !Find [authentik_crypto.certificatekeypair, [name, authentik Self-signed Certificate]] - property_mappings: - - !Find [authentik_providers_oauth2.scopemapping, [scope_name, openid]] - - !Find [authentik_providers_oauth2.scopemapping, [scope_name, email]] - - !Find [authentik_providers_oauth2.scopemapping, [scope_name, profile]] - # offline_access: heph CLI requests "openid offline_access"; without - # this mapping the refresh token is session-bound and hephd's - # refresh_token grant 400s once the session lapses (spoke sync dies). 
- - !Find [authentik_providers_oauth2.scopemapping, [scope_name, offline_access]] - sub_mode: hashed_user_id - include_claims_in_id_token: true - - # Heph application — linked to the OAuth2 provider - - model: authentik_core.application - id: heph-app - identifiers: - slug: heph - attrs: - name: Hephaestus - slug: heph - provider: !KeyOf heph-provider - meta_launch_url: https://heph.ops.eblu.me - policy_engine_mode: any - - # Policy binding — restrict heph to admins group (single-owner, sensitive data) - - model: authentik_policies.policybinding - identifiers: - order: 0 - target: !KeyOf heph-app - group: !Find [authentik_core.group, [name, admins]] - attrs: - target: !KeyOf heph-app - group: !Find [authentik_core.group, [name, admins]] - order: 0 - enabled: true - negate: false - timeout: 30 diff --git a/argocd/manifests/external-secrets-ringtail/kustomization.yaml b/argocd/manifests/external-secrets-ringtail/kustomization.yaml index 9fd4e2f..05b6b54 100644 --- a/argocd/manifests/external-secrets-ringtail/kustomization.yaml +++ b/argocd/manifests/external-secrets-ringtail/kustomization.yaml @@ -13,4 +13,4 @@ resources: images: - name: registry.ops.eblu.me/blumeops/external-secrets - newTag: v2.2.0-13895bb-nix + newTag: v2.2.0-59dace8-nix diff --git a/argocd/manifests/external-secrets/kustomization.yaml b/argocd/manifests/external-secrets/kustomization.yaml index 639db66..8b1aea5 100644 --- a/argocd/manifests/external-secrets/kustomization.yaml +++ b/argocd/manifests/external-secrets/kustomization.yaml @@ -13,4 +13,4 @@ resources: images: - name: ghcr.io/external-secrets/external-secrets newName: registry.ops.eblu.me/blumeops/external-secrets - newTag: v2.2.0-13895bb + newTag: v2.2.0-0e70a1b diff --git a/argocd/manifests/prowler/cronjob-iac-scan.yaml b/argocd/manifests/prowler/cronjob-iac-scan.yaml new file mode 100644 index 0000000..c1303a5 --- /dev/null +++ b/argocd/manifests/prowler/cronjob-iac-scan.yaml @@ -0,0 +1,54 @@ +--- +apiVersion: batch/v1 +kind: CronJob 
+metadata: + name: prowler-iac-scan + namespace: prowler +spec: + schedule: "0 2 * * 6" # Saturday 2am + concurrencyPolicy: Forbid + jobTemplate: + spec: + ttlSecondsAfterFinished: 604800 # Auto-delete after 7 days + template: + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: prowler + image: registry.ops.eblu.me/blumeops/prowler:kustomized + command: ["/bin/sh", "-c"] + # Prowler's --mutelist-file is a no-op for the IaC provider + # (it delegates to Trivy). The Prowler image's trivy shim + # injects --ignorefile $TRIVY_IGNOREFILE when set; see + # containers/prowler/Dockerfile. + env: + - name: TRIVY_IGNOREFILE + value: /mutelist/trivyignore.yaml + args: + - | + DATEDIR=/reports/prowler-iac/$(date +%Y-%m-%d) + mkdir -p "$DATEDIR" + prowler iac \ + --scan-repository-url https://forge.ops.eblu.me/eblume/blumeops.git \ + -z \ + --output-formats html csv json-ocsf \ + --output-directory "$DATEDIR" + volumeMounts: + - name: reports + mountPath: /reports + - name: mutelist + mountPath: /mutelist + readOnly: true + restartPolicy: OnFailure + volumes: + - name: reports + persistentVolumeClaim: + claimName: prowler-reports + - name: mutelist + configMap: + name: prowler-mutelist + items: + - key: trivyignore.yaml + path: trivyignore.yaml diff --git a/argocd/manifests/prowler/cronjob-image-scan.yaml b/argocd/manifests/prowler/cronjob-image-scan.yaml new file mode 100644 index 0000000..b779d08 --- /dev/null +++ b/argocd/manifests/prowler/cronjob-image-scan.yaml @@ -0,0 +1,39 @@ +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: prowler-image-scan + namespace: prowler +spec: + schedule: "0 3 * * 6" # Saturday 3am + concurrencyPolicy: Forbid + jobTemplate: + spec: + ttlSecondsAfterFinished: 604800 # Auto-delete after 7 days + template: + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: prowler + image: registry.ops.eblu.me/blumeops/prowler:kustomized + command: ["/bin/sh", "-c"] + 
args: + - | + DATEDIR=/reports/prowler-images/$(date +%Y-%m-%d) + mkdir -p "$DATEDIR" + prowler image \ + --registry https://registry.ops.eblu.me \ + --image-filter "^blumeops/" \ + -z \ + --output-formats html csv json-ocsf \ + --output-directory "$DATEDIR" + volumeMounts: + - name: reports + mountPath: /reports + restartPolicy: OnFailure + volumes: + - name: reports + persistentVolumeClaim: + claimName: prowler-reports diff --git a/argocd/manifests/prowler/kustomization.yaml b/argocd/manifests/prowler/kustomization.yaml index 38295a3..1d92a6b 100644 --- a/argocd/manifests/prowler/kustomization.yaml +++ b/argocd/manifests/prowler/kustomization.yaml @@ -10,6 +10,8 @@ resources: - pv-nfs.yaml - pvc.yaml - cronjob.yaml + - cronjob-image-scan.yaml + - cronjob-iac-scan.yaml configMapGenerator: - name: prowler-mutelist @@ -21,6 +23,7 @@ configMapGenerator: - mutelist/core-pod-security.yaml - mutelist/manual-node-checks.yaml - mutelist/rbac.yaml + - mutelist/trivyignore.yaml images: - name: registry.ops.eblu.me/blumeops/prowler diff --git a/argocd/manifests/prowler/mutelist/trivyignore.yaml b/argocd/manifests/prowler/mutelist/trivyignore.yaml new file mode 100644 index 0000000..87af966 --- /dev/null +++ b/argocd/manifests/prowler/mutelist/trivyignore.yaml @@ -0,0 +1,37 @@ +# Trivy ignorefile for Prowler IaC scan. +# +# Prowler's `--mutelist-file` flag is a no-op for the IaC provider +# (iac_provider.py sets self._mutelist = None and delegates to Trivy). +# Trivy in turn does not auto-discover this YAML form from cwd, so the +# Prowler image ships a shim wrapper around `trivy` that injects +# --ignorefile $TRIVY_IGNOREFILE when the env var is set. The cronjob +# mounts this file and sets TRIVY_IGNOREFILE accordingly. +# +# Schema: https://trivy.dev/latest/docs/configuration/filtering/ +# IDs use the hyphenated form Trivy displays (KSV-0041, not KSV0041). 
+misconfigurations: + - id: KSV-0041 + paths: + - "argocd/manifests/external-secrets/rbac.yaml" + statement: >- + external-secrets-operator's entire function is to read and + synthesize Secret objects; ClusterRole over secrets is its + purpose. Both the controller and cert-controller are + upstream-defined. + - id: KSV-0041 + paths: + - "argocd/manifests/kube-state-metrics/rbac.yaml" + - "argocd/manifests/kube-state-metrics-ringtail/rbac.yaml" + statement: >- + KSM exposes only Secret metadata (name, namespace, type, labels), + never the data field. list/watch on secrets is required for + kube_secret_info / kube_secret_labels metrics. + - id: KSV-0114 + paths: + - "argocd/manifests/external-secrets/rbac.yaml" + statement: >- + cert-controller manages the external-secrets validating webhook + configurations to inject its own rotating CA bundle. RBAC is + scoped to two named webhooks (secretstore-validate, + externalsecret-validate) via resourceNames; KSV-0114 doesn't see + the resourceNames restriction so reports the full ClusterRole. diff --git a/docs/changelog.d/+external-secrets-stable-main-sha.infra.md b/docs/changelog.d/+external-secrets-stable-main-sha.infra.md deleted file mode 100644 index fbe3c21..0000000 --- a/docs/changelog.d/+external-secrets-stable-main-sha.infra.md +++ /dev/null @@ -1 +0,0 @@ -Rebuilt the external-secrets images off `main` and repointed both clusters to the stable main-sha tags (`v2.2.0-13895bb` arm64 / `v2.2.0-13895bb-nix` amd64), so the deployed images on indri and ringtail trace to the same `main` commit rather than earlier feature-branch builds. diff --git a/docs/changelog.d/+heph-hub-v1.2.1.infra.md b/docs/changelog.d/+heph-hub-v1.2.1.infra.md deleted file mode 100644 index c203323..0000000 --- a/docs/changelog.d/+heph-hub-v1.2.1.infra.md +++ /dev/null @@ -1 +0,0 @@ -Bumped the indri heph hub to v1.2.1, which adds the hub `GET /config` endpoint and ships the heph-pwa **Login with Authentik** flow (Authorization Code + PKCE). 
Pairs with the Authentik `heph` provider redirect URIs registered earlier. diff --git a/docs/changelog.d/+jellyfin-10-11-11.bugfix.md b/docs/changelog.d/+jellyfin-10-11-11.bugfix.md deleted file mode 100644 index 779a042..0000000 --- a/docs/changelog.d/+jellyfin-10-11-11.bugfix.md +++ /dev/null @@ -1 +0,0 @@ -Upgraded Jellyfin on indri from 10.11.6 to 10.11.11, picking up the security fixes in 10.11.7 (disclosed CVEs/GHSAs, flagged "upgrade immediately") and 10.11.10 (three further GHSAs). Noted the recurring gotcha in the service-versions tracking: after a `brew upgrade --cask jellyfin`, the re-quarantined `.app` makes the launchd-spawned process hang silently until the Gatekeeper first-launch dialog is approved on indri's GUI console — removing the quarantine xattr over SSH is blocked by macOS TCC. diff --git a/docs/changelog.d/+ringtail-flake-update.infra.md b/docs/changelog.d/+ringtail-flake-update.infra.md deleted file mode 100644 index 1d806df..0000000 --- a/docs/changelog.d/+ringtail-flake-update.infra.md +++ /dev/null @@ -1 +0,0 @@ -Updated ringtail NixOS flake inputs (nixpkgs `nixos-25.11`, disko) to latest via `dagger call flake-update`. diff --git a/docs/changelog.d/+tailscale-operator-doc-review.doc.md b/docs/changelog.d/+tailscale-operator-doc-review.doc.md deleted file mode 100644 index 8f7d5a3..0000000 --- a/docs/changelog.d/+tailscale-operator-doc-review.doc.md +++ /dev/null @@ -1 +0,0 @@ -Reviewed the tailscale-operator reference card: documented the dual indri/ringtail deployment, corrected the ArgoCD apps list, pinned the upstream version, and added the ProxyGroup Ingress `host:` caveat. diff --git a/docs/changelog.d/heph-indri-hub.infra.md b/docs/changelog.d/heph-indri-hub.infra.md deleted file mode 100644 index 6761cb7..0000000 --- a/docs/changelog.d/heph-indri-hub.infra.md +++ /dev/null @@ -1 +0,0 @@ -Added the [[hephaestus]] (`heph`) sync hub to indri as a self-updating LaunchAgent managed by Ansible (`ansible/roles/heph`, tag `heph`). 
The hub runs `hephd --mode server` behind `heph.ops.eblu.me` (Caddy TLS), with self-update on a 10-minute interval and the heph-pwa mobile shell served from `--web-root`. Access is gated by a new Authentik device-code (RFC 8628) OIDC application. Indri is now the canonical hub; other devices (e.g. gilbert) attach as offline-capable spokes. The hub's store was seeded from gilbert via the data-safe Path A bring-up (copy store, reset `meta.origin`). diff --git a/docs/changelog.d/heph-offline-access.bugfix.md b/docs/changelog.d/heph-offline-access.bugfix.md deleted file mode 100644 index e9721bc..0000000 --- a/docs/changelog.d/heph-offline-access.bugfix.md +++ /dev/null @@ -1 +0,0 @@ -Granted the `offline_access` scope on the Authentik `heph` OAuth2 provider so hephaestus spokes receive a durable 30-day refresh token. Previously the refresh token was session-bound, so spoke sync would silently fail with a `400 Bad Request` on the `refresh_token` grant once the Authentik session lapsed. diff --git a/docs/changelog.d/heph-pwa-redirect-uris.infra.md b/docs/changelog.d/heph-pwa-redirect-uris.infra.md deleted file mode 100644 index f887eed..0000000 --- a/docs/changelog.d/heph-pwa-redirect-uris.infra.md +++ /dev/null @@ -1 +0,0 @@ -Registered the heph-pwa redirect URIs (`https://heph.ops.eblu.me/`, plus `http://localhost:8787/` for dev) on the Authentik `heph` OAuth2 provider, enabling the PWA's new Authorization Code + PKCE "Login with Authentik" flow (and the token-endpoint CORS it needs). Pairs with hephaestus PR #9. diff --git a/docs/changelog.d/retire-prowler-image-iac-scans.infra.md b/docs/changelog.d/retire-prowler-image-iac-scans.infra.md deleted file mode 100644 index 9afd261..0000000 --- a/docs/changelog.d/retire-prowler-image-iac-scans.infra.md +++ /dev/null @@ -1 +0,0 @@ -Retired the Prowler container-image CVE scan and IaC scan, keeping only the K8s CIS benchmark scan. 
The two retired scans generated tens of thousands of un-actioned, un-muted findings every week (~20,000 image findings and growing, mostly unpatchable upstream-image CVEs; ~650 systemic Trivy KSV pod-security warnings) — the weekly `mise run review-compliance-reports` re-surfaced them all as "action needed" though none were ever triaged. The K8s CIS scan is fully mutelisted and runs clean, so it stays. Removed the two CronJobs, the now-unused `trivyignore.yaml` mutelist, and the grouped-findings rendering in the review tool that existed solely for the high-volume scans. diff --git a/docs/how-to/operations/deploy-prowler.md b/docs/how-to/operations/deploy-prowler.md index 1475680..75dced2 100644 --- a/docs/how-to/operations/deploy-prowler.md +++ b/docs/how-to/operations/deploy-prowler.md @@ -1,6 +1,6 @@ --- title: Deploy Prowler CIS Scanner -modified: 2026-06-08 +modified: 2026-03-24 last-reviewed: 2026-03-24 tags: - how-to @@ -11,20 +11,7 @@ tags: # Deploy Prowler CIS Scanner -Prowler runs a weekly CIS Kubernetes Benchmark scan against minikube-indri and writes HTML/CSV/JSON reports to the NFS share on sifaka. - -## Why only the K8s CIS scan - -Prowler originally ran three CronJobs: K8s CIS, container-image CVE scanning, and IaC scanning. The image and IaC scans were **retired in 2026-06**. - -Both were pure toil with no realized value: - -- **Image scan** produced ~20,000 unmuted findings per run and growing, none ever triaged or muted. They were overwhelmingly CVEs in *upstream* base images we don't control and can't patch, and the job re-scanned every historical tag still in the registry, multiplying the count. -- **IaC scan** produced ~650 Trivy KSV findings (`runAsNonRoot`, `readOnlyRootFilesystem`, drop-capabilities, …) against our own manifests — real but systemic, homelab-acceptable, and likewise never muted, so the weekly review re-surfaced all of them indefinitely. 
- -The K8s CIS scan, by contrast, is fully mutelisted and runs clean (0 unmuted findings week over week), so it stays. The guiding principle matches [[ai-scraper-mitigation]]: don't keep generating a firehose of output that has no audience. If image-CVE signal is wanted later, the right shape is critical-severity-only, currently-deployed-tags-only, alert-on-new — a rebuild, not a revival (tracked as the "Trivy for image/IaC scanning" task). - -Note that the K8s CIS scan itself is tied to minikube-indri, which is slated for retirement; on k3s only ~22 of 70 checks produce results (no static pods). Re-pointing a lean posture check at ringtail is tracked separately ("prowler scan against ringtail"). +Prowler runs weekly CIS Kubernetes Benchmark scans against minikube-indri and writes HTML/CSV/JSON reports to the NFS share on sifaka. ## What it checks @@ -46,6 +33,38 @@ Prowler's Kubernetes provider runs ~70 checks from the CIS Kubernetes Benchmark **k3s note:** k3s embeds the control plane in a single binary — no static pods exist. Only core + RBAC checks (~22 of 70) produce results. Consider `kube-bench` for k3s control plane checks. +### Image vulnerability scanning (Saturday 3am) + +Prowler's image provider scans all `blumeops/*` container images in `registry.ops.eblu.me` for: + +- **CVEs** — known vulnerabilities from NVD, Alpine SecDB, Debian Security Tracker, and other sources +- **Embedded secrets** — credentials or API keys baked into image layers +- **Misconfigurations** — Dockerfile best practices (running as root, missing HEALTHCHECK, etc.) + +Uses Trivy under the hood. Reports are written to `sifaka:/volume1/reports/prowler-images/`. 
+ +To run an ad-hoc image scan: + +```fish +kubectl create job --from=cronjob/prowler-image-scan prowler-image-manual -n prowler --context=minikube-indri +``` + +### IaC scanning (Saturday 2am) + +Prowler's IaC provider scans the blumeops repository (cloned at scan time) for misconfigurations in: + +- **Dockerfiles** — running as root, using `latest` tags, missing `HEALTHCHECK` +- **Kubernetes manifests** — missing resource limits, privileged containers, insecure settings +- **Other IaC files** — Terraform, CloudFormation, etc. if present + +Uses Trivy under the hood. Reports are written to `sifaka:/volume1/reports/prowler-iac/`. + +To run an ad-hoc IaC scan: + +```fish +kubectl create job --from=cronjob/prowler-iac-scan prowler-iac-manual -n prowler --context=minikube-indri +``` + ## Reports Reports are written to `sifaka:/volume1/reports/prowler/` with timestamped filenames. See [[read-compliance-reports]] for how to access and interpret them. diff --git a/docs/how-to/operations/read-compliance-reports.md b/docs/how-to/operations/read-compliance-reports.md index 2990026..e676ad5 100644 --- a/docs/how-to/operations/read-compliance-reports.md +++ b/docs/how-to/operations/read-compliance-reports.md @@ -1,6 +1,6 @@ --- title: Read Compliance Reports -modified: 2026-06-08 +modified: 2026-04-06 last-reviewed: 2026-04-06 tags: - how-to @@ -27,13 +27,8 @@ Reports are stored on sifaka at `/volume1/reports/`. Each scanner writes to its | Scanner | Path | Schedule | |---------|------|----------| | [[prowler]] K8s CIS | `sifaka:/volume1/reports/prowler/` | Weekly (Sunday 3am) | - -> **Retired (2026-06):** the Prowler **image** (`prowler-images/`) and **IaC** -> (`prowler-iac/`) scans were retired. They produced tens of thousands of -> un-actioned, un-muted findings every week — mostly unpatchable upstream-image -> CVEs and systemic pod-security KSV warnings — and nobody triaged them. See -> [[deploy-prowler#Why only the K8s CIS scan]] for the rationale. 
Their stale -> report directories may linger on sifaka until manually removed. +| [[prowler]] Image | `sifaka:/volume1/reports/prowler-images/` | Weekly (Saturday 3am) | +| [[prowler]] IaC | `sifaka:/volume1/reports/prowler-iac/` | Weekly (Saturday 2am) | Copy reports to your local machine (remember `scp -O` for sifaka): diff --git a/docs/reference/infrastructure/indri.md b/docs/reference/infrastructure/indri.md index 8364ba0..67652ca 100644 --- a/docs/reference/infrastructure/indri.md +++ b/docs/reference/infrastructure/indri.md @@ -33,7 +33,6 @@ Primary BlumeOps server. Mac Mini M1 (2020). - [[alloy|Alloy]] - Metrics/logs collector - [[caddy]] - Reverse proxy for `*.ops.eblu.me` - [[devpi]] - PyPI mirror (LaunchAgent) -- [[hephaestus]] - heph task/context sync hub (LaunchAgent, self-updating) - [[cv]] - Static CV site, served by Caddy - [[docs]] - Quartz-built docs site, served by Caddy diff --git a/docs/reference/kubernetes/tailscale-operator.md b/docs/reference/kubernetes/tailscale-operator.md index 174b347..c102e02 100644 --- a/docs/reference/kubernetes/tailscale-operator.md +++ b/docs/reference/kubernetes/tailscale-operator.md @@ -1,7 +1,6 @@ --- title: Tailscale Operator -modified: 2026-06-08 -last-reviewed: 2026-06-08 +modified: 2026-02-08 tags: - kubernetes - tailscale @@ -16,16 +15,8 @@ The Tailscale operator enables Kubernetes services to be exposed directly on the | Property | Value | |----------|-------| | **Namespace** | `tailscale` | -| **Upstream** | `mirrors/tailscale` on forge (static manifest, pinned `v1.94.2`) | -| **ArgoCD Apps** | `tailscale-operator` (indri/minikube), `tailscale-operator-ringtail` (ringtail/k3s) | - -The operator runs on **both** clusters — indri's minikube and ringtail's k3s. -Both apps layer on the shared `tailscale-operator-base` kustomize directory -(operator manifest, `ProxyClass`, `dnsconfig`); each cluster supplies its own -`ProxyGroup` (indri: 2 replicas, ringtail: 1) and OAuth `ExternalSecret`. 
The -ringtail overlay additionally rewrites the proxy image to a locally nix-built -mirror. See [[ringtail]] and [[migrate-wave1-ringtail]] for the ongoing -migration of k8s workloads onto ringtail. +| **Upstream** | `mirrors/tailscale` on forge (static manifest) | +| **ArgoCD Apps** | `tailscale-operator-base` (upstream), `tailscale-operator` (config) | ## How It Works @@ -36,13 +27,7 @@ Ingresses use a shared ProxyGroup (`ingress`) rather than per-service Tailscale 3. Service becomes accessible at `.tail8d86e.ts.net` 4. TLS is handled automatically via Tailscale -Two requirements for VIP routing to work: - -1. Tailnet clients must have `--accept-routes` enabled to route to VIP addresses. -2. Ingress rules must **not** set an explicit `host:` field. The ProxyGroup - proxy receives the FQDN as the `Host` header (e.g. - `prometheus.tail8d86e.ts.net`), which won't match a short name. Use - `host: "*"` or omit `host:` entirely. +Tailnet clients must have `--accept-routes` enabled to route to VIP addresses. Services can be individually tagged (e.g., `tag:flyio-target`) via Ingress annotations to control which ACL grants apply. See [[expose-service-publicly]] for the tagging workflow. diff --git a/docs/reference/operations/security.md b/docs/reference/operations/security.md index 86b3d3b..11c4df9 100644 --- a/docs/reference/operations/security.md +++ b/docs/reference/operations/security.md @@ -1,6 +1,6 @@ --- title: Security & Compliance -modified: 2026-06-08 +modified: 2026-03-24 last-reviewed: 2026-03-24 tags: - operations @@ -21,7 +21,7 @@ Security posture and compliance scanning for BlumeOps infrastructure. ## Scanning tools -- [[prowler]] — CIS Kubernetes Benchmark scanner (weekly CronJob). The container-image CVE scan and IaC scan were retired in 2026-06 (un-actioned noise — see [[deploy-prowler#Why only the K8s CIS scan]]); only the K8s CIS scan remains. 
+- [[prowler]] — CIS Kubernetes Benchmark scanner (weekly CronJob) - [[deploy-prowler]] — deployment and ad-hoc scan how-to - [[read-compliance-reports]] — accessing and interpreting reports - [[kingfisher]] — Secret detection and live validation for Forgejo repos (weekly CronJob + prek hook) @@ -52,5 +52,5 @@ Suppressed findings are kept in Prowler mutelist YAML under `argocd/manifests/pr - No SOC 2 compliance mapping for Kubernetes (Prowler only maps SOC 2 for AWS/Azure/GCP) - k3s control plane checks produce no results (embedded binary, no static pods) — consider kube-bench -- No container-image CVE scanning (the Prowler image scan was retired 2026-06 as un-actioned noise). If reintroduced, scope it to critical-severity, currently-deployed tags, alert-on-new -- No automated IaC misconfiguration scanning (the Prowler IaC scan was retired 2026-06). Manifest pod-security hardening is now an accept-and-document decision rather than a weekly report +- Container image scanning covers `blumeops/*` images only — upstream images (ollama, immich, etc.) are not scanned +- IaC scanning covers the blumeops repo only — no scanning of third-party Helm charts or vendored manifests diff --git a/docs/reference/services/hephaestus.md b/docs/reference/services/hephaestus.md deleted file mode 100644 index 7abc35b..0000000 --- a/docs/reference/services/hephaestus.md +++ /dev/null @@ -1,141 +0,0 @@ ---- -title: Hephaestus -modified: 2026-06-04 -last-reviewed: 2026-06-04 -tags: - - service - - hephaestus ---- - -# Hephaestus - -[hephaestus](https://github.com/eblume/hephaestus) (`heph`) is the user's -self-hosted task + context/knowledge system. It is **hub-and-spoke**: each device -runs a full local SQLite replica (`hephd --mode local`) and background-syncs -against one canonical **hub**. Indri runs that hub. 
- -## Quick Reference - -| Property | Value | -|----------|-------| -| **PWA URL** | https://heph.ops.eblu.me (browser PWA, Caddy TLS) | -| **Spoke sync URL** | http://indri.tail8d86e.ts.net:8787 (direct, tailnet) | -| **Local Port** | 8787 (`hephd --mode server`, bound `0.0.0.0`) | -| **Binary** | `~/.cargo/bin/hephd` (self-updating) | -| **Data** | `~/.local/share/heph/heph.db` | -| **PWA shell** | `~/.local/share/heph/web` | -| **Logs** | `~/Library/Logs/mcquack.heph.{out,err}.log` | -| **LaunchAgent** | `mcquack.eblume.heph` | -| **Ansible role** | `ansible/roles/heph` (tag `heph`) | - -## What runs on indri - -The launchagent runs the hub in server mode with three features enabled: - -``` -hephd --mode server --http-addr 0.0.0.0:8787 --db ~/.local/share/heph/heph.db - --web-root ~/.local/share/heph/web - --oidc-issuer https://authentik.ops.eblu.me/application/o/heph/ - --oidc-audience heph - --self-update --self-update-interval-secs 600 -``` - -- **Server mode** exposes the HTTP sync endpoint (`/rpc`, `/sync/*`) that spokes - reconcile their op-log against. -- **Self-update** (10-minute poll) rebuilds `hephd` from the forge when a newer - release tag appears (`cargo install --git https://forge.eblu.me/eblume/hephaestus.git`). - Indri's Rust toolchain (`~/.cargo/bin`) is on the agent's `PATH` for this, and - the plist pins `RUSTUP_TOOLCHAIN=stable` — the - launchagent runs without mise, so a bare `cargo` shim would otherwise fall back - to rustup's *default* toolchain, which can lag behind heph's `rust-version` floor - (1.89) and silently fail the build. -- **PWA** (`--web-root`) serves the [heph-pwa] mobile shell; Caddy terminates TLS - at `heph.ops.eblu.me` so the PWA runs in a secure context (service worker, - install-to-home-screen, voice capture). 
- -[heph-pwa]: https://github.com/eblume/hephaestus - -The hub binds `0.0.0.0` so tailnet spokes can also sync directly -(`http://indri.tail8d86e.ts.net:8787`); access is gated by Authentik OIDC either -way — tailnet reachability alone is not enough. - -## Authentication (Authentik OIDC, device-code) - -The hub verifies an OIDC bearer token on every sync. The `heph` application is a -**public** OAuth2 client using the **device-code flow** (RFC 8628), provisioned -in the [[authentik]] blueprint (`argocd/manifests/authentik/configmap-blueprint.yaml`): - -- Issuer: `https://authentik.ops.eblu.me/application/o/heph/` -- Audience / client id: `heph` -- Restricted to the `admins` group (single-owner, sensitive data). -- Scope mappings: `openid`, `email`, `profile`, **`offline_access`**. - -> **`offline_access` is required for durable sync.** The `heph` CLI requests -> `scope = "openid offline_access"`, and a refresh token is only issued for the -> 30-day refresh-token window when the provider actually grants `offline_access`. -> Without that scope mapping the refresh token is bound to the login **session**; -> once the session lapses, hephd's `refresh_token` grant returns `400 Bad -> Request`, the bearer can't be refreshed, and spoke sync silently degrades -> (`heph sync --status` → `auth_failure: true`). `heph auth login` papers over it -> until the next session expiry. Keep `offline_access` in the provider's -> `property_mappings`. - -Because no Authentik instance ships a device-code flow by default, the blueprint -also creates `default-device-code-flow` and binds it to the default brand's -`flow_device_code`. Devices obtain a token with `heph auth login`; the PWA -currently takes a pasted token (in-app device-code login is upstream follow-up). - -## Data seeding (Path A, one-time) - -The hub was seeded from the existing `gilbert` device so no task history was -lost. 
heph's data-safe bring-up ("Path A") has the hub **adopt the device's -identity** rather than rewriting the device: - -1. Quiesce the seed device: `heph daemon stop` (on gilbert). -2. Copy its store to indri: `scp ~/.local/share/heph/heph.db indri:~/.local/share/heph/heph.db`. -3. Give the hub its **own device origin** (keeps gilbert's `owner_id` + data; - `hephd` regenerates a fresh `origin` on next start when it is missing): - ```fish - ssh indri "sqlite3 ~/.local/share/heph/heph.db \"DELETE FROM meta WHERE key='origin';\"" - ``` -4. `mise run provision-indri -- --tags heph` (installs hephd, stages the PWA, - loads the launchagent → hub starts on the seeded store). - -Only `meta.origin` changes; `owner_id`, nodes, op-log, and links are copied -untouched. A clean `hephd --owner-id` / seed command is tracked upstream as -hephaestus follow-up — until then this manual reset is the documented path. - -## Connecting a spoke (e.g. gilbert) - -A device joins by running its local daemon with the hub URL + OIDC client and -logging in once: - -```bash -hephd --mode local --hub-url http://indri.tail8d86e.ts.net:8787 \ - --oidc-issuer https://authentik.ops.eblu.me/application/o/heph/ \ - --oidc-client-id heph -heph auth login --hub-url http://indri.tail8d86e.ts.net:8787 \ - --issuer https://authentik.ops.eblu.me/application/o/heph/ --client-id heph -``` - -> **Use the direct `http://…:8787` tailnet URL for sync, not the Caddy HTTPS -> URL.** hephd's sync client is plain-HTTP-only; pointing `--hub-url` at -> `https://heph.ops.eblu.me` fails with a confusing `error sending request` -> (the HTTP connector rejects the `https` scheme before connecting). Tailscale -> encrypts the transport, and the OIDC bearer token still gates every request. -> `heph.ops.eblu.me` (Caddy TLS) exists only for the browser PWA, which needs a -> secure context. The cached token is keyed by the exact `--hub-url`, so use the -> same value for `hephd` and `heph auth login`. 
- -> **Caveat:** `heph daemon` cannot yet bake hub/spoke flags into the generated -> launchd plist (upstream gap). On a spoke whose plist is managed by `heph -> daemon`, the hub/OIDC flags must be hand-added — and a later `heph daemon -> start/restart` will regenerate the plist and drop them. Avoid `heph daemon` -> subcommands on a configured spoke until that gap is closed; reload via -> `launchctl` instead. - -## Related - -- [[indri]] — host -- [[authentik]] — OIDC provider -- [[caddy]] — TLS termination for `heph.ops.eblu.me` diff --git a/docs/reference/services/jellyfin.md b/docs/reference/services/jellyfin.md index c7b3074..bbdfafd 100644 --- a/docs/reference/services/jellyfin.md +++ b/docs/reference/services/jellyfin.md @@ -1,7 +1,7 @@ --- title: Jellyfin -modified: 2026-06-08 -last-reviewed: 2026-06-08 +modified: 2026-02-07 +last-reviewed: 2026-03-23 tags: - service - media @@ -41,24 +41,6 @@ Dashboard > Playback: 2. Allow hardware encoding: Enabled 3. VPP Tone mapping: Enabled -## Upgrades - -Installed via Homebrew cask (`state: present`, unpinned), so the Ansible role -won't bump an already-installed cask. To upgrade, run on indri: - -```bash -brew upgrade --cask jellyfin -``` - -**Gatekeeper gotcha:** a cask upgrade replaces `/Applications/Jellyfin.app` and -re-applies the `com.apple.quarantine` xattr. When launchd respawns the service, -the new binary hangs silently — process alive but ~0 CPU, no logs, no listening -socket — because Gatekeeper is holding the first launch pending approval. -Removing the xattr over SSH fails (`xattr -dr com.apple.quarantine ...` → -"Operation not permitted", blocked by macOS TCC). Approve the first-launch -dialog on indri's GUI console (or run the `xattr` removal from a local Terminal -with Full Disk Access), then reload the LaunchAgent. 
- ## Observability - Metrics: `jellyfin_metrics` ansible role diff --git a/docs/reference/services/prowler.md b/docs/reference/services/prowler.md index 9f7e4b3..f45955f 100644 --- a/docs/reference/services/prowler.md +++ b/docs/reference/services/prowler.md @@ -1,6 +1,6 @@ --- title: Prowler -modified: 2026-06-08 +modified: 2026-03-24 last-reviewed: 2026-03-24 tags: - service @@ -17,20 +17,20 @@ CIS Kubernetes Benchmark scanner for compliance posture reporting. |----------|-------| | **Namespace** | `prowler` | | **Image** | `registry.ops.eblu.me/blumeops/prowler` (see `argocd/manifests/prowler/kustomization.yaml` for current tag) | -| **Schedule** | K8s CIS: Sunday 3am | -| **Reports** | `sifaka:/volume1/reports/prowler/` (NFS) | +| **Schedule** | K8s CIS: Sunday 3am / Image: Saturday 3am / IaC: Saturday 2am | +| **Reports** | `sifaka:/volume1/reports/prowler/`, `prowler-images/`, `prowler-iac/` (NFS) | | **Manifests** | `argocd/manifests/prowler/` | ## What it does -Runs Prowler 5 as a single CronJob: +Runs Prowler 5 as three CronJobs: - **K8s CIS scan** (Sunday) — CIS Kubernetes Benchmark v1.11 checks across pod security, RBAC, apiserver, etcd, kubelet, controller-manager, and scheduler +- **Image scan** (Saturday) — CVE, secret, and misconfiguration scanning of all `blumeops/*` container images in the registry via Trivy +- **IaC scan** (Saturday) — static analysis of Dockerfiles, K8s manifests, and other IaC files in the repo via Trivy Reports are written in HTML, CSV, and JSON-OCSF to the NFS share on sifaka. -The **image** and **IaC** scans (formerly Saturday CronJobs) were retired in 2026-06 — they generated tens of thousands of un-actioned findings weekly. See [[deploy-prowler#Why only the K8s CIS scan]].
- ## See also - [[security]] — security & compliance posture overview diff --git a/mise-tasks/review-compliance-reports b/mise-tasks/review-compliance-reports index f2a0a54..24d2afc 100755 --- a/mise-tasks/review-compliance-reports +++ b/mise-tasks/review-compliance-reports @@ -10,19 +10,19 @@ Covers: - Prowler K8s CIS (in-cluster): per-finding detail + - Prowler container image scans: grouped by check + resource + - Prowler IaC manifest scans: grouped by check + resource - Kingfisher secret scanning: TODO — pending upstream JSON/CSV output support (currently HTML-only; contribute from spork) -The Prowler container-image CVE scan and IaC scan were retired in 2026-06 -(see docs/how-to/operations/deploy-prowler.md) — they produced tens of -thousands of un-actioned findings weekly. Only the K8s CIS scan remains. - -For the Prowler scan, copies the two most recent CSV reports, parses +For each Prowler scan, copies the two most recent CSV reports, parses them, and displays: 1. Overall status (pass/fail/manual/muted counts) 2. Unmuted failures by severity 3. Delta from the previous report (new vs resolved) - 4. Actionable unmuted failures (per-finding detail) + 4. Actionable unmuted failures (per-finding for in-cluster; grouped + by check ID and resource for image/IaC because they have far too + many findings to list individually) This is the primary tool for the weekly compliance report review. 
""" @@ -39,9 +39,11 @@ from rich.console import Console from rich.panel import Panel from rich.table import Table -PROWLER_SCANS: list[tuple[str, str]] = [ - # (label, sifaka base path) - ("K8s CIS (In-Cluster)", "/volume1/reports/prowler"), +PROWLER_SCANS: list[tuple[str, str, bool]] = [ + # (label, sifaka base path, group_findings) + ("K8s CIS (In-Cluster)", "/volume1/reports/prowler", False), + ("Container Images", "/volume1/reports/prowler-images", True), + ("IaC (manifests)", "/volume1/reports/prowler-iac", True), ] console = Console() @@ -332,8 +334,14 @@ def summarize_report( tmpdir: str, *, show_muted: bool = False, + group_findings: bool = False, ) -> None: - """Fetch and summarize the latest Prowler report under `base`.""" + """Fetch and summarize the latest Prowler report under `base`. + + When `group_findings` is True, top-N CHECK_ID and RESOURCE_NAME tables + are shown instead of a per-finding detail table — appropriate for + image and IaC scans that produce thousands of findings. + """ console.rule(f"[bold]{label}[/bold]") csvs = list_reports(base) if not csvs: @@ -450,29 +458,36 @@ def summarize_report( ) console.print() - if new_keys: - console.print("[bold red]New Unmuted Failures:[/bold red]") - for k in sorted(new_keys): - r = curr_keys[k] - console.print( - f" [{r['SEVERITY']}] {r['CHECK_ID']}: " - f"{r['STATUS_EXTENDED'][:120]}" - ) - console.print() + # For grouped scans the new/resolved listings are too noisy + # (potentially thousands of lines). Skip the listings; the count + # is in the panel above and detail is in the grouped tables. 
+ if not group_findings: + if new_keys: + console.print("[bold red]New Unmuted Failures:[/bold red]") + for k in sorted(new_keys): + r = curr_keys[k] + console.print( + f" [{r['SEVERITY']}] {r['CHECK_ID']}: " + f"{r['STATUS_EXTENDED'][:120]}" + ) + console.print() - if resolved_keys: - console.print("[bold green]Resolved:[/bold green]") - for k in sorted(resolved_keys): - r = prev_keys[k] - console.print( - f" [dim][{r['SEVERITY']}] {r['CHECK_ID']}: " - f"{r['STATUS_EXTENDED'][:120]}[/dim]" - ) - console.print() + if resolved_keys: + console.print("[bold green]Resolved:[/bold green]") + for k in sorted(resolved_keys): + r = prev_keys[k] + console.print( + f" [dim][{r['SEVERITY']}] {r['CHECK_ID']}: " + f"{r['STATUS_EXTENDED'][:120]}[/dim]" + ) + console.print() - # --- Unmuted failure details --- + # --- Unmuted failure details (grouped or per-finding) --- if latest["unmuted"]: - _print_findings_detail(latest["unmuted"]) + if group_findings: + _print_grouped_findings(latest["unmuted"]) + else: + _print_findings_detail(latest["unmuted"]) # --- Muted findings summary --- if show_muted and latest["muted"]: @@ -551,6 +566,75 @@ def _print_findings_detail(unmuted: list[dict]) -> None: console.print() +def _worst_severity(rows: list[dict]) -> str: + """Return the most severe severity label across `rows`.""" + if not rows: + return "" + return min( + (r["SEVERITY"] for r in rows), + key=lambda s: severity_sort({"SEVERITY": s}), + ) + + +def _print_grouped_findings(unmuted: list[dict], top_n: int = 15) -> None: + """Top-N tables grouped by CHECK_ID and RESOURCE_NAME. + + Used for image and IaC scans where per-finding tables would be too + large to be useful. Shows count and worst severity for each group. 
+ """ + by_check: dict[str, list[dict]] = {} + by_resource: dict[str, list[dict]] = {} + for r in unmuted: + by_check.setdefault(r["CHECK_ID"], []).append(r) + by_resource.setdefault(r.get("RESOURCE_NAME", "") or "(no resource)", []).append(r) + + check_table = Table( + show_header=True, + header_style="bold", + title=f"Top {top_n} Checks by Unmuted Finding Count", + ) + check_table.add_column("Worst Sev") + check_table.add_column("Check ID") + check_table.add_column("Count", justify="right") + + for check, rows in sorted( + by_check.items(), key=lambda kv: -len(kv[1]) + )[:top_n]: + worst = _worst_severity(rows) + style = _sev_style(worst) + check_table.add_row( + f"[{style}]{worst}[/{style}]" if style else worst, + check, + str(len(rows)), + ) + + console.print(check_table) + console.print() + + res_table = Table( + show_header=True, + header_style="bold", + title=f"Top {top_n} Resources by Unmuted Finding Count", + ) + res_table.add_column("Worst Sev") + res_table.add_column("Resource") + res_table.add_column("Count", justify="right") + + for resource, rows in sorted( + by_resource.items(), key=lambda kv: -len(kv[1]) + )[:top_n]: + worst = _worst_severity(rows) + style = _sev_style(worst) + res_table.add_row( + f"[{style}]{worst}[/{style}]" if style else worst, + resource[:80], + str(len(rows)), + ) + + console.print(res_table) + console.print() + + def main( full: Annotated[ bool, typer.Option(help="(reserved) currently a no-op; all unmuted failures already shown") @@ -562,12 +646,13 @@ def main( del full # historical flag, kept for backwards compatibility with tempfile.TemporaryDirectory() as tmpdir: - for label, base in PROWLER_SCANS: + for label, base, group in PROWLER_SCANS: summarize_report( label, base, tmpdir, show_muted=show_muted, + group_findings=group, ) # --- Node-level MANUAL check verification --- diff --git a/nixos/ringtail/flake.lock b/nixos/ringtail/flake.lock index 340bd9d..bb60501 100644 --- a/nixos/ringtail/flake.lock +++ 
b/nixos/ringtail/flake.lock @@ -7,11 +7,11 @@ ] }, "locked": { - "lastModified": 1780894562, - "narHash": "sha256-c3430xwxwhHipl3jigUGMMBfpaMylDqytW/kdmB3ZGs=", + "lastModified": 1780290312, + "narHash": "sha256-eTAlX0CwgB84Ts3GaBd944A3DRXVMzgA0EqroZBISUo=", "owner": "nix-community", "repo": "disko", - "rev": "24fed06cac83bcc44ac8efbb57cab1a82fa0bedc", + "rev": "115e5211780054d8a890b41f0b7734cafad54dfe", "type": "github" }, "original": { @@ -43,11 +43,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1780511130, - "narHash": "sha256-2v9lT4ya59Lh1FqPeLnz1MoX9y/wz2huqfe9RtQZITk=", + "lastModified": 1779796641, + "narHash": "sha256-ZsIrKmhp4vbBXoXXmR/tBXA/UCsAQiJL9vsgZEduhVY=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "535f3e6942cb1cead3929c604320d3db54b542b9", + "rev": "25f538306313eae3927264466c70d7001dcea1df", "type": "github" }, "original": { diff --git a/service-versions.yaml b/service-versions.yaml index 419d129..cc9dc9e 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -414,23 +414,6 @@ services: upstream-source: https://github.com/caddyserver/caddy/releases notes: Built from source with Gandi DNS and Layer 4 plugins - - name: heph - type: ansible - last-reviewed: 2026-06-05 - current-version: "v1.2.1" - upstream-source: https://forge.eblu.me/eblume/hephaestus/releases - notes: >- - hephaestus task/context sync hub on indri (server-mode launchagent, - ansible/roles/heph; cargo-built from the forge). SELF-UPDATING: hephd - polls the forge for newer releases every 10 min and rebuilds + restarts - itself, so the running version drifts AHEAD of the ansible heph_version - pin. current-version here is the last observed/deployed tag, not a hard - pin — verify the live version via `curl https://heph.ops.eblu.me/config` - is served (hub up) and the hub log's `current=` line. 
Reconciling this - self-update vs IaC-pin drift is tracked in the heph "Hephaestus" project: - "Reconcile hephd self-update with ansible-pinned version (drift on indri - hub)" (node 01KTBXWT6XTHNDH92CVJY88E5K). - - name: borgmatic type: ansible last-reviewed: 2026-04-15 @@ -440,15 +423,9 @@ services: - name: jellyfin type: ansible - last-reviewed: 2026-06-08 - current-version: "10.11.11" + last-reviewed: 2026-03-17 + current-version: "10.11.6" upstream-source: https://github.com/jellyfin/jellyfin/releases - notes: >- - Homebrew cask (state: present, unpinned). Upgrade with - `brew upgrade --cask jellyfin` on indri. After upgrade the .app is - re-quarantined; launchd-spawned launch hangs silently until the - Gatekeeper first-launch dialog is approved on indri's GUI console - (xattr removal over SSH is blocked by TCC). - name: automounter type: ansible