From 6e37abda5ddeca7c396c3d4470cf24674af37dd9 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 08:14:12 -0700 Subject: [PATCH 01/20] C1: deploy adelaide-baby-shower-app to ringtail k3s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the Adelaide / Heidi / Addie baby shower app — a Django guest splash, raffle picker, and prize-assignment console — on ringtail k3s. Public landing at shower.eblu.me (via fly proxy), tailnet admin at shower.ops.eblu.me. App source: forge.eblu.me/eblume/adelaide-baby-shower-app, wheel-published to the Forgejo Packages PyPI index. Manifests under argocd/manifests/shower/: NFS-backed PVC for /app/media, local-path PVC for SQLite, ExternalSecret pulling DJANGO_SECRET_KEY from 1Password (item "Shower (blumeops)"), Tailscale ProxyGroup ingress. Defense-in-depth for the public surface: - /admin/ blocked at the fly edge except /admin/login/ and /admin/logout/ - shower_auth rate limit on the login path - new fail2ban filter+jail with a per-service shower-deny.conf (nginx-deny action generalized to accept nginx_deny_file) - django-axes (5 / 1h) keyed on (username, ip_address) Plus: Caddy route on indri, Pulumi gandi CNAME, Grafana APM dashboard mirroring docs-apm.json, runbook at how-to/operations/shower-app.md, and a service-versions entry. X-Clacks-Overhead set on the new server block — GNU Terry Pratchett. Build: containers/shower/default.nix uses dockerTools to ship a nixpkgs Python plus a startup wrapper that installs the wheel into /app/data/.venv on first boot and execs gunicorn. Lets the wheel come from forge PyPI without pinning hashes for every transitive dep. 
Prerequisites tracked in the runbook (not yet executed): - NFS share sifaka:/volume1/shower (manual Synology step) - 1Password item "Shower (blumeops)" with secret-key field - container build via `mise run container-build-and-release shower` - Pulumi dns-up after merge - fly certs add shower.eblu.me Co-Authored-By: Claude Opus 4.7 (1M context) --- ansible/roles/caddy/defaults/main.yml | 3 + argocd/apps/shower.yaml | 20 ++ .../dashboards/configmap-shower-apm.yaml | 229 ++++++++++++++++++ .../grafana-config/kustomization.yaml | 1 + argocd/manifests/shower/configmap.yaml | 11 + argocd/manifests/shower/deployment.yaml | 81 +++++++ argocd/manifests/shower/external-secret.yaml | 19 ++ .../manifests/shower/ingress-tailscale.yaml | 30 +++ argocd/manifests/shower/kustomization.yaml | 17 ++ argocd/manifests/shower/pv-nfs.yaml | 26 ++ argocd/manifests/shower/pvc.yaml | 30 +++ argocd/manifests/shower/service.yaml | 13 + containers/shower/default.nix | 117 +++++++++ docs/changelog.d/shower-app-deploy.feature.md | 4 + docs/changelog.d/shower-app-deploy.infra.md | 8 + docs/how-to/operations/shower-app.md | 174 +++++++++++++ docs/reference/kubernetes/apps.md | 1 + fly/Dockerfile | 2 + fly/fail2ban/action.d/nginx-deny.conf | 13 +- fly/fail2ban/filter.d/shower-admin-login.conf | 13 + fly/fail2ban/jail.d/shower.conf | 8 + fly/nginx.conf | 101 ++++++++ fly/start.sh | 4 +- pulumi/gandi/__main__.py | 10 + service-versions.yaml | 10 + 25 files changed, 942 insertions(+), 3 deletions(-) create mode 100644 argocd/apps/shower.yaml create mode 100644 argocd/manifests/grafana-config/dashboards/configmap-shower-apm.yaml create mode 100644 argocd/manifests/shower/configmap.yaml create mode 100644 argocd/manifests/shower/deployment.yaml create mode 100644 argocd/manifests/shower/external-secret.yaml create mode 100644 argocd/manifests/shower/ingress-tailscale.yaml create mode 100644 argocd/manifests/shower/kustomization.yaml create mode 100644 argocd/manifests/shower/pv-nfs.yaml create mode 
100644 argocd/manifests/shower/pvc.yaml create mode 100644 argocd/manifests/shower/service.yaml create mode 100644 containers/shower/default.nix create mode 100644 docs/changelog.d/shower-app-deploy.feature.md create mode 100644 docs/changelog.d/shower-app-deploy.infra.md create mode 100644 docs/how-to/operations/shower-app.md create mode 100644 fly/fail2ban/filter.d/shower-admin-login.conf create mode 100644 fly/fail2ban/jail.d/shower.conf diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml index 6eada76..da6f3f9 100644 --- a/ansible/roles/caddy/defaults/main.yml +++ b/ansible/roles/caddy/defaults/main.yml @@ -101,6 +101,9 @@ caddy_services: - name: paperless host: "paperless.{{ caddy_domain }}" backend: "https://paperless.tail8d86e.ts.net" + - name: shower + host: "shower.{{ caddy_domain }}" + backend: "https://shower.tail8d86e.ts.net" - name: sifaka host: "nas.{{ caddy_domain }}" backend: "http://sifaka:5000" diff --git a/argocd/apps/shower.yaml b/argocd/apps/shower.yaml new file mode 100644 index 0000000..c4a7a62 --- /dev/null +++ b/argocd/apps/shower.yaml @@ -0,0 +1,20 @@ +# Adelaide / Heidi / Addie baby shower app — Django guest/raffle/prize system. +# Public landing page at shower.eblu.me (via fly proxy), staff console + admin +# at shower.ops.eblu.me (tailnet only). Built from forge PyPI wheel. 
+apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: shower + namespace: argocd +spec: + project: default + source: + repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git + targetRevision: main + path: argocd/manifests/shower + destination: + server: https://ringtail.tail8d86e.ts.net:6443 + namespace: shower + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/argocd/manifests/grafana-config/dashboards/configmap-shower-apm.yaml b/argocd/manifests/grafana-config/dashboards/configmap-shower-apm.yaml new file mode 100644 index 0000000..96348e8 --- /dev/null +++ b/argocd/manifests/grafana-config/dashboards/configmap-shower-apm.yaml @@ -0,0 +1,229 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-shower-apm + namespace: monitoring + labels: + grafana_dashboard: "1" +data: + shower-apm.json: | + { + "annotations": { "list": [] }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisLabel": "req/s", + "drawStyle": "line", + "fillOpacity": 20, + "lineInterpolation": "linear", + "lineWidth": 1, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 0 }, + "id": 1, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum by (status) (rate(flyio_nginx_http_requests_total{host=\"shower.eblu.me\"}[5m]))", "legendFormat": "{{status}}", 
"refId": "A" } + ], + "title": "Request Rate by Status", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.01 }, { "color": "red", "value": 0.05 }] }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 8, "x": 16, "y": 0 }, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum(rate(flyio_nginx_http_requests_total{host=\"shower.eblu.me\",status=~\"5..\"}[5m])) / sum(rate(flyio_nginx_http_requests_total{host=\"shower.eblu.me\"}[5m]))", "refId": "A" } + ], + "title": "Error Rate (5xx)", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }, { "color": "red", "value": 5 }] }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 16, "y": 4 }, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum(increase(flyio_nginx_http_requests_total{host=\"shower.eblu.me\",request_uri=~\"/admin/login.*\",status=~\"4..\"}[$__range]))", "refId": "A" } + ], + "title": "Failed admin logins (range)", + "type": "stat" + }, + { + "datasource": 
{ "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 20, "y": 4 }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum(rate(flyio_nginx_http_requests_total{host=\"shower.eblu.me\"}[5m]))", "refId": "A" } + ], + "title": "Current RPS", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisLabel": "seconds", + "drawStyle": "line", + "fillOpacity": 10, + "lineInterpolation": "linear", + "lineWidth": 1, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, + "id": 5, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.50, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{host=\"shower.eblu.me\"}[5m])))", "legendFormat": "p50", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.90, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{host=\"shower.eblu.me\"}[5m])))", "legendFormat": 
"p90", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.99, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{host=\"shower.eblu.me\"}[5m])))", "legendFormat": "p99", "refId": "C" } + ], + "title": "Latency Percentiles", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisLabel": "", + "drawStyle": "line", + "fillOpacity": 20, + "lineInterpolation": "linear", + "lineWidth": 1, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, + "id": 6, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum(rate(flyio_nginx_http_response_bytes_total{host=\"shower.eblu.me\"}[5m]))", "legendFormat": "Bandwidth", "refId": "A" } + ], + "title": "Bandwidth", + "type": "timeseries" + }, + { + "datasource": { "type": "loki", "uid": "loki" }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 }, + "id": 7, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { "datasource": { "type": "loki", "uid": "loki" }, "expr": "{instance=\"flyio-proxy\", job=\"flyio-nginx\"} |= \"shower.eblu.me\" | json | line_format \"{{.client_ip}} {{.request_method}} {{.request_uri}} {{.status}} {{.request_time}}s\"", "refId": "A" } + ], + "title": 
"Recent Access Logs", + "type": "logs" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["shower", "flyio", "apm"], + "templating": { "list": [] }, + "time": { "from": "now-6h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Shower APM", + "uid": "shower-apm", + "version": 1, + "weekStart": "" + } diff --git a/argocd/manifests/grafana-config/kustomization.yaml b/argocd/manifests/grafana-config/kustomization.yaml index a6e8000..b518043 100644 --- a/argocd/manifests/grafana-config/kustomization.yaml +++ b/argocd/manifests/grafana-config/kustomization.yaml @@ -22,6 +22,7 @@ resources: - dashboards/configmap-transmission.yaml - dashboards/configmap-cv-apm.yaml - dashboards/configmap-docs-apm.yaml + - dashboards/configmap-shower-apm.yaml - dashboards/configmap-flyio.yaml - dashboards/configmap-sifaka-disks.yaml - dashboards/configmap-forgejo.yaml diff --git a/argocd/manifests/shower/configmap.yaml b/argocd/manifests/shower/configmap.yaml new file mode 100644 index 0000000..111f524 --- /dev/null +++ b/argocd/manifests/shower/configmap.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: shower-app-config + namespace: shower +data: + DJANGO_DEBUG: "0" + # Admin lives behind the tailnet; the public proxy blocks /admin/ except + # /admin/login/ and /admin/logout/. /host/'s "Django admin" link follows + # this var so admin CRUD only happens on the tailnet hostname. + DJANGO_ADMIN_URL: "https://shower.ops.eblu.me/admin/" diff --git a/argocd/manifests/shower/deployment.yaml b/argocd/manifests/shower/deployment.yaml new file mode 100644 index 0000000..70547aa --- /dev/null +++ b/argocd/manifests/shower/deployment.yaml @@ -0,0 +1,81 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: shower + namespace: shower +spec: + replicas: 1 + # SQLite + RWO data PVC: only one writer at a time. Recreate ensures the + # old pod's lock on the local-path volume is released before the new one + # mounts it. 
+ strategy: + type: Recreate + selector: + matchLabels: + app: shower + template: + metadata: + labels: + app: shower + spec: + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + seccompProfile: + type: RuntimeDefault + containers: + - name: shower + image: registry.ops.eblu.me/blumeops/shower:kustomized + securityContext: + runAsNonRoot: true + allowPrivilegeEscalation: false + ports: + - containerPort: 8000 + name: http + envFrom: + - configMapRef: + name: shower-app-config + - secretRef: + name: shower-app-secrets + volumeMounts: + - name: media + mountPath: /app/media + - name: data + mountPath: /app/data + resources: + requests: + memory: "128Mi" + cpu: "50m" + limits: + memory: "512Mi" + cpu: "500m" + livenessProbe: + httpGet: + path: / + port: 8000 + httpHeaders: + - name: Host + value: shower.ops.eblu.me + - name: X-Forwarded-Proto + value: https + initialDelaySeconds: 30 + periodSeconds: 30 + readinessProbe: + httpGet: + path: / + port: 8000 + httpHeaders: + - name: Host + value: shower.ops.eblu.me + - name: X-Forwarded-Proto + value: https + initialDelaySeconds: 10 + periodSeconds: 10 + volumes: + - name: media + persistentVolumeClaim: + claimName: shower-media + - name: data + persistentVolumeClaim: + claimName: shower-data diff --git a/argocd/manifests/shower/external-secret.yaml b/argocd/manifests/shower/external-secret.yaml new file mode 100644 index 0000000..005a7e9 --- /dev/null +++ b/argocd/manifests/shower/external-secret.yaml @@ -0,0 +1,19 @@ +--- +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: shower-app-secrets + namespace: shower +spec: + refreshInterval: 1h + secretStoreRef: + kind: ClusterSecretStore + name: onepassword-blumeops + target: + name: shower-app-secrets + creationPolicy: Owner + data: + - secretKey: DJANGO_SECRET_KEY + remoteRef: + key: "Shower (blumeops)" + property: secret-key diff --git a/argocd/manifests/shower/ingress-tailscale.yaml 
b/argocd/manifests/shower/ingress-tailscale.yaml new file mode 100644 index 0000000..d09a696 --- /dev/null +++ b/argocd/manifests/shower/ingress-tailscale.yaml @@ -0,0 +1,30 @@ +# Tailscale Ingress for shower app. +# Exposes at shower.tail8d86e.ts.net. +# Caddy on indri proxies shower.ops.eblu.me here. The fly proxy then proxies +# shower.eblu.me through Caddy to this same endpoint (fly does not contact +# the k8s service directly — all traffic routes through indri's Caddy). +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: shower-tailscale + namespace: shower + annotations: + tailscale.com/proxy-class: "default" + tailscale.com/proxy-group: "ingress" + gethomepage.dev/enabled: "true" + gethomepage.dev/name: "Shower" + gethomepage.dev/group: "Home" + gethomepage.dev/icon: "mdi-baby" + gethomepage.dev/description: "Adelaide baby shower" + gethomepage.dev/href: "https://shower.ops.eblu.me" + gethomepage.dev/pod-selector: "app=shower" +spec: + ingressClassName: tailscale + defaultBackend: + service: + name: shower + port: + number: 8000 + tls: + - hosts: + - shower diff --git a/argocd/manifests/shower/kustomization.yaml b/argocd/manifests/shower/kustomization.yaml new file mode 100644 index 0000000..cd4dd08 --- /dev/null +++ b/argocd/manifests/shower/kustomization.yaml @@ -0,0 +1,17 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: shower + +resources: + - configmap.yaml + - external-secret.yaml + - pv-nfs.yaml + - pvc.yaml + - service.yaml + - ingress-tailscale.yaml + - deployment.yaml + +images: + - name: registry.ops.eblu.me/blumeops/shower + newTag: v1.0.0-PLACEHOLDER-nix diff --git a/argocd/manifests/shower/pv-nfs.yaml b/argocd/manifests/shower/pv-nfs.yaml new file mode 100644 index 0000000..d07cecc --- /dev/null +++ b/argocd/manifests/shower/pv-nfs.yaml @@ -0,0 +1,26 @@ +# NFS PersistentVolume for shower app media uploads (prize photos). 
+# Requires: NFS share on sifaka at /volume1/shower with NFS permissions +# for ringtail. +# +# To create on Synology: +# 1. Control Panel > Shared Folder > Create +# 2. Name: shower, Location: Volume 1 +# 3. Control Panel > File Services > NFS > NFS Rules +# 4. Add rule for "shower" share: Hostname=ringtail, Privilege=Read/Write, +# Squash=No mapping +# 5. chown -R 1000:1000 /volume1/shower (or pick another UID and align the +# container's runAsUser to match) +apiVersion: v1 +kind: PersistentVolume +metadata: + name: shower-media-nfs-pv +spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteMany + persistentVolumeReclaimPolicy: Retain + storageClassName: "" + nfs: + server: sifaka + path: /volume1/shower diff --git a/argocd/manifests/shower/pvc.yaml b/argocd/manifests/shower/pvc.yaml new file mode 100644 index 0000000..47fee54 --- /dev/null +++ b/argocd/manifests/shower/pvc.yaml @@ -0,0 +1,30 @@ +# Media PVC — RWX NFS share for /app/media (prize photo uploads). +# SQLite DB lives in a separate local-path PVC; NFS file locking is not +# reliable enough for SQLite's WAL/journal. +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: shower-media + namespace: shower +spec: + accessModes: + - ReadWriteMany + storageClassName: "" + volumeName: shower-media-nfs-pv + resources: + requests: + storage: 10Gi +--- +# Database PVC — k3s local-path (default storage class) for SQLite. +# RWO is fine: the deployment runs with a single replica. 
+apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: shower-data + namespace: shower +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi diff --git a/argocd/manifests/shower/service.yaml b/argocd/manifests/shower/service.yaml new file mode 100644 index 0000000..0a73aab --- /dev/null +++ b/argocd/manifests/shower/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: shower + namespace: shower +spec: + selector: + app: shower + ports: + - name: http + port: 8000 + targetPort: 8000 + protocol: TCP diff --git a/containers/shower/default.nix b/containers/shower/default.nix new file mode 100644 index 0000000..5b92e85 --- /dev/null +++ b/containers/shower/default.nix @@ -0,0 +1,117 @@ +# Nix-built shower app container — Adelaide / Heidi / Addie baby shower. +# +# The app is published as a wheel to the Forgejo PyPI index at +# https://forge.eblu.me/api/packages/eblume/pypi/. Rather than pin and +# fetch the wheel + transitive deps at nix build time (which requires +# every wheel hash to be tracked here), this image ships a Python from +# nixpkgs and pip-installs the wheel into a venv on /app/data at first +# boot. Subsequent boots reuse the venv. This trades reproducibility for +# a much simpler nix file. +# +# Built on the nix-container-builder runner (ringtail, amd64) so the +# image runs natively on ringtail's k3s without QEMU emulation. +{ pkgs ? import <nixpkgs> { } }: + +let + version = "1.0.0"; + + python = pkgs.python314; + appVersion = version; + + entrypoint = pkgs.writeShellScript "shower-entrypoint" '' + set -eu + + APP_DIR=/app + DATA_DIR=/app/data + VENV_DIR=$DATA_DIR/.venv + INSTALLED_MARKER=$VENV_DIR/.installed-${appVersion} + + export HOME=$DATA_DIR + export PIP_DISABLE_PIP_VERSION_CHECK=1 + export PIP_NO_CACHE_DIR=1 + + mkdir -p "$DATA_DIR" "$APP_DIR/media" + + # First boot (or version change): create venv and install the app + deps.
+ # The wheel comes from the internal devpi mirror (default index), with + # forge.eblu.me as the extra index for the adelaide-baby-shower-app wheel. + if [ ! -f "$INSTALLED_MARKER" ]; then + echo "shower: installing adelaide-baby-shower-app==${appVersion} into $VENV_DIR" + rm -rf "$VENV_DIR" + ${python}/bin/python -m venv "$VENV_DIR" + "$VENV_DIR/bin/pip" install --upgrade pip + "$VENV_DIR/bin/pip" install \ + --index-url=https://pypi.ops.eblu.me/root/pypi/+simple/ \ + --extra-index-url=https://forge.eblu.me/api/packages/eblume/pypi/simple/ \ + "adelaide-baby-shower-app==${appVersion}" gunicorn + touch "$INSTALLED_MARKER" + fi + + # The wheel's config/settings.py uses BASE_DIR = parent.parent of its + # own __file__, so MEDIA_ROOT and DATABASES.NAME resolve relative to + # site-packages. Override with a thin shim placed in $APP_DIR. + cat > "$APP_DIR/local_settings.py" <<'PY' + from config.settings import * # noqa: F401,F403 + + DATABASES["default"]["NAME"] = "/app/data/db.sqlite3" + MEDIA_ROOT = "/app/media" + STATIC_ROOT = "/app/data/staticfiles" + PY + + export PYTHONPATH=$APP_DIR + export DJANGO_SETTINGS_MODULE=local_settings + + cd "$APP_DIR" + + echo "shower: running migrations" + "$VENV_DIR/bin/python" -m django migrate --noinput + + echo "shower: collecting static files" + "$VENV_DIR/bin/python" -m django collectstatic --noinput --clear + + echo "shower: starting gunicorn" + exec "$VENV_DIR/bin/gunicorn" \ + --bind 0.0.0.0:8000 \ + --workers 2 \ + --forwarded-allow-ips='*' \ + config.wsgi:application + ''; +in + +pkgs.dockerTools.buildLayeredImage { + name = "blumeops/shower"; + contents = [ + python + pkgs.cacert + pkgs.tzdata + pkgs.bashInteractive + pkgs.coreutils + pkgs.gnused + pkgs.gnugrep + ]; + + # /app is writable by uid 1000 (matches deployment.yaml runAsUser). 
+ fakeRootCommands = '' + mkdir -p app/data app/media tmp + chmod 1777 tmp + chown -R 1000:1000 app + ''; + enableFakechroot = true; + + config = { + Entrypoint = [ "${entrypoint}" ]; + Env = [ + "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" + "TZDIR=${pkgs.tzdata}/share/zoneinfo" + "TZ=America/Los_Angeles" + "TMPDIR=/tmp" + "LANG=C.UTF-8" + "LC_ALL=C.UTF-8" + ]; + ExposedPorts = { + "8000/tcp" = { }; + }; + User = "1000"; + WorkingDir = "/app"; + }; +} diff --git a/docs/changelog.d/shower-app-deploy.feature.md b/docs/changelog.d/shower-app-deploy.feature.md new file mode 100644 index 0000000..96218be --- /dev/null +++ b/docs/changelog.d/shower-app-deploy.feature.md @@ -0,0 +1,4 @@ +Deploy the Adelaide / Heidi / Addie baby shower app — guest splash, raffle +picker, and prize assignment console — on ringtail k3s with `shower.eblu.me` +as the public entry and `shower.ops.eblu.me` as the tailnet admin host. App +source: [`adelaide-baby-shower-app`](https://forge.eblu.me/eblume/adelaide-baby-shower-app). diff --git a/docs/changelog.d/shower-app-deploy.infra.md b/docs/changelog.d/shower-app-deploy.infra.md new file mode 100644 index 0000000..c14fbce --- /dev/null +++ b/docs/changelog.d/shower-app-deploy.infra.md @@ -0,0 +1,8 @@ +Wire shower app for public exposure: fly nginx `shower.eblu.me` server +block with `/admin/` blocked at the edge (except `/admin/login/` and +`/admin/logout/`), per-IP rate limit, fail2ban filter+jail with a +shower-specific deny list (`shower-deny.conf`), Caddy route, Pulumi +Gandi CNAME, and a Grafana APM dashboard tracking request rate, error +rate, failed admin logins, latency, bandwidth, and access logs. +Generalized the `nginx-deny` action to accept a per-jail +`nginx_deny_file` parameter so each service has its own ban list. 
diff --git a/docs/how-to/operations/shower-app.md b/docs/how-to/operations/shower-app.md new file mode 100644 index 0000000..401eeb6 --- /dev/null +++ b/docs/how-to/operations/shower-app.md @@ -0,0 +1,174 @@ +--- +title: Shower App on Ringtail +modified: 2026-05-10 +last-reviewed: 2026-05-10 +tags: + - how-to + - operations + - kubernetes + - django +--- + +# Shower App on Ringtail + +How the Adelaide / Heidi / Addie baby shower app is deployed. The app is a +Django project ([`adelaide-baby-shower-app`](https://forge.eblu.me/eblume/adelaide-baby-shower-app)) +released as a wheel to the Forgejo Packages PyPI index and run on +[[ringtail]]'s k3s cluster. Public landing page at `shower.eblu.me`, staff +console + admin UI at `shower.ops.eblu.me` (tailnet only). + +The contract this deploy implements is defined in the app repo's +`docs/how-to/hosting.md` — read that for the env-var contract, security +model, and storage requirements before changing anything here. + +## Routing + +``` +Internet → shower.eblu.me + │ (Fly.io nginx — public) + ▼ + Caddy on indri (shower.ops.eblu.me) + │ + ▼ + Tailscale ProxyGroup ingress (shower.tail8d86e.ts.net) + │ + ▼ + Service shower:8000 → Pod (Django + gunicorn) +``` + +| Hostname | Reachable from | Notes | +|---|---|---| +| `shower.eblu.me` | Public internet | `/admin/` blocked except `/admin/login/`, `/admin/logout/` | +| `shower.ops.eblu.me` | Tailnet | Full app surface, including the admin | +| `shower.tail8d86e.ts.net` | Tailnet | Bare ProxyGroup endpoint Caddy proxies to | + +## Defense layers (public side) + +The public path stacks four checks against `/admin/login/` brute force: + +1. **fly nginx `geo $shower_banned`** — per-service ban list populated by + fail2ban (`/etc/nginx/shower-deny.conf`) +2. **fly nginx `limit_req zone=shower_auth`** — 3 r/s per Fly-Client-IP +3. **django-axes** — 5 fails / 1 hour lockout per `(username, ip_address)` +4. 
**edge `/admin/` block** — anything that isn't `/admin/login/` or + `/admin/logout/` returns 403 from nginx, period + +The fail2ban filter `shower-admin-login.conf` matches 401/403/429 on +`/admin/login/`. The 429 case catches attackers who keep hammering after +django-axes has already locked them out. + +## Persistent storage + +| Mount | PVC | Type | Why | +|---|---|---|---| +| `/app/media` | `shower-media` | NFS RWX on sifaka (`/volume1/shower`) | Prize photos survive pod rescheduling | +| `/app/data` | `shower-data` | k3s `local-path` RWO | SQLite DB; NFS file locking can't be trusted for WAL/journal | + +The container's entrypoint installs the wheel into `/app/data/.venv` on +first boot, runs migrations, runs `collectstatic`, and `exec`s gunicorn. +A `local_settings.py` shim overrides `DATABASES.NAME`, `MEDIA_ROOT`, and +`STATIC_ROOT` to absolute paths under `/app/`, sidestepping the wheel's +`BASE_DIR = parent.parent` of an in-site-packages settings module. + +## One-time setup steps + +These steps are required the first time the service is deployed and are +not encoded in the manifests. + +### 1. NFS share on sifaka + +On the Synology: + +1. Control Panel → Shared Folder → Create. Name: `shower`, Volume 1. +2. Control Panel → File Services → NFS → NFS Rules. Add rule for + `shower`: Hostname=`ringtail`, Privilege=Read/Write, Squash=No mapping. +3. `chown -R 1000:1000 /volume1/shower` over SSH so the pod's uid 1000 + can write. + +### 2. 1Password item + +Item name: **`Shower (blumeops)`** in the `blumeops` vault. +Required property: + +| Field | Value | +|---|---| +| `secret-key` | Output of `openssl rand -base64 48` | + +The `ExternalSecret` `shower-app-secrets` will sync this into the +`shower` namespace as a `Secret` and `envFrom` exposes it as +`DJANGO_SECRET_KEY` to the container. 
+ +**Never reuse a key that has ever been in git history.** Per the app's +hosting.md, an early dev key was committed before being replaced with +the `django-insecure-...` placeholder; the production key must be +freshly generated. + +### 3. Container image + +Built by the `build-container` Forgejo Actions workflow on the +`nix-container-builder` runner (ringtail, amd64). Trigger with: + +```fish +mise run container-build-and-release shower +``` + +After the workflow finishes, update `images[].newTag` in +`argocd/manifests/shower/kustomization.yaml` to the resulting +`vX.Y.Z-<sha>-nix` tag, then commit (C0). + +### 4. DNS + +`pulumi/gandi/__main__.py` declares the `shower-public` CNAME pointing +at `blumeops-proxy.fly.dev.`. Apply with: + +```fish +mise run dns-preview +mise run dns-up +``` + +### 5. Fly.io certificate + +```fish +fly certs add shower.eblu.me -a blumeops-proxy +``` + +(Add to `mise-tasks/fly-setup` so re-runs of the one-time setup pick +it up.) + +### 6. Caddy on indri + +`shower` is in `ansible/roles/caddy/defaults/main.yml`. Push with: + +```fish +mise run provision-indri -- --tags caddy +``` + +## Deploying a new version + +1. Bump the wheel version in the app repo (`adelaide-baby-shower-app`) + and release it to Forgejo PyPI. +2. Bump `version` in `containers/shower/default.nix` to match (`appVersion` is derived from it). +3. `mise run container-build-and-release shower`. Verify the build + with `mise run runner-logs`. +4. Update the `newTag` in `argocd/manifests/shower/kustomization.yaml` + to the new `[main]` SHA tag. +5. Commit (C0 after PR merge — see [[build-container-image#Squash-merge and container tags]]). +6. `argocd app sync shower`.
+ +## Verifying after a deploy + +```fish +kubectl --context=k3s-ringtail -n shower get pods +kubectl --context=k3s-ringtail -n shower logs deploy/shower +curl -sf https://shower.ops.eblu.me/ # tailnet +curl -sf https://shower.eblu.me/ # public +curl -I https://shower.eblu.me/admin/users/ # expect 403 (edge block) +curl -I https://shower.ops.eblu.me/admin/ # expect 200 / 302 (login) +``` + +## Related + +- [[expose-service-publicly]] — Fly.io proxy + Tailscale pattern +- [[deploy-k8s-service]] — generic ArgoCD service onboarding +- [[ringtail]] — the cluster +- [`hosting.md`](https://forge.eblu.me/eblume/adelaide-baby-shower-app/src/branch/main/docs/how-to/hosting.md) — app's deployment contract diff --git a/docs/reference/kubernetes/apps.md b/docs/reference/kubernetes/apps.md index 80ea72e..fd5c06f 100644 --- a/docs/reference/kubernetes/apps.md +++ b/docs/reference/kubernetes/apps.md @@ -41,6 +41,7 @@ Registry of all applications deployed via [[argocd]]. | `ollama` | ollama | `argocd/manifests/ollama/` | [[ollama]] | | `mealie` | mealie | `argocd/manifests/mealie/` | [[mealie]] | | `paperless` | paperless | `argocd/manifests/paperless/` | [[paperless]] | +| `shower` | shower | `argocd/manifests/shower/` | [[shower-app]] | | `prowler` | prowler | `argocd/manifests/prowler/` | [[prowler]] | ## Sync Policies diff --git a/fly/Dockerfile b/fly/Dockerfile index eae8c35..355b404 100644 --- a/fly/Dockerfile +++ b/fly/Dockerfile @@ -20,7 +20,9 @@ COPY --from=docker.io/grafana/alloy@sha256:6e00cf7c5a692ff5f24844529416ed017d76f RUN mkdir -p /var/log/nginx /etc/alloy /tmp/alloy-data COPY fail2ban/filter.d/forge-login.conf /etc/fail2ban/filter.d/forge-login.conf +COPY fail2ban/filter.d/shower-admin-login.conf /etc/fail2ban/filter.d/shower-admin-login.conf COPY fail2ban/jail.d/forge.conf /etc/fail2ban/jail.d/forge.conf +COPY fail2ban/jail.d/shower.conf /etc/fail2ban/jail.d/shower.conf COPY fail2ban/action.d/nginx-deny.conf /etc/fail2ban/action.d/nginx-deny.conf COPY nginx.conf 
/etc/nginx/nginx.conf diff --git a/fly/fail2ban/action.d/nginx-deny.conf b/fly/fail2ban/action.d/nginx-deny.conf index 1d3737b..bab8abb 100644 --- a/fly/fail2ban/action.d/nginx-deny.conf +++ b/fly/fail2ban/action.d/nginx-deny.conf @@ -2,13 +2,22 @@ # Standard iptables banning won't work in Fly.io because $remote_addr # is Fly's internal proxy IP. Instead, we write banned IPs to a file # that nginx checks via a geo directive keyed on $http_fly_client_ip. +# +# The deny file is per-service: each jail sets `nginx_deny_file = ...` +# (see jail.d/*.conf) and a matching `geo $http_fly_client_ip $..._banned` +# block in nginx.conf includes the same path. [Definition] -actionban = echo "<ip> 1;" >> /etc/nginx/forge-deny.conf && nginx -s reload +actionban = echo "<ip> 1;" >> <nginx_deny_file> && nginx -s reload -actionunban = sed -i '/<ip> 1;/d' /etc/nginx/forge-deny.conf && nginx -s reload +actionunban = sed -i '/<ip> 1;/d' <nginx_deny_file> && nginx -s reload actionstart = actionstop = actioncheck = + +[Init] + +# Default for jails that don't override (preserves forge behaviour). +nginx_deny_file = /etc/nginx/forge-deny.conf diff --git a/fly/fail2ban/filter.d/shower-admin-login.conf b/fly/fail2ban/filter.d/shower-admin-login.conf new file mode 100644 index 0000000..c73cd3a --- /dev/null +++ b/fly/fail2ban/filter.d/shower-admin-login.conf @@ -0,0 +1,13 @@ +# Filter for shower-app /admin/login/ failures via nginx JSON access log. +# Matches 401/403/429 responses on the login endpoint, keyed on the +# client_ip field (populated from Fly-Client-IP header). +# +# The 429 case catches attackers who keep hammering after django-axes has +# already locked them out — those requests return 429 from +# axes.middleware.AxesMiddleware before reaching the view.
+ +[Definition] + +failregex = "client_ip":"<HOST>".*"request_uri":"\/admin\/login[^"]*".*"status":(401|403|429) + +ignoreregex = diff --git a/fly/fail2ban/jail.d/shower.conf b/fly/fail2ban/jail.d/shower.conf new file mode 100644 index 0000000..59fa7fb --- /dev/null +++ b/fly/fail2ban/jail.d/shower.conf @@ -0,0 +1,8 @@ +[shower-admin-login] +enabled = true +filter = shower-admin-login +logpath = /var/log/nginx/access.json.log +maxretry = 5 +findtime = 600 +bantime = 3600 +banaction = nginx-deny[nginx_deny_file=/etc/nginx/shower-deny.conf] diff --git a/fly/nginx.conf b/fly/nginx.conf index 5e49d88..0aca716 100644 --- a/fly/nginx.conf +++ b/fly/nginx.conf @@ -34,6 +34,11 @@ http { # bucket. $http_fly_client_ip has the actual client IP. limit_req_zone $http_fly_client_ip zone=forge_auth:10m rate=3r/s; + # Shower-app rate limit on /admin/login/ (the only admin path the public + # proxy exposes). 3r/s with django-axes (5 strikes, 1h lockout) gives + # plenty of room for a real staff login while making brute-force costly. + limit_req_zone $http_fly_client_ip zone=shower_auth:10m rate=3r/s; + # fail2ban deny list — banned IPs are written here by fail2ban and # checked via the $forge_banned variable. The file is touched at # container start to ensure it exists. @@ -42,6 +47,13 @@ http { include /etc/nginx/forge-deny.conf; } + # Per-service deny list for the shower app — populated by fail2ban + # when /admin/login/ attempts trip the threshold. Same scheme as forge. + geo $http_fly_client_ip $shower_banned { + default 0; + include /etc/nginx/shower-deny.conf; + } + # Proxy cache: 200MB, evict after 24h of no access proxy_cache_path /tmp/cache levels=1:2 keys_zone=services:10m max_size=200m inactive=24h; @@ -288,6 +300,95 @@ http { } } + # --- shower.eblu.me (dynamic Django: guest splash + raffle/prize console) --- + # Public-facing Adelaide baby shower app.
Defense layers: + # * geo+fail2ban deny list ($shower_banned) + # * nginx limit_req on /admin/login/ via the shower_auth zone + # * django-axes inside Django (5 fails / 1h lockout per user+IP) + # * /admin/ paths blocked at the proxy except /admin/login/ and /admin/logout/ + # so staff can sign in publicly but the CRUD admin is tailnet-only + server { + listen 8080; + server_name shower.eblu.me; + + # Block fail2ban-banned IPs + if ($shower_banned) { + return 403 "Temporarily blocked. Try again later.\n"; + } + + # General per-IP rate limit (cushion for the splash page + form posts) + limit_req zone=general burst=20 nodelay; + + # Image uploads from /host/'s prize cropper are ~150-300 KiB JPEGs; + # 5 MiB matches the Django-side cap. + client_max_body_size 5m; + + # Security headers — HSTS matches Django's SECURE_HSTS_SECONDS. + add_header X-Frame-Options "DENY" always; + add_header X-Content-Type-Options "nosniff" always; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header Referrer-Policy "same-origin" always; + + error_page 502 503 504 /error.html; + location = /error.html { + root /usr/share/nginx/html; + internal; + } + + # GNU Terry Pratchett — keep the name moving. + add_header X-Clacks-Overhead "GNU Terry Pratchett" always; + + # Reject indexers — there's nothing here we want crawled. + location = /robots.txt { + default_type text/plain; + return 200 "User-agent: *\nDisallow: /\n"; + } + + # Public admin surface: only the exact login/logout endpoints (regex is end-anchored), rate-limited. + location ~ ^/admin/(login|logout)/?$
{ + limit_req zone=shower_auth burst=5 nodelay; + + proxy_pass https://indri_backend$request_uri; + proxy_ssl_verify off; + proxy_ssl_server_name on; + proxy_ssl_name shower.ops.eblu.me; + proxy_intercept_errors on; + + proxy_set_header Host shower.ops.eblu.me; + proxy_set_header X-Real-IP $http_fly_client_ip; + proxy_set_header X-Forwarded-For $http_fly_client_ip; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_http_version 1.1; + proxy_set_header Connection $connection_upgrade; + } + + # Block the rest of /admin/ at the public edge. The admin CRUD UI + # is only reachable via shower.ops.eblu.me on the tailnet. + location /admin/ { + return 403 "The Django admin is tailnet-only — visit shower.ops.eblu.me.\n"; + } + + location / { + proxy_pass https://indri_backend$request_uri; + proxy_ssl_verify off; + proxy_ssl_server_name on; + proxy_ssl_name shower.ops.eblu.me; + proxy_intercept_errors on; + + # No proxy_cache — dynamic content with sessions and CSRF. + + proxy_set_header Host shower.ops.eblu.me; + proxy_set_header X-Real-IP $http_fly_client_ip; + proxy_set_header X-Forwarded-For $http_fly_client_ip; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + } + } + # Catch-all: reject unknown hosts, but serve health check server { listen 8080 default_server; diff --git a/fly/start.sh b/fly/start.sh index 1f2acaa..ef17641 100644 --- a/fly/start.sh +++ b/fly/start.sh @@ -19,8 +19,10 @@ until nslookup forge.tail8d86e.ts.net 100.100.100.100 > /dev/null 2>&1; do done echo "MagicDNS ready" -# Ensure fail2ban deny file exists before nginx starts +# Ensure fail2ban per-service deny files exist before nginx starts +# (the geo directive's `include` fails if the file is missing). touch /etc/nginx/forge-deny.conf +touch /etc/nginx/shower-deny.conf # Start nginx — MagicDNS is available, upstreams resolved. 
nginx -g "daemon off;" & diff --git a/pulumi/gandi/__main__.py b/pulumi/gandi/__main__.py index bda7a8a..25fd0f7 100644 --- a/pulumi/gandi/__main__.py +++ b/pulumi/gandi/__main__.py @@ -85,6 +85,15 @@ forge_public = gandi.livedns.Record( values=["blumeops-proxy.fly.dev."], ) +shower_public = gandi.livedns.Record( + "shower-public", + zone=domain, + name="shower", + type="CNAME", + ttl=300, + values=["blumeops-proxy.fly.dev."], +) + # ============== Exports ============== pulumi.export("domain", domain) pulumi.export("wildcard_fqdn", f"*.{subdomain}.{domain}") @@ -93,3 +102,4 @@ pulumi.export("target_ip", tailscale_ip) pulumi.export("docs_public_fqdn", f"docs.{domain}") pulumi.export("cv_public_fqdn", f"cv.{domain}") pulumi.export("forge_public_fqdn", f"forge.{domain}") +pulumi.export("shower_public_fqdn", f"shower.{domain}") diff --git a/service-versions.yaml b/service-versions.yaml index f7f0f4e..b6163ad 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -44,6 +44,16 @@ services: upstream-source: https://github.com/gethomepage/homepage/releases notes: Custom container, kustomize manifests + - name: shower + type: argocd + last-reviewed: 2026-05-10 + current-version: "1.0.0" + upstream-source: https://forge.eblu.me/eblume/adelaide-baby-shower-app + notes: | + Django app for Adelaide / Heidi / Addie's baby shower. Wheel + published to Forgejo Packages PyPI; runs on ringtail k3s. Public + at shower.eblu.me (fly proxy), tailnet admin at shower.ops.eblu.me. + - name: nvidia-device-plugin type: argocd last-reviewed: 2026-03-27 From cb4f4085c296f2b3ea152c44b33f23e9b945f67b Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 08:37:12 -0700 Subject: [PATCH 02/20] C1: bake shower wheel into image; wire borgmatic; refine NFS docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-ups on the shower deployment branch: 1. 
containers/shower/default.nix now uses buildPythonPackage to install the adelaide-baby-shower-app wheel + its deps at nix build time. The wheel comes from the forge PyPI index with a pinned SRI hash. The entrypoint no longer does pip-at-boot — it just runs migrations, collectstatic, and execs gunicorn. 2. ansible/roles/borgmatic/defaults/main.yml: - Adds shower to borgmatic_k8s_sqlite_dumps (context k3s-ringtail) so /app/data/db.sqlite3 is dumped via kubectl exec on every run. - Adds /Volumes/shower (sifaka SMB mount on indri) to borgmatic_source_directories so prize-photo media gets archived. 3. NFS share docs corrected to match the real on-sifaka pattern: exports allowlist 192.168.1.0/24 + 100.64.0.0/10 with all_squash to admin (matching frigate/paperless/etc.), not "Squash=No mapping". The pod's runAsUser doesn't need to match an on-disk uid because all_squash rewrites every write to admin:users. Also adds a missing service-versions entry for the tailscale container introduced in PR #347 — pre-existing gap surfaced by the container-version-check hook on this commit. Co-Authored-By: Claude Opus 4.7 (1M context) --- ansible/roles/borgmatic/defaults/main.yml | 8 ++ argocd/manifests/shower/pv-nfs.yaml | 18 ++-- containers/shower/default.nix | 104 ++++++++++--------- docs/changelog.d/shower-app-deploy.bugfix.md | 13 +++ docs/how-to/operations/shower-app.md | 76 +++++++++++--- service-versions.yaml | 9 ++ 6 files changed, 155 insertions(+), 73 deletions(-) create mode 100644 docs/changelog.d/shower-app-deploy.bugfix.md diff --git a/ansible/roles/borgmatic/defaults/main.yml b/ansible/roles/borgmatic/defaults/main.yml index 25d0149..123cb0f 100644 --- a/ansible/roles/borgmatic/defaults/main.yml +++ b/ansible/roles/borgmatic/defaults/main.yml @@ -27,6 +27,9 @@ borgmatic_source_directories: - /Users/erichblume/.config/borgmatic - /Users/erichblume/Documents - /Users/erichblume/.local/share/borgmatic/k8s-dumps + # Shower app prize-photo uploads (sifaka SMB mount). 
Mounted manually + # on indri via Finder — see docs/how-to/operations/shower-app.md. + - /Volumes/shower # Backup repositories borgmatic_repositories: @@ -54,6 +57,11 @@ borgmatic_k8s_sqlite_dumps: label_selector: app=mealie db_path: /app/data/mealie.db context: minikube + - name: shower + namespace: shower + label_selector: app=shower + db_path: /app/data/db.sqlite3 + context: k3s-ringtail # Exclude patterns borgmatic_exclude_patterns: [] diff --git a/argocd/manifests/shower/pv-nfs.yaml b/argocd/manifests/shower/pv-nfs.yaml index d07cecc..7354fb5 100644 --- a/argocd/manifests/shower/pv-nfs.yaml +++ b/argocd/manifests/shower/pv-nfs.yaml @@ -1,15 +1,13 @@ # NFS PersistentVolume for shower app media uploads (prize photos). -# Requires: NFS share on sifaka at /volume1/shower with NFS permissions -# for ringtail. # -# To create on Synology: -# 1. Control Panel > Shared Folder > Create -# 2. Name: shower, Location: Volume 1 -# 3. Control Panel > File Services > NFS > NFS Rules -# 4. Add rule for "shower" share: Hostname=ringtail, Privilege=Read/Write, -# Squash=No mapping -# 5. chown -R 1000:1000 /volume1/shower (or pick another UID and align the -# container's runAsUser to match) +# Requires the `shower` share on sifaka with NFS exports matching the +# blumeops standard (192.168.1.0/24 + 100.64.0.0/10, all_squash → admin). +# See docs/how-to/operations/shower-app.md for the Synology web-UI walk +# and docs/reference/storage/sifaka.md for the exports table. +# +# Because all_squash rewrites every NFS write to admin:users (1024:100), +# the in-pod runAsUser does NOT have to match an on-disk uid. Mode 0777 +# on /volume1/shower lets the pod read back what it wrote. 
apiVersion: v1 kind: PersistentVolume metadata: diff --git a/containers/shower/default.nix b/containers/shower/default.nix index 5b92e85..e8d7383 100644 --- a/containers/shower/default.nix +++ b/containers/shower/default.nix @@ -1,76 +1,79 @@ # Nix-built shower app container — Adelaide / Heidi / Addie baby shower. # # The app is published as a wheel to the Forgejo PyPI index at -# https://forge.eblu.me/api/packages/eblume/pypi/. Rather than pin and -# fetch the wheel + transitive deps at nix build time (which requires -# every wheel hash to be tracked here), this image ships a Python from -# nixpkgs and pip-installs the wheel into a venv on /app/data at first -# boot. Subsequent boots reuse the venv. This trades reproducibility for -# a much simpler nix file. +# https://forge.eblu.me/api/packages/eblume/pypi/. The wheel + its +# Python deps are baked in at build time via buildPythonPackage so the +# container boots cleanly with no pip-at-runtime. Build runs on the +# nix-container-builder runner (ringtail, amd64) so the image is native. # -# Built on the nix-container-builder runner (ringtail, amd64) so the -# image runs natively on ringtail's k3s without QEMU emulation. +# To bump the version: +# 1. Update `version` below. +# 2. Update `wheelHash` — `nix-prefetch-url ` against the new wheel, +# or set it to `pkgs.lib.fakeHash` and let the build print the right one. { pkgs ? 
import <nixpkgs> { } }: let version = "1.0.0"; + wheelHash = "sha256-9Xk3TCzl474As8n0RhLoy/QYw+K1DABBWEwLC8v1X0A="; python = pkgs.python314; - appVersion = version; - entrypoint = pkgs.writeShellScript "shower-entrypoint" '' - set -eu + showerWheel = pkgs.fetchurl { + name = "adelaide_baby_shower_app-${version}-py3-none-any.whl"; + url = "https://forge.eblu.me/api/packages/eblume/pypi/files/adelaide-baby-shower-app/${version}/adelaide_baby_shower_app-${version}-py3-none-any.whl"; + hash = wheelHash; + }; - APP_DIR=/app - DATA_DIR=/app/data - VENV_DIR=$DATA_DIR/.venv - INSTALLED_MARKER=$VENV_DIR/.installed-${appVersion} + shower = python.pkgs.buildPythonPackage { + pname = "adelaide-baby-shower-app"; + inherit version; + format = "wheel"; + src = showerWheel; + doCheck = false; + propagatedBuildInputs = with python.pkgs; [ + django + django-axes + pillow + scipy + segno + ]; + }; - export HOME=$DATA_DIR - export PIP_DISABLE_PIP_VERSION_CHECK=1 - export PIP_NO_CACHE_DIR=1 + pyEnv = python.withPackages (ps: [ + shower + ps.gunicorn + ]); - mkdir -p "$DATA_DIR" "$APP_DIR/media" - - # First boot (or version change): create venv and install the app + deps. - # The wheel comes from the internal devpi mirror (default index), with - # forge.eblu.me as the extra index for the adelaide-baby-shower-app wheel. - if [ ! -f "$INSTALLED_MARKER" ]; then - echo "shower: installing adelaide-baby-shower-app==${appVersion} into $VENV_DIR" - rm -rf "$VENV_DIR" - ${python}/bin/python -m venv "$VENV_DIR" - "$VENV_DIR/bin/pip" install --upgrade pip - "$VENV_DIR/bin/pip" install \ - --index-url=https://pypi.ops.eblu.me/root/pypi/+simple/ \ - --extra-index-url=https://forge.eblu.me/api/packages/eblume/pypi/simple/ \ - "adelaide-baby-shower-app==${appVersion}" gunicorn - touch "$INSTALLED_MARKER" - fi - - # The wheel's config/settings.py uses BASE_DIR = parent.parent of its - # own __file__, so MEDIA_ROOT and DATABASES.NAME resolve relative to - # site-packages.
Override with a thin shim placed in $APP_DIR. - cat > "$APP_DIR/local_settings.py" <<'PY' + # Settings shim — config/settings.py's `BASE_DIR = parent.parent` would + # otherwise resolve to site-packages, scattering db.sqlite3 / media / + # staticfiles into the venv. Pin them to /app/{data,media,data/static}. + localSettings = pkgs.writeText "local_settings.py" '' from config.settings import * # noqa: F401,F403 DATABASES["default"]["NAME"] = "/app/data/db.sqlite3" MEDIA_ROOT = "/app/media" STATIC_ROOT = "/app/data/staticfiles" - PY + ''; - export PYTHONPATH=$APP_DIR + entrypoint = pkgs.writeShellScript "shower-entrypoint" '' + set -eu + + export HOME=/app/data + export PYTHONPATH=/app export DJANGO_SETTINGS_MODULE=local_settings - cd "$APP_DIR" + cd /app + + mkdir -p /app/data /app/media echo "shower: running migrations" - "$VENV_DIR/bin/python" -m django migrate --noinput + ${pyEnv}/bin/python -m django migrate --noinput echo "shower: collecting static files" - "$VENV_DIR/bin/python" -m django collectstatic --noinput --clear + ${pyEnv}/bin/python -m django collectstatic --noinput --clear echo "shower: starting gunicorn" - exec "$VENV_DIR/bin/gunicorn" \ + exec ${pyEnv}/bin/gunicorn \ --bind 0.0.0.0:8000 \ --workers 2 \ --forwarded-allow-ips='*' \ @@ -81,19 +84,20 @@ in pkgs.dockerTools.buildLayeredImage { name = "blumeops/shower"; contents = [ - python + pyEnv pkgs.cacert pkgs.tzdata pkgs.bashInteractive pkgs.coreutils - pkgs.gnused - pkgs.gnugrep ]; - # /app is writable by uid 1000 (matches deployment.yaml runAsUser). 
- fakeRootCommands = '' + extraCommands = '' mkdir -p app/data app/media tmp chmod 1777 tmp + cp ${localSettings} app/local_settings.py + ''; + + fakeRootCommands = '' chown -R 1000:1000 app ''; enableFakechroot = true; diff --git a/docs/changelog.d/shower-app-deploy.bugfix.md b/docs/changelog.d/shower-app-deploy.bugfix.md new file mode 100644 index 0000000..91d2b3b --- /dev/null +++ b/docs/changelog.d/shower-app-deploy.bugfix.md @@ -0,0 +1,13 @@ +Shower app container now bakes the wheel + Python deps into the image +at build time via `buildPythonPackage` instead of pip-installing on +first boot. Boots are deterministic and don't depend on forge PyPI +being reachable from the pod. The `wheelHash` in +`containers/shower/default.nix` is the sha256 sourced from the +[forge PyPI simple index](https://forge.eblu.me/api/packages/eblume/pypi/simple/adelaide-baby-shower-app/); +bumping the version means bumping that hash too. + +Borgmatic now covers the shower app: SQLite is dumped from the live +pod via `kubectl exec` (mirroring the existing mealie entry, with +`context: k3s-ringtail`), and the prize-photo media share is picked up +through `/Volumes/shower` (sifaka SMB mount on indri, same pattern as +`/Volumes/photos`). diff --git a/docs/how-to/operations/shower-app.md b/docs/how-to/operations/shower-app.md index 401eeb6..9a7af1b 100644 --- a/docs/how-to/operations/shower-app.md +++ b/docs/how-to/operations/shower-app.md @@ -64,26 +64,70 @@ django-axes has already locked them out. | `/app/media` | `shower-media` | NFS RWX on sifaka (`/volume1/shower`) | Prize photos survive pod rescheduling | | `/app/data` | `shower-data` | k3s `local-path` RWO | SQLite DB; NFS file locking can't be trusted for WAL/journal | -The container's entrypoint installs the wheel into `/app/data/.venv` on -first boot, runs migrations, runs `collectstatic`, and `exec`s gunicorn. 
-A `local_settings.py` shim overrides `DATABASES.NAME`, `MEDIA_ROOT`, and -`STATIC_ROOT` to absolute paths under `/app/`, sidestepping the wheel's -`BASE_DIR = parent.parent` of an in-site-packages settings module. +The container has the app + its Python deps baked in at nix build time +(`buildPythonPackage` against the wheel fetched from forge PyPI). The +entrypoint runs migrations, runs `collectstatic`, and `exec`s gunicorn — +no pip-at-boot. A `local_settings.py` shim overrides `DATABASES.NAME`, +`MEDIA_ROOT`, and `STATIC_ROOT` to absolute paths under `/app/`, +sidestepping the wheel's `BASE_DIR = parent.parent` of an +in-site-packages settings module. + +## Backups + +[[borgmatic]] (running on indri) captures both halves of the persistent +state on its daily 2 a.m. run: + +- **`/app/data/db.sqlite3`** — dumped via `kubectl exec`'s + `sqlite3.backup()` against the live pod (entry in + `borgmatic_k8s_sqlite_dumps`, context `k3s-ringtail`). The dumped + file lands in `borgmatic_k8s_dump_dir` on indri and is picked up by + the main source-directory sweep. +- **`/app/media`** — picked up via `/Volumes/shower`, the SMB mount of + `sifaka:/volume1/shower` on indri. The same Synology share is exposed + via SMB *and* NFS simultaneously; ringtail's pod uses the NFS export, + while indri reads the SMB side for the borgmatic source. + +Both archive to [[sifaka]] (`borg-backups`) and BorgBase offsite, with +retention `keep_daily=7 / keep_monthly=12 / keep_yearly=1000`. + +The SMB mount on indri is set up manually once via Finder (Cmd-K → +`smb://sifaka/shower`, save credentials, "Always log in" so it +reconnects after reboot). If `/Volumes/shower` is missing at backup +time borgmatic will fail loudly — `source_directories_must_exist: true` +applies to all entries. ## One-time setup steps These steps are required the first time the service is deployed and are not encoded in the manifests. -### 1. NFS share on sifaka +### 1. 
NFS + SMB share on sifaka -On the Synology: +On the Synology DSM web UI: -1. Control Panel → Shared Folder → Create. Name: `shower`, Volume 1. -2. Control Panel → File Services → NFS → NFS Rules. Add rule for - `shower`: Hostname=`ringtail`, Privilege=Read/Write, Squash=No mapping. -3. `chown -R 1000:1000 /volume1/shower` over SSH so the pod's uid 1000 - can write. +1. **Control Panel → Shared Folder → Create**. Name: `shower`, + Location: Volume 1. Leave the rest at default. +2. **Control Panel → File Services → NFS → NFS Rules** (on the + `shower` row's *Permissions* tab). Add a rule mirroring the other + shares' pattern: Hostname/IP=`192.168.1.0/24` and again for + `100.64.0.0/10`, Privilege=Read/Write, Squash=`Map all users to + admin` (= `all_squash`), and tick *Allow connections from + non-privileged ports*. (See [[sifaka#NFS Exports]] — the existing + `frigate`, `paperless`, etc. shares use this exact pattern.) +3. **Control Panel → File Services → SMB**: leave SMB enabled + globally. No per-share rule required — the share inherits the + default `eblume` access. +4. The directory ownership at `/volume1/shower` will end up + `root:root`, mode `0777` (DSM default) — which is fine because + `all_squash` rewrites every NFS write to `admin:users`, and the + `0777` lets pods read what other pods wrote. No `chown` needed. + +After the share exists, mount it on indri for borgmatic: + +- In Finder, **Cmd-K → `smb://sifaka/shower`**, sign in as `eblume`, + and tick **Remember in Keychain** + **Always log in** so it + reconnects on reboot. This produces `/Volumes/shower`, which the + borgmatic source-directory list points at. ### 2. 1Password item @@ -106,7 +150,13 @@ freshly generated. ### 3. Container image Built by the `build-container` Forgejo Actions workflow on the -`nix-container-builder` runner (ringtail, amd64). Trigger with: +`nix-container-builder` runner (ringtail, amd64). 
The wheel is fetched +from forge PyPI at nix build time and baked into the image — no +pip-at-runtime. To bump the version, change `version` in +`containers/shower/default.nix` and update `wheelHash` (or set it to +`pkgs.lib.fakeHash` and let the next build print the correct one). + +Trigger with: ```fish mise run container-build-and-release shower diff --git a/service-versions.yaml b/service-versions.yaml index b6163ad..8caa2be 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -106,6 +106,15 @@ services: current-version: "v1.94.2" upstream-source: https://github.com/tailscale/tailscale/releases + - name: tailscale + type: container + last-reviewed: 2026-05-10 + current-version: "1.94.2" + upstream-source: https://github.com/tailscale/tailscale/releases + notes: | + Locally mirrored tailscale image used by ringtail's tailscale-operator + ProxyClass. Built via containers/tailscale/default.nix. + - name: grafana type: argocd last-reviewed: 2026-04-02 From ba4c1e89531d2f28434d0addbfe944aca00c29ed Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 09:00:26 -0700 Subject: [PATCH 03/20] C1: switch shower container to pip-install FOD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buildPythonPackage approach with `propagatedBuildInputs = [ python.pkgs.django ... ]` doesn't work: 1. nixpkgs python314Packages.django still aliases to Django 4.2 LTS, which doesn't support Python 3.14. 2. django-axes from nixpkgs pulls selenium + browser fonts into its check phase, and the nix sandbox can't provide those (fontconfig errors, then build dep tree collapses). Switching to authentik's FOD pattern instead: a single fixed-output derivation that pip-installs the adelaide-baby-shower-app wheel + every transitive dep from forge PyPI into a target dir. FODs get network access in exchange for a pinned output hash, so the closure stays reproducible. 
outputHash is set to fakeHash for the first build — the runner will print the real hash on failure; a follow-up commit will pin it. Co-Authored-By: Claude Opus 4.7 (1M context) --- containers/shower/default.nix | 126 +++++++++++++++++++++++++--------- 1 file changed, 93 insertions(+), 33 deletions(-) diff --git a/containers/shower/default.nix b/containers/shower/default.nix index e8d7383..cb64ca8 100644 --- a/containers/shower/default.nix +++ b/containers/shower/default.nix @@ -2,51 +2,108 @@ # # The app is published as a wheel to the Forgejo PyPI index at # https://forge.eblu.me/api/packages/eblume/pypi/. The wheel + its -# Python deps are baked in at build time via buildPythonPackage so the -# container boots cleanly with no pip-at-runtime. Build runs on the +# transitive Python deps are baked in at build time via a fixed-output +# derivation that runs `pip install --target` against forge PyPI (proxied +# through pypi.ops.eblu.me for upstream packages). Build runs on the # nix-container-builder runner (ringtail, amd64) so the image is native. # +# Going through pip-install-target rather than nixpkgs Python packages +# sidesteps two issues we hit going through `python.pkgs.buildPythonPackage`: +# 1. python314Packages.django still aliases to Django 4.2 LTS, which +# doesn't support Python 3.14 at all. +# 2. django-axes pulls selenium + browser fonts into its check phase +# and the nix sandbox can't provide those. +# # To bump the version: # 1. Update `version` below. -# 2. Update `wheelHash` — `nix-prefetch-url ` against the new wheel, -# or set it to `pkgs.lib.fakeHash` and let the build print the right one. +# 2. Set `outputHash` to `pkgs.lib.fakeHash`, run the build, copy the +# real hash out of the error, and commit it. { pkgs ? 
import <nixpkgs> { } }: let version = "1.0.0"; - wheelHash = "sha256-9Xk3TCzl474As8n0RhLoy/QYw+K1DABBWEwLC8v1X0A="; python = pkgs.python314; - showerWheel = pkgs.fetchurl { - name = "adelaide_baby_shower_app-${version}-py3-none-any.whl"; - url = "https://forge.eblu.me/api/packages/eblume/pypi/files/adelaide-baby-shower-app/${version}/adelaide_baby_shower_app-${version}-py3-none-any.whl"; - hash = wheelHash; - }; - - shower = python.pkgs.buildPythonPackage { - pname = "adelaide-baby-shower-app"; + # Fixed-output derivation: pip-installs the app wheel + every transitive + # dep into a single target dir. FODs get network access in exchange for + # a pinned output hash, which means the whole dependency closure is + # immutable across rebuilds. + pyDeps = pkgs.stdenv.mkDerivation { + pname = "shower-python-deps"; inherit version; - format = "wheel"; - src = showerWheel; - doCheck = false; - propagatedBuildInputs = with python.pkgs; [ - django - django-axes - pillow - scipy - segno - ]; + + dontUnpack = true; + + nativeBuildInputs = [ python pkgs.cacert ]; + + buildPhase = '' + runHook preBuild + + export HOME=$TMPDIR + export SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt + export PIP_DISABLE_PIP_VERSION_CHECK=1 + + # Install into a venv first so pip's bytecode-compile + entry-point + # generation pick up the right interpreter, then copy site-packages + # + bin into $out at a stable layout. 
+ ${python}/bin/python -m venv "$TMPDIR/venv" + "$TMPDIR/venv/bin/pip" install --upgrade pip + "$TMPDIR/venv/bin/pip" install \ + --no-cache-dir \ + --index-url=https://pypi.ops.eblu.me/root/pypi/+simple/ \ + --extra-index-url=https://forge.eblu.me/api/packages/eblume/pypi/simple/ \ + "adelaide-baby-shower-app==${version}" \ + gunicorn + + runHook postBuild + ''; + + installPhase = '' + runHook preInstall + + mkdir -p $out/lib/python3.14 $out/bin + cp -r "$TMPDIR/venv/lib/python3.14/site-packages" $out/lib/python3.14/site-packages + + # Copy console scripts (gunicorn, django-admin, etc.) but drop the + # venv-specific shebang prefix that points at $TMPDIR/venv/bin/python. + # Rewrite shebangs to the eventual on-image python path. + for script in "$TMPDIR/venv/bin/"*; do + [ -f "$script" ] || continue + name=$(basename "$script") + case "$name" in + python*|pip*|activate*) continue ;; + esac + # Replace the venv python shebang with a path that resolves inside + # the docker image (where ${python} ends up in /nix/store). + sed -e "1 s|^#!.*python.*|#!${python}/bin/python3.14|" "$script" > "$out/bin/$name" + chmod +x "$out/bin/$name" + done + + runHook postInstall + ''; + + # Bytecode files embed absolute paths; deletion forces re-compile inside + # the image at first run, with paths matching the image filesystem. + postInstall = '' + find $out -type f -name '*.pyc' -delete + find $out -type d -name '__pycache__' -exec rm -rf {} + 2>/dev/null || true + ''; + + outputHashMode = "recursive"; + outputHashAlgo = "sha256"; + # Computed by setting to pkgs.lib.fakeHash and reading the failure. + # Pin the dep closure — rebuilds are reproducible until the version bumps. 
+ outputHash = pkgs.lib.fakeHash; + + dontFixup = true; }; - pyEnv = python.withPackages (ps: [ - shower - ps.gunicorn - ]); + sitePackages = "${pyDeps}/lib/python3.14/site-packages"; # Settings shim — config/settings.py's `BASE_DIR = parent.parent` would # otherwise resolve to site-packages, scattering db.sqlite3 / media / - # staticfiles into the venv. Pin them to /app/{data,media,data/static}. + # staticfiles into the venv. Pin them to /app/{data,media,data/staticfiles}. localSettings = pkgs.writeText "local_settings.py" '' from config.settings import * # noqa: F401,F403 @@ -59,7 +116,8 @@ let set -eu export HOME=/app/data - export PYTHONPATH=/app + export PATH=${pyDeps}/bin:${python}/bin:/bin + export PYTHONPATH=/app:${sitePackages} export DJANGO_SETTINGS_MODULE=local_settings cd /app @@ -67,13 +125,13 @@ let mkdir -p /app/data /app/media echo "shower: running migrations" - ${pyEnv}/bin/python -m django migrate --noinput + ${python}/bin/python -m django migrate --noinput echo "shower: collecting static files" - ${pyEnv}/bin/python -m django collectstatic --noinput --clear + ${python}/bin/python -m django collectstatic --noinput --clear echo "shower: starting gunicorn" - exec ${pyEnv}/bin/gunicorn \ + exec ${pyDeps}/bin/gunicorn \ --bind 0.0.0.0:8000 \ --workers 2 \ --forwarded-allow-ips='*' \ @@ -84,7 +142,8 @@ in pkgs.dockerTools.buildLayeredImage { name = "blumeops/shower"; contents = [ - pyEnv + python + pyDeps pkgs.cacert pkgs.tzdata pkgs.bashInteractive @@ -111,6 +170,7 @@ pkgs.dockerTools.buildLayeredImage { "TMPDIR=/tmp" "LANG=C.UTF-8" "LC_ALL=C.UTF-8" + "PYTHONDONTWRITEBYTECODE=1" ]; ExposedPorts = { "8000/tcp" = { }; From f8598a66124dbbdb9e1c7836f44e395f4a42bc18 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 09:06:44 -0700 Subject: [PATCH 04/20] C1: strip store refs in shower FOD; autopatchelf wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Run 534 failed with 'fixed-output 
derivations must not reference store paths: ... gcc-14.3.0-lib' because pip-installed wheels pulled stdenv into the venv (Python's setup, gcc-lib runtime references). Adapts authentik's two-stage pattern: - pyDepsFOD: pip-installs into the venv, then strips every nix store ref it can find (find+remove-references-to). Output is fully self-contained — pinned by outputHash. - pyDeps (non-FOD wrapper): copies the FOD output and runs autoPatchelfHook against runtime buildInputs (libstdc++, zlib, image libs for pillow). This restores RPATHs on the .so files that pillow and scipy ship, against the real on-image library locations. outputHash still fakeHash — next build prints the real one. Co-Authored-By: Claude Opus 4.7 (1M context) --- containers/shower/default.nix | 77 +++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 18 deletions(-) diff --git a/containers/shower/default.nix b/containers/shower/default.nix index cb64ca8..fa1f07f 100644 --- a/containers/shower/default.nix +++ b/containers/shower/default.nix @@ -29,13 +29,13 @@ let # dep into a single target dir. FODs get network access in exchange for # a pinned output hash, which means the whole dependency closure is # immutable across rebuilds. - pyDeps = pkgs.stdenv.mkDerivation { - pname = "shower-python-deps"; + pyDepsFOD = pkgs.stdenv.mkDerivation { + pname = "shower-python-deps-fod"; inherit version; dontUnpack = true; - nativeBuildInputs = [ python pkgs.cacert ]; + nativeBuildInputs = [ python pkgs.cacert pkgs.removeReferencesTo ]; buildPhase = '' runHook preBuild @@ -44,9 +44,6 @@ let export SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt export PIP_DISABLE_PIP_VERSION_CHECK=1 - # Install into a venv first so pip's bytecode-compile + entry-point - # generation pick up the right interpreter, then copy site-packages - # + bin into $out at a stable layout. 
${python}/bin/python -m venv "$TMPDIR/venv" "$TMPDIR/venv/bin/pip" install --upgrade pip "$TMPDIR/venv/bin/pip" install \ @@ -65,40 +62,84 @@ let mkdir -p $out/lib/python3.14 $out/bin cp -r "$TMPDIR/venv/lib/python3.14/site-packages" $out/lib/python3.14/site-packages - # Copy console scripts (gunicorn, django-admin, etc.) but drop the - # venv-specific shebang prefix that points at $TMPDIR/venv/bin/python. - # Rewrite shebangs to the eventual on-image python path. for script in "$TMPDIR/venv/bin/"*; do [ -f "$script" ] || continue name=$(basename "$script") case "$name" in python*|pip*|activate*) continue ;; esac - # Replace the venv python shebang with a path that resolves inside - # the docker image (where ${python} ends up in /nix/store). - sed -e "1 s|^#!.*python.*|#!${python}/bin/python3.14|" "$script" > "$out/bin/$name" + cp "$script" "$out/bin/$name" chmod +x "$out/bin/$name" done - runHook postInstall - ''; + # --- Strip Nix store references (FOD outputs must be self-contained) --- + # The wrapper derivation below restores them via autoPatchelfHook + a + # python wrapper that points pyc-less imports at the on-image python. - # Bytecode files embed absolute paths; deletion forces re-compile inside - # the image at first run, with paths matching the image filesystem. - postInstall = '' + # Strip bytecode entirely — pyc files embed compile-time paths. find $out -type f -name '*.pyc' -delete find $out -type d -name '__pycache__' -exec rm -rf {} + 2>/dev/null || true + + # Dynamically discover all nix store references and strip them. We + # don't have a static list because pip pulls in stdenv via Python's + # build env (gcc-lib, libstdc++, etc.) and the closure is opaque. 
+ { find $out -type f -print0 \ + | xargs -0 grep -aohE '/nix/store/[a-z0-9]{32}-[^/"[:space:]]+' 2>/dev/null \ + || true; } | sort -u > $TMPDIR/store-refs.txt + echo "Found $(wc -l < $TMPDIR/store-refs.txt) unique store path references to strip" + + refs_args="" + while IFS= read -r ref; do + refs_args="$refs_args -t $ref" + done < $TMPDIR/store-refs.txt + + if [ -n "$refs_args" ]; then + find $out -type f -exec remove-references-to $refs_args {} + 2>/dev/null || true + fi + + remaining=$({ find $out -type f -print0 | xargs -0 grep -cl '/nix/store/' 2>/dev/null || true; } | wc -l) + echo "Files with remaining store references: $remaining" + + runHook postInstall ''; outputHashMode = "recursive"; outputHashAlgo = "sha256"; # Computed by setting to pkgs.lib.fakeHash and reading the failure. - # Pin the dep closure — rebuilds are reproducible until the version bumps. outputHash = pkgs.lib.fakeHash; dontFixup = true; }; + # Non-FOD wrapper: re-applies RPATHs to pre-built .so files (pillow, + # scipy) so they find libstdc++ / libz / etc. at runtime. autoPatchelfHook + # discovers needed libraries from buildInputs. 
+ pyDeps = pkgs.stdenv.mkDerivation { + pname = "shower-python-deps"; + inherit version; + + dontUnpack = true; + + nativeBuildInputs = [ pkgs.autoPatchelfHook ]; + + buildInputs = with pkgs; [ + python + stdenv.cc.cc.lib # libstdc++, libgcc_s + zlib + libjpeg + libwebp + libtiff + openjpeg + lcms2 + freetype + ]; + + installPhase = '' + cp -r ${pyDepsFOD} $out + chmod -R u+w $out + ''; + }; + sitePackages = "${pyDeps}/lib/python3.14/site-packages"; # Settings shim — config/settings.py's `BASE_DIR = parent.parent` would From 118cbbf74bee3eb94ad95768db439a0958dbe6f9 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 09:13:53 -0700 Subject: [PATCH 05/20] C1: pin shower FOD outputHash from run 535 --- containers/shower/default.nix | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/containers/shower/default.nix b/containers/shower/default.nix index fa1f07f..dff5269 100644 --- a/containers/shower/default.nix +++ b/containers/shower/default.nix @@ -105,8 +105,9 @@ let outputHashMode = "recursive"; outputHashAlgo = "sha256"; - # Computed by setting to pkgs.lib.fakeHash and reading the failure. - outputHash = pkgs.lib.fakeHash; + # Pinned dep closure — reproducible until version bumps. To recompute, + # set to pkgs.lib.fakeHash and read the failure. + outputHash = "sha256-DQIzHm7b4f1SHDU/KiclS6yZmF/GlizaeH8CM1Hg59s="; dontFixup = true; }; From f28612f931006323720e07af583255bf93c0b751 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 09:20:45 -0700 Subject: [PATCH 06/20] C1: pin shower image tag to v1.0.0-118cbbf-nix (run 536) Build 536 finished cleanly with the strip-refs FOD + autopatchelf wrapper. The [branch] tag is fine for ArgoCD branch-revision testing; a follow-up C0 will rebuild from main and re-pin to the [main] SHA tag after merge, per docs/how-to/deployment/build-container-image.md. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- argocd/manifests/shower/kustomization.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argocd/manifests/shower/kustomization.yaml b/argocd/manifests/shower/kustomization.yaml index cd4dd08..0ce959f 100644 --- a/argocd/manifests/shower/kustomization.yaml +++ b/argocd/manifests/shower/kustomization.yaml @@ -14,4 +14,4 @@ resources: images: - name: registry.ops.eblu.me/blumeops/shower - newTag: v1.0.0-PLACEHOLDER-nix + newTag: v1.0.0-118cbbf-nix From 3fa608cb08b32a3145393f3fe9e52d38e050ceeb Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 09:23:10 -0700 Subject: [PATCH 07/20] C1: add shower.eblu.me to fly-setup cert list Lets a re-run of `mise run fly-setup` (e.g. after a fly-app rebuild or when bootstrapping fresh) re-issue the cert without remembering the ad-hoc `fly certs add` we did during this deployment. Co-Authored-By: Claude Opus 4.7 (1M context) --- mise-tasks/fly-setup | 1 + 1 file changed, 1 insertion(+) diff --git a/mise-tasks/fly-setup b/mise-tasks/fly-setup index 0c5cb56..be797e5 100755 --- a/mise-tasks/fly-setup +++ b/mise-tasks/fly-setup @@ -23,6 +23,7 @@ echo "IPs allocated" fly certs add docs.eblu.me -a "$APP" 2>/dev/null || true fly certs add cv.eblu.me -a "$APP" 2>/dev/null || true fly certs add forge.eblu.me -a "$APP" 2>/dev/null || true +fly certs add shower.eblu.me -a "$APP" 2>/dev/null || true echo "Certificates configured" echo "Done. Run 'mise run fly-deploy' to deploy." From 702592bcc9d60c08569b13af4666c344f662c9f9 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 10:23:40 -0700 Subject: [PATCH 08/20] C1: bump shower to v1.0.1; collapse WAN admin to tailnet-only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR review caught that we didn't need an admin login surface on WAN. 
App v1.0.1 adds DJANGO_PUBLIC_URL_BASE so QR codes generated from /host/ (now tailnet-only) still point at shower.eblu.me for guest phones — that closes the loop and lets us strip the WAN admin surface entirely. Container: - bump version to 1.0.1 - outputHash → fakeHash (build will print the real one) - entrypoint still does migrate + collectstatic before gunicorn — the app is small enough that auto-migration is fine Manifests: - configmap adds DJANGO_PUBLIC_URL_BASE=https://shower.eblu.me Fly nginx (shower.eblu.me): - drop the /admin/(login|logout) carveout - 403 anything under /admin/ AND /host/ with a "tailnet only" pointer - drop the shower_auth limit_req zone and \$shower_banned geo - drop the shower-admin-login fail2ban filter + jail - drop the shower-deny.conf touch from start.sh Docs: - rename how-to docs/how-to/operations/shower-app.md → shower-on-ringtail.md (mirrors cv-on-indri / docs-on-indri) - new reference card docs/reference/services/shower-app.md per PR review comment 2 (≈30s read; quick facts + cross-links) - rewrite Defense layers section: collapses to general rate limit + django-axes on the tailnet-side login (the only credential surface) - rewrite the .infra.md changelog fragment to match - add a 'Create the admin user' step (kubectl exec createsuperuser) so first-time deploys aren't locked out The nginx-deny action's per-jail \`nginx_deny_file\` generalization stays — harmless future-proofing for the next public service. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- argocd/manifests/shower/configmap.yaml | 11 ++- containers/shower/default.nix | 4 +- docs/changelog.d/shower-app-deploy.infra.md | 15 ++-- .../{shower-app.md => shower-on-ringtail.md} | 45 ++++++++--- docs/reference/services/shower-app.md | 55 ++++++++++++++ fly/Dockerfile | 2 - fly/fail2ban/filter.d/shower-admin-login.conf | 13 ---- fly/fail2ban/jail.d/shower.conf | 8 -- fly/nginx.conf | 74 ++++++------------- fly/start.sh | 3 +- service-versions.yaml | 2 +- 11 files changed, 132 insertions(+), 100 deletions(-) rename docs/how-to/operations/{shower-app.md => shower-on-ringtail.md} (81%) create mode 100644 docs/reference/services/shower-app.md delete mode 100644 fly/fail2ban/filter.d/shower-admin-login.conf delete mode 100644 fly/fail2ban/jail.d/shower.conf diff --git a/argocd/manifests/shower/configmap.yaml b/argocd/manifests/shower/configmap.yaml index 111f524..330e183 100644 --- a/argocd/manifests/shower/configmap.yaml +++ b/argocd/manifests/shower/configmap.yaml @@ -5,7 +5,12 @@ metadata: namespace: shower data: DJANGO_DEBUG: "0" - # Admin lives behind the tailnet; the public proxy blocks /admin/ except - # /admin/login/ and /admin/logout/. /host/'s "Django admin" link follows - # this var so admin CRUD only happens on the tailnet hostname. + # /host/, /admin/, and Django's login surface are all tailnet-only — the + # public proxy 403s everything outside of `/` and `/prizes//`. + # /host/'s "Django admin" link follows DJANGO_ADMIN_URL. DJANGO_ADMIN_URL: "https://shower.ops.eblu.me/admin/" + # /host/ is served on shower.ops.eblu.me (tailnet), but the QR codes it + # generates need to point at the public WAN hostname so guest phones can + # reach them. PUBLIC_URL_BASE overrides Django's request.build_absolute_uri() + # in the QR views — see shower/views.py:_public_url. Added in app v1.0.1. 
+ DJANGO_PUBLIC_URL_BASE: "https://shower.eblu.me" diff --git a/containers/shower/default.nix b/containers/shower/default.nix index dff5269..08983c7 100644 --- a/containers/shower/default.nix +++ b/containers/shower/default.nix @@ -21,7 +21,7 @@ { pkgs ? import { } }: let - version = "1.0.0"; + version = "1.0.1"; python = pkgs.python314; @@ -107,7 +107,7 @@ let outputHashAlgo = "sha256"; # Pinned dep closure — reproducible until version bumps. To recompute, # set to pkgs.lib.fakeHash and read the failure. - outputHash = "sha256-DQIzHm7b4f1SHDU/KiclS6yZmF/GlizaeH8CM1Hg59s="; + outputHash = pkgs.lib.fakeHash; dontFixup = true; }; diff --git a/docs/changelog.d/shower-app-deploy.infra.md b/docs/changelog.d/shower-app-deploy.infra.md index c14fbce..157a068 100644 --- a/docs/changelog.d/shower-app-deploy.infra.md +++ b/docs/changelog.d/shower-app-deploy.infra.md @@ -1,8 +1,9 @@ Wire shower app for public exposure: fly nginx `shower.eblu.me` server -block with `/admin/` blocked at the edge (except `/admin/login/` and -`/admin/logout/`), per-IP rate limit, fail2ban filter+jail with a -shower-specific deny list (`shower-deny.conf`), Caddy route, Pulumi -Gandi CNAME, and a Grafana APM dashboard tracking request rate, error -rate, failed admin logins, latency, bandwidth, and access logs. -Generalized the `nginx-deny` action to accept a per-jail -`nginx_deny_file` parameter so each service has its own ban list. +block as a guest-only surface — splash page, `/prizes//`, static +assets, media. Everything authenticated (`/admin/`, `/host/`, +`/accounts/`) returns 403 with a "tailnet only" pointer. Staff hit +`shower.ops.eblu.me` for the operator console + admin; the app's +v1.0.1 `DJANGO_PUBLIC_URL_BASE` setting makes QR codes generated on +the tailnet point back at the WAN host for guests. Plus a Caddy route +on indri, Pulumi Gandi CNAME, and a Grafana APM dashboard tracking +request rate, error rate, latency, bandwidth, and access logs. 
diff --git a/docs/how-to/operations/shower-app.md b/docs/how-to/operations/shower-on-ringtail.md similarity index 81% rename from docs/how-to/operations/shower-app.md rename to docs/how-to/operations/shower-on-ringtail.md index 9a7af1b..daf1046 100644 --- a/docs/how-to/operations/shower-app.md +++ b/docs/how-to/operations/shower-on-ringtail.md @@ -38,24 +38,29 @@ Internet → shower.eblu.me | Hostname | Reachable from | Notes | |---|---|---| -| `shower.eblu.me` | Public internet | `/admin/` blocked except `/admin/login/`, `/admin/logout/` | -| `shower.ops.eblu.me` | Tailnet | Full app surface, including the admin | +| `shower.eblu.me` | Public internet | Guest surface only — splash, `/prizes//`, `/static/`, `/media/`. Everything authenticated 403s with a tailnet pointer. | +| `shower.ops.eblu.me` | Tailnet | Full app surface — `/host/`, `/admin/`, the works | | `shower.tail8d86e.ts.net` | Tailnet | Bare ProxyGroup endpoint Caddy proxies to | ## Defense layers (public side) -The public path stacks four checks against `/admin/login/` brute force: +The public surface is guest-only, so the threat model collapses: there +is no credential-accepting endpoint reachable from WAN, and nothing on +WAN that requires authentication. -1. **fly nginx `geo $shower_banned`** — per-service ban list populated by - fail2ban (`/etc/nginx/shower-deny.conf`) -2. **fly nginx `limit_req zone=shower_auth`** — 3 r/s per Fly-Client-IP -3. **django-axes** — 5 fails / 1 hour lockout per `(username, ip_address)` -4. **edge `/admin/` block** — anything that isn't `/admin/login/` or - `/admin/logout/` returns 403 from nginx, period +1. **edge auth lockout** — fly nginx 403s `/admin/`, `/host/`, and + anything that would redirect into them. Anyone hitting an auth URL + on WAN gets a "tailnet only" message. +2. **fly nginx `limit_req zone=general`** — 10 r/s per Fly-Client-IP + cushion for the splash form. +3. 
**django-axes** — 5 fails / 1 hour lockout per `(username, ip_address)`, + running on the tailnet-side login. Provides the only credential + defense, since brute-force is only reachable to tailnet members. -The fail2ban filter `shower-admin-login.conf` matches 401/403/429 on -`/admin/login/`. The 429 case catches attackers who keep hammering after -django-axes has already locked them out. +The QR codes that `/host/` (on tailnet) generates for guests embed +`https://shower.eblu.me/...` even though the QR view is served from +the tailnet host. The app's `PUBLIC_URL_BASE` setting (added in v1.0.1) +overrides Django's `request.build_absolute_uri()` for those URLs. ## Persistent storage @@ -193,6 +198,22 @@ it up.) mise run provision-indri -- --tags caddy ``` +### 7. Create the admin user + +The container's entrypoint runs `migrate --noinput` + `collectstatic +--noinput --clear` before gunicorn, so a fresh `db.sqlite3` is schema- +ready as soon as the pod boots. It does *not* create a Django superuser +— that has to happen once, interactively, after the first pod is up: + +```fish +kubectl --context=k3s-ringtail -n shower exec -it deploy/shower -- \ + python -m django createsuperuser +``` + +Use `erich` / your usual email. The same account doubles as the +`@staff_member_required` login for `/host/`. Subsequent staff accounts +can be created from `/admin/auth/user/` once you're signed in. + ## Deploying a new version 1. Bump the wheel version in the app repo (`adelaide-baby-shower-app`) diff --git a/docs/reference/services/shower-app.md b/docs/reference/services/shower-app.md new file mode 100644 index 0000000..26d1764 --- /dev/null +++ b/docs/reference/services/shower-app.md @@ -0,0 +1,55 @@ +--- +title: Shower App +modified: 2026-05-10 +last-reviewed: 2026-05-10 +tags: + - service + - django +--- + +# Shower App + +Django web app for Adelaide / Heidi / Addie's baby shower — guest splash with +a "what did you bring?" 
form, raffle picker, contest-prize ranking via +QR-coded `/prizes//` URLs, and an `/host/` operator console with +drag-rank assignment solving via scipy. + +## Quick Reference + +| Property | Value | +|----------|-------| +| **Public URL** | `shower.eblu.me` (guest surface only — via [[flyio-proxy]]) | +| **Private URL** | `shower.ops.eblu.me` (admin + `/host/` console — Caddy on indri) | +| **Cluster** | [[ringtail]] k3s, namespace `shower` | +| **Container** | `registry.ops.eblu.me/blumeops/shower` (built from `containers/shower/default.nix`) | +| **App source** | `forge.eblu.me/eblume/adelaide-baby-shower-app` (wheel on Forgejo PyPI) | +| **Database** | SQLite on a local-path PVC (`shower-data`, RWO 2 Gi) | +| **Media (prize photos)** | NFS RWX PVC `shower-media` → `sifaka:/volume1/shower` | +| **Secrets** | `Shower (blumeops)` 1Password item → `DJANGO_SECRET_KEY` | + +## Routing + +``` +Internet → shower.eblu.me (Fly nginx, guest-only 403s on /admin/ /host/) + │ + ▼ + Caddy on indri (shower.ops.eblu.me — full surface) + │ + ▼ + Tailscale ProxyGroup → k3s Service → Deployment +``` + +## Backups + +- **SQLite** dumped via `kubectl exec` to indri's `borgmatic_k8s_dump_dir` on every 2 a.m. run (mealie-pattern entry in `borgmatic_k8s_sqlite_dumps`) +- **Media** picked up via `/Volumes/shower` (sifaka SMB mount on indri) in the main `borgmatic_source_directories` list + +Both archive to sifaka + BorgBase. 
+ +## Related + +- [[shower-on-ringtail]] — onboarding + day-of runbook +- [[expose-service-publicly]] — Fly proxy + tailnet pattern this rides on +- [[ringtail]] — host cluster +- [[sifaka#NFS Exports]] — NFS share table +- [[borgmatic]] — backup system diff --git a/fly/Dockerfile b/fly/Dockerfile index 355b404..eae8c35 100644 --- a/fly/Dockerfile +++ b/fly/Dockerfile @@ -20,9 +20,7 @@ COPY --from=docker.io/grafana/alloy@sha256:6e00cf7c5a692ff5f24844529416ed017d76f RUN mkdir -p /var/log/nginx /etc/alloy /tmp/alloy-data COPY fail2ban/filter.d/forge-login.conf /etc/fail2ban/filter.d/forge-login.conf -COPY fail2ban/filter.d/shower-admin-login.conf /etc/fail2ban/filter.d/shower-admin-login.conf COPY fail2ban/jail.d/forge.conf /etc/fail2ban/jail.d/forge.conf -COPY fail2ban/jail.d/shower.conf /etc/fail2ban/jail.d/shower.conf COPY fail2ban/action.d/nginx-deny.conf /etc/fail2ban/action.d/nginx-deny.conf COPY nginx.conf /etc/nginx/nginx.conf diff --git a/fly/fail2ban/filter.d/shower-admin-login.conf b/fly/fail2ban/filter.d/shower-admin-login.conf deleted file mode 100644 index c73cd3a..0000000 --- a/fly/fail2ban/filter.d/shower-admin-login.conf +++ /dev/null @@ -1,13 +0,0 @@ -# Filter for shower-app /admin/login/ failures via nginx JSON access log. -# Matches 401/403/429 responses on the login endpoint, keyed on the -# client_ip field (populated from Fly-Client-IP header). -# -# The 429 case catches attackers who keep hammering after django-axes has -# already locked them out — those requests return 429 from -# axes.middleware.AxesMiddleware before reaching the view. 
- -[Definition] - -failregex = "client_ip":"".*"request_uri":"\/admin\/login[^"]*".*"status":(401|403|429) - -ignoreregex = diff --git a/fly/fail2ban/jail.d/shower.conf b/fly/fail2ban/jail.d/shower.conf deleted file mode 100644 index 59fa7fb..0000000 --- a/fly/fail2ban/jail.d/shower.conf +++ /dev/null @@ -1,8 +0,0 @@ -[shower-admin-login] -enabled = true -filter = shower-admin-login -logpath = /var/log/nginx/access.json.log -maxretry = 5 -findtime = 600 -bantime = 3600 -banaction = nginx-deny[nginx_deny_file=/etc/nginx/shower-deny.conf] diff --git a/fly/nginx.conf b/fly/nginx.conf index 0aca716..44d3903 100644 --- a/fly/nginx.conf +++ b/fly/nginx.conf @@ -34,11 +34,6 @@ http { # bucket. $http_fly_client_ip has the actual client IP. limit_req_zone $http_fly_client_ip zone=forge_auth:10m rate=3r/s; - # Shower-app rate limit on /admin/login/ (the only admin path the public - # proxy exposes). 3r/s with django-axes (5 strikes, 1h lockout) gives - # plenty of room for a real staff login while making brute-force costly. - limit_req_zone $http_fly_client_ip zone=shower_auth:10m rate=3r/s; - # fail2ban deny list — banned IPs are written here by fail2ban and # checked via the $forge_banned variable. The file is touched at # container start to ensure it exists. @@ -47,13 +42,6 @@ http { include /etc/nginx/forge-deny.conf; } - # Per-service deny list for the shower app — populated by fail2ban - # when /admin/login/ attempts trip the threshold. Same scheme as forge. - geo $http_fly_client_ip $shower_banned { - default 0; - include /etc/nginx/shower-deny.conf; - } - # Proxy cache: 200MB, evict after 24h of no access proxy_cache_path /tmp/cache levels=1:2 keys_zone=services:10m max_size=200m inactive=24h; @@ -300,27 +288,25 @@ http { } } - # --- shower.eblu.me (dynamic Django: guest splash + raffle/prize console) --- - # Public-facing Adelaide baby shower app. 
Defense layers: - # * geo+fail2ban deny list ($shower_banned) - # * nginx limit_req on /admin/login/ via the shower_auth zone - # * django-axes inside Django (5 fails / 1h lockout per user+IP) - # * /admin/ paths blocked at the proxy except /admin/login/ and /admin/logout/ - # so staff can sign in publicly but the CRUD admin is tailnet-only + # --- shower.eblu.me (Adelaide baby shower — guest-only public surface) --- + # Only the guest paths (`/`, `/prizes//`, /static/, /media/) are + # exposed on WAN. /host/, /admin/, and Django's login views are blocked + # at the edge with a 403 pointing at the tailnet hostname — staff sign + # in on shower.ops.eblu.me, which is reachable from any device with + # Tailscale installed. Defense layers reduce to: general per-IP rate + # limit + django-axes (5 fails / 1h) on the tailnet-side login. No + # fail2ban needed here because the public surface no longer takes + # credentials of any kind. server { listen 8080; server_name shower.eblu.me; - # Block fail2ban-banned IPs - if ($shower_banned) { - return 403 "Temporarily blocked. Try again later.\n"; - } - # General per-IP rate limit (cushion for the splash page + form posts) limit_req zone=general burst=20 nodelay; - # Image uploads from /host/'s prize cropper are ~150-300 KiB JPEGs; - # 5 MiB matches the Django-side cap. + # Image uploads from /host/'s prize cropper are ~150-300 KiB JPEGs. + # The host page itself isn't reachable here; client_max_body_size + # only limits request bodies (not /media/ reads). 5 MiB matches Django. client_max_body_size 5m; # Security headers — HSTS matches Django's SECURE_HSTS_SECONDS. @@ -328,6 +314,8 @@ http { add_header X-Content-Type-Options "nosniff" always; add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; add_header Referrer-Policy "same-origin" always; + # GNU Terry Pratchett — keep the name moving. 
+ add_header X-Clacks-Overhead "GNU Terry Pratchett" always; error_page 502 503 504 /error.html; location = /error.html { @@ -335,38 +323,24 @@ http { internal; } - # GNU Terry Pratchett — keep the name moving. - add_header X-Clacks-Overhead "GNU Terry Pratchett" always; - # Reject indexers — there's nothing here we want crawled. location = /robots.txt { default_type text/plain; return 200 "User-agent: *\nDisallow: /\n"; } - # Public admin surface: only the login/logout endpoints, rate-limited. - location ~ ^/admin/(login|logout)/? { - limit_req zone=shower_auth burst=5 nodelay; - - proxy_pass https://indri_backend$request_uri; - proxy_ssl_verify off; - proxy_ssl_server_name on; - proxy_ssl_name shower.ops.eblu.me; - proxy_intercept_errors on; - - proxy_set_header Host shower.ops.eblu.me; - proxy_set_header X-Real-IP $http_fly_client_ip; - proxy_set_header X-Forwarded-For $http_fly_client_ip; - proxy_set_header X-Forwarded-Proto $scheme; - - proxy_http_version 1.1; - proxy_set_header Connection $connection_upgrade; + # Admin surface: tailnet-only. Anything under /admin/ — login, + # logout, CRUD UI, password reset — returns 403 with a pointer to + # the tailnet host. Django's `staff_member_required` will redirect + # /host/ to /admin/login/, which lands on this 403 if a guest + # device wanders into it. Staff hit the tailnet host directly. + location /admin/ { + return 403 "Authentication is tailnet-only — visit shower.ops.eblu.me.\n"; } - # Block the rest of /admin/ at the public edge. The admin CRUD UI - # is only reachable via shower.ops.eblu.me on the tailnet. - location /admin/ { - return 403 "The Django admin is tailnet-only — visit shower.ops.eblu.me.\n"; + # Operator console: tailnet-only. Same rationale as /admin/. 
+ location /host/ { + return 403 "The host console is tailnet-only — visit shower.ops.eblu.me.\n"; } location / { diff --git a/fly/start.sh b/fly/start.sh index ef17641..a924849 100644 --- a/fly/start.sh +++ b/fly/start.sh @@ -19,10 +19,9 @@ until nslookup forge.tail8d86e.ts.net 100.100.100.100 > /dev/null 2>&1; do done echo "MagicDNS ready" -# Ensure fail2ban per-service deny files exist before nginx starts +# Ensure fail2ban deny file exists before nginx starts # (the geo directive's `include` fails if the file is missing). touch /etc/nginx/forge-deny.conf -touch /etc/nginx/shower-deny.conf # Start nginx — MagicDNS is available, upstreams resolved. nginx -g "daemon off;" & diff --git a/service-versions.yaml b/service-versions.yaml index 8caa2be..aa3a6b6 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -47,7 +47,7 @@ services: - name: shower type: argocd last-reviewed: 2026-05-10 - current-version: "1.0.0" + current-version: "1.0.1" upstream-source: https://forge.eblu.me/eblume/adelaide-baby-shower-app notes: | Django app for Adelaide / Heidi / Addie's baby shower. Wheel From 3ead5ae213fd9ccfd45474270ea634536a2a5f7e Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 10:28:50 -0700 Subject: [PATCH 09/20] C1: pin shower v1.0.1 FOD outputHash from run 537 --- containers/shower/default.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/shower/default.nix b/containers/shower/default.nix index 08983c7..20b7525 100644 --- a/containers/shower/default.nix +++ b/containers/shower/default.nix @@ -107,7 +107,7 @@ let outputHashAlgo = "sha256"; # Pinned dep closure — reproducible until version bumps. To recompute, # set to pkgs.lib.fakeHash and read the failure. 
- outputHash = pkgs.lib.fakeHash; + outputHash = "sha256-9J2cxNxFSkSWLy2nwPtqV0Qtn54YVJ3LiijsqKQT+uc="; dontFixup = true; }; From 6d3cce56269b3fbda15ec5bc35b8592e3f0e598b Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 10:34:45 -0700 Subject: [PATCH 10/20] C1: pin shower image tag to v1.0.1-3ead5ae-nix (run 538) --- argocd/manifests/shower/kustomization.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argocd/manifests/shower/kustomization.yaml b/argocd/manifests/shower/kustomization.yaml index 0ce959f..8f54972 100644 --- a/argocd/manifests/shower/kustomization.yaml +++ b/argocd/manifests/shower/kustomization.yaml @@ -14,4 +14,4 @@ resources: images: - name: registry.ops.eblu.me/blumeops/shower - newTag: v1.0.0-118cbbf-nix + newTag: v1.0.1-3ead5ae-nix From 95b663209f9d0b0dc6cb59c2002d4b35659d526e Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 10:46:28 -0700 Subject: [PATCH 11/20] C1: bake shower runtime env into image; allow tailnet host MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two complementary fixes for the deploy that just landed: 1. Pod was 0/1 Running because the readiness probe sends `Host: shower.ops.eblu.me` and the app's hardcoded ALLOWED_HOSTS only includes `shower.eblu.me`. settings.py exposes a DJANGO_ALLOWED_HOSTS env-var extras hook for exactly this case — wired into the configmap. 2. `kubectl exec deploy/shower -- python -m django ` returned "No module named django" because PYTHONPATH lived only inside the entrypoint script. Moved PYTHONPATH, DJANGO_SETTINGS_MODULE, PATH, and HOME into the image's Env block so exec'd shells inherit them. The entrypoint now just runs the boot sequence; the exports are redundant (image Env covers them) and gone. FOD inputs are unchanged so outputHash stays valid; no fakeHash dance. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- argocd/manifests/shower/configmap.yaml | 6 ++++++ containers/shower/default.nix | 22 ++++++++++++++-------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/argocd/manifests/shower/configmap.yaml b/argocd/manifests/shower/configmap.yaml index 330e183..6102c1e 100644 --- a/argocd/manifests/shower/configmap.yaml +++ b/argocd/manifests/shower/configmap.yaml @@ -5,6 +5,12 @@ metadata: namespace: shower data: DJANGO_DEBUG: "0" + # The app's settings.py hardcodes ALLOWED_HOSTS = ["shower.eblu.me", + # "localhost", "127.0.0.1"] and exposes this env var as a comma-separated + # extras list. shower.ops.eblu.me is what Caddy on indri and the + # Tailscale ProxyGroup both send as the Host header, so the app needs to + # accept it. + DJANGO_ALLOWED_HOSTS: "shower.ops.eblu.me" # /host/, /admin/, and Django's login surface are all tailnet-only — the # public proxy 403s everything outside of `/` and `/prizes//`. # /host/'s "Django admin" link follows DJANGO_ADMIN_URL. diff --git a/containers/shower/default.nix b/containers/shower/default.nix index 20b7525..23539fb 100644 --- a/containers/shower/default.nix +++ b/containers/shower/default.nix @@ -154,26 +154,25 @@ let STATIC_ROOT = "/app/data/staticfiles" ''; + # PYTHONPATH, DJANGO_SETTINGS_MODULE, PATH, and HOME live in the image's + # `Env` block below — that way `kubectl exec deploy/shower -- python -m + # django ` Just Works without an inline `env` ceremony. + # The entrypoint just changes directory and runs the boot sequence. 
entrypoint = pkgs.writeShellScript "shower-entrypoint" '' set -eu - export HOME=/app/data - export PATH=${pyDeps}/bin:${python}/bin:/bin - export PYTHONPATH=/app:${sitePackages} - export DJANGO_SETTINGS_MODULE=local_settings - cd /app mkdir -p /app/data /app/media echo "shower: running migrations" - ${python}/bin/python -m django migrate --noinput + python -m django migrate --noinput echo "shower: collecting static files" - ${python}/bin/python -m django collectstatic --noinput --clear + python -m django collectstatic --noinput --clear echo "shower: starting gunicorn" - exec ${pyDeps}/bin/gunicorn \ + exec gunicorn \ --bind 0.0.0.0:8000 \ --workers 2 \ --forwarded-allow-ips='*' \ @@ -213,6 +212,13 @@ pkgs.dockerTools.buildLayeredImage { "LANG=C.UTF-8" "LC_ALL=C.UTF-8" "PYTHONDONTWRITEBYTECODE=1" + "HOME=/app/data" + "PATH=${pyDeps}/bin:${python}/bin:/bin" + # /app first so local_settings.py is importable; sitePackages second so + # django, gunicorn, etc. resolve. Inherited by entrypoint + any + # `kubectl exec` so manual django subcommands work without ceremony. 
+ "PYTHONPATH=/app:${sitePackages}" + "DJANGO_SETTINGS_MODULE=local_settings" ]; ExposedPorts = { "8000/tcp" = { }; From 8c683d0985a1048326d60449c22ee473804aab7f Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 10:53:09 -0700 Subject: [PATCH 12/20] C1: pin shower image tag to v1.0.1-95b6632-nix (run 539) --- argocd/manifests/shower/kustomization.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argocd/manifests/shower/kustomization.yaml b/argocd/manifests/shower/kustomization.yaml index 8f54972..431aaaa 100644 --- a/argocd/manifests/shower/kustomization.yaml +++ b/argocd/manifests/shower/kustomization.yaml @@ -14,4 +14,4 @@ resources: images: - name: registry.ops.eblu.me/blumeops/shower - newTag: v1.0.1-3ead5ae-nix + newTag: v1.0.1-95b6632-nix From eec455e56ab2dc1e22cab22790237e9f15727b53 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 11:25:02 -0700 Subject: [PATCH 13/20] C1: fix expose-service-publicly tailscale key flow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doc said "Store the auth key in 1Password as well for the \`fly-setup\` mise task" right next to the description of fly-setup, which reads the key from Pulumi state, not 1Password. No code path anywhere reads this key from 1P — the instruction is vestigial from an earlier design and confused us during the v1.0.1 rotation when the flyio-proxy-key expired. 
Rewrite the section to: - point at \`mise run fly-setup\` as the canonical path - state explicitly that Pulumi state is the only source of truth - document the rotation recipe (tailnet-up --replace= + fly-setup + fly-deploy) for the next time this 90-day key lapses Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/tutorials/expose-service-publicly.md | 36 ++++++++++++++++++----- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/docs/tutorials/expose-service-publicly.md b/docs/tutorials/expose-service-publicly.md index 6bc8fae..886cad4 100644 --- a/docs/tutorials/expose-service-publicly.md +++ b/docs/tutorials/expose-service-publicly.md @@ -176,17 +176,39 @@ Indri carries `tag:flyio-target` so the Fly proxy can reach Caddy. No per-servic Deploy: `mise run tailnet-preview` then `mise run tailnet-up`. -After deploying, extract the auth key and set it as a Fly.io secret: +After deploying, push the auth key to Fly.io. The simplest path is +`mise run fly-setup`, which reads the current value from Pulumi state +and stages it as a Fly.io secret: ```bash -# Get the key from Pulumi state -cd pulumi/tailscale && pulumi stack output flyio_authkey --show-secrets - -# Set it in Fly.io -fly secrets set TS_AUTHKEY="tskey-auth-..." -a blumeops-proxy +mise run fly-setup ``` -Store the auth key in 1Password as well for the `fly-setup` mise task. +Manual equivalent for reference: + +```bash +cd pulumi/tailscale && pulumi stack output flyio_authkey --show-secrets +# then in fly/: +fly secrets set TS_AUTHKEY="tskey-auth-..." -a blumeops-proxy --stage +``` + +**Pulumi state is the only source of truth for this key.** No other +process (mise tasks, ansible, scripts) reads it from anywhere else — +in particular, the key is not stored in 1Password. 
To rotate +(every 90 days, or after a compromise), force-replace the resource +and re-run `fly-setup`: + +```bash +mise run tailnet-up -- \ + --replace='urn:pulumi:tail8d86e::blumeops-tailnet::tailscale:index/tailnetKey:TailnetKey::flyio-proxy-key' +mise run fly-setup +mise run fly-deploy +``` + +Pulumi destroys the old key and mints a new 90-day one in a single +operation. Older fly machines that already authed against the old key +are unaffected (they don't need it after the initial join); only +*new* machine starts read the rotated value. ### Step 4: Mise tasks From 473bc781819ddd3374b1434009e3472287825c81 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 12:06:28 -0700 Subject: [PATCH 14/20] C1: bump shower to v1.0.2 (WhiteNoise upstreamed); cache static on fly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit App v1.0.2 ships WhiteNoise for /static/ and /media/, so the blumeops-side workaround is no longer needed: - containers/shower/default.nix: drop the WhiteNoise pip dep + the middleware-injection block from local_settings. The shim is back to just path overrides (DATABASES.NAME, MEDIA_ROOT, STATIC_ROOT). - version → 1.0.2, outputHash → fakeHash for re-pinning. - service-versions.yaml mirrored. fly/nginx.conf: cache /static/ (1y) and /media/ (1d) per location for shower.eblu.me. /static/ filenames are content-hashed thanks to CompressedManifestStaticFilesStorage so a year is safe and invalidation is automatic on the next collectstatic. Co-Authored-By: Claude Opus 4.7 (1M context) --- containers/shower/default.nix | 4 +-- fly/nginx.conf | 54 +++++++++++++++++++++++++++++++++++ service-versions.yaml | 2 +- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/containers/shower/default.nix b/containers/shower/default.nix index 23539fb..a59e3ab 100644 --- a/containers/shower/default.nix +++ b/containers/shower/default.nix @@ -21,7 +21,7 @@ { pkgs ? 
import <nixpkgs> { } }: let - version = "1.0.1"; + version = "1.0.2"; python = pkgs.python314; @@ -107,7 +107,7 @@ let outputHashAlgo = "sha256"; # Pinned dep closure — reproducible until version bumps. To recompute, # set to pkgs.lib.fakeHash and read the failure. - outputHash = "sha256-9J2cxNxFSkSWLy2nwPtqV0Qtn54YVJ3LiijsqKQT+uc="; + outputHash = pkgs.lib.fakeHash; dontFixup = true; }; diff --git a/fly/nginx.conf b/fly/nginx.conf index 44d3903..7a70167 100644 --- a/fly/nginx.conf +++ b/fly/nginx.conf @@ -343,6 +343,60 @@ http { return 403 "The host console is tailnet-only — visit shower.ops.eblu.me.\n"; } + # Static assets — WhiteNoise + CompressedManifestStaticFilesStorage + # gives content-hashed filenames, so cache aggressively. Hashed + # names make cache invalidation automatic on app upgrades. + location /static/ { + proxy_pass https://indri_backend$request_uri; + proxy_ssl_verify off; + proxy_ssl_server_name on; + proxy_ssl_name shower.ops.eblu.me; + + proxy_http_version 1.1; + proxy_set_header Connection $connection_upgrade; + proxy_set_header Host shower.ops.eblu.me; + proxy_set_header X-Real-IP $http_fly_client_ip; + proxy_set_header X-Forwarded-For $http_fly_client_ip; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_cache services; + proxy_cache_valid 200 1y; + proxy_cache_valid 404 1m; + proxy_cache_use_stale error timeout updating; + proxy_cache_lock on; + proxy_cache_key $host$uri; + proxy_ignore_headers Cache-Control Set-Cookie; + + add_header X-Cache-Status $upstream_cache_status; + } + + # Prize photo uploads. Shorter TTL than /static/ because filenames + # aren't content-hashed — operators can re-upload a prize photo + # and we want guests to see the new image within a day. 
+ location /media/ { + proxy_pass https://indri_backend$request_uri; + proxy_ssl_verify off; + proxy_ssl_server_name on; + proxy_ssl_name shower.ops.eblu.me; + + proxy_http_version 1.1; + proxy_set_header Connection $connection_upgrade; + proxy_set_header Host shower.ops.eblu.me; + proxy_set_header X-Real-IP $http_fly_client_ip; + proxy_set_header X-Forwarded-For $http_fly_client_ip; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_cache services; + proxy_cache_valid 200 1d; + proxy_cache_valid 404 1m; + proxy_cache_use_stale error timeout updating; + proxy_cache_lock on; + proxy_cache_key $host$uri; + proxy_ignore_headers Cache-Control Set-Cookie; + + add_header X-Cache-Status $upstream_cache_status; + } + location / { proxy_pass https://indri_backend$request_uri; proxy_ssl_verify off; diff --git a/service-versions.yaml b/service-versions.yaml index aa3a6b6..74d467e 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -47,7 +47,7 @@ services: - name: shower type: argocd last-reviewed: 2026-05-10 - current-version: "1.0.1" + current-version: "1.0.2" upstream-source: https://forge.eblu.me/eblume/adelaide-baby-shower-app notes: | Django app for Adelaide / Heidi / Addie's baby shower. Wheel From 83b038e1f4b6960df8091aa57f3d8cbd28f58a3b Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 12:11:54 -0700 Subject: [PATCH 15/20] C1: pin shower v1.0.2 FOD outputHash from run 540 --- containers/shower/default.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/shower/default.nix b/containers/shower/default.nix index a59e3ab..c968a7b 100644 --- a/containers/shower/default.nix +++ b/containers/shower/default.nix @@ -107,7 +107,7 @@ let outputHashAlgo = "sha256"; # Pinned dep closure — reproducible until version bumps. To recompute, # set to pkgs.lib.fakeHash and read the failure. 
- outputHash = pkgs.lib.fakeHash; + outputHash = "sha256-tSTH/HaDY7M0qxlauBTM+JekZAgF++K2lGP3PLvym/o="; dontFixup = true; }; From 727ca2b460c8fe10e671217106352c55846ff8e6 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 12:21:13 -0700 Subject: [PATCH 16/20] C1: pin shower image to v1.0.2-83b038e-nix --- argocd/manifests/shower/kustomization.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argocd/manifests/shower/kustomization.yaml b/argocd/manifests/shower/kustomization.yaml index 431aaaa..eb5e131 100644 --- a/argocd/manifests/shower/kustomization.yaml +++ b/argocd/manifests/shower/kustomization.yaml @@ -14,4 +14,4 @@ resources: images: - name: registry.ops.eblu.me/blumeops/shower - newTag: v1.0.1-95b6632-nix + newTag: v1.0.2-83b038e-nix From 2d38418e6e3100486fdb152e5780808a515ba41f Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 13:06:48 -0700 Subject: [PATCH 17/20] C1: close forge package leak at the fly edge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit forge.eblu.me's package registry (/api/packages/* and /api/v1/packages/*) served anonymous reads to the world even for private-repo releases — Forgejo's per-user visibility treats packages as world-readable when the owner's Visibility is Public, and we keep eblume Public so the profile page stays open. The sdist downloads include full source trees of private repos; that's the leak. The fix is to keep the user public but block /api/packages/* and /api/v1/packages/* at the proxy edge. forge.ops.eblu.me (tailnet) is untouched, so CI workflows + gilbert's uv + the nix-container-builder still work — they just need to use the tailnet hostname. 
Three consumers updated to forge.ops.eblu.me: - containers/shower/default.nix (the FOD pip --extra-index-url) - ansible/roles/cv/defaults/main.yml (cv_release_url for generic package) - chezmoi-tracked fish dotfiles (devpi.fish + conf.d/pypi.fish) — edited in chezmoi source, user will apply separately The blumeops repo had no other forge-pypi consumers (audited: workers, runner-job-image, ansible roles, container builds). Doc references in changelog fragments + comments left as-is — they describe history. The proper long-term fix is to move private packages to a Limited- visibility Forgejo org instead of relying on a proxy-side block (see queued Todoist for the migration plan). Edge block stays as defense in depth. Co-Authored-By: Claude Opus 4.7 (1M context) --- ansible/roles/cv/defaults/main.yml | 2 +- containers/shower/default.nix | 2 +- fly/nginx.conf | 17 +++++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/ansible/roles/cv/defaults/main.yml b/ansible/roles/cv/defaults/main.yml index 734e52b..a18cc82 100644 --- a/ansible/roles/cv/defaults/main.yml +++ b/ansible/roles/cv/defaults/main.yml @@ -3,7 +3,7 @@ # Caddy serves cv_content_dir directly via the static-kind service block. 
cv_version: "v1.0.3" -cv_release_url: "https://forge.eblu.me/api/packages/eblume/generic/cv/{{ cv_version }}/cv-{{ cv_version }}.tar.gz" +cv_release_url: "https://forge.ops.eblu.me/api/packages/eblume/generic/cv/{{ cv_version }}/cv-{{ cv_version }}.tar.gz" cv_home: /Users/erichblume/blumeops/cv cv_content_dir: "{{ cv_home }}/content" diff --git a/containers/shower/default.nix b/containers/shower/default.nix index c968a7b..1b12649 100644 --- a/containers/shower/default.nix +++ b/containers/shower/default.nix @@ -49,7 +49,7 @@ let "$TMPDIR/venv/bin/pip" install \ --no-cache-dir \ --index-url=https://pypi.ops.eblu.me/root/pypi/+simple/ \ - --extra-index-url=https://forge.eblu.me/api/packages/eblume/pypi/simple/ \ + --extra-index-url=https://forge.ops.eblu.me/api/packages/eblume/pypi/simple/ \ "adelaide-baby-shower-app==${version}" \ gunicorn diff --git a/fly/nginx.conf b/fly/nginx.conf index 7a70167..089971c 100644 --- a/fly/nginx.conf +++ b/fly/nginx.conf @@ -184,6 +184,23 @@ http { return 200 "User-agent: *\nDisallow: /mirrors/\nDisallow: /user/\nDisallow: /users/\nDisallow: /*/archive/\nDisallow: /*/releases/download/\n"; } + # Block the package registry at the public edge. Forgejo's per-user + # visibility model treats packages as world-readable when the owner + # has Visibility=Public — which means anyone on the internet can + # enumerate and download every wheel/sdist/generic artifact, even + # for private-repo releases (the sdist contains full source). We + # like keeping eblume's profile public, so we close the hole here + # at the proxy instead: WAN sees 403, tailnet (forge.ops.eblu.me) + # stays open for legitimate consumers (CI workflows, gilbert). + # See docs/tutorials/expose-service-publicly.md for the broader + # threat model on this proxy. 
+ location /api/packages/ { + return 403 "Package downloads are tailnet-only — use forge.ops.eblu.me.\n"; + } + location /api/v1/packages { + return 403 "Package enumeration is tailnet-only — use forge.ops.eblu.me.\n"; + } + # Block swagger API docs — use forge.ops.eblu.me from tailnet location /swagger { return 403 "API documentation is only available at forge.ops.eblu.me (tailnet).\n"; From 039d9b950718e69894146c42e40cd81301fd99f3 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 13:18:16 -0700 Subject: [PATCH 18/20] C1: pull shower sdist for vendored static (fixes /host/ 500) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wheel ships config/ and shower/ only (per pyproject hatchling config), leaving the repo's top-level static/ dir — Sortable.min.js, cropper.min.js, cropper.min.css, prize-placeholder.svg — behind. At runtime, host_dashboard.html's {% static 'css/cropper.min.css' %} hits the manifest, CompressedManifestStaticFilesStorage raises ValueError on the missing entry, /host/ returns 500. Fix on the deploy side: fetch the sdist via fetchurl (pinned SRI hash from forge PyPI), extract its top-level static/ subtree into a non-FOD derivation, lay it down at /app/static in the image. The local_settings shim adds /app/static to STATICFILES_DIRS so collectstatic at boot picks the vendored assets up alongside the Django admin's own static files. Sdist URL is forge.ops.eblu.me/api/packages/... (tailnet) — matches the just-landed edge block on forge.eblu.me/api/packages/*. The nix-container-builder runner on ringtail is on the tailnet, so the FOD fetch works. App doesn't change. v1.0.3 is no longer needed for the static gap — the wheel's "packages = [config, shower]" pattern stays as-is, and we treat the sdist as the canonical bundle for the assets the wheel intentionally omits. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- containers/shower/default.nix | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/containers/shower/default.nix b/containers/shower/default.nix index 1b12649..d9863e1 100644 --- a/containers/shower/default.nix +++ b/containers/shower/default.nix @@ -25,6 +25,28 @@ let python = pkgs.python314; + # The repo's top-level static/ directory (vendored Sortable + cropper + # JS/CSS, prize placeholder SVG) isn't shipped in the wheel — hatchling + # only packages config/ and shower/, leaving the repo-root static/ + # behind. Pull the sdist (which contains the full source tree) and + # extract just the static/ subtree into the image as /app/static. + # local_settings adds it to STATICFILES_DIRS so collectstatic at boot + # picks it up alongside the Django admin's static files. + # + # Fetched from forge.ops.eblu.me (tailnet) because /api/packages/* is + # blocked at the fly edge — see fly/nginx.conf forge.eblu.me block. + # Hash is the upstream sha256 from forge PyPI's simple index. + showerSdist = pkgs.fetchurl { + name = "adelaide_baby_shower_app-${version}.tar.gz"; + url = "https://forge.ops.eblu.me/api/packages/eblume/pypi/files/adelaide-baby-shower-app/${version}/adelaide_baby_shower_app-${version}.tar.gz"; + hash = "sha256-nlCtlx9zuYaLoJZSckybLV5YPpA8vZamN96O3RXOstM="; + }; + + staticAssets = pkgs.runCommand "shower-static-assets-${version}" { } '' + ${pkgs.gnutar}/bin/tar -xzf ${showerSdist} -C $TMPDIR + cp -r $TMPDIR/adelaide_baby_shower_app-${version}/static $out + ''; + # Fixed-output derivation: pip-installs the app wheel + every transitive # dep into a single target dir. FODs get network access in exchange for # a pinned output hash, which means the whole dependency closure is @@ -147,11 +169,17 @@ let # otherwise resolve to site-packages, scattering db.sqlite3 / media / # staticfiles into the venv. Pin them to /app/{data,media,data/staticfiles}. 
localSettings = pkgs.writeText "local_settings.py" '' + from pathlib import Path + from config.settings import * # noqa: F401,F403 DATABASES["default"]["NAME"] = "/app/data/db.sqlite3" MEDIA_ROOT = "/app/media" STATIC_ROOT = "/app/data/staticfiles" + # /app/static comes from the repo-root static/ subtree of the sdist + # (see default.nix staticAssets). Added because the wheel doesn't + # ship vendored Sortable/cropper assets. + STATICFILES_DIRS = [Path("/app/static")] ''; # PYTHONPATH, DJANGO_SETTINGS_MODULE, PATH, and HOME live in the image's @@ -195,6 +223,8 @@ pkgs.dockerTools.buildLayeredImage { mkdir -p app/data app/media tmp chmod 1777 tmp cp ${localSettings} app/local_settings.py + cp -r ${staticAssets} app/static + chmod -R u+w app/static ''; fakeRootCommands = '' From c430528583d898fa8a1d9b6040d2c95c70cb0873 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 13:24:02 -0700 Subject: [PATCH 19/20] C1: pin shower image to v1.0.2-039d9b9-nix (with sdist static fetch) --- argocd/manifests/shower/kustomization.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/argocd/manifests/shower/kustomization.yaml b/argocd/manifests/shower/kustomization.yaml index eb5e131..0afc8e3 100644 --- a/argocd/manifests/shower/kustomization.yaml +++ b/argocd/manifests/shower/kustomization.yaml @@ -14,4 +14,4 @@ resources: images: - name: registry.ops.eblu.me/blumeops/shower - newTag: v1.0.2-83b038e-nix + newTag: v1.0.2-039d9b9-nix From fb6067b620f281f860c24430a96574cff90e3f99 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 13:44:22 -0700 Subject: [PATCH 20/20] C1: shower-specific rate-limit zone for venue-wifi NAT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Default `general` zone (10r/s burst=20) is tuned for internet drive-by traffic. 
At the party, 30 guests scanning the splash QR from one venue-wifi NAT'd public IP would each fetch HTML + ~5 static assets within a few seconds — easily clearing burst=20, and the second-wave guests would see 503 with no auto-retry. New shower_general zone (50r/s burst=200) absorbs that simultaneous-load spike. Trade-off: slow scanners no longer trip it. The 45.88.138.44 burst we already saw in Loki fired ~30 req in 2s (~15r/s), which is below the new sustained 50r/s; only clients exceeding 50r/s sustained are throttled, and burst=200 is still a hard cap on instantaneous spikes. Self-healing: `limit_req` is a leaky bucket — no persistent ban, nothing to manually flush. A guest who trips it auto-recovers within ~1s; tuning here is about not tripping it on legit traffic in the first place. Co-Authored-By: Claude Opus 4.7 (1M context) --- fly/nginx.conf | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/fly/nginx.conf b/fly/nginx.conf index 089971c..570e6c9 100644 --- a/fly/nginx.conf +++ b/fly/nginx.conf @@ -34,6 +34,15 @@ http { # bucket. $http_fly_client_ip has the actual client IP. limit_req_zone $http_fly_client_ip zone=forge_auth:10m rate=3r/s; + # Shower-specific zone: loose enough that ~30 guests sharing a single + # venue-wifi NAT'd public IP can all scan the QR and load the splash + # (HTML + a handful of static asset hits each) without anyone tripping + # the limit. 50r/s + burst=200 covers the simultaneous-load spike; + # only clients above that are throttled (the .env-sweeping bot we saw + # fired ~30 req in 2s, ~15r/s, so it stays under this limit). See the + # shower.eblu.me server block for the matching `limit_req`. + limit_req_zone $http_fly_client_ip zone=shower_general:10m rate=50r/s; + # fail2ban deny list — banned IPs are written here by fail2ban and # checked via the $forge_banned variable. The file is touched at # container start to ensure it exists. 
@@ -318,8 +327,13 @@ http { listen 8080; server_name shower.eblu.me; - # General per-IP rate limit (cushion for the splash page + form posts) - limit_req zone=general burst=20 nodelay; + # Per-IP rate limit. shower_general (50r/s, burst=200) instead of + # the global `general` zone because at the party, guests on the + # venue's wifi all NAT through a single Fly-Client-IP — 30 guests + # scanning the QR at once would each fetch HTML + a few static + # assets, easily clearing 20 burst on `general`. Exploit scanners + # still trip it (sustained ≫ 50r/s patterns). + limit_req zone=shower_general burst=200 nodelay; # Image uploads from /host/'s prize cropper are ~150-300 KiB JPEGs. # The host page itself isn't reachable here, but /media/ reads can