From 702592bcc9d60c08569b13af4666c344f662c9f9 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Mon, 11 May 2026 10:23:40 -0700 Subject: [PATCH] C1: bump shower to v1.0.1; collapse WAN admin to tailnet-only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR review caught that we didn't need an admin login surface on WAN. App v1.0.1 adds DJANGO_PUBLIC_URL_BASE so QR codes generated from /host/ (now tailnet-only) still point at shower.eblu.me for guest phones — that closes the loop and lets us strip the WAN admin surface entirely. Container: - bump version to 1.0.1 - outputHash → fakeHash (build will print the real one) - entrypoint still does migrate + collectstatic before gunicorn — the app is small enough that auto-migration is fine Manifests: - configmap adds DJANGO_PUBLIC_URL_BASE=https://shower.eblu.me Fly nginx (shower.eblu.me): - drop the /admin/(login|logout) carveout - 403 anything under /admin/ AND /host/ with a "tailnet only" pointer - drop the shower_auth limit_req zone and \$shower_banned geo - drop the shower-admin-login fail2ban filter + jail - drop the shower-deny.conf touch from start.sh Docs: - rename how-to docs/how-to/operations/shower-app.md → shower-on-ringtail.md (mirrors cv-on-indri / docs-on-indri) - new reference card docs/reference/services/shower-app.md per PR review comment 2 (≈30s read; quick facts + cross-links) - rewrite Defense layers section: collapses to general rate limit + django-axes on the tailnet-side login (the only credential surface) - rewrite the .infra.md changelog fragment to match - add a 'Create the admin user' step (kubectl exec createsuperuser) so first-time deploys aren't locked out The nginx-deny action's per-jail \`nginx_deny_file\` generalization stays — harmless future-proofing for the next public service. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- argocd/manifests/shower/configmap.yaml | 11 ++- containers/shower/default.nix | 4 +- docs/changelog.d/shower-app-deploy.infra.md | 15 ++-- .../{shower-app.md => shower-on-ringtail.md} | 45 ++++++++--- docs/reference/services/shower-app.md | 55 ++++++++++++++ fly/Dockerfile | 2 - fly/fail2ban/filter.d/shower-admin-login.conf | 13 ---- fly/fail2ban/jail.d/shower.conf | 8 -- fly/nginx.conf | 74 ++++++------------- fly/start.sh | 3 +- service-versions.yaml | 2 +- 11 files changed, 132 insertions(+), 100 deletions(-) rename docs/how-to/operations/{shower-app.md => shower-on-ringtail.md} (81%) create mode 100644 docs/reference/services/shower-app.md delete mode 100644 fly/fail2ban/filter.d/shower-admin-login.conf delete mode 100644 fly/fail2ban/jail.d/shower.conf diff --git a/argocd/manifests/shower/configmap.yaml b/argocd/manifests/shower/configmap.yaml index 111f524..330e183 100644 --- a/argocd/manifests/shower/configmap.yaml +++ b/argocd/manifests/shower/configmap.yaml @@ -5,7 +5,12 @@ metadata: namespace: shower data: DJANGO_DEBUG: "0" - # Admin lives behind the tailnet; the public proxy blocks /admin/ except - # /admin/login/ and /admin/logout/. /host/'s "Django admin" link follows - # this var so admin CRUD only happens on the tailnet hostname. + # /host/ and /admin/ (where Django's login views live) are tailnet-only — + # the public proxy 403s both prefixes and passes everything else through. + # /host/'s "Django admin" link follows DJANGO_ADMIN_URL. DJANGO_ADMIN_URL: "https://shower.ops.eblu.me/admin/" + # /host/ is served on shower.ops.eblu.me (tailnet), but the QR codes it + # generates need to point at the public WAN hostname so guest phones can + # reach them. PUBLIC_URL_BASE overrides Django's request.build_absolute_uri() + # in the QR views — see shower/views.py:_public_url. Added in app v1.0.1. 
+ DJANGO_PUBLIC_URL_BASE: "https://shower.eblu.me" diff --git a/containers/shower/default.nix b/containers/shower/default.nix index dff5269..08983c7 100644 --- a/containers/shower/default.nix +++ b/containers/shower/default.nix @@ -21,7 +21,7 @@ { pkgs ? import { } }: let - version = "1.0.0"; + version = "1.0.1"; python = pkgs.python314; @@ -107,7 +107,7 @@ let outputHashAlgo = "sha256"; # Pinned dep closure — reproducible until version bumps. To recompute, # set to pkgs.lib.fakeHash and read the failure. - outputHash = "sha256-DQIzHm7b4f1SHDU/KiclS6yZmF/GlizaeH8CM1Hg59s="; + outputHash = pkgs.lib.fakeHash; dontFixup = true; }; diff --git a/docs/changelog.d/shower-app-deploy.infra.md b/docs/changelog.d/shower-app-deploy.infra.md index c14fbce..157a068 100644 --- a/docs/changelog.d/shower-app-deploy.infra.md +++ b/docs/changelog.d/shower-app-deploy.infra.md @@ -1,8 +1,9 @@ Wire shower app for public exposure: fly nginx `shower.eblu.me` server -block with `/admin/` blocked at the edge (except `/admin/login/` and -`/admin/logout/`), per-IP rate limit, fail2ban filter+jail with a -shower-specific deny list (`shower-deny.conf`), Caddy route, Pulumi -Gandi CNAME, and a Grafana APM dashboard tracking request rate, error -rate, failed admin logins, latency, bandwidth, and access logs. -Generalized the `nginx-deny` action to accept a per-jail -`nginx_deny_file` parameter so each service has its own ban list. +block as a guest-only surface — splash page, `/prizes//`, static +assets, media. The authenticated prefixes (`/admin/` and `/host/`) +return 403 with a "tailnet only" pointer. Staff hit +`shower.ops.eblu.me` for the operator console + admin; the app's +v1.0.1 `DJANGO_PUBLIC_URL_BASE` setting makes QR codes generated on +the tailnet point back at the WAN host for guests. Plus a Caddy route +on indri, Pulumi Gandi CNAME, and a Grafana APM dashboard tracking +request rate, error rate, latency, bandwidth, and access logs. 
diff --git a/docs/how-to/operations/shower-app.md b/docs/how-to/operations/shower-on-ringtail.md similarity index 81% rename from docs/how-to/operations/shower-app.md rename to docs/how-to/operations/shower-on-ringtail.md index 9a7af1b..daf1046 100644 --- a/docs/how-to/operations/shower-app.md +++ b/docs/how-to/operations/shower-on-ringtail.md @@ -38,24 +38,29 @@ Internet → shower.eblu.me | Hostname | Reachable from | Notes | |---|---|---| -| `shower.eblu.me` | Public internet | `/admin/` blocked except `/admin/login/`, `/admin/logout/` | -| `shower.ops.eblu.me` | Tailnet | Full app surface, including the admin | +| `shower.eblu.me` | Public internet | Guest surface only — splash, `/prizes//`, `/static/`, `/media/`. Everything authenticated 403s with a tailnet pointer. | +| `shower.ops.eblu.me` | Tailnet | Full app surface — `/host/`, `/admin/`, the works | | `shower.tail8d86e.ts.net` | Tailnet | Bare ProxyGroup endpoint Caddy proxies to | ## Defense layers (public side) -The public path stacks four checks against `/admin/login/` brute force: +The public surface is guest-only, so the threat model collapses: there +is no credential-accepting endpoint reachable from WAN, and nothing on +WAN that requires authentication. -1. **fly nginx `geo $shower_banned`** — per-service ban list populated by - fail2ban (`/etc/nginx/shower-deny.conf`) -2. **fly nginx `limit_req zone=shower_auth`** — 3 r/s per Fly-Client-IP -3. **django-axes** — 5 fails / 1 hour lockout per `(username, ip_address)` -4. **edge `/admin/` block** — anything that isn't `/admin/login/` or - `/admin/logout/` returns 403 from nginx, period +1. **edge auth lockout** — fly nginx 403s `/admin/`, `/host/`, and + anything that would redirect into them. Anyone hitting an auth URL + on WAN gets a "tailnet only" message. +2. **fly nginx `limit_req zone=general`** — 10 r/s per Fly-Client-IP + cushion for the splash form. +3. 
**django-axes** — 5 fails / 1 hour lockout per `(username, ip_address)`, + running on the tailnet-side login. Provides the only credential + defense, since brute-force is only reachable to tailnet members. -The fail2ban filter `shower-admin-login.conf` matches 401/403/429 on -`/admin/login/`. The 429 case catches attackers who keep hammering after -django-axes has already locked them out. +The QR codes that `/host/` (on tailnet) generates for guests embed +`https://shower.eblu.me/...` even though the QR view is served from +the tailnet host. The app's `PUBLIC_URL_BASE` setting (added in v1.0.1) +overrides Django's `request.build_absolute_uri()` for those URLs. ## Persistent storage @@ -193,6 +198,22 @@ it up.) mise run provision-indri -- --tags caddy ``` +### 7. Create the admin user + +The container's entrypoint runs `migrate --noinput` + `collectstatic +--noinput --clear` before gunicorn, so a fresh `db.sqlite3` is schema- +ready as soon as the pod boots. It does *not* create a Django superuser +— that has to happen once, interactively, after the first pod is up: + +```fish +kubectl --context=k3s-ringtail -n shower exec -it deploy/shower -- \ + python -m django createsuperuser +``` + +Use `erich` / your usual email. The same account doubles as the +`@staff_member_required` login for `/host/`. Subsequent staff accounts +can be created from `/admin/auth/user/` once you're signed in. + ## Deploying a new version 1. Bump the wheel version in the app repo (`adelaide-baby-shower-app`) diff --git a/docs/reference/services/shower-app.md b/docs/reference/services/shower-app.md new file mode 100644 index 0000000..26d1764 --- /dev/null +++ b/docs/reference/services/shower-app.md @@ -0,0 +1,55 @@ +--- +title: Shower App +modified: 2026-05-10 +last-reviewed: 2026-05-10 +tags: + - service + - django +--- + +# Shower App + +Django web app for Adelaide / Heidi / Addie's baby shower — guest splash with +a "what did you bring?" 
form, raffle picker, contest-prize ranking via +QR-coded `/prizes//` URLs, and an `/host/` operator console with +drag-rank assignment solving via scipy. + +## Quick Reference + +| Property | Value | +|----------|-------| +| **Public URL** | `shower.eblu.me` (guest surface only — via [[flyio-proxy]]) | +| **Private URL** | `shower.ops.eblu.me` (admin + `/host/` console — Caddy on indri) | +| **Cluster** | [[ringtail]] k3s, namespace `shower` | +| **Container** | `registry.ops.eblu.me/blumeops/shower` (built from `containers/shower/default.nix`) | +| **App source** | `forge.eblu.me/eblume/adelaide-baby-shower-app` (wheel on Forgejo PyPI) | +| **Database** | SQLite on a local-path PVC (`shower-data`, RWO 2 Gi) | +| **Media (prize photos)** | NFS RWX PVC `shower-media` → `sifaka:/volume1/shower` | +| **Secrets** | `Shower (blumeops)` 1Password item → `DJANGO_SECRET_KEY` | + +## Routing + +``` +Internet → shower.eblu.me (Fly nginx, guest-only 403s on /admin/ /host/) + │ + ▼ + Caddy on indri (shower.ops.eblu.me — full surface) + │ + ▼ + Tailscale ProxyGroup → k3s Service → Deployment +``` + +## Backups + +- **SQLite** dumped via `kubectl exec` to indri's `borgmatic_k8s_dump_dir` on every 2 a.m. run (mealie-pattern entry in `borgmatic_k8s_sqlite_dumps`) +- **Media** picked up via `/Volumes/shower` (sifaka SMB mount on indri) in the main `borgmatic_source_directories` list + +Both archive to sifaka + BorgBase. 
+ +## Related + +- [[shower-on-ringtail]] — onboarding + day-of runbook +- [[expose-service-publicly]] — Fly proxy + tailnet pattern this rides on +- [[ringtail]] — host cluster +- [[sifaka#NFS Exports]] — NFS share table +- [[borgmatic]] — backup system diff --git a/fly/Dockerfile b/fly/Dockerfile index 355b404..eae8c35 100644 --- a/fly/Dockerfile +++ b/fly/Dockerfile @@ -20,9 +20,7 @@ COPY --from=docker.io/grafana/alloy@sha256:6e00cf7c5a692ff5f24844529416ed017d76f RUN mkdir -p /var/log/nginx /etc/alloy /tmp/alloy-data COPY fail2ban/filter.d/forge-login.conf /etc/fail2ban/filter.d/forge-login.conf -COPY fail2ban/filter.d/shower-admin-login.conf /etc/fail2ban/filter.d/shower-admin-login.conf COPY fail2ban/jail.d/forge.conf /etc/fail2ban/jail.d/forge.conf -COPY fail2ban/jail.d/shower.conf /etc/fail2ban/jail.d/shower.conf COPY fail2ban/action.d/nginx-deny.conf /etc/fail2ban/action.d/nginx-deny.conf COPY nginx.conf /etc/nginx/nginx.conf diff --git a/fly/fail2ban/filter.d/shower-admin-login.conf b/fly/fail2ban/filter.d/shower-admin-login.conf deleted file mode 100644 index c73cd3a..0000000 --- a/fly/fail2ban/filter.d/shower-admin-login.conf +++ /dev/null @@ -1,13 +0,0 @@ -# Filter for shower-app /admin/login/ failures via nginx JSON access log. -# Matches 401/403/429 responses on the login endpoint, keyed on the -# client_ip field (populated from Fly-Client-IP header). -# -# The 429 case catches attackers who keep hammering after django-axes has -# already locked them out — those requests return 429 from -# axes.middleware.AxesMiddleware before reaching the view. 
- -[Definition] - -failregex = "client_ip":"".*"request_uri":"\/admin\/login[^"]*".*"status":(401|403|429) - -ignoreregex = diff --git a/fly/fail2ban/jail.d/shower.conf b/fly/fail2ban/jail.d/shower.conf deleted file mode 100644 index 59fa7fb..0000000 --- a/fly/fail2ban/jail.d/shower.conf +++ /dev/null @@ -1,8 +0,0 @@ -[shower-admin-login] -enabled = true -filter = shower-admin-login -logpath = /var/log/nginx/access.json.log -maxretry = 5 -findtime = 600 -bantime = 3600 -banaction = nginx-deny[nginx_deny_file=/etc/nginx/shower-deny.conf] diff --git a/fly/nginx.conf b/fly/nginx.conf index 0aca716..44d3903 100644 --- a/fly/nginx.conf +++ b/fly/nginx.conf @@ -34,11 +34,6 @@ http { # bucket. $http_fly_client_ip has the actual client IP. limit_req_zone $http_fly_client_ip zone=forge_auth:10m rate=3r/s; - # Shower-app rate limit on /admin/login/ (the only admin path the public - # proxy exposes). 3r/s with django-axes (5 strikes, 1h lockout) gives - # plenty of room for a real staff login while making brute-force costly. - limit_req_zone $http_fly_client_ip zone=shower_auth:10m rate=3r/s; - # fail2ban deny list — banned IPs are written here by fail2ban and # checked via the $forge_banned variable. The file is touched at # container start to ensure it exists. @@ -47,13 +42,6 @@ http { include /etc/nginx/forge-deny.conf; } - # Per-service deny list for the shower app — populated by fail2ban - # when /admin/login/ attempts trip the threshold. Same scheme as forge. - geo $http_fly_client_ip $shower_banned { - default 0; - include /etc/nginx/shower-deny.conf; - } - # Proxy cache: 200MB, evict after 24h of no access proxy_cache_path /tmp/cache levels=1:2 keys_zone=services:10m max_size=200m inactive=24h; @@ -300,27 +288,25 @@ http { } } - # --- shower.eblu.me (dynamic Django: guest splash + raffle/prize console) --- - # Public-facing Adelaide baby shower app. 
Defense layers: - # * geo+fail2ban deny list ($shower_banned) - # * nginx limit_req on /admin/login/ via the shower_auth zone - # * django-axes inside Django (5 fails / 1h lockout per user+IP) - # * /admin/ paths blocked at the proxy except /admin/login/ and /admin/logout/ - # so staff can sign in publicly but the CRUD admin is tailnet-only + # --- shower.eblu.me (Adelaide baby shower — guest-only public surface) --- + # Only the guest paths (`/`, `/prizes//`, /static/, /media/) are + # exposed on WAN. /host/, /admin/, and Django's login views are blocked + # at the edge with a 403 pointing at the tailnet hostname — staff sign + # in on shower.ops.eblu.me, which is reachable from any device with + # Tailscale installed. Defense layers reduce to: general per-IP rate + # limit + django-axes (5 fails / 1h) on the tailnet-side login. No + # fail2ban needed here because the public surface no longer takes + # credentials of any kind. server { listen 8080; server_name shower.eblu.me; - # Block fail2ban-banned IPs - if ($shower_banned) { - return 403 "Temporarily blocked. Try again later.\n"; - } - # General per-IP rate limit (cushion for the splash page + form posts) limit_req zone=general burst=20 nodelay; - # Image uploads from /host/'s prize cropper are ~150-300 KiB JPEGs; - # 5 MiB matches the Django-side cap. + # Image uploads from /host/'s prize cropper are ~150-300 KiB JPEGs. + # The host page itself isn't reachable here; this directive only caps + # request bodies (guest form posts), kept at 5 MiB to match Django. client_max_body_size 5m; # Security headers — HSTS matches Django's SECURE_HSTS_SECONDS. @@ -328,6 +314,8 @@ http { add_header X-Content-Type-Options "nosniff" always; add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; add_header Referrer-Policy "same-origin" always; + # GNU Terry Pratchett — keep the name moving. 
+ add_header X-Clacks-Overhead "GNU Terry Pratchett" always; error_page 502 503 504 /error.html; location = /error.html { @@ -335,38 +323,24 @@ http { internal; } - # GNU Terry Pratchett — keep the name moving. - add_header X-Clacks-Overhead "GNU Terry Pratchett" always; - # Reject indexers — there's nothing here we want crawled. location = /robots.txt { default_type text/plain; return 200 "User-agent: *\nDisallow: /\n"; } - # Public admin surface: only the login/logout endpoints, rate-limited. - location ~ ^/admin/(login|logout)/? { - limit_req zone=shower_auth burst=5 nodelay; - - proxy_pass https://indri_backend$request_uri; - proxy_ssl_verify off; - proxy_ssl_server_name on; - proxy_ssl_name shower.ops.eblu.me; - proxy_intercept_errors on; - - proxy_set_header Host shower.ops.eblu.me; - proxy_set_header X-Real-IP $http_fly_client_ip; - proxy_set_header X-Forwarded-For $http_fly_client_ip; - proxy_set_header X-Forwarded-Proto $scheme; - - proxy_http_version 1.1; - proxy_set_header Connection $connection_upgrade; + # Admin surface: tailnet-only. Anything under /admin/ — login, + # logout, CRUD UI, password reset — returns 403 with a pointer to + # the tailnet host. Django's `staff_member_required` will redirect + # /host/ to /admin/login/, which lands on this 403 if a guest + # device wanders into it. Staff hit the tailnet host directly. + location /admin/ { + return 403 "Authentication is tailnet-only — visit shower.ops.eblu.me.\n"; } - # Block the rest of /admin/ at the public edge. The admin CRUD UI - # is only reachable via shower.ops.eblu.me on the tailnet. - location /admin/ { - return 403 "The Django admin is tailnet-only — visit shower.ops.eblu.me.\n"; + # Operator console: tailnet-only. Same rationale as /admin/. 
+ location /host/ { + return 403 "The host console is tailnet-only — visit shower.ops.eblu.me.\n"; } location / { diff --git a/fly/start.sh b/fly/start.sh index ef17641..a924849 100644 --- a/fly/start.sh +++ b/fly/start.sh @@ -19,10 +19,9 @@ until nslookup forge.tail8d86e.ts.net 100.100.100.100 > /dev/null 2>&1; do done echo "MagicDNS ready" -# Ensure fail2ban per-service deny files exist before nginx starts +# Ensure fail2ban deny file exists before nginx starts # (the geo directive's `include` fails if the file is missing). touch /etc/nginx/forge-deny.conf -touch /etc/nginx/shower-deny.conf # Start nginx — MagicDNS is available, upstreams resolved. nginx -g "daemon off;" & diff --git a/service-versions.yaml b/service-versions.yaml index 8caa2be..aa3a6b6 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -47,7 +47,7 @@ services: - name: shower type: argocd last-reviewed: 2026-05-10 - current-version: "1.0.0" + current-version: "1.0.1" upstream-source: https://forge.eblu.me/eblume/adelaide-baby-shower-app notes: | Django app for Adelaide / Heidi / Addie's baby shower. Wheel