diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml index 6eada76..da6f3f9 100644 --- a/ansible/roles/caddy/defaults/main.yml +++ b/ansible/roles/caddy/defaults/main.yml @@ -101,6 +101,9 @@ caddy_services: - name: paperless host: "paperless.{{ caddy_domain }}" backend: "https://paperless.tail8d86e.ts.net" + - name: shower + host: "shower.{{ caddy_domain }}" + backend: "https://shower.tail8d86e.ts.net" - name: sifaka host: "nas.{{ caddy_domain }}" backend: "http://sifaka:5000" diff --git a/argocd/apps/shower.yaml b/argocd/apps/shower.yaml new file mode 100644 index 0000000..c4a7a62 --- /dev/null +++ b/argocd/apps/shower.yaml @@ -0,0 +1,20 @@ +# Adelaide / Heidi / Addie baby shower app — Django guest/raffle/prize system. +# Public landing page at shower.eblu.me (via fly proxy), staff console + admin +# at shower.ops.eblu.me (tailnet only). Built from forge PyPI wheel. +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: shower + namespace: argocd +spec: + project: default + source: + repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git + targetRevision: main + path: argocd/manifests/shower + destination: + server: https://ringtail.tail8d86e.ts.net:6443 + namespace: shower + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/argocd/manifests/grafana-config/dashboards/configmap-shower-apm.yaml b/argocd/manifests/grafana-config/dashboards/configmap-shower-apm.yaml new file mode 100644 index 0000000..96348e8 --- /dev/null +++ b/argocd/manifests/grafana-config/dashboards/configmap-shower-apm.yaml @@ -0,0 +1,229 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-shower-apm + namespace: monitoring + labels: + grafana_dashboard: "1" +data: + shower-apm.json: | + { + "annotations": { "list": [] }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" 
}, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisLabel": "req/s", + "drawStyle": "line", + "fillOpacity": 20, + "lineInterpolation": "linear", + "lineWidth": 1, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 0 }, + "id": 1, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum by (status) (rate(flyio_nginx_http_requests_total{host=\"shower.eblu.me\"}[5m]))", "legendFormat": "{{status}}", "refId": "A" } + ], + "title": "Request Rate by Status", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.01 }, { "color": "red", "value": 0.05 }] }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 8, "x": 16, "y": 0 }, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum(rate(flyio_nginx_http_requests_total{host=\"shower.eblu.me\",status=~\"5..\"}[5m])) / sum(rate(flyio_nginx_http_requests_total{host=\"shower.eblu.me\"}[5m]))", "refId": "A" } + ], + "title": "Error Rate (5xx)", + "type": "stat" + }, + { + "datasource": { "type": 
"prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 1 }, { "color": "red", "value": 5 }] }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 16, "y": 4 }, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum(increase(flyio_nginx_http_requests_total{host=\"shower.eblu.me\",request_uri=~\"/admin/login.*\",status=~\"4..\"}[$__range]))", "refId": "A" } + ], + "title": "Failed admin logins (range)", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 20, "y": 4 }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum(rate(flyio_nginx_http_requests_total{host=\"shower.eblu.me\"}[5m]))", "refId": "A" } + ], + "title": "Current RPS", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisLabel": "seconds", + "drawStyle": "line", + "fillOpacity": 10, + "lineInterpolation": "linear", + "lineWidth": 1, + "showPoints": "never", + 
"spanNulls": false, + "stacking": { "group": "A", "mode": "none" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, + "id": 5, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.50, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{host=\"shower.eblu.me\"}[5m])))", "legendFormat": "p50", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.90, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{host=\"shower.eblu.me\"}[5m])))", "legendFormat": "p90", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.99, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{host=\"shower.eblu.me\"}[5m])))", "legendFormat": "p99", "refId": "C" } + ], + "title": "Latency Percentiles", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisLabel": "", + "drawStyle": "line", + "fillOpacity": 20, + "lineInterpolation": "linear", + "lineWidth": 1, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, + "id": 6, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "single", 
"sort": "none" } + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum(rate(flyio_nginx_http_response_bytes_total{host=\"shower.eblu.me\"}[5m]))", "legendFormat": "Bandwidth", "refId": "A" } + ], + "title": "Bandwidth", + "type": "timeseries" + }, + { + "datasource": { "type": "loki", "uid": "loki" }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 }, + "id": 7, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { "datasource": { "type": "loki", "uid": "loki" }, "expr": "{instance=\"flyio-proxy\", job=\"flyio-nginx\"} |= \"shower.eblu.me\" | json | line_format \"{{.client_ip}} {{.request_method}} {{.request_uri}} {{.status}} {{.request_time}}s\"", "refId": "A" } + ], + "title": "Recent Access Logs", + "type": "logs" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["shower", "flyio", "apm"], + "templating": { "list": [] }, + "time": { "from": "now-6h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Shower APM", + "uid": "shower-apm", + "version": 1, + "weekStart": "" + } diff --git a/argocd/manifests/grafana-config/kustomization.yaml b/argocd/manifests/grafana-config/kustomization.yaml index a6e8000..b518043 100644 --- a/argocd/manifests/grafana-config/kustomization.yaml +++ b/argocd/manifests/grafana-config/kustomization.yaml @@ -22,6 +22,7 @@ resources: - dashboards/configmap-transmission.yaml - dashboards/configmap-cv-apm.yaml - dashboards/configmap-docs-apm.yaml + - dashboards/configmap-shower-apm.yaml - dashboards/configmap-flyio.yaml - dashboards/configmap-sifaka-disks.yaml - dashboards/configmap-forgejo.yaml diff --git a/argocd/manifests/shower/configmap.yaml b/argocd/manifests/shower/configmap.yaml new file mode 100644 index 0000000..111f524 --- /dev/null +++ 
b/argocd/manifests/shower/configmap.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: shower-app-config + namespace: shower +data: + DJANGO_DEBUG: "0" + # Admin lives behind the tailnet; the public proxy blocks /admin/ except + # /admin/login/ and /admin/logout/. /host/'s "Django admin" link follows + # this var so admin CRUD only happens on the tailnet hostname. + DJANGO_ADMIN_URL: "https://shower.ops.eblu.me/admin/" diff --git a/argocd/manifests/shower/deployment.yaml b/argocd/manifests/shower/deployment.yaml new file mode 100644 index 0000000..70547aa --- /dev/null +++ b/argocd/manifests/shower/deployment.yaml @@ -0,0 +1,81 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: shower + namespace: shower +spec: + replicas: 1 + # SQLite + RWO data PVC: only one writer at a time. Recreate ensures the + # old pod's lock on the local-path volume is released before the new one + # mounts it. + strategy: + type: Recreate + selector: + matchLabels: + app: shower + template: + metadata: + labels: + app: shower + spec: + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + seccompProfile: + type: RuntimeDefault + containers: + - name: shower + image: registry.ops.eblu.me/blumeops/shower:kustomized + securityContext: + runAsNonRoot: true + allowPrivilegeEscalation: false + ports: + - containerPort: 8000 + name: http + envFrom: + - configMapRef: + name: shower-app-config + - secretRef: + name: shower-app-secrets + volumeMounts: + - name: media + mountPath: /app/media + - name: data + mountPath: /app/data + resources: + requests: + memory: "128Mi" + cpu: "50m" + limits: + memory: "512Mi" + cpu: "500m" + livenessProbe: + httpGet: + path: / + port: 8000 + httpHeaders: + - name: Host + value: shower.ops.eblu.me + - name: X-Forwarded-Proto + value: https + initialDelaySeconds: 30 + periodSeconds: 30 + readinessProbe: + httpGet: + path: / + port: 8000 + httpHeaders: + - name: Host + value: shower.ops.eblu.me + - name: 
X-Forwarded-Proto + value: https + initialDelaySeconds: 10 + periodSeconds: 10 + volumes: + - name: media + persistentVolumeClaim: + claimName: shower-media + - name: data + persistentVolumeClaim: + claimName: shower-data diff --git a/argocd/manifests/shower/external-secret.yaml b/argocd/manifests/shower/external-secret.yaml new file mode 100644 index 0000000..005a7e9 --- /dev/null +++ b/argocd/manifests/shower/external-secret.yaml @@ -0,0 +1,19 @@ +--- +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: shower-app-secrets + namespace: shower +spec: + refreshInterval: 1h + secretStoreRef: + kind: ClusterSecretStore + name: onepassword-blumeops + target: + name: shower-app-secrets + creationPolicy: Owner + data: + - secretKey: DJANGO_SECRET_KEY + remoteRef: + key: "Shower (blumeops)" + property: secret-key diff --git a/argocd/manifests/shower/ingress-tailscale.yaml b/argocd/manifests/shower/ingress-tailscale.yaml new file mode 100644 index 0000000..d09a696 --- /dev/null +++ b/argocd/manifests/shower/ingress-tailscale.yaml @@ -0,0 +1,30 @@ +# Tailscale Ingress for shower app. +# Exposes at shower.tail8d86e.ts.net. +# Caddy on indri proxies shower.ops.eblu.me here. The fly proxy then proxies +# shower.eblu.me through Caddy to this same endpoint (fly does not contact +# the k8s service directly — all traffic routes through indri's Caddy). 
+apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: shower-tailscale + namespace: shower + annotations: + tailscale.com/proxy-class: "default" + tailscale.com/proxy-group: "ingress" + gethomepage.dev/enabled: "true" + gethomepage.dev/name: "Shower" + gethomepage.dev/group: "Home" + gethomepage.dev/icon: "mdi-baby" + gethomepage.dev/description: "Adelaide baby shower" + gethomepage.dev/href: "https://shower.ops.eblu.me" + gethomepage.dev/pod-selector: "app=shower" +spec: + ingressClassName: tailscale + defaultBackend: + service: + name: shower + port: + number: 8000 + tls: + - hosts: + - shower diff --git a/argocd/manifests/shower/kustomization.yaml b/argocd/manifests/shower/kustomization.yaml new file mode 100644 index 0000000..cd4dd08 --- /dev/null +++ b/argocd/manifests/shower/kustomization.yaml @@ -0,0 +1,17 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: shower + +resources: + - configmap.yaml + - external-secret.yaml + - pv-nfs.yaml + - pvc.yaml + - service.yaml + - ingress-tailscale.yaml + - deployment.yaml + +images: + - name: registry.ops.eblu.me/blumeops/shower + newTag: v1.0.0-PLACEHOLDER-nix diff --git a/argocd/manifests/shower/pv-nfs.yaml b/argocd/manifests/shower/pv-nfs.yaml new file mode 100644 index 0000000..d07cecc --- /dev/null +++ b/argocd/manifests/shower/pv-nfs.yaml @@ -0,0 +1,26 @@ +# NFS PersistentVolume for shower app media uploads (prize photos). +# Requires: NFS share on sifaka at /volume1/shower with NFS permissions +# for ringtail. +# +# To create on Synology: +# 1. Control Panel > Shared Folder > Create +# 2. Name: shower, Location: Volume 1 +# 3. Control Panel > File Services > NFS > NFS Rules +# 4. Add rule for "shower" share: Hostname=ringtail, Privilege=Read/Write, +# Squash=No mapping +# 5. 
chown -R 1000:1000 /volume1/shower (or pick another UID and align the +# container's runAsUser to match) +apiVersion: v1 +kind: PersistentVolume +metadata: + name: shower-media-nfs-pv +spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteMany + persistentVolumeReclaimPolicy: Retain + storageClassName: "" + nfs: + server: sifaka + path: /volume1/shower diff --git a/argocd/manifests/shower/pvc.yaml b/argocd/manifests/shower/pvc.yaml new file mode 100644 index 0000000..47fee54 --- /dev/null +++ b/argocd/manifests/shower/pvc.yaml @@ -0,0 +1,30 @@ +# Media PVC — RWX NFS share for /app/media (prize photo uploads). +# SQLite DB lives in a separate local-path PVC; NFS file locking is not +# reliable enough for SQLite's WAL/journal. +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: shower-media + namespace: shower +spec: + accessModes: + - ReadWriteMany + storageClassName: "" + volumeName: shower-media-nfs-pv + resources: + requests: + storage: 10Gi +--- +# Database PVC — k3s local-path (default storage class) for SQLite. +# RWO is fine: the deployment runs with a single replica. +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: shower-data + namespace: shower +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi diff --git a/argocd/manifests/shower/service.yaml b/argocd/manifests/shower/service.yaml new file mode 100644 index 0000000..0a73aab --- /dev/null +++ b/argocd/manifests/shower/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: shower + namespace: shower +spec: + selector: + app: shower + ports: + - name: http + port: 8000 + targetPort: 8000 + protocol: TCP diff --git a/containers/shower/default.nix b/containers/shower/default.nix new file mode 100644 index 0000000..5b92e85 --- /dev/null +++ b/containers/shower/default.nix @@ -0,0 +1,117 @@ +# Nix-built shower app container — Adelaide / Heidi / Addie baby shower. 
+# +# The app is published as a wheel to the Forgejo PyPI index at +# https://forge.eblu.me/api/packages/eblume/pypi/. Rather than pin and +# fetch the wheel + transitive deps at nix build time (which requires +# every wheel hash to be tracked here), this image ships a Python from +# nixpkgs and pip-installs the wheel into a venv on /app/data at first +# boot. Subsequent boots reuse the venv. This trades reproducibility for +# a much simpler nix file. +# +# Built on the nix-container-builder runner (ringtail, amd64) so the +# image runs natively on ringtail's k3s without QEMU emulation. +{ pkgs ? import <nixpkgs> { } }: + +let + version = "1.0.0"; + + python = pkgs.python314; + appVersion = version; + + entrypoint = pkgs.writeShellScript "shower-entrypoint" '' + set -eu + + APP_DIR=/app + DATA_DIR=/app/data + VENV_DIR=$DATA_DIR/.venv + INSTALLED_MARKER=$VENV_DIR/.installed-${appVersion} + + export HOME=$DATA_DIR + export PIP_DISABLE_PIP_VERSION_CHECK=1 + export PIP_NO_CACHE_DIR=1 + + mkdir -p "$DATA_DIR" "$APP_DIR/media" + + # First boot (or version change): create venv and install the app + deps. + # The wheel comes from the internal devpi mirror (default index), with + # forge.eblu.me as the extra index for the adelaide-baby-shower-app wheel. + if [ ! -f "$INSTALLED_MARKER" ]; then + echo "shower: installing adelaide-baby-shower-app==${appVersion} into $VENV_DIR" + rm -rf "$VENV_DIR" + ${python}/bin/python -m venv "$VENV_DIR" + "$VENV_DIR/bin/pip" install --upgrade pip + "$VENV_DIR/bin/pip" install \ + --index-url=https://pypi.ops.eblu.me/root/pypi/+simple/ \ + --extra-index-url=https://forge.eblu.me/api/packages/eblume/pypi/simple/ \ + "adelaide-baby-shower-app==${appVersion}" gunicorn + touch "$INSTALLED_MARKER" + fi + + # The wheel's config/settings.py uses BASE_DIR = parent.parent of its + # own __file__, so MEDIA_ROOT and DATABASES.NAME resolve relative to + # site-packages. Override with a thin shim placed in $APP_DIR.
+ cat > "$APP_DIR/local_settings.py" <<'PY' + from config.settings import * # noqa: F401,F403 + + DATABASES["default"]["NAME"] = "/app/data/db.sqlite3" + MEDIA_ROOT = "/app/media" + STATIC_ROOT = "/app/data/staticfiles" + PY + + export PYTHONPATH=$APP_DIR + export DJANGO_SETTINGS_MODULE=local_settings + + cd "$APP_DIR" + + echo "shower: running migrations" + "$VENV_DIR/bin/python" -m django migrate --noinput + + echo "shower: collecting static files" + "$VENV_DIR/bin/python" -m django collectstatic --noinput --clear + + echo "shower: starting gunicorn" + exec "$VENV_DIR/bin/gunicorn" \ + --bind 0.0.0.0:8000 \ + --workers 2 \ + --forwarded-allow-ips='*' \ + config.wsgi:application + ''; +in + +pkgs.dockerTools.buildLayeredImage { + name = "blumeops/shower"; + contents = [ + python + pkgs.cacert + pkgs.tzdata + pkgs.bashInteractive + pkgs.coreutils + pkgs.gnused + pkgs.gnugrep + ]; + + # /app is writable by uid 1000 (matches deployment.yaml runAsUser). + fakeRootCommands = '' + mkdir -p app/data app/media tmp + chmod 1777 tmp + chown -R 1000:1000 app + ''; + enableFakechroot = true; + + config = { + Entrypoint = [ "${entrypoint}" ]; + Env = [ + "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" + "TZDIR=${pkgs.tzdata}/share/zoneinfo" + "TZ=America/Los_Angeles" + "TMPDIR=/tmp" + "LANG=C.UTF-8" + "LC_ALL=C.UTF-8" + ]; + ExposedPorts = { + "8000/tcp" = { }; + }; + User = "1000"; + WorkingDir = "/app"; + }; +} diff --git a/docs/changelog.d/shower-app-deploy.feature.md b/docs/changelog.d/shower-app-deploy.feature.md new file mode 100644 index 0000000..96218be --- /dev/null +++ b/docs/changelog.d/shower-app-deploy.feature.md @@ -0,0 +1,4 @@ +Deploy the Adelaide / Heidi / Addie baby shower app — guest splash, raffle +picker, and prize assignment console — on ringtail k3s with `shower.eblu.me` +as the public entry and `shower.ops.eblu.me` as the tailnet admin host. App +source: [`adelaide-baby-shower-app`](https://forge.eblu.me/eblume/adelaide-baby-shower-app). 
diff --git a/docs/changelog.d/shower-app-deploy.infra.md b/docs/changelog.d/shower-app-deploy.infra.md new file mode 100644 index 0000000..c14fbce --- /dev/null +++ b/docs/changelog.d/shower-app-deploy.infra.md @@ -0,0 +1,8 @@ +Wire shower app for public exposure: fly nginx `shower.eblu.me` server +block with `/admin/` blocked at the edge (except `/admin/login/` and +`/admin/logout/`), per-IP rate limit, fail2ban filter+jail with a +shower-specific deny list (`shower-deny.conf`), Caddy route, Pulumi +Gandi CNAME, and a Grafana APM dashboard tracking request rate, error +rate, failed admin logins, latency, bandwidth, and access logs. +Generalized the `nginx-deny` action to accept a per-jail +`nginx_deny_file` parameter so each service has its own ban list. diff --git a/docs/how-to/operations/shower-app.md b/docs/how-to/operations/shower-app.md new file mode 100644 index 0000000..401eeb6 --- /dev/null +++ b/docs/how-to/operations/shower-app.md @@ -0,0 +1,174 @@ +--- +title: Shower App on Ringtail +modified: 2026-05-10 +last-reviewed: 2026-05-10 +tags: + - how-to + - operations + - kubernetes + - django +--- + +# Shower App on Ringtail + +How the Adelaide / Heidi / Addie baby shower app is deployed. The app is a +Django project ([`adelaide-baby-shower-app`](https://forge.eblu.me/eblume/adelaide-baby-shower-app)) +released as a wheel to the Forgejo Packages PyPI index and run on +[[ringtail]]'s k3s cluster. Public landing page at `shower.eblu.me`, staff +console + admin UI at `shower.ops.eblu.me` (tailnet only). + +The contract this deploy implements is defined in the app repo's +`docs/how-to/hosting.md` — read that for the env-var contract, security +model, and storage requirements before changing anything here. 
+ +## Routing + +``` +Internet → shower.eblu.me + │ (Fly.io nginx — public) + ▼ + Caddy on indri (shower.ops.eblu.me) + │ + ▼ + Tailscale ProxyGroup ingress (shower.tail8d86e.ts.net) + │ + ▼ + Service shower:8000 → Pod (Django + gunicorn) +``` + +| Hostname | Reachable from | Notes | +|---|---|---| +| `shower.eblu.me` | Public internet | `/admin/` blocked except `/admin/login/`, `/admin/logout/` | +| `shower.ops.eblu.me` | Tailnet | Full app surface, including the admin | +| `shower.tail8d86e.ts.net` | Tailnet | Bare ProxyGroup endpoint Caddy proxies to | + +## Defense layers (public side) + +The public path stacks four checks against `/admin/login/` brute force: + +1. **fly nginx `geo $shower_banned`** — per-service ban list populated by + fail2ban (`/etc/nginx/shower-deny.conf`) +2. **fly nginx `limit_req zone=shower_auth`** — 3 r/s per Fly-Client-IP +3. **django-axes** — 5 fails / 1 hour lockout per `(username, ip_address)` +4. **edge `/admin/` block** — anything that isn't `/admin/login/` or + `/admin/logout/` returns 403 from nginx, period + +The fail2ban filter `shower-admin-login.conf` matches 401/403/429 on +`/admin/login/`. The 429 case catches attackers who keep hammering after +django-axes has already locked them out. + +## Persistent storage + +| Mount | PVC | Type | Why | +|---|---|---|---| +| `/app/media` | `shower-media` | NFS RWX on sifaka (`/volume1/shower`) | Prize photos survive pod rescheduling | +| `/app/data` | `shower-data` | k3s `local-path` RWO | SQLite DB; NFS file locking can't be trusted for WAL/journal | + +The container's entrypoint installs the wheel into `/app/data/.venv` on +first boot, runs migrations, runs `collectstatic`, and `exec`s gunicorn. +A `local_settings.py` shim overrides `DATABASES.NAME`, `MEDIA_ROOT`, and +`STATIC_ROOT` to absolute paths under `/app/`, sidestepping the wheel's +`BASE_DIR = parent.parent` of an in-site-packages settings module. 
+ +## One-time setup steps + +These steps are required the first time the service is deployed and are +not encoded in the manifests. + +### 1. NFS share on sifaka + +On the Synology: + +1. Control Panel → Shared Folder → Create. Name: `shower`, Volume 1. +2. Control Panel → File Services → NFS → NFS Rules. Add rule for + `shower`: Hostname=`ringtail`, Privilege=Read/Write, Squash=No mapping. +3. `chown -R 1000:1000 /volume1/shower` over SSH so the pod's uid 1000 + can write. + +### 2. 1Password item + +Item name: **`Shower (blumeops)`** in the `blumeops` vault. +Required property: + +| Field | Value | +|---|---| +| `secret-key` | Output of `openssl rand -base64 48` | + +The `ExternalSecret` `shower-app-secrets` will sync this into the +`shower` namespace as a `Secret` and `envFrom` exposes it as +`DJANGO_SECRET_KEY` to the container. + +**Never reuse a key that has ever been in git history.** Per the app's +hosting.md, an early dev key was committed before being replaced with +the `django-insecure-...` placeholder; the production key must be +freshly generated. + +### 3. Container image + +Built by the `build-container` Forgejo Actions workflow on the +`nix-container-builder` runner (ringtail, amd64). Trigger with: + +```fish +mise run container-build-and-release shower +``` + +After the workflow finishes, update `images[].newTag` in +`argocd/manifests/shower/kustomization.yaml` to the resulting +`vX.Y.Z-<sha>-nix` tag, then commit (C0). + +### 4. DNS + +`pulumi/gandi/__main__.py` declares the `shower-public` CNAME pointing +at `blumeops-proxy.fly.dev.`. Apply with: + +```fish +mise run dns-preview +mise run dns-up +``` + +### 5. Fly.io certificate + +```fish +fly certs add shower.eblu.me -a blumeops-proxy +``` + +(Add to `mise-tasks/fly-setup` so re-runs of the one-time setup pick +it up.) + +### 6. Caddy on indri + +`shower` is in `ansible/roles/caddy/defaults/main.yml`.
Push with: + +```fish +mise run provision-indri -- --tags caddy +``` + +## Deploying a new version + +1. Bump the wheel version in the app repo (`adelaide-baby-shower-app`) + and release it to Forgejo PyPI. +2. Bump `version` in `containers/shower/default.nix` to match + (`appVersion` is derived from it). +3. `mise run container-build-and-release shower`. Verify the build + with `mise run runner-logs`. +4. Update the `newTag` in `argocd/manifests/shower/kustomization.yaml` + to the new `[main]` SHA tag. +5. Commit (C0 after PR merge — see [[build-container-image#Squash-merge and container tags]]). +6. `argocd app sync shower`. + +## Verifying after a deploy + +```fish +kubectl --context=k3s-ringtail -n shower get pods +kubectl --context=k3s-ringtail -n shower logs deploy/shower +curl -sf https://shower.ops.eblu.me/ # tailnet +curl -sf https://shower.eblu.me/ # public +curl -I https://shower.eblu.me/admin/users/ # expect 403 (edge block) +curl -I https://shower.ops.eblu.me/admin/ # expect 200 / 302 (login) +``` + +## Related + +- [[expose-service-publicly]] — Fly.io proxy + Tailscale pattern +- [[deploy-k8s-service]] — generic ArgoCD service onboarding +- [[ringtail]] — the cluster +- [`hosting.md`](https://forge.eblu.me/eblume/adelaide-baby-shower-app/src/branch/main/docs/how-to/hosting.md) — app's deployment contract diff --git a/docs/reference/kubernetes/apps.md b/docs/reference/kubernetes/apps.md index 80ea72e..fd5c06f 100644 --- a/docs/reference/kubernetes/apps.md +++ b/docs/reference/kubernetes/apps.md @@ -41,6 +41,7 @@ Registry of all applications deployed via [[argocd]].
| `ollama` | ollama | `argocd/manifests/ollama/` | [[ollama]] | | `mealie` | mealie | `argocd/manifests/mealie/` | [[mealie]] | | `paperless` | paperless | `argocd/manifests/paperless/` | [[paperless]] | +| `shower` | shower | `argocd/manifests/shower/` | [[shower-app]] | | `prowler` | prowler | `argocd/manifests/prowler/` | [[prowler]] | ## Sync Policies diff --git a/fly/Dockerfile b/fly/Dockerfile index eae8c35..355b404 100644 --- a/fly/Dockerfile +++ b/fly/Dockerfile @@ -20,7 +20,9 @@ COPY --from=docker.io/grafana/alloy@sha256:6e00cf7c5a692ff5f24844529416ed017d76f RUN mkdir -p /var/log/nginx /etc/alloy /tmp/alloy-data COPY fail2ban/filter.d/forge-login.conf /etc/fail2ban/filter.d/forge-login.conf +COPY fail2ban/filter.d/shower-admin-login.conf /etc/fail2ban/filter.d/shower-admin-login.conf COPY fail2ban/jail.d/forge.conf /etc/fail2ban/jail.d/forge.conf +COPY fail2ban/jail.d/shower.conf /etc/fail2ban/jail.d/shower.conf COPY fail2ban/action.d/nginx-deny.conf /etc/fail2ban/action.d/nginx-deny.conf COPY nginx.conf /etc/nginx/nginx.conf diff --git a/fly/fail2ban/action.d/nginx-deny.conf b/fly/fail2ban/action.d/nginx-deny.conf index 1d3737b..bab8abb 100644 --- a/fly/fail2ban/action.d/nginx-deny.conf +++ b/fly/fail2ban/action.d/nginx-deny.conf @@ -2,13 +2,22 @@ # Standard iptables banning won't work in Fly.io because $remote_addr # is Fly's internal proxy IP. Instead, we write banned IPs to a file # that nginx checks via a geo directive keyed on $http_fly_client_ip. +# +# The deny file is per-service: each jail sets `nginx_deny_file = ...` +# (see jail.d/*.conf) and a matching `geo $http_fly_client_ip $..._banned` +# block in nginx.conf includes the same path. 
[Definition] -actionban = echo "<ip> 1;" >> /etc/nginx/forge-deny.conf && nginx -s reload +actionban = echo "<ip> 1;" >> <nginx_deny_file> && nginx -s reload -actionunban = sed -i '/<ip> 1;/d' /etc/nginx/forge-deny.conf && nginx -s reload +actionunban = sed -i '/^<ip> 1;$/d' <nginx_deny_file> && nginx -s reload actionstart = actionstop = actioncheck = + +[Init] + +# Default for jails that don't override (preserves forge behaviour). +nginx_deny_file = /etc/nginx/forge-deny.conf diff --git a/fly/fail2ban/filter.d/shower-admin-login.conf b/fly/fail2ban/filter.d/shower-admin-login.conf new file mode 100644 index 0000000..c73cd3a --- /dev/null +++ b/fly/fail2ban/filter.d/shower-admin-login.conf @@ -0,0 +1,13 @@ +# Filter for shower-app /admin/login/ failures via nginx JSON access log. +# Matches 401/403/429 responses on the login endpoint, keyed on the +# client_ip field (populated from Fly-Client-IP header). +# +# The 429 case catches attackers who keep hammering after django-axes has +# already locked them out — those requests return 429 from +# axes.middleware.AxesMiddleware before reaching the view. + +[Definition] + +failregex = "client_ip":"<HOST>".*"request_uri":"\/admin\/login[^"]*".*"status":(401|403|429) + +ignoreregex = diff --git a/fly/fail2ban/jail.d/shower.conf b/fly/fail2ban/jail.d/shower.conf new file mode 100644 index 0000000..59fa7fb --- /dev/null +++ b/fly/fail2ban/jail.d/shower.conf @@ -0,0 +1,8 @@ +[shower-admin-login] +enabled = true +filter = shower-admin-login +logpath = /var/log/nginx/access.json.log +maxretry = 5 +findtime = 600 +bantime = 3600 +banaction = nginx-deny[nginx_deny_file=/etc/nginx/shower-deny.conf] diff --git a/fly/nginx.conf b/fly/nginx.conf index 5e49d88..0aca716 100644 --- a/fly/nginx.conf +++ b/fly/nginx.conf @@ -34,6 +34,11 @@ http { # bucket. $http_fly_client_ip has the actual client IP. limit_req_zone $http_fly_client_ip zone=forge_auth:10m rate=3r/s; + # Shower-app rate limit on /admin/login/ (the only admin path the public + # proxy exposes).
3r/s with django-axes (5 strikes, 1h lockout) gives + # plenty of room for a real staff login while making brute-force costly. + limit_req_zone $http_fly_client_ip zone=shower_auth:10m rate=3r/s; + # fail2ban deny list — banned IPs are written here by fail2ban and # checked via the $forge_banned variable. The file is touched at # container start to ensure it exists. @@ -42,6 +47,13 @@ http { include /etc/nginx/forge-deny.conf; } + # Per-service deny list for the shower app — populated by fail2ban + # when /admin/login/ attempts trip the threshold. Same scheme as forge. + geo $http_fly_client_ip $shower_banned { + default 0; + include /etc/nginx/shower-deny.conf; + } + # Proxy cache: 200MB, evict after 24h of no access proxy_cache_path /tmp/cache levels=1:2 keys_zone=services:10m max_size=200m inactive=24h; @@ -288,6 +300,95 @@ http { } } + # --- shower.eblu.me (dynamic Django: guest splash + raffle/prize console) --- + # Public-facing Adelaide baby shower app. Defense layers: + # * geo+fail2ban deny list ($shower_banned) + # * nginx limit_req on /admin/login/ via the shower_auth zone + # * django-axes inside Django (5 fails / 1h lockout per user+IP) + # * /admin/ paths blocked at the proxy except /admin/login/ and /admin/logout/ + # so staff can sign in publicly but the CRUD admin is tailnet-only + server { + listen 8080; + server_name shower.eblu.me; + + # Block fail2ban-banned IPs + if ($shower_banned) { + return 403 "Temporarily blocked. Try again later.\n"; + } + + # General per-IP rate limit (cushion for the splash page + form posts) + limit_req zone=general burst=20 nodelay; + + # Image uploads from /host/'s prize cropper are ~150-300 KiB JPEGs; + # 5 MiB matches the Django-side cap. + client_max_body_size 5m; + + # Security headers — HSTS matches Django's SECURE_HSTS_SECONDS. 
+ add_header X-Frame-Options "DENY" always; + add_header X-Content-Type-Options "nosniff" always; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header Referrer-Policy "same-origin" always; + + error_page 502 503 504 /error.html; + location = /error.html { + root /usr/share/nginx/html; + internal; + } + + # GNU Terry Pratchett — keep the name moving. + add_header X-Clacks-Overhead "GNU Terry Pratchett" always; + + # Reject indexers — there's nothing here we want crawled. + location = /robots.txt { + default_type text/plain; + return 200 "User-agent: *\nDisallow: /\n"; + } + + # Public admin surface: only the login/logout endpoints, rate-limited. + location ~ ^/admin/(login|logout)/? { + limit_req zone=shower_auth burst=5 nodelay; + + proxy_pass https://indri_backend$request_uri; + proxy_ssl_verify off; + proxy_ssl_server_name on; + proxy_ssl_name shower.ops.eblu.me; + proxy_intercept_errors on; + + proxy_set_header Host shower.ops.eblu.me; + proxy_set_header X-Real-IP $http_fly_client_ip; + proxy_set_header X-Forwarded-For $http_fly_client_ip; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_http_version 1.1; + proxy_set_header Connection $connection_upgrade; + } + + # Block the rest of /admin/ at the public edge. The admin CRUD UI + # is only reachable via shower.ops.eblu.me on the tailnet. + location /admin/ { + return 403 "The Django admin is tailnet-only — visit shower.ops.eblu.me.\n"; + } + + location / { + proxy_pass https://indri_backend$request_uri; + proxy_ssl_verify off; + proxy_ssl_server_name on; + proxy_ssl_name shower.ops.eblu.me; + proxy_intercept_errors on; + + # No proxy_cache — dynamic content with sessions and CSRF. 
+ + proxy_set_header Host shower.ops.eblu.me; + proxy_set_header X-Real-IP $http_fly_client_ip; + proxy_set_header X-Forwarded-For $http_fly_client_ip; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + } + } + # Catch-all: reject unknown hosts, but serve health check server { listen 8080 default_server; diff --git a/fly/start.sh b/fly/start.sh index 1f2acaa..ef17641 100644 --- a/fly/start.sh +++ b/fly/start.sh @@ -19,8 +19,10 @@ until nslookup forge.tail8d86e.ts.net 100.100.100.100 > /dev/null 2>&1; do done echo "MagicDNS ready" -# Ensure fail2ban deny file exists before nginx starts +# Ensure fail2ban per-service deny files exist before nginx starts +# (the geo directive's `include` fails if the file is missing). touch /etc/nginx/forge-deny.conf +touch /etc/nginx/shower-deny.conf # Start nginx — MagicDNS is available, upstreams resolved. nginx -g "daemon off;" & diff --git a/pulumi/gandi/__main__.py b/pulumi/gandi/__main__.py index bda7a8a..25fd0f7 100644 --- a/pulumi/gandi/__main__.py +++ b/pulumi/gandi/__main__.py @@ -85,6 +85,15 @@ forge_public = gandi.livedns.Record( values=["blumeops-proxy.fly.dev."], ) +shower_public = gandi.livedns.Record( + "shower-public", + zone=domain, + name="shower", + type="CNAME", + ttl=300, + values=["blumeops-proxy.fly.dev."], +) + # ============== Exports ============== pulumi.export("domain", domain) pulumi.export("wildcard_fqdn", f"*.{subdomain}.{domain}") @@ -93,3 +102,4 @@ pulumi.export("target_ip", tailscale_ip) pulumi.export("docs_public_fqdn", f"docs.{domain}") pulumi.export("cv_public_fqdn", f"cv.{domain}") pulumi.export("forge_public_fqdn", f"forge.{domain}") +pulumi.export("shower_public_fqdn", f"shower.{domain}") diff --git a/service-versions.yaml b/service-versions.yaml index f7f0f4e..b6163ad 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -44,6 +44,16 @@ services: upstream-source: 
https://github.com/gethomepage/homepage/releases notes: Custom container, kustomize manifests + - name: shower + type: argocd + last-reviewed: 2026-05-10 + current-version: "1.0.0" + upstream-source: https://forge.eblu.me/eblume/adelaide-baby-shower-app + notes: | + Django app for Adelaide / Heidi / Addie's baby shower. Wheel + published to Forgejo Packages PyPI; runs on ringtail k3s. Public + at shower.eblu.me (fly proxy), tailnet admin at shower.ops.eblu.me. + - name: nvidia-device-plugin type: argocd last-reviewed: 2026-03-27