diff --git a/ansible/roles/borgmatic/defaults/main.yml b/ansible/roles/borgmatic/defaults/main.yml index 25d0149..123cb0f 100644 --- a/ansible/roles/borgmatic/defaults/main.yml +++ b/ansible/roles/borgmatic/defaults/main.yml @@ -27,6 +27,9 @@ borgmatic_source_directories: - /Users/erichblume/.config/borgmatic - /Users/erichblume/Documents - /Users/erichblume/.local/share/borgmatic/k8s-dumps + # Shower app prize-photo uploads (sifaka SMB mount). Mounted manually + # on indri via Finder — see docs/how-to/operations/shower-on-ringtail.md. + - /Volumes/shower # Backup repositories borgmatic_repositories: @@ -54,6 +57,11 @@ borgmatic_k8s_sqlite_dumps: label_selector: app=mealie db_path: /app/data/mealie.db context: minikube + - name: shower + namespace: shower + label_selector: app=shower + db_path: /app/data/db.sqlite3 + context: k3s-ringtail # Exclude patterns borgmatic_exclude_patterns: [] diff --git a/ansible/roles/caddy/defaults/main.yml b/ansible/roles/caddy/defaults/main.yml index 6eada76..da6f3f9 100644 --- a/ansible/roles/caddy/defaults/main.yml +++ b/ansible/roles/caddy/defaults/main.yml @@ -101,6 +101,9 @@ caddy_services: - name: paperless host: "paperless.{{ caddy_domain }}" backend: "https://paperless.tail8d86e.ts.net" + - name: shower + host: "shower.{{ caddy_domain }}" + backend: "https://shower.tail8d86e.ts.net" - name: sifaka host: "nas.{{ caddy_domain }}" backend: "http://sifaka:5000" diff --git a/ansible/roles/cv/defaults/main.yml b/ansible/roles/cv/defaults/main.yml index 734e52b..a18cc82 100644 --- a/ansible/roles/cv/defaults/main.yml +++ b/ansible/roles/cv/defaults/main.yml @@ -3,7 +3,7 @@ # Caddy serves cv_content_dir directly via the static-kind service block. 
cv_version: "v1.0.3" -cv_release_url: "https://forge.eblu.me/api/packages/eblume/generic/cv/{{ cv_version }}/cv-{{ cv_version }}.tar.gz" +cv_release_url: "https://forge.ops.eblu.me/api/packages/eblume/generic/cv/{{ cv_version }}/cv-{{ cv_version }}.tar.gz" cv_home: /Users/erichblume/blumeops/cv cv_content_dir: "{{ cv_home }}/content" diff --git a/argocd/apps/shower.yaml b/argocd/apps/shower.yaml new file mode 100644 index 0000000..c4a7a62 --- /dev/null +++ b/argocd/apps/shower.yaml @@ -0,0 +1,20 @@ +# Adelaide / Heidi / Addie baby shower app — Django guest/raffle/prize system. +# Public landing page at shower.eblu.me (via fly proxy), staff console + admin +# at shower.ops.eblu.me (tailnet only). Built from forge PyPI wheel. +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: shower + namespace: argocd +spec: + project: default + source: + repoURL: ssh://forgejo@forge.ops.eblu.me:2222/eblume/blumeops.git + targetRevision: main + path: argocd/manifests/shower + destination: + server: https://ringtail.tail8d86e.ts.net:6443 + namespace: shower + syncPolicy: + syncOptions: + - CreateNamespace=true diff --git a/argocd/manifests/grafana-config/dashboards/configmap-shower-apm.yaml b/argocd/manifests/grafana-config/dashboards/configmap-shower-apm.yaml new file mode 100644 index 0000000..96348e8 --- /dev/null +++ b/argocd/manifests/grafana-config/dashboards/configmap-shower-apm.yaml @@ -0,0 +1,229 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-shower-apm + namespace: monitoring + labels: + grafana_dashboard: "1" +data: + shower-apm.json: | + { + "annotations": { "list": [] }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisLabel": "req/s", + "drawStyle": "line", + "fillOpacity": 20, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 16, "x": 0, "y": 0 }, + "id": 1, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum by (status) (rate(flyio_nginx_http_requests_total{host=\"shower.eblu.me\"}[5m]))", "legendFormat": "{{status}}", "refId": "A" } + ], + "title": "Request Rate by Status", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 0.01 }, { "color": "red", "value": 0.05 }] }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 8, "x": 16, "y": 0 }, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum(rate(flyio_nginx_http_requests_total{host=\"shower.eblu.me\",status=~\"5..\"}[5m])) / sum(rate(flyio_nginx_http_requests_total{host=\"shower.eblu.me\"}[5m]))", "refId": "A" } + ], + "title": "Error Rate (5xx)", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": 
"green", "value": null }, { "color": "yellow", "value": 1 }, { "color": "red", "value": 5 }] }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 16, "y": 4 }, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum(increase(flyio_nginx_http_requests_total{host=\"shower.eblu.me\",request_uri=~\"/admin/login.*\",status=~\"4..\"}[$__range]))", "refId": "A" } + ], + "title": "Failed admin logins (range)", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { "h": 4, "w": 4, "x": 20, "y": 4 }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "sum(rate(flyio_nginx_http_requests_total{host=\"shower.eblu.me\"}[5m]))", "refId": "A" } + ], + "title": "Current RPS", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisLabel": "seconds", + "drawStyle": "line", + "fillOpacity": 10, + "lineInterpolation": "linear", + "lineWidth": 1, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] 
}, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, + "id": 5, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "multi", "sort": "desc" } + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.50, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{host=\"shower.eblu.me\"}[5m])))", "legendFormat": "p50", "refId": "A" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.90, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{host=\"shower.eblu.me\"}[5m])))", "legendFormat": "p90", "refId": "B" }, + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": "histogram_quantile(0.99, sum by (le) (rate(flyio_nginx_http_request_duration_seconds_bucket{host=\"shower.eblu.me\"}[5m])))", "legendFormat": "p99", "refId": "C" } + ], + "title": "Latency Percentiles", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisLabel": "", + "drawStyle": "line", + "fillOpacity": 20, + "lineInterpolation": "linear", + "lineWidth": 1, + "showPoints": "never", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" } + }, + "mappings": [], + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, + "id": 6, + "options": { + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "right", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { "datasource": { "type": "prometheus", "uid": "prometheus" }, "expr": 
"sum(rate(flyio_nginx_http_response_bytes_total{host=\"shower.eblu.me\"}[5m]))", "legendFormat": "Bandwidth", "refId": "A" } + ], + "title": "Bandwidth", + "type": "timeseries" + }, + { + "datasource": { "type": "loki", "uid": "loki" }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 }, + "id": 7, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": true, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { "datasource": { "type": "loki", "uid": "loki" }, "expr": "{instance=\"flyio-proxy\", job=\"flyio-nginx\"} |= \"shower.eblu.me\" | json | line_format \"{{.client_ip}} {{.request_method}} {{.request_uri}} {{.status}} {{.request_time}}s\"", "refId": "A" } + ], + "title": "Recent Access Logs", + "type": "logs" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "tags": ["shower", "flyio", "apm"], + "templating": { "list": [] }, + "time": { "from": "now-6h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Shower APM", + "uid": "shower-apm", + "version": 1, + "weekStart": "" + } diff --git a/argocd/manifests/grafana-config/kustomization.yaml b/argocd/manifests/grafana-config/kustomization.yaml index a6e8000..b518043 100644 --- a/argocd/manifests/grafana-config/kustomization.yaml +++ b/argocd/manifests/grafana-config/kustomization.yaml @@ -22,6 +22,7 @@ resources: - dashboards/configmap-transmission.yaml - dashboards/configmap-cv-apm.yaml - dashboards/configmap-docs-apm.yaml + - dashboards/configmap-shower-apm.yaml - dashboards/configmap-flyio.yaml - dashboards/configmap-sifaka-disks.yaml - dashboards/configmap-forgejo.yaml diff --git a/argocd/manifests/shower/configmap.yaml b/argocd/manifests/shower/configmap.yaml new file mode 100644 index 0000000..6102c1e --- /dev/null +++ b/argocd/manifests/shower/configmap.yaml @@ -0,0 +1,22 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: shower-app-config + 
namespace: shower +data: + DJANGO_DEBUG: "0" + # The app's settings.py hardcodes ALLOWED_HOSTS = ["shower.eblu.me", + # "localhost", "127.0.0.1"] and exposes this env var as a comma-separated + # extras list. shower.ops.eblu.me is what Caddy on indri and the + # Tailscale ProxyGroup both send as the Host header, so the app needs to + # accept it. + DJANGO_ALLOWED_HOSTS: "shower.ops.eblu.me" + # /host/, /admin/, and Django's login surface are all tailnet-only — the + # public proxy 403s everything outside of `/` and `/prizes//`. + # /host/'s "Django admin" link follows DJANGO_ADMIN_URL. + DJANGO_ADMIN_URL: "https://shower.ops.eblu.me/admin/" + # /host/ is served on shower.ops.eblu.me (tailnet), but the QR codes it + # generates need to point at the public WAN hostname so guest phones can + # reach them. PUBLIC_URL_BASE overrides Django's request.build_absolute_uri() + # in the QR views — see shower/views.py:_public_url. Added in app v1.0.1. + DJANGO_PUBLIC_URL_BASE: "https://shower.eblu.me" diff --git a/argocd/manifests/shower/deployment.yaml b/argocd/manifests/shower/deployment.yaml new file mode 100644 index 0000000..70547aa --- /dev/null +++ b/argocd/manifests/shower/deployment.yaml @@ -0,0 +1,81 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: shower + namespace: shower +spec: + replicas: 1 + # SQLite + RWO data PVC: only one writer at a time. Recreate ensures the + # old pod's lock on the local-path volume is released before the new one + # mounts it. 
+ strategy: + type: Recreate + selector: + matchLabels: + app: shower + template: + metadata: + labels: + app: shower + spec: + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + seccompProfile: + type: RuntimeDefault + containers: + - name: shower + image: registry.ops.eblu.me/blumeops/shower:kustomized + securityContext: + runAsNonRoot: true + allowPrivilegeEscalation: false + ports: + - containerPort: 8000 + name: http + envFrom: + - configMapRef: + name: shower-app-config + - secretRef: + name: shower-app-secrets + volumeMounts: + - name: media + mountPath: /app/media + - name: data + mountPath: /app/data + resources: + requests: + memory: "128Mi" + cpu: "50m" + limits: + memory: "512Mi" + cpu: "500m" + livenessProbe: + httpGet: + path: / + port: 8000 + httpHeaders: + - name: Host + value: shower.ops.eblu.me + - name: X-Forwarded-Proto + value: https + initialDelaySeconds: 30 + periodSeconds: 30 + readinessProbe: + httpGet: + path: / + port: 8000 + httpHeaders: + - name: Host + value: shower.ops.eblu.me + - name: X-Forwarded-Proto + value: https + initialDelaySeconds: 10 + periodSeconds: 10 + volumes: + - name: media + persistentVolumeClaim: + claimName: shower-media + - name: data + persistentVolumeClaim: + claimName: shower-data diff --git a/argocd/manifests/shower/external-secret.yaml b/argocd/manifests/shower/external-secret.yaml new file mode 100644 index 0000000..005a7e9 --- /dev/null +++ b/argocd/manifests/shower/external-secret.yaml @@ -0,0 +1,19 @@ +--- +apiVersion: external-secrets.io/v1 +kind: ExternalSecret +metadata: + name: shower-app-secrets + namespace: shower +spec: + refreshInterval: 1h + secretStoreRef: + kind: ClusterSecretStore + name: onepassword-blumeops + target: + name: shower-app-secrets + creationPolicy: Owner + data: + - secretKey: DJANGO_SECRET_KEY + remoteRef: + key: "Shower (blumeops)" + property: secret-key diff --git a/argocd/manifests/shower/ingress-tailscale.yaml 
b/argocd/manifests/shower/ingress-tailscale.yaml new file mode 100644 index 0000000..d09a696 --- /dev/null +++ b/argocd/manifests/shower/ingress-tailscale.yaml @@ -0,0 +1,30 @@ +# Tailscale Ingress for shower app. +# Exposes at shower.tail8d86e.ts.net. +# Caddy on indri proxies shower.ops.eblu.me here. The fly proxy then proxies +# shower.eblu.me through Caddy to this same endpoint (fly does not contact +# the k8s service directly — all traffic routes through indri's Caddy). +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: shower-tailscale + namespace: shower + annotations: + tailscale.com/proxy-class: "default" + tailscale.com/proxy-group: "ingress" + gethomepage.dev/enabled: "true" + gethomepage.dev/name: "Shower" + gethomepage.dev/group: "Home" + gethomepage.dev/icon: "mdi-baby" + gethomepage.dev/description: "Adelaide baby shower" + gethomepage.dev/href: "https://shower.ops.eblu.me" + gethomepage.dev/pod-selector: "app=shower" +spec: + ingressClassName: tailscale + defaultBackend: + service: + name: shower + port: + number: 8000 + tls: + - hosts: + - shower diff --git a/argocd/manifests/shower/kustomization.yaml b/argocd/manifests/shower/kustomization.yaml new file mode 100644 index 0000000..0afc8e3 --- /dev/null +++ b/argocd/manifests/shower/kustomization.yaml @@ -0,0 +1,17 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: shower + +resources: + - configmap.yaml + - external-secret.yaml + - pv-nfs.yaml + - pvc.yaml + - service.yaml + - ingress-tailscale.yaml + - deployment.yaml + +images: + - name: registry.ops.eblu.me/blumeops/shower + newTag: v1.0.2-039d9b9-nix diff --git a/argocd/manifests/shower/pv-nfs.yaml b/argocd/manifests/shower/pv-nfs.yaml new file mode 100644 index 0000000..7354fb5 --- /dev/null +++ b/argocd/manifests/shower/pv-nfs.yaml @@ -0,0 +1,24 @@ +# NFS PersistentVolume for shower app media uploads (prize photos). 
+# +# Requires the `shower` share on sifaka with NFS exports matching the +# blumeops standard (192.168.1.0/24 + 100.64.0.0/10, all_squash → admin). +# See docs/how-to/operations/shower-on-ringtail.md for the Synology web-UI walk +# and docs/reference/storage/sifaka.md for the exports table. +# +# Because all_squash rewrites every NFS write to admin:users (1024:100), +# the in-pod runAsUser does NOT have to match an on-disk uid. Mode 0777 +# on /volume1/shower lets the pod read back what it wrote. +apiVersion: v1 +kind: PersistentVolume +metadata: + name: shower-media-nfs-pv +spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteMany + persistentVolumeReclaimPolicy: Retain + storageClassName: "" + nfs: + server: sifaka + path: /volume1/shower diff --git a/argocd/manifests/shower/pvc.yaml b/argocd/manifests/shower/pvc.yaml new file mode 100644 index 0000000..47fee54 --- /dev/null +++ b/argocd/manifests/shower/pvc.yaml @@ -0,0 +1,30 @@ +# Media PVC — RWX NFS share for /app/media (prize photo uploads). +# SQLite DB lives in a separate local-path PVC; NFS file locking is not +# reliable enough for SQLite's WAL/journal. +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: shower-media + namespace: shower +spec: + accessModes: + - ReadWriteMany + storageClassName: "" + volumeName: shower-media-nfs-pv + resources: + requests: + storage: 10Gi +--- +# Database PVC — k3s local-path (default storage class) for SQLite. +# RWO is fine: the deployment runs with a single replica. 
+apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: shower-data + namespace: shower +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 2Gi diff --git a/argocd/manifests/shower/service.yaml b/argocd/manifests/shower/service.yaml new file mode 100644 index 0000000..0a73aab --- /dev/null +++ b/argocd/manifests/shower/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: shower + namespace: shower +spec: + selector: + app: shower + ports: + - name: http + port: 8000 + targetPort: 8000 + protocol: TCP diff --git a/containers/shower/default.nix b/containers/shower/default.nix new file mode 100644 index 0000000..d9863e1 --- /dev/null +++ b/containers/shower/default.nix @@ -0,0 +1,259 @@ +# Nix-built shower app container — Adelaide / Heidi / Addie baby shower. +# +# The app is published as a wheel to the Forgejo PyPI index at +# https://forge.ops.eblu.me/api/packages/eblume/pypi/. The wheel + its +# transitive Python deps are baked in at build time via a fixed-output +# derivation that runs `pip install` in a scratch venv against forge PyPI (proxied +# through pypi.ops.eblu.me for upstream packages). Build runs on the +# nix-container-builder runner (ringtail, amd64) so the image is native. +# +# Going through a venv pip install rather than nixpkgs Python packages +# sidesteps two issues we hit going through `python.pkgs.buildPythonPackage`: +# 1. python314Packages.django still aliases to Django 4.2 LTS, which +# doesn't support Python 3.14 at all. +# 2. django-axes pulls selenium + browser fonts into its check phase +# and the nix sandbox can't provide those. +# +# To bump the version: +# 1. Update `version` below. +# 2. Update the `showerSdist` `hash`, set `outputHash` to `pkgs.lib.fakeHash`, +# run the build, copy the real hash out of the error, and commit it. +{ pkgs ? 
import <nixpkgs> { } }: + +let + version = "1.0.2"; + + python = pkgs.python314; + + # The repo's top-level static/ directory (vendored Sortable + cropper + # JS/CSS, prize placeholder SVG) isn't shipped in the wheel — hatchling + # only packages config/ and shower/, leaving the repo-root static/ + # behind. Pull the sdist (which contains the full source tree) and + # extract just the static/ subtree into the image as /app/static. + # local_settings adds it to STATICFILES_DIRS so collectstatic at boot + # picks it up alongside the Django admin's static files. + # + # Fetched from forge.ops.eblu.me (tailnet) because /api/packages/* is + # blocked at the fly edge — see fly/nginx.conf forge.eblu.me block. + # Hash is the upstream sha256 from forge PyPI's simple index. + showerSdist = pkgs.fetchurl { + name = "adelaide_baby_shower_app-${version}.tar.gz"; + url = "https://forge.ops.eblu.me/api/packages/eblume/pypi/files/adelaide-baby-shower-app/${version}/adelaide_baby_shower_app-${version}.tar.gz"; + hash = "sha256-nlCtlx9zuYaLoJZSckybLV5YPpA8vZamN96O3RXOstM="; + }; + + staticAssets = pkgs.runCommand "shower-static-assets-${version}" { } '' + ${pkgs.gnutar}/bin/tar -xzf ${showerSdist} -C $TMPDIR + cp -r $TMPDIR/adelaide_baby_shower_app-${version}/static $out + ''; + + # Fixed-output derivation: pip-installs the app wheel + every transitive + # dep into a single target dir. FODs get network access in exchange for + # a pinned output hash, which means the whole dependency closure is + # immutable across rebuilds. 
+ pyDepsFOD = pkgs.stdenv.mkDerivation { + pname = "shower-python-deps-fod"; + inherit version; + + dontUnpack = true; + + nativeBuildInputs = [ python pkgs.cacert pkgs.removeReferencesTo ]; + + buildPhase = '' + runHook preBuild + + export HOME=$TMPDIR + export SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt + export PIP_DISABLE_PIP_VERSION_CHECK=1 + + ${python}/bin/python -m venv "$TMPDIR/venv" + "$TMPDIR/venv/bin/pip" install --upgrade pip + "$TMPDIR/venv/bin/pip" install \ + --no-cache-dir \ + --index-url=https://pypi.ops.eblu.me/root/pypi/+simple/ \ + --extra-index-url=https://forge.ops.eblu.me/api/packages/eblume/pypi/simple/ \ + "adelaide-baby-shower-app==${version}" \ + gunicorn + + runHook postBuild + ''; + + installPhase = '' + runHook preInstall + + mkdir -p $out/lib/python3.14 $out/bin + cp -r "$TMPDIR/venv/lib/python3.14/site-packages" $out/lib/python3.14/site-packages + + for script in "$TMPDIR/venv/bin/"*; do + [ -f "$script" ] || continue + name=$(basename "$script") + case "$name" in + python*|pip*|activate*) continue ;; + esac + cp "$script" "$out/bin/$name" + chmod +x "$out/bin/$name" + done + + # --- Strip Nix store references (FOD outputs must be self-contained) --- + # The wrapper derivation below restores them via autoPatchelfHook + a + # python wrapper that points pyc-less imports at the on-image python. + + # Strip bytecode entirely — pyc files embed compile-time paths. + find $out -type f -name '*.pyc' -delete + find $out -type d -name '__pycache__' -exec rm -rf {} + 2>/dev/null || true + + # Dynamically discover all nix store references and strip them. We + # don't have a static list because pip pulls in stdenv via Python's + # build env (gcc-lib, libstdc++, etc.) and the closure is opaque. 
+ { find $out -type f -print0 \ + | xargs -0 grep -aohE '/nix/store/[a-z0-9]{32}-[^/"[:space:]]+' 2>/dev/null \ + || true; } | sort -u > $TMPDIR/store-refs.txt + echo "Found $(wc -l < $TMPDIR/store-refs.txt) unique store path references to strip" + + refs_args="" + while IFS= read -r ref; do + refs_args="$refs_args -t $ref" + done < $TMPDIR/store-refs.txt + + if [ -n "$refs_args" ]; then + find $out -type f -exec remove-references-to $refs_args {} + 2>/dev/null || true + fi + + remaining=$({ find $out -type f -print0 | xargs -0 grep -cl '/nix/store/' 2>/dev/null || true; } | wc -l) + echo "Files with remaining store references: $remaining" + + runHook postInstall + ''; + + outputHashMode = "recursive"; + outputHashAlgo = "sha256"; + # Pinned dep closure — reproducible until version bumps. To recompute, + # set to pkgs.lib.fakeHash and read the failure. + outputHash = "sha256-tSTH/HaDY7M0qxlauBTM+JekZAgF++K2lGP3PLvym/o="; + + dontFixup = true; + }; + + # Non-FOD wrapper: re-applies RPATHs to pre-built .so files (pillow, + # scipy) so they find libstdc++ / libz / etc. at runtime. autoPatchelfHook + # discovers needed libraries from buildInputs. + pyDeps = pkgs.stdenv.mkDerivation { + pname = "shower-python-deps"; + inherit version; + + dontUnpack = true; + + nativeBuildInputs = [ pkgs.autoPatchelfHook ]; + + buildInputs = with pkgs; [ + python + stdenv.cc.cc.lib # libstdc++, libgcc_s + zlib + libjpeg + libwebp + libtiff + openjpeg + lcms2 + freetype + ]; + + installPhase = '' + cp -r ${pyDepsFOD} $out + chmod -R u+w $out + ''; + }; + + sitePackages = "${pyDeps}/lib/python3.14/site-packages"; + + # Settings shim — config/settings.py's `BASE_DIR = parent.parent` would + # otherwise resolve to site-packages, scattering db.sqlite3 / media / + # staticfiles into the venv. Pin them to /app/{data,media,data/staticfiles}. 
+ localSettings = pkgs.writeText "local_settings.py" '' + from pathlib import Path + + from config.settings import * # noqa: F401,F403 + + DATABASES["default"]["NAME"] = "/app/data/db.sqlite3" + MEDIA_ROOT = "/app/media" + STATIC_ROOT = "/app/data/staticfiles" + # /app/static comes from the repo-root static/ subtree of the sdist + # (see default.nix staticAssets). Added because the wheel doesn't + # ship vendored Sortable/cropper assets. + STATICFILES_DIRS = [Path("/app/static")] + ''; + + # PYTHONPATH, DJANGO_SETTINGS_MODULE, PATH, and HOME live in the image's + # `Env` block below — that way `kubectl exec deploy/shower -- python -m + # django ` Just Works without an inline `env` ceremony. + # The entrypoint just changes directory and runs the boot sequence. + entrypoint = pkgs.writeShellScript "shower-entrypoint" '' + set -eu + + cd /app + + mkdir -p /app/data /app/media + + echo "shower: running migrations" + python -m django migrate --noinput + + echo "shower: collecting static files" + python -m django collectstatic --noinput --clear + + echo "shower: starting gunicorn" + exec gunicorn \ + --bind 0.0.0.0:8000 \ + --workers 2 \ + --forwarded-allow-ips='*' \ + config.wsgi:application + ''; +in + +pkgs.dockerTools.buildLayeredImage { + name = "blumeops/shower"; + contents = [ + python + pyDeps + pkgs.cacert + pkgs.tzdata + pkgs.bashInteractive + pkgs.coreutils + ]; + + extraCommands = '' + mkdir -p app/data app/media tmp + chmod 1777 tmp + cp ${localSettings} app/local_settings.py + cp -r ${staticAssets} app/static + chmod -R u+w app/static + ''; + + fakeRootCommands = '' + chown -R 1000:1000 app + ''; + enableFakechroot = true; + + config = { + Entrypoint = [ "${entrypoint}" ]; + Env = [ + "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" + "TZDIR=${pkgs.tzdata}/share/zoneinfo" + "TZ=America/Los_Angeles" + "TMPDIR=/tmp" + "LANG=C.UTF-8" + "LC_ALL=C.UTF-8" + "PYTHONDONTWRITEBYTECODE=1" + "HOME=/app/data" + "PATH=${pyDeps}/bin:${python}/bin:/bin" + # /app 
first so local_settings.py is importable; sitePackages second so + # django, gunicorn, etc. resolve. Inherited by entrypoint + any + # `kubectl exec` so manual django subcommands work without ceremony. + "PYTHONPATH=/app:${sitePackages}" + "DJANGO_SETTINGS_MODULE=local_settings" + ]; + ExposedPorts = { + "8000/tcp" = { }; + }; + User = "1000"; + WorkingDir = "/app"; + }; +} diff --git a/docs/changelog.d/shower-app-deploy.bugfix.md b/docs/changelog.d/shower-app-deploy.bugfix.md new file mode 100644 index 0000000..91d2b3b --- /dev/null +++ b/docs/changelog.d/shower-app-deploy.bugfix.md @@ -0,0 +1,13 @@ +Shower app container now bakes the wheel + Python deps into the image +at build time via a pip-install fixed-output derivation instead of +pip-installing on first boot. Boots are deterministic and don't depend +on forge PyPI being reachable from the pod. The sdist `hash` in +`containers/shower/default.nix` is the sha256 sourced from the +[forge PyPI simple index](https://forge.ops.eblu.me/api/packages/eblume/pypi/simple/adelaide-baby-shower-app/); +bumping the version means bumping that hash and `outputHash` too. + +Borgmatic now covers the shower app: SQLite is dumped from the live +pod via `kubectl exec` (mirroring the existing mealie entry, with +`context: k3s-ringtail`), and the prize-photo media share is picked up +through `/Volumes/shower` (sifaka SMB mount on indri, same pattern as +`/Volumes/photos`). diff --git a/docs/changelog.d/shower-app-deploy.feature.md b/docs/changelog.d/shower-app-deploy.feature.md new file mode 100644 index 0000000..96218be --- /dev/null +++ b/docs/changelog.d/shower-app-deploy.feature.md @@ -0,0 +1,4 @@ +Deploy the Adelaide / Heidi / Addie baby shower app — guest splash, raffle +picker, and prize assignment console — on ringtail k3s with `shower.eblu.me` +as the public entry and `shower.ops.eblu.me` as the tailnet admin host. App +source: [`adelaide-baby-shower-app`](https://forge.eblu.me/eblume/adelaide-baby-shower-app). 
diff --git a/docs/changelog.d/shower-app-deploy.infra.md b/docs/changelog.d/shower-app-deploy.infra.md new file mode 100644 index 0000000..157a068 --- /dev/null +++ b/docs/changelog.d/shower-app-deploy.infra.md @@ -0,0 +1,9 @@ +Wire shower app for public exposure: fly nginx `shower.eblu.me` server +block as a guest-only surface — splash page, `/prizes//`, static +assets, media. Everything authenticated (`/admin/`, `/host/`, +`/accounts/`) returns 403 with a "tailnet only" pointer. Staff hit +`shower.ops.eblu.me` for the operator console + admin; the app's +v1.0.1 `DJANGO_PUBLIC_URL_BASE` setting makes QR codes generated on +the tailnet point back at the WAN host for guests. Plus a Caddy route +on indri, Pulumi Gandi CNAME, and a Grafana APM dashboard tracking +request rate, error rate, latency, bandwidth, and access logs. diff --git a/docs/how-to/operations/shower-on-ringtail.md b/docs/how-to/operations/shower-on-ringtail.md new file mode 100644 index 0000000..daf1046 --- /dev/null +++ b/docs/how-to/operations/shower-on-ringtail.md @@ -0,0 +1,245 @@ +--- +title: Shower App on Ringtail +modified: 2026-05-10 +last-reviewed: 2026-05-10 +tags: + - how-to + - operations + - kubernetes + - django +--- + +# Shower App on Ringtail + +How the Adelaide / Heidi / Addie baby shower app is deployed. The app is a +Django project ([`adelaide-baby-shower-app`](https://forge.eblu.me/eblume/adelaide-baby-shower-app)) +released as a wheel to the Forgejo Packages PyPI index and run on +[[ringtail]]'s k3s cluster. Public landing page at `shower.eblu.me`, staff +console + admin UI at `shower.ops.eblu.me` (tailnet only). + +The contract this deploy implements is defined in the app repo's +`docs/how-to/hosting.md` — read that for the env-var contract, security +model, and storage requirements before changing anything here. 
+ +## Routing + +``` +Internet → shower.eblu.me + │ (Fly.io nginx — public) + ▼ + Caddy on indri (shower.ops.eblu.me) + │ + ▼ + Tailscale ProxyGroup ingress (shower.tail8d86e.ts.net) + │ + ▼ + Service shower:8000 → Pod (Django + gunicorn) +``` + +| Hostname | Reachable from | Notes | +|---|---|---| +| `shower.eblu.me` | Public internet | Guest surface only — splash, `/prizes//`, `/static/`, `/media/`. Everything authenticated 403s with a tailnet pointer. | +| `shower.ops.eblu.me` | Tailnet | Full app surface — `/host/`, `/admin/`, the works | +| `shower.tail8d86e.ts.net` | Tailnet | Bare ProxyGroup endpoint Caddy proxies to | + +## Defense layers (public side) + +The public surface is guest-only, so the threat model collapses: there +is no credential-accepting endpoint reachable from WAN, and nothing on +WAN that requires authentication. + +1. **edge auth lockout** — fly nginx 403s `/admin/`, `/host/`, and + anything that would redirect into them. Anyone hitting an auth URL + on WAN gets a "tailnet only" message. +2. **fly nginx `limit_req zone=general`** — 10 r/s per Fly-Client-IP + cushion for the splash form. +3. **django-axes** — 5 fails / 1 hour lockout per `(username, ip_address)`, + running on the tailnet-side login. Provides the only credential + defense, since brute-force is only reachable to tailnet members. + +The QR codes that `/host/` (on tailnet) generates for guests embed +`https://shower.eblu.me/...` even though the QR view is served from +the tailnet host. The app's `PUBLIC_URL_BASE` setting (added in v1.0.1) +overrides Django's `request.build_absolute_uri()` for those URLs. 
+ +## Persistent storage + +| Mount | PVC | Type | Why | +|---|---|---|---| +| `/app/media` | `shower-media` | NFS RWX on sifaka (`/volume1/shower`) | Prize photos survive pod rescheduling | +| `/app/data` | `shower-data` | k3s `local-path` RWO | SQLite DB; NFS file locking can't be trusted for WAL/journal | + +The container has the app + its Python deps baked in at nix build time +(`buildPythonPackage` against the wheel fetched from forge PyPI). The +entrypoint runs migrations, runs `collectstatic`, and `exec`s gunicorn — +no pip-at-boot. A `local_settings.py` shim overrides `DATABASES.NAME`, +`MEDIA_ROOT`, and `STATIC_ROOT` to absolute paths under `/app/`, +sidestepping the wheel's `BASE_DIR = parent.parent` of an +in-site-packages settings module. + +## Backups + +[[borgmatic]] (running on indri) captures both halves of the persistent +state on its daily 2 a.m. run: + +- **`/app/data/db.sqlite3`** — dumped via `kubectl exec`'s + `sqlite3.backup()` against the live pod (entry in + `borgmatic_k8s_sqlite_dumps`, context `k3s-ringtail`). The dumped + file lands in `borgmatic_k8s_dump_dir` on indri and is picked up by + the main source-directory sweep. +- **`/app/media`** — picked up via `/Volumes/shower`, the SMB mount of + `sifaka:/volume1/shower` on indri. The same Synology share is exposed + via SMB *and* NFS simultaneously; ringtail's pod uses the NFS export, + while indri reads the SMB side for the borgmatic source. + +Both archive to [[sifaka]] (`borg-backups`) and BorgBase offsite, with +retention `keep_daily=7 / keep_monthly=12 / keep_yearly=1000`. + +The SMB mount on indri is set up manually once via Finder (Cmd-K → +`smb://sifaka/shower`, save credentials, "Always log in" so it +reconnects after reboot). If `/Volumes/shower` is missing at backup +time borgmatic will fail loudly — `source_directories_must_exist: true` +applies to all entries. 
+ +## One-time setup steps + +These steps are required the first time the service is deployed and are +not encoded in the manifests. + +### 1. NFS + SMB share on sifaka + +On the Synology DSM web UI: + +1. **Control Panel → Shared Folder → Create**. Name: `shower`, + Location: Volume 1. Leave the rest at default. +2. **Control Panel → File Services → NFS → NFS Rules** (on the + `shower` row's *Permissions* tab). Add a rule mirroring the other + shares' pattern: Hostname/IP=`192.168.1.0/24` and again for + `100.64.0.0/10`, Privilege=Read/Write, Squash=`Map all users to + admin` (= `all_squash`), and tick *Allow connections from + non-privileged ports*. (See [[sifaka#NFS Exports]] — the existing + `frigate`, `paperless`, etc. shares use this exact pattern.) +3. **Control Panel → File Services → SMB**: leave SMB enabled + globally. No per-share rule required — the share inherits the + default `eblume` access. +4. The directory ownership at `/volume1/shower` will end up + `root:root`, mode `0777` (DSM default) — which is fine because + `all_squash` rewrites every NFS write to `admin:users`, and the + `0777` lets pods read what other pods wrote. No `chown` needed. + +After the share exists, mount it on indri for borgmatic: + +- In Finder, **Cmd-K → `smb://sifaka/shower`**, sign in as `eblume`, + and tick **Remember in Keychain** + **Always log in** so it + reconnects on reboot. This produces `/Volumes/shower`, which the + borgmatic source-directory list points at. + +### 2. 1Password item + +Item name: **`Shower (blumeops)`** in the `blumeops` vault. +Required property: + +| Field | Value | +|---|---| +| `secret-key` | Output of `openssl rand -base64 48` | + +The `ExternalSecret` `shower-app-secrets` will sync this into the +`shower` namespace as a `Secret` and `envFrom` exposes it as +`DJANGO_SECRET_KEY` to the container. 
+ +**Never reuse a key that has ever been in git history.** Per the app's +hosting.md, an early dev key was committed before being replaced with +the `django-insecure-...` placeholder; the production key must be +freshly generated. + +### 3. Container image + +Built by the `build-container` Forgejo Actions workflow on the +`nix-container-builder` runner (ringtail, amd64). The wheel is fetched +from forge PyPI at nix build time and baked into the image — no +pip-at-runtime. To bump the version, change `version` in +`containers/shower/default.nix` and update `wheelHash` (or set it to +`pkgs.lib.fakeHash` and let the next build print the correct one). + +Trigger with: + +```fish +mise run container-build-and-release shower +``` + +After the workflow finishes, update `images[].newTag` in +`argocd/manifests/shower/kustomization.yaml` to the resulting +`vX.Y.Z--nix` tag, then commit (C0). + +### 4. DNS + +`pulumi/gandi/__main__.py` declares the `shower-public` CNAME pointing +at `blumeops-proxy.fly.dev.`. Apply with: + +```fish +mise run dns-preview +mise run dns-up +``` + +### 5. Fly.io certificate + +```fish +fly certs add shower.eblu.me -a blumeops-proxy +``` + +(The same command is already in `mise-tasks/fly-setup`, so re-runs of +the one-time setup pick it up.) + +### 6. Caddy on indri + +`shower` is in `ansible/roles/caddy/defaults/main.yml`. Push with: + +```fish +mise run provision-indri -- --tags caddy +``` + +### 7. Create the admin user + +The container's entrypoint runs `migrate --noinput` + `collectstatic +--noinput --clear` before gunicorn, so a fresh `db.sqlite3` is schema- +ready as soon as the pod boots. It does *not* create a Django superuser +— that has to happen once, interactively, after the first pod is up: + +```fish +kubectl --context=k3s-ringtail -n shower exec -it deploy/shower -- \ + python -m django createsuperuser +``` + +Use `erich` / your usual email. The same account doubles as the +`@staff_member_required` login for `/host/`.
Subsequent staff accounts +can be created from `/admin/auth/user/` once you're signed in. + +## Deploying a new version + +1. Bump the wheel version in the app repo (`adelaide-baby-shower-app`) + and release it to Forgejo PyPI. +2. Bump `appVersion` in `containers/shower/default.nix` to match. +3. `mise run container-build-and-release shower`. Verify the build + with `mise run runner-logs`. +4. Update the `newTag` in `argocd/manifests/shower/kustomization.yaml` + to the new `[main]` SHA tag. +5. Commit (C0 after PR merge — see [[build-container-image#Squash-merge and container tags]]). +6. `argocd app sync shower`. + +## Verifying after a deploy + +```fish +kubectl --context=k3s-ringtail -n shower get pods +kubectl --context=k3s-ringtail -n shower logs deploy/shower +curl -sf https://shower.ops.eblu.me/ # tailnet +curl -sf https://shower.eblu.me/ # public +curl -I https://shower.eblu.me/admin/users/ # expect 403 (edge block) +curl -I https://shower.ops.eblu.me/admin/ # expect 200 / 302 (login) +``` + +## Related + +- [[expose-service-publicly]] — Fly.io proxy + Tailscale pattern +- [[deploy-k8s-service]] — generic ArgoCD service onboarding +- [[ringtail]] — the cluster +- [`hosting.md`](https://forge.eblu.me/eblume/adelaide-baby-shower-app/src/branch/main/docs/how-to/hosting.md) — app's deployment contract diff --git a/docs/reference/kubernetes/apps.md b/docs/reference/kubernetes/apps.md index 80ea72e..fd5c06f 100644 --- a/docs/reference/kubernetes/apps.md +++ b/docs/reference/kubernetes/apps.md @@ -41,6 +41,7 @@ Registry of all applications deployed via [[argocd]]. 
| `ollama` | ollama | `argocd/manifests/ollama/` | [[ollama]] | | `mealie` | mealie | `argocd/manifests/mealie/` | [[mealie]] | | `paperless` | paperless | `argocd/manifests/paperless/` | [[paperless]] | +| `shower` | shower | `argocd/manifests/shower/` | [[shower-app]] | | `prowler` | prowler | `argocd/manifests/prowler/` | [[prowler]] | ## Sync Policies diff --git a/docs/reference/services/shower-app.md b/docs/reference/services/shower-app.md new file mode 100644 index 0000000..26d1764 --- /dev/null +++ b/docs/reference/services/shower-app.md @@ -0,0 +1,55 @@ +--- +title: Shower App +modified: 2026-05-10 +last-reviewed: 2026-05-10 +tags: + - service + - django +--- + +# Shower App + +Django web app for Adelaide / Heidi / Addie's baby shower — guest splash with +a "what did you bring?" form, raffle picker, contest-prize ranking via +QR-coded `/prizes//` URLs, and an `/host/` operator console with +drag-rank assignment solving via scipy. + +## Quick Reference + +| Property | Value | +|----------|-------| +| **Public URL** | `shower.eblu.me` (guest surface only — via [[flyio-proxy]]) | +| **Private URL** | `shower.ops.eblu.me` (admin + `/host/` console — Caddy on indri) | +| **Cluster** | [[ringtail]] k3s, namespace `shower` | +| **Container** | `registry.ops.eblu.me/blumeops/shower` (built from `containers/shower/default.nix`) | +| **App source** | `forge.eblu.me/eblume/adelaide-baby-shower-app` (wheel on Forgejo PyPI) | +| **Database** | SQLite on a local-path PVC (`shower-data`, RWO 2 Gi) | +| **Media (prize photos)** | NFS RWX PVC `shower-media` → `sifaka:/volume1/shower` | +| **Secrets** | `Shower (blumeops)` 1Password item → `DJANGO_SECRET_KEY` | + +## Routing + +``` +Internet → shower.eblu.me (Fly nginx, guest-only 403s on /admin/ /host/) + │ + ▼ + Caddy on indri (shower.ops.eblu.me — full surface) + │ + ▼ + Tailscale ProxyGroup → k3s Service → Deployment +``` + +## Backups + +- **SQLite** dumped via `kubectl exec` to indri's `borgmatic_k8s_dump_dir` on every 
2 a.m. run (mealie-pattern entry in `borgmatic_k8s_sqlite_dumps`) +- **Media** picked up via `/Volumes/shower` (sifaka SMB mount on indri) in the main `borgmatic_source_directories` list + +Both archive to sifaka + BorgBase. + +## Related + +- [[shower-on-ringtail]] — onboarding + day-of runbook +- [[expose-service-publicly]] — Fly proxy + tailnet pattern this rides on +- [[ringtail]] — host cluster +- [[sifaka#NFS Exports]] — NFS share table +- [[borgmatic]] — backup system diff --git a/docs/tutorials/expose-service-publicly.md b/docs/tutorials/expose-service-publicly.md index 6bc8fae..886cad4 100644 --- a/docs/tutorials/expose-service-publicly.md +++ b/docs/tutorials/expose-service-publicly.md @@ -176,17 +176,39 @@ Indri carries `tag:flyio-target` so the Fly proxy can reach Caddy. No per-servic Deploy: `mise run tailnet-preview` then `mise run tailnet-up`. -After deploying, extract the auth key and set it as a Fly.io secret: +After deploying, push the auth key to Fly.io. The simplest path is +`mise run fly-setup`, which reads the current value from Pulumi state +and stages it as a Fly.io secret: ```bash -# Get the key from Pulumi state -cd pulumi/tailscale && pulumi stack output flyio_authkey --show-secrets - -# Set it in Fly.io -fly secrets set TS_AUTHKEY="tskey-auth-..." -a blumeops-proxy +mise run fly-setup ``` -Store the auth key in 1Password as well for the `fly-setup` mise task. +Manual equivalent for reference: + +```bash +cd pulumi/tailscale && pulumi stack output flyio_authkey --show-secrets +# then in fly/: +fly secrets set TS_AUTHKEY="tskey-auth-..." -a blumeops-proxy --stage +``` + +**Pulumi state is the only source of truth for this key.** No other +process (mise tasks, ansible, scripts) reads it from anywhere else — +in particular, the key is not stored in 1Password. 
To rotate +(every 90 days, or after a compromise), force-replace the resource +and re-run `fly-setup`: + +```bash +mise run tailnet-up -- \ + --replace='urn:pulumi:tail8d86e::blumeops-tailnet::tailscale:index/tailnetKey:TailnetKey::flyio-proxy-key' +mise run fly-setup +mise run fly-deploy +``` + +Pulumi destroys the old key and mints a new 90-day one in a single +operation. Older fly machines that already authed against the old key +are unaffected (they don't need it after the initial join); only +*new* machine starts read the rotated value. ### Step 4: Mise tasks diff --git a/fly/fail2ban/action.d/nginx-deny.conf b/fly/fail2ban/action.d/nginx-deny.conf index 1d3737b..bab8abb 100644 --- a/fly/fail2ban/action.d/nginx-deny.conf +++ b/fly/fail2ban/action.d/nginx-deny.conf @@ -2,13 +2,22 @@ # Standard iptables banning won't work in Fly.io because $remote_addr # is Fly's internal proxy IP. Instead, we write banned IPs to a file # that nginx checks via a geo directive keyed on $http_fly_client_ip. +# +# The deny file is per-service: each jail sets `nginx_deny_file = ...` +# (see jail.d/*.conf) and a matching `geo $http_fly_client_ip $..._banned` +# block in nginx.conf includes the same path. [Definition] -actionban = echo "<ip> 1;" >> /etc/nginx/forge-deny.conf && nginx -s reload +actionban = echo "<ip> 1;" >> <nginx_deny_file> && nginx -s reload -actionunban = sed -i '/<ip> 1;/d' /etc/nginx/forge-deny.conf && nginx -s reload +actionunban = sed -i '/<ip> 1;/d' <nginx_deny_file> && nginx -s reload actionstart = actionstop = actioncheck = + +[Init] + +# Default for jails that don't override (preserves forge behaviour). +nginx_deny_file = /etc/nginx/forge-deny.conf diff --git a/fly/nginx.conf b/fly/nginx.conf index 5e49d88..570e6c9 100644 --- a/fly/nginx.conf +++ b/fly/nginx.conf @@ -34,6 +34,15 @@ http { # bucket. $http_fly_client_ip has the actual client IP.
limit_req_zone $http_fly_client_ip zone=forge_auth:10m rate=3r/s; + # Shower-specific zone: loose enough that ~30 guests sharing a single + # venue-wifi NAT'd public IP can all scan the QR and load the splash + # (HTML + a handful of static asset hits each) without anyone tripping + # the limit. 50r/s + burst=200 covers the simultaneous-load spike; + # exploit scanners still trip it (e.g. the .env-sweeping bot we saw + # fired ~30 req in 2s — that pattern stays caught). See the + # shower.eblu.me server block for the matching `limit_req`. + limit_req_zone $http_fly_client_ip zone=shower_general:10m rate=50r/s; + # fail2ban deny list — banned IPs are written here by fail2ban and # checked via the $forge_banned variable. The file is touched at # container start to ensure it exists. @@ -184,6 +193,23 @@ http { return 200 "User-agent: *\nDisallow: /mirrors/\nDisallow: /user/\nDisallow: /users/\nDisallow: /*/archive/\nDisallow: /*/releases/download/\n"; } + # Block the package registry at the public edge. Forgejo's per-user + # visibility model treats packages as world-readable when the owner + # has Visibility=Public — which means anyone on the internet can + # enumerate and download every wheel/sdist/generic artifact, even + # for private-repo releases (the sdist contains full source). We + # like keeping eblume's profile public, so we close the hole here + # at the proxy instead: WAN sees 403, tailnet (forge.ops.eblu.me) + # stays open for legitimate consumers (CI workflows, gilbert). + # See docs/tutorials/expose-service-publicly.md for the broader + # threat model on this proxy. 
+ location /api/packages/ { + return 403 "Package downloads are tailnet-only — use forge.ops.eblu.me.\n"; + } + location /api/v1/packages { + return 403 "Package enumeration is tailnet-only — use forge.ops.eblu.me.\n"; + } + # Block swagger API docs — use forge.ops.eblu.me from tailnet location /swagger { return 403 "API documentation is only available at forge.ops.eblu.me (tailnet).\n"; @@ -288,6 +314,160 @@ http { } } + # --- shower.eblu.me (Adelaide baby shower — guest-only public surface) --- + # Only the guest paths (`/`, `/prizes//`, /static/, /media/) are + # exposed on WAN. /host/, /admin/, and Django's login views are blocked + # at the edge with a 403 pointing at the tailnet hostname — staff sign + # in on shower.ops.eblu.me, which is reachable from any device with + # Tailscale installed. Defense layers reduce to: general per-IP rate + # limit + django-axes (5 fails / 1h) on the tailnet-side login. No + # fail2ban needed here because the public surface no longer takes + # credentials of any kind. + server { + listen 8080; + server_name shower.eblu.me; + + # Per-IP rate limit. shower_general (50r/s, burst=200) instead of + # the global `general` zone because at the party, guests on the + # venue's wifi all NAT through a single Fly-Client-IP — 30 guests + # scanning the QR at once would each fetch HTML + a few static + # assets, easily clearing 20 burst on `general`. Exploit scanners + # still trip it (sustained ≫ 50r/s patterns). + limit_req zone=shower_general burst=200 nodelay; + + # Request-body cap. Uploads only come from /host/'s prize cropper + # (~150-300 KiB JPEGs), which isn't reachable here; this limits + # request bodies only, not /media/ responses. 5 MiB matches Django. + client_max_body_size 5m; + + # Security headers — HSTS matches Django's SECURE_HSTS_SECONDS.
+ add_header X-Frame-Options "DENY" always; + add_header X-Content-Type-Options "nosniff" always; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header Referrer-Policy "same-origin" always; + # GNU Terry Pratchett — keep the name moving. + add_header X-Clacks-Overhead "GNU Terry Pratchett" always; + + error_page 502 503 504 /error.html; + location = /error.html { + root /usr/share/nginx/html; + internal; + } + + # Reject indexers — there's nothing here we want crawled. + location = /robots.txt { + default_type text/plain; + return 200 "User-agent: *\nDisallow: /\n"; + } + + # Admin surface: tailnet-only. Anything under /admin/ — login, + # logout, CRUD UI, password reset — returns 403 with a pointer to + # the tailnet host. Django's `staff_member_required` will redirect + # /host/ to /admin/login/, which lands on this 403 if a guest + # device wanders into it. Staff hit the tailnet host directly. + location /admin/ { + return 403 "Authentication is tailnet-only — visit shower.ops.eblu.me.\n"; + } + + # Operator console: tailnet-only. Same rationale as /admin/. + location /host/ { + return 403 "The host console is tailnet-only — visit shower.ops.eblu.me.\n"; + } + + # Django auth views under /accounts/: tailnet-only, so no + # credential-accepting endpoint is reachable from WAN. + location /accounts/ { + return 403 "Authentication is tailnet-only — visit shower.ops.eblu.me.\n"; + } + + # Static assets — WhiteNoise + CompressedManifestStaticFilesStorage + # gives content-hashed filenames, so cache aggressively. Hashed + # names make cache invalidation automatic on app upgrades.
+ location /static/ { + proxy_pass https://indri_backend$request_uri; + proxy_ssl_verify off; + proxy_ssl_server_name on; + proxy_ssl_name shower.ops.eblu.me; + + proxy_http_version 1.1; + proxy_set_header Connection $connection_upgrade; + proxy_set_header Host shower.ops.eblu.me; + proxy_set_header X-Real-IP $http_fly_client_ip; + proxy_set_header X-Forwarded-For $http_fly_client_ip; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_cache services; + proxy_cache_valid 200 1y; + proxy_cache_valid 404 1m; + proxy_cache_use_stale error timeout updating; + proxy_cache_lock on; + proxy_cache_key $host$uri; + proxy_ignore_headers Cache-Control Set-Cookie; + + # Any add_header at this level stops inheritance of the + # server-level headers, so repeat them next to X-Cache-Status. + add_header X-Frame-Options "DENY" always; + add_header X-Content-Type-Options "nosniff" always; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header Referrer-Policy "same-origin" always; + add_header X-Clacks-Overhead "GNU Terry Pratchett" always; + add_header X-Cache-Status $upstream_cache_status; + } + + # Prize photo uploads. Shorter TTL than /static/ because filenames + # aren't content-hashed — operators can re-upload a prize photo + # and we want guests to see the new image within a day. + location /media/ { + proxy_pass https://indri_backend$request_uri; + proxy_ssl_verify off; + proxy_ssl_server_name on; + proxy_ssl_name shower.ops.eblu.me; + + proxy_http_version 1.1; + proxy_set_header Connection $connection_upgrade; + proxy_set_header Host shower.ops.eblu.me; + proxy_set_header X-Real-IP $http_fly_client_ip; + proxy_set_header X-Forwarded-For $http_fly_client_ip; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_cache services; + proxy_cache_valid 200 1d; + proxy_cache_valid 404 1m; + proxy_cache_use_stale error timeout updating; + proxy_cache_lock on; + proxy_cache_key $host$uri; + proxy_ignore_headers Cache-Control Set-Cookie; + + # Any add_header at this level stops inheritance of the + # server-level headers, so repeat them next to X-Cache-Status. + add_header X-Frame-Options "DENY" always; + add_header X-Content-Type-Options "nosniff" always; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header Referrer-Policy "same-origin" always; + add_header X-Clacks-Overhead "GNU Terry Pratchett" always; + add_header X-Cache-Status $upstream_cache_status; + } + + location / { + proxy_pass https://indri_backend$request_uri; + proxy_ssl_verify off; + proxy_ssl_server_name on; + proxy_ssl_name shower.ops.eblu.me; + proxy_intercept_errors on; + + # No proxy_cache — dynamic content with sessions and CSRF.
+ + proxy_set_header Host shower.ops.eblu.me; + proxy_set_header X-Real-IP $http_fly_client_ip; + proxy_set_header X-Forwarded-For $http_fly_client_ip; + proxy_set_header X-Forwarded-Proto $scheme; + + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + } + } + # Catch-all: reject unknown hosts, but serve health check server { listen 8080 default_server; diff --git a/fly/start.sh b/fly/start.sh index 1f2acaa..a924849 100644 --- a/fly/start.sh +++ b/fly/start.sh @@ -20,6 +20,7 @@ done echo "MagicDNS ready" # Ensure fail2ban deny file exists before nginx starts +# (the geo directive's `include` fails if the file is missing). touch /etc/nginx/forge-deny.conf # Start nginx — MagicDNS is available, upstreams resolved. diff --git a/mise-tasks/fly-setup b/mise-tasks/fly-setup index 0c5cb56..be797e5 100755 --- a/mise-tasks/fly-setup +++ b/mise-tasks/fly-setup @@ -23,6 +23,7 @@ echo "IPs allocated" fly certs add docs.eblu.me -a "$APP" 2>/dev/null || true fly certs add cv.eblu.me -a "$APP" 2>/dev/null || true fly certs add forge.eblu.me -a "$APP" 2>/dev/null || true +fly certs add shower.eblu.me -a "$APP" 2>/dev/null || true echo "Certificates configured" echo "Done. Run 'mise run fly-deploy' to deploy."
diff --git a/pulumi/gandi/__main__.py b/pulumi/gandi/__main__.py index bda7a8a..25fd0f7 100644 --- a/pulumi/gandi/__main__.py +++ b/pulumi/gandi/__main__.py @@ -85,6 +85,15 @@ forge_public = gandi.livedns.Record( values=["blumeops-proxy.fly.dev."], ) +shower_public = gandi.livedns.Record( + "shower-public", + zone=domain, + name="shower", + type="CNAME", + ttl=300, + values=["blumeops-proxy.fly.dev."], +) + # ============== Exports ============== pulumi.export("domain", domain) pulumi.export("wildcard_fqdn", f"*.{subdomain}.{domain}") @@ -93,3 +102,4 @@ pulumi.export("target_ip", tailscale_ip) pulumi.export("docs_public_fqdn", f"docs.{domain}") pulumi.export("cv_public_fqdn", f"cv.{domain}") pulumi.export("forge_public_fqdn", f"forge.{domain}") +pulumi.export("shower_public_fqdn", f"shower.{domain}") diff --git a/service-versions.yaml b/service-versions.yaml index f7f0f4e..74d467e 100644 --- a/service-versions.yaml +++ b/service-versions.yaml @@ -44,6 +44,16 @@ services: upstream-source: https://github.com/gethomepage/homepage/releases notes: Custom container, kustomize manifests + - name: shower + type: argocd + last-reviewed: 2026-05-10 + current-version: "1.0.2" + upstream-source: https://forge.eblu.me/eblume/adelaide-baby-shower-app + notes: | + Django app for Adelaide / Heidi / Addie's baby shower. Wheel + published to Forgejo Packages PyPI; runs on ringtail k3s. Public + at shower.eblu.me (fly proxy), tailnet admin at shower.ops.eblu.me. + - name: nvidia-device-plugin type: argocd last-reviewed: 2026-03-27 @@ -96,6 +106,15 @@ services: current-version: "v1.94.2" upstream-source: https://github.com/tailscale/tailscale/releases + - name: tailscale + type: container + last-reviewed: 2026-05-10 + current-version: "1.94.2" + upstream-source: https://github.com/tailscale/tailscale/releases + notes: | + Locally mirrored tailscale image used by ringtail's tailscale-operator + ProxyClass. Built via containers/tailscale/default.nix. 
+ - name: grafana type: argocd last-reviewed: 2026-04-02