From e6cf7e47e0370facf21adb8f2de5ace57da6af94 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Sun, 8 Feb 2026 21:54:18 -0800 Subject: [PATCH] Restrict flyio-proxy ACLs to dedicated tag:flyio-target endpoints (#126) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Introduce `tag:flyio-target` so services must explicitly opt in to be reachable by the fly.io proxy - Replace broad `tag:k8s` and `tag:homelab` grants with the new tag in the ACL rule and test - Add `tailscale.com/tags: "tag:k8s,tag:flyio-target"` annotation to docs, loki, and prometheus Ingresses - Switch Alloy push endpoints from `*.ops.eblu.me` (Caddy) to `*.tail8d86e.ts.net` (Tailscale Ingress) - Update docs: flyio-proxy, caddy, tailscale, forgejo (future public access + security checklist), expose-service-publicly ## Manual step (not in PR) Update the k8s operator OAuth client in the Tailscale admin console to include `tag:flyio-target` in its scope. Without this, the operator cannot assign the new tag to Ingress proxy nodes. ## Deployment order 1. **Pulumi ACLs** — `mise run tailnet-preview && mise run tailnet-up` 2. **OAuth client** — Manual update in Tailscale admin console 3. **K8s Ingresses** — `argocd app sync apps && argocd app sync docs loki prometheus` 4. **Fly.io proxy** — `mise run fly-deploy` 5. 
**Verify** — `mise run services-check`, check Grafana dashboards ## Test plan - [ ] `mise run tailnet-preview` shows clean diff - [ ] `argocd app diff docs`, `argocd app diff loki`, `argocd app diff prometheus` show only annotation additions - [ ] After deploy: Grafana dashboards show continued log/metric flow - [ ] `curl -sf https://docs.eblu.me` returns 200 - [ ] `mise run services-check` passes 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.ops.eblu.me/eblume/blumeops/pulls/126 --- CLAUDE.md | 3 +- .../manifests/argocd/service-tailscale.yaml | 1 + argocd/manifests/devpi/ingress-tailscale.yaml | 1 + argocd/manifests/docs/ingress-tailscale.yaml | 2 + .../grafana-config/ingress-tailscale.yaml | 1 + .../manifests/immich/ingress-tailscale.yaml | 1 + argocd/manifests/kiwix/ingress-tailscale.yaml | 1 + argocd/manifests/loki/ingress-tailscale.yaml | 5 ++- .../manifests/miniflux/ingress-tailscale.yaml | 1 + .../navidrome/ingress-tailscale.yaml | 1 + .../prometheus/ingress-tailscale.yaml | 5 ++- .../tailscale-operator/kustomization.yaml | 1 + .../proxygroup-ingress.yaml | 10 +++++ .../teslamate/ingress-tailscale.yaml | 1 + .../manifests/torrent/ingress-tailscale.yaml | 1 + .../restrict-flyio-proxy-acl.infra.md | 1 + docs/explanation/architecture.md | 8 ++-- docs/explanation/security-model.md | 10 +++-- docs/how-to/expose-service-publicly.md | 40 +++++++++---------- docs/how-to/gandi-operations.md | 4 +- docs/reference/infrastructure/gandi.md | 16 +++++++- docs/reference/infrastructure/indri.md | 2 +- docs/reference/infrastructure/routing.md | 17 ++++++-- docs/reference/infrastructure/tailscale.md | 3 ++ .../kubernetes/tailscale-operator.md | 13 ++++-- docs/reference/services/caddy.md | 2 +- docs/reference/services/flyio-proxy.md | 6 +-- docs/reference/services/forgejo.md | 18 +++++++++ fly/alloy.river | 12 ++++-- pulumi/tailscale/policy.hujson | 20 +++++++--- 30 files changed, 151 insertions(+), 56 deletions(-) create mode 100644 
argocd/manifests/tailscale-operator/proxygroup-ingress.yaml create mode 100644 docs/changelog.d/restrict-flyio-proxy-acl.infra.md diff --git a/CLAUDE.md b/CLAUDE.md index 1353f2e..60c32b4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -69,7 +69,8 @@ mise run provision-indri -- --check --diff # dry run | Domain | Mechanism | Reachable from | |--------|-----------|----------------| -| `*.ops.eblu.me` | Caddy on indri (100.98.163.89) | everywhere incl. k8s pods | +| `*.eblu.me` | Fly.io proxy (Tailscale tunnel) | public internet | +| `*.ops.eblu.me` | Caddy on indri | k8s pods, containers, tailnet | | `*.tail8d86e.ts.net` | Tailscale MagicDNS | tailnet clients only | Check tailscale serve: `ssh indri 'tailscale serve status --json'` diff --git a/argocd/manifests/argocd/service-tailscale.yaml b/argocd/manifests/argocd/service-tailscale.yaml index 23ff8f1..85393af 100644 --- a/argocd/manifests/argocd/service-tailscale.yaml +++ b/argocd/manifests/argocd/service-tailscale.yaml @@ -11,6 +11,7 @@ metadata: namespace: argocd annotations: tailscale.com/proxy-class: "default" + tailscale.com/proxy-group: "ingress" gethomepage.dev/enabled: "true" gethomepage.dev/name: "ArgoCD" gethomepage.dev/group: "Infrastructure" diff --git a/argocd/manifests/devpi/ingress-tailscale.yaml b/argocd/manifests/devpi/ingress-tailscale.yaml index 08a13ed..474bf72 100644 --- a/argocd/manifests/devpi/ingress-tailscale.yaml +++ b/argocd/manifests/devpi/ingress-tailscale.yaml @@ -5,6 +5,7 @@ metadata: namespace: devpi annotations: tailscale.com/proxy-class: "default" + tailscale.com/proxy-group: "ingress" gethomepage.dev/enabled: "true" gethomepage.dev/name: "PyPI" gethomepage.dev/group: "Infrastructure" diff --git a/argocd/manifests/docs/ingress-tailscale.yaml b/argocd/manifests/docs/ingress-tailscale.yaml index b895cfb..2445e63 100644 --- a/argocd/manifests/docs/ingress-tailscale.yaml +++ b/argocd/manifests/docs/ingress-tailscale.yaml @@ -6,6 +6,8 @@ metadata: namespace: docs annotations: 
tailscale.com/proxy-class: "default" + tailscale.com/proxy-group: "ingress" + tailscale.com/tags: "tag:k8s,tag:flyio-target" gethomepage.dev/enabled: "true" gethomepage.dev/name: "Docs" gethomepage.dev/group: "Apps" diff --git a/argocd/manifests/grafana-config/ingress-tailscale.yaml b/argocd/manifests/grafana-config/ingress-tailscale.yaml index 905991c..929c912 100644 --- a/argocd/manifests/grafana-config/ingress-tailscale.yaml +++ b/argocd/manifests/grafana-config/ingress-tailscale.yaml @@ -9,6 +9,7 @@ metadata: namespace: monitoring annotations: tailscale.com/proxy-class: "default" + tailscale.com/proxy-group: "ingress" gethomepage.dev/enabled: "true" gethomepage.dev/name: "Grafana" gethomepage.dev/group: "Observability" diff --git a/argocd/manifests/immich/ingress-tailscale.yaml b/argocd/manifests/immich/ingress-tailscale.yaml index 4b3faba..2a9e86d 100644 --- a/argocd/manifests/immich/ingress-tailscale.yaml +++ b/argocd/manifests/immich/ingress-tailscale.yaml @@ -8,6 +8,7 @@ metadata: namespace: immich annotations: tailscale.com/funnel: "false" + tailscale.com/proxy-group: "ingress" gethomepage.dev/enabled: "true" gethomepage.dev/name: "Immich" gethomepage.dev/group: "Apps" diff --git a/argocd/manifests/kiwix/ingress-tailscale.yaml b/argocd/manifests/kiwix/ingress-tailscale.yaml index c3aeb4c..ec7132c 100644 --- a/argocd/manifests/kiwix/ingress-tailscale.yaml +++ b/argocd/manifests/kiwix/ingress-tailscale.yaml @@ -6,6 +6,7 @@ metadata: namespace: kiwix annotations: tailscale.com/proxy-class: "default" + tailscale.com/proxy-group: "ingress" gethomepage.dev/enabled: "true" gethomepage.dev/name: "Kiwix" gethomepage.dev/group: "Apps" diff --git a/argocd/manifests/loki/ingress-tailscale.yaml b/argocd/manifests/loki/ingress-tailscale.yaml index ed18781..96622c5 100644 --- a/argocd/manifests/loki/ingress-tailscale.yaml +++ b/argocd/manifests/loki/ingress-tailscale.yaml @@ -7,12 +7,13 @@ metadata: namespace: monitoring annotations: tailscale.com/funnel: "false" + 
tailscale.com/proxy-group: "ingress" + tailscale.com/tags: "tag:k8s,tag:flyio-target" gethomepage.dev/enabled: "false" spec: ingressClassName: tailscale rules: - - host: loki - http: + - http: paths: - path: / pathType: Prefix diff --git a/argocd/manifests/miniflux/ingress-tailscale.yaml b/argocd/manifests/miniflux/ingress-tailscale.yaml index 96c9162..01d2951 100644 --- a/argocd/manifests/miniflux/ingress-tailscale.yaml +++ b/argocd/manifests/miniflux/ingress-tailscale.yaml @@ -5,6 +5,7 @@ metadata: namespace: miniflux annotations: tailscale.com/proxy-class: "default" + tailscale.com/proxy-group: "ingress" gethomepage.dev/enabled: "true" gethomepage.dev/name: "Miniflux" gethomepage.dev/group: "Apps" diff --git a/argocd/manifests/navidrome/ingress-tailscale.yaml b/argocd/manifests/navidrome/ingress-tailscale.yaml index 21ddfef..cf8ec72 100644 --- a/argocd/manifests/navidrome/ingress-tailscale.yaml +++ b/argocd/manifests/navidrome/ingress-tailscale.yaml @@ -6,6 +6,7 @@ metadata: namespace: navidrome annotations: tailscale.com/proxy-class: "default" + tailscale.com/proxy-group: "ingress" gethomepage.dev/enabled: "true" gethomepage.dev/name: "DJ" gethomepage.dev/group: "Apps" diff --git a/argocd/manifests/prometheus/ingress-tailscale.yaml b/argocd/manifests/prometheus/ingress-tailscale.yaml index 45b88a7..f610627 100644 --- a/argocd/manifests/prometheus/ingress-tailscale.yaml +++ b/argocd/manifests/prometheus/ingress-tailscale.yaml @@ -7,6 +7,8 @@ metadata: namespace: monitoring annotations: tailscale.com/funnel: "false" + tailscale.com/proxy-group: "ingress" + tailscale.com/tags: "tag:k8s,tag:flyio-target" gethomepage.dev/enabled: "true" gethomepage.dev/name: "Prometheus" gethomepage.dev/group: "Observability" @@ -17,8 +19,7 @@ metadata: spec: ingressClassName: tailscale rules: - - host: prometheus - http: + - http: paths: - path: / pathType: Prefix diff --git a/argocd/manifests/tailscale-operator/kustomization.yaml 
b/argocd/manifests/tailscale-operator/kustomization.yaml index 65f3786..dec1bbc 100644 --- a/argocd/manifests/tailscale-operator/kustomization.yaml +++ b/argocd/manifests/tailscale-operator/kustomization.yaml @@ -6,6 +6,7 @@ namespace: tailscale resources: - operator.yaml - proxyclass.yaml + - proxygroup-ingress.yaml - dnsconfig.yaml - egress-forge.yaml - external-secret.yaml diff --git a/argocd/manifests/tailscale-operator/proxygroup-ingress.yaml b/argocd/manifests/tailscale-operator/proxygroup-ingress.yaml new file mode 100644 index 0000000..93f36b0 --- /dev/null +++ b/argocd/manifests/tailscale-operator/proxygroup-ingress.yaml @@ -0,0 +1,10 @@ +apiVersion: tailscale.com/v1alpha1 +kind: ProxyGroup +metadata: + name: ingress +spec: + type: ingress + replicas: 2 + proxyClass: default + tags: + - tag:k8s diff --git a/argocd/manifests/teslamate/ingress-tailscale.yaml b/argocd/manifests/teslamate/ingress-tailscale.yaml index 2f10f2d..08f2ceb 100644 --- a/argocd/manifests/teslamate/ingress-tailscale.yaml +++ b/argocd/manifests/teslamate/ingress-tailscale.yaml @@ -5,6 +5,7 @@ metadata: namespace: teslamate annotations: tailscale.com/proxy-class: "default" + tailscale.com/proxy-group: "ingress" gethomepage.dev/enabled: "true" gethomepage.dev/name: "TeslaMate" gethomepage.dev/group: "Apps" diff --git a/argocd/manifests/torrent/ingress-tailscale.yaml b/argocd/manifests/torrent/ingress-tailscale.yaml index 175b0f6..ff801fd 100644 --- a/argocd/manifests/torrent/ingress-tailscale.yaml +++ b/argocd/manifests/torrent/ingress-tailscale.yaml @@ -6,6 +6,7 @@ metadata: namespace: torrent annotations: tailscale.com/proxy-class: "default" + tailscale.com/proxy-group: "ingress" gethomepage.dev/enabled: "true" gethomepage.dev/name: "Transmission" gethomepage.dev/group: "Apps" diff --git a/docs/changelog.d/restrict-flyio-proxy-acl.infra.md b/docs/changelog.d/restrict-flyio-proxy-acl.infra.md new file mode 100644 index 0000000..f12f7f4 --- /dev/null +++ 
b/docs/changelog.d/restrict-flyio-proxy-acl.infra.md @@ -0,0 +1 @@ +Restrict fly.io proxy ACLs to dedicated `tag:flyio-target` endpoints instead of broad `tag:k8s` and `tag:homelab` grants. Migrate all Tailscale Ingresses to a shared ProxyGroup with per-Ingress tag overrides (`tag:flyio-target` on docs, loki, prometheus). Add `autoApprovers` for VIP service routes. Enable `--accept-routes` on indri for ProxyGroup VIP routing. diff --git a/docs/explanation/architecture.md b/docs/explanation/architecture.md index 095c588..d9870c0 100644 --- a/docs/explanation/architecture.md +++ b/docs/explanation/architecture.md @@ -42,15 +42,17 @@ Two always-on devices form the infrastructure backbone: - All devices on tailnet `tail8d86e.ts.net` - ACLs control access between devices and services - MagicDNS provides `*.tail8d86e.ts.net` hostnames -- No port forwarding or public IPs needed +- No port forwarding or public IPs on homelab devices +- Selected services exposed publicly via [[flyio-proxy]] (Fly.io → Tailscale tunnel) ## Service Routing -Two DNS domains route to services: +Three DNS domains route to services: | Domain | Mechanism | Reachable from | |--------|-----------|----------------| -| `*.ops.eblu.me` | Caddy reverse proxy on indri | Everywhere (k8s pods, containers, tailnet) | +| `*.eblu.me` | [[flyio-proxy]] (Fly.io → Tailscale tunnel) | Public internet | +| `*.ops.eblu.me` | Caddy reverse proxy on indri | k8s pods, containers, tailnet clients | | `*.tail8d86e.ts.net` | Tailscale MagicDNS | Tailnet clients only | See [[routing]] for details on when to use which. diff --git a/docs/explanation/security-model.md b/docs/explanation/security-model.md index de5c22a..b7aea88 100644 --- a/docs/explanation/security-model.md +++ b/docs/explanation/security-model.md @@ -17,18 +17,22 @@ The foundational security decision is using [[tailscale]] as the network layer. ### Zero Trust Networking -BlumeOps has no public IP addresses or port forwarding. 
All services are only accessible via Tailscale: +BlumeOps infrastructure has no public IP addresses or port forwarding. Most services are only accessible via Tailscale: -- **No attack surface** from the public internet - **Encrypted by default** - WireGuard encryption for all traffic - **Identity-based access** - ACLs based on user/device identity, not IP addresses +- **Minimal public surface** - only selected services are exposed via [[flyio-proxy]] + +### Public Access via Fly.io + +A small number of services are exposed to the internet through a reverse proxy on Fly.io that tunnels back to the homelab over Tailscale. The proxy uses restricted ACLs (`tag:flyio-target`) so it can only reach explicitly tagged endpoints — a compromised proxy cannot route to arbitrary services on the tailnet. See [[flyio-proxy]] for details and [[expose-service-publicly]] for the security considerations. ### Defense in Depth Even within the tailnet, access is restricted: ``` -Internet ──X──▶ Services (no public access) +Internet ──▶ Fly.io proxy ──▶ tag:flyio-target only (docs, observability) Tailnet: Admin ────────▶ All services diff --git a/docs/how-to/expose-service-publicly.md b/docs/how-to/expose-service-publicly.md index 1f31302..7fbd79b 100644 --- a/docs/how-to/expose-service-publicly.md +++ b/docs/how-to/expose-service-publicly.md @@ -272,16 +272,16 @@ pulumi.export("flyio_authkey", flyio_key.key) **Add to `pulumi/tailscale/policy.hujson`:** -Tag owner: +Tag owner (allows the k8s operator to assign this tag to Ingress proxy nodes): ``` -"tag:flyio-proxy": ["autogroup:admin", "tag:blumeops"], +"tag:flyio-target": ["autogroup:admin", "tag:blumeops", "tag:k8s-operator"], ``` -Access grant (Fly.io proxy → k8s services on HTTPS only): +Access grant (Fly.io proxy → explicitly tagged endpoints on HTTPS only): ``` { "src": ["tag:flyio-proxy"], - "dst": ["tag:k8s"], + "dst": ["tag:flyio-target"], "ip": ["tcp:443"], }, ``` @@ -290,11 +290,13 @@ ACL test: ``` { "src": "tag:flyio-proxy", 
- "accept": ["tag:k8s:443"], - "deny": ["tag:homelab:22", "tag:nas:445", "tag:registry:443"], + "accept": ["tag:flyio-target:443"], + "deny": ["tag:k8s:443", "tag:homelab:443", "tag:homelab:22", "tag:nas:445", "tag:registry:443"], }, ``` +Each service's Tailscale Ingress must be annotated with `tag:flyio-target` to be reachable by the proxy — see [[#7. Tag the Tailscale Ingress with `tag:flyio-target`]]. + Deploy: `mise run tailnet-preview` then `mise run tailnet-up`. After deploying, extract the auth key and set it as a Fly.io secret: @@ -572,20 +574,18 @@ curl -I https://wiki.eblu.me # Should return 200 with X-Cache-Status header ``` -### 7. Update Tailscale ACLs if needed +### 7. Tag the Tailscale Ingress with `tag:flyio-target` -The one-time setup grants `tag:flyio-proxy` access to `tag:k8s` on port -443. If the new service needs a different grant, add it to -`policy.hujson`. Examples: +The fly.io proxy can only reach endpoints tagged with `tag:flyio-target`. Add the annotation to the service's Tailscale Ingress: -- **Another k8s service** (e.g., Kiwix): No ACL change needed — already - covered by `tag:k8s:443`. -- **Forgejo on indri**: Needs a new grant for `tag:homelab` on the - relevant ports (e.g., `tcp:3001` for HTTP, `tcp:2200` for SSH). Add - this as a separate, narrow grant — do not widen the existing one. -- **Non-Tailscale-ingress service**: If the backend uses `tailscale - serve` instead of the k8s Tailscale operator, the Tailscale node will - have its own tag. Grant `tag:flyio-proxy` access to that specific tag. +```yaml +annotations: + tailscale.com/tags: "tag:k8s,tag:flyio-target" +``` + +Include `tag:k8s` to preserve existing access rules for the Ingress proxy node. The `tag:flyio-target` tag opts this specific endpoint into being reachable by the fly.io proxy — no broad ACL changes needed. + +For non-k8s services (e.g., Forgejo on indri), create a k8s ExternalName Service pointing to the host, then a Tailscale Ingress with the same annotation. 
--- @@ -691,7 +691,7 @@ dynamic, authenticated service like [[forgejo]]. - [ ] Audit access controls and permissions - [ ] Configure the service to log the forwarded client IP (not the proxy IP) - [ ] Set up fail2ban on indri with a filter for the service's log format -- [ ] Add narrow Tailscale ACL grant for `tag:flyio-proxy` to the service +- [ ] Tag the service's Tailscale Ingress with `tag:flyio-target` - [ ] Test the nginx config locally or in staging before deploying - [ ] Rehearse the break-glass shutoff (`mise run fly-shutoff`) @@ -732,5 +732,5 @@ After deploying DNS (`mise run dns-up`): 1. `curl -I https://docs.eblu.me` — returns 200 with `X-Cache-Status` header 2. `dig docs.eblu.me` — resolves to Fly.io IPs (not Tailscale IP) -3. `dig forge.ops.eblu.me` — still resolves to `100.98.163.89` (unchanged) +3. `dig forge.ops.eblu.me` — still resolves to indri's Tailscale IP (unchanged) 4. Second request to same URL shows `X-Cache-Status: HIT` diff --git a/docs/how-to/gandi-operations.md b/docs/how-to/gandi-operations.md index 6df294e..bebdd52 100644 --- a/docs/how-to/gandi-operations.md +++ b/docs/how-to/gandi-operations.md @@ -74,10 +74,10 @@ A successful preview confirms the new PAT is working. ## Break-Glass Override -If MagicDNS is unavailable and Pulumi can't resolve indri's IP, set the target IP manually: +If MagicDNS is unavailable and Pulumi can't resolve indri's IP, set the target IP manually. Find indri's current Tailscale IP via `tailscale status` or the admin console: ```bash -export BLUMEOPS_REVERSE_PROXY_IP=100.98.163.89 +export BLUMEOPS_REVERSE_PROXY_IP=<indri-tailscale-ip> mise run dns-up ``` diff --git a/docs/reference/infrastructure/gandi.md b/docs/reference/infrastructure/gandi.md index 37643e7..58a54e9 100644 --- a/docs/reference/infrastructure/gandi.md +++ b/docs/reference/infrastructure/gandi.md @@ -21,18 +21,30 @@ DNS hosting provider for the `eblu.me` domain, managed via Pulumi IaC. 
## What It Does -Gandi hosts the DNS records that make `*.ops.eblu.me` resolve to [[indri]]'s Tailscale IP (100.98.163.89). Since Tailscale IPs are not publicly routable, this gives services real DNS names while keeping them private to the tailnet. +Gandi hosts the DNS records that make `*.ops.eblu.me` resolve to [[indri]]'s Tailscale IP (`indri.tail8d86e.ts.net`). Since Tailscale IPs are not publicly routable, this gives services real DNS names while keeping them private to the tailnet. The target IP is resolved dynamically from `indri.tail8d86e.ts.net` at deploy time, so if indri's Tailscale IP changes, re-running the deployment is sufficient. ## DNS Records +### Private services (Caddy on indri) + | Record | Type | Value | TTL | |--------|------|-------|-----| | `*.ops.eblu.me` | A | indri's Tailscale IP | 300s | | `ops.eblu.me` | A | indri's Tailscale IP | 300s | -Both records point to [[indri]], which runs [[caddy]] as the reverse proxy for all services. See [[routing]] for the full service URL map. +Both records point to [[indri]], which runs [[caddy]] as the reverse proxy for all private services. + +### Public services (Fly.io proxy) + +| Record | Type | Value | TTL | +|--------|------|-------|-----| +| `docs.eblu.me` | CNAME | `blumeops-proxy.fly.dev` | 300s | + +Public CNAMEs point to [[flyio-proxy]] on Fly.io. See [[expose-service-publicly]] for adding new public services. + +See [[routing]] for the full service URL map. ## Pulumi Configuration diff --git a/docs/reference/infrastructure/indri.md b/docs/reference/infrastructure/indri.md index cf8c60f..7f0b91d 100644 --- a/docs/reference/infrastructure/indri.md +++ b/docs/reference/infrastructure/indri.md @@ -16,7 +16,7 @@ Primary BlumeOps server. Mac Mini M1 (2020). 
| **Model** | Mac mini M1, 2020 (Macmini9,1) | | **Storage** | 2TB internal SSD | | **macOS** | 15.7.3 (Sequoia) | -| **Tailscale IP** | 100.98.163.89 | +| **Tailscale hostname** | `indri.tail8d86e.ts.net` | | **Tailscale Tag** | `tag:homelab` | | **UPS** | Anker SOLIX F2000 GaNPrime | diff --git a/docs/reference/infrastructure/routing.md b/docs/reference/infrastructure/routing.md index 12cde31..cf8e115 100644 --- a/docs/reference/infrastructure/routing.md +++ b/docs/reference/infrastructure/routing.md @@ -7,20 +7,21 @@ tags: # Service Routing -Services are accessible via two DNS domains with different reachability. +Services are accessible via three DNS domains with different reachability. ## DNS Domains | Domain | Proxy | Reachable From | |--------|-------|----------------| +| `*.eblu.me` | [[flyio-proxy]] (Fly.io → Tailscale tunnel) | Public internet | | `*.ops.eblu.me` | Caddy on indri | k8s pods, docker containers, tailnet clients | | `*.tail8d86e.ts.net` | Tailscale MagicDNS | Tailnet clients only | -**Use `*.ops.eblu.me`** for services that need pod-to-service communication. +**Use `*.ops.eblu.me`** for services that need pod-to-service communication. Use `*.eblu.me` for services exposed publicly via Fly.io. ## Caddy Services (`*.ops.eblu.me`) -DNS points to indri's Tailscale IP (100.98.163.89). TLS via Let's Encrypt (ACME DNS-01 with Gandi). +DNS points to [[indri]]'s Tailscale IP. TLS via Let's Encrypt (ACME DNS-01 with Gandi). | Service | URL | Description | |---------|-----|-------------| @@ -40,6 +41,14 @@ DNS points to indri's Tailscale IP (100.98.163.89). TLS via Let's Encrypt (ACME | [[postgresql]] | pg.ops.eblu.me:5432 | Database | | [[sifaka|Sifaka]] | https://nas.ops.eblu.me | NAS dashboard | +## Public Services (`*.eblu.me`) + +DNS CNAMEs point to `blumeops-proxy.fly.dev`. TLS via Fly.io-managed Let's Encrypt. Traffic tunnels back to the homelab over Tailscale. 
Only services tagged `tag:flyio-target` are reachable by the proxy — see [[flyio-proxy]] for details. + +| Service | URL | Description | +|---------|-----|-------------| +| [[docs]] | https://docs.eblu.me | Documentation site | + ## Tailscale-Only Services | Service | URL | Description | @@ -64,3 +73,5 @@ DNS points to indri's Tailscale IP (100.98.163.89). TLS via Let's Encrypt (ACME - [[gandi]] - DNS hosting for `eblu.me` - [[tailscale]] - ACL configuration - [[indri]] - Where services run +- [[flyio-proxy]] - Public reverse proxy for `*.eblu.me` +- [[expose-service-publicly]] - How to add a new public service diff --git a/docs/reference/infrastructure/tailscale.md b/docs/reference/infrastructure/tailscale.md index 20d06bc..e9aa582 100644 --- a/docs/reference/infrastructure/tailscale.md +++ b/docs/reference/infrastructure/tailscale.md @@ -28,6 +28,9 @@ ACLs managed via Pulumi in `pulumi/policy.hujson`. | `tag:blumeops` | indri, sifaka | Pulumi IaC managed resources | | `tag:registry` | indri | Container registry access | | `tag:k8s-api` | indri | Kubernetes API server access | +| `tag:k8s-operator` | (operator pod) | Tailscale operator for k8s | +| `tag:k8s` | (Ingress proxy pods) | Kubernetes Tailscale Ingress nodes | +| `tag:flyio-target` | (k8s Ingress nodes) | Endpoints reachable by fly.io proxy | **Important:** Don't tag user-owned devices (like gilbert). Tagging converts them to "tagged devices" which lose user identity and break user-based SSH rules. 
diff --git a/docs/reference/kubernetes/tailscale-operator.md b/docs/reference/kubernetes/tailscale-operator.md index ed41ea8..aa7b1a8 100644 --- a/docs/reference/kubernetes/tailscale-operator.md +++ b/docs/reference/kubernetes/tailscale-operator.md @@ -19,11 +19,16 @@ The Tailscale operator enables Kubernetes services to be exposed directly on the ## How It Works -When you create an Ingress with `ingressClassName: tailscale`: +Ingresses use a shared ProxyGroup (`ingress`) rather than per-service Tailscale nodes. When you create an Ingress with `ingressClassName: tailscale`: -1. Operator provisions a Tailscale node for the service -2. Service becomes accessible at `.tail8d86e.ts.net` -3. TLS is handled automatically via Tailscale +1. Operator configures the shared ProxyGroup pods to serve the new Ingress +2. Service gets a VIP (Virtual IP) address on the tailnet +3. Service becomes accessible at `<service>.tail8d86e.ts.net` +4. TLS is handled automatically via Tailscale + +Tailnet clients must have `--accept-routes` enabled to route to VIP addresses. + +Services can be individually tagged (e.g., `tag:flyio-target`) via Ingress annotations to control which ACL grants apply. See [[expose-service-publicly]] for the tagging workflow. ## Limitations diff --git a/docs/reference/services/caddy.md b/docs/reference/services/caddy.md index 0ef0b31..ed3f943 100644 --- a/docs/reference/services/caddy.md +++ b/docs/reference/services/caddy.md @@ -81,7 +81,7 @@ The token is written to `~/.config/caddy/gandi-token` (chmod 0600) and sourced b ## Security Considerations -Caddy has no authentication layer — it is a plain reverse proxy. Access control relies entirely on Tailscale ACLs restricting which devices can reach indri on port 443. Currently `tag:homelab`, `autogroup:admin`, and `tag:flyio-proxy` can reach Caddy. The [[flyio-proxy]] grant exists so Alloy can push metrics/logs to Loki and Prometheus, but it means the Fly.io container can technically reach all Caddy-proxied services. 
See [[flyio-proxy#Security Considerations]] for the threat model. +Caddy has no authentication layer — it is a plain reverse proxy. Access control relies entirely on Tailscale ACLs restricting which devices can reach indri on port 443. Currently `tag:homelab` and `autogroup:admin` can reach Caddy. The [[flyio-proxy]] no longer routes through Caddy — it pushes logs and metrics directly to [[loki]] and [[prometheus]] via their Tailscale Ingress endpoints. ## Custom Build diff --git a/docs/reference/services/flyio-proxy.md b/docs/reference/services/flyio-proxy.md index e33a65f..f03c667 100644 --- a/docs/reference/services/flyio-proxy.md +++ b/docs/reference/services/flyio-proxy.md @@ -71,11 +71,11 @@ Alloy listens on `127.0.0.1:12345` for self-scraping its `/metrics` endpoint. Al ## Security Considerations -The `tag:flyio-proxy` ACL grants access to both `tag:k8s:443` (for proxying public services) and `tag:homelab:443` (for pushing metrics/logs to [[caddy|Caddy]]-proxied Loki and Prometheus). This means a compromised nginx config could route traffic to **any** Caddy-proxied service — not just the intended backends. Some of those services (Loki, Prometheus) have no auth; others ([[forgejo]], [[navidrome]], [[immich]]) do. +The `tag:flyio-proxy` ACL grants access only to `tag:flyio-target:443`. Services must explicitly opt in by adding a `tailscale.com/tags: "tag:k8s,tag:flyio-target"` annotation to their Tailscale Ingress. This means the proxy can only reach endpoints that have been individually tagged — a compromised nginx config cannot route to arbitrary services on the tailnet. -Exploitation requires either pushing a malicious image to Fly.io or modifying the nginx config — both of which require RCE on [[gilbert]] (where `fly` is authenticated) or access to [[1password]] (the deploy token). This is an acceptable boundary given that 1Password is already the trust root for the entire infrastructure. 
+Currently tagged as `tag:flyio-target`: [[docs]], [[loki]], [[prometheus]]. Loki and Prometheus are tagged so that [[alloy|Alloy]] (running inside the container) can push logs and metrics directly via their Tailscale Ingress endpoints — the restricted ACL means Caddy on indri (`tag:homelab`) is not reachable from the proxy. -If this surface area becomes a concern, an alternative would be to add dedicated Tailscale Ingress tags for Loki/Prometheus write endpoints and restrict `tag:flyio-proxy` to only those. +To expose an additional service through the proxy, add the `tag:flyio-target` annotation to its Tailscale Ingress. See [[expose-service-publicly]] for the full workflow. ## Secrets diff --git a/docs/reference/services/forgejo.md b/docs/reference/services/forgejo.md index 4fa7a25..1bed75e 100644 --- a/docs/reference/services/forgejo.md +++ b/docs/reference/services/forgejo.md @@ -71,6 +71,24 @@ The Ansible role authenticates to the Forgejo API using a Personal Access Token This is a bootstrapping requirement - the PAT enables IaC for all other secrets. +## Future: Public Access + +Forgejo can be exposed publicly at `forge.eblu.me` via [[flyio-proxy]]. Since Forgejo runs natively on [[indri]] (not in k8s), the pattern is: + +1. Create a k8s ExternalName Service pointing to indri's Tailscale IP +2. Create a Tailscale Ingress with `tailscale.com/tags: "tag:k8s,tag:flyio-target"` +3. Add the nginx server block and DNS CNAME + +Exposing a dynamic, authenticated service like Forgejo requires a full security review before going live: + +- Disable open user registration (require invites or admin approval) +- Configure fail2ban on indri with a filter for Forgejo's log format +- Ensure Forgejo logs the forwarded client IP (`X-Real-IP`) rather than the proxy's Tailscale IP +- Audit repository visibility defaults and permissions +- Rehearse the break-glass shutoff (`mise run fly-shutoff`) + +See [[expose-service-publicly]] for the full howto and dynamic service checklist. 
+ ## Related - [[argocd]] - Uses Forgejo as git source diff --git a/fly/alloy.river b/fly/alloy.river index d2dedb7..213d8c5 100644 --- a/fly/alloy.river +++ b/fly/alloy.river @@ -94,10 +94,12 @@ loki.relabel "instance" { } } -// Write logs to Loki via Caddy (valid TLS, no skip_verify needed) +// Write logs to Loki via Tailscale Ingress (direct, bypasses Caddy) +// Uses direct Tailscale endpoint because flyio-proxy ACLs only allow +// tag:flyio-target — Caddy on indri (tag:homelab) is not reachable. loki.write "loki" { endpoint { - url = "https://loki.ops.eblu.me/loki/api/v1/push" + url = "https://loki.tail8d86e.ts.net/loki/api/v1/push" } } @@ -134,9 +136,11 @@ prometheus.relabel "instance" { } } -// Push metrics to Prometheus via Caddy (valid TLS, no skip_verify needed) +// Push metrics to Prometheus via Tailscale Ingress (direct, bypasses Caddy) +// Uses direct Tailscale endpoint because flyio-proxy ACLs only allow +// tag:flyio-target — Caddy on indri (tag:homelab) is not reachable. prometheus.remote_write "prometheus" { endpoint { - url = "https://prometheus.ops.eblu.me/api/v1/write" + url = "https://prometheus.tail8d86e.ts.net/api/v1/write" } } diff --git a/pulumi/tailscale/policy.hujson b/pulumi/tailscale/policy.hujson index 2a23872..0703353 100644 --- a/pulumi/tailscale/policy.hujson +++ b/pulumi/tailscale/policy.hujson @@ -61,10 +61,10 @@ }, // --- Fly.io proxy --- - // Public reverse proxy can reach k8s services and Caddy on HTTPS + // Public reverse proxy can only reach explicitly tagged endpoints { "src": ["tag:flyio-proxy"], - "dst": ["tag:k8s", "tag:homelab"], + "dst": ["tag:flyio-target"], "ip": ["tcp:443"], }, @@ -126,6 +126,15 @@ }, ], + // ============== Auto Approvers ============== + // Allow ProxyGroup pods (tag:k8s) to auto-approve VIP Services + // Required for multi-cluster Ingress per Tailscale docs + "autoApprovers": { + "services": { + "tag:k8s": ["tag:k8s"], + }, + }, + // ============== Tag Owners ============== "tagOwners": { 
"tag:blumeops": ["autogroup:admin", "tag:blumeops"], @@ -145,6 +154,7 @@ "tag:k8s": ["autogroup:admin", "tag:blumeops", "tag:k8s-operator"], "tag:ci-gateway": ["autogroup:admin", "tag:blumeops"], "tag:flyio-proxy": ["autogroup:admin", "tag:blumeops"], + "tag:flyio-target": ["autogroup:admin", "tag:blumeops", "tag:k8s-operator"], }, // ============== ACL Tests ============== @@ -175,11 +185,11 @@ "src": "tag:ci-gateway", "accept": ["tag:registry:443"], }, - // Fly.io proxy can reach k8s and Caddy on indri (HTTPS only), nothing else + // Fly.io proxy can only reach flyio-target tagged endpoints, nothing else { "src": "tag:flyio-proxy", - "accept": ["tag:k8s:443", "tag:homelab:443"], - "deny": ["tag:homelab:22", "tag:nas:445", "tag:registry:443"], + "accept": ["tag:flyio-target:443"], + "deny": ["tag:k8s:443", "tag:homelab:443", "tag:homelab:22", "tag:nas:445", "tag:registry:443"], }, ], }