diff --git a/argocd/manifests/docs/ingress-tailscale.yaml b/argocd/manifests/docs/ingress-tailscale.yaml index b895cfb..b76b6af 100644 --- a/argocd/manifests/docs/ingress-tailscale.yaml +++ b/argocd/manifests/docs/ingress-tailscale.yaml @@ -6,6 +6,7 @@ metadata: namespace: docs annotations: tailscale.com/proxy-class: "default" + tailscale.com/tags: "tag:k8s,tag:flyio-target" gethomepage.dev/enabled: "true" gethomepage.dev/name: "Docs" gethomepage.dev/group: "Apps" diff --git a/argocd/manifests/loki/ingress-tailscale.yaml b/argocd/manifests/loki/ingress-tailscale.yaml index ed18781..c25d919 100644 --- a/argocd/manifests/loki/ingress-tailscale.yaml +++ b/argocd/manifests/loki/ingress-tailscale.yaml @@ -7,6 +7,7 @@ metadata: namespace: monitoring annotations: tailscale.com/funnel: "false" + tailscale.com/tags: "tag:k8s,tag:flyio-target" gethomepage.dev/enabled: "false" spec: ingressClassName: tailscale diff --git a/argocd/manifests/prometheus/ingress-tailscale.yaml b/argocd/manifests/prometheus/ingress-tailscale.yaml index 45b88a7..00aa05c 100644 --- a/argocd/manifests/prometheus/ingress-tailscale.yaml +++ b/argocd/manifests/prometheus/ingress-tailscale.yaml @@ -7,6 +7,7 @@ metadata: namespace: monitoring annotations: tailscale.com/funnel: "false" + tailscale.com/tags: "tag:k8s,tag:flyio-target" gethomepage.dev/enabled: "true" gethomepage.dev/name: "Prometheus" gethomepage.dev/group: "Observability" diff --git a/docs/changelog.d/restrict-flyio-proxy-acl.infra.md b/docs/changelog.d/restrict-flyio-proxy-acl.infra.md new file mode 100644 index 0000000..94115b9 --- /dev/null +++ b/docs/changelog.d/restrict-flyio-proxy-acl.infra.md @@ -0,0 +1 @@ +Restrict fly.io proxy ACLs to dedicated `tag:flyio-target` endpoints instead of broad `tag:k8s` and `tag:homelab` grants. Alloy now pushes logs/metrics directly to Loki and Prometheus via Tailscale Ingress, bypassing Caddy. 
diff --git a/docs/how-to/expose-service-publicly.md b/docs/how-to/expose-service-publicly.md index 1f31302..970dd3c 100644 --- a/docs/how-to/expose-service-publicly.md +++ b/docs/how-to/expose-service-publicly.md @@ -272,16 +272,16 @@ pulumi.export("flyio_authkey", flyio_key.key) **Add to `pulumi/tailscale/policy.hujson`:** -Tag owner: +Tag owner (allows the k8s operator to assign this tag to Ingress proxy nodes): ``` -"tag:flyio-proxy": ["autogroup:admin", "tag:blumeops"], +"tag:flyio-target": ["autogroup:admin", "tag:blumeops", "tag:k8s-operator"], ``` -Access grant (Fly.io proxy → k8s services on HTTPS only): +Access grant (Fly.io proxy → explicitly tagged endpoints on HTTPS only): ``` { "src": ["tag:flyio-proxy"], - "dst": ["tag:k8s"], + "dst": ["tag:flyio-target"], "ip": ["tcp:443"], }, ``` @@ -290,11 +290,13 @@ ACL test: ``` { "src": "tag:flyio-proxy", - "accept": ["tag:k8s:443"], - "deny": ["tag:homelab:22", "tag:nas:445", "tag:registry:443"], + "accept": ["tag:flyio-target:443"], + "deny": ["tag:k8s:443", "tag:homelab:443", "tag:homelab:22", "tag:nas:445", "tag:registry:443"], }, ``` +Each service's Tailscale Ingress must be annotated with `tag:flyio-target` to be reachable by the proxy — see step 7, "Tag the Tailscale Ingress with `tag:flyio-target`", below. + Deploy: `mise run tailnet-preview` then `mise run tailnet-up`. After deploying, extract the auth key and set it as a Fly.io secret: @@ -572,20 +574,18 @@ curl -I https://wiki.eblu.me # Should return 200 with X-Cache-Status header ``` -### 7. Update Tailscale ACLs if needed +### 7. Tag the Tailscale Ingress with `tag:flyio-target` -The one-time setup grants `tag:flyio-proxy` access to `tag:k8s` on port -443. If the new service needs a different grant, add it to -`policy.hujson`. Examples: +The fly.io proxy can only reach endpoints tagged with `tag:flyio-target`. Add the annotation to the service's Tailscale Ingress: -- **Another k8s service** (e.g., Kiwix): No ACL change needed — already - covered by `tag:k8s:443`.
-- **Forgejo on indri**: Needs a new grant for `tag:homelab` on the - relevant ports (e.g., `tcp:3001` for HTTP, `tcp:2200` for SSH). Add - this as a separate, narrow grant — do not widen the existing one. -- **Non-Tailscale-ingress service**: If the backend uses `tailscale - serve` instead of the k8s Tailscale operator, the Tailscale node will - have its own tag. Grant `tag:flyio-proxy` access to that specific tag. +```yaml +annotations: + tailscale.com/tags: "tag:k8s,tag:flyio-target" +``` + +Include `tag:k8s` to preserve existing access rules for the Ingress proxy node. The `tag:flyio-target` tag opts this specific endpoint into being reachable by the fly.io proxy — no broad ACL changes needed. + +For non-k8s services (e.g., Forgejo on indri), create a k8s ExternalName Service pointing to the host, then a Tailscale Ingress with the same annotation. --- @@ -691,7 +691,7 @@ dynamic, authenticated service like [[forgejo]]. - [ ] Audit access controls and permissions - [ ] Configure the service to log the forwarded client IP (not the proxy IP) - [ ] Set up fail2ban on indri with a filter for the service's log format -- [ ] Add narrow Tailscale ACL grant for `tag:flyio-proxy` to the service +- [ ] Tag the service's Tailscale Ingress with `tag:flyio-target` - [ ] Test the nginx config locally or in staging before deploying - [ ] Rehearse the break-glass shutoff (`mise run fly-shutoff`) diff --git a/docs/reference/infrastructure/tailscale.md b/docs/reference/infrastructure/tailscale.md index 20d06bc..e9aa582 100644 --- a/docs/reference/infrastructure/tailscale.md +++ b/docs/reference/infrastructure/tailscale.md @@ -28,6 +28,9 @@ ACLs managed via Pulumi in `pulumi/policy.hujson`. 
| `tag:blumeops` | indri, sifaka | Pulumi IaC managed resources | | `tag:registry` | indri | Container registry access | | `tag:k8s-api` | indri | Kubernetes API server access | +| `tag:k8s-operator` | (operator pod) | Tailscale operator for k8s | +| `tag:k8s` | (Ingress proxy pods) | Kubernetes Tailscale Ingress nodes | +| `tag:flyio-target` | (k8s Ingress nodes) | Endpoints reachable by fly.io proxy | **Important:** Don't tag user-owned devices (like gilbert). Tagging converts them to "tagged devices" which lose user identity and break user-based SSH rules. diff --git a/docs/reference/services/caddy.md b/docs/reference/services/caddy.md index 0ef0b31..ed3f943 100644 --- a/docs/reference/services/caddy.md +++ b/docs/reference/services/caddy.md @@ -81,7 +81,7 @@ The token is written to `~/.config/caddy/gandi-token` (chmod 0600) and sourced b ## Security Considerations -Caddy has no authentication layer — it is a plain reverse proxy. Access control relies entirely on Tailscale ACLs restricting which devices can reach indri on port 443. Currently `tag:homelab`, `autogroup:admin`, and `tag:flyio-proxy` can reach Caddy. The [[flyio-proxy]] grant exists so Alloy can push metrics/logs to Loki and Prometheus, but it means the Fly.io container can technically reach all Caddy-proxied services. See [[flyio-proxy#Security Considerations]] for the threat model. +Caddy has no authentication layer — it is a plain reverse proxy. Access control relies entirely on Tailscale ACLs restricting which devices can reach indri on port 443. Currently `tag:homelab` and `autogroup:admin` can reach Caddy. The [[flyio-proxy]] no longer routes through Caddy — it pushes logs and metrics directly to [[loki]] and [[prometheus]] via their Tailscale Ingress endpoints. 
## Custom Build diff --git a/docs/reference/services/flyio-proxy.md b/docs/reference/services/flyio-proxy.md index e33a65f..244a6a1 100644 --- a/docs/reference/services/flyio-proxy.md +++ b/docs/reference/services/flyio-proxy.md @@ -71,11 +71,11 @@ Alloy listens on `127.0.0.1:12345` for self-scraping its `/metrics` endpoint. Al ## Security Considerations -The `tag:flyio-proxy` ACL grants access to both `tag:k8s:443` (for proxying public services) and `tag:homelab:443` (for pushing metrics/logs to [[caddy|Caddy]]-proxied Loki and Prometheus). This means a compromised nginx config could route traffic to **any** Caddy-proxied service — not just the intended backends. Some of those services (Loki, Prometheus) have no auth; others ([[forgejo]], [[navidrome]], [[immich]]) do. +The `tag:flyio-proxy` ACL grants access only to `tag:flyio-target:443`. Services must explicitly opt in by adding a `tailscale.com/tags: "tag:k8s,tag:flyio-target"` annotation to their Tailscale Ingress. This means the proxy can only reach endpoints that have been individually tagged — a compromised nginx config cannot route to arbitrary services on the tailnet. -Exploitation requires either pushing a malicious image to Fly.io or modifying the nginx config — both of which require RCE on [[gilbert]] (where `fly` is authenticated) or access to [[1password]] (the deploy token). This is an acceptable boundary given that 1Password is already the trust root for the entire infrastructure. +Currently tagged as `tag:flyio-target`: [[docs]], [[loki]], [[prometheus]]. Loki and Prometheus are reachable so that [[alloy|Alloy]] (running inside the container) can push logs and metrics directly via their Tailscale Ingress endpoints, bypassing [[caddy]] entirely. -If this surface area becomes a concern, an alternative would be to add dedicated Tailscale Ingress tags for Loki/Prometheus write endpoints and restrict `tag:flyio-proxy` to only those. 
+To expose an additional service through the proxy, add the `tag:flyio-target` annotation to its Tailscale Ingress. See [[expose-service-publicly]] for the full workflow. ## Secrets diff --git a/docs/reference/services/forgejo.md b/docs/reference/services/forgejo.md index 4fa7a25..1bed75e 100644 --- a/docs/reference/services/forgejo.md +++ b/docs/reference/services/forgejo.md @@ -71,6 +71,24 @@ The Ansible role authenticates to the Forgejo API using a Personal Access Token This is a bootstrapping requirement - the PAT enables IaC for all other secrets. +## Future: Public Access + +Forgejo can be exposed publicly at `forge.eblu.me` via [[flyio-proxy]]. Since Forgejo runs natively on [[indri]] (not in k8s), the pattern is: + +1. Create a k8s ExternalName Service pointing to indri's DNS hostname (ExternalName takes a DNS name, not an IP address) +2. Create a Tailscale Ingress with `tailscale.com/tags: "tag:k8s,tag:flyio-target"` +3. Add the nginx server block and DNS CNAME + +Exposing a dynamic, authenticated service like Forgejo requires a full security review before going live: + +- Disable open user registration (require invites or admin approval) +- Configure fail2ban on indri with a filter for Forgejo's log format +- Ensure Forgejo logs the forwarded client IP (`X-Real-IP`) rather than the proxy's Tailscale IP +- Audit repository visibility defaults and permissions +- Rehearse the break-glass shutoff (`mise run fly-shutoff`) + +See [[expose-service-publicly]] for the full howto and dynamic service checklist.
+ ## Related - [[argocd]] - Uses Forgejo as git source diff --git a/fly/alloy.river b/fly/alloy.river index d2dedb7..4b63efe 100644 --- a/fly/alloy.river +++ b/fly/alloy.river @@ -94,10 +94,10 @@ loki.relabel "instance" { } } -// Write logs to Loki via Caddy (valid TLS, no skip_verify needed) +// Write logs to Loki via Tailscale Ingress (direct, bypasses Caddy) loki.write "loki" { endpoint { - url = "https://loki.ops.eblu.me/loki/api/v1/push" + url = "https://loki.tail8d86e.ts.net/loki/api/v1/push" } } @@ -134,9 +134,9 @@ prometheus.relabel "instance" { } } -// Push metrics to Prometheus via Caddy (valid TLS, no skip_verify needed) +// Push metrics to Prometheus via Tailscale Ingress (direct, bypasses Caddy) prometheus.remote_write "prometheus" { endpoint { - url = "https://prometheus.ops.eblu.me/api/v1/write" + url = "https://prometheus.tail8d86e.ts.net/api/v1/write" } } diff --git a/pulumi/tailscale/policy.hujson b/pulumi/tailscale/policy.hujson index 2a23872..471bd92 100644 --- a/pulumi/tailscale/policy.hujson +++ b/pulumi/tailscale/policy.hujson @@ -61,10 +61,10 @@ }, // --- Fly.io proxy --- - // Public reverse proxy can reach k8s services and Caddy on HTTPS + // Public reverse proxy can only reach explicitly tagged endpoints { "src": ["tag:flyio-proxy"], - "dst": ["tag:k8s", "tag:homelab"], + "dst": ["tag:flyio-target"], "ip": ["tcp:443"], }, @@ -145,6 +145,7 @@ "tag:k8s": ["autogroup:admin", "tag:blumeops", "tag:k8s-operator"], "tag:ci-gateway": ["autogroup:admin", "tag:blumeops"], "tag:flyio-proxy": ["autogroup:admin", "tag:blumeops"], + "tag:flyio-target": ["autogroup:admin", "tag:blumeops", "tag:k8s-operator"], }, // ============== ACL Tests ============== @@ -175,11 +176,11 @@ "src": "tag:ci-gateway", "accept": ["tag:registry:443"], }, - // Fly.io proxy can reach k8s and Caddy on indri (HTTPS only), nothing else + // Fly.io proxy can only reach flyio-target tagged endpoints, nothing else { "src": "tag:flyio-proxy", - "accept": ["tag:k8s:443", 
"tag:homelab:443"], - "deny": ["tag:homelab:22", "tag:nas:445", "tag:registry:443"], + "accept": ["tag:flyio-target:443"], + "deny": ["tag:k8s:443", "tag:homelab:443", "tag:homelab:22", "tag:nas:445", "tag:registry:443"], }, ], }