diff --git a/.forgejo/workflows/deploy-fly.yaml b/.forgejo/workflows/deploy-fly.yaml new file mode 100644 index 0000000..a38e845 --- /dev/null +++ b/.forgejo/workflows/deploy-fly.yaml @@ -0,0 +1,37 @@ +name: Deploy Fly.io Proxy + +on: + workflow_dispatch: + push: + branches: [main] + paths: + - 'fly/**' + +jobs: + deploy: + runs-on: k8s + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install flyctl + run: | + curl -L https://fly.io/install.sh | sh + echo "/root/.fly/bin" >> "$GITHUB_PATH" + + - name: Deploy to Fly.io + env: + FLY_API_TOKEN: ${{ secrets.FLY_DEPLOY_TOKEN }} + run: | + cd fly + fly deploy + + - name: Verify health + env: + FLY_API_TOKEN: ${{ secrets.FLY_DEPLOY_TOKEN }} + run: | + fly status -a blumeops-proxy + echo "" + echo "Health check:" + sleep 10 + curl -sf https://blumeops-proxy.fly.dev/healthz || echo "Warning: health check failed (may need DNS propagation)" diff --git a/Brewfile b/Brewfile index a87fe8c..e5207a2 100644 --- a/Brewfile +++ b/Brewfile @@ -4,4 +4,5 @@ brew "argocd" # ArgoCD CLI for GitOps management brew "bat" # Syntax-highlighted file concatenation brew "mise" # Task runner and toolchain manager brew "tea" # Gitea/Forgejo CLI for forge.ops.eblu.me +brew "flyctl" # Fly.io CLI for public proxy management brew "podman" # Container CLI (uses VM on macOS, for building/pushing images) diff --git a/docs/changelog.d/feature-flyio-proxy.feature.md b/docs/changelog.d/feature-flyio-proxy.feature.md new file mode 100644 index 0000000..78bb168 --- /dev/null +++ b/docs/changelog.d/feature-flyio-proxy.feature.md @@ -0,0 +1 @@ +Add Fly.io public reverse proxy infrastructure for exposing services to the internet (first target: docs.eblu.me) diff --git a/docs/how-to/expose-service-publicly.md b/docs/how-to/expose-service-publicly.md index cb318cf..824fc57 100644 --- a/docs/how-to/expose-service-publicly.md +++ b/docs/how-to/expose-service-publicly.md @@ -11,7 +11,7 @@ id: expose-service-publicly # Expose a Service Publicly via 
Fly.io + Tailscale -> **Status:** Plan — not yet implemented. First target: `docs.eblu.me`. +> **Status:** In progress — first target: `docs.eblu.me`. This guide describes how to expose a BlumeOps service to the public internet using a reverse proxy container on [Fly.io](https://fly.io) that tunnels back @@ -497,9 +497,41 @@ Key differences for dynamic services: - **WebSocket support** — many modern web apps use WebSockets - **Larger body size** — git pushes and file uploads need more than the default 1MB -### 2. Add DNS CNAME (Pulumi) +### 2. Add Fly.io certificate -Add to `pulumi/gandi/__main__.py`: +```bash +fly certs add wiki.eblu.me -a blumeops-proxy +``` + +Or add it to `mise-tasks/fly-setup` so it's captured for future runs. + +### 3. Deploy + +```bash +mise run fly-deploy +``` + +Or push the `fly/nginx.conf` change to main — the Forgejo workflow deploys automatically. + +### 4. Verify against fly.dev + +Test the proxy before touching DNS. Use the `Host` header to simulate +the real domain: + +```bash +# Health check +curl -sf https://blumeops-proxy.fly.dev/healthz + +# Simulate real domain request +curl -I -H "Host: wiki.eblu.me" https://blumeops-proxy.fly.dev/ +# Should return 200 with X-Cache-Status header +``` + +If this fails, debug without any public DNS impact. + +### 5. Add DNS CNAME (Pulumi) + +Only after verifying the proxy works. Add to `pulumi/gandi/__main__.py`: ```python wiki_public = gandi.livedns.Record( @@ -514,30 +546,14 @@ wiki_public = gandi.livedns.Record( Deploy: `mise run dns-preview` then `mise run dns-up`. -### 3. Add Fly.io certificate - -```bash -fly certs add wiki.eblu.me -a blumeops-proxy -``` - -Or add it to `mise-tasks/fly-setup` so it's captured for future runs. - -### 4. Deploy - -```bash -mise run fly-deploy -``` - -Or push the `fly/nginx.conf` change to main — the Forgejo workflow deploys automatically. - -### 5. Verify +### 6. 
Verify with real domain ```bash curl -I https://wiki.eblu.me # Should return 200 with X-Cache-Status header ``` -### 6. Update Tailscale ACLs if needed +### 7. Update Tailscale ACLs if needed The one-time setup grants `tag:flyio-proxy` access to `tag:k8s` on port 443. If the new service needs a different grant, add it to @@ -688,12 +704,23 @@ The "semi" for Fly.io secrets is a one-time operation backed by a repeatable mis ## Verification -After initial deployment of a service (using `docs.eblu.me` as example): +### Pre-DNS (verify against fly.dev) + +Test that the proxy works before creating any public DNS records: + +1. `curl -sf https://blumeops-proxy.fly.dev/healthz` — returns `ok` +2. `curl -I -H "Host: docs.eblu.me" https://blumeops-proxy.fly.dev/` — returns 200 with `X-Cache-Status` header +3. `fly status -a blumeops-proxy` — shows a healthy machine +4. All `*.ops.eblu.me` services still work from the tailnet (unchanged) +5. `mise run services-check` passes + +If anything fails here, debug it now: no public DNS record exists yet, so nothing public is affected. + +### Post-DNS (after CNAME is live) + +After deploying DNS (`mise run dns-up`): 1. `curl -I https://docs.eblu.me` — returns 200 with `X-Cache-Status` header 2. `dig docs.eblu.me` — resolves to Fly.io IPs (not Tailscale IP) 3. `dig forge.ops.eblu.me` — still resolves to `100.98.163.89` (unchanged) -4. All `*.ops.eblu.me` services work from tailnet -5. `mise run services-check` passes -6. `fly status -a blumeops-proxy` shows healthy machine -7. Second request to same URL shows `X-Cache-Status: HIT` +4. 
Second request to same URL shows `X-Cache-Status: HIT` diff --git a/fly/Dockerfile b/fly/Dockerfile new file mode 100644 index 0000000..7d71d85 --- /dev/null +++ b/fly/Dockerfile @@ -0,0 +1,17 @@ +FROM nginx:alpine + +# Copy tailscale binaries from official image +COPY --from=docker.io/tailscale/tailscale:stable \ + /usr/local/bin/tailscaled /usr/local/bin/tailscaled +COPY --from=docker.io/tailscale/tailscale:stable \ + /usr/local/bin/tailscale /usr/local/bin/tailscale + +RUN mkdir -p /var/run/tailscale /var/lib/tailscale + +COPY nginx.conf /etc/nginx/nginx.conf +COPY start.sh /start.sh +RUN chmod +x /start.sh + +EXPOSE 8080 + +CMD ["/start.sh"] diff --git a/fly/fly.toml b/fly/fly.toml new file mode 100644 index 0000000..676b215 --- /dev/null +++ b/fly/fly.toml @@ -0,0 +1,19 @@ +app = "blumeops-proxy" +primary_region = "sea" + +[build] + +[http_service] +internal_port = 8080 +force_https = true +auto_stop_machines = "off" +auto_start_machines = true +min_machines_running = 1 + +[checks] +[checks.health] +port = 8080 +type = "http" +interval = "30s" +timeout = "5s" +path = "/healthz" diff --git a/fly/nginx.conf b/fly/nginx.conf new file mode 100644 index 0000000..c2ed6bb --- /dev/null +++ b/fly/nginx.conf @@ -0,0 +1,56 @@ +worker_processes auto; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # Rate limiting zones — define per-service zones as needed + limit_req_zone $binary_remote_addr zone=general:10m rate=10r/s; + + # Proxy cache: 200MB, evict after 24h of no access + proxy_cache_path /tmp/cache levels=1:2 keys_zone=services:10m + max_size=200m inactive=24h; + + # --- docs.eblu.me (static site) --- + server { + listen 8080; + server_name docs.eblu.me; + + limit_req zone=general burst=20 nodelay; + + location / { + proxy_pass https://docs.tail8d86e.ts.net; + proxy_ssl_verify off; + + # Cache aggressively — static site only. + # Do NOT use these settings for dynamic services. 
+ proxy_cache services; + proxy_cache_valid 200 1d; + proxy_cache_valid 404 1m; + proxy_cache_use_stale error timeout updating; + proxy_cache_lock on; + + # Prevent cache-busting: ignore query strings and + # client cache-control headers. + # Safe for static sites; breaks dynamic services. + proxy_cache_key $host$uri; + proxy_ignore_headers Cache-Control Set-Cookie; + + add_header X-Cache-Status $upstream_cache_status; + } + + location /healthz { return 200 "ok\n"; } + } + + # Catch-all: serve /healthz for any Host (Fly checks, *.fly.dev); reject the rest + server { + listen 8080 default_server; + + location = /healthz { return 200 "ok\n"; } + location / { return 444; } + } +} diff --git a/fly/start.sh b/fly/start.sh new file mode 100644 index 0000000..918c455 --- /dev/null +++ b/fly/start.sh @@ -0,0 +1,16 @@ +#!/bin/sh +set -e + +# Start tailscale in userspace networking mode (no TUN device needed). NOTE(review): in this mode other processes reach tailnet hosts only through tailscaled's SOCKS5/HTTP proxy; confirm nginx's proxy_pass upstream is reachable. +tailscaled --tun=userspace-networking --statedir=/var/lib/tailscale & +sleep 2 + +# Authenticate and join tailnet +tailscale up --authkey="${TS_AUTHKEY}" --hostname=flyio-proxy + +# Wait for tailscale to be ready +until tailscale status > /dev/null 2>&1; do sleep 1; done +echo "Tailscale connected" + +# Start nginx (exec so it replaces the shell and receives stop signals directly) +exec nginx -g "daemon off;" diff --git a/mise-tasks/fly-deploy b/mise-tasks/fly-deploy new file mode 100755 index 0000000..bb2b4f8 --- /dev/null +++ b/mise-tasks/fly-deploy @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +#MISE description="Deploy the Fly.io public proxy" + +set -euo pipefail + +cd "$(dirname "$0")/../fly" +fly deploy "$@" diff --git a/mise-tasks/fly-setup b/mise-tasks/fly-setup new file mode 100755 index 0000000..c5b1c71 --- /dev/null +++ b/mise-tasks/fly-setup @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +#MISE description="One-time setup: configure Fly.io secrets and certs (idempotent)" + +set -euo pipefail + +APP="blumeops-proxy" + +# Fetch Tailscale auth key from Pulumi state +echo "Fetching Tailscale auth key from Pulumi..." 
+TS_AUTHKEY=$(cd "$(dirname "$0")/../pulumi/tailscale" && pulumi stack output flyio_authkey --show-secrets) +fly secrets set TS_AUTHKEY="$TS_AUTHKEY" -a "$APP" +echo "Tailscale auth key set" + +# Add certs for all public domains (idempotent: added only when missing, so real failures still abort) +fly certs show docs.eblu.me -a "$APP" >/dev/null 2>&1 || fly certs add docs.eblu.me -a "$APP" +# fly certs show wiki.eblu.me -a "$APP" >/dev/null 2>&1 || fly certs add wiki.eblu.me -a "$APP" # future services +echo "Certificates configured" + +echo "Done. Run 'mise run fly-deploy' to deploy." diff --git a/mise-tasks/fly-shutoff b/mise-tasks/fly-shutoff new file mode 100755 index 0000000..f9e4f90 --- /dev/null +++ b/mise-tasks/fly-shutoff @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +#MISE description="Emergency shutoff: stop all Fly.io proxy machines" + +set -euo pipefail + +APP="blumeops-proxy" + +echo "EMERGENCY SHUTOFF: Stopping all machines for $APP" +fly scale count 0 -a "$APP" --yes +echo "All machines stopped. Public services are offline." +echo "To restore: fly scale count 1 -a $APP" diff --git a/pulumi/gandi/__main__.py b/pulumi/gandi/__main__.py index 4361c91..55b7665 100644 --- a/pulumi/gandi/__main__.py +++ b/pulumi/gandi/__main__.py @@ -54,8 +54,22 @@ base_record = gandi.livedns.Record( values=[tailscale_ip], ) +# ============== Public Services (Fly.io proxy) ============== +# CNAME records pointing public subdomains to Fly.io for reverse proxying +# back to the tailnet. 
See docs/how-to/expose-service-publicly.md + +docs_public = gandi.livedns.Record( + "docs-public", + zone=domain, + name="docs", + type="CNAME", + ttl=300, + values=["blumeops-proxy.fly.dev."], +) + # ============== Exports ============== pulumi.export("domain", domain) pulumi.export("wildcard_fqdn", f"*.{subdomain}.{domain}") pulumi.export("base_fqdn", f"{subdomain}.{domain}") pulumi.export("target_ip", tailscale_ip) +pulumi.export("docs_public_fqdn", f"docs.{domain}") diff --git a/pulumi/tailscale/__main__.py b/pulumi/tailscale/__main__.py index 7c76c26..80e2793 100644 --- a/pulumi/tailscale/__main__.py +++ b/pulumi/tailscale/__main__.py @@ -70,9 +70,21 @@ sifaka_tags = tailscale.DeviceTags( ], ) +# ============== Auth Keys ============== + +# Auth key for Fly.io proxy container (public reverse proxy) +flyio_key = tailscale.TailnetKey( + "flyio-proxy-key", + reusable=True, + ephemeral=True, + tags=["tag:flyio-proxy"], + expiry=7776000, # 90 days +) + # ============== Exports ============== pulumi.export("acl_id", acl.id) pulumi.export("policy_hash", policy_hash) +pulumi.export("flyio_authkey", flyio_key.key) pulumi.export("indri_device_id", indri.node_id) pulumi.export("indri_tags", indri_tags.tags) diff --git a/pulumi/tailscale/policy.hujson b/pulumi/tailscale/policy.hujson index 9949ade..43542dd 100644 --- a/pulumi/tailscale/policy.hujson +++ b/pulumi/tailscale/policy.hujson @@ -60,6 +60,14 @@ "ip": ["*"], }, + // --- Fly.io proxy --- + // Public reverse proxy can reach k8s services on HTTPS only + { + "src": ["tag:flyio-proxy"], + "dst": ["tag:k8s"], + "ip": ["tcp:443"], + }, + // --- CI Gateway --- // Ephemeral CI containers can push images to registry { @@ -136,6 +144,7 @@ "tag:k8s-operator": ["autogroup:admin", "tag:blumeops"], "tag:k8s": ["autogroup:admin", "tag:blumeops", "tag:k8s-operator"], "tag:ci-gateway": ["autogroup:admin", "tag:blumeops"], + "tag:flyio-proxy": ["autogroup:admin", "tag:blumeops"], }, // ============== ACL Tests ============== @@ 
-166,5 +175,11 @@ "src": "tag:ci-gateway", "accept": ["tag:registry:443"], }, + // Fly.io proxy can reach k8s services (HTTPS only), nothing else + { + "src": "tag:flyio-proxy", + "accept": ["tag:k8s:443"], + "deny": ["tag:homelab:22", "tag:nas:445", "tag:registry:443"], + }, ], }