From b667f21e1027dabcaeaea3dbd17b442c3a7bf08c Mon Sep 17 00:00:00 2001
From: Erich Blume
Date: Mon, 9 Feb 2026 11:06:41 -0800
Subject: [PATCH] Fix 502 errors during Fly.io proxy deploys

The health check returned 200 immediately on nginx start, before
Tailscale connected. Fly.io routed traffic to the new machine with a
cold proxy cache and no MagicDNS, causing upstream DNS timeouts.

Defer the health check by returning 503 until a sentinel file
(/tmp/tailscale-ready) is created after Tailscale connects. This keeps
the old machine serving traffic during the startup window.

Co-Authored-By: Claude Opus 4.6
---
 docs/changelog.d/fix-deploy-healthcheck-race.bugfix.md | 1 +
 fly/nginx.conf                                         | 3 +++
 fly/start.sh                                           | 9 +++++----
 3 files changed, 9 insertions(+), 4 deletions(-)
 create mode 100644 docs/changelog.d/fix-deploy-healthcheck-race.bugfix.md

diff --git a/docs/changelog.d/fix-deploy-healthcheck-race.bugfix.md b/docs/changelog.d/fix-deploy-healthcheck-race.bugfix.md
new file mode 100644
index 0000000..0220ca3
--- /dev/null
+++ b/docs/changelog.d/fix-deploy-healthcheck-race.bugfix.md
@@ -0,0 +1 @@
+Fix 502 errors during Fly.io proxy deploys by deferring health check until Tailscale is connected.
diff --git a/fly/nginx.conf b/fly/nginx.conf
index f01a1a3..d27ff15 100644
--- a/fly/nginx.conf
+++ b/fly/nginx.conf
@@ -76,6 +76,9 @@ http {
         listen 8080 default_server;
 
         location /healthz {
+            if (!-f /tmp/tailscale-ready) {
+                return 503 "starting\n";
+            }
             return 200 "ok\n";
         }
 
diff --git a/fly/start.sh b/fly/start.sh
index 620dfea..f923b81 100644
--- a/fly/start.sh
+++ b/fly/start.sh
@@ -1,9 +1,9 @@
 #!/bin/sh
 set -e
 
-# Start nginx immediately so port 8080 is bound before Fly's deploy checks.
-# Upstream DNS resolution is deferred via resolver + variable in nginx.conf,
-# so nginx starts cleanly even before Tailscale connects.
+# Start nginx immediately so port 8080 is bound (avoids connection refused).
+# Health check returns 503 until /tmp/tailscale-ready exists, so Fly.io
+# keeps the old machine serving traffic until Tailscale connects.
 nginx -g "daemon off;" &
 NGINX_PID=$!
 echo "Nginx started (waiting for Tailscale before proxying)"
@@ -16,8 +16,9 @@ sleep 2
 # Authenticate and join tailnet
 tailscale up --authkey="${TS_AUTHKEY}" --hostname=flyio-proxy
 
-# Wait for tailscale to be ready
+# Wait for tailscale to be ready, then signal nginx health check
 until tailscale status > /dev/null 2>&1; do sleep 1; done
+touch /tmp/tailscale-ready
 echo "Tailscale connected"
 
 # Start Alloy for observability (logs → Loki, metrics → Prometheus)
-- 
2.50.1 (Apple Git-155)