Mitigate Forgejo archive endpoint DoS from crawler abuse
All checks were successful
Deploy Fly.io Proxy / deploy (push) Successful in 1m35s

Crawlers hitting /archive/ endpoints with unique commit SHAs generated 54GB
of git bundles in 2 days, pegging Forgejo at 43% CPU. Fix at multiple layers:

- Redirect archive requests to tailnet at Fly proxy (302)
- Expand robots.txt: block /users/, /*/archive/, /*/releases/download/
- Cache release artifact downloads at nginx (immutable, 7d TTL)
- Enable [cron.archive_cleanup] with 2h TTL and run-at-start

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Erich Blume 2026-04-17 14:21:22 -07:00
commit 7a42aeb77c
3 changed files with 39 additions and 2 deletions

View file

@ -61,6 +61,12 @@ MIN_INTERVAL = 10m
[cron.update_checker] [cron.update_checker]
ENABLED = false ENABLED = false
[cron.archive_cleanup]
; Periodically delete generated repository archives so that on-demand
; archive requests cannot accumulate on disk without bound.
ENABLED = true
; Also run one cleanup pass when Forgejo starts.
RUN_AT_START = true
; Run hourly rather than @midnight: OLDER_THAN only takes effect when the
; job actually runs, so a once-a-day schedule would let archives live for
; up to ~24h despite the 2h age limit below.
SCHEDULE = @every 1h
; Archives older than this are removed on each run.
OLDER_THAN = 2h
[session] [session]
PROVIDER = {{ forgejo_session_provider }} PROVIDER = {{ forgejo_session_provider }}

View file

@ -0,0 +1 @@
Mitigated Forgejo archive endpoint DoS: redirect public archive requests to tailnet, expanded robots.txt, enabled archive cleanup cron, cached release downloads at proxy.

View file

@ -155,10 +155,10 @@ http {
internal; internal;
} }
# Serve robots.txt directly — block crawlers from mirror repos # Serve robots.txt directly — block crawlers from expensive endpoints
location = /robots.txt { location = /robots.txt {
default_type text/plain; default_type text/plain;
return 200 "User-agent: *\nDisallow: /mirrors/\n"; return 200 "User-agent: *\nDisallow: /mirrors/\nDisallow: /users/\nDisallow: /*/archive/\nDisallow: /*/releases/download/\n";
} }
# Block swagger API docs — use forge.ops.eblu.me from tailnet # Block swagger API docs — use forge.ops.eblu.me from tailnet
@ -166,6 +166,15 @@ http {
return 403 "API documentation is only available at forge.ops.eblu.me (tailnet).\n"; return 403 "API documentation is only available at forge.ops.eblu.me (tailnet).\n";
} }
# Redirect archive endpoints to tailnet — archive requests generate full
# git bundles on demand. Unauthenticated crawlers hitting unique commit
# SHAs cause unbounded CPU and disk usage (DoS vector). Legitimate users
# can download via forge.ops.eblu.me on the tailnet.
#
# The pattern matches any path whose third segment is "archive"
# (presumably /<owner>/<repo>/archive/...).
# NOTE(review): if an archive route is also reachable under a deeper prefix
# (e.g. /api/v1/repos/<owner>/<repo>/archive/...), it would NOT match this
# pattern — confirm it is blocked elsewhere.
location ~ ^/[^/]+/[^/]+/archive/ {
# NOTE(review): default_type presumably has no effect on a redirect
# response — confirm before relying on it.
default_type text/html;
# $request_uri keeps the original path and query string on the redirect.
return 302 https://forge.ops.eblu.me$request_uri;
}
# Rate-limit authentication endpoints # Rate-limit authentication endpoints
location ~ ^/user/(login|sign_up|forgot_password) { location ~ ^/user/(login|sign_up|forgot_password) {
limit_req zone=forge_auth burst=5 nodelay; limit_req zone=forge_auth burst=5 nodelay;
@ -186,6 +195,27 @@ http {
proxy_set_header Connection "upgrade"; proxy_set_header Connection "upgrade";
} }
# Cache release artifact downloads — immutable files keyed by tag+filename.
# Avoids hammering Forgejo when crawlers or users re-download the same asset.
location ~ ^/[^/]+/[^/]+/releases/download/ {
    set $upstream_forge_releases https://forge.tail8d86e.ts.net;
    proxy_pass $upstream_forge_releases$request_uri;
    # NOTE(review): upstream certificate verification is disabled here —
    # confirm this is intentional for the tailnet hop.
    proxy_ssl_verify off;
    proxy_ssl_server_name on;

    proxy_cache services;
    # Only successful responses are cached, for 7 days.
    proxy_cache_valid 200 7d;
    # $uri excludes the query string, so requests that differ only in their
    # query parameters share a single cache entry.
    proxy_cache_key $host$uri;
    # The cache key carries no user identity. Requests that present
    # credentials (Authorization header or any cookie) are therefore neither
    # answered from the cache nor allowed to populate it, so a response
    # fetched by a logged-in user can never be replayed to anonymous clients.
    # Cookie-less crawlers, the traffic this cache targets, still hit it.
    proxy_cache_bypass $http_authorization $http_cookie;
    proxy_no_cache $http_authorization $http_cookie;

    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $http_fly_client_ip;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;

    # Expose the cache result (HIT/MISS/BYPASS/...) for debugging.
    add_header X-Cache-Status $upstream_cache_status;
    add_header X-Clacks-Overhead "GNU Terry Pratchett" always;
}
# Selectively cache static assets only # Selectively cache static assets only
location ~* \.(css|js|png|jpg|svg|woff2?)$ { location ~* \.(css|js|png|jpg|svg|woff2?)$ {
set $upstream_forge_static https://forge.tail8d86e.ts.net; set $upstream_forge_static https://forge.tail8d86e.ts.net;