Mitigate Forgejo archive endpoint DoS from crawler abuse
All checks were successful
Deploy Fly.io Proxy / deploy (push) Successful in 1m35s
All checks were successful
Deploy Fly.io Proxy / deploy (push) Successful in 1m35s
Crawlers hitting /archive/ endpoints with unique commit SHAs generated 54GB of git bundles in 2 days, pegging Forgejo at 43% CPU. Fix at multiple layers:

- Redirect archive requests to tailnet at Fly proxy (302)
- Expand robots.txt: block /users/, /*/archive/, /*/releases/download/
- Cache release artifact downloads at nginx (immutable, 7d TTL)
- Enable [cron.archive_cleanup] with 2h TTL and run-at-start

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5f38779d52
commit
7a42aeb77c
3 changed files with 39 additions and 2 deletions
|
|
@ -61,6 +61,12 @@ MIN_INTERVAL = 10m
|
|||
[cron.update_checker]
|
||||
ENABLED = false
|
||||
|
||||
; Cleanup job for on-demand generated repository archives (enabled here in
; response to the crawler-driven archive growth described in the commit message).
; NOTE(review): SCHEDULE = @midnight runs the job once per day (plus once at
; startup via RUN_AT_START). OLDER_THAN = 2h is only evaluated when the job
; runs, so archives can presumably stay on disk for up to ~26h rather than 2h.
; Confirm against the Forgejo cron documentation whether a shorter schedule is
; needed to get an effective 2h TTL.
[cron.archive_cleanup]
|
||||
ENABLED = true
|
||||
RUN_AT_START = true
|
||||
SCHEDULE = @midnight
|
||||
OLDER_THAN = 2h
|
||||
|
||||
[session]
|
||||
PROVIDER = {{ forgejo_session_provider }}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
Mitigated Forgejo archive endpoint DoS: redirected public archive requests to tailnet, expanded robots.txt, enabled archive cleanup cron, cached release downloads at proxy.
|
||||
|
|
@ -155,10 +155,10 @@ http {
|
|||
internal;
|
||||
}
|
||||
|
||||
# Serve robots.txt directly — block crawlers from mirror repos
|
||||
# Serve robots.txt directly — block crawlers from expensive endpoints
|
||||
location = /robots.txt {
|
||||
default_type text/plain;
|
||||
return 200 "User-agent: *\nDisallow: /mirrors/\n";
|
||||
return 200 "User-agent: *\nDisallow: /mirrors/\nDisallow: /users/\nDisallow: /*/archive/\nDisallow: /*/releases/download/\n";
|
||||
}
|
||||
|
||||
# Block swagger API docs — use forge.ops.eblu.me from tailnet
|
||||
|
|
@ -166,6 +166,15 @@ http {
|
|||
return 403 "API documentation is only available at forge.ops.eblu.me (tailnet).\n";
|
||||
}
|
||||
|
||||
# Redirect archive endpoints to tailnet — archive requests generate full
|
||||
# git bundles on demand. Unauthenticated crawlers hitting unique commit
|
||||
# SHAs cause unbounded CPU and disk usage (DoS vector). Legitimate users
|
||||
# can download via forge.ops.eblu.me on the tailnet.
|
||||
# Matches /<owner>/<repo>/archive/... (case-sensitive regex) and answers with a
# 302 to the same path on forge.ops.eblu.me; nothing is proxied to Forgejo.
location ~ ^/[^/]+/[^/]+/archive/ {
|
||||
# NOTE(review): default_type probably has no effect on a `return 302 <url>`
# response, which uses nginx's built-in redirect page — confirm before relying on it.
default_type text/html;
|
||||
# $request_uri is the original, unnormalized request URI including the query
# string, so the requested ref/format is carried over to the redirect target.
return 302 https://forge.ops.eblu.me$request_uri;
|
||||
}
|
||||
|
||||
# Rate-limit authentication endpoints
|
||||
location ~ ^/user/(login|sign_up|forgot_password) {
|
||||
limit_req zone=forge_auth burst=5 nodelay;
|
||||
|
|
@ -186,6 +195,27 @@ http {
|
|||
proxy_set_header Connection "upgrade";
|
||||
}
|
||||
|
||||
# Cache release artifact downloads — immutable files keyed by tag+filename.
|
||||
# Avoids hammering Forgejo when crawlers or users re-download the same asset.
|
||||
# Proxies /<owner>/<repo>/releases/download/... to the Forgejo upstream and
# stores successful responses in the `services` cache.
location ~ ^/[^/]+/[^/]+/releases/download/ {
|
||||
# Holding the upstream in a variable makes nginx resolve the hostname at
# request time; this presumably relies on a `resolver` directive defined
# outside this hunk — verify.
set $upstream_forge_releases https://forge.tail8d86e.ts.net;
|
||||
# With a variable-based proxy_pass the full URI must be supplied explicitly,
# hence $request_uri is appended.
proxy_pass $upstream_forge_releases$request_uri;
|
||||
# The upstream TLS certificate is NOT verified.
proxy_ssl_verify off;
|
||||
# Send SNI so the upstream can select the right certificate/vhost.
proxy_ssl_server_name on;
|
||||
|
||||
# Cache zone `services` is declared elsewhere (not visible in this hunk).
proxy_cache services;
|
||||
# Only 200 responses are cached, for 7 days.
# NOTE(review): upstream Cache-Control/Expires/Set-Cookie headers take
# precedence over proxy_cache_valid unless ignored via proxy_ignore_headers —
# confirm Forgejo's response headers actually allow caching here.
proxy_cache_valid 200 7d;
|
||||
# $uri excludes the query string, so all query variants of a download path
# share a single cache entry.
proxy_cache_key $host$uri;
|
||||
|
||||
proxy_set_header Host $host;
|
||||
# Client address is taken from the Fly-Client-IP request header.
proxy_set_header X-Real-IP $http_fly_client_ip;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# Exposes HIT/MISS/etc. for debugging. Without `always`, this header is only
# added on the success/redirect status codes add_header covers by default.
add_header X-Cache-Status $upstream_cache_status;
|
||||
# `always` adds this header regardless of the response status code.
add_header X-Clacks-Overhead "GNU Terry Pratchett" always;
|
||||
}
|
||||
|
||||
# Selectively cache static assets only
|
||||
location ~* \.(css|js|png|jpg|svg|woff2?)$ {
|
||||
set $upstream_forge_static https://forge.tail8d86e.ts.net;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue