Block recursive crawler paths caused by SPA fallback + relative links: /tags/ depth >1 returns 404, global depth ≥5 returns 404. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
50 lines
1.6 KiB
Text
50 lines
1.6 KiB
Text
# Static-site server: serves the generated site from /usr/share/nginx/html,
# falls back to index.html for unknown paths, and rejects known crawler-trap
# URL shapes with 404 before they reach that fallback.
server {
    listen 80;
    server_name _;
    root /usr/share/nginx/html;
    index index.html;

    # Enable gzip compression.
    # text/html is always compressed by nginx and must not be listed here.
    # image/svg+xml is included because .svg files are served as static
    # assets below and are plain-text XML.
    gzip on;
    gzip_types text/plain text/css application/json application/javascript text/xml application/xml text/javascript image/svg+xml;

    # Cache static assets.
    # NOTE: regex locations are tried in the order they appear, so any URI
    # ending in one of these extensions is handled here and never reaches the
    # spider-trap guards below. This block has no try_files fallback, so a
    # non-existent asset path still ends in a 404 rather than index.html.
    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
    }

    # --- Spider-trap guards ------------------------------------------------
    # Quartz emits relative links (../path). When a crawler resolves these
    # from a phantom URL that was already served by the SPA fallback, the
    # relative prefix compounds (e.g. /tags/ref/infra → /tags/ref/infra/ref/infra)
    # creating an infinite tree of unique URIs — all served as 200 via the
    # fallback to index.html. Two rules cut this off:
    #
    #   1. /tags/ is always flat (/tags/<name>), so block anything deeper.
    #   2. Real content never exceeds depth 4 (/how-to/<cat>/<page>).
    #      A depth-5 cutoff gives headroom while stopping recursive paths.
    #
    # Together these caught ~95% of trap requests in the March 2026 incident.
    # The proper fix is root-absolute links in Quartz (planned for fork).
    #
    # The patterns are quoted because an unquoted "{" would be parsed by nginx
    # as the start of the location body.

    # Matches /tags/<name>/ followed by anything — including nothing, so
    # /tags/<name>/ with a bare trailing slash is also answered with 404.
    location ~ "^/tags/[^/]+/" {
        return 404;
    }

    # Matches any URI with five or more non-empty path segments.
    location ~ "^(/[^/]+){5,}" {
        return 404;
    }

    # SPA fallback - serve index.html for client-side routing.
    # Tried in order: exact file, directory, <uri>.html, then index.html.
    location / {
        try_files $uri $uri/ $uri.html /index.html;
    }

    # Health check endpoint.
    # The Content-Type is set through default_type rather than add_header:
    # "return" already emits a Content-Type derived from default_type, so an
    # add_header would append a second, conflicting Content-Type header
    # instead of replacing the first.
    location /healthz {
        access_log off;
        default_type text/plain;
        return 200 "ok\n";
    }
}
|