The previous robots.txt had a typo blocking /explorer/ instead of /explore/, allowing Facebook's crawler to hit the spider trap. Also block /tags/ which has the same infinite relative-link issue. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
36 lines
1.1 KiB
Text
36 lines
1.1 KiB
Text
# Static-site virtual server: serves the built SPA from /usr/share/nginx/html
# on port 80, plus an inline robots.txt and a lightweight health probe.
server {
    listen 80;
    # "_" is a placeholder name, not a wildcard; it is not expected to match
    # any real Host header.
    server_name _;
    root /usr/share/nginx/html;
    index index.html;

    # Enable gzip compression for text-based responses.
    # text/html is always compressed by nginx once gzip is on, so it is not
    # listed here.
    gzip on;
    gzip_types text/plain text/css application/json application/javascript text/xml application/xml text/javascript;

    # Cache static assets (matched case-insensitively by file extension).
    # NOTE(review): "immutable" with a one-year lifetime assumes asset
    # filenames change whenever their content changes — confirm against the
    # build output.
    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
    }

    # Serve robots.txt inline to keep crawlers out of /explore/ and /tags/.
    # Both are SPA features that generate infinite relative-link trees when
    # crawled (the March 2026 spider-trap incident).
    location = /robots.txt {
        default_type text/plain;
        return 200 "User-agent: *\nDisallow: /explore/\nDisallow: /tags/\n";
    }

    # SPA fallback - serve index.html for client-side routing.
    # Lookup order: exact file, directory, "<uri>.html", then the SPA shell.
    location / {
        try_files $uri $uri/ $uri.html /index.html;
    }

    # Health check endpoint.
    # The MIME type is set with default_type rather than
    # "add_header Content-Type": add_header appends a second Content-Type
    # header instead of replacing the one nginx generates for the response.
    location /healthz {
        access_log off;
        default_type text/plain;
        return 200 "ok\n";
    }
}