Fix the Facebook crawler spider trap by disabling Quartz SPA mode and removing the nginx fallback to index.html.

Non-existent URLs now return 404.html (with a 404 status) instead of the root SPA shell, preventing infinite recursive crawling.

- Remove hand-curated category index files (tutorials.md, reference.md, how-to.md, explanation.md) — Quartz auto-generates folder pages.
- Drop the docs-check-index and docs-check-filenames hooks.
- Update docs-check-links to allow path-based wiki-links and only error on true ambiguity.
- Remove the robots.txt exclusions since they're no longer needed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
34 lines
1,009 B
Text
34 lines
1,009 B
Text
# Static file server for the Quartz-generated site.
# Serves pre-rendered files from /usr/share/nginx/html over plain HTTP on
# port 80. `server_name _` is a placeholder that matches no real host name;
# this block answers requests when it is the default server for the port.
server {
    listen 80;
    server_name _;
    root /usr/share/nginx/html;
    index index.html;

    # Enable gzip compression. text/html is always compressed when gzip is
    # on, so only the additional MIME types are listed here.
    gzip on;
    gzip_types text/plain text/css application/json application/javascript text/xml application/xml text/javascript;

    # Cache static assets for one year (case-insensitive extension match).
    # `expires` emits Expires plus "Cache-Control: max-age=..."; the
    # add_header line appends a second Cache-Control header carrying the
    # "public, immutable" directives. Both only apply to successful/redirect
    # responses, so a missing asset's 404 is not cached for a year.
    # NOTE(review): "immutable" assumes asset file names change whenever
    # their content changes — confirm against the Quartz build output.
    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
    }

    # Static file serving — no SPA fallback.
    # Quartz generates complete HTML for every page, so all valid URLs
    # map to real files. Non-existent paths get 404.html (generated by
    # Quartz's NotFoundPage plugin), preventing the spider-trap issue
    # where crawlers would get index.html for fabricated URLs.
    #
    # Lookup order: the exact file, then a directory of that name (served
    # via `index`), then the path with ".html" appended; otherwise 404.
    location / {
        try_files $uri $uri/ $uri.html =404;
    }

    # Serve the 404.html body for 404 responses; the status code stays 404
    # because no "=code" override is given.
    error_page 404 /404.html;

    # Health check endpoint: always answers 200 with the body "ok\n" and is
    # excluded from the access log.
    # The body's MIME type is set with default_type rather than
    # `add_header Content-Type ...`: nginx already emits a Content-Type for
    # the `return` body, so add_header would append a second, conflicting
    # Content-Type header instead of replacing it.
    # This is a prefix location, so paths beginning with /healthz also match.
    location /healthz {
        access_log off;
        default_type text/plain;
        return 200 "ok\n";
    }
}
|