diff --git a/containers/quartz/default.conf b/containers/quartz/default.conf index 2705f1e..70b8fcc 100644 --- a/containers/quartz/default.conf +++ b/containers/quartz/default.conf @@ -14,26 +14,12 @@ server { add_header Cache-Control "public, immutable"; } - # --- Spider-trap guards ------------------------------------------------ - # Quartz emits relative links (../path). When a crawler resolves these - # from a phantom URL that was already served by the SPA fallback, the - # relative prefix compounds (e.g. /tags/ref/infra → /tags/ref/infra/ref/infra) - # creating an infinite tree of unique URIs — all served as 200 via the - # fallback to index.html. Two rules cut this off: - # - # 1. /tags/ is always flat (/tags/), so block anything deeper. - # 2. Real content never exceeds depth 4 (/how-to//). - # A depth-5 cutoff gives headroom while stopping recursive paths. - # - # Together these caught ~95% of trap requests in the March 2026 incident. - # The proper fix is root-absolute links in Quartz (planned for fork). - - location ~ "^/tags/[^/]+/" { - return 404; - } - - location ~ "^(/[^/]+){5,}" { - return 404; + # Serve robots.txt inline to prevent crawlers from entering /explorer/, + # which is an SPA feature that generates infinite relative-link trees + # when crawled (the March 2026 spider-trap incident). + location = /robots.txt { + default_type text/plain; + return 200 "User-agent: *\nDisallow: /explorer/\n"; } # SPA fallback - serve index.html for client-side routing diff --git a/docs/changelog.d/+robots-txt-explorer.infra.md b/docs/changelog.d/+robots-txt-explorer.infra.md new file mode 100644 index 0000000..25ece70 --- /dev/null +++ b/docs/changelog.d/+robots-txt-explorer.infra.md @@ -0,0 +1 @@ +Replace nginx spider-trap 404 guards with robots.txt disallowing /explorer/ to prevent crawler-induced infinite URL trees.