Fix spider trap: disable SPA mode, remove index files, relax wiki-links (#290)

## Summary

Fixes the Facebook crawler spider trap that's been generating infinite recursive URLs like `/how-to/tutorials/tutorials/how-to/explanation/...` for several days.

**Root cause:** Quartz SPA mode + nginx `try_files` fallback to `index.html` meant any fabricated URL returned the root HTML shell with HTTP 200. Crawlers followed relative links from those fake URLs, creating infinite recursion.

**Fix:**

- Disable Quartz SPA mode (`enableSPA: false`) — all pages are now fully static HTML
- Replace nginx SPA fallback with `=404` + Quartz's static `404.html`
- Remove `robots.txt` exclusions (no longer needed)

**Docs cleanup (Obsidian.nvim compat no longer needed):**

- Delete hand-curated category index files (`tutorials.md`, `reference.md`, `how-to.md`, `explanation.md`) — Quartz auto-generates folder pages
- Delete `postgresql-storage.md` (redirect stub) and `migrate-forgejo-from-brew.md` (stale history)
- Drop `docs-check-index` and `docs-check-filenames` prek hooks
- Rewrite `docs-check-links` to allow path-based wiki-links (`[[path/to/file]]`) and only error on true ambiguity
- Add `ai-docs` doc tree listing to replace index files for AI context
- Add natural cross-links from reference cards to fix orphan docs

## Deployment and Testing

- [ ] Merge and let the build pipeline run
- [ ] Verify docs.eblu.me serves pages correctly with full page loads
- [ ] Verify non-existent URLs return 404
- [ ] Monitor crawler traffic — should drop to near zero for fabricated URLs

Reviewed-on: #290
2026-03-09 11:59:43 -07:00 · 2026-03-09 11:59:43 -07:00 · 4f0476a851
commit 4f0476a851
parent 953640d2b7
24 changed files with 110 additions and 666 deletions
--- a/mise-tasks/ai-docs
+++ b/mise-tasks/ai-docs
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-#MISE description="Prime AI context with key BlumeOps documentation (formerly zk-docs)"
+#MISE description="Prime AI context with key BlumeOps documentation"

 set -euo pipefail

@@ -10,15 +10,17 @@ FILES=(
    "$DOCS_DIR/tutorials/ai-assistance-guide.md"
    "$DOCS_DIR/how-to/agent-change-process.md"
    "$DOCS_DIR/index.md"
-    "$DOCS_DIR/reference/reference.md"
-    "$DOCS_DIR/how-to/how-to.md"
    "$DOCS_DIR/how-to/operations/troubleshooting.md"
-    "$DOCS_DIR/explanation/explanation.md"
    "$DOCS_DIR/explanation/architecture.md"
-    "$DOCS_DIR/tutorials/tutorials.md"
    "$DOCS_DIR/reference/tools/mise-tasks.md"
 )

 # Concatenate files with headers showing paths
-# Defaults are tuned for AI consumption (plain text, file headers only)
 bat --style=header --color=never --decorations=always "$@" "${FILES[@]}"
+
+# Documentation tree — replaces the old hand-curated index files
+echo ""
+echo "=== Documentation Structure ==="
+echo "All docs under $DOCS_DIR (excluding changelog.d/):"
+echo ""
+find "$DOCS_DIR" -name '*.md' -not -path '*/changelog.d/*' | sort | sed "s|$DOCS_DIR/||"
--- a/mise-tasks/docs-check-filenames
+++ b/mise-tasks/docs-check-filenames
@@ -1,85 +0,0 @@
-#!/usr/bin/env -S uv run --script
-# /// script
-# requires-python = ">=3.12"
-# dependencies = ["rich>=13.0.0"]
-# ///
-#MISE description="Detect duplicate filenames in documentation"
-"""Detect duplicate filenames in documentation.
-
-This script scans all markdown files in the docs/ directory (excluding
-changelog.d/ and zk/) and reports any duplicate filenames that could
-cause wiki-link resolution issues.
-
-With Quartz, wiki-links like [[filename]] resolve by filename,
-so filenames must be unique across the documentation.
-
-Usage: mise run docs-check-filenames
-"""
-
-import sys
-from collections import defaultdict
-from pathlib import Path
-
-from rich.console import Console
-from rich.table import Table
-
-DOCS_DIR = Path(__file__).parent.parent / "docs"
-
-
-def main() -> int:
-    console = Console()
-
-    # Collect all filenames and their paths
-    # Key: filename (without .md), Value: list of file paths
-    filenames: dict[str, list[str]] = defaultdict(list)
-
-    # Scan all markdown files (excluding zk/ and changelog.d/)
-    for md_file in sorted(DOCS_DIR.rglob("*.md")):
-        if "changelog.d" in md_file.parts or "zk" in md_file.parts:
-            continue
-
-        rel_path = str(md_file.relative_to(DOCS_DIR))
-        filename = md_file.stem  # filename without .md
-        filenames[filename].append(rel_path)
-
-    # Find duplicates
-    duplicates = {name: paths for name, paths in filenames.items() if len(paths) > 1}
-
-    # Print results
-    console.print("[bold]Doc Filename Inventory[/bold]")
-    console.print()
-    console.print("With Quartz, wiki-links like [[filename]] resolve by filename,")
-    console.print("so filenames must be unique across the documentation.")
-    console.print()
-
-    # Duplicates table (if any)
-    if duplicates:
-        console.print("[bold red]Duplicate Filenames Found[/bold red]")
-        dup_table = Table(show_header=True, header_style="bold")
-        dup_table.add_column("Filename")
-        dup_table.add_column("Paths")
-
-        for name in sorted(duplicates.keys()):
-            paths = duplicates[name]
-            dup_table.add_row(name, "\n".join(paths))
-
-        console.print(dup_table)
-        console.print()
-
-    # Summary
-    console.print(f"Total files: {sum(len(p) for p in filenames.values())}")
-    console.print(f"Unique filenames: {len(filenames)}")
-    console.print(f"Duplicate filenames: {len(duplicates)}")
-
-    if duplicates:
-        console.print()
-        console.print("[bold red]Action required:[/bold red] Rename files to ensure unique wiki-link resolution.")
-        return 1
-
-    console.print()
-    console.print("[bold green]All filenames are unique![/bold green]")
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/mise-tasks/docs-check-index
+++ b/mise-tasks/docs-check-index
@@ -1,117 +0,0 @@
-#!/usr/bin/env -S uv run --script
-# /// script
-# requires-python = ">=3.12"
-# dependencies = ["rich>=13.0.0"]
-# ///
-#MISE description="Check that every doc is referenced in its category index"
-"""Check that every doc in a Diataxis category is referenced in its index.
-
-Each Diataxis category (tutorials, reference, how-to, explanation) has an
-index file that should wiki-link to every doc in that category directory.
-
-A doc is considered referenced if its filename stem appears as a wiki-link
-target (e.g., alloy.md is matched by [[alloy]]) in the category index.
-
-Index files are excluded from the self-check.
-
-Usage: mise run docs-check-index
-"""
-
-import re
-import sys
-from pathlib import Path
-
-from rich.console import Console
-from rich.markup import escape
-from rich.table import Table
-
-DOCS_DIR = Path(__file__).parent.parent / "docs"
-
-# Category directories and their index files
-CATEGORIES = {
-    "tutorials": "tutorials/tutorials.md",
-    "reference": "reference/reference.md",
-    "how-to": "how-to/how-to.md",
-    "explanation": "explanation/explanation.md",
-}
-
-# Regex to match wiki-links: [[Target]] or [[Target|Display]]
-WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]")
-
-# Regex to match inline code (backticks)
-INLINE_CODE_PATTERN = re.compile(r"`[^`]+`")
-
-
-def extract_link_targets(file_path: Path) -> set[str]:
-    """Extract all wiki-link targets from a file (ignoring inline code)."""
-    content = file_path.read_text()
-    targets: set[str] = set()
-
-    for line in content.splitlines():
-        line_without_code = INLINE_CODE_PATTERN.sub("", line)
-        for match in WIKILINK_PATTERN.finditer(line_without_code):
-            targets.add(match.group(1).strip())
-
-    return targets
-
-
-def main() -> int:
-    console = Console()
-    console.print("[bold]Category Index Validation[/bold]")
-    console.print()
-
-    has_errors = False
-    missing: list[tuple[str, str, str]] = []  # (category, stem, file)
-
-    for category, index_rel in CATEGORIES.items():
-        index_path = DOCS_DIR / index_rel
-        if not index_path.exists():
-            console.print(f"[yellow]Warning: index file not found: {index_rel}[/yellow]")
-            continue
-
-        category_dir = DOCS_DIR / category
-        if not category_dir.is_dir():
-            continue
-
-        # Get all wiki-link targets from the index
-        index_targets = extract_link_targets(index_path)
-        index_stem = index_path.stem
-
-        # Check each doc in the category directory
-        for md_file in sorted(category_dir.rglob("*.md")):
-            if "changelog.d" in md_file.parts:
-                continue
-            stem = md_file.stem
-            # Skip the index file itself
-            if stem == index_stem:
-                continue
-            if stem not in index_targets:
-                rel_path = str(md_file.relative_to(DOCS_DIR))
-                missing.append((category, stem, rel_path))
-
-    if missing:
-        has_errors = True
-        console.print("[bold red]Docs Missing From Category Index[/bold red]")
-        console.print("These docs are not wiki-linked from their category index file.")
-        console.print()
-        table = Table(show_header=True, header_style="bold")
-        table.add_column("Category")
-        table.add_column("File")
-        table.add_column("Add To")
-
-        for category, stem, rel_path in missing:
-            table.add_row(category, rel_path, CATEGORIES[category])
-
-        console.print(table)
-        console.print()
-
-    if has_errors:
-        return 1
-
-    console.print(f"Checked {len(CATEGORIES)} category indexes.")
-    console.print("[bold green]All docs are referenced in their category index![/bold green]")
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/mise-tasks/docs-check-links
+++ b/mise-tasks/docs-check-links
@@ -3,19 +3,21 @@
 # requires-python = ">=3.12"
 # dependencies = ["rich>=13.0.0"]
 # ///
-#MISE description="Validate all wiki-links point to existing doc filenames"
+#MISE description="Validate all wiki-links point to existing doc files"
 """Validate that all wiki-links in documentation point to existing files.

 This script scans all markdown files in the docs/ directory (excluding
-changelog.d/), extracts wiki-links, and verifies each link target
-exists as a unique filename in the documentation.
+changelog.d/), extracts wiki-links, and verifies each link target resolves
+to an existing file.

 Wiki-link formats supported:
- [[filename]] - links to filename.md (must be unique across all docs)
- [[target|Display Text]] - filename with display text
+- [[filename]] - resolves by stem (errors if ambiguous)
+- [[path/to/file]] - resolves by relative path from docs root
+- [[target|Display Text]] - either form with display text
+- [[target#Heading]] - with anchor fragment (file part validated)

-Path-based links (containing '/') are NOT supported to ensure all
-filenames are unique and links work correctly in obsidian.nvim.
+Resolution mirrors Quartz's "shortest" markdownLinkResolution:
+bare names resolve when unique; use paths to disambiguate duplicates.

 Usage: mise run docs-check-links
 """
@@ -31,7 +33,6 @@ from rich.table import Table
 DOCS_DIR = Path(__file__).parent.parent / "docs"

 # Regex to match wiki-links: [[Target]] or [[Target|Display]]
-# Captures: group(1) = target (may have spaces), group(2) = full "|Display" part if present
 WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]")

 # Regex to match inline code (backticks)
@@ -68,51 +69,42 @@ def extract_wikilinks(file_path: Path) -> list[tuple[str, int, bool]]:
 def main() -> int:
    console = Console()

-    # Collect all valid targets (both filenames and paths)
-    valid_targets: set[str] = set()
-    # Track which filenames are ambiguous (appear multiple times)
-    filename_counts: dict[str, list[str]] = {}
+    # Build lookup structures:
+    # - path_targets: set of relative paths without extension (e.g., "reference/services/alloy")
+    # - stem_to_paths: map from filename stem to list of paths (for ambiguity detection)
+    path_targets: set[str] = set()
+    stem_to_paths: dict[str, list[str]] = {}

-    # Scan all markdown files (excluding changelog.d/)
    for md_file in DOCS_DIR.rglob("*.md"):
        if "changelog.d" in md_file.parts:
            continue
-        # Track filename occurrences
-        filename = md_file.stem
+        stem = md_file.stem
        rel_path_str = str(md_file.relative_to(DOCS_DIR).with_suffix(""))
-        if filename not in filename_counts:
-            filename_counts[filename] = []
-        filename_counts[filename].append(rel_path_str)
-        # Add relative path without extension (e.g., "reference/services/alloy")
-        valid_targets.add(rel_path_str)
+        path_targets.add(rel_path_str)
+        if stem not in stem_to_paths:
+            stem_to_paths[stem] = []
+        stem_to_paths[stem].append(rel_path_str)

-    # Only add filenames that are unique (not ambiguous)
-    ambiguous_filenames: set[str] = set()
-    for filename, paths in filename_counts.items():
-        if len(paths) == 1:
-            valid_targets.add(filename)
-        else:
-            ambiguous_filenames.add(filename)
-
-    # Special case: files at repo root that are copied into docs during build
-    # These are valid link targets even though they don't exist in docs/
+    # Special case: files at repo root copied into docs during build
    REPO_ROOT = DOCS_DIR.parent
    BUILD_TIME_DOCS = ["CHANGELOG.md"]
    for filename in BUILD_TIME_DOCS:
        if (REPO_ROOT / filename).exists():
-            valid_targets.add(Path(filename).stem)
+            stem = Path(filename).stem
+            if stem not in stem_to_paths:
+                stem_to_paths[stem] = []
+            stem_to_paths[stem].append(stem)
+            path_targets.add(stem)

-    # Collect all broken, ambiguous, path-based, and spaced links
+    # Collect errors
    broken_links: list[tuple[str, int, str]] = []
    ambiguous_links: list[tuple[str, int, str, list[str]]] = []
-    path_links: list[tuple[str, int, str]] = []
    spaced_links: list[tuple[str, int, str]] = []

-    # Track which doc stems are linked-to from other docs (for orphan detection)
-    all_doc_stems: set[str] = set(filename_counts.keys())
+    # Track linked stems for orphan detection
+    all_doc_stems: set[str] = set(stem_to_paths.keys())
    linked_stems: set[str] = set()

-    # Scan all markdown files for wiki-links (excluding changelog.d/)
    for md_file in sorted(DOCS_DIR.rglob("*.md")):
        if "changelog.d" in md_file.parts:
            continue
@@ -123,35 +115,41 @@ def main() -> int:

        for target, line_num, has_spaces in links:
            if has_spaces:
-                # Links with spaces in target or around pipe are not allowed
                spaced_links.append((rel_path, line_num, target))
                continue

-            # Handle anchor links: [[#Heading]] or [[file#Heading]]
-            # Strip the #fragment for validation; pure anchors (#Heading) skip file check
+            # Strip anchor fragment for file validation
            file_target = target
            if "#" in target:
                file_target = target.split("#", 1)[0]
                if not file_target:
-                    # Pure in-page anchor like [[#Break-glass shutoff]] — always valid
+                    # Pure in-page anchor like [[#Heading]] — always valid
                    continue

            if "/" in file_target:
-                # Path-based links are not allowed - use simple filenames only
-                path_links.append((rel_path, line_num, target))
-            elif file_target in ambiguous_filenames:
-                # Link uses an ambiguous filename - needs to be renamed
-                ambiguous_links.append((rel_path, line_num, target, filename_counts[file_target]))
-            elif file_target not in valid_targets:
-                broken_links.append((rel_path, line_num, target))
-            elif file_target != source_stem:
-                # Valid link to a different doc — record it for orphan detection
-                linked_stems.add(file_target)
+                # Path-based link — resolve against path_targets
+                if file_target not in path_targets:
+                    broken_links.append((rel_path, line_num, target))
+                else:
+                    # Extract the stem for orphan tracking
+                    linked_stem = file_target.rsplit("/", 1)[-1]
+                    if linked_stem != source_stem:
+                        linked_stems.add(linked_stem)
+            else:
+                # Bare stem link — check for existence and ambiguity
+                paths = stem_to_paths.get(file_target)
+                if paths is None:
+                    broken_links.append((rel_path, line_num, target))
+                elif len(paths) > 1:
+                    # Ambiguous: multiple files share this stem
+                    ambiguous_links.append((rel_path, line_num, target, paths))
+                elif file_target != source_stem:
+                    linked_stems.add(file_target)

    # Print results
    console.print("[bold]Wiki-Link Validation[/bold]")
    console.print()
-    console.print(f"Found {len(valid_targets)} valid link targets in documentation.")
+    console.print(f"Found {len(path_targets)} valid link targets in documentation.")
    console.print()

    has_errors = False
@@ -173,28 +171,11 @@ def main() -> int:
        console.print(table)
        console.print()

-    if path_links:
-        has_errors = True
-        console.print("[bold red]Path-Based Wiki-Links Found[/bold red]")
-        console.print("Wiki-links must use simple filenames only (no '/' paths).")
-        console.print("Rename files to be unique, then use [[filename]] format.")
-        console.print()
-        table = Table(show_header=True, header_style="bold")
-        table.add_column("File")
-        table.add_column("Line", justify="right")
-        table.add_column("Target")
-
-        for file_path, line_num, target in path_links:
-            table.add_row(file_path, str(line_num), escape(f"[[{target}]]"))
-
-        console.print(table)
-        console.print()
-
    if ambiguous_links:
        has_errors = True
        console.print("[bold red]Ambiguous Wiki-Links Found[/bold red]")
-        console.print("These links use filenames that exist in multiple locations.")
-        console.print("Rename files to be unique across all documentation.")
+        console.print("These bare-name links match multiple files.")
+        console.print("Use a path-based link to disambiguate: [[path/to/file]]")
        console.print()
        table = Table(show_header=True, header_style="bold")
        table.add_column("File")
@@ -221,7 +202,7 @@ def main() -> int:

        console.print(table)
        console.print()
-        console.print("Each wiki-link target must match a filename or path in docs/.")
+        console.print("Each wiki-link target must match a filename stem or path in docs/.")
        console.print()

    # Orphan detection: docs not linked from any other doc
@@ -237,7 +218,7 @@ def main() -> int:
        table.add_column("Stem")

        for stem in orphan_stems:
-            paths = filename_counts[stem]
+            paths = stem_to_paths[stem]
            for path in paths:
                table.add_row(f"{path}.md", stem)