datasette-satisfactory/mise-tasks/docs-check-index
2026-04-27 19:10:12 -07:00

117 lines
3.7 KiB
Text
Executable file

#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = ["rich>=13.0.0"]
# ///
#MISE description="Check that every doc is referenced in its category index"
"""Check that every doc in a Diataxis category is referenced in its index.
Each Diataxis category (tutorials, reference, how-to, explanation) has an
index file that should wiki-link to every doc in that category directory.
A doc is considered referenced if its filename stem appears as a wiki-link
target (e.g., alloy.md is matched by [[alloy]]) in the category index.
Index files are excluded from the self-check.
Usage: mise run docs-check-index
"""
import re
import sys
from pathlib import Path
from rich.console import Console
from rich.markup import escape
from rich.table import Table
# Documentation root: the "docs" directory that sits next to this script's
# own directory.
_TASKS_DIR = Path(__file__).parent
DOCS_DIR = _TASKS_DIR.parent / "docs"

# Each category directory name mapped to its index file; both are relative
# to DOCS_DIR.
CATEGORIES: dict[str, str] = {
    "tutorials": "tutorials/tutorials.md",
    "reference": "reference/reference.md",
    "how-to": "how-to/how-to.md",
    "explanation": "explanation/explanation.md",
}

# Wiki-link syntax: [[Target]] or [[Target|Display]]. Group 1 captures the
# target, group 2 the optional "|Display" suffix.
WIKILINK_PATTERN: re.Pattern[str] = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]")

# A single-backtick inline code span; removed from a line before link matching.
INLINE_CODE_PATTERN: re.Pattern[str] = re.compile(r"`[^`]+`")
def extract_link_targets(file_path: Path) -> set[str]:
    """Extract all wiki-link targets from a file (ignoring inline code).

    The file is decoded as UTF-8 regardless of the platform's locale so the
    result does not depend on the machine the check runs on.

    Args:
        file_path: Markdown file to scan.

    Returns:
        The set of whitespace-stripped link targets, i.e. the text captured
        by group 1 of WIKILINK_PATTERN. Targets are returned verbatim; no
        further normalization is applied to them.
    """
    content = file_path.read_text(encoding="utf-8")
    targets: set[str] = set()
    for line in content.splitlines():
        # Drop inline code spans first so links that are merely quoted as
        # examples (e.g. `[[alloy]]`) do not count as references.
        line_without_code = INLINE_CODE_PATTERN.sub("", line)
        targets.update(
            match.group(1).strip()
            for match in WIKILINK_PATTERN.finditer(line_without_code)
        )
    return targets
def main() -> int:
    """Validate that every category doc is wiki-linked from its index file.

    For each entry in CATEGORIES, the wiki-link targets of the index file are
    collected and compared against the filename stem of every ``*.md`` file
    found recursively under the category directory. Files located under a
    ``changelog.d`` directory inside the category, and files whose stem
    equals the index file's stem, are skipped. A category whose index file
    does not exist is reported with a warning and is not checked.

    Returns:
        1 if at least one doc is missing from its category index, otherwise 0.
    """
    console = Console()
    console.print("[bold]Category Index Validation[/bold]")
    console.print()

    checked = 0  # categories whose index was actually read
    missing: list[tuple[str, str]] = []  # (category, path relative to DOCS_DIR)

    for category, index_rel in CATEGORIES.items():
        index_path = DOCS_DIR / index_rel
        if not index_path.exists():
            console.print(f"[yellow]Warning: index file not found: {index_rel}[/yellow]")
            continue

        category_dir = DOCS_DIR / category
        if not category_dir.is_dir():
            continue

        checked += 1
        # Get all wiki-link targets from the index
        index_targets = extract_link_targets(index_path)
        index_stem = index_path.stem

        # Check each doc in the category directory
        for md_file in sorted(category_dir.rglob("*.md")):
            # Only look at path components below the category directory so an
            # ancestor directory that happens to be named "changelog.d" cannot
            # silently exclude every doc.
            if "changelog.d" in md_file.relative_to(category_dir).parts:
                continue
            stem = md_file.stem
            # Skip the index file itself, and anything the index links to.
            if stem == index_stem or stem in index_targets:
                continue
            missing.append((category, str(md_file.relative_to(DOCS_DIR))))

    if missing:
        console.print("[bold red]Docs Missing From Category Index[/bold red]")
        console.print("These docs are not wiki-linked from their category index file.")
        console.print()
        table = Table(show_header=True, header_style="bold")
        table.add_column("Category")
        table.add_column("File")
        table.add_column("Add To")
        for category, rel_path in missing:
            # File names come from disk and may contain "[", which Rich would
            # otherwise interpret as console markup.
            table.add_row(category, escape(rel_path), CATEGORIES[category])
        console.print(table)
        console.print()
        return 1

    # Report the number of indexes actually read, not the number configured:
    # categories skipped above were not verified.
    console.print(f"Checked {checked} category indexes.")
    console.print("[bold green]All docs are referenced in their category index![/bold green]")
    return 0
if __name__ == "__main__":
sys.exit(main())