#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = ["rich==15.0.0"]
# ///
#MISE description="Validate all wiki-links point to existing doc files"
"""Validate that all wiki-links in documentation point to existing files.

This script scans all markdown files in the docs/ directory (excluding
changelog.d/), extracts wiki-links, and verifies each link target resolves
to an existing file.

Wiki-link formats supported:
- [[filename]] - resolves by stem (errors if ambiguous)
- [[path/to/file]] - resolves by relative path from docs root
- [[target|Display Text]] - either form with display text
- [[target#Heading]] - with anchor fragment (file part validated)

Resolution mirrors Quartz's "shortest" markdownLinkResolution: bare names
resolve when unique; use paths to disambiguate duplicates.

Usage:
    mise run docs-check-links
"""

import re
import sys
from pathlib import Path

from rich.console import Console
from rich.markup import escape
from rich.table import Table

DOCS_DIR = Path(__file__).parent.parent / "docs"
REPO_ROOT = DOCS_DIR.parent

# Files at the repo root that are copied into docs during the build, so links
# to them are valid even though they are not (necessarily) present in docs/.
BUILD_TIME_DOCS = ["CHANGELOG.md"]

# Stems that are never reported as orphans.
ORPHAN_EXCEPTIONS = {"index"}

# Regex to match wiki-links: [[Target]] or [[Target|Display]]
WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]")

# Regex to match inline code (backticks)
INLINE_CODE_PATTERN = re.compile(r"`[^`]+`")


def extract_wikilinks(file_path: Path) -> list[tuple[str, int, bool]]:
    """Extract all wiki-link targets from a markdown file with line numbers.

    Wiki-links inside inline code (backticks) are ignored, as these are
    examples rather than real links.

    Args:
        file_path: Markdown file to scan.

    Returns:
        A list of ``(target, line_num, has_spaces)`` tuples. ``target`` is the
        link target with surrounding whitespace stripped, ``line_num`` is
        1-based, and ``has_spaces`` is True if the target had surrounding
        whitespace or the pipe separator was followed by a space.
    """
    # Read as UTF-8 explicitly rather than relying on the platform default.
    content = file_path.read_text(encoding="utf-8")

    links: list[tuple[str, int, bool]] = []
    for line_num, line in enumerate(content.splitlines(), start=1):
        # Remove inline code before searching for wiki-links.
        line_without_code = INLINE_CODE_PATTERN.sub("", line)
        for match in WIKILINK_PATTERN.finditer(line_without_code):
            raw_target = match.group(1)
            target = raw_target.strip()
            pipe_part = match.group(2)  # "|Display" or None

            # Whitespace before the pipe is part of raw_target, so the strip
            # comparison covers it; only "| Display" needs a separate check.
            has_spaces = raw_target != target or (
                pipe_part is not None and pipe_part.startswith("| ")
            )
            links.append((target, line_num, has_spaces))

    return links


def _iter_doc_files() -> list[Path]:
    """Return the markdown files under docs/ (sorted), skipping changelog.d/."""
    return sorted(
        md_file
        for md_file in DOCS_DIR.rglob("*.md")
        # Check parts relative to docs/ so a parent directory of the checkout
        # that happens to be called "changelog.d" does not hide every file.
        if "changelog.d" not in md_file.relative_to(DOCS_DIR).parts
    )


def _stem_of(doc_path: str) -> str:
    """Return the final component of an extension-less, '/'-separated path."""
    return doc_path.rsplit("/", 1)[-1]


def _build_link_index(
    doc_files: list[Path],
) -> tuple[set[str], dict[str, list[str]]]:
    """Build the lookup structures used to resolve wiki-links.

    Returns:
        ``(path_targets, stem_to_paths)`` where ``path_targets`` is the set of
        docs-relative paths without extension (e.g.
        "reference/services/alloy") and ``stem_to_paths`` maps each filename
        stem to every path that has it (for ambiguity detection).
    """
    path_targets: set[str] = set()
    stem_to_paths: dict[str, list[str]] = {}

    for md_file in doc_files:
        # as_posix() keeps "/" separators on every platform, matching how
        # path-based wiki-links are written.
        rel_path_str = md_file.relative_to(DOCS_DIR).with_suffix("").as_posix()
        path_targets.add(rel_path_str)
        stem_to_paths.setdefault(md_file.stem, []).append(rel_path_str)

    # Special case: files at repo root copied into docs during build.
    for filename in BUILD_TIME_DOCS:
        if not (REPO_ROOT / filename).exists():
            continue
        stem = Path(filename).stem
        known_paths = stem_to_paths.setdefault(stem, [])
        # Avoid a duplicate entry (and a bogus "ambiguous" report) when the
        # file is also already present at the top of docs/.
        if stem not in known_paths:
            known_paths.append(stem)
        path_targets.add(stem)

    return path_targets, stem_to_paths


def _print_table(
    console: Console, headers: list[str], rows: list[list[str]]
) -> None:
    """Print a table; cell strings must already be markup-escaped."""
    table = Table(show_header=True, header_style="bold")
    for header in headers:
        if header == "Line":
            table.add_column(header, justify="right")
        else:
            table.add_column(header)
    for row in rows:
        table.add_row(*row)
    console.print(table)
    console.print()


def main() -> int:
    """Validate every wiki-link under docs/ and print a report.

    Reports links with stray spaces, ambiguous bare-name links, broken links,
    and orphan documents (docs that no other doc links to).

    Returns:
        0 if no problems were found, 1 otherwise (used as the exit status).
    """
    console = Console()

    doc_files = _iter_doc_files()
    path_targets, stem_to_paths = _build_link_index(doc_files)

    # Collect errors
    broken_links: list[tuple[str, int, str]] = []
    ambiguous_links: list[tuple[str, int, str, list[str]]] = []
    spaced_links: list[tuple[str, int, str]] = []

    # Resolved paths that are linked from some *other* document. Tracking
    # paths rather than stems keeps files that share a stem independent.
    linked_paths: set[str] = set()

    for md_file in doc_files:
        relative = md_file.relative_to(DOCS_DIR)
        rel_path = relative.as_posix()
        source_path = relative.with_suffix("").as_posix()

        for target, line_num, has_spaces in extract_wikilinks(md_file):
            if has_spaces:
                spaced_links.append((rel_path, line_num, target))
                continue

            # Strip anchor fragment for file validation.
            file_target = target.split("#", 1)[0]
            if not file_target:
                # Pure in-page anchor like [[#Heading]] — always valid.
                continue

            if "/" in file_target:
                # Path-based link — resolve against path_targets.
                resolved = file_target if file_target in path_targets else None
            else:
                # Bare stem link — check for existence and ambiguity.
                candidates = stem_to_paths.get(file_target, [])
                if len(candidates) > 1:
                    ambiguous_links.append(
                        (rel_path, line_num, target, candidates)
                    )
                    continue
                resolved = candidates[0] if candidates else None

            if resolved is None:
                broken_links.append((rel_path, line_num, target))
            elif resolved != source_path:
                # Self-links do not rescue a document from being an orphan.
                linked_paths.add(resolved)

    # Print results
    console.print("[bold]Wiki-Link Validation[/bold]")
    console.print()
    console.print(f"Found {len(path_targets)} valid link targets in documentation.")
    console.print()

    has_errors = False
    link_headers = ["File", "Line", "Target"]

    if spaced_links:
        has_errors = True
        console.print("[bold red]Wiki-Links With Spaces Found[/bold red]")
        console.print("Wiki-links must not have spaces in the target or around the pipe.")
        # markup=False: otherwise rich parses "[target|Display Text]" as a
        # style tag and drops it from the output.
        console.print(
            "Use [[target|Display Text]] not [[target | Display Text]].",
            markup=False,
        )
        console.print()
        _print_table(
            console,
            link_headers,
            [
                [escape(file_path), str(line_num), escape(f"[[{target}]]")]
                for file_path, line_num, target in spaced_links
            ],
        )

    if ambiguous_links:
        has_errors = True
        console.print("[bold red]Ambiguous Wiki-Links Found[/bold red]")
        console.print("These bare-name links match multiple files.")
        # markup=False for the same reason as above ("[path/to/file]").
        console.print(
            "Use a path-based link to disambiguate: [[path/to/file]]",
            markup=False,
        )
        console.print()
        _print_table(
            console,
            [*link_headers, "Possible Paths"],
            [
                [
                    escape(file_path),
                    str(line_num),
                    escape(f"[[{target}]]"),
                    escape("\n".join(paths)),
                ]
                for file_path, line_num, target, paths in ambiguous_links
            ],
        )

    if broken_links:
        has_errors = True
        console.print("[bold red]Broken Wiki-Links Found[/bold red]")
        _print_table(
            console,
            link_headers,
            [
                [escape(file_path), str(line_num), escape(f"[[{target}]]")]
                for file_path, line_num, target in broken_links
            ],
        )
        console.print("Each wiki-link target must match a filename stem or path in docs/.")
        console.print()

    # Orphan detection: docs not linked from any other doc.
    orphan_paths = sorted(
        (
            path
            for path in path_targets - linked_paths
            if _stem_of(path) not in ORPHAN_EXCEPTIONS
        ),
        key=lambda path: (_stem_of(path), path),
    )

    if orphan_paths:
        has_errors = True
        console.print("[bold red]Orphan Documents Found[/bold red]")
        console.print("These docs are not linked from any other document.")
        console.print()
        _print_table(
            console,
            ["File", "Stem"],
            [[escape(f"{path}.md"), escape(_stem_of(path))] for path in orphan_paths],
        )

    if has_errors:
        return 1

    console.print("[bold green]All wiki-links are valid![/bold green]")
    return 0


if __name__ == "__main__":
    sys.exit(main())