#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = ["rich>=13.0.0"]
# ///
#MISE description="Validate all wiki-links point to existing doc filenames"
"""Validate that all wiki-links in documentation point to existing files.

This script scans all markdown files in the docs/ directory (excluding
changelog.d/), extracts wiki-links, and verifies each link target exists
as a unique filename in the documentation. It also reports "orphan" docs
that no other doc links to.

Wiki-link formats supported:
- [[filename]] - links to filename.md (must be unique across all docs)
- [[target|Display Text]] - filename with display text
- [[filename#Heading]] - the "#Heading" fragment is ignored for validation
- [[#Heading]] - in-page anchor, always accepted

Path-based links (containing '/') are NOT supported to ensure all filenames
are unique and links work correctly in obsidian.nvim.

Usage:
    mise run docs-check-links
"""

import re
import sys
from pathlib import Path

from rich.console import Console
from rich.markup import escape
from rich.table import Table

DOCS_DIR = Path(__file__).parent.parent / "docs"
REPO_ROOT = DOCS_DIR.parent

# Directory (anywhere below DOCS_DIR) whose markdown files are not checked.
EXCLUDED_DIR = "changelog.d"

# Files at the repo root that are copied into docs during build. Their stems
# are valid link targets even though they don't exist in docs/.
BUILD_TIME_DOCS = ("CHANGELOG.md",)

# Doc stems that are allowed to have no inbound links.
ORPHAN_EXCEPTIONS = frozenset({"index"})

# Regex to match wiki-links: [[Target]] or [[Target|Display]]
# Captures: group(1) = target (may have spaces), group(2) = full "|Display" part if present
WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]")

# Regex to match inline code (backticks)
INLINE_CODE_PATTERN = re.compile(r"`[^`]+`")


def extract_wikilinks(file_path: Path) -> list[tuple[str, int, bool]]:
    """Extract all wiki-link targets from a markdown file with line numbers.

    Wiki-links inside inline code (backticks) are ignored, as these are
    examples rather than real links.

    Args:
        file_path: Markdown file to scan. It is read as UTF-8.

    Returns:
        A list of ``(target, line_num, has_spaces)`` tuples, in document
        order. ``target`` is the link target with surrounding whitespace
        stripped, ``line_num`` is 1-based, and ``has_spaces`` is True if the
        target had leading/trailing whitespace or the pipe separator was
        followed by a space.
    """
    # Pin the encoding so results do not depend on the platform locale.
    content = file_path.read_text(encoding="utf-8")
    links: list[tuple[str, int, bool]] = []

    for line_num, line in enumerate(content.splitlines(), start=1):
        # Remove inline code before searching for wiki-links.
        line_without_code = INLINE_CODE_PATTERN.sub("", line)
        for match in WIKILINK_PATTERN.finditer(line_without_code):
            raw_target = match.group(1)
            target = raw_target.strip()
            pipe_part = match.group(2)  # "|Display" or None

            # A space before the pipe is trailing whitespace on raw_target,
            # so only the space *after* the pipe needs a separate check.
            has_spaces = raw_target != target or (
                pipe_part is not None and pipe_part.startswith("| ")
            )
            links.append((target, line_num, has_spaces))

    return links


def _iter_doc_files() -> list[Path]:
    """Return the markdown files under DOCS_DIR, sorted, minus changelog.d/."""
    # Test the path *relative to DOCS_DIR* so that an ancestor directory of
    # the checkout that happens to be named like EXCLUDED_DIR is not matched.
    return sorted(
        md_file
        for md_file in DOCS_DIR.rglob("*.md")
        if EXCLUDED_DIR not in md_file.relative_to(DOCS_DIR).parts
    )


def _link_rows(links: list[tuple[str, int, str]]) -> list[tuple[str, ...]]:
    """Turn (file, line, target) records into markup-safe table rows."""
    return [
        (escape(file_path), str(line_num), escape(f"[[{target}]]"))
        for file_path, line_num, target in links
    ]


def _print_table(
    console: Console, columns: tuple[str, ...], rows: list[tuple[str, ...]]
) -> None:
    """Print a table with the given column headers and rows, then a blank line."""
    table = Table(show_header=True, header_style="bold")
    for column in columns:
        # Line numbers read better right-aligned; everything else is text.
        table.add_column(column, justify="right" if column == "Line" else "left")
    for row in rows:
        table.add_row(*row)
    console.print(table)
    console.print()


def main() -> int:
    """Validate every wiki-link under DOCS_DIR and print a report.

    Returns:
        0 if no problems were found; 1 if any spaced, path-based, ambiguous
        or broken link, or any orphan document, was reported.
    """
    console = Console()
    doc_files = _iter_doc_files()

    # Collect all valid targets (both filenames and paths).
    valid_targets: set[str] = set()
    # Map each filename stem to every relative path (without extension) that
    # carries it; more than one path means the stem is ambiguous.
    filename_counts: dict[str, list[str]] = {}

    for md_file in doc_files:
        # Relative path without extension (e.g., "reference/services/alloy").
        rel_path_str = str(md_file.relative_to(DOCS_DIR).with_suffix(""))
        filename_counts.setdefault(md_file.stem, []).append(rel_path_str)
        valid_targets.add(rel_path_str)

    # Only unique filenames are valid targets; duplicates are ambiguous.
    ambiguous_filenames = {
        filename for filename, paths in filename_counts.items() if len(paths) > 1
    }
    valid_targets.update(
        filename for filename in filename_counts if filename not in ambiguous_filenames
    )

    for filename in BUILD_TIME_DOCS:
        if (REPO_ROOT / filename).exists():
            valid_targets.add(Path(filename).stem)

    # Collect all broken, ambiguous, path-based, and spaced links.
    broken_links: list[tuple[str, int, str]] = []
    ambiguous_links: list[tuple[str, int, str, list[str]]] = []
    path_links: list[tuple[str, int, str]] = []
    spaced_links: list[tuple[str, int, str]] = []

    # Track which doc stems are linked-to from other docs (for orphan detection).
    all_doc_stems: set[str] = set(filename_counts)
    linked_stems: set[str] = set()

    for md_file in doc_files:
        rel_path = str(md_file.relative_to(DOCS_DIR))
        source_stem = md_file.stem

        for target, line_num, has_spaces in extract_wikilinks(md_file):
            if has_spaces:
                # Links with spaces in target or around pipe are not allowed.
                spaced_links.append((rel_path, line_num, target))
                continue

            # Handle anchor links: [[#Heading]] or [[file#Heading]].
            # Only the part before the first "#" names a file.
            file_target = target.split("#", 1)[0]
            if not file_target:
                # Pure in-page anchor like [[#Break-glass shutoff]] — always valid.
                continue

            if "/" in file_target:
                # Path-based links are not allowed - use simple filenames only.
                path_links.append((rel_path, line_num, target))
            elif file_target in ambiguous_filenames:
                # Link uses an ambiguous filename - needs to be renamed.
                ambiguous_links.append(
                    (rel_path, line_num, target, filename_counts[file_target])
                )
            elif file_target not in valid_targets:
                broken_links.append((rel_path, line_num, target))
            elif file_target != source_stem:
                # Valid link to a different doc — record it for orphan
                # detection. Self-links do not rescue a doc from orphanhood.
                linked_stems.add(file_target)

    # Print results.
    console.print("[bold]Wiki-Link Validation[/bold]")
    console.print()
    console.print(f"Found {len(valid_targets)} valid link targets in documentation.")
    console.print()

    link_columns = ("File", "Line", "Target")
    has_errors = False

    if spaced_links:
        has_errors = True
        console.print("[bold red]Wiki-Links With Spaces Found[/bold red]")
        console.print("Wiki-links must not have spaces in the target or around the pipe.")
        # escape() is required: Rich would otherwise parse "[target|...]" as a
        # markup tag and drop it from the output.
        console.print(escape("Use [[target|Display Text]] not [[target | Display Text]]."))
        console.print()
        _print_table(console, link_columns, _link_rows(spaced_links))

    if path_links:
        has_errors = True
        console.print("[bold red]Path-Based Wiki-Links Found[/bold red]")
        console.print("Wiki-links must use simple filenames only (no '/' paths).")
        # escape() keeps the literal "[[filename]]" visible (see above).
        console.print(escape("Rename files to be unique, then use [[filename]] format."))
        console.print()
        _print_table(console, link_columns, _link_rows(path_links))

    if ambiguous_links:
        has_errors = True
        console.print("[bold red]Ambiguous Wiki-Links Found[/bold red]")
        console.print("These links use filenames that exist in multiple locations.")
        console.print("Rename files to be unique across all documentation.")
        console.print()
        ambiguous_rows = [
            (
                escape(file_path),
                str(line_num),
                escape(f"[[{target}]]"),
                escape("\n".join(paths)),
            )
            for file_path, line_num, target, paths in ambiguous_links
        ]
        _print_table(console, (*link_columns, "Possible Paths"), ambiguous_rows)

    if broken_links:
        has_errors = True
        console.print("[bold red]Broken Wiki-Links Found[/bold red]")
        _print_table(console, link_columns, _link_rows(broken_links))
        console.print("Each wiki-link target must match a filename or path in docs/.")
        console.print()

    # Orphan detection: docs not linked from any other doc.
    orphan_stems = sorted(all_doc_stems - linked_stems - ORPHAN_EXCEPTIONS)
    if orphan_stems:
        has_errors = True
        console.print("[bold red]Orphan Documents Found[/bold red]")
        console.print("These docs are not linked from any other document.")
        console.print()
        orphan_rows = [
            (escape(f"{path}.md"), escape(stem))
            for stem in orphan_stems
            for path in filename_counts[stem]
        ]
        _print_table(console, ("File", "Stem"), orphan_rows)

    if has_errors:
        return 1

    console.print("[bold green]All wiki-links are valid![/bold green]")
    return 0


if __name__ == "__main__":
    sys.exit(main())