2026-02-03 15:55:31 -08:00
|
|
|
#!/usr/bin/env -S uv run --script
|
|
|
|
|
# /// script
|
|
|
|
|
# requires-python = ">=3.12"
|
2026-02-03 16:29:31 -08:00
|
|
|
# dependencies = ["rich>=13.0.0"]
|
2026-02-03 15:55:31 -08:00
|
|
|
# ///
|
2026-02-03 16:29:31 -08:00
|
|
|
#MISE description="Validate all wiki-links point to existing doc filenames"
|
2026-02-03 16:36:51 -08:00
|
|
|
"""Validate that all wiki-links in documentation point to existing files.
|
2026-02-03 15:55:31 -08:00
|
|
|
|
|
|
|
|
This script scans all markdown files in the docs/ directory (excluding
|
2026-02-03 21:17:58 -08:00
|
|
|
changelog.d/), extracts wiki-links, and verifies each link target
|
2026-02-03 16:36:51 -08:00
|
|
|
exists as a filename or path in the documentation.
|
2026-02-03 15:55:31 -08:00
|
|
|
|
|
|
|
|
Wiki-link formats supported:
|
2026-02-03 16:36:51 -08:00
|
|
|
- [[filename]] - links to filename.md (must be unique)
|
|
|
|
|
- [[path/to/file]] - links to path/to/file.md (for ambiguous filenames like index)
|
|
|
|
|
- [[target | Display Text]] - either format with display text
|
2026-02-03 15:55:31 -08:00
|
|
|
|
|
|
|
|
Usage: mise run doc-links
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
import sys
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
from rich.console import Console
|
|
|
|
|
from rich.markup import escape
|
|
|
|
|
from rich.table import Table
|
|
|
|
|
|
|
|
|
|
# Documentation root: the "docs" directory inside the parent of the directory
# that contains this script.
DOCS_DIR = Path(__file__).parent.parent / "docs"
|
|
|
|
|
|
2026-02-03 16:20:12 -08:00
|
|
|
# Regex to match wiki-links: [[Target]] or [[Target | Display]]
|
2026-02-03 16:29:31 -08:00
|
|
|
# Group 1 captures the target text (everything up to the first "|" or "]");
# an optional "| Display" part is matched but not captured.
WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(?:\s*\|\s*[^\]]+)?\]\]")
|
2026-02-03 15:55:31 -08:00
|
|
|
|
|
|
|
|
# Regex to match inline code (backticks)
|
|
|
|
|
# Matches a single-backtick span holding at least one non-backtick character.
INLINE_CODE_PATTERN = re.compile(r"`[^`]+`")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_wikilinks(file_path: Path) -> list[tuple[str, int]]:
    """Extract all wiki-link targets from a markdown file with line numbers.

    Wiki-links inside inline code (backticks) are ignored, as these are
    examples rather than real links.

    Args:
        file_path: Markdown file to scan; it is decoded as UTF-8.

    Returns:
        ``(target, line_number)`` pairs in order of appearance. Targets have
        surrounding whitespace stripped and line numbers start at 1.

    Raises:
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: relying on the platform default encoding
    # can mis-decode or reject non-ASCII documentation on some systems.
    content = file_path.read_text(encoding="utf-8")

    links: list[tuple[str, int]] = []
    for line_num, line in enumerate(content.splitlines(), start=1):
        # Remove inline code before searching for wiki-links
        line_without_code = INLINE_CODE_PATTERN.sub("", line)
        links.extend(
            (match.group(1).strip(), line_num)
            for match in WIKILINK_PATTERN.finditer(line_without_code)
        )

    return links
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main() -> int:
    """Validate every wiki-link under ``DOCS_DIR`` and print a report.

    Builds the set of valid link targets from the markdown files found under
    ``DOCS_DIR`` (skipping anything inside a ``changelog.d`` directory), then
    checks each link returned by ``extract_wikilinks`` against that set.

    Returns:
        0 when every link resolves; 1 when at least one link is broken or
        uses a filename that exists in more than one location.
    """
    console = Console()

    # Walk the tree once and reuse the result for both passes. Sorting keeps
    # the report (including the "Possible Paths" column) deterministic, and
    # testing the path relative to DOCS_DIR means a "changelog.d" directory
    # above the docs root cannot accidentally exclude every file.
    doc_files = sorted(
        path
        for path in DOCS_DIR.rglob("*.md")
        if "changelog.d" not in path.relative_to(DOCS_DIR).parts
    )

    # Collect all valid targets (both filenames and paths)
    valid_targets: set[str] = set()
    # Map each bare filename to every extension-less relative path that has it,
    # so filenames appearing more than once can be reported as ambiguous.
    filename_counts: dict[str, list[str]] = {}

    for md_file in doc_files:
        # as_posix() keeps "/" separators on every platform, matching how
        # path links are written (e.g. "reference/services/alloy").
        rel_path_str = md_file.relative_to(DOCS_DIR).with_suffix("").as_posix()
        filename_counts.setdefault(md_file.stem, []).append(rel_path_str)
        valid_targets.add(rel_path_str)

    # Only add filenames that are unique (not ambiguous)
    ambiguous_filenames: set[str] = set()
    for filename, paths in filename_counts.items():
        if len(paths) == 1:
            valid_targets.add(filename)
        else:
            ambiguous_filenames.add(filename)

    # Special case: files at repo root that are copied into docs during build.
    # These are valid link targets even though they don't exist in docs/.
    repo_root = DOCS_DIR.parent
    build_time_docs = ["CHANGELOG.md"]
    for filename in build_time_docs:
        if (repo_root / filename).exists():
            valid_targets.add(Path(filename).stem)

    # Collect all broken and ambiguous links
    broken_links: list[tuple[str, int, str]] = []
    ambiguous_links: list[tuple[str, int, str, list[str]]] = []

    for md_file in doc_files:
        rel_path = str(md_file.relative_to(DOCS_DIR))
        for target, line_num in extract_wikilinks(md_file):
            if target in ambiguous_filenames:
                # Link uses an ambiguous filename - needs to use full path
                ambiguous_links.append(
                    (rel_path, line_num, target, filename_counts[target])
                )
            elif target not in valid_targets:
                broken_links.append((rel_path, line_num, target))

    # Print results
    console.print("[bold]Wiki-Link Validation[/bold]")
    console.print()
    console.print(f"Found {len(valid_targets)} valid link targets in documentation.")
    console.print()

    has_errors = False

    if ambiguous_links:
        has_errors = True
        console.print("[bold red]Ambiguous Wiki-Links Found[/bold red]")
        console.print("These links use filenames that exist in multiple locations.")
        # escape() is required here: otherwise Rich parses "[reference/index]"
        # and "[index]" as markup tags and drops them from the output.
        console.print(
            escape("Use the full path instead (e.g., [[reference/index]] not [[index]]).")
        )
        console.print()

        table = Table(show_header=True, header_style="bold")
        table.add_column("File")
        table.add_column("Line", justify="right")
        table.add_column("Target")
        table.add_column("Possible Paths")

        for file_path, line_num, target, paths in ambiguous_links:
            # Paths are escaped too so a bracketed directory or file name is
            # shown literally instead of being read as Rich markup.
            table.add_row(
                escape(file_path),
                str(line_num),
                escape(f"[[{target}]]"),
                escape("\n".join(paths)),
            )

        console.print(table)
        console.print()

    if broken_links:
        has_errors = True
        console.print("[bold red]Broken Wiki-Links Found[/bold red]")

        table = Table(show_header=True, header_style="bold")
        table.add_column("File")
        table.add_column("Line", justify="right")
        table.add_column("Target")

        for file_path, line_num, target in broken_links:
            table.add_row(escape(file_path), str(line_num), escape(f"[[{target}]]"))

        console.print(table)
        console.print()
        console.print("Each wiki-link target must match a filename or path in docs/.")
        console.print()

    if has_errors:
        return 1

    console.print("[bold green]All wiki-links are valid![/bold green]")
    return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|