blumeops/mise-tasks/docs-check-links
Erich Blume 0d422f5234
All checks were successful
Deploy Fly.io Proxy / deploy (push) Successful in 2m51s
Update tooling dependencies (March 2026) (#307)
## Summary

Monthly tooling dependency update per [[update-tooling-dependencies]].

- **Prek hooks:** trufflehog v3.93.4→v3.94.0, ruff v0.15.2→v0.15.7, shfmt v3.12.0-2→v3.13.0-1, ansible-lint floor→26.3.0, ansible-core floor→2.18
- **Fly.io proxy:** nginx 1.28.2→1.29.6, Grafana Alloy v1.13.1→v1.14.1
- **Forgejo workflows:** actions/checkout v4.3.1→v6.0.2 (SHA-pinned across all 5 workflows)
- **Mise tasks:** tightened Python lower bounds — rich≥14.0.0, typer≥0.24.0, httpx≥0.28.1, pyyaml≥6.0.2

## Test plan

- [x] `prek run --all-files` passes
- [ ] Verify Fly.io deploy succeeds after merge (nginx minor bump + Alloy bump)
- [ ] Spot-check a workflow run with the new actions/checkout v6

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Reviewed-on: #307
2026-03-24 08:11:46 -07:00

236 lines
8.6 KiB
Text
Executable file

#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = ["rich>=14.0.0"]
# ///
#MISE description="Validate all wiki-links point to existing doc files"
"""Validate that all wiki-links in documentation point to existing files.
This script scans all markdown files in the docs/ directory (excluding
changelog.d/), extracts wiki-links, and verifies each link target resolves
to an existing file.
Wiki-link formats supported:
- [[filename]] - resolves by stem (errors if ambiguous)
- [[path/to/file]] - resolves by relative path from docs root
- [[target|Display Text]] - either form with display text
- [[target#Heading]] - with anchor fragment (file part validated)
Resolution mirrors Quartz's "shortest" markdownLinkResolution:
bare names resolve when unique; use paths to disambiguate duplicates.
Usage: mise run docs-check-links
"""
import re
import sys
from pathlib import Path
from rich.console import Console
from rich.markup import escape
from rich.table import Table
# Documentation root: go two directory levels up from this script file,
# then into the "docs" directory found there.
DOCS_DIR = Path(__file__).parent.parent.joinpath("docs")
# Regex to match wiki-links: [[Target]] or [[Target|Display]]
WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]")

# Regex to match inline code (backticks)
INLINE_CODE_PATTERN = re.compile(r"`[^`]+`")

# Regex to match the opening/closing line of a fenced code block (``` or ~~~)
_FENCE_PATTERN = re.compile(r"^\s*(`{3,}|~{3,})")


def extract_wikilinks(file_path: Path) -> list[tuple[str, int, bool]]:
    """Extract all wiki-link targets from a markdown file with line numbers.

    The file is read as UTF-8. Wiki-links inside inline code (backticks) and
    inside fenced code blocks (``` or ~~~) are ignored, because those are
    examples or shell snippets (e.g. ``[[ -f file ]]``), not real links.

    Args:
        file_path: Markdown file to scan.

    Returns:
        A list of ``(target, line_num, has_spaces)`` tuples in document order.
        ``target`` is the text before any ``|``, stripped of surrounding
        whitespace (an ``#anchor`` suffix is kept). ``line_num`` is 1-based.
        ``has_spaces`` is True if the target had leading/trailing whitespace
        or the pipe separator was followed by a space.
    """
    content = file_path.read_text(encoding="utf-8")
    links: list[tuple[str, int, bool]] = []
    # Marker (e.g. "```" or "~~~~") of the fence we are currently inside, if any.
    open_fence: str | None = None

    for line_num, line in enumerate(content.splitlines(), start=1):
        fence = _FENCE_PATTERN.match(line)
        if fence is not None:
            marker = fence.group(1)
            rest = line[fence.end():]
            if open_fence is None:
                # A backtick fence cannot have backticks in its info string;
                # such a line is inline code (```x```), not a fence opener.
                if not (marker[0] == "`" and "`" in rest):
                    open_fence = marker
                    continue
            elif (
                marker[0] == open_fence[0]
                and len(marker) >= len(open_fence)
                and not rest.strip()
            ):
                # Closing fence: same character, at least as long, nothing after.
                open_fence = None
                continue
        if open_fence is not None:
            continue

        # Remove inline code before searching for wiki-links
        line_without_code = INLINE_CODE_PATTERN.sub("", line)
        for match in WIKILINK_PATTERN.finditer(line_without_code):
            raw_target = match.group(1)
            target = raw_target.strip()
            pipe_part = match.group(2)  # "|Display" or None
            # Spaces around the target (incl. before the pipe) or after the pipe
            has_spaces = raw_target != target or (
                pipe_part is not None and pipe_part.startswith("| ")
            )
            links.append((target, line_num, has_spaces))
    return links
# Files at the repo root that are copied into docs during the build
_BUILD_TIME_DOCS = ("CHANGELOG.md",)

# Filename stems that are never reported as orphans
_ORPHAN_EXCEPTIONS = {"index"}


def _list_doc_files() -> list[Path]:
    """Return every markdown file under DOCS_DIR, sorted, skipping changelog.d/."""
    return [
        md_file
        for md_file in sorted(DOCS_DIR.rglob("*.md"))
        # Check parts relative to the docs root so that a parent directory
        # of the checkout named "changelog.d" cannot hide every document.
        if "changelog.d" not in md_file.relative_to(DOCS_DIR).parts
    ]


def _index_link_targets(
    doc_files: list[Path],
) -> tuple[set[str], dict[str, list[str]]]:
    """Build the lookup structures used to resolve wiki-links.

    Returns ``(path_targets, stem_to_paths)``:
    - path_targets: relative paths without extension, always "/"-separated
      (e.g., "reference/services/alloy")
    - stem_to_paths: map from filename stem to the paths sharing that stem
      (for ambiguity detection)
    """
    path_targets: set[str] = set()
    stem_to_paths: dict[str, list[str]] = {}
    for md_file in doc_files:
        # as_posix(): wiki-links are written with "/" on every platform.
        rel_path_str = md_file.relative_to(DOCS_DIR).with_suffix("").as_posix()
        path_targets.add(rel_path_str)
        stem_to_paths.setdefault(md_file.stem, []).append(rel_path_str)

    # Special case: files at repo root copied into docs during build
    repo_root = DOCS_DIR.parent
    for filename in _BUILD_TIME_DOCS:
        if (repo_root / filename).exists():
            stem = Path(filename).stem
            stem_to_paths.setdefault(stem, []).append(stem)
            path_targets.add(stem)
    return path_targets, stem_to_paths


def _print_link_table(console: Console, rows: list, *, show_paths: bool = False) -> None:
    """Print a File/Line/Target table; rows may carry a trailing list of paths."""
    table = Table(show_header=True, header_style="bold")
    table.add_column("File")
    table.add_column("Line", justify="right")
    table.add_column("Target")
    if show_paths:
        table.add_column("Possible Paths")
    for file_path, line_num, target, *extra in rows:
        # escape(): keep Rich from parsing "[target]" as a markup tag.
        cells = [file_path, str(line_num), escape(f"[[{target}]]")]
        if show_paths:
            cells.append("\n".join(extra[0]))
        table.add_row(*cells)
    console.print(table)


def main() -> int:
    """Validate every wiki-link under DOCS_DIR and report problems.

    Reports four kinds of problems: links with stray spaces, bare-name links
    that match several files, links whose target does not exist, and orphan
    documents (not linked from any *other* document; stems in
    ``_ORPHAN_EXCEPTIONS`` are exempt).

    Orphans are tracked per document path, not per stem, so two documents
    sharing a stem are judged independently.

    Returns:
        1 if any problem was reported, otherwise 0 (used as the exit status).
    """
    console = Console()
    doc_files = _list_doc_files()
    path_targets, stem_to_paths = _index_link_targets(doc_files)

    # Collect errors
    broken_links: list[tuple[str, int, str]] = []
    ambiguous_links: list[tuple[str, int, str, list[str]]] = []
    spaced_links: list[tuple[str, int, str]] = []
    # Paths that are linked from some other document (for orphan detection)
    linked_paths: set[str] = set()

    for md_file in doc_files:
        rel = md_file.relative_to(DOCS_DIR)
        rel_path = rel.as_posix()
        source_path = rel.with_suffix("").as_posix()
        for target, line_num, has_spaces in extract_wikilinks(md_file):
            if has_spaces:
                spaced_links.append((rel_path, line_num, target))
                continue
            # Strip anchor fragment for file validation
            file_target = target.split("#", 1)[0]
            if not file_target:
                # Pure in-page anchor like [[#Heading]] — always valid
                continue
            if "/" in file_target:
                # Path-based link — resolve against path_targets
                resolved = file_target if file_target in path_targets else None
            else:
                # Bare stem link — check for existence and ambiguity
                candidates = stem_to_paths.get(file_target)
                if candidates is not None and len(candidates) > 1:
                    # Ambiguous: multiple files share this stem
                    ambiguous_links.append((rel_path, line_num, target, candidates))
                    continue
                resolved = candidates[0] if candidates else None
            if resolved is None:
                broken_links.append((rel_path, line_num, target))
            elif resolved != source_path:
                # A document linking to itself does not rescue it from orphanhood
                linked_paths.add(resolved)

    # Print results
    console.print("[bold]Wiki-Link Validation[/bold]")
    console.print()
    console.print(f"Found {len(path_targets)} valid link targets in documentation.")
    console.print()

    has_errors = False

    if spaced_links:
        has_errors = True
        console.print("[bold red]Wiki-Links With Spaces Found[/bold red]")
        console.print("Wiki-links must not have spaces in the target or around the pipe.")
        # Escaped: otherwise Rich swallows "[target|Display Text]" as a tag.
        console.print(escape("Use [[target|Display Text]] not [[target | Display Text]]."))
        console.print()
        _print_link_table(console, spaced_links)
        console.print()

    if ambiguous_links:
        has_errors = True
        console.print("[bold red]Ambiguous Wiki-Links Found[/bold red]")
        console.print("These bare-name links match multiple files.")
        # Escaped: otherwise Rich swallows "[path/to/file]" as a tag.
        console.print(escape("Use a path-based link to disambiguate: [[path/to/file]]"))
        console.print()
        _print_link_table(console, ambiguous_links, show_paths=True)
        console.print()

    if broken_links:
        has_errors = True
        console.print("[bold red]Broken Wiki-Links Found[/bold red]")
        _print_link_table(console, broken_links)
        console.print()
        console.print("Each wiki-link target must match a filename stem or path in docs/.")
        console.print()

    # Orphan detection: docs not linked from any other doc
    orphans = sorted(
        (path.rsplit("/", 1)[-1], path) for path in path_targets - linked_paths
    )
    orphans = [(stem, path) for stem, path in orphans if stem not in _ORPHAN_EXCEPTIONS]
    if orphans:
        has_errors = True
        console.print("[bold red]Orphan Documents Found[/bold red]")
        console.print("These docs are not linked from any other document.")
        console.print()
        table = Table(show_header=True, header_style="bold")
        table.add_column("File")
        table.add_column("Stem")
        for stem, path in orphans:
            table.add_row(f"{path}.md", stem)
        console.print(table)
        console.print()

    if has_errors:
        return 1
    console.print("[bold green]All wiki-links are valid![/bold green]")
    return 0
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())