Review exploring-the-docs and add doc consistency checks (#112)
## Summary

- Reviewed and cleaned up exploring-the-docs tutorial: simplified wiki-links, fixed broken replication/ reference, added Related section, corrected zk-docs flags to match CLAUDE.md
- Added orphan detection to doc-links (finds docs not linked from any other doc)
- Added new doc tooling: `doc-index` (checks category index coverage), `doc-stale` (staleness report), `doc-tags` (tag inventory)
- Added `doc-index` as a pre-commit hook
- Updated use-pypi-proxy to document env-var-based proxy toggle for pip/uv
- Updated ai-assistance-guide with new doc task descriptions

## Test plan

- [ ] Run `mise run doc-links` — passes
- [ ] Run `mise run doc-index` — passes
- [ ] Run `mise run doc-stale` — informational output
- [ ] Run `mise run doc-tags` — informational output
- [ ] Pre-commit hooks pass

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Reviewed-on: https://forge.ops.eblu.me/eblume/blumeops/pulls/112
This commit is contained in:
parent
61c5328ec2
commit
060c7a24e3
13 changed files with 375 additions and 23 deletions
117
mise-tasks/doc-index
Executable file
117
mise-tasks/doc-index
Executable file
|
|
@ -0,0 +1,117 @@
|
|||
#!/usr/bin/env -S uv run --script
|
||||
# /// script
|
||||
# requires-python = ">=3.12"
|
||||
# dependencies = ["rich>=13.0.0"]
|
||||
# ///
|
||||
#MISE description="Check that every doc is referenced in its category index"
|
||||
"""Check that every doc in a Diataxis category is referenced in its index.
|
||||
|
||||
Each Diataxis category (tutorials, reference, how-to, explanation) has an
|
||||
index file that should wiki-link to every doc in that category directory.
|
||||
|
||||
A doc is considered referenced if its filename stem appears as a wiki-link
|
||||
target (e.g., alloy.md is matched by [[alloy]]) in the category index.
|
||||
|
||||
Index files are excluded from the self-check.
|
||||
|
||||
Usage: mise run doc-index
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from rich.console import Console
|
||||
from rich.markup import escape
|
||||
from rich.table import Table
|
||||
|
||||
DOCS_DIR = Path(__file__).parent.parent / "docs"
|
||||
|
||||
# Category directories and their index files
|
||||
CATEGORIES = {
|
||||
"tutorials": "tutorials/tutorials.md",
|
||||
"reference": "reference/reference.md",
|
||||
"how-to": "how-to/how-to.md",
|
||||
"explanation": "explanation/explanation.md",
|
||||
}
|
||||
|
||||
# Regex to match wiki-links: [[Target]] or [[Target|Display]]
WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]")

# Regex to match inline code (backticks)
INLINE_CODE_PATTERN = re.compile(r"`[^`]+`")


def extract_link_targets(file_path: Path) -> set[str]:
    """Extract all wiki-link targets from a file (ignoring inline code).

    Args:
        file_path: Markdown file to scan; it is read as UTF-8.

    Returns:
        The set of link targets with surrounding whitespace and any
        ``#heading`` fragment removed, so ``[[alloy#Config|Alloy]]`` yields
        ``alloy``. Links that only carry a fragment (``[[#Heading]]``) point
        at the current page and are not reported.
    """
    # Explicit encoding: the result must not depend on the machine's locale.
    content = file_path.read_text(encoding="utf-8")
    targets: set[str] = set()

    for line in content.splitlines():
        # Backticked spans are examples of link syntax, not real links.
        line_without_code = INLINE_CODE_PATTERN.sub("", line)
        for match in WIKILINK_PATTERN.finditer(line_without_code):
            # A heading anchor still references the same document, so only
            # the part before "#" identifies the target.
            target = match.group(1).partition("#")[0].strip()
            if target:
                targets.add(target)

    return targets
|
||||
|
||||
|
||||
def main() -> int:
    """Check that every doc in each category is wiki-linked from its index.

    For each entry in CATEGORIES, the index file's wiki-link targets are
    compared against the stems of all markdown files found recursively in
    the category directory (files under changelog.d/ and the index itself
    are ignored).

    Returns:
        1 if at least one doc is missing from its category index, else 0.
        A missing index file only produces a warning.
    """
    console = Console()
    console.print("[bold]Category Index Validation[/bold]")
    console.print()

    missing: list[tuple[str, str]] = []  # (category, path relative to docs/)
    checked = 0  # indexes actually validated, for an honest summary line

    for category, index_rel in CATEGORIES.items():
        index_path = DOCS_DIR / index_rel
        if not index_path.exists():
            console.print(f"[yellow]Warning: index file not found: {index_rel}[/yellow]")
            continue

        category_dir = DOCS_DIR / category
        if not category_dir.is_dir():
            continue

        checked += 1

        # Get all wiki-link targets from the index
        index_targets = extract_link_targets(index_path)
        index_stem = index_path.stem

        # Check each doc in the category directory
        for md_file in sorted(category_dir.rglob("*.md")):
            if "changelog.d" in md_file.parts:
                continue
            stem = md_file.stem
            # Skip the index file itself and anything the index links to
            if stem == index_stem or stem in index_targets:
                continue
            missing.append((category, str(md_file.relative_to(DOCS_DIR))))

    if missing:
        console.print("[bold red]Docs Missing From Category Index[/bold red]")
        console.print("These docs are not wiki-linked from their category index file.")
        console.print()
        table = Table(show_header=True, header_style="bold")
        table.add_column("Category")
        table.add_column("File")
        table.add_column("Add To")

        for category, rel_path in missing:
            # File names may contain "[...]", which Rich would otherwise
            # interpret as markup instead of printing literally.
            table.add_row(category, escape(rel_path), CATEGORIES[category])

        console.print(table)
        console.print()
        return 1

    console.print(f"Checked {checked} category indexes.")
    console.print("[bold green]All docs are referenced in their category index![/bold green]")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
|
@ -108,12 +108,17 @@ def main() -> int:
|
|||
path_links: list[tuple[str, int, str]] = []
|
||||
spaced_links: list[tuple[str, int, str]] = []
|
||||
|
||||
# Track which doc stems are linked-to from other docs (for orphan detection)
|
||||
all_doc_stems: set[str] = set(filename_counts.keys())
|
||||
linked_stems: set[str] = set()
|
||||
|
||||
# Scan all markdown files for wiki-links (excluding changelog.d/)
|
||||
for md_file in sorted(DOCS_DIR.rglob("*.md")):
|
||||
if "changelog.d" in md_file.parts:
|
||||
continue
|
||||
|
||||
rel_path = str(md_file.relative_to(DOCS_DIR))
|
||||
source_stem = md_file.stem
|
||||
links = extract_wikilinks(md_file)
|
||||
|
||||
for target, line_num, has_spaces in links:
|
||||
|
|
@ -128,6 +133,9 @@ def main() -> int:
|
|||
ambiguous_links.append((rel_path, line_num, target, filename_counts[target]))
|
||||
elif target not in valid_targets:
|
||||
broken_links.append((rel_path, line_num, target))
|
||||
elif target != source_stem:
|
||||
# Valid link to a different doc — record it for orphan detection
|
||||
linked_stems.add(target)
|
||||
|
||||
# Print results
|
||||
console.print("[bold]Wiki-Link Validation[/bold]")
|
||||
|
|
@ -205,6 +213,26 @@ def main() -> int:
|
|||
console.print("Each wiki-link target must match a filename or path in docs/.")
|
||||
console.print()
|
||||
|
||||
# Orphan detection: docs not linked from any other doc
|
||||
ORPHAN_EXCEPTIONS = {"index"}
|
||||
orphan_stems = sorted(all_doc_stems - linked_stems - ORPHAN_EXCEPTIONS)
|
||||
if orphan_stems:
|
||||
has_errors = True
|
||||
console.print("[bold red]Orphan Documents Found[/bold red]")
|
||||
console.print("These docs are not linked from any other document.")
|
||||
console.print()
|
||||
table = Table(show_header=True, header_style="bold")
|
||||
table.add_column("File")
|
||||
table.add_column("Stem")
|
||||
|
||||
for stem in orphan_stems:
|
||||
paths = filename_counts[stem]
|
||||
for path in paths:
|
||||
table.add_row(f"{path}.md", stem)
|
||||
|
||||
console.print(table)
|
||||
console.print()
|
||||
|
||||
if has_errors:
|
||||
return 1
|
||||
|
||||
|
|
|
|||
101
mise-tasks/doc-stale
Executable file
101
mise-tasks/doc-stale
Executable file
|
|
@ -0,0 +1,101 @@
|
|||
#!/usr/bin/env -S uv run --script
|
||||
# /// script
|
||||
# requires-python = ">=3.12"
|
||||
# dependencies = ["rich>=13.0.0", "typer>=0.9.0"]
|
||||
# ///
|
||||
#MISE description="Report docs by git-last-modified date, highlighting stale ones"
|
||||
"""Report documentation files sorted by git-last-modified date.
|
||||
|
||||
Scans all markdown files in docs/ (excluding changelog.d/) and shows
|
||||
their last modification date according to git. Docs older than the
|
||||
threshold (default 180 days) are highlighted as stale.
|
||||
|
||||
This is informational only — it always exits 0.
|
||||
|
||||
Usage: mise run doc-stale [-- --threshold 90]
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
DOCS_DIR = Path(__file__).parent.parent / "docs"
|
||||
|
||||
|
||||
def git_last_modified(file_path: Path) -> datetime | None:
    """Get the author date of the last git commit that touched a file.

    git is run from the file's own directory (``git -C``), so the lookup
    works regardless of the directory the script was started from.

    Args:
        file_path: Path of the file to look up.

    Returns:
        The date git prints for ``%aI`` (author date, strict ISO 8601)
        parsed into a datetime, or None when git exits with an error or
        reports no commit for the path.
    """
    try:
        result = subprocess.run(
            [
                "git",
                "-C",
                str(file_path.parent),
                "log",
                "-1",
                "--format=%aI",
                "--",
                # Relative to the -C directory, so this also works when
                # file_path itself is a relative path.
                file_path.name,
            ],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError:
        return None

    date_str = result.stdout.strip()
    if not date_str:
        # git succeeded but the path has no history (e.g. not yet committed).
        return None
    return datetime.fromisoformat(date_str)
|
||||
|
||||
|
||||
def main(
    threshold: Annotated[int, typer.Option(help="Days before a doc is considered stale")] = 180,
) -> None:
    """Report docs by git-last-modified date, highlighting stale ones."""
    # Imported here because the file's top-level imports do not include it.
    from rich.markup import escape

    console = Console()
    console.print("[bold]Documentation Staleness Report[/bold]")
    console.print(f"Threshold: {threshold} days")
    console.print()

    now = datetime.now(timezone.utc)
    entries: list[tuple[str, datetime, int, bool]] = []
    undated: list[str] = []  # docs for which git reported no date

    for md_file in sorted(DOCS_DIR.rglob("*.md")):
        if "changelog.d" in md_file.parts:
            continue

        rel_path = str(md_file.relative_to(DOCS_DIR))
        last_modified = git_last_modified(md_file)
        if last_modified is None:
            # Remember the doc instead of dropping it silently, so the
            # report does not under-count without explanation.
            undated.append(rel_path)
            continue

        age_days = (now - last_modified).days
        entries.append((rel_path, last_modified, age_days, age_days > threshold))

    # Sort oldest-first
    entries.sort(key=lambda e: e[1])

    stale_count = sum(1 for e in entries if e[3])

    table = Table(show_header=True, header_style="bold")
    table.add_column("File")
    table.add_column("Last Modified", justify="right")
    table.add_column("Age (days)", justify="right")
    table.add_column("Status")

    for rel_path, last_modified, age_days, is_stale in entries:
        date_str = last_modified.strftime("%Y-%m-%d")
        # Paths are data, not markup: escape "[...]" before Rich parses it.
        safe_path = escape(rel_path)
        if is_stale:
            table.add_row(
                f"[red]{safe_path}[/red]",
                f"[red]{date_str}[/red]",
                f"[red]{age_days}[/red]",
                "[red]STALE[/red]",
            )
        else:
            table.add_row(safe_path, date_str, str(age_days), "[green]OK[/green]")

    console.print(table)
    console.print()
    console.print(f"Total: {len(entries)} docs, {stale_count} stale")

    if undated:
        console.print(f"[yellow]Skipped {len(undated)} docs with no git history available:[/yellow]")
        for rel_path in undated:
            console.print(f"  {escape(rel_path)}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
typer.run(main)
|
||||
89
mise-tasks/doc-tags
Executable file
89
mise-tasks/doc-tags
Executable file
|
|
@ -0,0 +1,89 @@
|
|||
#!/usr/bin/env -S uv run --script
|
||||
# /// script
|
||||
# requires-python = ">=3.12"
|
||||
# dependencies = ["pyyaml>=6.0", "rich>=13.0.0"]
|
||||
# ///
|
||||
#MISE description="Print frontmatter tag inventory across all docs"
|
||||
"""Print every frontmatter tag with usage count and file list.
|
||||
|
||||
Scans all markdown files in docs/ (excluding changelog.d/) for YAML
|
||||
frontmatter tags, then displays a table sorted by count showing which
|
||||
docs use each tag.
|
||||
|
||||
This is informational only — it always exits 0.
|
||||
|
||||
Usage: mise run doc-tags
|
||||
"""
|
||||
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
DOCS_DIR = Path(__file__).parent.parent / "docs"
|
||||
|
||||
|
||||
def extract_frontmatter(file_path: Path) -> dict | None:
|
||||
"""Extract YAML frontmatter from a markdown file."""
|
||||
content = file_path.read_text()
|
||||
if not content.startswith("---"):
|
||||
return None
|
||||
|
||||
end_idx = content.find("---", 3)
|
||||
if end_idx == -1:
|
||||
return None
|
||||
|
||||
frontmatter_text = content[3:end_idx].strip()
|
||||
try:
|
||||
return yaml.safe_load(frontmatter_text) or {}
|
||||
except yaml.YAMLError:
|
||||
return None
|
||||
|
||||
|
||||
def main() -> int:
    """Print a table of every frontmatter tag, its usage count and its files.

    Markdown files under DOCS_DIR are scanned (skipping changelog.d/); docs
    without frontmatter, or whose ``tags`` value is not a list, are ignored.

    Returns:
        Always 0.
    """
    out = Console()
    out.print("[bold]Documentation Tag Inventory[/bold]")
    out.print()

    # tag -> docs (relative to DOCS_DIR) that declare it
    usage: dict[str, list[str]] = defaultdict(list)

    doc_paths = (p for p in sorted(DOCS_DIR.rglob("*.md")) if "changelog.d" not in p.parts)
    for doc in doc_paths:
        meta = extract_frontmatter(doc)
        # Missing or empty frontmatter falls through the list check below.
        declared = meta.get("tags", []) if meta else None
        if not isinstance(declared, list):
            continue

        doc_rel = str(doc.relative_to(DOCS_DIR))
        for label in declared:
            usage[str(label)].append(doc_rel)

    # Most-used tags first; ties broken alphabetically
    ranked = list(usage.items())
    ranked.sort(key=lambda item: (-len(item[1]), item[0]))

    report = Table(show_header=True, header_style="bold")
    report.add_column("Tag")
    report.add_column("Count", justify="right")
    report.add_column("Files")
    for label, docs in ranked:
        report.add_row(label, str(len(docs)), "\n".join(docs))

    out.print(report)
    out.print()

    total_usages = sum(map(len, usage.values()))
    out.print(f"Total: {len(ranked)} unique tags across {total_usages} usages")

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue