From 060c7a24e3f023429c0ecef7596f31572b01a0b1 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 5 Feb 2026 21:12:06 -0800 Subject: [PATCH] Review exploring-the-docs and add doc consistency checks (#112) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Reviewed and cleaned up exploring-the-docs tutorial: simplified wiki-links, fixed broken replication/ reference, added Related section, corrected zk-docs flags to match CLAUDE.md - Added orphan detection to doc-links (finds docs not linked from any other doc) - Added new doc tooling: `doc-index` (checks category index coverage), `doc-stale` (staleness report), `doc-tags` (tag inventory) - Added `doc-index` as a pre-commit hook - Updated use-pypi-proxy to document env-var-based proxy toggle for pip/uv - Updated ai-assistance-guide with new doc task descriptions ## Test plan - [ ] Run `mise run doc-links` — passes - [ ] Run `mise run doc-index` — passes - [ ] Run `mise run doc-stale` — informational output - [ ] Run `mise run doc-tags` — informational output - [ ] Pre-commit hooks pass 🤖 Generated with [Claude Code](https://claude.com/claude-code) Reviewed-on: https://forge.ops.eblu.me/eblume/blumeops/pulls/112 --- .pre-commit-config.yaml | 6 + CHANGELOG.md | 2 +- .../doc-exploring-the-docs-review.doc.md | 1 + .../doc-exploring-the-docs-review.feature.md | 1 + .../doc-exploring-the-docs-review.infra.md | 1 + docs/how-to/use-pypi-proxy.md | 19 ++- docs/tutorials/ai-assistance-guide.md | 5 +- docs/tutorials/exploring-the-docs.md | 27 ++-- mise-tasks/doc-index | 117 ++++++++++++++++++ mise-tasks/doc-links | 28 +++++ mise-tasks/doc-stale | 101 +++++++++++++++ mise-tasks/doc-tags | 89 +++++++++++++ mise.toml | 1 + 13 files changed, 375 insertions(+), 23 deletions(-) create mode 100644 docs/changelog.d/doc-exploring-the-docs-review.doc.md create mode 100644 docs/changelog.d/doc-exploring-the-docs-review.feature.md create mode 100644 docs/changelog.d/doc-exploring-the-docs-review.infra.md create mode 100755 mise-tasks/doc-index create mode 100755 mise-tasks/doc-stale create mode 100755 mise-tasks/doc-tags diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8dfa412..438a742 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -110,3 +110,9 @@ repos: language: system files: ^docs/.*\.md$ pass_filenames: false + - id: doc-index + name: doc-index + entry: mise run doc-index + language: system + files: ^docs/.*\.md$ + pass_filenames: false diff --git a/CHANGELOG.md b/CHANGELOG.md index 13b887f..bf21ce2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -75,7 +75,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Miscellaneous -- , +- , ## [v1.2.1] - 2026-02-04 diff --git a/docs/changelog.d/doc-exploring-the-docs-review.doc.md b/docs/changelog.d/doc-exploring-the-docs-review.doc.md new file mode 100644 index 0000000..235d4b4 --- /dev/null +++ b/docs/changelog.d/doc-exploring-the-docs-review.doc.md @@ -0,0 +1 @@ +Review exploring-the-docs tutorial: simplify wiki-links, fix broken replication/ reference, add Related section, match zk-docs flags to CLAUDE.md. Update use-pypi-proxy to document env-var-based proxy toggle. diff --git a/docs/changelog.d/doc-exploring-the-docs-review.feature.md b/docs/changelog.d/doc-exploring-the-docs-review.feature.md new file mode 100644 index 0000000..b19d507 --- /dev/null +++ b/docs/changelog.d/doc-exploring-the-docs-review.feature.md @@ -0,0 +1 @@ +Add documentation consistency checks: orphan detection in doc-links, new doc-index (category index coverage), doc-stale (staleness report), and doc-tags (tag inventory). diff --git a/docs/changelog.d/doc-exploring-the-docs-review.infra.md b/docs/changelog.d/doc-exploring-the-docs-review.infra.md new file mode 100644 index 0000000..e2d182a --- /dev/null +++ b/docs/changelog.d/doc-exploring-the-docs-review.infra.md @@ -0,0 +1 @@ +Add pre-commit to mise.toml project tools. diff --git a/docs/how-to/use-pypi-proxy.md b/docs/how-to/use-pypi-proxy.md index b96ad46..fb3e2af 100644 --- a/docs/how-to/use-pypi-proxy.md +++ b/docs/how-to/use-pypi-proxy.md @@ -9,21 +9,20 @@ tags: How to configure clients and publish packages to [[devpi]]. -## Configure pip +## Configure pip/uv -Create `~/.config/pip/pip.conf`: +Point pip and uv at the proxy via environment variables: -```ini -[global] -index-url = https://pypi.ops.eblu.me/root/pypi/+simple/ -trusted-host = pypi.ops.eblu.me -``` - -Track with chezmoi: ```bash -chezmoi add ~/.config/pip/pip.conf +export PIP_INDEX_URL="https://pypi.ops.eblu.me/root/pypi/+simple/" +export UV_INDEX_URL="https://pypi.ops.eblu.me/root/pypi/+simple/" ``` +Unset both to fall back to public PyPI (e.g. when [[indri]] is offline). + +The [dotfiles repo](https://github.com/eblume/dotfiles) has shell config +that manages this toggle. + ## Upload Packages ```bash diff --git a/docs/tutorials/ai-assistance-guide.md b/docs/tutorials/ai-assistance-guide.md index 0b62620..06a259a 100644 --- a/docs/tutorials/ai-assistance-guide.md +++ b/docs/tutorials/ai-assistance-guide.md @@ -94,9 +94,12 @@ BlumeOps operations are driven by mise tasks. Run `mise tasks` to list all avail | `dns-up` | Apply DNS changes via Pulumi | | `tailnet-preview` | Preview Tailscale ACL changes | | `tailnet-up` | Apply Tailscale ACL changes via Pulumi | -| `doc-links` | Validate wiki-links in documentation | +| `doc-links` | Validate wiki-links in documentation (includes orphan detection) | +| `doc-index` | Check every doc is referenced in its category index | | `doc-titles` | Check for duplicate doc titles | | `doc-filenames` | Check for duplicate doc filenames | +| `doc-stale` | Report docs by last-modified date, highlight stale ones | +| `doc-tags` | Print frontmatter tag inventory across all docs | | `doc-random` | Select a random doc card for review | | `indri-runner-logs` | View Forgejo workflow logs from local runner | diff --git a/docs/tutorials/exploring-the-docs.md b/docs/tutorials/exploring-the-docs.md index 8cf09e1..3ea01de 100644 --- a/docs/tutorials/exploring-the-docs.md +++ b/docs/tutorials/exploring-the-docs.md @@ -27,8 +27,8 @@ The docs follow the [Diataxis](https://diataxis.fr/) framework: ### For Erich (Owner) You probably want quick access to operational details: -- [[how-to|How-to guides]] for common operations (deploy, troubleshoot, update ACLs) -- [[reference|Reference]] has service URLs, commands, and config locations +- [[how-to]] guides for common operations (deploy, troubleshoot, update ACLs) +- [[reference]] has service URLs, commands, and config locations - [[ai-assistance-guide]] explains how to work effectively with Claude - Run `mise run zk-docs` to prime AI context with key documentation @@ -36,30 +36,29 @@ You probably want quick access to operational details: Context for effective assistance: - Read [[ai-assistance-guide]] for operational conventions -- [[reference|Reference]] has the technical specifics you'll need +- [[reference]] has the technical specifics you'll need - The repo's `CLAUDE.md` has critical rules (especially the kubectl context requirement) ### For External Readers Understanding what this is: -- [[explanation|Explanation]] covers the "why" behind design decisions -- [[reference|Reference]] shows what's actually running +- [[explanation]] covers the "why" behind design decisions +- [[reference]] shows what's actually running - Browse service pages to see specific implementations ### For Contributors Getting started with changes: - [[contributing]] walks through the workflow -- [[how-to|How-to guides]] for specific tasks (deploy services, add roles) -- [[reference|Reference]] tells you where things live +- [[how-to]] guides for specific tasks (deploy services, add roles) +- [[reference]] tells you where things live ### For Replicators Replicators are people who want to build their own similar homelab GitOps setup, using BlumeOps as inspiration. -- [[replicating-blumeops]] provides the overview -- [[explanation|Explanation]] covers architecture and design rationale -- The `replication/` tutorials go deep on components +- [[replicating-blumeops]] provides the overview, with linked tutorials that go deep on individual components +- [[explanation]] covers architecture and design rationale - Reference pages show specific configuration choices ## Using Wiki Links @@ -77,7 +76,13 @@ Pre-commit hooks automatically validate that all wiki-links point to existing fi The `zk-docs` mise task concatenates key documentation files for AI context: ```bash -mise run zk-docs +mise run zk-docs -- --style=header --color=never --decorations=always ``` This outputs the AI assistance guide, reference index, how-to index, architecture overview, and tutorials index - providing Claude with essential context for BlumeOps operations. + +## Related + +- [[tutorials]] - Parent index of all tutorials +- [[update-documentation]] - How to publish doc changes +- [[review-documentation]] - Periodic doc review process diff --git a/mise-tasks/doc-index b/mise-tasks/doc-index new file mode 100755 index 0000000..dcc8e9c --- /dev/null +++ b/mise-tasks/doc-index @@ -0,0 +1,117 @@ +#!/usr/bin/env -S uv run --script +# /// script +# requires-python = ">=3.12" +# dependencies = ["rich>=13.0.0"] +# /// +#MISE description="Check that every doc is referenced in its category index" +"""Check that every doc in a Diataxis category is referenced in its index. + +Each Diataxis category (tutorials, reference, how-to, explanation) has an +index file that should wiki-link to every doc in that category directory. + +A doc is considered referenced if its filename stem appears as a wiki-link +target (e.g., alloy.md is matched by [[alloy]]) in the category index. + +Index files are excluded from the self-check. + +Usage: mise run doc-index +""" + +import re +import sys +from pathlib import Path + +from rich.console import Console +from rich.markup import escape +from rich.table import Table + +DOCS_DIR = Path(__file__).parent.parent / "docs" + +# Category directories and their index files +CATEGORIES = { + "tutorials": "tutorials/tutorials.md", + "reference": "reference/reference.md", + "how-to": "how-to/how-to.md", + "explanation": "explanation/explanation.md", +} + +# Regex to match wiki-links: [[Target]] or [[Target|Display]] +WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]") + +# Regex to match inline code (backticks) +INLINE_CODE_PATTERN = re.compile(r"`[^`]+`") + + +def extract_link_targets(file_path: Path) -> set[str]: + """Extract all wiki-link targets from a file (ignoring inline code).""" + content = file_path.read_text() + targets: set[str] = set() + + for line in content.splitlines(): + line_without_code = INLINE_CODE_PATTERN.sub("", line) + for match in WIKILINK_PATTERN.finditer(line_without_code): + targets.add(match.group(1).strip()) + + return targets + + +def main() -> int: + console = Console() + console.print("[bold]Category Index Validation[/bold]") + console.print() + + has_errors = False + missing: list[tuple[str, str, str]] = [] # (category, stem, file) + + for category, index_rel in CATEGORIES.items(): + index_path = DOCS_DIR / index_rel + if not index_path.exists(): + console.print(f"[yellow]Warning: index file not found: {index_rel}[/yellow]") + continue + + category_dir = DOCS_DIR / category + if not category_dir.is_dir(): + continue + + # Get all wiki-link targets from the index + index_targets = extract_link_targets(index_path) + index_stem = index_path.stem + + # Check each doc in the category directory + for md_file in sorted(category_dir.rglob("*.md")): + if "changelog.d" in md_file.parts: + continue + stem = md_file.stem + # Skip the index file itself + if stem == index_stem: + continue + if stem not in index_targets: + rel_path = str(md_file.relative_to(DOCS_DIR)) + missing.append((category, stem, rel_path)) + + if missing: + has_errors = True + console.print("[bold red]Docs Missing From Category Index[/bold red]") + console.print("These docs are not wiki-linked from their category index file.") + console.print() + table = Table(show_header=True, header_style="bold") + table.add_column("Category") + table.add_column("File") + table.add_column("Add To") + + for category, stem, rel_path in missing: + table.add_row(category, rel_path, CATEGORIES[category]) + + console.print(table) + console.print() + + if has_errors: + return 1 + + console.print(f"Checked {len(CATEGORIES)} category indexes.") + console.print("[bold green]All docs are referenced in their category index![/bold green]") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/mise-tasks/doc-links b/mise-tasks/doc-links index 460be11..6477b36 100755 --- a/mise-tasks/doc-links +++ b/mise-tasks/doc-links @@ -108,12 +108,17 @@ def main() -> int: path_links: list[tuple[str, int, str]] = [] spaced_links: list[tuple[str, int, str]] = [] + # Track which doc stems are linked-to from other docs (for orphan detection) + all_doc_stems: set[str] = set(filename_counts.keys()) + linked_stems: set[str] = set() + # Scan all markdown files for wiki-links (excluding changelog.d/) for md_file in sorted(DOCS_DIR.rglob("*.md")): if "changelog.d" in md_file.parts: continue rel_path = str(md_file.relative_to(DOCS_DIR)) + source_stem = md_file.stem links = extract_wikilinks(md_file) for target, line_num, has_spaces in links: @@ -128,6 +133,9 @@ def main() -> int: ambiguous_links.append((rel_path, line_num, target, filename_counts[target])) elif target not in valid_targets: broken_links.append((rel_path, line_num, target)) + elif target != source_stem: + # Valid link to a different doc — record it for orphan detection + linked_stems.add(target) # Print results console.print("[bold]Wiki-Link Validation[/bold]") @@ -205,6 +213,26 @@ def main() -> int: console.print("Each wiki-link target must match a filename or path in docs/.") console.print() + # Orphan detection: docs not linked from any other doc + ORPHAN_EXCEPTIONS = {"index"} + orphan_stems = sorted(all_doc_stems - linked_stems - ORPHAN_EXCEPTIONS) + if orphan_stems: + has_errors = True + console.print("[bold red]Orphan Documents Found[/bold red]") + console.print("These docs are not linked from any other document.") + console.print() + table = Table(show_header=True, header_style="bold") + table.add_column("File") + table.add_column("Stem") + + for stem in orphan_stems: + paths = filename_counts[stem] + for path in paths: + table.add_row(f"{path}.md", stem) + + console.print(table) + console.print() + if has_errors: return 1 diff --git a/mise-tasks/doc-stale b/mise-tasks/doc-stale new file mode 100755 index 0000000..7e1c279 --- /dev/null +++ b/mise-tasks/doc-stale @@ -0,0 +1,101 @@ +#!/usr/bin/env -S uv run --script +# /// script +# requires-python = ">=3.12" +# dependencies = ["rich>=13.0.0", "typer>=0.9.0"] +# /// +#MISE description="Report docs by git-last-modified date, highlighting stale ones" +"""Report documentation files sorted by git-last-modified date. + +Scans all markdown files in docs/ (excluding changelog.d/) and shows +their last modification date according to git. Docs older than the +threshold (default 180 days) are highlighted as stale. + +This is informational only — it always exits 0. + +Usage: mise run doc-stale [-- --threshold 90] +""" + +import subprocess +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Annotated + +import typer +from rich.console import Console +from rich.table import Table + +DOCS_DIR = Path(__file__).parent.parent / "docs" + + +def git_last_modified(file_path: Path) -> datetime | None: + """Get the last git commit date for a file.""" + try: + result = subprocess.run( + ["git", "log", "-1", "--format=%aI", "--", str(file_path)], + capture_output=True, + text=True, + check=True, + ) + date_str = result.stdout.strip() + if not date_str: + return None + return datetime.fromisoformat(date_str) + except subprocess.CalledProcessError: + return None + + +def main( + threshold: Annotated[int, typer.Option(help="Days before a doc is considered stale")] = 180, +) -> None: + console = Console() + threshold_days = threshold + console.print("[bold]Documentation Staleness Report[/bold]") + console.print(f"Threshold: {threshold_days} days") + console.print() + + now = datetime.now(timezone.utc) + entries: list[tuple[str, datetime, int, bool]] = [] + + for md_file in sorted(DOCS_DIR.rglob("*.md")): + if "changelog.d" in md_file.parts: + continue + + last_modified = git_last_modified(md_file) + if last_modified is None: + continue + + rel_path = str(md_file.relative_to(DOCS_DIR)) + age_days = (now - last_modified).days + is_stale = age_days > threshold_days + entries.append((rel_path, last_modified, age_days, is_stale)) + + # Sort oldest-first + entries.sort(key=lambda e: e[1]) + + stale_count = sum(1 for e in entries if e[3]) + + table = Table(show_header=True, header_style="bold") + table.add_column("File") + table.add_column("Last Modified", justify="right") + table.add_column("Age (days)", justify="right") + table.add_column("Status") + + for rel_path, last_modified, age_days, is_stale in entries: + date_str = last_modified.strftime("%Y-%m-%d") + if is_stale: + table.add_row( + f"[red]{rel_path}[/red]", + f"[red]{date_str}[/red]", + f"[red]{age_days}[/red]", + "[red]STALE[/red]", + ) + else: + table.add_row(rel_path, date_str, str(age_days), "[green]OK[/green]") + + console.print(table) + console.print() + console.print(f"Total: {len(entries)} docs, {stale_count} stale") + +if __name__ == "__main__": + typer.run(main) diff --git a/mise-tasks/doc-tags b/mise-tasks/doc-tags new file mode 100755 index 0000000..61022c9 --- /dev/null +++ b/mise-tasks/doc-tags @@ -0,0 +1,89 @@ +#!/usr/bin/env -S uv run --script +# /// script +# requires-python = ">=3.12" +# dependencies = ["pyyaml>=6.0", "rich>=13.0.0"] +# /// +#MISE description="Print frontmatter tag inventory across all docs" +"""Print every frontmatter tag with usage count and file list. + +Scans all markdown files in docs/ (excluding changelog.d/) for YAML +frontmatter tags, then displays a table sorted by count showing which +docs use each tag. + +This is informational only — it always exits 0. + +Usage: mise run doc-tags +""" + +import sys +from collections import defaultdict +from pathlib import Path + +import yaml +from rich.console import Console +from rich.table import Table + +DOCS_DIR = Path(__file__).parent.parent / "docs" + + +def extract_frontmatter(file_path: Path) -> dict | None: + """Extract YAML frontmatter from a markdown file.""" + content = file_path.read_text() + if not content.startswith("---"): + return None + + end_idx = content.find("---", 3) + if end_idx == -1: + return None + + frontmatter_text = content[3:end_idx].strip() + try: + return yaml.safe_load(frontmatter_text) or {} + except yaml.YAMLError: + return None + + +def main() -> int: + console = Console() + console.print("[bold]Documentation Tag Inventory[/bold]") + console.print() + + # tag -> list of file paths + tag_files: dict[str, list[str]] = defaultdict(list) + + for md_file in sorted(DOCS_DIR.rglob("*.md")): + if "changelog.d" in md_file.parts: + continue + + frontmatter = extract_frontmatter(md_file) + if not frontmatter: + continue + + tags = frontmatter.get("tags", []) + if not isinstance(tags, list): + continue + + rel_path = str(md_file.relative_to(DOCS_DIR)) + for tag in tags: + tag_files[str(tag)].append(rel_path) + + # Sort by count descending, then alphabetically + sorted_tags = sorted(tag_files.items(), key=lambda t: (-len(t[1]), t[0])) + + table = Table(show_header=True, header_style="bold") + table.add_column("Tag") + table.add_column("Count", justify="right") + table.add_column("Files") + + for tag, files in sorted_tags: + table.add_row(tag, str(len(files)), "\n".join(files)) + + console.print(table) + console.print() + console.print(f"Total: {len(sorted_tags)} unique tags across {sum(len(f) for f in tag_files.values())} usages") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/mise.toml b/mise.toml index 2861f91..2ce6646 100644 --- a/mise.toml +++ b/mise.toml @@ -1,3 +1,4 @@ [tools] "pipx:ansible-core" = { version = "latest", uvx = "true", uvx_args = "--with botocore --with boto3" } +"pipx:pre-commit" = { version = "latest", uvx = "true" } pulumi = "latest"