Add doc consistency checks and update pypi proxy docs

Add orphan detection to doc-links, and three new doc tasks: doc-index
(category index coverage), doc-stale (staleness report), doc-tags (tag
inventory). Register doc-index as a pre-commit hook. Update
use-pypi-proxy to document env-var-based proxy toggle.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Erich Blume 2026-02-05 15:57:24 -08:00
commit a3dec7f6e2
9 changed files with 356 additions and 13 deletions

View file

@ -110,3 +110,9 @@ repos:
language: system
files: ^docs/.*\.md$
pass_filenames: false
- id: doc-index
name: doc-index
entry: mise run doc-index
language: system
files: ^docs/.*\.md$
pass_filenames: false

View file

@ -75,7 +75,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Miscellaneous
- ,
- ,
## [v1.2.1] - 2026-02-04

View file

@ -1 +1 @@
Review and clean up exploring-the-docs tutorial: simplify wiki-links, fix broken replication/ reference, add Related section, match zk-docs flags to CLAUDE.md.
Review and clean up exploring-the-docs tutorial: simplify wiki-links, fix broken replication/ reference, add Related section, match zk-docs flags to CLAUDE.md. Add documentation consistency checks: orphan detection in doc-links, new doc-index (category index coverage), doc-stale (staleness report), and doc-tags (tag inventory). Update use-pypi-proxy to document env-var-based proxy toggle.

View file

@ -9,21 +9,20 @@ tags:
How to configure clients and publish packages to [[devpi]].
## Configure pip
## Configure pip/uv
Create `~/.config/pip/pip.conf`:
Point pip and uv at the proxy via environment variables:
```ini
[global]
index-url = https://pypi.ops.eblu.me/root/pypi/+simple/
trusted-host = pypi.ops.eblu.me
```
Track with chezmoi:
```bash
chezmoi add ~/.config/pip/pip.conf
export PIP_INDEX_URL="https://pypi.ops.eblu.me/root/pypi/+simple/"
export UV_INDEX_URL="https://pypi.ops.eblu.me/root/pypi/+simple/"
```
Unset both to fall back to public PyPI (e.g. when [[indri]] is offline).
The [dotfiles repo](https://github.com/eblume/dotfiles) has shell config
that manages this toggle.
## Upload Packages
```bash

View file

@ -94,9 +94,12 @@ BlumeOps operations are driven by mise tasks. Run `mise tasks` to list all avail
| `dns-up` | Apply DNS changes via Pulumi |
| `tailnet-preview` | Preview Tailscale ACL changes |
| `tailnet-up` | Apply Tailscale ACL changes via Pulumi |
| `doc-links` | Validate wiki-links in documentation |
| `doc-links` | Validate wiki-links in documentation (includes orphan detection) |
| `doc-index` | Check every doc is referenced in its category index |
| `doc-titles` | Check for duplicate doc titles |
| `doc-filenames` | Check for duplicate doc filenames |
| `doc-stale` | Report docs by last-modified date, highlight stale ones |
| `doc-tags` | Print frontmatter tag inventory across all docs |
| `doc-random` | Select a random doc card for review |
| `indri-runner-logs` | View Forgejo workflow logs from local runner |

117
mise-tasks/doc-index Executable file
View file

@ -0,0 +1,117 @@
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = ["rich>=13.0.0"]
# ///
#MISE description="Check that every doc is referenced in its category index"
"""Check that every doc in a Diataxis category is referenced in its index.
Each Diataxis category (tutorials, reference, how-to, explanation) has an
index file that should wiki-link to every doc in that category directory.
A doc is considered referenced if its filename stem appears as a wiki-link
target (e.g., alloy.md is matched by [[alloy]]) in the category index.
Index files are excluded from the self-check.
Usage: mise run doc-index
"""
import re
import sys
from pathlib import Path
from rich.console import Console
from rich.markup import escape
from rich.table import Table
# Documentation root: the docs/ directory one level above the directory that
# holds this script.
DOCS_DIR = Path(__file__).parent.parent / "docs"
# Category directories and their index files.
# Keys are category directory names and values are index file paths; both are
# joined onto DOCS_DIR when the check runs.
CATEGORIES = {
"tutorials": "tutorials/tutorials.md",
"reference": "reference/reference.md",
"how-to": "how-to/how-to.md",
"explanation": "explanation/explanation.md",
}
# Regex to match wiki-links: [[Target]] or [[Target|Display]]
WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]")

# Regex to match inline code (backticks)
INLINE_CODE_PATTERN = re.compile(r"`[^`]+`")


def extract_link_targets(file_path: Path) -> set[str]:
    """Collect the wiki-link targets found in a markdown file.

    Inline code spans are removed from each line before matching, so a
    wiki-link that only appears inside backticks is not counted. Any
    ``#heading`` suffix is dropped from the target, so ``[[alloy#Setup]]``
    counts as a reference to ``alloy``.

    Args:
        file_path: Markdown file to scan; read as UTF-8.

    Returns:
        The set of link targets with surrounding whitespace stripped.
    """
    targets: set[str] = set()
    # Explicit encoding: the platform default is not guaranteed to be UTF-8.
    for line in file_path.read_text(encoding="utf-8").splitlines():
        visible = INLINE_CODE_PATTERN.sub("", line)
        for match in WIKILINK_PATTERN.finditer(visible):
            # Keep only the document part of "doc#heading".
            target = match.group(1).partition("#")[0].strip()
            # A same-page link such as [[#heading]] names no document.
            if target:
                targets.add(target)
    return targets
def _docs_missing_from_index(category: str, index_path: Path) -> list[tuple[str, str, str]]:
    """Return (category, stem, rel_path) for each doc the index does not link to."""
    index_targets = extract_link_targets(index_path)
    index_stem = index_path.stem

    missing: list[tuple[str, str, str]] = []
    for md_file in sorted((DOCS_DIR / category).rglob("*.md")):
        if "changelog.d" in md_file.parts:
            continue
        stem = md_file.stem
        # The index file is not expected to link to itself.
        if stem == index_stem or stem in index_targets:
            continue
        missing.append((category, stem, str(md_file.relative_to(DOCS_DIR))))
    return missing


def main() -> int:
    """Check that every doc is wiki-linked from its category index.

    For each entry in CATEGORIES, every markdown file under the category
    directory (searched recursively, skipping changelog.d/) must appear as a
    wiki-link target in that category's index file. A category whose index
    file is absent produces a warning and is skipped, as is a category whose
    directory does not exist.

    Returns:
        1 if any doc is missing from its index, otherwise 0.
    """
    console = Console()
    console.print("[bold]Category Index Validation[/bold]")
    console.print()

    missing: list[tuple[str, str, str]] = []  # (category, stem, file)
    checked = 0  # categories actually validated, so the summary is accurate

    for category, index_rel in CATEGORIES.items():
        index_path = DOCS_DIR / index_rel
        if not index_path.exists():
            console.print(f"[yellow]Warning: index file not found: {index_rel}[/yellow]")
            continue
        if not (DOCS_DIR / category).is_dir():
            continue
        checked += 1
        missing.extend(_docs_missing_from_index(category, index_path))

    if missing:
        console.print("[bold red]Docs Missing From Category Index[/bold red]")
        console.print("These docs are not wiki-linked from their category index file.")
        console.print()

        table = Table(show_header=True, header_style="bold")
        table.add_column("Category")
        table.add_column("File")
        table.add_column("Add To")
        for category, _stem, rel_path in missing:
            # Escape so a path containing [brackets] is shown literally
            # instead of being parsed as Rich markup.
            table.add_row(category, escape(rel_path), CATEGORIES[category])
        console.print(table)
        console.print()
        return 1

    console.print(f"Checked {checked} category indexes.")
    console.print("[bold green]All docs are referenced in their category index![/bold green]")
    return 0


if __name__ == "__main__":
    sys.exit(main())

View file

@ -108,12 +108,17 @@ def main() -> int:
path_links: list[tuple[str, int, str]] = []
spaced_links: list[tuple[str, int, str]] = []
# Track which doc stems are linked-to from other docs (for orphan detection)
all_doc_stems: set[str] = set(filename_counts.keys())
linked_stems: set[str] = set()
# Scan all markdown files for wiki-links (excluding changelog.d/)
for md_file in sorted(DOCS_DIR.rglob("*.md")):
if "changelog.d" in md_file.parts:
continue
rel_path = str(md_file.relative_to(DOCS_DIR))
source_stem = md_file.stem
links = extract_wikilinks(md_file)
for target, line_num, has_spaces in links:
@ -128,6 +133,9 @@ def main() -> int:
ambiguous_links.append((rel_path, line_num, target, filename_counts[target]))
elif target not in valid_targets:
broken_links.append((rel_path, line_num, target))
elif target != source_stem:
# Valid link to a different doc — record it for orphan detection
linked_stems.add(target)
# Print results
console.print("[bold]Wiki-Link Validation[/bold]")
@ -205,6 +213,26 @@ def main() -> int:
console.print("Each wiki-link target must match a filename or path in docs/.")
console.print()
# Orphan detection: docs not linked from any other doc
ORPHAN_EXCEPTIONS = {"index"}
orphan_stems = sorted(all_doc_stems - linked_stems - ORPHAN_EXCEPTIONS)
if orphan_stems:
has_errors = True
console.print("[bold red]Orphan Documents Found[/bold red]")
console.print("These docs are not linked from any other document.")
console.print()
table = Table(show_header=True, header_style="bold")
table.add_column("File")
table.add_column("Stem")
for stem in orphan_stems:
paths = filename_counts[stem]
for path in paths:
table.add_row(f"{path}.md", stem)
console.print(table)
console.print()
if has_errors:
return 1

101
mise-tasks/doc-stale Executable file
View file

@ -0,0 +1,101 @@
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = ["rich>=13.0.0", "typer>=0.9.0"]
# ///
#MISE description="Report docs by git-last-modified date, highlighting stale ones"
"""Report documentation files sorted by git-last-modified date.
Scans all markdown files in docs/ (excluding changelog.d/) and shows
their last modification date according to git. Docs older than the
threshold (default 180 days) are highlighted as stale.
This is informational only — it always exits 0.
Usage: mise run doc-stale [-- --threshold 90]
"""
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Annotated
import typer
from rich.console import Console
from rich.table import Table
# Documentation root: the docs/ directory one level above the directory that
# holds this script.
DOCS_DIR = Path(__file__).parent.parent / "docs"
def git_last_modified(file_path: Path) -> datetime | None:
"""Get the last git commit date for a file."""
try:
result = subprocess.run(
["git", "log", "-1", "--format=%aI", "--", str(file_path)],
capture_output=True,
text=True,
check=True,
)
date_str = result.stdout.strip()
if not date_str:
return None
return datetime.fromisoformat(date_str)
except subprocess.CalledProcessError:
return None
def main(
    threshold: Annotated[int, typer.Option(help="Days before a doc is considered stale")] = 180,
) -> None:
    """Print docs sorted oldest-first, flagging those older than the threshold."""
    # Imported here because the file's top-level imports only bring in
    # Console and Table from rich.
    from rich.markup import escape

    console = Console()
    console.print("[bold]Documentation Staleness Report[/bold]")
    console.print(f"Threshold: {threshold} days")
    console.print()

    now = datetime.now(timezone.utc)

    # (relative path, last modified, age in days, is stale)
    entries: list[tuple[str, datetime, int, bool]] = []
    for md_file in sorted(DOCS_DIR.rglob("*.md")):
        if "changelog.d" in md_file.parts:
            continue
        last_modified = git_last_modified(md_file)
        if last_modified is None:
            # No date available for this file; leave it out of the report.
            continue
        age_days = (now - last_modified).days
        entries.append(
            (str(md_file.relative_to(DOCS_DIR)), last_modified, age_days, age_days > threshold)
        )

    # Sort oldest-first
    entries.sort(key=lambda entry: entry[1])
    stale_count = sum(1 for entry in entries if entry[3])

    table = Table(show_header=True, header_style="bold")
    table.add_column("File")
    table.add_column("Last Modified", justify="right")
    table.add_column("Age (days)", justify="right")
    table.add_column("Status")

    for rel_path, last_modified, age_days, is_stale in entries:
        date_str = last_modified.strftime("%Y-%m-%d")
        # Escape the path so [brackets] in a filename are printed literally
        # rather than being parsed as Rich markup.
        shown_path = escape(rel_path)
        if is_stale:
            table.add_row(
                f"[red]{shown_path}[/red]",
                f"[red]{date_str}[/red]",
                f"[red]{age_days}[/red]",
                "[red]STALE[/red]",
            )
        else:
            table.add_row(shown_path, date_str, str(age_days), "[green]OK[/green]")

    console.print(table)
    console.print()
    console.print(f"Total: {len(entries)} docs, {stale_count} stale")


if __name__ == "__main__":
    typer.run(main)

89
mise-tasks/doc-tags Executable file
View file

@ -0,0 +1,89 @@
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = ["pyyaml>=6.0", "rich>=13.0.0"]
# ///
#MISE description="Print frontmatter tag inventory across all docs"
"""Print every frontmatter tag with usage count and file list.
Scans all markdown files in docs/ (excluding changelog.d/) for YAML
frontmatter tags, then displays a table sorted by count showing which
docs use each tag.
This is informational only — it always exits 0.
Usage: mise run doc-tags
"""
import sys
from collections import defaultdict
from pathlib import Path
import yaml
from rich.console import Console
from rich.table import Table
# Documentation root: the docs/ directory one level above the directory that
# holds this script.
DOCS_DIR = Path(__file__).parent.parent / "docs"
def extract_frontmatter(file_path: Path) -> dict | None:
"""Extract YAML frontmatter from a markdown file."""
content = file_path.read_text()
if not content.startswith("---"):
return None
end_idx = content.find("---", 3)
if end_idx == -1:
return None
frontmatter_text = content[3:end_idx].strip()
try:
return yaml.safe_load(frontmatter_text) or {}
except yaml.YAMLError:
return None
def main() -> int:
    """Print a table of every frontmatter tag with its usage count and files.

    Scans all markdown files under DOCS_DIR (skipping changelog.d/). Files
    whose frontmatter is missing or is not a mapping are ignored. A ``tags``
    value given as a single string is treated as one tag; any other
    non-list value is ignored.

    Returns:
        Always 0; the report is informational.
    """
    # Imported here because the file's top-level imports only bring in
    # Console and Table from rich.
    from rich.markup import escape

    console = Console()
    console.print("[bold]Documentation Tag Inventory[/bold]")
    console.print()

    # tag -> list of file paths
    tag_files: dict[str, list[str]] = defaultdict(list)

    for md_file in sorted(DOCS_DIR.rglob("*.md")):
        if "changelog.d" in md_file.parts:
            continue

        frontmatter = extract_frontmatter(md_file)
        # Guard on the type: YAML frontmatter can parse to a list or scalar,
        # which has no .get().
        if not isinstance(frontmatter, dict):
            continue

        tags = frontmatter.get("tags", [])
        if isinstance(tags, str):
            # "tags: foo" names a single tag; a blank string names none.
            tags = [tags] if tags.strip() else []
        if not isinstance(tags, list):
            continue

        rel_path = str(md_file.relative_to(DOCS_DIR))
        for tag in tags:
            tag_files[str(tag)].append(rel_path)

    # Sort by count descending, then alphabetically
    sorted_tags = sorted(tag_files.items(), key=lambda item: (-len(item[1]), item[0]))

    table = Table(show_header=True, header_style="bold")
    table.add_column("Tag")
    table.add_column("Count", justify="right")
    table.add_column("Files")
    for tag, files in sorted_tags:
        # Escape so [brackets] in a tag or path are printed literally rather
        # than being parsed as Rich markup.
        table.add_row(escape(tag), str(len(files)), escape("\n".join(files)))

    console.print(table)
    console.print()

    usage_total = sum(len(files) for files in tag_files.values())
    console.print(f"Total: {len(sorted_tags)} unique tags across {usage_total} usages")
    return 0


if __name__ == "__main__":
    sys.exit(main())