#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = ["rich>=13.0.0"]
# ///
#MISE description="Validate all wiki-links point to existing doc filenames"
"""Validate that all wiki-links in documentation point to existing files.

This script scans all markdown files in the docs/ directory (excluding
changelog.d/), extracts wiki-links, and verifies each link target
exists as a unique filename in the documentation. It also reports
orphan documents: docs that no other doc links to ("index" is exempt).

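Wiki-link formats supported:
- [[filename]] - links to filename.md (must be unique across all docs)
- [[target|Display Text]] - filename with display text
- [[filename#Heading]] - only the part before '#' is validated
- [[#Heading]] - in-page anchor, always accepted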

Path-based links (containing '/') are NOT supported to ensure all
filenames are unique and links work correctly in obsidian.nvim.

Usage: mise run docs-check-links
"""

import re
import sys
from pathlib import Path

from rich.console import Console
from rich.markup import escape
from rich.table import Table

# Documentation root: the "docs" directory that sits beside this script's
# parent directory (i.e. two levels up from this file, then into docs/).
DOCS_DIR = Path(__file__).parent.parent / "docs"

# Regex to match wiki-links: [[Target]] or [[Target|Display]]
# Captures:
#   group(1) = target: one or more characters other than "]" and "|"
#              (surrounding whitespace is kept so callers can detect it)
#   group(2) = the full "|Display" part including the pipe, or None if absent
WIKILINK_PATTERN = re.compile(r"\[\[([^\]|]+)(\|[^\]]+)?\]\]")

# Regex to match inline code: a single-backtick span with non-empty content
# that itself contains no backticks.
INLINE_CODE_PATTERN = re.compile(r"`[^`]+`")


def extract_wikilinks(file_path: Path) -> list[tuple[str, int, bool]]:
    """Extract all wiki-link targets from a markdown file with line numbers.

    Wiki-links inside inline code (single-backtick spans) are ignored, since
    those are treated as examples rather than real links.

    Args:
        file_path: Markdown file to scan. It is decoded as UTF-8.

    Returns:
        A list of ``(target, line_num, has_spaces)`` tuples in file order.
        ``target`` is the text before any ``|``, stripped of surrounding
        whitespace (a ``#fragment`` is left in place). ``line_num`` is
        1-based. ``has_spaces`` is True when the raw target had leading or
        trailing whitespace, or when the display part starts with a space
        right after the pipe (``[[target| Text]]``). Spaces *inside* the
        target are not flagged.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so results do not depend on the host locale.
    content = file_path.read_text(encoding="utf-8")
    links: list[tuple[str, int, bool]] = []

    for line_num, line in enumerate(content.splitlines(), start=1):
        # Remove inline code before searching for wiki-links
        line_without_code = INLINE_CODE_PATTERN.sub("", line)
        for match in WIKILINK_PATTERN.finditer(line_without_code):
            raw_target = match.group(1)
            target = raw_target.strip()
            pipe_part = match.group(2)  # "|Display" or None
            # A space before the pipe is trailing whitespace on the raw
            # target, so the first comparison already covers it; only the
            # space *after* the pipe needs a separate check.
            has_spaces = raw_target != target or (
                pipe_part is not None and pipe_part.startswith("| ")
            )
            links.append((target, line_num, has_spaces))

    return links


def _link_row(file_path: str, line_num: int, target: str) -> list[str]:
    """Build the File/Line/Target cells for one offending link, markup-escaped."""
    return [escape(file_path), str(line_num), escape(f"[[{target}]]")]


def _print_table(console: Console, headers: list[str], rows: list[list[str]]) -> None:
    """Print *rows* under *headers* as a Rich table, followed by a blank line.

    Cells are handed to Rich unchanged, so callers must escape any text that
    may contain square brackets (file paths, wiki-link targets).
    """
    table = Table(show_header=True, header_style="bold")
    for header in headers:
        # Line numbers are right-aligned; every other column is left-aligned.
        table.add_column(header, justify="right" if header == "Line" else "left")
    for row in rows:
        table.add_row(*row)
    console.print(table)
    console.print()


def main() -> int:
    """Validate every wiki-link under ``DOCS_DIR`` and print a report.

    Markdown files below ``DOCS_DIR`` are scanned (anything under a
    ``changelog.d`` directory is skipped). The report covers, in order:
    links with stray spaces, path-based links (containing ``/``), links to
    filenames that occur more than once, links to unknown targets, and
    orphan documents that no *other* document links to (``index`` is
    exempt). Pure in-page anchors (``[[#Heading]]``) are always accepted;
    for ``[[file#Heading]]`` only the part before ``#`` is validated.

    Returns:
        0 if no problems were found, 1 otherwise (including when the docs
        directory does not exist).
    """
    console = Console()

    # A missing docs directory would otherwise yield zero files and a
    # misleading "all valid" result.
    if not DOCS_DIR.is_dir():
        console.print("[bold red]Documentation directory not found[/bold red]")
        console.print(escape(str(DOCS_DIR)))
        return 1

    # Scan the tree once. Sorting makes the report (including the order of
    # "Possible Paths") deterministic. The changelog.d exclusion is checked
    # against the path relative to DOCS_DIR so that a directory of that name
    # *above* docs/ cannot exclude everything.
    doc_files = sorted(
        path
        for path in DOCS_DIR.rglob("*.md")
        if "changelog.d" not in path.relative_to(DOCS_DIR).parts
    )

    # stem -> every relative path (without extension) that has that stem
    filename_counts: dict[str, list[str]] = {}
    # Relative paths plus unique stems. Targets containing '/' are rejected
    # before this set is consulted, so the path forms mainly feed the
    # reported target count.
    valid_targets: set[str] = set()
    for md_file in doc_files:
        rel_path_str = str(md_file.relative_to(DOCS_DIR).with_suffix(""))
        filename_counts.setdefault(md_file.stem, []).append(rel_path_str)
        valid_targets.add(rel_path_str)

    # Only stems that occur exactly once are usable as link targets.
    ambiguous_filenames: set[str] = {
        stem for stem, paths in filename_counts.items() if len(paths) > 1
    }
    valid_targets.update(filename_counts.keys() - ambiguous_filenames)

    # Special case: files at repo root that are copied into docs during build
    # These are valid link targets even though they don't exist in docs/
    repo_root = DOCS_DIR.parent
    build_time_docs = ["CHANGELOG.md"]
    for build_doc in build_time_docs:
        if (repo_root / build_doc).exists():
            valid_targets.add(Path(build_doc).stem)

    # Collect all broken, ambiguous, path-based, and spaced links
    broken_links: list[tuple[str, int, str]] = []
    ambiguous_links: list[tuple[str, int, str, list[str]]] = []
    path_links: list[tuple[str, int, str]] = []
    spaced_links: list[tuple[str, int, str]] = []

    # Track which doc stems are linked-to from other docs (for orphan detection)
    all_doc_stems: set[str] = set(filename_counts)
    linked_stems: set[str] = set()

    for md_file in doc_files:
        rel_path = str(md_file.relative_to(DOCS_DIR))
        source_stem = md_file.stem

        for target, line_num, has_spaces in extract_wikilinks(md_file):
            if has_spaces:
                # Links with stray spaces around the target or pipe are not allowed
                spaced_links.append((rel_path, line_num, target))
                continue

            # Handle anchor links: [[#Heading]] or [[file#Heading]]
            # Strip the #fragment for validation; pure anchors skip the file check
            file_target = target
            if "#" in target:
                file_target = target.split("#", 1)[0]
                if not file_target:
                    # Pure in-page anchor — always valid
                    continue

            if "/" in file_target:
                # Path-based links are not allowed - use simple filenames only
                path_links.append((rel_path, line_num, target))
            elif file_target in ambiguous_filenames:
                # Link uses an ambiguous filename - needs to be renamed
                ambiguous_links.append(
                    (rel_path, line_num, target, filename_counts[file_target])
                )
            elif file_target not in valid_targets:
                broken_links.append((rel_path, line_num, target))
            elif file_target != source_stem:
                # Valid link to a different doc — record it for orphan detection.
                # Self-links deliberately do not rescue a doc from being an orphan.
                linked_stems.add(file_target)

    # Print results
    console.print("[bold]Wiki-Link Validation[/bold]")
    console.print()
    console.print(f"Found {len(valid_targets)} valid link targets in documentation.")
    console.print()

    has_errors = False
    link_headers = ["File", "Line", "Target"]

    if spaced_links:
        has_errors = True
        console.print("[bold red]Wiki-Links With Spaces Found[/bold red]")
        console.print("Wiki-links must not have spaces in the target or around the pipe.")
        # escape(): otherwise Rich parses "[target|Display Text]" as a style
        # tag and the example is printed as "[]".
        console.print(escape("Use [[target|Display Text]] not [[target | Display Text]]."))
        console.print()
        _print_table(console, link_headers, [_link_row(*row) for row in spaced_links])

    if path_links:
        has_errors = True
        console.print("[bold red]Path-Based Wiki-Links Found[/bold red]")
        console.print("Wiki-links must use simple filenames only (no '/' paths).")
        # escape(): keep the literal "[[filename]]" from being eaten as markup.
        console.print(escape("Rename files to be unique, then use [[filename]] format."))
        console.print()
        _print_table(console, link_headers, [_link_row(*row) for row in path_links])

    if ambiguous_links:
        has_errors = True
        console.print("[bold red]Ambiguous Wiki-Links Found[/bold red]")
        console.print("These links use filenames that exist in multiple locations.")
        console.print("Rename files to be unique across all documentation.")
        console.print()
        _print_table(
            console,
            [*link_headers, "Possible Paths"],
            [
                [*_link_row(file_path, line_num, target), escape("\n".join(paths))]
                for file_path, line_num, target, paths in ambiguous_links
            ],
        )

    if broken_links:
        has_errors = True
        console.print("[bold red]Broken Wiki-Links Found[/bold red]")
        _print_table(console, link_headers, [_link_row(*row) for row in broken_links])
        # Path-based targets are rejected above, so only filenames can match.
        console.print("Each wiki-link target must match a unique filename in docs/.")
        console.print()

    # Orphan detection: docs not linked from any other doc
    orphan_exceptions = {"index"}
    orphan_stems = sorted(all_doc_stems - linked_stems - orphan_exceptions)
    if orphan_stems:
        has_errors = True
        console.print("[bold red]Orphan Documents Found[/bold red]")
        console.print("These docs are not linked from any other document.")
        console.print()
        _print_table(
            console,
            ["File", "Stem"],
            [
                [escape(f"{path}.md"), escape(stem)]
                for stem in orphan_stems
                for path in filename_counts[stem]
            ],
        )

    if has_errors:
        return 1

    console.print("[bold green]All wiki-links are valid![/bold green]")
    return 0


# Run the validator only when executed as a script; main()'s return value
# (0 = no problems, 1 = problems found) becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
