#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.12"
# dependencies = ["rich>=14.0.0", "typer>=0.24.0"]
# ///
#MISE description="Bundle the interview handoff — transcript + source + built artifacts — into a single .zip"
#USAGE flag "--transcript-session <prefix>" help="Session UUID prefix (default: most recent Claude Code session for this repo)"
#USAGE flag "--docs-html <tarball>" help="Pre-built docs site tarball (optional); extracts into docs_html/"
"""Produce one .zip in the current directory containing everything the
interviewer needs after the debrief:

  * README.md    — top-level orientation (what's in the bundle, how to run)
  * docs/        — Diataxis project documentation, markdown source
  * docs_html/   — rendered static docs site (only if --docs-html given)
  * source/      — `git archive HEAD` expanded (tracked files only)
  * dist/        — `uv build` output (sdist + wheel)
  * transcript/  — AI session transcript (HTML, extracted)

Flat layout: every artifact is inflated, one outer zip, no nested
compression — a single unzip gives a browsable tree.
"""

import datetime
import shutil
import subprocess
import tempfile
import zipfile
from pathlib import Path
from typing import Annotated

import typer
from rich.console import Console

# Repository root: the resolved location of this script, two directory levels up.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Directory whose `*.jsonl` files are scanned when no session prefix is given.
# NOTE(review): the last path component is hard-coded and looks specific to one
# machine's checkout location — confirm before running this anywhere else.
SESSIONS_DIR = (
    Path.home()
    / ".claude"
    / "projects"
    / "-Users-eblume-code-personal-mercury-interview-project"
)

# Status messages are written to stderr; the script's own stdout `print` is
# used only for the final bundle path.
console = Console(stderr=True)
app = typer.Typer(add_completion=False)


def _newest_session_uuid() -> str:
    """Return the stem of the most recently modified session file.

    Considers the ``*.jsonl`` files directly under ``SESSIONS_DIR`` and picks
    the one with the greatest mtime; its file name without the extension is
    returned.

    Raises:
        typer.Exit: with code 1 when no session file is found. A diagnostic is
            printed first so the failure is not silent.
    """
    newest = max(
        SESSIONS_DIR.glob("*.jsonl"),
        key=lambda p: p.stat().st_mtime,
        default=None,
    )
    if newest is None:
        console.print(
            f"[red]✗ no *.jsonl session files found in {SESSIONS_DIR}; "
            "pass --transcript-session explicitly[/red]"
        )
        raise typer.Exit(code=1)
    return newest.stem


def _run_transcript(uuid: str, dest: Path) -> None:
    """Invoke the transcript task and extract the zip it leaves behind into dest.

    Runs ``mise run transcript <first 8 chars of uuid>`` in ``REPO_ROOT``, then
    looks in ``REPO_ROOT/transcripts`` for a ``.zip`` that was not there before
    the run. If none is new (for example a rerun on the same session), the
    newest existing ``.zip`` is used instead.

    Args:
        uuid: Session UUID or prefix; only the first 8 characters are passed on.
        dest: Directory to extract into; created if missing.

    Raises:
        subprocess.CalledProcessError: if the transcript task exits non-zero.
        typer.Exit: with code 1 when no transcript archive exists afterwards.
    """
    transcripts_dir = REPO_ROOT / "transcripts"
    # Snapshot existing archives so the one created by this run can be told apart.
    before = {p.name for p in transcripts_dir.glob("*.zip")} if transcripts_dir.exists() else set()

    prefix = uuid[:8]
    console.print(f"[cyan]→ generating transcript for session {prefix}…[/cyan]")
    subprocess.run(
        ["mise", "run", "transcript", prefix],
        check=True,
        cwd=REPO_ROOT,
    )

    archives = list(transcripts_dir.glob("*.zip"))
    # Prefer archives created by this run; otherwise fall back to everything
    # present (handles a rerun on the same session).
    candidates = [p for p in archives if p.name not in before] or archives
    if not candidates:
        console.print(
            f"[red]✗ transcript task left no .zip in {transcripts_dir}[/red]"
        )
        raise typer.Exit(code=1)

    # Glob order is arbitrary, so choose by mtime rather than by position.
    newest = max(candidates, key=lambda p: p.stat().st_mtime)

    dest.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(newest) as z:
        z.extractall(dest)


def _run_git_archive(dest: Path) -> None:
    """Expand the tree at HEAD into dest.

    Writes ``git archive --format=tar HEAD`` (run in ``REPO_ROOT``) to a
    temporary ``source.tar`` next to dest, unpacks it with the external ``tar``
    command, then deletes the tar file.

    Raises:
        subprocess.CalledProcessError: if either ``git`` or ``tar`` fails.
    """
    console.print("[cyan]→ git archive HEAD…[/cyan]")
    dest.mkdir(parents=True, exist_ok=True)

    staging_tar = dest.parent / "source.tar"
    archive_cmd = ["git", "archive", "--format=tar", "HEAD"]
    with staging_tar.open("wb") as sink:
        subprocess.run(archive_cmd, stdout=sink, cwd=REPO_ROOT, check=True)

    unpack_cmd = ["tar", "xf", str(staging_tar), "-C", str(dest)]
    subprocess.run(unpack_cmd, check=True)
    staging_tar.unlink()


def _is_build_artifact(path: Path) -> bool:
    """Return True for files named like a wheel (.whl) or an sdist (.tar.gz)."""
    return path.suffix == ".whl" or path.name.endswith(".tar.gz")


def _run_uv_build(dest: Path) -> None:
    """Run ``uv build`` in REPO_ROOT and copy the resulting artifacts into dest.

    Existing ``.whl`` / ``.tar.gz`` files in ``REPO_ROOT/dist`` are removed
    first so that only the output of this build is copied.

    Args:
        dest: Directory receiving the wheel and sdist; created if missing.

    Raises:
        subprocess.CalledProcessError: if ``uv build`` exits non-zero. The
            captured output is printed before the exception propagates.
    """
    console.print("[cyan]→ uv build…[/cyan]")
    build_out = REPO_ROOT / "dist"
    # Clean only the .whl/.tar.gz we care about — leave any other release output.
    # NOTE(review): this also removes every other *.tar.gz in dist/ (e.g. a docs
    # tarball stored there) — confirm that is intended.
    if build_out.exists():
        # Collect first so the directory is not modified while it is being listed.
        for stale in [f for f in build_out.iterdir() if _is_build_artifact(f)]:
            stale.unlink()

    try:
        subprocess.run(
            ["uv", "build"],
            check=True,
            cwd=REPO_ROOT,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as err:
        # Output is captured to keep a successful run quiet; on failure it is
        # the only explanation available, so surface it before re-raising.
        console.print("[red]✗ uv build failed[/red]")
        for captured in (err.stdout, err.stderr):
            if captured:
                console.print(captured, markup=False, highlight=False)
        raise

    dest.mkdir(parents=True, exist_ok=True)
    for f in build_out.iterdir():
        if _is_build_artifact(f):
            shutil.copy(f, dest)


def _short_sha() -> str:
    """Return the output of ``git rev-parse --short HEAD`` for REPO_ROOT, stripped.

    Raises:
        subprocess.CalledProcessError: if the git command exits non-zero.
    """
    rev_parse_cmd = ["git", "rev-parse", "--short", "HEAD"]
    completed = subprocess.run(
        rev_parse_cmd,
        cwd=REPO_ROOT,
        text=True,
        capture_output=True,
        check=True,
    )
    return completed.stdout.strip()


def _copy_docs(dest: Path) -> None:
    """Copy ``REPO_ROOT/docs`` to dest, leaving out entries named ``changelog.d``."""
    console.print("[cyan]→ copying docs/ …[/cyan]")
    docs_src = REPO_ROOT / "docs"
    skip_changelog = shutil.ignore_patterns("changelog.d")
    shutil.copytree(docs_src, dest, ignore=skip_changelog)


def _extract_docs_html(tarball: Path, dest: Path) -> None:
    """Unpack the docs-site tarball into dest, creating dest if needed.

    Extraction uses tarfile's ``"data"`` filter.
    """
    import tarfile

    console.print(f"[cyan]→ extracting docs_html from {tarball.name}…[/cyan]")
    dest.mkdir(parents=True, exist_ok=True)
    with tarfile.open(tarball) as archive:
        archive.extractall(dest, filter="data")


def _write_readme(dest: Path, sha: str, stamp_human: str, has_docs_html: bool) -> None:
    """Write the bundle's top-level README to dest as UTF-8.

    Args:
        dest: Path of the README file to create or overwrite.
        sha: Commit identifier shown in the header line.
        stamp_human: Human-readable generation time shown in the header line.
        has_docs_html: Whether the bundle contains ``docs_html/``. When true,
            the contents list points at a "Viewing docs_html" section and that
            section is included; when false, the list says the directory is
            absent and the section is omitted.
    """
    docs_html_line = (
        "- `docs_html/` — rendered static docs site. See *Viewing docs_html* below.\n"
        if has_docs_html
        else "- `docs_html/` — not included in this bundle (rebuild with `--docs-html <tarball>`).\n"
    )
    # Begins with a blank line and ends with a newline, so interpolating it on
    # a line of its own leaves the surrounding spacing unchanged when empty.
    docs_html_section = (
        """
## Viewing docs_html

The rendered site uses client-side `fetch()` for search and page indexing,
which browsers block under `file://` for security. Open `index.html` directly
and you'll see CORS errors in the console. Serve it over HTTP instead:

```sh
cd docs_html
python3 -m http.server 8080
# then open http://localhost:8080
```

Any static HTTP server works — this is just the zero-dependency option.
"""
        if has_docs_html
        else ""
    )
    # The text contains non-ASCII characters, so the encoding is pinned rather
    # than left to the platform's locale default.
    dest.write_text(
        f"""# Mercury Phase 2 Interview Handoff

Bundle for commit `{sha}`, generated {stamp_human}.

## Contents

- `README.md` — this file.
- `docs/` — project documentation, Diataxis-structured markdown.
{docs_html_line}- `source/` — clean `git archive HEAD` of the repository (tracked files only, no `.git`).
- `dist/` — `uv build` output: sdist and wheel, ready to install.
- `transcript/` — Claude Code session transcript (HTML + raw JSONL).

## Running the matcher

Requires Python 3.12+ and [uv](https://docs.astral.sh/uv/).

**From the source tree:**

```sh
cd source
uv run mercury match interview/mercury-customers.json interview/third-party-banks.json
```

**From the built wheel (no dev environment needed):**

```sh
uvx --from dist/mercury-*-py3-none-any.whl mercury \\
  match source/interview/mercury-customers.json source/interview/third-party-banks.json
```

Expected output: totals (6 match / 3 mismatch) followed by one verdict per link.

## Running the tests

```sh
cd source
uv run --extra dev pytest
```

The end-to-end spec test is `tests/test_cli.py::test_anchor_full_output`, which
asserts the full printed output against the 9-link ground truth.
{docs_html_section}
## Where to start reading

1. **`source/interview/plan.md`** — what we were trying to build, the approach, the ground-truth verdict table, and a post-interview stretch goal to port the engine to splink.
2. **`docs/explanation/matching-approach.md`** — Fellegi–Sunter framework, where our implementation is inspired by the 1969 paper, and where we deviate (no probability model, no A₂ review tier, no frequency-based weights, etc).
3. **`docs/reference/scoring-*.md`** — one reference card per field scorer (phone, email, name, nicknames) plus one for the combiner. Each explains normalization, the agreement rule, and the combiner weight.
4. **`source/src/mercury/`** — the implementation, ~4 small modules.
""",
        encoding="utf-8",
    )


@app.command()
def main(
    transcript_session: Annotated[
        str | None,
        typer.Option(help="Session UUID prefix (default: most recent for this repo)"),
    ] = None,
    docs_html: Annotated[
        Path | None,
        typer.Option(
            help="Pre-built docs site tarball (e.g. docs-vX.Y.Z.tar.gz from a Forgejo release)",
            exists=True,
            dir_okay=False,
            readable=True,
        ),
    ] = None,
) -> None:
    """Bundle transcript, source, docs and build artifacts into one .zip in the current directory."""
    session = transcript_session or _newest_session_uuid()
    sha = _short_sha()
    # An aware local time is required for %Z to render; on a naive datetime it
    # is always the empty string, which silently dropped the zone from the README.
    now = datetime.datetime.now().astimezone()
    stamp = now.strftime("%Y%m%dT%H%M%S")
    stamp_human = now.strftime("%Y-%m-%d %H:%M %Z").strip()
    bundle_name = f"mercury-handoff-{sha}-{stamp}.zip"
    output_path = Path.cwd() / bundle_name

    # Everything is staged in a throwaway directory and zipped from there, so
    # the bundle's internal paths are relative to that staging root.
    with tempfile.TemporaryDirectory(prefix="mercury-handoff-") as tmpdir:
        tmp = Path(tmpdir)
        _run_transcript(session, tmp / "transcript")
        _run_git_archive(tmp / "source")
        _run_uv_build(tmp / "dist")
        _copy_docs(tmp / "docs")
        if docs_html is not None:
            _extract_docs_html(docs_html, tmp / "docs_html")
        _write_readme(tmp / "README.md", sha, stamp_human, has_docs_html=docs_html is not None)

        console.print(f"[cyan]→ zipping bundle → {bundle_name}…[/cyan]")
        with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
            # Sorted traversal gives a stable member order; only files are added.
            for f in sorted(tmp.rglob("*")):
                if f.is_file():
                    zf.write(f, f.relative_to(tmp))

    size_mb = output_path.stat().st_size / 1024 / 1024
    console.print(f"[bold green]✓[/bold green] {output_path} ({size_mb:.1f} MiB)")
    # Plain stdout line so callers can capture the bundle path.
    print(output_path)


# Run the Typer app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    app()
