From e6524fddbbd776b777fdd70ad66ab7f7526122a4 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:20:46 -0700 Subject: [PATCH 01/39] C2(hephd-self-update): plan add goal + prerequisite cards for hephd self-update Kick off the C2 Mikado chain for an opt-in (default-off) hephd self-update mode (forge-poll -> cargo install from tag -> self-restart). Goal card plus eight prerequisite cards, indexed from how-to.md: release-poll-version-check, self-update-opt-in-flag (leaves) -> self-update-poll-loop (notify-only core) service-env-forge-access (leaf, the cargo/forge blocker) + self-update-poll-loop -> cargo-install-from-tag service-respawn-on-clean-exit (leaf, systemd Restart=always) + cargo-install-from-tag -> self-restart-after-update verify-hub-dropout-resilience (leaf, lock in the base-case guarantee) Grounded in research of hephd's sync loop, daemon lifecycle, the launchd/systemd service templates, and the forge releases API. Captured from Hephaestus task 01KTA2NSNRYT902HC3VRW00S1J. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/how-to/how-to.md | 14 +++++ .../self-update/cargo-install-from-tag.md | 38 ++++++++++++++ docs/how-to/self-update/hephd-self-update.md | 51 +++++++++++++++++++ .../self-update/release-poll-version-check.md | 32 ++++++++++++ .../self-update/self-restart-after-update.md | 34 +++++++++++++ .../self-update/self-update-opt-in-flag.md | 32 ++++++++++++ .../self-update/self-update-poll-loop.md | 34 +++++++++++++ .../self-update/service-env-forge-access.md | 37 ++++++++++++++ .../service-respawn-on-clean-exit.md | 35 +++++++++++++ .../verify-hub-dropout-resilience.md | 37 ++++++++++++++ 10 files changed, 344 insertions(+) create mode 100644 docs/how-to/self-update/cargo-install-from-tag.md create mode 100644 docs/how-to/self-update/hephd-self-update.md create mode 100644 docs/how-to/self-update/release-poll-version-check.md create mode 100644 docs/how-to/self-update/self-restart-after-update.md create mode 100644 docs/how-to/self-update/self-update-opt-in-flag.md create mode 100644 docs/how-to/self-update/self-update-poll-loop.md create mode 100644 docs/how-to/self-update/service-env-forge-access.md create mode 100644 docs/how-to/self-update/service-respawn-on-clean-exit.md create mode 100644 docs/how-to/self-update/verify-hub-dropout-resilience.md diff --git a/docs/how-to/how-to.md b/docs/how-to/how-to.md index 9a3a758..b8ee8f5 100644 --- a/docs/how-to/how-to.md +++ b/docs/how-to/how-to.md @@ -20,3 +20,17 @@ Task-oriented guides for common operations. - [[run-the-daemon]] — Run `hephd` as an OS service with `heph daemon start/stop/restart/status` - [[set-up-sync-hub]] — Stand up the canonical hub (indri) and connect an existing device as an offline-capable spoke - [[import-todoist]] — Seed a heph store from your Todoist projects + tasks (`mise run import-todoist`) + +## Active Mikado chains + +C2 chain: **hephd self-update** (opt-in daemon auto-update). See [[agent-change-process]] for the method. 
+ +- [[hephd-self-update]] — goal: opt-in, default-off mode where `hephd` polls for new releases and auto-updates itself +- [[self-update-opt-in-flag]] — the `--self-update` opt-in flag (default off) +- [[release-poll-version-check]] — poll the forge releases API and semver-compare against the running version +- [[self-update-poll-loop]] — background task wiring the flag to the version check (notify-only core) +- [[service-env-forge-access]] — give the daemon's service environment cargo + forge SSH access (the cargo/forge blocker) +- [[cargo-install-from-tag]] — rebuild + install the new binaries via `cargo install` from the release tag +- [[service-respawn-on-clean-exit]] — make the service manager respawn hephd after a clean exit (systemd `Restart=always`) +- [[self-restart-after-update]] — exit cleanly after a successful install so the new binary takes over +- [[verify-hub-dropout-resilience]] — lock in "the hub can vanish at any moment" as the base case diff --git a/docs/how-to/self-update/cargo-install-from-tag.md b/docs/how-to/self-update/cargo-install-from-tag.md new file mode 100644 index 0000000..6b43096 --- /dev/null +++ b/docs/how-to/self-update/cargo-install-from-tag.md @@ -0,0 +1,38 @@ +--- +title: Cargo install from tag +modified: 2026-06-04 +tags: + - how-to +status: active +requires: + - self-update-poll-loop + - service-env-forge-access +--- + +# Cargo install from tag + +The apply step: when the poll loop detects a newer release, rebuild + install +the new binaries from the release tag. + +## Deliverables + +- From the detected tag `vX.Y.Z`, run (via `tokio::task::spawn_blocking`, since + it's a long blocking child process): + ``` + cargo install --locked \ + --git ssh://forgejo@forge.ops.eblu.me:2222/eblume/hephaestus.git \ + --tag vX.Y.Z heph hephd heph-tui heph-quickadd + ``` + This is the exact command the install how-to and the manual redeploy use; it + swaps `~/.cargo/bin/*` in place. 
+- Capture stdout/stderr and exit status; log success/failure. A failed build + must **not** restart the daemon — only a successful install proceeds to + [[self-restart-after-update]]. +- Guard against re-running while an install is in flight (the long compile spans + multiple poll ticks): a simple "update in progress" flag. + +## Done when + +On a real newer tag, the daemon completes the install and the new binary is on +disk at `~/.cargo/bin`. Requires [[self-update-poll-loop]] and +[[service-env-forge-access]]. Part of [[hephd-self-update]]. diff --git a/docs/how-to/self-update/hephd-self-update.md b/docs/how-to/self-update/hephd-self-update.md new file mode 100644 index 0000000..e9b679a --- /dev/null +++ b/docs/how-to/self-update/hephd-self-update.md @@ -0,0 +1,51 @@ +--- +title: hephd self-update +modified: 2026-06-04 +tags: + - how-to +status: active +branch: mikado/hephd-self-update +requires: + - self-restart-after-update + - verify-hub-dropout-resilience +--- + +# hephd self-update + +**Goal (desired end state).** An opt-in, **default-off** mode where `hephd` +periodically polls the forge for a newer release and, when one exists, +rebuilds via `cargo install` from the release tag and restarts itself onto the +new binary — unattended. + +## End state + +- A new daemon flag (`--self-update`, default off) plus a poll interval. When + off, behaviour is unchanged. See [[self-update-opt-in-flag]]. +- A background task (modelled on the existing spoke sync loop, + `crates/hephd/src/server.rs` `spawn_sync_loop`) that on each tick fetches the + latest release and compares it to `heph_core::VERSION`. See + [[self-update-poll-loop]] and [[release-poll-version-check]]. +- On a newer release: run `cargo install --locked --git ssh://… --tag vX.Y.Z` + for all workspace binaries ([[cargo-install-from-tag]]), then exit cleanly so + the OS service manager respawns the new binary + ([[self-restart-after-update]], [[service-respawn-on-clean-exit]]). 
+- Running `cargo install` from inside the service requires the daemon's + environment to have cargo + forge SSH access — the known blocker tracked in + [[service-env-forge-access]]. + +## Design decisions (owner) + +- **Default off**, opt-in only. Never self-update silently by default. +- Delivery is **`cargo install` from the tag** for now (prebuilt release + binaries are a possible future, pending a cargo/forge canonical-host fix). +- **Hub can disappear at any moment** — that resilience is the *base case*, not + a special guard. The sync loop already tolerates an unreachable hub; we lock + that in rather than add update-specific guards. See + [[verify-hub-dropout-resilience]]. + +## Scope notes + +Captured from task `01KTA2NSNRYT902HC3VRW00S1J` in the `Hephaestus` project. +Possible later refinements (own cards if pursued): checksum/signature +verification of the built binary, prebuilt release-binary delivery, and a +notify-only sub-mode. diff --git a/docs/how-to/self-update/release-poll-version-check.md b/docs/how-to/self-update/release-poll-version-check.md new file mode 100644 index 0000000..9d6352c --- /dev/null +++ b/docs/how-to/self-update/release-poll-version-check.md @@ -0,0 +1,32 @@ +--- +title: Release poll + version check +modified: 2026-06-04 +tags: + - how-to +status: active +requires: [] +--- + +# Release poll + version check + +The piece that answers "is a newer release available?" — independent of any +daemon wiring, so it can be unit-tested in isolation. + +## Deliverables + +- Fetch the latest release from the forge: + `GET https://forge.ops.eblu.me/api/v1/repos/eblume/hephaestus/releases/latest`, + read `tag_name` (e.g. `v1.0.4`). hephd already depends on `ureq` and + `reqwest` (`crates/hephd/Cargo.toml`) — reuse one (the poll loop is async, so + `reqwest` fits; `ureq` would need `spawn_blocking`). +- Parse the running version: `heph_core::VERSION` is `"1.0.3 (sha)"` — take the + `X.Y.Z` head. 
Add `semver = "1"` to `crates/hephd/Cargo.toml` (already in the + lockfile transitively) and compare `tag_name` (strip leading `v`) against it. +- A pure `is_newer(current, tag) -> bool` helper with tests covering equal / + older / newer / malformed tags. + +## Done when + +Given a fixed current version and a sample releases-API JSON body, the helper +correctly reports whether an update exists. No daemon loop yet — that's +[[self-update-poll-loop]]. Part of [[hephd-self-update]]. diff --git a/docs/how-to/self-update/self-restart-after-update.md b/docs/how-to/self-update/self-restart-after-update.md new file mode 100644 index 0000000..f3efc42 --- /dev/null +++ b/docs/how-to/self-update/self-restart-after-update.md @@ -0,0 +1,34 @@ +--- +title: Self-restart after update +modified: 2026-06-04 +tags: + - how-to +status: active +requires: + - cargo-install-from-tag + - service-respawn-on-clean-exit +--- + +# Self-restart after update + +The last step: once the new binary is installed, get the running daemon to hand +off to it. + +## Deliverables + +- After a successful [[cargo-install-from-tag]], have hephd exit cleanly + (`std::process::exit(0)`) so the service manager respawns the new binary. + hephd has no graceful-shutdown path today (`serve` is an infinite accept + loop) — a clean process exit is acceptable; in-flight RPC connections simply + drop and clients reconnect (the plugin already reconnects-once). +- Relies on [[service-respawn-on-clean-exit]] so the exit is actually followed + by a respawn on both platforms. +- Log a clear "restarting into vX.Y.Z" line before exit. Optionally re-check + that the on-disk version actually changed before restarting, to avoid a + restart loop if the install was a no-op. + +## Done when + +End-to-end: an enabled daemon on an older version detects a newer release, +installs it, restarts, and comes back reporting the new `version` RPC value. +This closes the apply path of [[hephd-self-update]]. 
diff --git a/docs/how-to/self-update/self-update-opt-in-flag.md b/docs/how-to/self-update/self-update-opt-in-flag.md new file mode 100644 index 0000000..ad0f3a8 --- /dev/null +++ b/docs/how-to/self-update/self-update-opt-in-flag.md @@ -0,0 +1,32 @@ +--- +title: Self-update opt-in flag +modified: 2026-06-04 +tags: + - how-to +status: active +requires: [] +--- + +# Self-update opt-in flag + +The opt-in surface. hephd config today is pure clap flags (no config file) in +`crates/hephd/src/main.rs`. + +## Deliverables + +- Add `--self-update` (bool, **default false**) and an interval override (e.g. + `--self-update-interval-secs`, with a sane default like 6h). Document them in + the flag help. +- Thread them into the daemon the same way `--hub-url` / spoke auth are + (`Daemon::new(...).with_hub(...)` → add `.with_self_update(cfg)`). +- When the flag is absent, the daemon behaves exactly as today (the loop in + [[self-update-poll-loop]] is simply not spawned). +- Later, bake the flag into the generated service definition (launchd/systemd) + so an enabled daemon keeps self-updating across restarts — coordinate with + [[service-respawn-on-clean-exit]] (same templates in `crates/heph/src/service.rs`). + +## Done when + +`hephd --self-update` starts the daemon with the mode enabled (verifiable via a +startup log line); omitting it leaves current behaviour untouched. Part of +[[hephd-self-update]]. diff --git a/docs/how-to/self-update/self-update-poll-loop.md b/docs/how-to/self-update/self-update-poll-loop.md new file mode 100644 index 0000000..9e952b9 --- /dev/null +++ b/docs/how-to/self-update/self-update-poll-loop.md @@ -0,0 +1,34 @@ +--- +title: Self-update poll loop +modified: 2026-06-04 +tags: + - how-to +status: active +requires: + - release-poll-version-check + - self-update-opt-in-flag +--- + +# Self-update poll loop + +The background task that ties the flag to the version check. 
This card alone +yields a working **notify-only** daemon ("update available: vX.Y.Z" in the +log) — the apply path layers on after. + +## Deliverables + +- Spawn a `tokio` task modelled on `spawn_sync_loop` + (`crates/hephd/src/server.rs`): `tokio::time::interval` ticking at the + configured cadence, guarded so it's a no-op unless `--self-update` is set. +- Each tick: run the [[release-poll-version-check]]. On "newer available", log + it (and, once the apply path exists, hand off to [[cargo-install-from-tag]]). +- Errors (forge unreachable, bad JSON) are logged and the loop continues — + same resilience pattern the sync loop uses. A flaky forge must never crash or + block the daemon. + +## Done when + +With `--self-update` on and a stubbed/real "newer" release, the daemon logs an +update-available line once per detection; with the flag off, no task runs. +Requires [[release-poll-version-check]] and [[self-update-opt-in-flag]]. Part of +[[hephd-self-update]]. diff --git a/docs/how-to/self-update/service-env-forge-access.md b/docs/how-to/self-update/service-env-forge-access.md new file mode 100644 index 0000000..df6cccc --- /dev/null +++ b/docs/how-to/self-update/service-env-forge-access.md @@ -0,0 +1,37 @@ +--- +title: Service env forge access +modified: 2026-06-04 +tags: + - how-to +status: active +requires: [] +--- + +# Service env forge access + +The known blocker. `cargo install --git ssh://forgejo@forge.ops.eblu.me:2222/…` +works from an interactive shell (it has an SSH agent/key and cargo on PATH) — +but the daemon runs under launchd/systemd, whose environment likely has +**neither**. Self-update via cargo can't work until the service context can +reach the forge and run cargo. + +## What to establish + +- **cargo + toolchain on the service PATH.** launchd/systemd start with a + minimal env; `~/.cargo/bin` and rustup's toolchain must be discoverable. + Likely bake `PATH`/`EnvironmentFile` into the generated plist/unit + (`crates/heph/src/service.rs`). 
+- **Forge SSH auth without an interactive agent.** Options to evaluate: a + dedicated read-only deploy key referenced via `GIT_SSH_COMMAND`/an SSH config + entry, or `SSH_AUTH_SOCK` exported to the service. Must work headless. +- **The canonical-host caveat.** Owner note: cargo rejects `forge.ops.eblu.me` + over HTTPS because the forge advertises `forge.eblu.me` as canonical; the + **SSH** URL on port 2222 sidesteps this and is the proven path (used by the + install how-to and the v1.0.3 redeploy). Pin self-update to the SSH URL; + capture any `insteadOf`/known_hosts setup needed headlessly. + +## Done when + +A hephd running as the installed service can, in its own environment, complete +`cargo install --locked --git ssh://… --tag vX.Y.Z hephd` non-interactively. +Unblocks [[cargo-install-from-tag]]. Part of [[hephd-self-update]]. diff --git a/docs/how-to/self-update/service-respawn-on-clean-exit.md b/docs/how-to/self-update/service-respawn-on-clean-exit.md new file mode 100644 index 0000000..0ea5fae --- /dev/null +++ b/docs/how-to/self-update/service-respawn-on-clean-exit.md @@ -0,0 +1,35 @@ +--- +title: Service respawn on clean exit +modified: 2026-06-04 +tags: + - how-to +status: active +requires: [] +--- + +# Service respawn on clean exit + +For "self-restart" to mean "exit and let the manager bring up the new binary", +both service managers must respawn hephd after a **clean** (exit code 0) +shutdown. Templates live in `crates/heph/src/service.rs`. + +## Current state (from research) + +- **launchd (macOS):** plist has `KeepAlive = true` → already respawns on clean + exit. No change needed. +- **systemd (Linux):** unit is `Restart=on-failure` → a clean exit (code 0) + does **not** respawn. Self-restart would silently stop the daemon. + +## Deliverables + +- Change the systemd unit template to `Restart=always` (with a small + `RestartSec`) so a deliberate clean exit is respawned. 
+- Note in install/upgrade docs that **already-installed services must be + reinstalled** (`heph daemon` re-generates the unit) to pick up the new + policy; otherwise self-restart won't work on existing Linux installs. + +## Done when + +On both platforms, a hephd that calls `exit(0)` is brought back up by the +service manager. Pairs with [[self-restart-after-update]]. Part of +[[hephd-self-update]]. diff --git a/docs/how-to/self-update/verify-hub-dropout-resilience.md b/docs/how-to/self-update/verify-hub-dropout-resilience.md new file mode 100644 index 0000000..09ad7c8 --- /dev/null +++ b/docs/how-to/self-update/verify-hub-dropout-resilience.md @@ -0,0 +1,37 @@ +--- +title: Verify hub-dropout resilience +modified: 2026-06-04 +tags: + - how-to +status: active +requires: [] +--- + +# Verify hub-dropout resilience + +Owner requirement: "the hub can go poof at any moment" must be the **base +case**, not a guard bolted on for self-update. A self-updating hub will restart +under its spokes, so spokes must already shrug off an unreachable hub. + +## Current state (from research) + +Already largely true: `sync_once` (`crates/hephd/src/sync.rs`) propagates +errors, and the background loop (`spawn_sync_loop`, `crates/hephd/src/server.rs`) +catches them — `tracing::warn!("background sync failed: {e}")` — and continues. +The local SQLite store stays writable, so the spoke works offline and +reconciles on the next successful tick. No panic, no block. + +## Deliverables + +- Lock the guarantee in with an explicit test: a spoke whose hub is unreachable + for one or more sync cycles keeps serving local RPCs and accepting writes, + then reconciles when the hub returns. +- If any path is found that *doesn't* degrade gracefully (a blocking call, an + unwrapped error, a restart that loses unsynced ops), fix it here — that is the + whole point of this card. 
+ +## Done when + +A test demonstrates spoke survival across hub downtime, documenting the +base-case guarantee that makes a self-updating hub safe. Part of +[[hephd-self-update]]. From fad8f2f4de0589f5911fc0f6e85436271670183e Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:36:55 -0700 Subject: [PATCH 02/39] C2(hephd-self-update): impl release poll + version-check helpers Add crates/hephd/src/selfupdate.rs: a pure update_available() that compares the running heph_core::VERSION (e.g. "1.0.3 (sha)") against a release tag ("v1.0.4") via semver, ignoring the build suffix and v prefix; plus parse_latest_tag() / fetch_latest_tag() for the forge releases/latest feed. Decision logic and JSON parsing are unit-tested against sample payloads; the network fetch is isolated. Adds the semver workspace dep. Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.lock | 1 + Cargo.toml | 1 + crates/hephd/Cargo.toml | 1 + crates/hephd/src/lib.rs | 1 + crates/hephd/src/selfupdate.rs | 117 +++++++++++++++++++++++++++++++++ 5 files changed, 121 insertions(+) create mode 100644 crates/hephd/src/selfupdate.rs diff --git a/Cargo.lock b/Cargo.lock index 0a2c89f..be8f974 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2274,6 +2274,7 @@ dependencies = [ "rand 0.8.6", "reqwest", "rsa", + "semver", "serde", "serde_json", "tempfile", diff --git a/Cargo.toml b/Cargo.toml index 7d34a27..e24c881 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,6 +59,7 @@ reqwest = { version = "0.13", default-features = false, features = [ "json", "query", ] } +semver = "1" [profile.release] lto = "thin" diff --git a/crates/hephd/Cargo.toml b/crates/hephd/Cargo.toml index 9bb7b9e..fb30b17 100644 --- a/crates/hephd/Cargo.toml +++ b/crates/hephd/Cargo.toml @@ -32,6 +32,7 @@ jsonwebtoken.workspace = true keyring-core.workspace = true reqwest.workspace = true ureq.workspace = true +semver.workspace = true # The OS credential backend that `oauth.rs` registers as the keyring-core # default store — exactly 
one per platform, not the whole keyring meta-crate. diff --git a/crates/hephd/src/lib.rs b/crates/hephd/src/lib.rs index 09f8714..5d68bad 100644 --- a/crates/hephd/src/lib.rs +++ b/crates/hephd/src/lib.rs @@ -17,6 +17,7 @@ pub mod oauth; pub mod quickadd; pub mod remote; pub mod rpc; +pub mod selfupdate; pub mod server; pub mod sync; diff --git a/crates/hephd/src/selfupdate.rs b/crates/hephd/src/selfupdate.rs new file mode 100644 index 0000000..336b035 --- /dev/null +++ b/crates/hephd/src/selfupdate.rs @@ -0,0 +1,117 @@ +//! Opt-in self-update (cards: `docs/how-to/self-update/`). When enabled, hephd +//! polls the forge for a newer tagged release and rebuilds + restarts onto it. +//! +//! This first slice is the **version check**: parse the running version, fetch +//! the latest release tag from the forge, and decide whether an update exists. +//! The decision logic is pure and unit-tested; the network fetch is isolated so +//! its JSON parsing can be tested against a sample body without a live forge. + +use anyhow::{Context, Result}; + +/// The forge releases feed for this project — the latest tagged release. +/// Uses the SSH-canonical host (`forge.ops.eblu.me`); see the +/// `service-env-forge-access` card for the cargo/forge host caveat. +pub const RELEASES_LATEST_URL: &str = + "https://forge.ops.eblu.me/api/v1/repos/eblume/hephaestus/releases/latest"; + +/// Extract the bare `X.Y.Z` semver from a version string that may carry a build +/// suffix (`heph_core::VERSION` is e.g. `"1.0.3 (aa376b4)"`) or a leading `v` +/// (release tags are `v1.0.4`). +fn parse_version(s: &str) -> Result<semver::Version> { + let head = s + .trim() + .trim_start_matches('v') + .split_whitespace() + .next() + .unwrap_or(""); + semver::Version::parse(head).with_context(|| format!("parsing version {s:?}")) +} + +/// Whether `latest_tag` names a strictly newer release than `current` (the +/// running `heph_core::VERSION`). A malformed version on either side is an +/// error — never a silent "no update". 
+pub fn update_available(current: &str, latest_tag: &str) -> Result<bool> { + Ok(parse_version(latest_tag)? > parse_version(current)?) +} + +/// Pull the `tag_name` out of a Forgejo/Gitea `releases/latest` response body. +/// Split out from the HTTP fetch so it can be tested against a sample payload. +pub fn parse_latest_tag(body: &str) -> Result<String> { + #[derive(serde::Deserialize)] + struct Release { + tag_name: String, + } + let rel: Release = + serde_json::from_str(body).context("parsing forge releases/latest response")?; + Ok(rel.tag_name) +} + +/// Fetch the latest release tag from the forge over HTTP (reusing the daemon's +/// shared `reqwest::Client`). Network/HTTP/JSON failures surface as `Err` for +/// the caller to log-and-continue. +pub async fn fetch_latest_tag(http: &reqwest::Client, url: &str) -> Result<String> { + let body = http + .get(url) + .send() + .await + .context("requesting forge releases/latest")? + .error_for_status() + .context("forge releases/latest returned an error status")? + .text() + .await + .context("reading forge releases/latest body")?; + parse_latest_tag(&body) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn update_available_compares_ignoring_build_suffix_and_v_prefix() { + // Running version carries a build-sha suffix; tags carry a `v`. + assert!(update_available("1.0.3 (aa376b4)", "v1.0.4").unwrap()); + assert!(update_available("1.0.3 (aa376b4)", "v2.0.0").unwrap()); + // Same version → no update (a dirty rebuild of the same tag isn't newer). + assert!(!update_available("1.0.3 (aa376b4-dirty)", "v1.0.3").unwrap()); + // Older tag than running → no update. + assert!(!update_available("1.0.3", "v1.0.2").unwrap()); + // Patch/minor/major ordering. 
+ assert!(update_available("1.0.9", "v1.1.0").unwrap()); + assert!(!update_available("1.1.0", "v1.0.9").unwrap()); + } + + #[test] + fn update_available_errors_on_malformed_version() { + assert!(update_available("not-a-version", "v1.0.4").is_err()); + assert!(update_available("1.0.3", "vNope").is_err()); + } + + #[test] + fn parse_latest_tag_reads_tag_name_from_forge_body() { + // A trimmed sample of a Forgejo releases/latest payload. + let body = r#"{ + "id": 42, + "tag_name": "v1.0.4", + "name": "Release v1.0.4", + "draft": false, + "prerelease": false + }"#; + assert_eq!(parse_latest_tag(body).unwrap(), "v1.0.4"); + } + + #[test] + fn parse_latest_tag_errors_on_unexpected_body() { + assert!(parse_latest_tag("{}").is_err()); + assert!(parse_latest_tag("not json").is_err()); + } + + #[test] + fn end_to_end_body_to_decision() { + // Parse a release body, then decide against a fixed running version. + let tag = parse_latest_tag(r#"{"tag_name": "v1.0.4"}"#).unwrap(); + assert!(update_available("1.0.3 (aa376b4)", &tag).unwrap()); + let tag = parse_latest_tag(r#"{"tag_name": "v1.0.3"}"#).unwrap(); + assert!(!update_available("1.0.3 (aa376b4)", &tag).unwrap()); + } +} From 4a0094f9550462f1a9a8a97a73982669d0bcbb81 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:37:14 -0700 Subject: [PATCH 03/39] C2(hephd-self-update): close release-poll-version-check Version-compare + forge release parsing landed and unit-tested. 
--- docs/how-to/self-update/release-poll-version-check.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/how-to/self-update/release-poll-version-check.md b/docs/how-to/self-update/release-poll-version-check.md index 9d6352c..8ecfca2 100644 --- a/docs/how-to/self-update/release-poll-version-check.md +++ b/docs/how-to/self-update/release-poll-version-check.md @@ -3,7 +3,6 @@ title: Release poll + version check modified: 2026-06-04 tags: - how-to -status: active requires: [] --- From f6bcd506845d6c1da0c17300fb33bba87e2cf155 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:40:16 -0700 Subject: [PATCH 04/39] C2(hephd-self-update): impl --self-update opt-in flag + config plumbing Add --self-update (default off) and --self-update-interval-secs to the hephd CLI, a SelfUpdateConfig (Some => enabled), and thread it into the Daemon (with_self_update) for every mode. spawn_self_update_loop() currently just announces the mode at startup ('self-update enabled') so the opt-in is observable; the poll/apply cycle is wired in later leaves. Omitting the flag leaves behaviour unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/hephd/src/main.rs | 30 ++++++++++++++++++++++++++---- crates/hephd/src/selfupdate.rs | 31 +++++++++++++++++++++++++++++++ crates/hephd/src/server.rs | 24 ++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 4 deletions(-) diff --git a/crates/hephd/src/main.rs b/crates/hephd/src/main.rs index 62df200..ad3517c 100644 --- a/crates/hephd/src/main.rs +++ b/crates/hephd/src/main.rs @@ -17,8 +17,8 @@ use tokio::net::{TcpListener, UnixListener}; use heph_core::LocalStore; use hephd::{ - default_db_path, default_socket_path, sync, Daemon, KeyringTokenStore, LockGuard, RemoteStore, - SystemClock, TokenStore, + default_db_path, default_socket_path, selfupdate::SelfUpdateConfig, sync, Daemon, + KeyringTokenStore, LockGuard, RemoteStore, SystemClock, TokenStore, }; /// How often a spoke background-syncs with its hub. 
@@ -77,6 +77,16 @@ struct Cli { /// --oidc-issuer, the device attaches a cached bearer token to hub requests. #[arg(long)] oidc_client_id: Option, + + /// Opt-in (default off): periodically poll the forge for a newer release and + /// auto-update this daemon. Off unless this flag is given. + #[arg(long)] + self_update: bool, + + /// Override the self-update poll interval, in seconds (default: 6h). Only + /// meaningful with --self-update. + #[arg(long)] + self_update_interval_secs: Option, } /// Build the spoke/client token source: a keyring store keyed by `account` (the @@ -112,6 +122,11 @@ async fn main() -> Result<()> { .with_context(|| format!("creating socket dir {}", parent.display()))?; } + // Opt-in self-update (default off): `Some` only when `--self-update` is set. + let self_update = cli + .self_update + .then(|| SelfUpdateConfig::new(cli.self_update_interval_secs.map(Duration::from_secs))); + // Build the daemon for the chosen mode. `local`/`server` own the file (and // hold its lock for the process's life); `client` keeps no replica. let (_lock, daemon) = match cli.mode { @@ -131,7 +146,10 @@ async fn main() -> Result<()> { } None => RemoteStore::new(&server_url), }; - (None, Daemon::new(store)) + ( + None, + Daemon::new(store).with_self_update(self_update.clone()), + ) } Mode::Local | Mode::Server => { let db = cli.db.clone().unwrap_or_else(default_db_path); @@ -147,7 +165,8 @@ async fn main() -> Result<()> { }); let daemon = Daemon::new(store) .with_hub(cli.hub_url.clone()) - .with_spoke_auth(spoke); + .with_spoke_auth(spoke) + .with_self_update(self_update.clone()); // server mode: expose the hub HTTP endpoint over the same store. if cli.mode == Mode::Server { @@ -190,6 +209,9 @@ async fn main() -> Result<()> { } }; + // Opt-in self-update poller (no-op unless --self-update); mode-agnostic. + daemon.spawn_self_update_loop(); + // Replace any stale socket from a previous run, then bind. 
if socket.exists() { std::fs::remove_file(&socket) diff --git a/crates/hephd/src/selfupdate.rs b/crates/hephd/src/selfupdate.rs index 336b035..122130c 100644 --- a/crates/hephd/src/selfupdate.rs +++ b/crates/hephd/src/selfupdate.rs @@ -6,8 +6,30 @@ //! The decision logic is pure and unit-tested; the network fetch is isolated so //! its JSON parsing can be tested against a sample body without a live forge. +use std::time::Duration; + use anyhow::{Context, Result}; +/// Default poll cadence when `--self-update` is on and no interval is given. +pub const DEFAULT_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60); + +/// Configuration for the opt-in self-update mode. Its mere presence (the daemon +/// holds an `Option`) means the mode is enabled; absent ⇒ off. +#[derive(Clone, Debug)] +pub struct SelfUpdateConfig { + /// How often to poll the forge for a newer release. + pub interval: Duration, +} + +impl SelfUpdateConfig { + /// Build a config, falling back to [`DEFAULT_INTERVAL`] when no override. + pub fn new(interval: Option) -> Self { + Self { + interval: interval.unwrap_or(DEFAULT_INTERVAL), + } + } +} + /// The forge releases feed for this project — the latest tagged release. /// Uses the SSH-canonical host (`forge.ops.eblu.me`); see the /// `service-env-forge-access` card for the cargo/forge host caveat. @@ -67,6 +89,15 @@ pub async fn fetch_latest_tag(http: &reqwest::Client, url: &str) -> Result, + /// Opt-in self-update config (`Some` ⇒ enabled, tech-spec self-update card). + self_update: Option, } impl Ctx { @@ -76,6 +79,7 @@ impl Daemon { hub_url: None, http: reqwest::Client::new(), auth: None, + self_update: None, }, } } @@ -100,12 +104,32 @@ impl Daemon { self } + /// Enable opt-in self-update with the given config (`None` ⇒ stays off). 
+ pub fn with_self_update(mut self, cfg: Option) -> Daemon { + self.ctx.self_update = cfg; + self + } + /// The shared store handle, for code that needs to reach the same store the /// daemon serves (the hub HTTP router and background sync, tech-spec §6.1). pub fn store(&self) -> SharedStore { self.ctx.store.clone() } + /// If self-update is enabled, start its background poller. For now this only + /// announces the mode at startup; the polling + apply cycle is wired in + /// later self-update work. No-op when the mode is off. + pub fn spawn_self_update_loop(&self) { + let Some(cfg) = self.ctx.self_update.clone() else { + return; + }; + tracing::info!( + interval_secs = cfg.interval.as_secs(), + current = heph_core::VERSION, + "self-update enabled" + ); + } + /// If this is a spoke (`hub_url` set), spawn a background task that syncs the /// op-log with the hub every `interval` (attaching a bearer token when auth /// is configured). No-op otherwise. From 35569b0649e9315f7e6a7697495c5c3c05c142ac Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:40:47 -0700 Subject: [PATCH 05/39] C2(hephd-self-update): close self-update-opt-in-flag --self-update flag + config plumbing landed; opt-in observable via the startup log line. 
--- docs/how-to/self-update/self-update-opt-in-flag.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/how-to/self-update/self-update-opt-in-flag.md b/docs/how-to/self-update/self-update-opt-in-flag.md index ad0f3a8..ecf5625 100644 --- a/docs/how-to/self-update/self-update-opt-in-flag.md +++ b/docs/how-to/self-update/self-update-opt-in-flag.md @@ -3,7 +3,6 @@ title: Self-update opt-in flag modified: 2026-06-04 tags: - how-to -status: active requires: [] --- From 9fb200fe24b3c7339a3bfee6f92c13dfc0e734f7 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:42:21 -0700 Subject: [PATCH 06/39] C2(hephd-self-update): impl self-update poll loop (notify-only) Add a ReleaseSource trait (real ForgeReleaseSource over HTTP; injectable for tests), check_release() returning a CheckOutcome (UpToDate/UpdateAvailable/Failed) that never errors so a flaky forge can't stall the daemon, and run_poll_loop() that ticks on the configured interval and logs when a newer release is available. spawn_self_update_loop now spawns the real poller. Detection is unit-tested with a stubbed source. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/hephd/src/selfupdate.rs | 106 +++++++++++++++++++++++++++++++++ crates/hephd/src/server.rs | 12 ++-- 2 files changed, 114 insertions(+), 4 deletions(-) diff --git a/crates/hephd/src/selfupdate.rs b/crates/hephd/src/selfupdate.rs index 122130c..a1354ac 100644 --- a/crates/hephd/src/selfupdate.rs +++ b/crates/hephd/src/selfupdate.rs @@ -85,10 +85,116 @@ pub async fn fetch_latest_tag(http: &reqwest::Client, url: &str) -> Result impl std::future::Future> + Send; +} + +/// The production source: the forge's `releases/latest` over HTTP. +pub struct ForgeReleaseSource { + http: reqwest::Client, + url: String, +} + +impl ForgeReleaseSource { + /// Source backed by the daemon's shared client, hitting [`RELEASES_LATEST_URL`]. 
+ pub fn new(http: reqwest::Client) -> Self { + Self { + http, + url: RELEASES_LATEST_URL.to_string(), + } + } +} + +impl ReleaseSource for ForgeReleaseSource { + async fn latest_tag(&self) -> Result { + fetch_latest_tag(&self.http, &self.url).await + } +} + +/// The result of one self-update check — kept separate from logging so it can be +/// asserted in tests. +#[derive(Debug, PartialEq, Eq)] +pub enum CheckOutcome { + /// The running version is at or ahead of the latest release. + UpToDate, + /// A strictly newer release exists, named by this tag (e.g. `v1.0.4`). + UpdateAvailable(String), + /// The check failed (forge unreachable, bad body, unparseable version). + Failed(String), +} + +/// Run one check against `source`, comparing the latest tag to `current`. Never +/// returns `Err` — a failure is folded into [`CheckOutcome::Failed`] so the loop +/// keeps going (a flaky forge must never crash or stall the daemon). +pub async fn check_release(source: &S, current: &str) -> CheckOutcome { + match source.latest_tag().await { + Ok(tag) => match update_available(current, &tag) { + Ok(true) => CheckOutcome::UpdateAvailable(tag), + Ok(false) => CheckOutcome::UpToDate, + Err(e) => CheckOutcome::Failed(e.to_string()), + }, + Err(e) => CheckOutcome::Failed(e.to_string()), + } +} + +/// The background poll loop (notify-only for now): tick on `interval`, check for +/// a newer release, and log the outcome. Runs forever; spawned as a task. 
+pub async fn run_poll_loop(source: S, interval: Duration, current: &'static str) { + let mut tick = tokio::time::interval(interval); + loop { + tick.tick().await; + match check_release(&source, current).await { + CheckOutcome::UpdateAvailable(tag) => { + tracing::info!(%tag, current, "self-update: newer release available") + } + CheckOutcome::UpToDate => tracing::debug!(current, "self-update: up to date"), + CheckOutcome::Failed(e) => tracing::warn!("self-update: release check failed: {e}"), + } + } +} + #[cfg(test)] mod tests { use super::*; + /// A canned release source for deterministic loop/decision tests. + struct FakeSource(Result); + impl ReleaseSource for FakeSource { + async fn latest_tag(&self) -> Result { + self.0.clone().map_err(|e| anyhow::anyhow!(e)) + } + } + + #[tokio::test] + async fn check_release_reports_outcomes_from_a_stubbed_source() { + // Newer release available. + let s = FakeSource(Ok("v1.0.4".into())); + assert_eq!( + check_release(&s, "1.0.3 (sha)").await, + CheckOutcome::UpdateAvailable("v1.0.4".into()) + ); + // Already current. + let s = FakeSource(Ok("v1.0.3".into())); + assert_eq!( + check_release(&s, "1.0.3 (sha)").await, + CheckOutcome::UpToDate + ); + // Fetch failure → folded into Failed, never a panic/Err. + let s = FakeSource(Err("forge unreachable")); + assert!(matches!( + check_release(&s, "1.0.3 (sha)").await, + CheckOutcome::Failed(_) + )); + // Malformed tag → Failed. 
+ let s = FakeSource(Ok("not-a-tag".into())); + assert!(matches!( + check_release(&s, "1.0.3 (sha)").await, + CheckOutcome::Failed(_) + )); + } + #[test] fn config_defaults_interval_and_honors_override() { assert_eq!(SelfUpdateConfig::new(None).interval, DEFAULT_INTERVAL); diff --git a/crates/hephd/src/server.rs b/crates/hephd/src/server.rs index 2b6b614..0de5278 100644 --- a/crates/hephd/src/server.rs +++ b/crates/hephd/src/server.rs @@ -21,7 +21,7 @@ use heph_core::Store; use crate::oauth::{self, TokenStore}; use crate::rpc::{self, Request, Response, RpcError, INTERNAL_ERROR, PARSE_ERROR}; -use crate::selfupdate::SelfUpdateConfig; +use crate::selfupdate::{self, SelfUpdateConfig}; use crate::sync::{self, SharedStore}; /// How a spoke obtains the bearer token it presents to its hub (tech-spec §13). @@ -116,18 +116,22 @@ impl Daemon { self.ctx.store.clone() } - /// If self-update is enabled, start its background poller. For now this only - /// announces the mode at startup; the polling + apply cycle is wired in - /// later self-update work. No-op when the mode is off. + /// If self-update is enabled, spawn its background poller: every + /// `cfg.interval` it checks the forge for a newer release and (for now) logs + /// when one is available. No-op when the mode is off. 
pub fn spawn_self_update_loop(&self) { let Some(cfg) = self.ctx.self_update.clone() else { return; }; + let source = selfupdate::ForgeReleaseSource::new(self.ctx.http.clone()); tracing::info!( interval_secs = cfg.interval.as_secs(), current = heph_core::VERSION, "self-update enabled" ); + tokio::spawn(async move { + selfupdate::run_poll_loop(source, cfg.interval, heph_core::VERSION).await; + }); } /// If this is a spoke (`hub_url` set), spawn a background task that syncs the From 758854478b030668ebd6243f02ed6729dcb0615f Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:42:48 -0700 Subject: [PATCH 07/39] C2(hephd-self-update): close self-update-poll-loop Notify-only poller landed: ticks on the interval, logs when a newer release is available. The daemon now self-reports update availability. --- docs/how-to/self-update/self-update-poll-loop.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/how-to/self-update/self-update-poll-loop.md b/docs/how-to/self-update/self-update-poll-loop.md index 9e952b9..da3fd06 100644 --- a/docs/how-to/self-update/self-update-poll-loop.md +++ b/docs/how-to/self-update/self-update-poll-loop.md @@ -3,7 +3,6 @@ title: Self-update poll loop modified: 2026-06-04 tags: - how-to -status: active requires: - release-poll-version-check - self-update-opt-in-flag From 544c8bba0ec538784147247a58b862a7e2cd318e Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:44:36 -0700 Subject: [PATCH 08/39] C2(hephd-self-update): impl systemd Restart=always for clean-exit respawn Self-restart works by exiting cleanly and letting the service manager respawn the new binary. launchd already does this (KeepAlive=true), but the systemd user unit was Restart=on-failure, which ignores a clean exit (code 0). Switch to Restart=always + RestartSec=1, update the unit test, and note in run-the-daemon that existing Linux installs must `heph daemon restart` once to regenerate the unit. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/heph/src/service.rs | 9 +++++++-- docs/how-to/run-the-daemon.md | 7 ++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/crates/heph/src/service.rs b/crates/heph/src/service.rs index 6015a3d..fc642a7 100644 --- a/crates/heph/src/service.rs +++ b/crates/heph/src/service.rs @@ -168,7 +168,8 @@ fn systemd_unit(hephd: &Path, db: &Path, socket: &Path) -> String { \n\ [Service]\n\ ExecStart={hephd} --mode local --db {db} --socket {socket}\n\ - Restart=on-failure\n\ + Restart=always\n\ + RestartSec=1\n\ \n\ [Install]\n\ WantedBy=default.target\n", @@ -400,7 +401,11 @@ mod tests { --db /home/e/.local/share/heph/heph.db \ --socket /run/user/1000/heph/hephd.sock" )); - assert!(unit.contains("Restart=on-failure")); + // Restart=always (not on-failure) so a clean exit (code 0) — what + // self-update does to hand off to the new binary — is respawned too. + assert!(unit.contains("Restart=always")); + assert!(!unit.contains("Restart=on-failure")); + assert!(unit.contains("RestartSec=")); assert!(unit.contains("WantedBy=default.target")); } diff --git a/docs/how-to/run-the-daemon.md b/docs/how-to/run-the-daemon.md index 8ada221..658a664 100644 --- a/docs/how-to/run-the-daemon.md +++ b/docs/how-to/run-the-daemon.md @@ -28,9 +28,14 @@ when it's already stopped is fine. `~/Library/LaunchAgents/org.hephaestus.hephd.plist`, with `RunAtLoad` + `KeepAlive` (starts at login, restarts if it crashes). - **Linux** — a **systemd user service** (`heph.service`) at - `~/.config/systemd/user/heph.service`, with `Restart=on-failure`, enabled for + `~/.config/systemd/user/heph.service`, with `Restart=always`, enabled for login. +> **Upgrading from an older install:** earlier units used `Restart=on-failure`, +> which does **not** respawn after a clean exit — so opt-in self-update (which +> exits cleanly to hand off to the new binary) wouldn't come back on Linux. 
Run +> `heph daemon restart` once (it regenerates the unit) to pick up `Restart=always`. + Either way it runs `hephd --mode local` against the default store (`~/.local/share/heph/heph.db`) and socket, with logs at `~/.local/share/heph/hephd.log`. From 2a7a3ec2706d7c81434346a112711528cf550e5c Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:44:56 -0700 Subject: [PATCH 09/39] C2(hephd-self-update): close service-respawn-on-clean-exit systemd unit now Restart=always; both managers respawn after a clean exit. --- docs/how-to/self-update/service-respawn-on-clean-exit.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/how-to/self-update/service-respawn-on-clean-exit.md b/docs/how-to/self-update/service-respawn-on-clean-exit.md index 0ea5fae..88db290 100644 --- a/docs/how-to/self-update/service-respawn-on-clean-exit.md +++ b/docs/how-to/self-update/service-respawn-on-clean-exit.md @@ -3,7 +3,6 @@ title: Service respawn on clean exit modified: 2026-06-04 tags: - how-to -status: active requires: [] --- From 9213a943f5b755339ee056e744ce8cbe13eecd48 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:47:26 -0700 Subject: [PATCH 10/39] C2(hephd-self-update): impl verify hub-dropout resilience (+ client timeout) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lock in the base-case guarantee that a self-updating hub (which restarts under its spokes) relies on. New sync_http test: a spoke whose hub is unreachable keeps serving + accepting writes, a sync attempt fails fast (Err, not hang/panic), and when the hub returns the accumulated ops reconcile with no special recovery. The verification surfaced one non-graceful path — the daemon's shared reqwest client had no timeout, so a black-hole hub (connects, never replies) could stall the sync/self-update loop. Give it a 30s timeout so 'the hub can vanish at any moment' holds even mid-request. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/hephd/src/server.rs | 9 ++++- crates/hephd/tests/sync_http.rs | 59 +++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/crates/hephd/src/server.rs b/crates/hephd/src/server.rs index 0de5278..e6129d5 100644 --- a/crates/hephd/src/server.rs +++ b/crates/hephd/src/server.rs @@ -77,7 +77,14 @@ impl Daemon { ctx: Ctx { store: Arc::new(Mutex::new(store)), hub_url: None, - http: reqwest::Client::new(), + // Bound every hub request so a black-hole hub (one that accepts + // a connection but never replies) can't stall the sync / + // self-update loops — "the hub can vanish at any moment" is the + // base case, including vanishing mid-request. + http: reqwest::Client::builder() + .timeout(Duration::from_secs(30)) + .build() + .expect("building the daemon HTTP client"), auth: None, self_update: None, }, diff --git a/crates/hephd/tests/sync_http.rs b/crates/hephd/tests/sync_http.rs index de8b7bf..ed093ce 100644 --- a/crates/hephd/tests/sync_http.rs +++ b/crates/hephd/tests/sync_http.rs @@ -84,6 +84,65 @@ async fn a_node_propagates_a_to_hub_to_b() { assert_eq!(on_b.body.as_deref(), Some("shingles need work")); } +#[tokio::test] +async fn spoke_survives_an_unreachable_hub_then_reconciles_when_it_returns() { + // "The hub can vanish at any moment" is the base case, not a guarded edge: + // a spoke whose hub is down keeps serving + accepting writes, and when the + // hub returns its accumulated ops reconcile with no special recovery. This + // is what makes a self-updating hub (which restarts under its spokes) safe. + let http = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) // never hang the test + .build() + .unwrap(); + let (a, _ca, _da) = replica(1000); + + // Hub down: work happens locally, and a sync attempt fails *fast* (Err — not + // a panic, not a hang) and leaves the store untouched. 
+ let id = { + let mut ga = a.lock().unwrap(); + ga.create_node(NewNode::doc( + "Offline note", + "written while the hub was down", + )) + .unwrap() + .id + }; + let dead_hub = "http://127.0.0.1:1"; // nothing listens → connection refused + assert!( + sync::sync_once(a.clone(), dead_hub, &http, None) + .await + .is_err(), + "sync against a dead hub should error, not hang or panic" + ); + + // The spoke is unharmed: the note is intact and further writes still succeed. + { + let mut ga = a.lock().unwrap(); + assert_eq!(ga.get_node(&id).unwrap().unwrap().title, "Offline note"); + ga.create_node(NewNode::doc("Another", "still working offline")) + .unwrap(); + } + + // The hub returns: the spoke pushes everything it accumulated while offline, + // and a fresh replica pulls it — convergence resumes, no manual recovery. + let hub_url = start_hub().await; + let up = sync::sync_once(a.clone(), &hub_url, &http, None) + .await + .unwrap(); + assert!(up.pushed > 0, "spoke pushed nothing after the hub returned"); + let (b, _cb, _db) = replica(1000); + sync::sync_once(b.clone(), &hub_url, &http, None) + .await + .unwrap(); + let on_b = b + .lock() + .unwrap() + .get_node(&id) + .unwrap() + .expect("offline-authored node reached B after the hub recovered"); + assert_eq!(on_b.title, "Offline note"); +} + #[tokio::test] async fn divergent_scalar_edits_converge_through_the_hub_with_a_conflict() { let hub_url = start_hub().await; From fd76aa0b3a12ef4c2f46d4d30f5c019af5a5749e Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:47:53 -0700 Subject: [PATCH 11/39] C2(hephd-self-update): close verify-hub-dropout-resilience Spoke survival across hub downtime is now covered by a test; added a client timeout so a black-hole hub can't stall the loop. 
--- docs/how-to/self-update/verify-hub-dropout-resilience.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/how-to/self-update/verify-hub-dropout-resilience.md b/docs/how-to/self-update/verify-hub-dropout-resilience.md index 09ad7c8..9f01c46 100644 --- a/docs/how-to/self-update/verify-hub-dropout-resilience.md +++ b/docs/how-to/self-update/verify-hub-dropout-resilience.md @@ -3,7 +3,6 @@ title: Verify hub-dropout resilience modified: 2026-06-04 tags: - how-to -status: active requires: [] --- From 3fab6373532193173180bc4377b7f72e86e0259a Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:52:23 -0700 Subject: [PATCH 12/39] C2(hephd-self-update): impl cargo-install-from-tag (injectable Installer) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an Installer trait + CargoInstaller (runs cargo install --locked --git --tag for heph/hephd/heph-tui/heph-quickadd — the documented install command, via the SSH host that sidesteps the cargo/forge canonical-name mismatch), and apply_update() which runs the blocking install on the blocking pool. The poll loop now applies on a detected update. Apply path is unit-tested with a fake installer (call + failure paths); the real cargo subprocess is never run in tests. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/hephd/src/selfupdate.rs | 112 ++++++++++++++++++++++++++++++--- crates/hephd/src/server.rs | 4 +- 2 files changed, 107 insertions(+), 9 deletions(-) diff --git a/crates/hephd/src/selfupdate.rs b/crates/hephd/src/selfupdate.rs index a1354ac..fcaa181 100644 --- a/crates/hephd/src/selfupdate.rs +++ b/crates/hephd/src/selfupdate.rs @@ -1,11 +1,13 @@ //! Opt-in self-update (cards: `docs/how-to/self-update/`). When enabled, hephd //! polls the forge for a newer tagged release and rebuilds + restarts onto it. //! -//! This first slice is the **version check**: parse the running version, fetch -//! 
the latest release tag from the forge, and decide whether an update exists. -//! The decision logic is pure and unit-tested; the network fetch is isolated so -//! its JSON parsing can be tested against a sample body without a live forge. +//! The moving parts are dependency-injected behind traits — [`ReleaseSource`] +//! (where the latest tag comes from) and [`Installer`] (how the upgrade is +//! applied) — so the poll/apply logic is unit-tested without a live forge or a +//! real `cargo install`. The production wiring (`ForgeReleaseSource`, +//! `CargoInstaller`) is exercised only at runtime. +use std::sync::Arc; use std::time::Duration; use anyhow::{Context, Result}; @@ -139,15 +141,75 @@ pub async fn check_release(source: &S, current: &str) -> Check } } -/// The background poll loop (notify-only for now): tick on `interval`, check for -/// a newer release, and log the outcome. Runs forever; spawned as a task. -pub async fn run_poll_loop(source: S, interval: Duration, current: &'static str) { +/// The git SSH URL self-update installs from. The SSH host on port 2222 is the +/// proven path: cargo rejects the HTTPS host over its canonical-name mismatch +/// (`forge.ops.eblu.me` vs the advertised `forge.eblu.me`). See the +/// `service-env-forge-access` card. +pub const INSTALL_GIT_URL: &str = "ssh://forgejo@forge.ops.eblu.me:2222/eblume/hephaestus.git"; + +/// All workspace binaries, installed in lockstep so `heph`/`hephd`/`heph-tui` +/// never skew after an update. +pub const INSTALL_BINS: &[&str] = &["heph", "hephd", "heph-tui", "heph-quickadd"]; + +/// Applies a detected upgrade. Injectable so the apply path is testable without +/// spawning a real (minutes-long) `cargo install` (real impl: [`CargoInstaller`]). +pub trait Installer: Send + Sync + 'static { + /// Install the binaries for release `tag` (e.g. `v1.0.4`). Blocking. 
+ fn install(&self, tag: &str) -> Result<()>; +} + +/// The production installer: `cargo install --locked --git --tag ` +/// for every workspace binary — the exact command the install how-to documents. +pub struct CargoInstaller; + +impl Installer for CargoInstaller { + fn install(&self, tag: &str) -> Result<()> { + let mut cmd = std::process::Command::new("cargo"); + cmd.args([ + "install", + "--locked", + "--git", + INSTALL_GIT_URL, + "--tag", + tag, + ]); + cmd.args(INSTALL_BINS); + let status = cmd.status().context("spawning cargo install")?; + if !status.success() { + anyhow::bail!("cargo install for {tag} exited with {status}"); + } + Ok(()) + } +} + +/// Apply a detected update: install the binaries for `tag`. The blocking install +/// runs on the blocking pool so it never stalls the async runtime. (Restarting +/// onto the new binary is layered on by the self-restart step.) +pub async fn apply_update(installer: Arc, tag: &str) -> Result<()> { + let tag = tag.to_string(); + tokio::task::spawn_blocking(move || installer.install(&tag)) + .await + .context("self-update install task panicked")? +} + +/// The background poll loop: tick on `interval`, check for a newer release, and +/// when one is available, apply it. Runs forever; spawned as a task. 
+pub async fn run_poll_loop( + source: S, + installer: Arc, + interval: Duration, + current: &'static str, +) { let mut tick = tokio::time::interval(interval); loop { tick.tick().await; match check_release(&source, current).await { CheckOutcome::UpdateAvailable(tag) => { - tracing::info!(%tag, current, "self-update: newer release available") + tracing::info!(%tag, current, "self-update: newer release available, applying"); + match apply_update(installer.clone(), &tag).await { + Ok(()) => tracing::info!(%tag, "self-update: installed new binaries"), + Err(e) => tracing::error!("self-update: install failed for {tag}: {e}"), + } } CheckOutcome::UpToDate => tracing::debug!(current, "self-update: up to date"), CheckOutcome::Failed(e) => tracing::warn!("self-update: release check failed: {e}"), @@ -167,6 +229,40 @@ mod tests { } } + /// Records install calls; optionally fails, to drive the apply path. + #[derive(Default)] + struct FakeInstaller { + installed: std::sync::Mutex>, + fail: bool, + } + impl Installer for FakeInstaller { + fn install(&self, tag: &str) -> Result<()> { + self.installed.lock().unwrap().push(tag.to_string()); + if self.fail { + anyhow::bail!("simulated install failure"); + } + Ok(()) + } + } + + #[tokio::test] + async fn apply_update_invokes_the_installer_with_the_tag() { + let inst = Arc::new(FakeInstaller::default()); + apply_update(inst.clone(), "v1.0.4").await.unwrap(); + assert_eq!(*inst.installed.lock().unwrap(), vec!["v1.0.4".to_string()]); + } + + #[tokio::test] + async fn apply_update_propagates_install_failure() { + let inst = Arc::new(FakeInstaller { + fail: true, + ..Default::default() + }); + assert!(apply_update(inst.clone(), "v1.0.4").await.is_err()); + // It still attempted the install for the right tag. + assert_eq!(*inst.installed.lock().unwrap(), vec!["v1.0.4".to_string()]); + } + #[tokio::test] async fn check_release_reports_outcomes_from_a_stubbed_source() { // Newer release available. 
diff --git a/crates/hephd/src/server.rs b/crates/hephd/src/server.rs index e6129d5..757a935 100644 --- a/crates/hephd/src/server.rs +++ b/crates/hephd/src/server.rs @@ -131,13 +131,15 @@ impl Daemon { return; }; let source = selfupdate::ForgeReleaseSource::new(self.ctx.http.clone()); + let installer: std::sync::Arc = + std::sync::Arc::new(selfupdate::CargoInstaller); tracing::info!( interval_secs = cfg.interval.as_secs(), current = heph_core::VERSION, "self-update enabled" ); tokio::spawn(async move { - selfupdate::run_poll_loop(source, cfg.interval, heph_core::VERSION).await; + selfupdate::run_poll_loop(source, installer, cfg.interval, heph_core::VERSION).await; }); } From e7cb86efdf66cc17d74e8a76d423a192a41c1dcd Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:52:45 -0700 Subject: [PATCH 13/39] C2(hephd-self-update): close cargo-install-from-tag Installer trait + CargoInstaller + apply_update landed and unit-tested via injection. Real cargo execution is gated on the deployment env (service-env-forge-access). --- docs/how-to/self-update/cargo-install-from-tag.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/how-to/self-update/cargo-install-from-tag.md b/docs/how-to/self-update/cargo-install-from-tag.md index 6b43096..a099802 100644 --- a/docs/how-to/self-update/cargo-install-from-tag.md +++ b/docs/how-to/self-update/cargo-install-from-tag.md @@ -3,7 +3,6 @@ title: Cargo install from tag modified: 2026-06-04 tags: - how-to -status: active requires: - self-update-poll-loop - service-env-forge-access From bdcf4171a40bbb2c4c3dd5fe90233681c52e504c Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:54:23 -0700 Subject: [PATCH 14/39] C2(hephd-self-update): impl self-restart-after-update (injectable Restarter) Add a Restarter trait + ProcessRestarter (exit 0 so launchd KeepAlive / systemd Restart=always respawn the new binary). apply_update now installs then restarts, and the restart fires only on a successful install. 
Wired into the poll loop. Unit-tested with fake installer+restarter: restart on success, no restart after a failed install. Real process exit is never run in tests. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/hephd/src/selfupdate.rs | 88 +++++++++++++++++++++++++++------- crates/hephd/src/server.rs | 11 ++++- 2 files changed, 80 insertions(+), 19 deletions(-) diff --git a/crates/hephd/src/selfupdate.rs b/crates/hephd/src/selfupdate.rs index fcaa181..6a3300b 100644 --- a/crates/hephd/src/selfupdate.rs +++ b/crates/hephd/src/selfupdate.rs @@ -182,14 +182,39 @@ impl Installer for CargoInstaller { } } -/// Apply a detected update: install the binaries for `tag`. The blocking install -/// runs on the blocking pool so it never stalls the async runtime. (Restarting -/// onto the new binary is layered on by the self-restart step.) -pub async fn apply_update(installer: Arc, tag: &str) -> Result<()> { - let tag = tag.to_string(); - tokio::task::spawn_blocking(move || installer.install(&tag)) +/// Hands off to the freshly-installed binary. Injectable so the apply path is +/// testable without actually exiting the test process (real: [`ProcessRestarter`]). +pub trait Restarter: Send + Sync + 'static { + /// Restart onto the new binary. The production impl does not return. + fn restart(&self) -> Result<()>; +} + +/// The production restarter: exit cleanly so the OS service manager (launchd +/// `KeepAlive` / systemd `Restart=always`) respawns the new binary. In-flight +/// RPC connections simply drop; clients reconnect (the nvim plugin already does). +pub struct ProcessRestarter; + +impl Restarter for ProcessRestarter { + fn restart(&self) -> Result<()> { + tracing::info!("self-update: exiting to let the service manager start the new binary"); + std::process::exit(0); + } +} + +/// Apply a detected update: install the binaries for `tag`, then restart onto +/// them. 
The blocking install runs on the blocking pool so it never stalls the +/// async runtime; the restart only happens if the install succeeded. +pub async fn apply_update( + installer: Arc, + restarter: Arc, + tag: &str, +) -> Result<()> { + let owned = tag.to_string(); + tokio::task::spawn_blocking(move || installer.install(&owned)) .await - .context("self-update install task panicked")? + .context("self-update install task panicked")??; + tracing::info!(%tag, "self-update: installed; restarting into the new binary"); + restarter.restart() } /// The background poll loop: tick on `interval`, check for a newer release, and @@ -197,6 +222,7 @@ pub async fn apply_update(installer: Arc, tag: &str) -> Result<() pub async fn run_poll_loop( source: S, installer: Arc, + restarter: Arc, interval: Duration, current: &'static str, ) { @@ -206,9 +232,10 @@ pub async fn run_poll_loop( match check_release(&source, current).await { CheckOutcome::UpdateAvailable(tag) => { tracing::info!(%tag, current, "self-update: newer release available, applying"); - match apply_update(installer.clone(), &tag).await { - Ok(()) => tracing::info!(%tag, "self-update: installed new binaries"), - Err(e) => tracing::error!("self-update: install failed for {tag}: {e}"), + // On success the restarter exits the process, so this only + // returns on failure — log it and keep polling. + if let Err(e) = apply_update(installer.clone(), restarter.clone(), &tag).await { + tracing::error!("self-update: failed for {tag}: {e}"); } } CheckOutcome::UpToDate => tracing::debug!(current, "self-update: up to date"), @@ -245,22 +272,47 @@ mod tests { } } - #[tokio::test] - async fn apply_update_invokes_the_installer_with_the_tag() { - let inst = Arc::new(FakeInstaller::default()); - apply_update(inst.clone(), "v1.0.4").await.unwrap(); - assert_eq!(*inst.installed.lock().unwrap(), vec!["v1.0.4".to_string()]); + /// Records whether a restart was requested (instead of exiting the process). 
+ #[derive(Default)] + struct FakeRestarter { + restarted: std::sync::Mutex, + } + impl Restarter for FakeRestarter { + fn restart(&self) -> Result<()> { + *self.restarted.lock().unwrap() = true; + Ok(()) + } } #[tokio::test] - async fn apply_update_propagates_install_failure() { + async fn apply_update_installs_then_restarts_on_success() { + let inst = Arc::new(FakeInstaller::default()); + let restart = Arc::new(FakeRestarter::default()); + apply_update(inst.clone(), restart.clone(), "v1.0.4") + .await + .unwrap(); + assert_eq!(*inst.installed.lock().unwrap(), vec!["v1.0.4".to_string()]); + assert!( + *restart.restarted.lock().unwrap(), + "should restart on success" + ); + } + + #[tokio::test] + async fn apply_update_does_not_restart_when_install_fails() { let inst = Arc::new(FakeInstaller { fail: true, ..Default::default() }); - assert!(apply_update(inst.clone(), "v1.0.4").await.is_err()); - // It still attempted the install for the right tag. + let restart = Arc::new(FakeRestarter::default()); + assert!(apply_update(inst.clone(), restart.clone(), "v1.0.4") + .await + .is_err()); assert_eq!(*inst.installed.lock().unwrap(), vec!["v1.0.4".to_string()]); + assert!( + !*restart.restarted.lock().unwrap(), + "must NOT restart after a failed install" + ); } #[tokio::test] diff --git a/crates/hephd/src/server.rs b/crates/hephd/src/server.rs index 757a935..16e7494 100644 --- a/crates/hephd/src/server.rs +++ b/crates/hephd/src/server.rs @@ -133,13 +133,22 @@ impl Daemon { let source = selfupdate::ForgeReleaseSource::new(self.ctx.http.clone()); let installer: std::sync::Arc = std::sync::Arc::new(selfupdate::CargoInstaller); + let restarter: std::sync::Arc = + std::sync::Arc::new(selfupdate::ProcessRestarter); tracing::info!( interval_secs = cfg.interval.as_secs(), current = heph_core::VERSION, "self-update enabled" ); tokio::spawn(async move { - selfupdate::run_poll_loop(source, installer, cfg.interval, heph_core::VERSION).await; + selfupdate::run_poll_loop( + source, + 
installer, + restarter, + cfg.interval, + heph_core::VERSION, + ) + .await; }); } From c237be6604af3a4b5ed91dd2ab80bdce1c6528de Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:54:50 -0700 Subject: [PATCH 15/39] C2(hephd-self-update): close self-restart-after-update Restarter + ProcessRestarter wired: install then exit(0) so the service manager respawns the new binary; restart only on a successful install. Unit-tested via injection. --- docs/how-to/self-update/self-restart-after-update.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/how-to/self-update/self-restart-after-update.md b/docs/how-to/self-update/self-restart-after-update.md index f3efc42..1f95128 100644 --- a/docs/how-to/self-update/self-restart-after-update.md +++ b/docs/how-to/self-update/self-restart-after-update.md @@ -3,7 +3,6 @@ title: Self-restart after update modified: 2026-06-04 tags: - how-to -status: active requires: - cargo-install-from-tag - service-respawn-on-clean-exit From 20418240f78ec7105550ab79f16f13c2bb5acda2 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 13:56:27 -0700 Subject: [PATCH 16/39] C2(hephd-self-update): impl correct spawn_self_update_loop doc The poller now installs + restarts (not just logs); fix the stale doc and point at service-env-forge-access as the deployment step that makes the apply path operational. --- crates/hephd/src/server.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/hephd/src/server.rs b/crates/hephd/src/server.rs index 16e7494..bf245ba 100644 --- a/crates/hephd/src/server.rs +++ b/crates/hephd/src/server.rs @@ -124,8 +124,13 @@ impl Daemon { } /// If self-update is enabled, spawn its background poller: every - /// `cfg.interval` it checks the forge for a newer release and (for now) logs - /// when one is available. No-op when the mode is off. 
+ /// `cfg.interval` it checks the forge for a newer release and, when one is + /// found, installs it and restarts onto the new binary. No-op when off. + /// + /// Note: the *apply* path runs `cargo install` and exits, which only works + /// when the daemon's service environment can reach the forge over SSH and + /// find cargo — see the `service-env-forge-access` card (the deployment step + /// that makes this operational). pub fn spawn_self_update_loop(&self) { let Some(cfg) = self.ctx.self_update.clone() else { return; From 59822d72573eb9851ade67ec64d97a4180cc8f9a Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 14:46:34 -0700 Subject: [PATCH 17/39] C2(hephd-self-update): impl service-env-forge-access (public HTTPS, cargo on PATH) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The repo is public, so self-update needs no credentials: cargo install --git is a plain anonymous clone (NOT the access-restricted Forgejo cargo registry, which is what required forge.ops.eblu.me). Point INSTALL_GIT_URL and the releases poll at the canonical public host over HTTPS — verified end-to-end (cargo install --git https://forge.eblu.me/... --tag v1.0.3 builds a working hephd with zero auth). Make the headless service able to run the apply path: 'heph daemon start --self-update' (default off) generates a launchd/systemd service that passes --self-update and bakes a PATH (incl ~/.cargo/bin) + HOME so the minimal service env can find cargo. restart preserves the setting. Default (no flag) services are byte-identical to before. Template + URL behavior covered by unit tests. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/heph/src/service.rs | 138 +++++++++++++++++++++++++++++---- crates/hephd/src/selfupdate.rs | 33 +++++--- docs/how-to/run-the-daemon.md | 10 +++ 3 files changed, 156 insertions(+), 25 deletions(-) diff --git a/crates/heph/src/service.rs b/crates/heph/src/service.rs index fc642a7..7b15865 100644 --- a/crates/heph/src/service.rs +++ b/crates/heph/src/service.rs @@ -19,12 +19,22 @@ const LABEL: &str = "org.hephaestus.hephd"; #[derive(Subcommand, Debug)] pub enum DaemonAction { /// Install (if needed) and start the daemon service. - Start, + Start { + /// Generate a service that runs with opt-in self-update enabled + /// (default off). The service gets a PATH that can find cargo. + #[arg(long)] + self_update: bool, + }, /// Stop the daemon now (it may restart at next login; use `uninstall` to /// stop it for good). Stop, - /// Restart the daemon — run this after upgrading the binary. - Restart, + /// Restart the daemon — run this after upgrading the binary. Preserves the + /// existing self-update setting unless `--self-update` re-enables it. + Restart { + /// Force self-update on when regenerating the service definition. + #[arg(long)] + self_update: bool, + }, /// Show whether the service is installed and running. Status, /// Stop and remove the service entirely. @@ -114,8 +124,26 @@ fn xml_escape(s: &str) -> String { .replace('>', ">") } -fn launchd_plist(hephd: &Path, db: &Path, socket: &Path, log: &Path) -> String { +fn launchd_plist(hephd: &Path, db: &Path, socket: &Path, log: &Path, self_update: bool) -> String { let arg = |p: &Path| xml_escape(&p.to_string_lossy()); + // Opt-in self-update: pass the flag, and give the service a PATH/HOME that + // can find cargo + the toolchain (a LaunchAgent's default env can't), since + // the apply path shells out to `cargo install`. 
+ let self_update_arg = if self_update { + "\n --self-update".to_string() + } else { + String::new() + }; + let cargo_env = if self_update { + let (path, home) = cargo_env(); + format!( + "\n PATH\n {}\n HOME\n {}", + xml_escape(&path), + xml_escape(&home), + ) + } else { + String::new() + }; format!( r#" @@ -131,7 +159,7 @@ fn launchd_plist(hephd: &Path, db: &Path, socket: &Path, log: &Path) -> String { --db {db} --socket - {socket} + {socket}{self_update_arg} RunAtLoad @@ -143,7 +171,7 @@ fn launchd_plist(hephd: &Path, db: &Path, socket: &Path, log: &Path) -> String { Aqua session as a LaunchAgent, so its child gets the GUI/hotkey it needs. Opt-in here (not in dev/test runs, which never set it). --> HEPH_QUICKADD - 1 + 1{cargo_env} StandardOutPath {log} @@ -160,14 +188,44 @@ fn launchd_plist(hephd: &Path, db: &Path, socket: &Path, log: &Path) -> String { ) } -fn systemd_unit(hephd: &Path, db: &Path, socket: &Path) -> String { +/// A `PATH`/`HOME` pair for a service that must run `cargo install`. Service +/// managers start with a minimal environment, so we prepend `~/.cargo/bin` (which +/// holds cargo and the rustup toolchain shims) to the usual locations and pin +/// `HOME`, which cargo needs for its registry/cache. +fn cargo_env() -> (String, String) { + let home = std::env::var("HOME").unwrap_or_default(); + let path = + format!("{home}/.cargo/bin:/usr/local/bin:/opt/homebrew/bin:/usr/bin:/bin:/usr/sbin:/sbin"); + (path, home) +} + +/// Whether an already-installed service file opted into self-update — so +/// `restart` (which regenerates the file) preserves the setting instead of +/// silently turning it off. 
+fn file_opts_into_self_update(path: &Path) -> bool { + std::fs::read_to_string(path) + .map(|s| s.contains("--self-update")) + .unwrap_or(false) +} + +fn systemd_unit(hephd: &Path, db: &Path, socket: &Path, self_update: bool) -> String { + // Opt-in self-update: pass the flag and give the unit a PATH/HOME that can + // find cargo + the toolchain, since the apply path runs `cargo install`. + let su_arg = if self_update { " --self-update" } else { "" }; + let cargo_env = if self_update { + let (path, home) = cargo_env(); + format!("Environment=PATH={path}\nEnvironment=HOME={home}\n") + } else { + String::new() + }; format!( "[Unit]\n\ Description=heph daemon (hephd)\n\ After=default.target\n\ \n\ [Service]\n\ - ExecStart={hephd} --mode local --db {db} --socket {socket}\n\ + ExecStart={hephd} --mode local --db {db} --socket {socket}{su_arg}\n\ + {cargo_env}\ Restart=always\n\ RestartSec=1\n\ \n\ @@ -245,8 +303,11 @@ fn launchd(action: &DaemonAction, p: &Paths) -> Result<()> { let target = format!("gui/{uid}/{LABEL}"); match action { - DaemonAction::Start => { - write_if_changed(&plist, &launchd_plist(&p.hephd, &p.db, &p.socket, &p.log))?; + DaemonAction::Start { self_update } => { + write_if_changed( + &plist, + &launchd_plist(&p.hephd, &p.db, &p.socket, &p.log, *self_update), + )?; if launchd_loaded(&target) { println!("heph daemon already running ({LABEL})."); } else { @@ -261,8 +322,12 @@ fn launchd(action: &DaemonAction, p: &Paths) -> Result<()> { let (_ok, _err) = run_cmd("launchctl", &["bootout", &target])?; println!("heph daemon stopped (still installed; `uninstall` to remove)."); } - DaemonAction::Restart => { - write_if_changed(&plist, &launchd_plist(&p.hephd, &p.db, &p.socket, &p.log))?; + DaemonAction::Restart { self_update } => { + let su = *self_update || file_opts_into_self_update(&plist); + write_if_changed( + &plist, + &launchd_plist(&p.hephd, &p.db, &p.socket, &p.log, su), + )?; let _ = run_cmd("launchctl", &["bootout", &target])?; let (ok, err) = 
run_cmd("launchctl", &["bootstrap", &domain, &plist_str(&plist)?])?; if !ok { @@ -315,8 +380,11 @@ fn sc(args: &[&str]) -> Result<(bool, String)> { fn systemd(action: &DaemonAction, p: &Paths) -> Result<()> { let unit = systemd_unit_path()?; match action { - DaemonAction::Start => { - write_if_changed(&unit, &systemd_unit(&p.hephd, &p.db, &p.socket))?; + DaemonAction::Start { self_update } => { + write_if_changed( + &unit, + &systemd_unit(&p.hephd, &p.db, &p.socket, *self_update), + )?; sc(&["daemon-reload"])?; let (ok, err) = sc(&["enable", "--now", UNIT])?; if !ok { @@ -328,8 +396,9 @@ fn systemd(action: &DaemonAction, p: &Paths) -> Result<()> { sc(&["stop", UNIT])?; println!("heph daemon stopped (still enabled; `uninstall` to remove)."); } - DaemonAction::Restart => { - write_if_changed(&unit, &systemd_unit(&p.hephd, &p.db, &p.socket))?; + DaemonAction::Restart { self_update } => { + let su = *self_update || file_opts_into_self_update(&unit); + write_if_changed(&unit, &systemd_unit(&p.hephd, &p.db, &p.socket, su))?; sc(&["daemon-reload"])?; let (ok, err) = sc(&["restart", UNIT])?; if !ok { @@ -378,6 +447,7 @@ mod tests { Path::new("/home/e/.local/share/heph/heph.db"), Path::new("/tmp/heph/hephd.sock"), Path::new("/home/e/.local/share/heph/hephd.log"), + false, ); assert!(plist.contains("org.hephaestus.hephd")); assert!(plist.contains("/usr/local/bin/hephd")); @@ -387,6 +457,24 @@ mod tests { assert!(plist.contains("RunAtLoad")); assert!(plist.contains("KeepAlive")); assert!(plist.contains("hephd.log")); + // Default (no self-update): no flag, no cargo PATH baked in. 
+ assert!(!plist.contains("--self-update")); + assert!(!plist.contains(".cargo/bin")); + } + + #[test] + fn launchd_plist_self_update_adds_flag_and_cargo_path() { + let plist = launchd_plist( + Path::new("/usr/local/bin/hephd"), + Path::new("/db"), + Path::new("/sock"), + Path::new("/log"), + true, + ); + assert!(plist.contains("--self-update")); + assert!(plist.contains("PATH")); + assert!(plist.contains(".cargo/bin")); + assert!(plist.contains("HOME")); } #[test] @@ -395,6 +483,7 @@ mod tests { Path::new("/usr/local/bin/hephd"), Path::new("/home/e/.local/share/heph/heph.db"), Path::new("/run/user/1000/heph/hephd.sock"), + false, ); assert!(unit.contains( "ExecStart=/usr/local/bin/hephd --mode local \ @@ -407,6 +496,23 @@ mod tests { assert!(!unit.contains("Restart=on-failure")); assert!(unit.contains("RestartSec=")); assert!(unit.contains("WantedBy=default.target")); + // Default (no self-update): no flag, no baked env. + assert!(!unit.contains("--self-update")); + assert!(!unit.contains("Environment=PATH=")); + } + + #[test] + fn systemd_unit_self_update_adds_flag_and_env() { + let unit = systemd_unit( + Path::new("/usr/local/bin/hephd"), + Path::new("/db"), + Path::new("/sock"), + true, + ); + assert!(unit.contains("--self-update")); + assert!(unit.contains("Environment=PATH=")); + assert!(unit.contains(".cargo/bin")); + assert!(unit.contains("Environment=HOME=")); } #[test] diff --git a/crates/hephd/src/selfupdate.rs b/crates/hephd/src/selfupdate.rs index 6a3300b..8c7d470 100644 --- a/crates/hephd/src/selfupdate.rs +++ b/crates/hephd/src/selfupdate.rs @@ -32,11 +32,11 @@ impl SelfUpdateConfig { } } -/// The forge releases feed for this project — the latest tagged release. -/// Uses the SSH-canonical host (`forge.ops.eblu.me`); see the -/// `service-env-forge-access` card for the cargo/forge host caveat. +/// The forge releases feed for this project — the latest tagged release. 
The +/// repo is public, so this is an unauthenticated GET on the canonical public +/// host. pub const RELEASES_LATEST_URL: &str = - "https://forge.ops.eblu.me/api/v1/repos/eblume/hephaestus/releases/latest"; + "https://forge.eblu.me/api/v1/repos/eblume/hephaestus/releases/latest"; /// Extract the bare `X.Y.Z` semver from a version string that may carry a build /// suffix (`heph_core::VERSION` is e.g. `"1.0.3 (aa376b4)"`) or a leading `v` @@ -141,11 +141,12 @@ pub async fn check_release(source: &S, current: &str) -> Check } } -/// The git SSH URL self-update installs from. The SSH host on port 2222 is the -/// proven path: cargo rejects the HTTPS host over its canonical-name mismatch -/// (`forge.ops.eblu.me` vs the advertised `forge.eblu.me`). See the -/// `service-env-forge-access` card. -pub const INSTALL_GIT_URL: &str = "ssh://forgejo@forge.ops.eblu.me:2222/eblume/hephaestus.git"; +/// The git URL self-update installs from. hephaestus is a **public** repo, and +/// `cargo install --git` is a plain anonymous git clone — *not* the Forgejo +/// cargo *registry* (that's access-restricted and needs `forge.ops.eblu.me`; +/// this is unrelated). So a credential-free HTTPS clone of the canonical public +/// host works from any device. +pub const INSTALL_GIT_URL: &str = "https://forge.eblu.me/eblume/hephaestus.git"; /// All workspace binaries, installed in lockstep so `heph`/`hephd`/`heph-tui` /// never skew after an update. @@ -284,6 +285,20 @@ mod tests { } } + #[test] + fn install_and_release_urls_are_public_https_no_ssh() { + // hephaestus is public; cargo install --git is a plain clone (not the + // access-restricted Forgejo cargo registry), so no SSH / credentials. 
+ for url in [INSTALL_GIT_URL, RELEASES_LATEST_URL] { + assert!(url.starts_with("https://"), "{url} must be HTTPS"); + assert!(!url.contains("ssh://"), "{url} must not use SSH"); + assert!( + url.contains("forge.eblu.me"), + "{url} should use the canonical public host" + ); + } + } + #[tokio::test] async fn apply_update_installs_then_restarts_on_success() { let inst = Arc::new(FakeInstaller::default()); diff --git a/docs/how-to/run-the-daemon.md b/docs/how-to/run-the-daemon.md index 658a664..2b00dff 100644 --- a/docs/how-to/run-the-daemon.md +++ b/docs/how-to/run-the-daemon.md @@ -53,6 +53,16 @@ still the old binary until you restart it: heph daemon restart ``` +## Self-update (opt-in) + +`hephd` can keep itself current: `heph daemon start --self-update` generates a +service that polls the forge for newer releases and, when one appears, rebuilds +via `cargo install` (anonymous HTTPS clone of the public repo — no credentials) +and restarts onto the new binary. It is **off by default**; the generated +service also gets a `PATH` that can find cargo. `heph daemon restart` preserves +the setting (pass `--self-update` again to turn it on later). Requires the Rust +toolchain (`cargo`) installed for the service user. + ## Development isolation `heph daemon` manages the **installed** daemon on the default paths. For in-repo From 240c8a9f68c1f975a96d5e6b73c1f6fc6417a79c Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 14:47:05 -0700 Subject: [PATCH 18/39] C2(hephd-self-update): close service-env-forge-access Public repo => anonymous HTTPS clone, no credentials (the SSH/canonical premise was wrong: that was the access-restricted cargo registry, not git clone). Install URL points at the canonical public host (verified end to end); the service template bakes cargo onto PATH. Card rewritten to reflect what actually happened. 
--- .../self-update/service-env-forge-access.md | 50 ++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/docs/how-to/self-update/service-env-forge-access.md b/docs/how-to/self-update/service-env-forge-access.md index df6cccc..7a31238 100644 --- a/docs/how-to/self-update/service-env-forge-access.md +++ b/docs/how-to/self-update/service-env-forge-access.md @@ -3,35 +3,39 @@ title: Service env forge access modified: 2026-06-04 tags: - how-to -status: active requires: [] --- # Service env forge access -The known blocker. `cargo install --git ssh://forgejo@forge.ops.eblu.me:2222/…` -works from an interactive shell (it has an SSH agent/key and cargo on PATH) — -but the daemon runs under launchd/systemd, whose environment likely has -**neither**. Self-update via cargo can't work until the service context can -reach the forge and run cargo. +The runtime-environment prerequisite for the apply path: a `hephd` started by +launchd/systemd runs with a minimal environment, so it must be able to find +cargo and fetch the repo when it runs `cargo install`. -## What to establish +## Resolved (and how the original premise was wrong) -- **cargo + toolchain on the service PATH.** launchd/systemd start with a - minimal env; `~/.cargo/bin` and rustup's toolchain must be discoverable. - Likely bake `PATH`/`EnvironmentFile` into the generated plist/unit - (`crates/heph/src/service.rs`). -- **Forge SSH auth without an interactive agent.** Options to evaluate: a - dedicated read-only deploy key referenced via `GIT_SSH_COMMAND`/an SSH config - entry, or `SSH_AUTH_SOCK` exported to the service. Must work headless. -- **The canonical-host caveat.** Owner note: cargo rejects `forge.ops.eblu.me` - over HTTPS because the forge advertises `forge.eblu.me` as canonical; the - **SSH** URL on port 2222 sidesteps this and is the proven path (used by the - install how-to and the v1.0.3 redeploy). 
Pin self-update to the SSH URL; - capture any `insteadOf`/known_hosts setup needed headlessly. +This card was first written assuming self-update needed **forge SSH +credentials** for a headless service — because the install how-to uses +`ssh://forgejo@forge.ops.eblu.me:2222/…`. That premise was wrong: -## Done when +- **hephaestus is a public repo**, and `cargo install --git` is a plain + anonymous git clone — *not* the Forgejo cargo **registry** (the registry is + access-restricted and is the thing that required `forge.ops.eblu.me`; it is + unrelated to git clone). So **no credentials, no SSH, no deploy key**. +- Verified end-to-end: `cargo install --git https://forge.eblu.me/eblume/hephaestus.git --tag v1.0.3 hephd` + builds a working binary anonymously. Self-update uses that canonical public + HTTPS URL (`INSTALL_GIT_URL`), and the release poll uses the same host. -A hephd running as the installed service can, in its own environment, complete -`cargo install --locked --git ssh://… --tag hephd` non-interactively. -Unblocks [[cargo-install-from-tag]]. Part of [[hephd-self-update]]. +So the only real requirement was the **environment**, handled in +`crates/heph/src/service.rs`: `heph daemon start --self-update` generates a +launchd/systemd service that passes `--self-update` and bakes a `PATH` +(including `~/.cargo/bin`) + `HOME` so the minimal service env can find cargo +and the toolchain. `restart` preserves the setting. Default services are +unchanged. + +## Remaining (owner) + +The Rust toolchain must be installed for the service user (cargo builds from +source), and a real on-device run — enable `--self-update`, then confirm a +live upgrade when the next release lands — is the final end-to-end check. See +[[hephd-self-update]]. 
From 443763489b18d43f313cffab36249880eaf4f27e Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 14:48:44 -0700 Subject: [PATCH 19/39] =?UTF-8?q?C2(hephd-self-update):=20finalize=20?= =?UTF-8?q?=E2=80=94=20single=20self-update=20how-to=20+=20changelog?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collapse the eight Mikado scaffolding cards (+ goal card) into one user-facing how-to, docs/how-to/self-update.md: what self-update is and how to enable it. The per-card breakdown was build-time scaffolding, not documentation. Keeps the changelog fragment; updates the how-to index. Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/changelog.d/hephd-self-update.feature.md | 1 + docs/how-to/how-to.md | 15 +---- docs/how-to/self-update.md | 56 +++++++++++++++++++ .../self-update/cargo-install-from-tag.md | 37 ------------ docs/how-to/self-update/hephd-self-update.md | 51 ----------------- .../self-update/release-poll-version-check.md | 31 ---------- .../self-update/self-restart-after-update.md | 33 ----------- .../self-update/self-update-opt-in-flag.md | 31 ---------- .../self-update/self-update-poll-loop.md | 33 ----------- .../self-update/service-env-forge-access.md | 41 -------------- .../service-respawn-on-clean-exit.md | 34 ----------- .../verify-hub-dropout-resilience.md | 36 ------------ 12 files changed, 58 insertions(+), 341 deletions(-) create mode 100644 docs/changelog.d/hephd-self-update.feature.md create mode 100644 docs/how-to/self-update.md delete mode 100644 docs/how-to/self-update/cargo-install-from-tag.md delete mode 100644 docs/how-to/self-update/hephd-self-update.md delete mode 100644 docs/how-to/self-update/release-poll-version-check.md delete mode 100644 docs/how-to/self-update/self-restart-after-update.md delete mode 100644 docs/how-to/self-update/self-update-opt-in-flag.md delete mode 100644 docs/how-to/self-update/self-update-poll-loop.md delete mode 100644 
docs/how-to/self-update/service-env-forge-access.md delete mode 100644 docs/how-to/self-update/service-respawn-on-clean-exit.md delete mode 100644 docs/how-to/self-update/verify-hub-dropout-resilience.md diff --git a/docs/changelog.d/hephd-self-update.feature.md b/docs/changelog.d/hephd-self-update.feature.md new file mode 100644 index 0000000..90cd33d --- /dev/null +++ b/docs/changelog.d/hephd-self-update.feature.md @@ -0,0 +1 @@ +Opt-in (default off) **hephd self-update**: `hephd --self-update` polls the forge for a newer release on an interval and, when one appears, rebuilds via `cargo install` from the release tag (anonymous HTTPS clone of the public repo — no credentials) and restarts onto the new binary. Enable it on the managed service with `heph daemon start --self-update` (which also bakes a cargo-capable `PATH` into the launchd/systemd unit and switches systemd to `Restart=always` so a clean self-exit respawns). The install mechanism is verified end-to-end; a live cross-version upgrade is confirmed on the first release after this lands. Also hardens hub resilience: the daemon's HTTP client now has a 30s timeout so a black-hole hub can't stall the sync/self-update loop. diff --git a/docs/how-to/how-to.md b/docs/how-to/how-to.md index b8ee8f5..eb0a6c8 100644 --- a/docs/how-to/how-to.md +++ b/docs/how-to/how-to.md @@ -20,17 +20,4 @@ Task-oriented guides for common operations. - [[run-the-daemon]] — Run `hephd` as an OS service with `heph daemon start/stop/restart/status` - [[set-up-sync-hub]] — Stand up the canonical hub (indri) and connect an existing device as an offline-capable spoke - [[import-todoist]] — Seed a heph store from your Todoist projects + tasks (`mise run import-todoist`) - -## Active Mikado chains - -C2 chain: **hephd self-update** (opt-in daemon auto-update). See [[agent-change-process]] for the method. 
- -- [[hephd-self-update]] — goal: opt-in, default-off mode where `hephd` polls for new releases and auto-updates itself -- [[self-update-opt-in-flag]] — the `--self-update` opt-in flag (default off) -- [[release-poll-version-check]] — poll the forge releases API and semver-compare against the running version -- [[self-update-poll-loop]] — background task wiring the flag to the version check (notify-only core) -- [[service-env-forge-access]] — give the daemon's service environment cargo + forge SSH access (the cargo/forge blocker) -- [[cargo-install-from-tag]] — rebuild + install the new binaries via `cargo install` from the release tag -- [[service-respawn-on-clean-exit]] — make the service manager respawn hephd after a clean exit (systemd `Restart=always`) -- [[self-restart-after-update]] — exit cleanly after a successful install so the new binary takes over -- [[verify-hub-dropout-resilience]] — lock in "the hub can vanish at any moment" as the base case +- [[self-update]] — Opt-in `hephd` self-update: poll the forge for new releases and auto-update diff --git a/docs/how-to/self-update.md b/docs/how-to/self-update.md new file mode 100644 index 0000000..d4dda1f --- /dev/null +++ b/docs/how-to/self-update.md @@ -0,0 +1,56 @@ +--- +title: hephd self-update +modified: 2026-06-04 +tags: + - how-to +--- + +# hephd self-update + +`hephd` can keep itself current: it polls the forge for a newer release and, when +one appears, rebuilds and restarts onto it — unattended. It is **opt-in and off +by default**. + +## Enable it + +On the managed service: + +```bash +heph daemon start --self-update +``` + +That generates a launchd/systemd service that runs `hephd --self-update` and +gives it a `PATH` that can find `cargo`. `heph daemon restart` preserves the +setting (pass `--self-update` again to turn it on later). 
To run the daemon +directly instead: + +```bash +hephd --self-update # default: poll every 6h +hephd --self-update --self-update-interval-secs 3600 +``` + +## How it works + +1. Each interval, `hephd` GETs the forge's `releases/latest` and compares the tag + against its own version (the one `heph --version` reports). +2. On a newer release it runs `cargo install --locked --git https://forge.eblu.me/eblume/hephaestus.git + --tag vX.Y.Z` for `heph`/`hephd`/`heph-tui`/`heph-quickadd`. hephaestus is a + public repo, so this is an anonymous clone — **no credentials**. +3. On a successful install it exits cleanly; the service manager (launchd + `KeepAlive` / systemd `Restart=always`) brings the new binary up. + +A failed poll or build is logged and the daemon keeps running on its current +version — self-update never takes the daemon down. + +## Requirements & notes + +- The **Rust toolchain** (`cargo`) must be installed for the service user; the + update builds from source. +- Off by default — nothing happens unless `--self-update` is passed. +- The first real cross-version upgrade is observable on the first release cut + after enabling it. + +## Related + +- [[run-the-daemon]] — running `hephd` as an OS service +- [[install-heph]] — installing the binaries diff --git a/docs/how-to/self-update/cargo-install-from-tag.md b/docs/how-to/self-update/cargo-install-from-tag.md deleted file mode 100644 index a099802..0000000 --- a/docs/how-to/self-update/cargo-install-from-tag.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -title: Cargo install from tag -modified: 2026-06-04 -tags: - - how-to -requires: - - self-update-poll-loop - - service-env-forge-access ---- - -# Cargo install from tag - -The apply step: when the poll loop detects a newer release, rebuild + install -the new binaries from the release tag.
- -## Deliverables - -- From the detected tag `vX.Y.Z`, run (via `tokio::task::spawn_blocking`, since - it's a long blocking child process): - ``` - cargo install --locked \ - --git ssh://forgejo@forge.ops.eblu.me:2222/eblume/hephaestus.git \ - --tag vX.Y.Z heph hephd heph-tui heph-quickadd - ``` - This is the exact command the install how-to and the manual redeploy use; it - swaps `~/.cargo/bin/*` in place. -- Capture stdout/stderr and exit status; log success/failure. A failed build - must **not** restart the daemon — only a successful install proceeds to - [[self-restart-after-update]]. -- Guard against re-running while an install is in flight (the long compile spans - multiple poll ticks): a simple "update in progress" flag. - -## Done when - -On a real newer tag, the daemon completes the install and the new binary is on -disk at `~/.cargo/bin`. Requires [[self-update-poll-loop]] and -[[service-env-forge-access]]. Part of [[hephd-self-update]]. diff --git a/docs/how-to/self-update/hephd-self-update.md b/docs/how-to/self-update/hephd-self-update.md deleted file mode 100644 index e9b679a..0000000 --- a/docs/how-to/self-update/hephd-self-update.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: hephd self-update -modified: 2026-06-04 -tags: - - how-to -status: active -branch: mikado/hephd-self-update -requires: - - self-restart-after-update - - verify-hub-dropout-resilience ---- - -# hephd self-update - -**Goal (desired end state).** An opt-in, **default-off** mode where `hephd` -periodically polls the forge for a newer release and, when one exists, -rebuilds via `cargo install` from the release tag and restarts itself onto the -new binary — unattended. - -## End state - -- A new daemon flag (`--self-update`, default off) plus a poll interval. When - off, behaviour is unchanged. See [[self-update-opt-in-flag]]. 
-- A background task (modelled on the existing spoke sync loop, - `crates/hephd/src/server.rs` `spawn_sync_loop`) that on each tick fetches the - latest release and compares it to `heph_core::VERSION`. See - [[self-update-poll-loop]] and [[release-poll-version-check]]. -- On a newer release: run `cargo install --locked --git ssh://… --tag vX.Y.Z` - for all workspace binaries ([[cargo-install-from-tag]]), then exit cleanly so - the OS service manager respawns the new binary - ([[self-restart-after-update]], [[service-respawn-on-clean-exit]]). -- Running `cargo install` from inside the service requires the daemon's - environment to have cargo + forge SSH access — the known blocker tracked in - [[service-env-forge-access]]. - -## Design decisions (owner) - -- **Default off**, opt-in only. Never self-update silently by default. -- Delivery is **`cargo install` from the tag** for now (prebuilt release - binaries are a possible future, pending a cargo/forge canonical-host fix). -- **Hub can disappear at any moment** — that resilience is the *base case*, not - a special guard. The sync loop already tolerates an unreachable hub; we lock - that in rather than add update-specific guards. See - [[verify-hub-dropout-resilience]]. - -## Scope notes - -Captured from task `01KTA2NSNRYT902HC3VRW00S1J` in the `Hephaestus` project. -Possible later refinements (own cards if pursued): checksum/signature -verification of the built binary, prebuilt release-binary delivery, and a -notify-only sub-mode. diff --git a/docs/how-to/self-update/release-poll-version-check.md b/docs/how-to/self-update/release-poll-version-check.md deleted file mode 100644 index 8ecfca2..0000000 --- a/docs/how-to/self-update/release-poll-version-check.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Release poll + version check -modified: 2026-06-04 -tags: - - how-to -requires: [] ---- - -# Release poll + version check - -The piece that answers "is a newer release available?" 
— independent of any -daemon wiring, so it can be unit-tested in isolation. - -## Deliverables - -- Fetch the latest release from the forge: - `GET https://forge.ops.eblu.me/api/v1/repos/eblume/hephaestus/releases/latest`, - read `tag_name` (e.g. `v1.0.4`). hephd already depends on `ureq` and - `reqwest` (`crates/hephd/Cargo.toml`) — reuse one (the poll loop is async, so - `reqwest` fits; `ureq` would need `spawn_blocking`). -- Parse the running version: `heph_core::VERSION` is `"1.0.3 (sha)"` — take the - `X.Y.Z` head. Add `semver = "1"` to `crates/hephd/Cargo.toml` (already in the - lockfile transitively) and compare `tag_name` (strip leading `v`) against it. -- A pure `is_newer(current, tag) -> bool` helper with tests covering equal / - older / newer / malformed tags. - -## Done when - -Given a fixed current version and a sample releases-API JSON body, the helper -correctly reports whether an update exists. No daemon loop yet — that's -[[self-update-poll-loop]]. Part of [[hephd-self-update]]. diff --git a/docs/how-to/self-update/self-restart-after-update.md b/docs/how-to/self-update/self-restart-after-update.md deleted file mode 100644 index 1f95128..0000000 --- a/docs/how-to/self-update/self-restart-after-update.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: Self-restart after update -modified: 2026-06-04 -tags: - - how-to -requires: - - cargo-install-from-tag - - service-respawn-on-clean-exit ---- - -# Self-restart after update - -The last step: once the new binary is installed, get the running daemon to hand -off to it. - -## Deliverables - -- After a successful [[cargo-install-from-tag]], have hephd exit cleanly - (`std::process::exit(0)`) so the service manager respawns the new binary. - hephd has no graceful-shutdown path today (`serve` is an infinite accept - loop) — a clean process exit is acceptable; in-flight RPC connections simply - drop and clients reconnect (the plugin already reconnects-once). 
-- Relies on [[service-respawn-on-clean-exit]] so the exit is actually followed - by a respawn on both platforms. -- Log a clear "restarting into vX.Y.Z" line before exit. Optionally re-check - that the on-disk version actually changed before restarting, to avoid a - restart loop if the install was a no-op. - -## Done when - -End-to-end: an enabled daemon on an older version detects a newer release, -installs it, restarts, and comes back reporting the new `version` RPC value. -This closes the apply path of [[hephd-self-update]]. diff --git a/docs/how-to/self-update/self-update-opt-in-flag.md b/docs/how-to/self-update/self-update-opt-in-flag.md deleted file mode 100644 index ecf5625..0000000 --- a/docs/how-to/self-update/self-update-opt-in-flag.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -title: Self-update opt-in flag -modified: 2026-06-04 -tags: - - how-to -requires: [] ---- - -# Self-update opt-in flag - -The opt-in surface. hephd config today is pure clap flags (no config file) in -`crates/hephd/src/main.rs`. - -## Deliverables - -- Add `--self-update` (bool, **default false**) and an interval override (e.g. - `--self-update-interval-secs`, with a sane default like 6h). Document them in - the flag help. -- Thread them into the daemon the same way `--hub-url` / spoke auth are - (`Daemon::new(...).with_hub(...)` → add `.with_self_update(cfg)`). -- When the flag is absent, the daemon behaves exactly as today (the loop in - [[self-update-poll-loop]] is simply not spawned). -- Later, bake the flag into the generated service definition (launchd/systemd) - so an enabled daemon keeps self-updating across restarts — coordinate with - [[service-respawn-on-clean-exit]] (same templates in `crates/heph/src/service.rs`). - -## Done when - -`hephd --self-update` starts the daemon with the mode enabled (verifiable via a -startup log line); omitting it leaves current behaviour untouched. Part of -[[hephd-self-update]]. 
diff --git a/docs/how-to/self-update/self-update-poll-loop.md b/docs/how-to/self-update/self-update-poll-loop.md deleted file mode 100644 index da3fd06..0000000 --- a/docs/how-to/self-update/self-update-poll-loop.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: Self-update poll loop -modified: 2026-06-04 -tags: - - how-to -requires: - - release-poll-version-check - - self-update-opt-in-flag ---- - -# Self-update poll loop - -The background task that ties the flag to the version check. This card alone -yields a working **notify-only** daemon ("update available: vX.Y.Z" in the -log) — the apply path layers on after. - -## Deliverables - -- Spawn a `tokio` task modelled on `spawn_sync_loop` - (`crates/hephd/src/server.rs`): `tokio::time::interval` ticking at the - configured cadence, guarded so it's a no-op unless `--self-update` is set. -- Each tick: run the [[release-poll-version-check]]. On "newer available", log - it (and, once the apply path exists, hand off to [[cargo-install-from-tag]]). -- Errors (forge unreachable, bad JSON) are logged and the loop continues — - same resilience pattern the sync loop uses. A flaky forge must never crash or - block the daemon. - -## Done when - -With `--self-update` on and a stubbed/real "newer" release, the daemon logs an -update-available line once per detection; with the flag off, no task runs. -Requires [[release-poll-version-check]] and [[self-update-opt-in-flag]]. Part of -[[hephd-self-update]]. 
diff --git a/docs/how-to/self-update/service-env-forge-access.md b/docs/how-to/self-update/service-env-forge-access.md deleted file mode 100644 index 7a31238..0000000 --- a/docs/how-to/self-update/service-env-forge-access.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: Service env forge access -modified: 2026-06-04 -tags: - - how-to -requires: [] ---- - -# Service env forge access - -The runtime-environment prerequisite for the apply path: a `hephd` started by -launchd/systemd runs with a minimal environment, so it must be able to find -cargo and fetch the repo when it runs `cargo install`. - -## Resolved (and how the original premise was wrong) - -This card was first written assuming self-update needed **forge SSH -credentials** for a headless service — because the install how-to uses -`ssh://forgejo@forge.ops.eblu.me:2222/…`. That premise was wrong: - -- **hephaestus is a public repo**, and `cargo install --git` is a plain - anonymous git clone — *not* the Forgejo cargo **registry** (the registry is - access-restricted and is the thing that required `forge.ops.eblu.me`; it is - unrelated to git clone). So **no credentials, no SSH, no deploy key**. -- Verified end-to-end: `cargo install --git https://forge.eblu.me/eblume/hephaestus.git --tag v1.0.3 hephd` - builds a working binary anonymously. Self-update uses that canonical public - HTTPS URL (`INSTALL_GIT_URL`), and the release poll uses the same host. - -So the only real requirement was the **environment**, handled in -`crates/heph/src/service.rs`: `heph daemon start --self-update` generates a -launchd/systemd service that passes `--self-update` and bakes a `PATH` -(including `~/.cargo/bin`) + `HOME` so the minimal service env can find cargo -and the toolchain. `restart` preserves the setting. Default services are -unchanged. 
- -## Remaining (owner) - -The Rust toolchain must be installed for the service user (cargo builds from -source), and a real on-device run — enable `--self-update`, then confirm a -live upgrade when the next release lands — is the final end-to-end check. See -[[hephd-self-update]]. diff --git a/docs/how-to/self-update/service-respawn-on-clean-exit.md b/docs/how-to/self-update/service-respawn-on-clean-exit.md deleted file mode 100644 index 88db290..0000000 --- a/docs/how-to/self-update/service-respawn-on-clean-exit.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: Service respawn on clean exit -modified: 2026-06-04 -tags: - - how-to -requires: [] ---- - -# Service respawn on clean exit - -For "self-restart" to mean "exit and let the manager bring up the new binary", -both service managers must respawn hephd after a **clean** (exit code 0) -shutdown. Templates live in `crates/heph/src/service.rs`. - -## Current state (from research) - -- **launchd (macOS):** plist has `KeepAlive = true` → already respawns on clean - exit. No change needed. -- **systemd (Linux):** unit is `Restart=on-failure` → a clean exit (code 0) - does **not** respawn. Self-restart would silently stop the daemon. - -## Deliverables - -- Change the systemd unit template to `Restart=always` (with a small - `RestartSec`) so a deliberate clean exit is respawned. -- Note in install/upgrade docs that **already-installed services must be - reinstalled** (`heph daemon` re-generates the unit) to pick up the new - policy; otherwise self-restart won't work on existing Linux installs. - -## Done when - -On both platforms, a hephd that calls `exit(0)` is brought back up by the -service manager. Pairs with [[self-restart-after-update]]. Part of -[[hephd-self-update]]. 
diff --git a/docs/how-to/self-update/verify-hub-dropout-resilience.md b/docs/how-to/self-update/verify-hub-dropout-resilience.md deleted file mode 100644 index 9f01c46..0000000 --- a/docs/how-to/self-update/verify-hub-dropout-resilience.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: Verify hub-dropout resilience -modified: 2026-06-04 -tags: - - how-to -requires: [] ---- - -# Verify hub-dropout resilience - -Owner requirement: "the hub can go poof at any moment" must be the **base -case**, not a guard bolted on for self-update. A self-updating hub will restart -under its spokes, so spokes must already shrug off an unreachable hub. - -## Current state (from research) - -Already largely true: `sync_once` (`crates/hephd/src/sync.rs`) propagates -errors, and the background loop (`spawn_sync_loop`, `crates/hephd/src/server.rs`) -catches them — `tracing::warn!("background sync failed: {e}")` — and continues. -The local SQLite store stays writable, so the spoke works offline and -reconciles on the next successful tick. No panic, no block. - -## Deliverables - -- Lock the guarantee in with an explicit test: a spoke whose hub is unreachable - for one or more sync cycles keeps serving local RPCs and accepting writes, - then reconciles when the hub returns. -- If any path is found that *doesn't* degrade gracefully (a blocking call, an - unwrapped error, a restart that loses unsynced ops), fix it here — that is the - whole point of this card. - -## Done when - -A test demonstrates spoke survival across hub downtime, documenting the -base-case guarantee that makes a self-updating hub safe. Part of -[[hephd-self-update]]. 
From 854d25c68bfb83d5792da264de937125202b40b8 Mon Sep 17 00:00:00 2001 From: Forgejo Actions Date: Thu, 4 Jun 2026 15:07:55 -0700 Subject: [PATCH 20/39] Update changelog for v1.1.0 [skip ci] --- CHANGELOG.md | 7 +++++++ docs/changelog.d/hephd-self-update.feature.md | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) delete mode 100644 docs/changelog.d/hephd-self-update.feature.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 02dc5fd..38b70b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [v1.1.0] - 2026-06-04 + +### Features + +- Opt-in (default off) **hephd self-update**: `hephd --self-update` polls the forge for a newer release on an interval and, when one appears, rebuilds via `cargo install` from the release tag (anonymous HTTPS clone of the public repo — no credentials) and restarts onto the new binary. Enable it on the managed service with `heph daemon start --self-update` (which also bakes a cargo-capable `PATH` into the launchd/systemd unit and switches systemd to `Restart=always` so a clean self-exit respawns). The install mechanism is verified end-to-end; a live cross-version upgrade is confirmed on the first release after this lands. Also hardens hub resilience: the daemon's HTTP client now has a 30s timeout so a black-hole hub can't stall the sync/self-update loop. + + ## [v1.0.3] - 2026-06-04 ### Features diff --git a/docs/changelog.d/hephd-self-update.feature.md b/docs/changelog.d/hephd-self-update.feature.md deleted file mode 100644 index 90cd33d..0000000 --- a/docs/changelog.d/hephd-self-update.feature.md +++ /dev/null @@ -1 +0,0 @@ -Opt-in (default off) **hephd self-update**: `hephd --self-update` polls the forge for a newer release on an interval and, when one appears, rebuilds via `cargo install` from the release tag (anonymous HTTPS clone of the public repo — no credentials) and restarts onto the new binary. 
Enable it on the managed service with `heph daemon start --self-update` (which also bakes a cargo-capable `PATH` into the launchd/systemd unit and switches systemd to `Restart=always` so a clean self-exit respawns). The install mechanism is verified end-to-end; a live cross-version upgrade is confirmed on the first release after this lands. Also hardens hub resilience: the daemon's HTTP client now has a 30s timeout so a black-hole hub can't stall the sync/self-update loop. From fac39386d05d5e8d38fd5c0a021685bd9044d224 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 15:26:13 -0700 Subject: [PATCH 21/39] fix: self-update poll uses ureq (reqwest has no TLS backend) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hephd's reqwest client is built default-features=false with no TLS feature, so the self-update release poll's HTTPS GET always failed ('release check failed: requesting forge releases/latest') — the bug never surfaced before because nothing in production used reqwest over HTTPS (hub sync is plain http://). Switch the poll to ureq, which is already a dependency and ships a rustls/ring TLS stack needing no system libs (notably no cmake/aws-lc-sys, which would break the rust:bookworm CI image). Verified end-to-end: a 0.0.0 build now detects v1.1.0, installs, and restarts. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/hephd/src/selfupdate.rs | 42 +++++++++++-------- crates/hephd/src/server.rs | 2 +- .../+selfupdate-poll-tls.bugfix.md | 1 + 3 files changed, 26 insertions(+), 19 deletions(-) create mode 100644 docs/changelog.d/+selfupdate-poll-tls.bugfix.md diff --git a/crates/hephd/src/selfupdate.rs b/crates/hephd/src/selfupdate.rs index 8c7d470..e276955 100644 --- a/crates/hephd/src/selfupdate.rs +++ b/crates/hephd/src/selfupdate.rs @@ -70,19 +70,17 @@ pub fn parse_latest_tag(body: &str) -> Result { Ok(rel.tag_name) } -/// Fetch the latest release tag from the forge over HTTP (reusing the daemon's -/// shared `reqwest::Client`). Network/HTTP/JSON failures surface as `Err` for -/// the caller to log-and-continue. -pub async fn fetch_latest_tag(http: &reqwest::Client, url: &str) -> Result { - let body = http - .get(url) - .send() - .await +/// Fetch the latest release tag from the forge over HTTPS, blocking. Uses +/// `ureq` (already a dependency, with a rustls/ring TLS backend that needs no +/// system libs) rather than the daemon's `reqwest` client, which is built +/// without TLS — the forge poll is the only production HTTPS-over-HTTP-client +/// path (hub sync is plain HTTP). Network/HTTP/JSON failures surface as `Err`. +pub fn fetch_latest_tag(url: &str) -> Result { + let body = ureq::get(url) + .call() .context("requesting forge releases/latest")? - .error_for_status() - .context("forge releases/latest returned an error status")? - .text() - .await + .body_mut() + .read_to_string() .context("reading forge releases/latest body")?; parse_latest_tag(&body) } @@ -93,25 +91,33 @@ pub trait ReleaseSource: Send + Sync + 'static { fn latest_tag(&self) -> impl std::future::Future> + Send; } -/// The production source: the forge's `releases/latest` over HTTP. +/// The production source: the forge's `releases/latest` over HTTPS (via `ureq`). 
pub struct ForgeReleaseSource { - http: reqwest::Client, url: String, } impl ForgeReleaseSource { - /// Source backed by the daemon's shared client, hitting [`RELEASES_LATEST_URL`]. - pub fn new(http: reqwest::Client) -> Self { + /// Source hitting [`RELEASES_LATEST_URL`]. + pub fn new() -> Self { Self { - http, url: RELEASES_LATEST_URL.to_string(), } } } +impl Default for ForgeReleaseSource { + fn default() -> Self { + Self::new() + } +} + impl ReleaseSource for ForgeReleaseSource { async fn latest_tag(&self) -> Result { - fetch_latest_tag(&self.http, &self.url).await + // `ureq` is blocking; keep it off the async runtime. + let url = self.url.clone(); + tokio::task::spawn_blocking(move || fetch_latest_tag(&url)) + .await + .context("release-fetch task panicked")? } } diff --git a/crates/hephd/src/server.rs b/crates/hephd/src/server.rs index bf245ba..59826ac 100644 --- a/crates/hephd/src/server.rs +++ b/crates/hephd/src/server.rs @@ -135,7 +135,7 @@ impl Daemon { let Some(cfg) = self.ctx.self_update.clone() else { return; }; - let source = selfupdate::ForgeReleaseSource::new(self.ctx.http.clone()); + let source = selfupdate::ForgeReleaseSource::new(); let installer: std::sync::Arc = std::sync::Arc::new(selfupdate::CargoInstaller); let restarter: std::sync::Arc = diff --git a/docs/changelog.d/+selfupdate-poll-tls.bugfix.md b/docs/changelog.d/+selfupdate-poll-tls.bugfix.md new file mode 100644 index 0000000..7097c0b --- /dev/null +++ b/docs/changelog.d/+selfupdate-poll-tls.bugfix.md @@ -0,0 +1 @@ +Fix `hephd --self-update` never detecting releases: the release poll used the daemon's `reqwest` client, which is built without a TLS backend (`default-features = false`), so every HTTPS request to the forge failed (`release check failed: requesting forge releases/latest`). The poll now uses `ureq` — already a dependency, with a rustls/ring TLS stack that needs no system libraries (and no cmake/`aws-lc-sys`). Hub sync is unaffected (it is plain HTTP). 
From b75d7a8d7adbab6dbf0c51d8d4c710dc65b36c3c Mon Sep 17 00:00:00 2001 From: Forgejo Actions Date: Thu, 4 Jun 2026 15:36:38 -0700 Subject: [PATCH 22/39] Update changelog for v1.1.1 [skip ci] --- CHANGELOG.md | 7 +++++++ docs/changelog.d/+selfupdate-poll-tls.bugfix.md | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) delete mode 100644 docs/changelog.d/+selfupdate-poll-tls.bugfix.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 38b70b1..7784dfd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [v1.1.1] - 2026-06-04 + +### Bug Fixes + +- Fix `hephd --self-update` never detecting releases: the release poll used the daemon's `reqwest` client, which is built without a TLS backend (`default-features = false`), so every HTTPS request to the forge failed (`release check failed: requesting forge releases/latest`). The poll now uses `ureq` — already a dependency, with a rustls/ring TLS stack that needs no system libraries (and no cmake/`aws-lc-sys`). Hub sync is unaffected (it is plain HTTP). + + ## [v1.1.0] - 2026-06-04 ### Features diff --git a/docs/changelog.d/+selfupdate-poll-tls.bugfix.md b/docs/changelog.d/+selfupdate-poll-tls.bugfix.md deleted file mode 100644 index 7097c0b..0000000 --- a/docs/changelog.d/+selfupdate-poll-tls.bugfix.md +++ /dev/null @@ -1 +0,0 @@ -Fix `hephd --self-update` never detecting releases: the release poll used the daemon's `reqwest` client, which is built without a TLS backend (`default-features = false`), so every HTTPS request to the forge failed (`release check failed: requesting forge releases/latest`). The poll now uses `ureq` — already a dependency, with a rustls/ring TLS stack that needs no system libraries (and no cmake/`aws-lc-sys`). Hub sync is unaffected (it is plain HTTP). 
From ca8f7d1ab2004fe36c5b9885dc11be955d7aa42a Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 16:39:20 -0700 Subject: [PATCH 23/39] feat(hephd): CORS + optional static serving on the hub HTTP endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a permissive CORS middleware (answers the browser OPTIONS preflight and stamps Access-Control-* on every response) and an optional --web-root static file handler with an index.html SPA fallback. Together these let a browser surface — the forthcoming heph-pwa mobile app — call /rpc cross-origin or be hosted same-origin straight from the hub. No new crate dependencies; file reads run on the blocking pool. Covered by tests/web_serve.rs. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/hephd/src/main.rs | 11 ++- crates/hephd/src/sync.rs | 117 ++++++++++++++++++++++- crates/hephd/tests/web_serve.rs | 163 ++++++++++++++++++++++++++++++++ 3 files changed, 287 insertions(+), 4 deletions(-) create mode 100644 crates/hephd/tests/web_serve.rs diff --git a/crates/hephd/src/main.rs b/crates/hephd/src/main.rs index ad3517c..c8e4e25 100644 --- a/crates/hephd/src/main.rs +++ b/crates/hephd/src/main.rs @@ -60,6 +60,12 @@ struct Cli { #[arg(long)] http_addr: Option, + /// Directory of static files to serve for non-API paths (server mode). Point + /// this at the `heph-pwa/` shell to host the mobile app same-origin from the + /// hub. Unset: the hub serves only its API routes. + #[arg(long)] + web_root: Option, + /// Server to proxy to (client mode only; required there). 
#[arg(long)] server_url: Option, @@ -190,7 +196,10 @@ async fn main() -> Result<()> { anyhow::bail!("--oidc-issuer and --oidc-audience must be set together") } }; - let app = sync::router(daemon.store(), verifier); + if let Some(root) = cli.web_root.as_deref() { + tracing::info!(web_root = %root.display(), "hub serving static PWA shell"); + } + let app = sync::router_with_web(daemon.store(), verifier, cli.web_root.clone()); let http_listener = TcpListener::bind(&addr) .await .with_context(|| format!("binding hub HTTP endpoint {addr}"))?; diff --git a/crates/hephd/src/sync.rs b/crates/hephd/src/sync.rs index 266cae1..41e5524 100644 --- a/crates/hephd/src/sync.rs +++ b/crates/hephd/src/sync.rs @@ -10,6 +10,12 @@ //! - `POST /rpc` — the full daemon API ([`crate::rpc::dispatch`]) over HTTP, for //! a no-replica `client`-mode [`crate::remote::RemoteStore`] to proxy against. //! +//! All routes carry permissive CORS headers and answer the browser preflight +//! (`OPTIONS`), so a browser surface (the `heph-pwa` mobile app) can call `/rpc` +//! cross-origin. When the hub is given a `web_root`, unmatched paths fall back to +//! serving that directory's static files (the PWA shell), so the app can be +//! hosted same-origin straight from the hub. +//! //! Exchange is **incremental by HLC cursor** (`sync_state`, [`heph_core::SyncCursors`]): //! each side transfers only the tail it hasn't sent/seen. Merge is idempotent, //! so a re-pushed op the hub already has is a harmless no-op. When the hub is @@ -17,13 +23,14 @@ //! OIDC bearer token whose `sub` owns the hub (tech-spec §13); spokes attach //! that token via the `bearer` argument to [`sync_once`]. 
+use std::path::PathBuf; use std::sync::{Arc, Mutex}; use anyhow::Result; use axum::extract::{Query, Request, State}; -use axum::http::StatusCode; +use axum::http::{header, HeaderValue, Method, StatusCode, Uri}; use axum::middleware::{self, Next}; -use axum::response::Response as AxumResponse; +use axum::response::{IntoResponse, Response as AxumResponse}; use axum::routing::{get, post}; use axum::{Json, Router}; use serde::{Deserialize, Serialize}; @@ -44,6 +51,9 @@ pub type SharedStore = Arc>; struct HubState { store: SharedStore, verifier: Option>, + /// When set, unmatched paths serve static files from this directory (the + /// `heph-pwa` shell), so the app can be hosted same-origin from the hub. + web_root: Option, } /// A batch of ops in flight (push body / pull response). @@ -102,15 +112,116 @@ fn apply_batch( /// `verifier` is `Some`, every route requires a valid OIDC bearer token whose /// `sub` owns this hub (tech-spec §13); `None` leaves the hub open (local dev). pub fn router(store: SharedStore, verifier: Option>) -> Router { - let state = HubState { store, verifier }; + router_with_web(store, verifier, None) +} + +/// [`router`] plus an optional `web_root`: when `Some(dir)`, paths that don't +/// match an API route serve static files from `dir` (the `heph-pwa` shell), +/// with an `index.html` fallback so the single-page app can deep-link. Static +/// files are served without authentication — they are only the app shell; all +/// data still flows through the auth-gated `/rpc` and `/sync/*` routes. +pub fn router_with_web( + store: SharedStore, + verifier: Option>, + web_root: Option, +) -> Router { + let state = HubState { + store, + verifier, + web_root, + }; Router::new() .route("/sync/pull", get(pull)) .route("/sync/push", post(push)) .route("/rpc", post(rpc_call)) .route_layer(middleware::from_fn_with_state(state.clone(), require_auth)) + // The static shell is unauthenticated and lives behind the API routes. 
+ .fallback(serve_static) + // Outermost: stamp CORS headers on every response and short-circuit the + // browser's `OPTIONS` preflight (before it reaches auth or routing). + .layer(middleware::from_fn(cors)) .with_state(state) } +/// Permissive-CORS middleware. Answers the browser preflight (`OPTIONS`) with a +/// 204 and stamps `Access-Control-*` headers on every response. The hub is a +/// personal endpoint guarded by bearer tokens (not cookies), so a wildcard +/// origin is safe — there are no ambient credentials for `*` to expose. +async fn cors(request: Request, next: Next) -> AxumResponse { + let is_preflight = request.method() == Method::OPTIONS; + let mut response = if is_preflight { + StatusCode::NO_CONTENT.into_response() + } else { + next.run(request).await + }; + let h = response.headers_mut(); + h.insert( + header::ACCESS_CONTROL_ALLOW_ORIGIN, + HeaderValue::from_static("*"), + ); + h.insert( + header::ACCESS_CONTROL_ALLOW_METHODS, + HeaderValue::from_static("GET, POST, OPTIONS"), + ); + h.insert( + header::ACCESS_CONTROL_ALLOW_HEADERS, + HeaderValue::from_static("authorization, content-type"), + ); + h.insert( + header::ACCESS_CONTROL_MAX_AGE, + HeaderValue::from_static("86400"), + ); + response +} + +/// Serve the PWA shell from `web_root` for any non-API path. Returns 404 when no +/// `web_root` is configured. Unknown paths fall back to `index.html` so the SPA +/// can own its own routing. Path traversal (`..`) is rejected. 
+async fn serve_static(State(state): State, uri: Uri) -> AxumResponse { + let Some(root) = state.web_root.as_ref() else { + return StatusCode::NOT_FOUND.into_response(); + }; + let rel = uri.path().trim_start_matches('/'); + if rel.split('/').any(|seg| seg == "..") { + return StatusCode::BAD_REQUEST.into_response(); + } + let rel = if rel.is_empty() { "index.html" } else { rel }; + + let direct = root.join(rel); + let index = root.join("index.html"); + // File reads run on the blocking pool (tokio's `fs` feature is off, and DB / + // disk I/O never runs on an async worker, tech-spec §3). + let read = tokio::task::spawn_blocking(move || { + match std::fs::read(&direct) { + Ok(bytes) => Some((content_type(&direct), bytes)), + // SPA fallback: serve index.html for any path that is not a readable file. + Err(_) => std::fs::read(&index) + .ok() + .map(|bytes| ("text/html; charset=utf-8", bytes)), + } + }) + .await; + match read { + Ok(Some((ctype, bytes))) => ([(header::CONTENT_TYPE, ctype)], bytes).into_response(), + _ => StatusCode::NOT_FOUND.into_response(), + } +} + +/// Best-effort content type from a file extension (the handful the PWA serves). +fn content_type(path: &std::path::Path) -> &'static str { + match path.extension().and_then(|e| e.to_str()) { + Some("html") => "text/html; charset=utf-8", + Some("js" | "mjs") => "text/javascript; charset=utf-8", + Some("css") => "text/css; charset=utf-8", + Some("json" | "webmanifest") => "application/json; charset=utf-8", + Some("svg") => "image/svg+xml", + Some("png") => "image/png", + Some("ico") => "image/x-icon", + Some("woff2") => "font/woff2", + _ => "application/octet-stream", + } +} + /// Reject any request lacking a valid bearer token whose `sub` owns this hub. /// A no-op when the hub has no verifier configured (open dev mode). 
async fn require_auth( diff --git a/crates/hephd/tests/web_serve.rs b/crates/hephd/tests/web_serve.rs new file mode 100644 index 0000000..b176137 --- /dev/null +++ b/crates/hephd/tests/web_serve.rs @@ -0,0 +1,163 @@ +//! The hub's browser-facing surface (for the `heph-pwa` mobile app): permissive +//! CORS on every response, an `OPTIONS` preflight answer, and—when a `web_root` +//! is configured—static serving of the app shell with an `index.html` SPA +//! fallback. A tiny raw-HTTP client keeps this dependency-free and lets us drive +//! arbitrary methods (`OPTIONS`) and inspect response headers directly. + +use std::io::{Read, Write}; +use std::net::TcpStream; +use std::sync::mpsc; +use std::sync::{Arc, Mutex}; +use std::thread; +use std::time::Duration; + +use heph_core::{FixedClock, LocalStore}; +use hephd::sync::{self, SharedStore}; + +const NOW: i64 = 1_704_067_200_000; // 2024-01-01T00:00:00Z + +/// One parsed HTTP response: status line code, lowercased headers, and body. +struct Resp { + status: u16, + headers: Vec<(String, String)>, + body: String, +} + +impl Resp { + fn header(&self, name: &str) -> Option<&str> { + let name = name.to_ascii_lowercase(); + self.headers + .iter() + .find(|(k, _)| *k == name) + .map(|(_, v)| v.as_str()) + } +} + +/// Issue one HTTP/1.1 request over a fresh connection (`Connection: close`, so +/// we can read the whole response to EOF) and parse the response. 
+fn request(addr: &str, method: &str, path: &str) -> Resp { + let mut stream = TcpStream::connect(addr).unwrap(); + let req = format!("{method} {path} HTTP/1.1\r\nHost: {addr}\r\nConnection: close\r\n\r\n"); + stream.write_all(req.as_bytes()).unwrap(); + let mut raw = String::new(); + stream.read_to_string(&mut raw).unwrap(); + + let (head, body) = raw.split_once("\r\n\r\n").unwrap_or((&raw, "")); + let mut lines = head.split("\r\n"); + let status = lines + .next() + .and_then(|l| l.split_whitespace().nth(1)) + .and_then(|c| c.parse().ok()) + .unwrap(); + let headers = lines + .filter_map(|l| l.split_once(": ")) + .map(|(k, v)| (k.to_ascii_lowercase(), v.to_string())) + .collect(); + Resp { + status, + headers, + body: body.to_string(), + } +} + +/// Start the hub router (with the given `web_root`) over a temp `LocalStore` on +/// an ephemeral port; return its `host:port`. The server thread + temp dirs live +/// for the test's duration. +fn start(web_root: Option) -> String { + let (tx, rx) = mpsc::channel(); + thread::spawn(move || { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + rt.block_on(async move { + let dir = tempfile::tempdir().unwrap(); + let store = + LocalStore::open(dir.path().join("heph.db"), Box::new(FixedClock(NOW))).unwrap(); + let shared: SharedStore = Arc::new(Mutex::new(store)); + let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap(); + tx.send(listener.local_addr().unwrap()).unwrap(); + let _keep = dir; + let app = sync::router_with_web(shared, None, web_root); + axum::serve(listener, app).await.unwrap(); + }); + }); + rx.recv_timeout(Duration::from_secs(5)).unwrap().to_string() +} + +#[test] +fn cors_headers_on_rpc_and_preflight_answered() { + let addr = start(None); + + // The browser preflight gets a 204 with the CORS allowances, without auth. 
+ let pre = request(&addr, "OPTIONS", "/rpc"); + assert_eq!(pre.status, 204); + assert_eq!(pre.header("access-control-allow-origin"), Some("*")); + assert!(pre + .header("access-control-allow-headers") + .unwrap() + .contains("authorization")); + assert!(pre + .header("access-control-allow-methods") + .unwrap() + .contains("POST")); + + // A regular GET also carries the origin header (so XHR can read the body). + let get = request(&addr, "GET", "/sync/pull"); + assert_eq!(get.header("access-control-allow-origin"), Some("*")); +} + +#[test] +fn serves_static_shell_with_index_fallback() { + let dir = tempfile::tempdir().unwrap(); + std::fs::write( + dir.path().join("index.html"), + "heph", + ) + .unwrap(); + std::fs::write(dir.path().join("app.js"), "export const x = 1;\n").unwrap(); + let addr = start(Some(dir.path().to_path_buf())); + + // Root serves index.html as HTML. + let root = request(&addr, "GET", "/"); + assert_eq!(root.status, 200); + assert!(root.body.contains("heph")); + assert_eq!( + root.header("content-type"), + Some("text/html; charset=utf-8") + ); + + // A real asset is served with a JS content type. + let js = request(&addr, "GET", "/app.js"); + assert_eq!(js.status, 200); + assert!(js.body.contains("export const x")); + assert_eq!( + js.header("content-type"), + Some("text/javascript; charset=utf-8") + ); + + // An unknown (extension-less) route falls back to index.html for the SPA. + let deep = request(&addr, "GET", "/inbox"); + assert_eq!(deep.status, 200); + assert!(deep.body.contains("heph")); + + // Path traversal never escapes web_root (whether the client/proxy normalizes + // the `..` away or our guard rejects it, the crate's Cargo.toml never leaks). + let escape = request(&addr, "GET", "/../../Cargo.toml"); + assert!( + !escape.body.contains("[package]"), + "must not serve files outside web_root" + ); + + // The temp dir must outlive the server thread's reads. 
+ drop(dir); +} + +#[test] +fn no_web_root_yields_404_for_static_paths() { + let addr = start(None); + let resp = request(&addr, "GET", "/inbox"); + assert_eq!(resp.status, 404); + // Even the 404 carries CORS headers (it passed through the layer). + assert_eq!(resp.header("access-control-allow-origin"), Some("*")); +} From c3111d498bc55882e1f3ce1fbcea88a4d4421062 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 4 Jun 2026 16:42:09 -0700 Subject: [PATCH 24/39] feat(heph-pwa): port quickadd + datespec parsers to JS (with parity tests) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Faithful JS ports of hephd's quickadd.rs / datespec.rs so the PWA's quick-add accepts the identical syntax (p1-4, #Project greedy match, today/+3d/fri/ISO, 'every …' recurrence) and produces the same RRULEs and local-midnight do-dates as the CLI/TUI. test/parsers.test.mjs replays the Rust unit cases under `node --test` (13/13 pass). Co-Authored-By: Claude Opus 4.8 (1M context) --- heph-pwa/src/datespec.js | 221 +++++++++++++++++++++++++++++++++ heph-pwa/src/quickadd.js | 113 +++++++++++++++++ heph-pwa/test/parsers.test.mjs | 125 +++++++++++++++++++ 3 files changed, 459 insertions(+) create mode 100644 heph-pwa/src/datespec.js create mode 100644 heph-pwa/src/quickadd.js create mode 100644 heph-pwa/test/parsers.test.mjs diff --git a/heph-pwa/src/datespec.js b/heph-pwa/src/datespec.js new file mode 100644 index 0000000..7c2986a --- /dev/null +++ b/heph-pwa/src/datespec.js @@ -0,0 +1,221 @@ +// Human-friendly date and recurrence parsing — a faithful JS port of hephd's +// `datespec.rs` (tech-spec §1, §8, §8.1) so the PWA's quick-add accepts the +// exact same forms as the CLI/TUI and produces identical RRULEs and do-dates. +// +// Dates are date-grained and stored as epoch ms at *local midnight* (matching +// `to_epoch_ms`). 
// All pure functions take an explicit `today` so they stay
// deterministically testable; the thin wrappers read the local clock.

/** A local-midnight Date for today (time component stripped). */
export function today() {
  const now = new Date();
  return new Date(now.getFullYear(), now.getMonth(), now.getDate());
}

/** Local-midnight epoch ms for a Date (the form do_date/late_on are stored in). */
export function toEpochMs(date) {
  return new Date(date.getFullYear(), date.getMonth(), date.getDate()).getTime();
}

/** `date` moved by `n` calendar days (negative goes back), as a local-midnight Date. */
function addDays(date, n) {
  return new Date(date.getFullYear(), date.getMonth(), date.getDate() + n);
}

/**
 * `date` moved by `n` calendar months, as a local-midnight Date.
 *
 * The day of month is clamped to the length of the target month, so Jan 31
 * plus one month is Feb 28/29 rather than spilling over into March (which is
 * what handing an out-of-range day straight to the Date constructor does).
 * NOTE(review): clamping is presumably what the Rust side's month arithmetic
 * does as well — confirm against datespec.rs to keep the ports in parity.
 */
function addMonths(date, n) {
  const firstOfTarget = new Date(date.getFullYear(), date.getMonth() + n, 1);
  // Day 0 of the following month is the last day of the target month.
  const lastDay = new Date(
    firstOfTarget.getFullYear(),
    firstOfTarget.getMonth() + 1,
    0,
  ).getDate();
  return new Date(
    firstOfTarget.getFullYear(),
    firstOfTarget.getMonth(),
    Math.min(date.getDate(), lastDay),
  );
}

// JS getDay(): 0=Sun..6=Sat.
const WEEKDAYS = {
  mon: 1, monday: 1,
  tue: 2, tues: 2, tuesday: 2,
  wed: 3, weds: 3, wednesday: 3,
  thu: 4, thur: 4, thurs: 4, thursday: 4,
  fri: 5, friday: 5,
  sat: 6, saturday: 6,
  sun: 0, sunday: 0,
};
const BYDAY = { 0: "SU", 1: "MO", 2: "TU", 3: "WE", 4: "TH", 5: "FR", 6: "SA" };

/** Weekday name (full or common abbreviation) → JS day index, or null. */
function parseWeekday(s) {
  // Own-property check so inherited keys such as "constructor" never match.
  return Object.prototype.hasOwnProperty.call(WEEKDAYS, s) ? WEEKDAYS[s] : null;
}

/** The soonest date on/after `from` whose weekday is `wd` (JS day index). */
function soonestWeekday(from, wd) {
  let candidate = from;
  for (let i = 0; i < 7; i++) {
    if (candidate.getDay() === wd) return candidate;
    candidate = addDays(candidate, 1);
  }
  // Unreachable for a valid day index (0..6); kept as a safe default.
  return from;
}

/**
 * Resolve the part after `+` in a relative spec (`3d`, `2 weeks`, `1m`, bare
 * `5` = days) against `from`. Throws on a malformed offset or an unknown unit.
 */
function parseOffset(rest, from) {
  const spec = rest.trim();
  const m = spec.match(/^(\d+)\s*([a-z]*)$/);
  if (!m) throw new Error(`not a relative date offset: +${spec}`);
  const n = parseInt(m[1], 10);
  switch (m[2]) {
    case "": case "d": case "day": case "days":
      return addDays(from, n);
    case "w": case "wk": case "week": case "weeks":
      return addDays(from, n * 7);
    case "m": case "mo": case "month": case "months":
      return addMonths(from, n);
    default:
      throw new Error(`unknown offset unit "${m[2]}" (use d, w, or m)`);
  }
}

/**
 * Parse a human date spec relative to `today` (a local-midnight Date) into a
 * local-midnight Date. Accepts: today/now, tomorrow/tom, yesterday; +Nd/+Nw/+Nm
 * (bare +N = days); weekday names (soonest on/after today); ISO YYYY-MM-DD.
 * Throws on anything unrecognized.
 */
export function parseDate(input, todayDate) {
  const s = input.trim().toLowerCase();
  if (s === "") throw new Error("empty date");
  switch (s) {
    case "today": case "now": return todayDate;
    case "tomorrow": case "tom": return addDays(todayDate, 1);
    case "yesterday": return addDays(todayDate, -1);
  }
  const wd = parseWeekday(s);
  if (wd !== null) return soonestWeekday(todayDate, wd);
  if (s.startsWith("+")) return parseOffset(s.slice(1), todayDate);

  // ISO YYYY-MM-DD (strict; construct as local midnight).
  const iso = s.match(/^(\d{4})-(\d{2})-(\d{2})$/);
  if (iso) {
    const [, y, mo, d] = iso;
    const date = new Date(Number(y), Number(mo) - 1, Number(d));
    // Round-trip check: the Date constructor silently rolls over impossible
    // dates (e.g. Feb 30), so only accept one that reads back unchanged.
    if (
      date.getFullYear() === Number(y) &&
      date.getMonth() === Number(mo) - 1 &&
      date.getDate() === Number(d)
    ) {
      return date;
    }
  }
  throw new Error(
    `unrecognized date: "${input}" (try today, tomorrow, +3d, fri, or YYYY-MM-DD)`,
  );
}

/** parseDate to epoch ms, or null if unparseable (convenience for quick-add). */
export function parseDateMsOrNull(input, todayDate) {
  try {
    return toEpochMs(parseDate(input, todayDate));
  } catch {
    // Deliberate: "not a date" is an expected outcome here, reported as null.
    return null;
  }
}

// ---------------------------------------------------------------------------
// Recurrence
// ---------------------------------------------------------------------------

const MONTHS = {
  jan: 1, feb: 2, mar: 3, apr: 4, may: 5, jun: 6,
  jul: 7, aug: 8, sep: 9, oct: 10, nov: 11, dec: 12,
};

/**
 * Parse a two-token month/day pair in either order ("jan 5th", "5 march") into
 * `[month, day]` (month 1-12), or null. The month is matched on its first three
 * letters and the day on its leading digits; the day is not range-checked here.
 */
function parseMonthDay(s) {
  const toks = s.split(/\s+/).filter(Boolean);
  if (toks.length !== 2) return null;
  const month = (t) => MONTHS[t.slice(0, 3)] ?? null;
  const day = (t) => {
    const m = t.match(/^(\d+)/);
    return m ? parseInt(m[1], 10) : null;
  };
  // Try "<month> <day>" first, then "<day> <month>".
  let m = month(toks[0]);
  let d = day(toks[1]);
  if (m !== null && d !== null) return [m, d];
  d = day(toks[0]);
  m = month(toks[1]);
  if (m !== null && d !== null) return [m, d];
  return null;
}

/** Day-of-month ordinal ("1st", "22nd", "31st") → 1..31, or null. */
function parseMonthdayOrdinal(s) {
  const m = s.match(/^(\d+)(st|nd|rd|th)$/);
  if (!m) return null;
  const d = parseInt(m[1], 10);
  return d >= 1 && d <= 31 ? d : null;
}

/**
 * RRULE for "every `n` `unit`", where `unit` is a weekday name or a
 * day/week/month/year word (singular or plural). INTERVAL is omitted when `n`
 * is 1. Throws on an unrecognized unit.
 */
function intervalForm(n, unit) {
  const wd = parseWeekday(unit);
  if (wd !== null) {
    return n === 1
      ? `FREQ=WEEKLY;BYDAY=${BYDAY[wd]}`
      : `FREQ=WEEKLY;INTERVAL=${n};BYDAY=${BYDAY[wd]}`;
  }
  let freq;
  switch (unit) {
    case "day": case "days": freq = "DAILY"; break;
    case "week": case "weeks": freq = "WEEKLY"; break;
    case "month": case "months": freq = "MONTHLY"; break;
    case "year": case "years": freq = "YEARLY"; break;
    default:
      throw new Error(
        `unrecognized recurrence "${unit}" (try daily/weekly/monthly/yearly, ` +
          `'every 3 days', 'every fri', or a raw RRULE)`,
      );
  }
  return n === 1 ? `FREQ=${freq}` : `FREQ=${freq};INTERVAL=${n}`;
}

// Parse a recurrence spec into an RFC-5545 RRULE. Accepts a raw RRULE (anything
// containing FREQ=), presets (daily/weekly/monthly/yearly/weekdays), and the
// common natural-language forms (§6.2.1): every N (day|week|month|year)s, every
// , every other , every workday, every ,
// every . A trailing "at