diff --git a/Cargo.lock b/Cargo.lock index 0a2c89f..be8f974 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2274,6 +2274,7 @@ dependencies = [ "rand 0.8.6", "reqwest", "rsa", + "semver", "serde", "serde_json", "tempfile", diff --git a/Cargo.toml b/Cargo.toml index 7d34a27..e24c881 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,6 +59,7 @@ reqwest = { version = "0.13", default-features = false, features = [ "json", "query", ] } +semver = "1" [profile.release] lto = "thin" diff --git a/crates/heph/src/service.rs b/crates/heph/src/service.rs index 6015a3d..7b15865 100644 --- a/crates/heph/src/service.rs +++ b/crates/heph/src/service.rs @@ -19,12 +19,22 @@ const LABEL: &str = "org.hephaestus.hephd"; #[derive(Subcommand, Debug)] pub enum DaemonAction { /// Install (if needed) and start the daemon service. - Start, + Start { + /// Generate a service that runs with opt-in self-update enabled + /// (default off). The service gets a PATH that can find cargo. + #[arg(long)] + self_update: bool, + }, /// Stop the daemon now (it may restart at next login; use `uninstall` to /// stop it for good). Stop, - /// Restart the daemon — run this after upgrading the binary. - Restart, + /// Restart the daemon — run this after upgrading the binary. Preserves the + /// existing self-update setting unless `--self-update` re-enables it. + Restart { + /// Force self-update on when regenerating the service definition. + #[arg(long)] + self_update: bool, + }, /// Show whether the service is installed and running. Status, /// Stop and remove the service entirely. 
@@ -114,8 +124,26 @@ fn xml_escape(s: &str) -> String { .replace('>', ">") } -fn launchd_plist(hephd: &Path, db: &Path, socket: &Path, log: &Path) -> String { +fn launchd_plist(hephd: &Path, db: &Path, socket: &Path, log: &Path, self_update: bool) -> String { let arg = |p: &Path| xml_escape(&p.to_string_lossy()); + // Opt-in self-update: pass the flag, and give the service a PATH/HOME that + // can find cargo + the toolchain (a LaunchAgent's default env can't), since + // the apply path shells out to `cargo install`. + let self_update_arg = if self_update { + "\n --self-update".to_string() + } else { + String::new() + }; + let cargo_env = if self_update { + let (path, home) = cargo_env(); + format!( + "\n PATH\n {}\n HOME\n {}", + xml_escape(&path), + xml_escape(&home), + ) + } else { + String::new() + }; format!( r#" @@ -131,7 +159,7 @@ fn launchd_plist(hephd: &Path, db: &Path, socket: &Path, log: &Path) -> String { --db {db} --socket - {socket} + {socket}{self_update_arg} RunAtLoad @@ -143,7 +171,7 @@ fn launchd_plist(hephd: &Path, db: &Path, socket: &Path, log: &Path) -> String { Aqua session as a LaunchAgent, so its child gets the GUI/hotkey it needs. Opt-in here (not in dev/test runs, which never set it). --> HEPH_QUICKADD - 1 + 1{cargo_env} StandardOutPath {log} @@ -160,15 +188,46 @@ fn launchd_plist(hephd: &Path, db: &Path, socket: &Path, log: &Path) -> String { ) } -fn systemd_unit(hephd: &Path, db: &Path, socket: &Path) -> String { +/// A `PATH`/`HOME` pair for a service that must run `cargo install`. Service +/// managers start with a minimal environment, so we prepend `~/.cargo/bin` (which +/// holds cargo and the rustup toolchain shims) to the usual locations and pin +/// `HOME`, which cargo needs for its registry/cache. 
+fn cargo_env() -> (String, String) { + let home = std::env::var("HOME").unwrap_or_default(); + let path = + format!("{home}/.cargo/bin:/usr/local/bin:/opt/homebrew/bin:/usr/bin:/bin:/usr/sbin:/sbin"); + (path, home) +} + +/// Whether an already-installed service file opted into self-update — so +/// `restart` (which regenerates the file) preserves the setting instead of +/// silently turning it off. +fn file_opts_into_self_update(path: &Path) -> bool { + std::fs::read_to_string(path) + .map(|s| s.contains("--self-update")) + .unwrap_or(false) +} + +fn systemd_unit(hephd: &Path, db: &Path, socket: &Path, self_update: bool) -> String { + // Opt-in self-update: pass the flag and give the unit a PATH/HOME that can + // find cargo + the toolchain, since the apply path runs `cargo install`. + let su_arg = if self_update { " --self-update" } else { "" }; + let cargo_env = if self_update { + let (path, home) = cargo_env(); + format!("Environment=PATH={path}\nEnvironment=HOME={home}\n") + } else { + String::new() + }; format!( "[Unit]\n\ Description=heph daemon (hephd)\n\ After=default.target\n\ \n\ [Service]\n\ - ExecStart={hephd} --mode local --db {db} --socket {socket}\n\ - Restart=on-failure\n\ + ExecStart={hephd} --mode local --db {db} --socket {socket}{su_arg}\n\ + {cargo_env}\ + Restart=always\n\ + RestartSec=1\n\ \n\ [Install]\n\ WantedBy=default.target\n", @@ -244,8 +303,11 @@ fn launchd(action: &DaemonAction, p: &Paths) -> Result<()> { let target = format!("gui/{uid}/{LABEL}"); match action { - DaemonAction::Start => { - write_if_changed(&plist, &launchd_plist(&p.hephd, &p.db, &p.socket, &p.log))?; + DaemonAction::Start { self_update } => { + write_if_changed( + &plist, + &launchd_plist(&p.hephd, &p.db, &p.socket, &p.log, *self_update), + )?; if launchd_loaded(&target) { println!("heph daemon already running ({LABEL})."); } else { @@ -260,8 +322,12 @@ fn launchd(action: &DaemonAction, p: &Paths) -> Result<()> { let (_ok, _err) = run_cmd("launchctl", &["bootout", 
&target])?; println!("heph daemon stopped (still installed; `uninstall` to remove)."); } - DaemonAction::Restart => { - write_if_changed(&plist, &launchd_plist(&p.hephd, &p.db, &p.socket, &p.log))?; + DaemonAction::Restart { self_update } => { + let su = *self_update || file_opts_into_self_update(&plist); + write_if_changed( + &plist, + &launchd_plist(&p.hephd, &p.db, &p.socket, &p.log, su), + )?; let _ = run_cmd("launchctl", &["bootout", &target])?; let (ok, err) = run_cmd("launchctl", &["bootstrap", &domain, &plist_str(&plist)?])?; if !ok { @@ -314,8 +380,11 @@ fn sc(args: &[&str]) -> Result<(bool, String)> { fn systemd(action: &DaemonAction, p: &Paths) -> Result<()> { let unit = systemd_unit_path()?; match action { - DaemonAction::Start => { - write_if_changed(&unit, &systemd_unit(&p.hephd, &p.db, &p.socket))?; + DaemonAction::Start { self_update } => { + write_if_changed( + &unit, + &systemd_unit(&p.hephd, &p.db, &p.socket, *self_update), + )?; sc(&["daemon-reload"])?; let (ok, err) = sc(&["enable", "--now", UNIT])?; if !ok { @@ -327,8 +396,9 @@ fn systemd(action: &DaemonAction, p: &Paths) -> Result<()> { sc(&["stop", UNIT])?; println!("heph daemon stopped (still enabled; `uninstall` to remove)."); } - DaemonAction::Restart => { - write_if_changed(&unit, &systemd_unit(&p.hephd, &p.db, &p.socket))?; + DaemonAction::Restart { self_update } => { + let su = *self_update || file_opts_into_self_update(&unit); + write_if_changed(&unit, &systemd_unit(&p.hephd, &p.db, &p.socket, su))?; sc(&["daemon-reload"])?; let (ok, err) = sc(&["restart", UNIT])?; if !ok { @@ -377,6 +447,7 @@ mod tests { Path::new("/home/e/.local/share/heph/heph.db"), Path::new("/tmp/heph/hephd.sock"), Path::new("/home/e/.local/share/heph/hephd.log"), + false, ); assert!(plist.contains("org.hephaestus.hephd")); assert!(plist.contains("/usr/local/bin/hephd")); @@ -386,6 +457,24 @@ mod tests { assert!(plist.contains("RunAtLoad")); assert!(plist.contains("KeepAlive")); 
assert!(plist.contains("hephd.log")); + // Default (no self-update): no flag, no cargo PATH baked in. + assert!(!plist.contains("--self-update")); + assert!(!plist.contains(".cargo/bin")); + } + + #[test] + fn launchd_plist_self_update_adds_flag_and_cargo_path() { + let plist = launchd_plist( + Path::new("/usr/local/bin/hephd"), + Path::new("/db"), + Path::new("/sock"), + Path::new("/log"), + true, + ); + assert!(plist.contains("--self-update")); + assert!(plist.contains("PATH")); + assert!(plist.contains(".cargo/bin")); + assert!(plist.contains("HOME")); } #[test] @@ -394,14 +483,36 @@ mod tests { Path::new("/usr/local/bin/hephd"), Path::new("/home/e/.local/share/heph/heph.db"), Path::new("/run/user/1000/heph/hephd.sock"), + false, ); assert!(unit.contains( "ExecStart=/usr/local/bin/hephd --mode local \ --db /home/e/.local/share/heph/heph.db \ --socket /run/user/1000/heph/hephd.sock" )); - assert!(unit.contains("Restart=on-failure")); + // Restart=always (not on-failure) so a clean exit (code 0) — what + // self-update does to hand off to the new binary — is respawned too. + assert!(unit.contains("Restart=always")); + assert!(!unit.contains("Restart=on-failure")); + assert!(unit.contains("RestartSec=")); assert!(unit.contains("WantedBy=default.target")); + // Default (no self-update): no flag, no baked env. 
+ assert!(!unit.contains("--self-update")); + assert!(!unit.contains("Environment=PATH=")); + } + + #[test] + fn systemd_unit_self_update_adds_flag_and_env() { + let unit = systemd_unit( + Path::new("/usr/local/bin/hephd"), + Path::new("/db"), + Path::new("/sock"), + true, + ); + assert!(unit.contains("--self-update")); + assert!(unit.contains("Environment=PATH=")); + assert!(unit.contains(".cargo/bin")); + assert!(unit.contains("Environment=HOME=")); } #[test] diff --git a/crates/hephd/Cargo.toml b/crates/hephd/Cargo.toml index 9bb7b9e..fb30b17 100644 --- a/crates/hephd/Cargo.toml +++ b/crates/hephd/Cargo.toml @@ -32,6 +32,7 @@ jsonwebtoken.workspace = true keyring-core.workspace = true reqwest.workspace = true ureq.workspace = true +semver.workspace = true # The OS credential backend that `oauth.rs` registers as the keyring-core # default store — exactly one per platform, not the whole keyring meta-crate. diff --git a/crates/hephd/src/lib.rs b/crates/hephd/src/lib.rs index 09f8714..5d68bad 100644 --- a/crates/hephd/src/lib.rs +++ b/crates/hephd/src/lib.rs @@ -17,6 +17,7 @@ pub mod oauth; pub mod quickadd; pub mod remote; pub mod rpc; +pub mod selfupdate; pub mod server; pub mod sync; diff --git a/crates/hephd/src/main.rs b/crates/hephd/src/main.rs index 62df200..ad3517c 100644 --- a/crates/hephd/src/main.rs +++ b/crates/hephd/src/main.rs @@ -17,8 +17,8 @@ use tokio::net::{TcpListener, UnixListener}; use heph_core::LocalStore; use hephd::{ - default_db_path, default_socket_path, sync, Daemon, KeyringTokenStore, LockGuard, RemoteStore, - SystemClock, TokenStore, + default_db_path, default_socket_path, selfupdate::SelfUpdateConfig, sync, Daemon, + KeyringTokenStore, LockGuard, RemoteStore, SystemClock, TokenStore, }; /// How often a spoke background-syncs with its hub. @@ -77,6 +77,16 @@ struct Cli { /// --oidc-issuer, the device attaches a cached bearer token to hub requests. 
#[arg(long)] oidc_client_id: Option, + + /// Opt-in (default off): periodically poll the forge for a newer release and + /// auto-update this daemon. Off unless this flag is given. + #[arg(long)] + self_update: bool, + + /// Override the self-update poll interval, in seconds (default: 6h). Only + /// meaningful with --self-update. + #[arg(long)] + self_update_interval_secs: Option, } /// Build the spoke/client token source: a keyring store keyed by `account` (the @@ -112,6 +122,11 @@ async fn main() -> Result<()> { .with_context(|| format!("creating socket dir {}", parent.display()))?; } + // Opt-in self-update (default off): `Some` only when `--self-update` is set. + let self_update = cli + .self_update + .then(|| SelfUpdateConfig::new(cli.self_update_interval_secs.map(Duration::from_secs))); + // Build the daemon for the chosen mode. `local`/`server` own the file (and // hold its lock for the process's life); `client` keeps no replica. let (_lock, daemon) = match cli.mode { @@ -131,7 +146,10 @@ async fn main() -> Result<()> { } None => RemoteStore::new(&server_url), }; - (None, Daemon::new(store)) + ( + None, + Daemon::new(store).with_self_update(self_update.clone()), + ) } Mode::Local | Mode::Server => { let db = cli.db.clone().unwrap_or_else(default_db_path); @@ -147,7 +165,8 @@ async fn main() -> Result<()> { }); let daemon = Daemon::new(store) .with_hub(cli.hub_url.clone()) - .with_spoke_auth(spoke); + .with_spoke_auth(spoke) + .with_self_update(self_update.clone()); // server mode: expose the hub HTTP endpoint over the same store. if cli.mode == Mode::Server { @@ -190,6 +209,9 @@ async fn main() -> Result<()> { } }; + // Opt-in self-update poller (no-op unless --self-update); mode-agnostic. + daemon.spawn_self_update_loop(); + // Replace any stale socket from a previous run, then bind. 
if socket.exists() { std::fs::remove_file(&socket) diff --git a/crates/hephd/src/selfupdate.rs b/crates/hephd/src/selfupdate.rs new file mode 100644 index 0000000..8c7d470 --- /dev/null +++ b/crates/hephd/src/selfupdate.rs @@ -0,0 +1,417 @@ +//! Opt-in self-update (cards: `docs/how-to/self-update/`). When enabled, hephd +//! polls the forge for a newer tagged release and rebuilds + restarts onto it. +//! +//! The moving parts are dependency-injected behind traits — [`ReleaseSource`] +//! (where the latest tag comes from), [`Installer`] (how the upgrade is +//! applied) and [`Restarter`] (how the process hands off to the new binary) — +//! so the poll/apply logic is unit-tested without a live forge, a real +//! `cargo install`, or exiting the test process. The production wiring +//! (`ForgeReleaseSource`, `CargoInstaller`, `ProcessRestarter`) is exercised +//! only at runtime. + +use std::sync::Arc; +use std::time::Duration; + +use anyhow::{Context, Result}; + +/// Default poll cadence when `--self-update` is on and no interval is given. +pub const DEFAULT_INTERVAL: Duration = Duration::from_secs(6 * 60 * 60); + +/// Configuration for the opt-in self-update mode. Its mere presence (the daemon +/// holds an `Option<SelfUpdateConfig>`) means the mode is enabled; absent ⇒ off. +#[derive(Clone, Debug)] +pub struct SelfUpdateConfig { + /// How often to poll the forge for a newer release. + pub interval: Duration, +} + +impl SelfUpdateConfig { + /// Build a config, falling back to [`DEFAULT_INTERVAL`] when no override. + /// + /// NOTE(review): a zero override is stored as-is, and `run_poll_loop` passes + /// the interval to `tokio::time::interval`, which panics on a zero period — + /// consider rejecting or clamping `0` here. + pub fn new(interval: Option) -> Self { + Self { + interval: interval.unwrap_or(DEFAULT_INTERVAL), + } + } +} + +/// The forge releases feed for this project — the latest tagged release. The +/// repo is public, so this is an unauthenticated GET on the canonical public +/// host. +pub const RELEASES_LATEST_URL: &str = + "https://forge.eblu.me/api/v1/repos/eblume/hephaestus/releases/latest"; + +/// Extract the bare `X.Y.Z` semver from a version string that may carry a build +/// suffix (`heph_core::VERSION` is e.g. `"1.0.3 (aa376b4)"`) or a leading `v` +/// (release tags are `v1.0.4`). 
+fn parse_version(s: &str) -> Result { + let head = s + .trim() + .trim_start_matches('v') + .split_whitespace() + .next() + .unwrap_or(""); + semver::Version::parse(head).with_context(|| format!("parsing version {s:?}")) +} + +/// Whether `latest_tag` names a strictly newer release than `current` (the +/// running `heph_core::VERSION`). A malformed version on either side is an +/// error — never a silent "no update". +pub fn update_available(current: &str, latest_tag: &str) -> Result { + Ok(parse_version(latest_tag)? > parse_version(current)?) +} + +/// Pull the `tag_name` out of a Forgejo/Gitea `releases/latest` response body. +/// Split out from the HTTP fetch so it can be tested against a sample payload. +pub fn parse_latest_tag(body: &str) -> Result { + #[derive(serde::Deserialize)] + struct Release { + tag_name: String, + } + let rel: Release = + serde_json::from_str(body).context("parsing forge releases/latest response")?; + Ok(rel.tag_name) +} + +/// Fetch the latest release tag from the forge over HTTP (reusing the daemon's +/// shared `reqwest::Client`). Network/HTTP/JSON failures surface as `Err` for +/// the caller to log-and-continue. +pub async fn fetch_latest_tag(http: &reqwest::Client, url: &str) -> Result { + let body = http + .get(url) + .send() + .await + .context("requesting forge releases/latest")? + .error_for_status() + .context("forge releases/latest returned an error status")? + .text() + .await + .context("reading forge releases/latest body")?; + parse_latest_tag(&body) +} + +/// Where "the latest release tag" comes from. Injectable so the poll loop can +/// be exercised without hitting the network (real impl: [`ForgeReleaseSource`]). +pub trait ReleaseSource: Send + Sync + 'static { + fn latest_tag(&self) -> impl std::future::Future> + Send; +} + +/// The production source: the forge's `releases/latest` over HTTP. 
+pub struct ForgeReleaseSource { + http: reqwest::Client, + url: String, +} + +impl ForgeReleaseSource { + /// Source backed by the daemon's shared client, hitting [`RELEASES_LATEST_URL`]. + pub fn new(http: reqwest::Client) -> Self { + Self { + http, + url: RELEASES_LATEST_URL.to_string(), + } + } +} + +impl ReleaseSource for ForgeReleaseSource { + async fn latest_tag(&self) -> Result { + fetch_latest_tag(&self.http, &self.url).await + } +} + +/// The result of one self-update check — kept separate from logging so it can be +/// asserted in tests. +#[derive(Debug, PartialEq, Eq)] +pub enum CheckOutcome { + /// The running version is at or ahead of the latest release. + UpToDate, + /// A strictly newer release exists, named by this tag (e.g. `v1.0.4`). + UpdateAvailable(String), + /// The check failed (forge unreachable, bad body, unparseable version). + Failed(String), +} + +/// Run one check against `source`, comparing the latest tag to `current`. Never +/// returns `Err` — a failure is folded into [`CheckOutcome::Failed`] so the loop +/// keeps going (a flaky forge must never crash or stall the daemon). +pub async fn check_release(source: &S, current: &str) -> CheckOutcome { + match source.latest_tag().await { + Ok(tag) => match update_available(current, &tag) { + Ok(true) => CheckOutcome::UpdateAvailable(tag), + Ok(false) => CheckOutcome::UpToDate, + Err(e) => CheckOutcome::Failed(e.to_string()), + }, + Err(e) => CheckOutcome::Failed(e.to_string()), + } +} + +/// The git URL self-update installs from. hephaestus is a **public** repo, and +/// `cargo install --git` is a plain anonymous git clone — *not* the Forgejo +/// cargo *registry* (that's access-restricted and needs `forge.ops.eblu.me`; +/// this is unrelated). So a credential-free HTTPS clone of the canonical public +/// host works from any device. 
+pub const INSTALL_GIT_URL: &str = "https://forge.eblu.me/eblume/hephaestus.git"; + +/// All workspace binaries, installed in lockstep so `heph`/`hephd`/`heph-tui`/ +/// `heph-quickadd` never skew after an update. +pub const INSTALL_BINS: &[&str] = &["heph", "hephd", "heph-tui", "heph-quickadd"]; + +/// Applies a detected upgrade. Injectable so the apply path is testable without +/// spawning a real (minutes-long) `cargo install` (real impl: [`CargoInstaller`]). +pub trait Installer: Send + Sync + 'static { + /// Install the binaries for release `tag` (e.g. `v1.0.4`). Blocking. + fn install(&self, tag: &str) -> Result<()>; +} + +/// The production installer: `cargo install --locked --git <url> --tag <tag>` +/// for every workspace binary — the exact command the install how-to documents. +/// `<url>` is [`INSTALL_GIT_URL`] and the binaries are [`INSTALL_BINS`]. +pub struct CargoInstaller; + +impl Installer for CargoInstaller { + fn install(&self, tag: &str) -> Result<()> { + let mut cmd = std::process::Command::new("cargo"); + cmd.args([ + "install", + "--locked", + "--git", + INSTALL_GIT_URL, + "--tag", + tag, + ]); + cmd.args(INSTALL_BINS); + let status = cmd.status().context("spawning cargo install")?; + if !status.success() { + anyhow::bail!("cargo install for {tag} exited with {status}"); + } + Ok(()) + } +} + +/// Hands off to the freshly-installed binary. Injectable so the apply path is +/// testable without actually exiting the test process (real: [`ProcessRestarter`]). +pub trait Restarter: Send + Sync + 'static { + /// Restart onto the new binary. The production impl does not return. + fn restart(&self) -> Result<()>; +} + +/// The production restarter: exit cleanly so the OS service manager (launchd +/// `KeepAlive` / systemd `Restart=always`) respawns the new binary. In-flight +/// RPC connections simply drop; clients reconnect (the nvim plugin already does). 
+pub struct ProcessRestarter; + +impl Restarter for ProcessRestarter { + fn restart(&self) -> Result<()> { + tracing::info!("self-update: exiting to let the service manager start the new binary"); + std::process::exit(0); + } +} + +/// Apply a detected update: install the binaries for `tag`, then restart onto +/// them. The blocking install runs on the blocking pool so it never stalls the +/// async runtime; the restart only happens if the install succeeded. +pub async fn apply_update( + installer: Arc, + restarter: Arc, + tag: &str, +) -> Result<()> { + let owned = tag.to_string(); + tokio::task::spawn_blocking(move || installer.install(&owned)) + .await + .context("self-update install task panicked")??; + tracing::info!(%tag, "self-update: installed; restarting into the new binary"); + restarter.restart() +} + +/// The background poll loop: tick on `interval`, check for a newer release, and +/// when one is available, apply it. Runs forever; spawned as a task. +pub async fn run_poll_loop( + source: S, + installer: Arc, + restarter: Arc, + interval: Duration, + current: &'static str, +) { + let mut tick = tokio::time::interval(interval); + loop { + tick.tick().await; + match check_release(&source, current).await { + CheckOutcome::UpdateAvailable(tag) => { + tracing::info!(%tag, current, "self-update: newer release available, applying"); + // On success the restarter exits the process, so this only + // returns on failure — log it and keep polling. + if let Err(e) = apply_update(installer.clone(), restarter.clone(), &tag).await { + tracing::error!("self-update: failed for {tag}: {e}"); + } + } + CheckOutcome::UpToDate => tracing::debug!(current, "self-update: up to date"), + CheckOutcome::Failed(e) => tracing::warn!("self-update: release check failed: {e}"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// A canned release source for deterministic loop/decision tests. 
+ struct FakeSource(Result); + impl ReleaseSource for FakeSource { + async fn latest_tag(&self) -> Result { + self.0.clone().map_err(|e| anyhow::anyhow!(e)) + } + } + + /// Records install calls; optionally fails, to drive the apply path. + #[derive(Default)] + struct FakeInstaller { + installed: std::sync::Mutex>, + fail: bool, + } + impl Installer for FakeInstaller { + fn install(&self, tag: &str) -> Result<()> { + self.installed.lock().unwrap().push(tag.to_string()); + if self.fail { + anyhow::bail!("simulated install failure"); + } + Ok(()) + } + } + + /// Records whether a restart was requested (instead of exiting the process). + #[derive(Default)] + struct FakeRestarter { + restarted: std::sync::Mutex, + } + impl Restarter for FakeRestarter { + fn restart(&self) -> Result<()> { + *self.restarted.lock().unwrap() = true; + Ok(()) + } + } + + #[test] + fn install_and_release_urls_are_public_https_no_ssh() { + // hephaestus is public; cargo install --git is a plain clone (not the + // access-restricted Forgejo cargo registry), so no SSH / credentials. 
+ for url in [INSTALL_GIT_URL, RELEASES_LATEST_URL] { + assert!(url.starts_with("https://"), "{url} must be HTTPS"); + assert!(!url.contains("ssh://"), "{url} must not use SSH"); + assert!( + url.contains("forge.eblu.me"), + "{url} should use the canonical public host" + ); + } + } + + #[tokio::test] + async fn apply_update_installs_then_restarts_on_success() { + let inst = Arc::new(FakeInstaller::default()); + let restart = Arc::new(FakeRestarter::default()); + apply_update(inst.clone(), restart.clone(), "v1.0.4") + .await + .unwrap(); + assert_eq!(*inst.installed.lock().unwrap(), vec!["v1.0.4".to_string()]); + assert!( + *restart.restarted.lock().unwrap(), + "should restart on success" + ); + } + + #[tokio::test] + async fn apply_update_does_not_restart_when_install_fails() { + let inst = Arc::new(FakeInstaller { + fail: true, + ..Default::default() + }); + let restart = Arc::new(FakeRestarter::default()); + assert!(apply_update(inst.clone(), restart.clone(), "v1.0.4") + .await + .is_err()); + assert_eq!(*inst.installed.lock().unwrap(), vec!["v1.0.4".to_string()]); + assert!( + !*restart.restarted.lock().unwrap(), + "must NOT restart after a failed install" + ); + } + + #[tokio::test] + async fn check_release_reports_outcomes_from_a_stubbed_source() { + // Newer release available. + let s = FakeSource(Ok("v1.0.4".into())); + assert_eq!( + check_release(&s, "1.0.3 (sha)").await, + CheckOutcome::UpdateAvailable("v1.0.4".into()) + ); + // Already current. + let s = FakeSource(Ok("v1.0.3".into())); + assert_eq!( + check_release(&s, "1.0.3 (sha)").await, + CheckOutcome::UpToDate + ); + // Fetch failure → folded into Failed, never a panic/Err. + let s = FakeSource(Err("forge unreachable")); + assert!(matches!( + check_release(&s, "1.0.3 (sha)").await, + CheckOutcome::Failed(_) + )); + // Malformed tag → Failed. 
+ let s = FakeSource(Ok("not-a-tag".into())); + assert!(matches!( + check_release(&s, "1.0.3 (sha)").await, + CheckOutcome::Failed(_) + )); + } + + #[test] + fn config_defaults_interval_and_honors_override() { + assert_eq!(SelfUpdateConfig::new(None).interval, DEFAULT_INTERVAL); + assert_eq!( + SelfUpdateConfig::new(Some(Duration::from_secs(900))).interval, + Duration::from_secs(900) + ); + } + + #[test] + fn update_available_compares_ignoring_build_suffix_and_v_prefix() { + // Running version carries a build-sha suffix; tags carry a `v`. + assert!(update_available("1.0.3 (aa376b4)", "v1.0.4").unwrap()); + assert!(update_available("1.0.3 (aa376b4)", "v2.0.0").unwrap()); + // Same version → no update (a dirty rebuild of the same tag isn't newer). + assert!(!update_available("1.0.3 (aa376b4-dirty)", "v1.0.3").unwrap()); + // Older tag than running → no update. + assert!(!update_available("1.0.3", "v1.0.2").unwrap()); + // Patch/minor/major ordering. + assert!(update_available("1.0.9", "v1.1.0").unwrap()); + assert!(!update_available("1.1.0", "v1.0.9").unwrap()); + } + + #[test] + fn update_available_errors_on_malformed_version() { + assert!(update_available("not-a-version", "v1.0.4").is_err()); + assert!(update_available("1.0.3", "vNope").is_err()); + } + + #[test] + fn parse_latest_tag_reads_tag_name_from_forge_body() { + // A trimmed sample of a Forgejo releases/latest payload. + let body = r#"{ + "id": 42, + "tag_name": "v1.0.4", + "name": "Release v1.0.4", + "draft": false, + "prerelease": false + }"#; + assert_eq!(parse_latest_tag(body).unwrap(), "v1.0.4"); + } + + #[test] + fn parse_latest_tag_errors_on_unexpected_body() { + assert!(parse_latest_tag("{}").is_err()); + assert!(parse_latest_tag("not json").is_err()); + } + + #[test] + fn end_to_end_body_to_decision() { + // Parse a release body, then decide against a fixed running version. 
+ let tag = parse_latest_tag(r#"{"tag_name": "v1.0.4"}"#).unwrap(); + assert!(update_available("1.0.3 (aa376b4)", &tag).unwrap()); + let tag = parse_latest_tag(r#"{"tag_name": "v1.0.3"}"#).unwrap(); + assert!(!update_available("1.0.3 (aa376b4)", &tag).unwrap()); + } +} diff --git a/crates/hephd/src/server.rs b/crates/hephd/src/server.rs index 389b7ea..bf245ba 100644 --- a/crates/hephd/src/server.rs +++ b/crates/hephd/src/server.rs @@ -21,6 +21,7 @@ use heph_core::Store; use crate::oauth::{self, TokenStore}; use crate::rpc::{self, Request, Response, RpcError, INTERNAL_ERROR, PARSE_ERROR}; +use crate::selfupdate::{self, SelfUpdateConfig}; use crate::sync::{self, SharedStore}; /// How a spoke obtains the bearer token it presents to its hub (tech-spec §13). @@ -40,6 +41,8 @@ struct Ctx { http: reqwest::Client, /// Token source for authenticated sync (None ⇒ unauthenticated hub). auth: Option, + /// Opt-in self-update config (`Some` ⇒ enabled, tech-spec self-update card). + self_update: Option, } impl Ctx { @@ -74,8 +77,16 @@ impl Daemon { ctx: Ctx { store: Arc::new(Mutex::new(store)), hub_url: None, - http: reqwest::Client::new(), + // Bound every hub request so a black-hole hub (one that accepts + // a connection but never replies) can't stall the sync / + // self-update loops — "the hub can vanish at any moment" is the + // base case, including vanishing mid-request. + http: reqwest::Client::builder() + .timeout(Duration::from_secs(30)) + .build() + .expect("building the daemon HTTP client"), auth: None, + self_update: None, }, } } @@ -100,12 +111,52 @@ impl Daemon { self } + /// Enable opt-in self-update with the given config (`None` ⇒ stays off). + pub fn with_self_update(mut self, cfg: Option) -> Daemon { + self.ctx.self_update = cfg; + self + } + /// The shared store handle, for code that needs to reach the same store the /// daemon serves (the hub HTTP router and background sync, tech-spec §6.1). 
pub fn store(&self) -> SharedStore { self.ctx.store.clone() } + /// If self-update is enabled, spawn its background poller: every + /// `cfg.interval` it checks the forge for a newer release and, when one is + /// found, installs it and restarts onto the new binary. No-op when off. + /// + /// Note: the *apply* path runs `cargo install` and exits, which only works + /// when the daemon's service environment can find cargo and reach the forge + /// over HTTPS (an anonymous clone of the public repo; no SSH or credentials) + /// — see the `service-env-forge-access` card (the deployment step that makes + /// this operational). + pub fn spawn_self_update_loop(&self) { + let Some(cfg) = self.ctx.self_update.clone() else { + return; + }; + let source = selfupdate::ForgeReleaseSource::new(self.ctx.http.clone()); + let installer: std::sync::Arc = + std::sync::Arc::new(selfupdate::CargoInstaller); + let restarter: std::sync::Arc = + std::sync::Arc::new(selfupdate::ProcessRestarter); + tracing::info!( + interval_secs = cfg.interval.as_secs(), + current = heph_core::VERSION, + "self-update enabled" + ); + tokio::spawn(async move { + selfupdate::run_poll_loop( + source, + installer, + restarter, + cfg.interval, + heph_core::VERSION, + ) + .await; + }); + } + /// If this is a spoke (`hub_url` set), spawn a background task that syncs the /// op-log with the hub every `interval` (attaching a bearer token when auth /// is configured). No-op otherwise. 
diff --git a/crates/hephd/tests/sync_http.rs b/crates/hephd/tests/sync_http.rs index de8b7bf..ed093ce 100644 --- a/crates/hephd/tests/sync_http.rs +++ b/crates/hephd/tests/sync_http.rs @@ -84,6 +84,65 @@ async fn a_node_propagates_a_to_hub_to_b() { assert_eq!(on_b.body.as_deref(), Some("shingles need work")); } +#[tokio::test] +async fn spoke_survives_an_unreachable_hub_then_reconciles_when_it_returns() { + // "The hub can vanish at any moment" is the base case, not a guarded edge: + // a spoke whose hub is down keeps serving + accepting writes, and when the + // hub returns its accumulated ops reconcile with no special recovery. This + // is what makes a self-updating hub (which restarts under its spokes) safe. + let http = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(5)) // never hang the test + .build() + .unwrap(); + let (a, _ca, _da) = replica(1000); + + // Hub down: work happens locally, and a sync attempt fails *fast* (Err — not + // a panic, not a hang) and leaves the store untouched. + let id = { + let mut ga = a.lock().unwrap(); + ga.create_node(NewNode::doc( + "Offline note", + "written while the hub was down", + )) + .unwrap() + .id + }; + let dead_hub = "http://127.0.0.1:1"; // nothing listens → connection refused + assert!( + sync::sync_once(a.clone(), dead_hub, &http, None) + .await + .is_err(), + "sync against a dead hub should error, not hang or panic" + ); + + // The spoke is unharmed: the note is intact and further writes still succeed. + { + let mut ga = a.lock().unwrap(); + assert_eq!(ga.get_node(&id).unwrap().unwrap().title, "Offline note"); + ga.create_node(NewNode::doc("Another", "still working offline")) + .unwrap(); + } + + // The hub returns: the spoke pushes everything it accumulated while offline, + // and a fresh replica pulls it — convergence resumes, no manual recovery. 
+ let hub_url = start_hub().await; + let up = sync::sync_once(a.clone(), &hub_url, &http, None) + .await + .unwrap(); + assert!(up.pushed > 0, "spoke pushed nothing after the hub returned"); + let (b, _cb, _db) = replica(1000); + sync::sync_once(b.clone(), &hub_url, &http, None) + .await + .unwrap(); + let on_b = b + .lock() + .unwrap() + .get_node(&id) + .unwrap() + .expect("offline-authored node reached B after the hub recovered"); + assert_eq!(on_b.title, "Offline note"); +} + #[tokio::test] async fn divergent_scalar_edits_converge_through_the_hub_with_a_conflict() { let hub_url = start_hub().await; diff --git a/docs/changelog.d/hephd-self-update.feature.md b/docs/changelog.d/hephd-self-update.feature.md new file mode 100644 index 0000000..90cd33d --- /dev/null +++ b/docs/changelog.d/hephd-self-update.feature.md @@ -0,0 +1 @@ +Opt-in (default off) **hephd self-update**: `hephd --self-update` polls the forge for a newer release on an interval and, when one appears, rebuilds via `cargo install` from the release tag (anonymous HTTPS clone of the public repo — no credentials) and restarts onto the new binary. Enable it on the managed service with `heph daemon start --self-update` (which also bakes a cargo-capable `PATH` into the launchd/systemd unit and switches systemd to `Restart=always` so a clean self-exit respawns). The install mechanism is verified end-to-end; a live cross-version upgrade is confirmed on the first release after this lands. Also hardens hub resilience: the daemon's HTTP client now has a 30s timeout so a black-hole hub can't stall the sync/self-update loop. diff --git a/docs/how-to/how-to.md b/docs/how-to/how-to.md index 9a3a758..eb0a6c8 100644 --- a/docs/how-to/how-to.md +++ b/docs/how-to/how-to.md @@ -20,3 +20,4 @@ Task-oriented guides for common operations. 
- [[run-the-daemon]] — Run `hephd` as an OS service with `heph daemon start/stop/restart/status` - [[set-up-sync-hub]] — Stand up the canonical hub (indri) and connect an existing device as an offline-capable spoke - [[import-todoist]] — Seed a heph store from your Todoist projects + tasks (`mise run import-todoist`) +- [[self-update]] — Opt-in `hephd` self-update: poll the forge for new releases and auto-update diff --git a/docs/how-to/run-the-daemon.md b/docs/how-to/run-the-daemon.md index 8ada221..2b00dff 100644 --- a/docs/how-to/run-the-daemon.md +++ b/docs/how-to/run-the-daemon.md @@ -28,9 +28,14 @@ when it's already stopped is fine. `~/Library/LaunchAgents/org.hephaestus.hephd.plist`, with `RunAtLoad` + `KeepAlive` (starts at login, restarts if it crashes). - **Linux** — a **systemd user service** (`heph.service`) at - `~/.config/systemd/user/heph.service`, with `Restart=on-failure`, enabled for + `~/.config/systemd/user/heph.service`, with `Restart=always`, enabled for login. +> **Upgrading from an older install:** earlier units used `Restart=on-failure`, +> which does **not** respawn after a clean exit — so opt-in self-update (which +> exits cleanly to hand off to the new binary) wouldn't come back on Linux. Run +> `heph daemon restart` once (it regenerates the unit) to pick up `Restart=always`. + Either way it runs `hephd --mode local` against the default store (`~/.local/share/heph/heph.db`) and socket, with logs at `~/.local/share/heph/hephd.log`. @@ -48,6 +53,16 @@ still the old binary until you restart it: heph daemon restart ``` +## Self-update (opt-in) + +`hephd` can keep itself current: `heph daemon start --self-update` generates a +service that polls the forge for newer releases and, when one appears, rebuilds +via `cargo install` (anonymous HTTPS clone of the public repo — no credentials) +and restarts onto the new binary. It is **off by default**; the generated +service also gets a `PATH` that can find cargo. 
`heph daemon restart` preserves +the setting (pass `--self-update` again to turn it on later). Requires the Rust +toolchain (`cargo`) installed for the service user. + ## Development isolation `heph daemon` manages the **installed** daemon on the default paths. For in-repo diff --git a/docs/how-to/self-update.md b/docs/how-to/self-update.md new file mode 100644 index 0000000..d4dda1f --- /dev/null +++ b/docs/how-to/self-update.md @@ -0,0 +1,56 @@ +--- +title: hephd self-update +modified: 2026-06-04 +tags: + - how-to +--- + +# hephd self-update + +`hephd` can keep itself current: it polls the forge for a newer release and, when +one appears, rebuilds and restarts onto it — unattended. It is **opt-in and off +by default**. + +## Enable it + +On the managed service: + +```bash +heph daemon start --self-update +``` + +That generates a launchd/systemd service that runs `hephd --self-update` and +gives it a `PATH` that can find `cargo`. `heph daemon restart` preserves the +setting (pass `--self-update` again to turn it on later). To run the daemon +directly instead: + +```bash +hephd --self-update # default: poll every 6h +hephd --self-update --self-update-interval-secs 3600 +``` + +## How it works + +1. Each interval, `hephd` GETs the forge's `releases/latest` and compares the tag + against its own version (the one `heph --version` reports). +2. On a newer release it runs `cargo install --locked --git <repo-url> + --tag vX.Y.Z` for `heph`/`hephd`/`heph-tui`/`heph-quickadd`. hephaestus is a + public repo, so this is an anonymous clone — **no credentials**. +3. On a successful install it exits cleanly; the service manager (launchd + `KeepAlive` / systemd `Restart=always`) brings the new binary up. + +A failed poll or build is logged and the daemon keeps running on its current +version — self-update never takes the daemon down. + +## Requirements & notes + +- The **Rust toolchain** (`cargo`) must be installed for the service user; the + update builds from source. 
+- Off by default — nothing happens unless `--self-update` is passed. +- The first real cross-version upgrade is observable on the first release cut + after enabling it. + +## Related + +- [[run-the-daemon]] — running `hephd` as an OS service +- [[install-heph]] — installing the binaries