diff --git a/crates/heph-tui/src/ui.rs b/crates/heph-tui/src/ui.rs index 6e15453..f6d2f37 100644 --- a/crates/heph-tui/src/ui.rs +++ b/crates/heph-tui/src/ui.rs @@ -570,7 +570,9 @@ fn sync_indicator(sync: &SyncStatus, now: i64) -> Vec> { let health = sync.health.clone().unwrap_or_default(); let mut spans = vec![if health.auth_failure { - Span::styled("⚠ auth", red) + // Point at the recovery command — `heph auth status` prints the exact + // `heph auth login …` to run (the full command is too long for the bar). + Span::styled("⚠ auth · heph auth status", red) } else if let Some(ts) = health.last_success_ms { Span::styled(format!("⟳ {}", fmt_age(now, ts)), dim) } else if health.last_error.is_some() { @@ -639,7 +641,7 @@ mod tests { }, 0, ); - assert_eq!(render(&auth, NOW), "⚠ auth"); + assert_eq!(render(&auth, NOW), "⚠ auth · heph auth status"); // Errored with no prior success → offline. let offline = spoke( diff --git a/crates/heph/src/main.rs b/crates/heph/src/main.rs index c327f1d..28d3b5e 100644 --- a/crates/heph/src/main.rs +++ b/crates/heph/src/main.rs @@ -344,7 +344,7 @@ enum ConflictAction { }, } -#[derive(Subcommand, Debug)] +#[derive(Subcommand, Debug, Clone)] enum AuthAction { /// Log in via the device-code flow; caches the bearer token for hub sync. Login { @@ -367,6 +367,9 @@ enum AuthAction { #[arg(long)] hub_url: String, }, + /// Show this spoke's auth health and, if re-auth is needed, the exact + /// `heph auth login` command to run. Queries the daemon. + Status, } /// Run the device-code flow (or clear a token) — no daemon needed. @@ -396,10 +399,63 @@ fn run_auth(action: AuthAction) -> Result<()> { KeyringTokenStore::new(hub_url.as_str()).clear()?; println!("Logged out of {hub_url}."); } + AuthAction::Status => unreachable!("auth status is handled via the daemon"), } Ok(()) } +/// Render `heph auth status` from a `sync.status` RPC response: hub/issuer/client +/// id, whether auth is healthy or needs re-login, and — when it does — the exact +/// command to run (built daemon-side, keyed under the right hub URL). +fn print_auth_status(status: &Value) { + let Some(hub) = status.get("hub_url").and_then(Value::as_str) else { + println!("This instance is standalone (no hub configured); auth does not apply."); + return; + }; + let auth = status.get("auth"); + let issuer = auth.and_then(|a| a.get("issuer")).and_then(Value::as_str); + let client_id = auth + .and_then(|a| a.get("client_id")) + .and_then(Value::as_str); + let health = status.get("health"); + let auth_failure = health + .and_then(|h| h.get("auth_failure")) + .and_then(Value::as_bool) + .unwrap_or(false); + let last_error = health + .and_then(|h| h.get("last_error")) + .and_then(Value::as_str); + let last_success = health + .and_then(|h| h.get("last_success_ms")) + .and_then(Value::as_i64); + + println!("hub : {hub}"); + if let Some(iss) = issuer { + println!("issuer : {iss}"); + } + if let Some(cid) = client_id { + println!("client id : {cid}"); + } + println!( + "auth : {}", + if auth_failure { + "FAILED — re-authentication required" + } else if last_success.is_some() { + "ok" + } else { + "unknown (no successful sync yet)" + } + ); + if let Some(err) = last_error { + println!("last error : {err}"); + } + if auth_failure { + if let Some(cmd) = status.get("reauth_command").and_then(Value::as_str) { + println!("\nTo re-authenticate, run:\n {cmd}"); + } + } +} + fn main() -> Result<()> { let cli = Cli::parse(); @@ -407,9 +463,13 @@ fn main() -> Result<()> { if let Command::Daemon { action } = &cli.command { return service::run(action); } - // `auth` runs locally (device-code flow + keyring); it needs no daemon. - if let Command::Auth { action } = cli.command { - return run_auth(action); + // `auth login`/`logout` run locally (device-code flow + keyring); they need + // no daemon. `auth status` reads live sync health, so it falls through to the + // connected path below. + if let Command::Auth { action } = &cli.command { + if !matches!(action, AuthAction::Status) { + return run_auth(action.clone()); + } } let socket = cli.socket.unwrap_or_else(default_socket_path); @@ -790,7 +850,13 @@ fn main() -> Result<()> { let n = result.as_u64().unwrap_or(0); println!("Rewrote legacy [[Name]] links to [[id]] in {n} node(s)."); } - Command::Auth { .. } => unreachable!("auth is handled before connecting"), + Command::Auth { + action: AuthAction::Status, + } => { + let result = client.call("sync.status", json!({}))?; + print_auth_status(&result); + } + Command::Auth { .. } => unreachable!("auth login/logout handled before connecting"), Command::Daemon { .. } => unreachable!("daemon is handled before connecting"), } Ok(()) diff --git a/crates/hephd/src/auth.rs b/crates/hephd/src/auth.rs index c601d90..6b80e95 100644 --- a/crates/hephd/src/auth.rs +++ b/crates/hephd/src/auth.rs @@ -38,9 +38,45 @@ pub enum AuthError { /// The token was present but failed validation. #[error("invalid token: {0}")] Invalid(String), - /// The identity provider could not be reached to fetch keys. + /// The identity provider could not be reached at all (DNS, TLS, connection + /// refused, timeout) — a transport failure, distinct from a rejection. #[error("identity provider unreachable: {0}")] - Provider(String), + Unreachable(String), + /// The identity provider *was* reached but returned an HTTP error response — + /// e.g. `400 invalid_grant` on a refresh, meaning the token was rejected + /// (expired/rotated/session-invalidated), not that the IdP was down. The + /// distinction matters: "unreachable" sends debugging toward the network; + /// this points at the token/authorization. + #[error("identity provider rejected the request: {0}")] + Rejected(String), + /// Some other failure in the auth path that is neither a transport failure + /// nor an HTTP rejection — a malformed/unparseable IdP response, or a local + /// credential-store (keyring) error. Kept distinct so neither is mislabeled + /// as "unreachable". + #[error("auth error: {0}")] + Other(String), +} + +impl AuthError { + /// Build a [`AuthError::Rejected`] from an HTTP status and the OAuth error + /// body (RFC 6749 §5.2), e.g. `HTTP 400 (invalid_grant): Token is expired`. + pub fn rejected(status: u16, error: Option<&str>, description: Option<&str>) -> AuthError { + let mut msg = format!("HTTP {status}"); + if let Some(e) = error.filter(|e| !e.is_empty()) { + msg.push_str(&format!(" ({e})")); + } + if let Some(d) = description.filter(|d| !d.is_empty()) { + msg.push_str(&format!(": {d}")); + } + AuthError::Rejected(msg) + } + + /// Whether this is an authorization-level rejection (the IdP refused the + /// grant) rather than a transport failure — i.e. re-authentication is the + /// likely fix, not network troubleshooting. + pub fn is_rejection(&self) -> bool { + matches!(self, AuthError::Rejected(_)) + } } /// Verifies a bearer token and returns its [`Claims`]. A trait so the hub can be @@ -92,16 +128,13 @@ impl OidcVerifier { .http .get(url) .call() - .map_err(|e| AuthError::Provider(e.to_string()))?; + .map_err(|e| AuthError::Unreachable(e.to_string()))?; if !resp.status().is_success() { - return Err(AuthError::Provider(format!( - "{url} returned {}", - resp.status() - ))); + return Err(AuthError::rejected(resp.status().as_u16(), None, None)); } resp.body_mut() .read_json() - .map_err(|e| AuthError::Provider(e.to_string())) + .map_err(|e| AuthError::Unreachable(e.to_string())) } /// Resolve the JWKS URI from the provider's discovery document. @@ -169,3 +202,38 @@ impl TokenVerifier for OidcVerifier { Some((&self.issuer, &self.audience)) } } + +#[cfg(test)] +mod tests { + use super::AuthError; + + #[test] + fn rejected_formats_status_error_and_description() { + let e = AuthError::rejected(400, Some("invalid_grant"), Some("Token is not active")); + assert!(e.is_rejection()); + assert_eq!( + e.to_string(), + "identity provider rejected the request: HTTP 400 (invalid_grant): Token is not active" + ); + } + + #[test] + fn rejected_omits_absent_or_empty_oauth_fields() { + // No OAuth body (e.g. a bare 503) → just the status. + assert_eq!( + AuthError::rejected(503, None, None).to_string(), + "identity provider rejected the request: HTTP 503" + ); + // Empty strings are treated as absent, not rendered as "()" / ": ". + assert_eq!( + AuthError::rejected(400, Some(""), Some("")).to_string(), + "identity provider rejected the request: HTTP 400" + ); + } + + #[test] + fn unreachable_is_not_a_rejection() { + assert!(!AuthError::Unreachable("connection refused".into()).is_rejection()); + assert!(!AuthError::Other("keyring locked".into()).is_rejection()); + } +} diff --git a/crates/hephd/src/oauth.rs b/crates/hephd/src/oauth.rs index 53ee5f0..4af704f 100644 --- a/crates/hephd/src/oauth.rs +++ b/crates/hephd/src/oauth.rs @@ -109,7 +109,7 @@ impl KeyringTokenStore { } }); keyring_core::Entry::new(&self.service, &self.account) - .map_err(|e| AuthError::Provider(e.to_string())) + .map_err(|e| AuthError::Other(e.to_string())) } } @@ -119,16 +119,16 @@ impl TokenStore for KeyringTokenStore { serde_json::from_str(&secret).ok() } fn save(&self, token: &StoredToken) -> Result<(), AuthError> { - let json = serde_json::to_string(token).map_err(|e| AuthError::Provider(e.to_string()))?; + let json = serde_json::to_string(token).map_err(|e| AuthError::Other(e.to_string()))?; self.entry()? .set_password(&json) - .map_err(|e| AuthError::Provider(e.to_string())) + .map_err(|e| AuthError::Other(e.to_string())) } fn clear(&self) -> Result<(), AuthError> { match self.entry()?.delete_credential() { Ok(()) => Ok(()), Err(keyring_core::Error::NoEntry) => Ok(()), - Err(e) => Err(AuthError::Provider(e.to_string())), + Err(e) => Err(AuthError::Other(e.to_string())), } } } @@ -187,6 +187,9 @@ impl TokenResponse { #[derive(Debug, Deserialize)] struct TokenErrorBody { error: String, + /// Human-readable detail the provider may include (RFC 6749 §5.2). + #[serde(default)] + error_description: Option, } /// Drives the OAuth 2.0 device-code flow against one provider. @@ -208,17 +211,14 @@ impl DeviceFlow { let mut resp = http .get(&url) .call() - .map_err(|e| AuthError::Provider(e.to_string()))?; + .map_err(|e| AuthError::Unreachable(e.to_string()))?; if !resp.status().is_success() { - return Err(AuthError::Provider(format!( - "discovery returned {}", - resp.status() - ))); + return Err(AuthError::rejected(resp.status().as_u16(), None, None)); } let doc: DiscoveryDoc = resp .body_mut() .read_json() - .map_err(|e| AuthError::Provider(e.to_string()))?; + .map_err(|e| AuthError::Other(e.to_string()))?; Ok(DeviceFlow { client_id: client_id.to_string(), http, @@ -233,16 +233,13 @@ impl DeviceFlow { .http .post(&self.device_authorization_endpoint) .send_form([("client_id", self.client_id.as_str()), ("scope", scope)]) - .map_err(|e| AuthError::Provider(e.to_string()))?; + .map_err(|e| AuthError::Unreachable(e.to_string()))?; if !resp.status().is_success() { - return Err(AuthError::Provider(format!( - "device authorization returned {}", - resp.status() - ))); + return Err(AuthError::rejected(resp.status().as_u16(), None, None)); } resp.body_mut() .read_json() - .map_err(|e| AuthError::Provider(e.to_string())) + .map_err(|e| AuthError::Other(e.to_string())) } /// Poll the token endpoint until the user authorizes, the code expires, or @@ -267,13 +264,13 @@ impl DeviceFlow { ("device_code", auth.device_code.as_str()), ("client_id", self.client_id.as_str()), ]) - .map_err(|e| AuthError::Provider(e.to_string()))?; + .map_err(|e| AuthError::Unreachable(e.to_string()))?; if response.status().is_success() { let token: TokenResponse = response .body_mut() .read_json() - .map_err(|e| AuthError::Provider(e.to_string()))?; + .map_err(|e| AuthError::Other(e.to_string()))?; return Ok(token.into_stored()); } @@ -281,7 +278,7 @@ impl DeviceFlow { let body: TokenErrorBody = response .body_mut() .read_json() - .map_err(|e| AuthError::Provider(e.to_string()))?; + .map_err(|e| AuthError::Other(e.to_string()))?; match body.error.as_str() { "authorization_pending" => {} "slow_down" => interval += 5, @@ -301,17 +298,24 @@ impl DeviceFlow { ("refresh_token", refresh_token), ("client_id", self.client_id.as_str()), ]) - .map_err(|e| AuthError::Provider(e.to_string()))?; + .map_err(|e| AuthError::Unreachable(e.to_string()))?; if !response.status().is_success() { - return Err(AuthError::Provider(format!( - "token refresh returned {}", - response.status() - ))); + // The IdP was reached and refused the grant (typically a `400 + // invalid_grant` once the refresh token is expired/rotated). Report + // it as a *rejection* with the OAuth error body — not "unreachable", + // which would misdirect debugging toward the network. + let status = response.status().as_u16(); + let body = response.body_mut().read_json::().ok(); + return Err(AuthError::rejected( + status, + body.as_ref().map(|b| b.error.as_str()), + body.as_ref().and_then(|b| b.error_description.as_deref()), + )); } let mut token: StoredToken = response .body_mut() .read_json::() - .map_err(|e| AuthError::Provider(e.to_string()))? + .map_err(|e| AuthError::Other(e.to_string()))? .into_stored(); // Providers may omit the refresh token on refresh — keep the old one. if token.refresh_token.is_none() { diff --git a/crates/hephd/src/server.rs b/crates/hephd/src/server.rs index 30c5d5a..89dee78 100644 --- a/crates/hephd/src/server.rs +++ b/crates/hephd/src/server.rs @@ -20,6 +20,7 @@ use tokio::net::{UnixListener, UnixStream}; use heph_core::Store; +use crate::auth::AuthError; use crate::oauth::{self, TokenStore}; use crate::rpc::{self, Request, Response, RpcError, INTERNAL_ERROR, PARSE_ERROR}; use crate::selfupdate::{self, SelfUpdateConfig}; @@ -80,10 +81,25 @@ fn is_auth_error(e: &anyhow::Error) -> bool { .is_some_and(|s| s == reqwest::StatusCode::UNAUTHORIZED) } -/// Fold one exchange outcome into the shared [`SyncHealth`]. -fn record_sync_outcome(health: &Arc>, result: &Result) { +/// The exact `heph auth login …` command that re-authenticates this spoke, built +/// from the hub URL + issuer + client id the daemon is configured with — so the +/// surfaced error tells the user *what to run*, not just that auth failed. +/// `None` for an unauthenticated / standalone instance. The hub-URL string must +/// match what the credential store is keyed under, which is exactly `hub_url`. +fn reauth_command(hub_url: Option<&str>, auth: Option<&SpokeAuth>) -> Option { + let (hub, auth) = (hub_url?, auth?); + Some(format!( + "heph auth login --hub-url {hub} --issuer {} --client-id {}", + auth.issuer, auth.client_id + )) +} + +/// Fold one exchange outcome into the shared [`SyncHealth`]. On an auth failure +/// (a 401 from the hub) the recorded error carries the actionable re-login +/// command, so `heph sync --status` / `heph auth status` / the TUI show the fix. +fn record_sync_outcome(ctx: &Ctx, result: &Result) { let now = now_ms(); - let mut h = health.lock().expect("sync_health mutex poisoned"); + let mut h = ctx.sync_health.lock().expect("sync_health mutex poisoned"); h.last_attempt_ms = Some(now); match result { Ok(_) => { @@ -92,28 +108,67 @@ fn record_sync_outcome(health: &Arc>, result: &Result { - h.auth_failure = is_auth_error(e); - h.last_error = Some(e.to_string()); + let auth_failure = is_auth_error(e); + h.auth_failure = auth_failure; + h.last_error = Some(annotate_reauth( + e.to_string(), + auth_failure, + ctx.hub_url.as_deref(), + ctx.auth.as_ref(), + )); } } } +/// Record a failure to obtain a bearer token (the refresh step, before any hub +/// request). A *rejection* (the IdP refused the refresh) is an auth failure and +/// gets the re-login hint; a transport failure stays a transient error. Surfacing +/// this here means `last_error` reflects the real cause (e.g. `invalid_grant`) +/// instead of only the downstream 401 on `/sync/pull`. +fn record_bearer_failure(ctx: &Ctx, err: &AuthError) { + let now = now_ms(); + let auth_failure = err.is_rejection(); + let mut h = ctx.sync_health.lock().expect("sync_health mutex poisoned"); + h.last_attempt_ms = Some(now); + h.auth_failure = auth_failure; + h.last_error = Some(annotate_reauth( + format!("could not obtain bearer token: {err}"), + auth_failure, + ctx.hub_url.as_deref(), + ctx.auth.as_ref(), + )); +} + +/// Append the actionable re-login command to `msg` when this is an auth failure +/// and the spoke has auth configured. +fn annotate_reauth( + msg: String, + auth_failure: bool, + hub_url: Option<&str>, + auth: Option<&SpokeAuth>, +) -> String { + match reauth_command(hub_url, auth) { + Some(cmd) if auth_failure => format!("{msg} — re-authenticate: {cmd}"), + _ => msg, + } +} + impl Ctx { - /// The current bearer token for hub sync (refreshing if expired), or `None` - /// if this spoke has no auth configured / no usable token. - async fn bearer(&self) -> Option { - let auth = self.auth.clone()?; - let result = tokio::task::spawn_blocking(move || { + /// The current bearer token for hub sync (refreshing if expired). `Ok(None)` + /// means this spoke has no auth configured / no token stored (it syncs + /// unauthenticated); `Err` means token acquisition genuinely failed (the + /// caller records it and skips the attempt rather than 401ing the hub). + async fn bearer(&self) -> Result, AuthError> { + let Some(auth) = self.auth.clone() else { + return Ok(None); + }; + match tokio::task::spawn_blocking(move || { oauth::current_bearer(auth.store.as_ref(), &auth.issuer, &auth.client_id) }) - .await; - match result { - Ok(Ok(token)) => token, - Ok(Err(e)) => { - tracing::warn!("could not obtain bearer token: {e}"); - None - } - Err(_) => None, + .await + { + Ok(res) => res, + Err(_join) => Ok(None), // the blocking task panicked; treat as no token } } } @@ -223,10 +278,20 @@ impl Daemon { let mut tick = tokio::time::interval(interval); loop { tick.tick().await; - let bearer = ctx.bearer().await; + let bearer = match ctx.bearer().await { + Ok(b) => b, + Err(e) => { + // Couldn't get a token — record the real cause (e.g. a + // rejected refresh) and skip; sending an unauthenticated + // request would only 401 and mask it. + record_bearer_failure(&ctx, &e); + tracing::warn!("background sync: could not obtain bearer token: {e}"); + continue; + } + }; let result = sync::sync_once(ctx.store.clone(), &hub, &ctx.http, bearer.as_deref()).await; - record_sync_outcome(&ctx.sync_health, &result); + record_sync_outcome(&ctx, &result); match result { Ok(report) => tracing::debug!(?report, "background sync"), Err(e) => tracing::warn!("background sync failed: {e}"), @@ -321,9 +386,25 @@ async fn sync_now(ctx: &Ctx) -> Result { message: "no hub_url configured; this instance is standalone".into(), }); }; - let bearer = ctx.bearer().await; + let bearer = match ctx.bearer().await { + Ok(b) => b, + Err(e) => { + // Token acquisition failed — record the real cause (with a re-login + // hint when it's a rejection) and surface it instead of a downstream 401. + record_bearer_failure(ctx, &e); + return Err(RpcError { + code: INTERNAL_ERROR, + message: annotate_reauth( + format!("sync failed: could not obtain bearer token: {e}"), + e.is_rejection(), + ctx.hub_url.as_deref(), + ctx.auth.as_ref(), + ), + }); + } + }; let result = sync::sync_once(ctx.store.clone(), &hub_url, &ctx.http, bearer.as_deref()).await; - record_sync_outcome(&ctx.sync_health, &result); + record_sync_outcome(ctx, &result); match result { Ok(report) => Ok(json!(report)), Err(e) => Err(RpcError { @@ -374,10 +455,22 @@ async fn sync_status(ctx: &Ctx) -> Result { .expect("sync_health mutex poisoned") .clone(); + // Non-secret OIDC params (issuer/client-id) + the exact re-login command, so + // `heph auth status` can show the fix without reconstructing it client-side + // (and keyed under the right hub URL — see the per-URL token-keying gotcha). + let auth = ctx.auth.as_ref().map(|a| { + json!({ + "issuer": a.issuer, + "client_id": a.client_id, + }) + }); + Ok(json!({ "hub_url": hub_url, "cursors": cursors, "conflicts": conflicts, "health": health, + "auth": auth, + "reauth_command": reauth_command(Some(&hub_url), ctx.auth.as_ref()), })) } diff --git a/crates/hephd/src/sync.rs b/crates/hephd/src/sync.rs index bfaa323..9beac05 100644 --- a/crates/hephd/src/sync.rs +++ b/crates/hephd/src/sync.rs @@ -261,8 +261,14 @@ async fn require_auth( .await .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)? .map_err(|e| match e { - AuthError::Provider(_) => StatusCode::SERVICE_UNAVAILABLE, - _ => StatusCode::UNAUTHORIZED, + // The token itself is missing/bad → tell the client it's unauthorized. + AuthError::Missing | AuthError::Invalid(_) => StatusCode::UNAUTHORIZED, + // We couldn't reach/process the IdP to fetch verification keys — a + // transient hub-side problem, not the client's token. Ask them to + // retry rather than claiming their token is invalid. + AuthError::Unreachable(_) | AuthError::Rejected(_) | AuthError::Other(_) => { + StatusCode::SERVICE_UNAVAILABLE + } })?; // Multi-tenancy seam: resolve the token's identity to the owner it may act diff --git a/crates/hephd/tests/oauth.rs b/crates/hephd/tests/oauth.rs index f61c872..0a1c709 100644 --- a/crates/hephd/tests/oauth.rs +++ b/crates/hephd/tests/oauth.rs @@ -90,11 +90,25 @@ async fn token(State(s): State, Form(form): Form Json(json!({ - "access_token": "access-2", - "expires_in": 3600, - })) - .into_response(), + Some("refresh_token") => { + // A rotated/expired refresh token is refused with `400 invalid_grant` + // (RFC 6749 §5.2) — the case that used to be mislabeled "unreachable". + if form.get("refresh_token").map(String::as_str) == Some("refresh-expired") { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ + "error": "invalid_grant", + "error_description": "Token is not active", + })), + ) + .into_response(); + } + Json(json!({ + "access_token": "access-2", + "expires_in": 3600, + })) + .into_response() + } _ => ( StatusCode::BAD_REQUEST, Json(json!({ "error": "unsupported_grant_type" })), @@ -129,6 +143,48 @@ fn refresh_keeps_the_old_refresh_token_when_omitted() { assert_eq!(refreshed.refresh_token.as_deref(), Some("refresh-1")); } +#[test] +fn refresh_rejected_by_idp_is_a_rejection_not_unreachable() { + let issuer = start_idp(); + let flow = DeviceFlow::discover(&issuer, "heph-cli").unwrap(); + let err = flow.refresh("refresh-expired").unwrap_err(); + // The whole point of the fix: a reachable IdP that returns 400 is a + // *rejection*, carrying the OAuth error body — not "unreachable". + assert!(err.is_rejection(), "expected a rejection, got: {err}"); + let msg = err.to_string(); + assert!( + msg.contains("rejected"), + "message should say rejected: {msg}" + ); + assert!( + msg.contains("invalid_grant"), + "should include the OAuth error: {msg}" + ); + assert!( + msg.contains("Token is not active"), + "should include error_description: {msg}" + ); + assert!( + !msg.contains("unreachable"), + "must NOT claim the IdP was unreachable: {msg}" + ); +} + +#[test] +fn discovery_against_a_dead_idp_is_unreachable_not_a_rejection() { + use hephd::AuthError; + // Port 1 refuses the connection → a genuine transport failure. + let err = match DeviceFlow::discover("http://127.0.0.1:1/application/o/heph/", "heph-cli") { + Ok(_) => panic!("discovery should fail against a dead IdP"), + Err(e) => e, + }; + assert!( + matches!(err, AuthError::Unreachable(_)), + "a connection failure must be Unreachable, got: {err}" + ); + assert!(!err.is_rejection()); +} + #[test] fn memory_token_store_round_trips_and_reports_expiry() { let store = MemoryTokenStore::default(); diff --git a/docs/changelog.d/auth-error-clarity.bugfix.md b/docs/changelog.d/auth-error-clarity.bugfix.md new file mode 100644 index 0000000..83ba854 --- /dev/null +++ b/docs/changelog.d/auth-error-clarity.bugfix.md @@ -0,0 +1 @@ +hephd no longer reports a rejected OAuth refresh as "identity provider unreachable". A reachable IdP that returns an HTTP error (e.g. `400 invalid_grant` once a refresh token expires/rotates) is now surfaced as a *rejection* — `identity provider rejected the request: HTTP 400 (invalid_grant): …` — with the OAuth error body, distinct from a genuine transport failure. This stops the wording from misdirecting incident response toward the network when the real fix is re-authentication. diff --git a/docs/changelog.d/auth-error-clarity.feature.md b/docs/changelog.d/auth-error-clarity.feature.md new file mode 100644 index 0000000..ab67867 --- /dev/null +++ b/docs/changelog.d/auth-error-clarity.feature.md @@ -0,0 +1 @@ +Spoke auth failures now tell you how to recover. When a refresh token is rejected or the hub returns 401, `hephd` records the real cause plus the exact `heph auth login --hub-url … --issuer … --client-id …` command (keyed to this spoke's hub) in its sync health. A new `heph auth status` prints that health and the re-login command, `heph sync --status`'s `last_error` carries it, and `heph-tui`'s status line points at it with a `⚠ auth · heph auth status` chip. diff --git a/docs/how-to/set-up-sync-hub.md b/docs/how-to/set-up-sync-hub.md index a5b56ea..4d654a9 100644 --- a/docs/how-to/set-up-sync-hub.md +++ b/docs/how-to/set-up-sync-hub.md @@ -130,19 +130,41 @@ spoke is visible at a glance rather than buried in the daemon log. Make a change on `gilbert`, force a sync, and confirm it appears via the hub. +### When sync stops authenticating + +A spoke's refresh token can expire or be rotated (e.g. the IdP session lapses). +The spoke then can't refresh on its own and needs a re-login — but this is +**visible, not silent**: + +- `heph-tui` shows a red `⚠ auth · heph auth status` chip in the status line. +- `heph auth status` prints the auth health and the **exact** re-login command, + pre-filled with this spoke's hub URL / issuer / client id: + + ```bash + heph auth status + ``` + +- `heph sync --status`'s `last_error` names the real cause — a refresh + *rejection* (e.g. `HTTP 400 (invalid_grant)`), not a misleading "identity + provider unreachable" — and carries the same `heph auth login …` hint. + +Run the printed `heph auth login …` command to restore sync. + ## Current gaps (finalized by the blumeops deployment) -The flag-level flow above works today; two enablers make it a clean, managed +The flag-level flow above works today; one enabler makes it a clean, managed deployment rather than a hand-run process — tracked in the `Hephaestus` project: -- **`heph daemon` only generates a `--mode local` service** (no `--hub-url` / - `--oidc-*`). So for now the hub and the spoke config are expressed as `hephd` - flags (run directly, or via the blumeops-managed systemd unit), not via - `heph daemon start`. - **Path A seeding is manual** (copy the store + reset the device origin). A small enabler — seed a hub from a snapshot with a fresh origin, or `hephd --owner-id` — would make this one step. +> `heph daemon start`/`restart` can now bake the spoke/hub config (`--hub-url`, +> `--mode server`, `--http-addr`, `--oidc-*`) into the generated service (see +> [[run-the-daemon]]). The canonical hub on `indri` is still provisioned via the +> blumeops-managed systemd unit by deployment choice, not because `heph daemon` +> can't express it. + ## Related - [[run-the-daemon]] — manage the local daemon as an OS service