generated from eblume/project-template
All checks were successful
Build / validate (pull_request) Successful in 6m12s
The spoke OAuth path funneled every failure into one `AuthError::Provider` whose Display was hardcoded "identity provider unreachable". So a reachable IdP returning `400 invalid_grant` on a refresh was reported as "unreachable", misdirecting incident response toward the network when the fix is re-auth. The real refresh cause was also swallowed — `bearer()` logged it and returned None, so sync health only ever showed the downstream 401 on /sync/pull. Wording fix (auth.rs / oauth.rs): - Split AuthError into Unreachable (transport), Rejected (IdP returned an HTTP error — carries the RFC 6749 §5.2 error/error_description), and Other (keyring / malformed response, previously mislabeled too). - refresh()/discover()/start()/poll() classify transport vs status; refresh reads the OAuth error body on a non-2xx. - Hub-side token verify maps IdP-infra failures → 503, token failures → 401. Recovery UX (server.rs / heph / heph-tui): - bearer() returns Result; the sync paths record the real acquisition failure (with a re-login hint when it's a rejection) instead of a masked 401. - sync health's last_error carries the exact `heph auth login --hub-url … --issuer … --client-id …` command (keyed to the configured hub); sync.status also returns issuer/client_id + the command. - New `heph auth status` prints auth health and the re-login command. - heph-tui's auth chip points at it: `⚠ auth · heph auth status`. Closes the duplicate "misleading identity provider unreachable" tasks and the "actionable re-auth guidance" task. Also corrects a now-stale set-up-sync-hub gap note (daemon config baking landed in the prior PR). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
350 lines
13 KiB
Rust
350 lines
13 KiB
Rust
//! Client-side OIDC: the OAuth 2.0 device-code flow (RFC 8628), token storage,
//! and refresh (tech-spec §13).
//!
//! A spoke (`local` + `hub_url`) or a `client` uses this to obtain the bearer
//! token it presents to the hub. The flow is **blocking** — it is interactive
//! (`heph auth login` waits for the user to authorize in a browser) and the
//! daemon only refreshes from its blocking pool. Tokens persist in a
//! [`TokenStore`] (the OS keyring in production, in-memory in tests).
|
|
|
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
use crate::auth::AuthError;
|
|
|
|
/// Grant-type URN sent to the token endpoint while polling the device-code
/// flow.
const DEVICE_GRANT: &str = "urn:ietf:params:oauth:grant-type:device_code";

/// Safety margin in seconds: a token this close to its expiry is already
/// treated as expired, so it cannot lapse between the check and its use.
const EXPIRY_SKEW: u64 = 30;
|
|
|
|
/// Persisted OIDC tokens for one provider.
|
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
|
pub struct StoredToken {
|
|
/// The bearer token presented to the hub.
|
|
pub access_token: String,
|
|
/// Used to obtain a fresh access token without re-authenticating.
|
|
pub refresh_token: Option<String>,
|
|
/// Unix seconds at which `access_token` expires.
|
|
pub expires_at: u64,
|
|
}
|
|
|
|
impl StoredToken {
|
|
/// Whether the access token is expired (or within the safety skew).
|
|
pub fn is_expired(&self, now: u64) -> bool {
|
|
now + EXPIRY_SKEW >= self.expires_at
|
|
}
|
|
}
|
|
|
|
/// Seconds elapsed since the Unix epoch, or 0 when the system clock reads
/// earlier than the epoch.
fn now_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        // A pre-epoch clock yields no duration; fall back to zero.
        Err(_) => 0,
    }
}
|
|
|
|
/// Where tokens persist between runs.
|
|
pub trait TokenStore: Send + Sync {
|
|
/// Load the stored token, if any.
|
|
fn load(&self) -> Option<StoredToken>;
|
|
/// Persist (replacing) the token.
|
|
fn save(&self, token: &StoredToken) -> Result<(), AuthError>;
|
|
/// Remove any stored token.
|
|
fn clear(&self) -> Result<(), AuthError>;
|
|
}
|
|
|
|
/// An in-memory [`TokenStore`] for tests.
|
|
#[derive(Default)]
|
|
pub struct MemoryTokenStore(std::sync::Mutex<Option<StoredToken>>);
|
|
|
|
impl TokenStore for MemoryTokenStore {
|
|
fn load(&self) -> Option<StoredToken> {
|
|
self.0.lock().expect("token lock poisoned").clone()
|
|
}
|
|
fn save(&self, token: &StoredToken) -> Result<(), AuthError> {
|
|
*self.0.lock().expect("token lock poisoned") = Some(token.clone());
|
|
Ok(())
|
|
}
|
|
fn clear(&self) -> Result<(), AuthError> {
|
|
*self.0.lock().expect("token lock poisoned") = None;
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
/// A [`TokenStore`] that persists to the operating system's keyring
/// (Keychain / Secret Service). The token is serialized to JSON and saved as
/// the secret of the `(service, account)` entry.
pub struct KeyringTokenStore {
    /// Keyring service name under which the entry lives.
    service: String,
    /// Keyring account name identifying the entry within the service.
    account: String,
}
|
|
|
|
impl KeyringTokenStore {
|
|
/// Store tokens under this service, keyed by `account` (the hub url).
|
|
pub fn new(account: impl Into<String>) -> KeyringTokenStore {
|
|
KeyringTokenStore {
|
|
service: "hephaestus".into(),
|
|
account: account.into(),
|
|
}
|
|
}
|
|
|
|
fn entry(&self) -> Result<keyring_core::Entry, AuthError> {
|
|
// keyring-core holds the cross-platform `Entry`/`Error` types but no
|
|
// backend — a credential store must be registered before any entry is
|
|
// built. Register the OS-native store once, lazily, on first use
|
|
// (idempotent across both surfaces). We register a single backend per
|
|
// platform (macOS Keychain / Linux Secret Service) rather than pulling
|
|
// the `keyring` meta-crate, which compiles every backend at once.
|
|
static NATIVE_STORE: std::sync::Once = std::sync::Once::new();
|
|
NATIVE_STORE.call_once(|| {
|
|
#[cfg(target_os = "macos")]
|
|
if let Ok(store) = apple_native_keyring_store::keychain::Store::new() {
|
|
keyring_core::set_default_store(store);
|
|
}
|
|
// The D-Bus Secret Service (not the kernel keyutils store, which is
|
|
// wiped on logout/reboot and would silently drop a persisted token).
|
|
#[cfg(target_os = "linux")]
|
|
if let Ok(store) = dbus_secret_service_keyring_store::Store::new() {
|
|
keyring_core::set_default_store(store);
|
|
}
|
|
});
|
|
keyring_core::Entry::new(&self.service, &self.account)
|
|
.map_err(|e| AuthError::Other(e.to_string()))
|
|
}
|
|
}
|
|
|
|
impl TokenStore for KeyringTokenStore {
|
|
fn load(&self) -> Option<StoredToken> {
|
|
let secret = self.entry().ok()?.get_password().ok()?;
|
|
serde_json::from_str(&secret).ok()
|
|
}
|
|
fn save(&self, token: &StoredToken) -> Result<(), AuthError> {
|
|
let json = serde_json::to_string(token).map_err(|e| AuthError::Other(e.to_string()))?;
|
|
self.entry()?
|
|
.set_password(&json)
|
|
.map_err(|e| AuthError::Other(e.to_string()))
|
|
}
|
|
fn clear(&self) -> Result<(), AuthError> {
|
|
match self.entry()?.delete_credential() {
|
|
Ok(()) => Ok(()),
|
|
Err(keyring_core::Error::NoEntry) => Ok(()),
|
|
Err(e) => Err(AuthError::Other(e.to_string())),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// The device-authorization response (RFC 8628 §3.2).
|
|
#[derive(Debug, Clone, Deserialize)]
|
|
pub struct DeviceAuth {
|
|
/// The code the daemon polls the token endpoint with.
|
|
pub device_code: String,
|
|
/// The short code the user types at the verification page.
|
|
pub user_code: String,
|
|
/// Where the user goes to authorize.
|
|
pub verification_uri: String,
|
|
/// Verification URI with the code pre-filled (optional).
|
|
#[serde(default)]
|
|
pub verification_uri_complete: Option<String>,
|
|
/// Seconds between polls.
|
|
#[serde(default = "default_interval")]
|
|
pub interval: u64,
|
|
/// Seconds until `device_code` expires.
|
|
pub expires_in: u64,
|
|
}
|
|
|
|
/// Serde default for the `interval` field of [`DeviceAuth`]: poll every five
/// seconds when the provider's response omits it.
fn default_interval() -> u64 {
    const FALLBACK_SECS: u64 = 5;
    FALLBACK_SECS
}
|
|
|
|
/// Discovery fields the device flow needs.
|
|
#[derive(Debug, Deserialize)]
|
|
struct DiscoveryDoc {
|
|
device_authorization_endpoint: String,
|
|
token_endpoint: String,
|
|
}
|
|
|
|
/// A token-endpoint success response.
|
|
#[derive(Debug, Deserialize)]
|
|
struct TokenResponse {
|
|
access_token: String,
|
|
#[serde(default)]
|
|
refresh_token: Option<String>,
|
|
#[serde(default)]
|
|
expires_in: Option<u64>,
|
|
}
|
|
|
|
impl TokenResponse {
|
|
fn into_stored(self) -> StoredToken {
|
|
StoredToken {
|
|
access_token: self.access_token,
|
|
refresh_token: self.refresh_token,
|
|
expires_at: now_secs() + self.expires_in.unwrap_or(3600),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A token-endpoint error response (RFC 6749 §5.2 / RFC 8628 §3.5).
|
|
#[derive(Debug, Deserialize)]
|
|
struct TokenErrorBody {
|
|
error: String,
|
|
/// Human-readable detail the provider may include (RFC 6749 §5.2).
|
|
#[serde(default)]
|
|
error_description: Option<String>,
|
|
}
|
|
|
|
/// Drives the OAuth 2.0 device-code flow against one provider.
|
|
pub struct DeviceFlow {
|
|
client_id: String,
|
|
http: ureq::Agent,
|
|
device_authorization_endpoint: String,
|
|
token_endpoint: String,
|
|
}
|
|
|
|
impl DeviceFlow {
|
|
/// Discover the device + token endpoints from `issuer` and build a flow.
|
|
pub fn discover(issuer: &str, client_id: &str) -> Result<DeviceFlow, AuthError> {
|
|
let http = crate::blocking_agent();
|
|
let url = format!(
|
|
"{}/.well-known/openid-configuration",
|
|
issuer.trim_end_matches('/')
|
|
);
|
|
let mut resp = http
|
|
.get(&url)
|
|
.call()
|
|
.map_err(|e| AuthError::Unreachable(e.to_string()))?;
|
|
if !resp.status().is_success() {
|
|
return Err(AuthError::rejected(resp.status().as_u16(), None, None));
|
|
}
|
|
let doc: DiscoveryDoc = resp
|
|
.body_mut()
|
|
.read_json()
|
|
.map_err(|e| AuthError::Other(e.to_string()))?;
|
|
Ok(DeviceFlow {
|
|
client_id: client_id.to_string(),
|
|
http,
|
|
device_authorization_endpoint: doc.device_authorization_endpoint,
|
|
token_endpoint: doc.token_endpoint,
|
|
})
|
|
}
|
|
|
|
/// Request a device + user code (RFC 8628 §3.1).
|
|
pub fn start(&self, scope: &str) -> Result<DeviceAuth, AuthError> {
|
|
let mut resp = self
|
|
.http
|
|
.post(&self.device_authorization_endpoint)
|
|
.send_form([("client_id", self.client_id.as_str()), ("scope", scope)])
|
|
.map_err(|e| AuthError::Unreachable(e.to_string()))?;
|
|
if !resp.status().is_success() {
|
|
return Err(AuthError::rejected(resp.status().as_u16(), None, None));
|
|
}
|
|
resp.body_mut()
|
|
.read_json()
|
|
.map_err(|e| AuthError::Other(e.to_string()))
|
|
}
|
|
|
|
/// Poll the token endpoint until the user authorizes, the code expires, or
|
|
/// access is denied. `sleep` is injected so tests need not wait in real
|
|
/// time (production passes [`std::thread::sleep`]).
|
|
pub fn poll(
|
|
&self,
|
|
auth: &DeviceAuth,
|
|
sleep: impl Fn(Duration),
|
|
) -> Result<StoredToken, AuthError> {
|
|
let deadline = now_secs() + auth.expires_in;
|
|
let mut interval = auth.interval.max(1);
|
|
loop {
|
|
if now_secs() >= deadline {
|
|
return Err(AuthError::Invalid("device code expired".into()));
|
|
}
|
|
let mut response = self
|
|
.http
|
|
.post(&self.token_endpoint)
|
|
.send_form([
|
|
("grant_type", DEVICE_GRANT),
|
|
("device_code", auth.device_code.as_str()),
|
|
("client_id", self.client_id.as_str()),
|
|
])
|
|
.map_err(|e| AuthError::Unreachable(e.to_string()))?;
|
|
|
|
if response.status().is_success() {
|
|
let token: TokenResponse = response
|
|
.body_mut()
|
|
.read_json()
|
|
.map_err(|e| AuthError::Other(e.to_string()))?;
|
|
return Ok(token.into_stored());
|
|
}
|
|
|
|
// A non-success is either "keep waiting" or a terminal failure.
|
|
let body: TokenErrorBody = response
|
|
.body_mut()
|
|
.read_json()
|
|
.map_err(|e| AuthError::Other(e.to_string()))?;
|
|
match body.error.as_str() {
|
|
"authorization_pending" => {}
|
|
"slow_down" => interval += 5,
|
|
other => return Err(AuthError::Invalid(format!("device flow failed: {other}"))),
|
|
}
|
|
sleep(Duration::from_secs(interval));
|
|
}
|
|
}
|
|
|
|
/// Exchange a refresh token for a fresh access token (RFC 6749 §6).
|
|
pub fn refresh(&self, refresh_token: &str) -> Result<StoredToken, AuthError> {
|
|
let mut response = self
|
|
.http
|
|
.post(&self.token_endpoint)
|
|
.send_form([
|
|
("grant_type", "refresh_token"),
|
|
("refresh_token", refresh_token),
|
|
("client_id", self.client_id.as_str()),
|
|
])
|
|
.map_err(|e| AuthError::Unreachable(e.to_string()))?;
|
|
if !response.status().is_success() {
|
|
// The IdP was reached and refused the grant (typically a `400
|
|
// invalid_grant` once the refresh token is expired/rotated). Report
|
|
// it as a *rejection* with the OAuth error body — not "unreachable",
|
|
// which would misdirect debugging toward the network.
|
|
let status = response.status().as_u16();
|
|
let body = response.body_mut().read_json::<TokenErrorBody>().ok();
|
|
return Err(AuthError::rejected(
|
|
status,
|
|
body.as_ref().map(|b| b.error.as_str()),
|
|
body.as_ref().and_then(|b| b.error_description.as_deref()),
|
|
));
|
|
}
|
|
let mut token: StoredToken = response
|
|
.body_mut()
|
|
.read_json::<TokenResponse>()
|
|
.map_err(|e| AuthError::Other(e.to_string()))?
|
|
.into_stored();
|
|
// Providers may omit the refresh token on refresh — keep the old one.
|
|
if token.refresh_token.is_none() {
|
|
token.refresh_token = Some(refresh_token.to_string());
|
|
}
|
|
Ok(token)
|
|
}
|
|
}
|
|
|
|
/// Return a usable access token from `store`, refreshing via `issuer`/`client_id`
|
|
/// if the stored one is expired. Returns `None` if nothing is stored; errors if
|
|
/// a refresh was needed but failed. Saves a refreshed token back to `store`.
|
|
pub fn current_bearer(
|
|
store: &dyn TokenStore,
|
|
issuer: &str,
|
|
client_id: &str,
|
|
) -> Result<Option<String>, AuthError> {
|
|
let Some(token) = store.load() else {
|
|
return Ok(None);
|
|
};
|
|
if !token.is_expired(now_secs()) {
|
|
return Ok(Some(token.access_token));
|
|
}
|
|
let Some(refresh) = token.refresh_token.clone() else {
|
|
return Err(AuthError::Invalid(
|
|
"token expired and no refresh token".into(),
|
|
));
|
|
};
|
|
let refreshed = DeviceFlow::discover(issuer, client_id)?.refresh(&refresh)?;
|
|
store.save(&refreshed)?;
|
|
Ok(Some(refreshed.access_token))
|
|
}
|