Merge pull request 'v1 prep: multi-tenancy seam (resolve_owner) + hub-setup how-to' (#4) from feature/v1-hub-prep into main

Reviewed-on: #4
2026-06-04 08:00:31 -07:00 · 2026-06-04 08:00:31 -07:00 · 80f83cbba8
commit 80f83cbba8
parent 3f7012921b b6a96013ca
14 changed files with 233 additions and 1564 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -41,8 +41,19 @@ clap = { version = "4", features = ["derive"] }
 ratatui = "0.30"
 axum = "0.8"
 jsonwebtoken = { version = "10", features = ["rust_crypto"] }
-keyring = { version = "4" }
+# keyring 4's `keyring` meta-crate compiles *every* platform credential backend
+# for the target (on Linux: the zbus + libdbus secret-service stacks, keyutils,
+# and a sqlite/zstd db-keystore — ~290 crates). We use exactly one backend per
+# platform, so depend on keyring-core (the API) + a single store crate per OS.
 keyring-core = "1"
+apple-native-keyring-store = { version = "1", features = ["keychain"] }
+# vendored: build libdbus from bundled source so the build needs no system
+# libdbus-1-dev (the CI rust:1-bookworm image has none). crypto-rust: pure-Rust
+# session crypto, no OpenSSL.
+dbus-secret-service-keyring-store = { version = "1", features = [
+  "crypto-rust",
+  "vendored",
+] }
 ureq = { version = "3", features = ["json"] }
 reqwest = { version = "0.13", default-features = false, features = [
  "json",
--- a/crates/heph-core/src/sqlite/mod.rs
+++ b/crates/heph-core/src/sqlite/mod.rs
@ -423,7 +423,7 @@ impl Store for LocalStore {
        syncstate::record(&self.conn, peer, pushed, pulled, now)
    }

-    fn authorize_owner_sub(&mut self, sub: &str) -> Result<bool> {
+    fn resolve_owner(&mut self, sub: &str) -> Result<Option<String>> {
        // The owner's bound identity (NULL until first authenticated sync).
        let current: Option<String> = self
            .conn
@ -436,13 +436,15 @@ impl Store for LocalStore {
            .flatten();
        match current {
            None => {
+                // Claim-on-first: bind this sub to the store's owner.
                self.conn.execute(
                    "UPDATE users SET oidc_sub = ?1 WHERE id = ?2",
                    (sub, &self.owner_id),
                )?;
-                Ok(true)
+                Ok(Some(self.owner_id.clone()))
            }
-            Some(existing) => Ok(existing == sub),
+            Some(existing) if existing == sub => Ok(Some(self.owner_id.clone())),
+            Some(_) => Ok(None),
        }
    }

--- a/crates/heph-core/src/store.rs
+++ b/crates/heph-core/src/store.rs
@ -216,12 +216,18 @@ pub trait Store {
    fn record_sync(&mut self, peer: &str, pushed: Option<&str>, pulled: Option<&str>)
        -> Result<()>;

-    /// Single-tenant authentication gate (tech-spec §13). Map an OIDC `sub` to
-    /// this store's owner: on first sight, **claim** the owner by binding its
-    /// `oidc_sub`; thereafter authorize only that same `sub`. Returns `true` if
-    /// the `sub` owns this store, `false` if a different identity presented a
-    /// token. A hub calls this before serving any op exchange.
-    fn authorize_owner_sub(&mut self, sub: &str) -> Result<bool>;
+    /// Resolve an OIDC `sub` to the `owner_id` it may act as on this store —
+    /// the **multi-tenancy seam** (tech-spec §13). On first sight, **claim** the
+    /// store's owner by binding its `oidc_sub`; thereafter resolve only that
+    /// same `sub`. Returns `Some(owner_id)` when the `sub` owns data here, or
+    /// `None` when a different identity presented a token (the hub then 403s).
+    ///
+    /// Today a store hosts exactly one owner, so this resolves to that single
+    /// owner or `None`. Multi-tenancy (serving N owners from one hub) extends
+    /// this to a real `sub → owner_id` mapping with per-`sub` provisioning, and
+    /// the hub scopes each request to the resolved owner — without changing this
+    /// contract. A hub calls this before serving any op exchange.
+    fn resolve_owner(&mut self, sub: &str) -> Result<Option<String>>;

    /// Open merge conflicts surfaced for the user (`heph conflicts`).
    fn conflicts_list(&self) -> Result<Vec<Conflict>>;
--- a/crates/heph-core/tests/convergence.rs
+++ b/crates/heph-core/tests/convergence.rs
@ -75,18 +75,26 @@ fn sync_cursors_default_empty_then_advance_per_direction() {
 }

 #[test]
-fn owner_sub_gate_claims_first_then_requires_match() {
-    // Single-tenant gate (§13): the first sub claims the owner; only that sub
-    // is authorized thereafter.
+fn resolve_owner_claims_first_then_requires_match() {
+    // The hub resolves an OIDC `sub` to its owner id (§13) — the multi-tenancy
+    // seam. The first sub claims the (single, for now) owner; only that sub
+    // resolves thereafter, and always to the same owner id.
    let (mut a, _ca) = replica(1000);
-    assert!(a.authorize_owner_sub("sub-alice").unwrap(), "first claims");
-    assert!(a.authorize_owner_sub("sub-alice").unwrap(), "same sub ok");
+    let owner = a.resolve_owner("sub-alice").unwrap().expect("first claims");
+    assert_eq!(
+        a.resolve_owner("sub-alice").unwrap().as_deref(),
+        Some(owner.as_str()),
+        "same sub resolves to the same owner"
+    );
    assert!(
-        !a.authorize_owner_sub("sub-mallory").unwrap(),
-        "a different identity must be rejected"
+        a.resolve_owner("sub-mallory").unwrap().is_none(),
+        "a different identity does not resolve (the hub then 403s)"
    );
    // Still bound to the original after a rejection.
-    assert!(a.authorize_owner_sub("sub-alice").unwrap());
+    assert_eq!(
+        a.resolve_owner("sub-alice").unwrap().as_deref(),
+        Some(owner.as_str())
+    );
 }

 #[test]
--- a/crates/hephd/Cargo.toml
+++ b/crates/hephd/Cargo.toml
@ -29,11 +29,18 @@ tracing-subscriber.workspace = true
 clap.workspace = true
 axum.workspace = true
 jsonwebtoken.workspace = true
-keyring.workspace = true
 keyring-core.workspace = true
 reqwest.workspace = true
 ureq.workspace = true

+# The OS credential backend that `oauth.rs` registers as the keyring-core
+# default store — exactly one per platform, not the whole keyring meta-crate.
+[target.'cfg(target_os = "macos")'.dependencies]
+apple-native-keyring-store.workspace = true
+
+[target.'cfg(target_os = "linux")'.dependencies]
+dbus-secret-service-keyring-store.workspace = true
+
 [dev-dependencies]
 tempfile = "3"
 # Auth tests generate a throwaway RSA key + JWKS at runtime (no key in the repo).
--- a/crates/hephd/src/oauth.rs
+++ b/crates/hephd/src/oauth.rs
@ -89,16 +89,24 @@ impl KeyringTokenStore {
    }

    fn entry(&self) -> Result<keyring_core::Entry, AuthError> {
-        // keyring 4 splits the cross-platform `Entry`/`Error` types into
-        // `keyring_core` and requires a credential store to be registered
-        // before any entry is built. Register the OS-native store once,
-        // lazily, on first use (idempotent across both surfaces).
+        // keyring-core holds the cross-platform `Entry`/`Error` types but no
+        // backend — a credential store must be registered before any entry is
+        // built. Register the OS-native store once, lazily, on first use
+        // (idempotent across both surfaces). We register a single backend per
+        // platform (macOS Keychain / Linux Secret Service) rather than pulling
+        // the `keyring` meta-crate, which compiles every backend at once.
        static NATIVE_STORE: std::sync::Once = std::sync::Once::new();
        NATIVE_STORE.call_once(|| {
-            // `not_keyutils = true`: on Linux prefer the Secret Service over
-            // the kernel keyutils store, which is wiped on logout/reboot and
-            // would silently drop a persisted login token.
-            let _ = keyring::use_native_store(true);
+            #[cfg(target_os = "macos")]
+            if let Ok(store) = apple_native_keyring_store::keychain::Store::new() {
+                keyring_core::set_default_store(store);
+            }
+            // The D-Bus Secret Service (not the kernel keyutils store, which is
+            // wiped on logout/reboot and would silently drop a persisted token).
+            #[cfg(target_os = "linux")]
+            if let Ok(store) = dbus_secret_service_keyring_store::Store::new() {
+                keyring_core::set_default_store(store);
+            }
        });
        keyring_core::Entry::new(&self.service, &self.account)
            .map_err(|e| AuthError::Provider(e.to_string()))
--- a/crates/hephd/src/remote.rs
+++ b/crates/hephd/src/remote.rs
@ -315,10 +315,10 @@ impl Store for RemoteStore {
        Ok(())
    }

-    fn authorize_owner_sub(&mut self, _sub: &str) -> Result<bool> {
-        // Hub-side gate; a no-replica client never hosts an endpoint to guard.
+    fn resolve_owner(&mut self, _sub: &str) -> Result<Option<String>> {
+        // Hub-side seam; a no-replica client never hosts an endpoint to guard.
        Err(Error::Remote(
-            "authorize_owner_sub is a hub-side operation".into(),
+            "resolve_owner is a hub-side operation".into(),
        ))
    }

--- a/crates/hephd/src/sync.rs
+++ b/crates/hephd/src/sync.rs
@ -136,20 +136,23 @@ async fn require_auth(
            _ => StatusCode::UNAUTHORIZED,
        })?;

-    // Single-tenant gate: the token's identity must own this hub.
+    // Multi-tenancy seam: resolve the token's identity to the owner it may act
+    // as. Today the hub serves one owner, so this is `Some(that owner)` or
+    // `None` (→ 403). When the hub becomes multi-owner, `_owner_id` is what each
+    // downstream handler scopes its ops to (rather than the store's lone owner).
    let store = state.store.clone();
-    let owns = tokio::task::spawn_blocking(move || {
+    let owner = tokio::task::spawn_blocking(move || {
        store
            .lock()
            .expect("store mutex poisoned")
-            .authorize_owner_sub(&claims.sub)
+            .resolve_owner(&claims.sub)
    })
    .await
    .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
    .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-    if !owns {
+    let Some(_owner_id) = owner else {
        return Err(StatusCode::FORBIDDEN);
-    }
+    };

    Ok(next.run(request).await)
 }
--- a/docs/changelog.d/+keyring-slim.infra.md
+++ b/docs/changelog.d/+keyring-slim.infra.md
@ -0,0 +1 @@
+Slimmed the credential-keyring dependency to cut CI compile time. keyring 4's `keyring` meta-crate compiles *every* platform backend for the target — on Linux that pulled the zbus async stack, a redundant libdbus secret-service, the kernel keyutils store, a SQLite/zstd `db-keystore`, and OpenSSL (~290 crates in its subtree). Replaced it with `keyring-core` (the API) plus a single store per OS — macOS Keychain (`apple-native-keyring-store`), Linux Secret Service (`dbus-secret-service-keyring-store`, pure-Rust crypto, vendored libdbus so the build needs no system `libdbus-1-dev`) — registered directly in `oauth.rs`. hephd's Linux dependency graph drops from **401 to 235 crates** (−166), removing the zbus stack and two C builds. Runtime behavior is unchanged.
--- a/docs/changelog.d/v1-hub-prep.doc.md
+++ b/docs/changelog.d/v1-hub-prep.doc.md
@ -0,0 +1 @@
+New how-to: [[set-up-sync-hub]] — stand up the canonical hub and connect an existing local device as an offline-capable spoke, the data-safe way (the hub adopts the device's identity rather than rewriting the device).
--- a/docs/changelog.d/v1-hub-prep.infra.md
+++ b/docs/changelog.d/v1-hub-prep.infra.md
@ -0,0 +1 @@
+Hub auth now **resolves an OIDC `sub` to an `owner_id`** (`Store::resolve_owner → Option<owner_id>`) instead of a single-tenant boolean gate (`authorize_owner_sub → bool`). Behavior is unchanged for the single-owner hub (claim-on-first; a stranger's token still 403s), but the contract no longer assumes one global owner — this is the multi-tenancy seam, so serving N owners later is additive rather than a rewrite. See the `Adoption + multi-tenant` task's context for the full decision.
--- a/docs/how-to/how-to.md
+++ b/docs/how-to/how-to.md
@ -18,4 +18,5 @@ Task-oriented guides for common operations.

 - [[install-heph]] — Install `heph`/`hephd` from the forge, set up the Neovim plugin, and isolate in-repo development
 - [[run-the-daemon]] — Run `hephd` as an OS service with `heph daemon start/stop/restart/status`
+- [[set-up-sync-hub]] — Stand up the canonical hub (indri) and connect an existing device as an offline-capable spoke
 - [[import-todoist]] — Seed a heph store from your Todoist projects + tasks (`mise run import-todoist`)
--- a/docs/how-to/set-up-sync-hub.md
+++ b/docs/how-to/set-up-sync-hub.md
@ -0,0 +1,125 @@
+---
+title: Set up a sync hub (and connect a device)
+modified: 2026-06-04
+tags:
+  - how-to
+---
+
+# Set up a sync hub (and connect a device)
+
+How to stand up the canonical **hub** (on `indri`, in blumeops) and connect an
+existing **local** device (e.g. `gilbert`) to it as an offline-capable spoke,
+**without migrating or risking the device's data**.
+
+## The model
+
+heph is **hub-and-spoke**, not a peer mesh ([[design]] §4, [[v1-prototype-tech-spec]] §3/§12/§13):
+
+- **Hub** — `hephd --mode server`: a full replica that also exposes an HTTP
+  endpoint others sync against. One canonical hub (`indri`).
+- **Spoke** — `hephd --mode local --hub-url <hub>`: its own full SQLite replica,
+  **fully usable offline**, with an append-only op-log; it background-syncs
+  (pull → merge → push) when the hub is reachable. Every device is a spoke.
+
+Surfaces (CLI / TUI / nvim) only ever talk to the **local** daemon over the unix
+socket; that daemon handles the hub conversation in the background.
+
+**Transport vs. identity.** Tailscale gives the devices a secure private network
+(reachability + encryption). **Authentik** sits on top as the authorization
+layer: the hub requires a valid OIDC bearer token on every op exchange, so
+merely being on the tailnet is not enough — this is the owner's most sensitive
+data.
+
+## The data-safety principle: the hub adopts the device, not the reverse
+
+A device's `owner_id` is embedded in some node ids (journals, tags), the op-log,
+and link rows. Rewriting it in place is the risky operation we **avoid**. Instead
+(**"Path A"**): the hub takes on the *existing device's* identity — same
+`owner_id` and data — so the device is **never rewritten**. `gilbert`'s store is
+untouched; `indri` is brought up as a copy of it and the two sync forward.
+
+> A device that is set up **after** the hub exists skips all of this: configure
+> it with the hub + Authentik from first launch ("born authed"), before it
+> creates data, and it simply joins.
+
+## 1. Authentik: register the heph application
+
+Create an OIDC/OAuth2 application + provider in Authentik for heph, configured
+for the **device-code (RFC 8628) flow**. Note the values the daemon and devices
+need:
+
+- **Issuer** — e.g. `https://authentik.ops.eblu.me/application/o/heph/`
+- **Client id** — the device-code client id (this is also the token *audience*).
+
+## 2. Bring up the hub on `indri`
+
+**Seed it from `gilbert` (Path A).** Quiesce `gilbert` (`heph daemon stop`),
+copy its store to `indri`, and give `indri` its **own device origin** so the two
+replicas don't share one (see *Current gaps* — this seeding step is the bit the
+blumeops deployment finalizes). `indri` now holds `gilbert`'s data under the same
+`owner_id`.
+
+Run the hub with auth enabled (issuer **and** audience together turn auth on;
+omit both only for local dev):
+
+```bash
+hephd --mode server \
+  --http-addr 0.0.0.0:8787 \
+  --db /var/lib/heph/heph.db \
+  --oidc-issuer  https://authentik.ops.eblu.me/application/o/heph/ \
+  --oidc-audience <heph-client-id>
+```
+
+The first identity to authenticate **claims** the hub's owner; thereafter only
+that identity is served (single-owner today — see [[design]] and the
+`Adoption + multi-tenant` task for the multi-tenancy seam).
+
+## 3. Point `gilbert` at the hub (spoke)
+
+Run `gilbert`'s daemon in local mode with the hub URL, OIDC issuer, and client id,
+then log in once (the device-code flow caches a bearer token in the OS keyring):
+
+```bash
+hephd --mode local \
+  --hub-url http://indri.<tailnet>.ts.net:8787 \
+  --oidc-issuer    https://authentik.ops.eblu.me/application/o/heph/ \
+  --oidc-client-id <heph-client-id>
+
+# one-time browser login on this device:
+heph auth login \
+  --hub-url   http://indri.<tailnet>.ts.net:8787 \
+  --issuer    https://authentik.ops.eblu.me/application/o/heph/ \
+  --client-id <heph-client-id>
+```
+
+The spoke now attaches the (auto-refreshing) bearer token to every hub request
+and background-syncs on its interval.
+
+## 4. Verify
+
+```bash
+heph sync --status     # last push/pull cursors, hub url
+heph sync              # force a cycle now
+```
+
+Make a change on `gilbert`, force a sync, and confirm it reached the hub (the push cursor in `heph sync --status` advances).
+
+## Current gaps (finalized by the blumeops deployment)
+
+The flag-level flow above works today, but two gaps keep it a hand-run process
+rather than a clean, managed deployment — tracked in the `Hephaestus` project:
+
+- **`heph daemon` only generates a `--mode local` service** (no `--hub-url` /
+  `--oidc-*`). So for now the hub and the spoke config are expressed as `hephd`
+  flags (run directly, or via the blumeops-managed systemd unit), not via
+  `heph daemon start`.
+- **Path A seeding is manual** (copy the store + reset the device origin). A
+  small enabler — seed a hub from a snapshot with a fresh origin, or
+  `hephd --owner-id` — would make this one step.
+
+## Related
+
+- [[run-the-daemon]] — manage the local daemon as an OS service
+- [[install-heph]] — install `heph`/`hephd` and the plugin
+- [[design]] — §4 the connect-only, hub-and-spoke model
+- [[v1-prototype-tech-spec]] — §3 runtime modes, §12 sync, §13 auth
				`@ -0,0 +1 @@`
				Slimmed the credential-keyring dependency to cut CI compile time. keyring 4's `keyring` meta-crate compiles every platform backend for the target — on Linux that pulled the zbus async stack, a redundant libdbus secret-service, the kernel keyutils store, a SQLite/zstd `db-keystore`, and OpenSSL (~290 crates in its subtree). Replaced it with `keyring-core` (the API) plus a single store per OS — macOS Keychain (`apple-native-keyring-store`), Linux Secret Service (`dbus-secret-service-keyring-store`, pure-Rust crypto, vendored libdbus so the build needs no system `libdbus-1-dev`) — registered directly in `oauth.rs`. hephd's Linux dependency graph drops from 401 to 235 crates (−166), removing the zbus stack and two C builds. Runtime behavior is unchanged.
				`@ -0,0 +1 @@`
				`New how-to: [[set-up-sync-hub]] — stand up the canonical hub and connect an existing local device as an offline-capable spoke, the data-safe way (the hub adopts the device's identity rather than rewriting the device).`
				`@ -0,0 +1 @@`
				Hub auth now resolves an OIDC `sub` to an `owner_id` (`Store::resolve_owner → Option<owner_id>`) instead of a single-tenant boolean gate (`authorize_owner_sub → bool`). Behavior is unchanged for the single-owner hub (claim-on-first; a stranger's token still 403s), but the contract no longer assumes one global owner — this is the multi-tenancy seam, so serving N owners later is additive rather than a rewrite. See the `Adoption + multi-tenant` task's context for the full decision.