generated from eblume/project-template
C2: hephd self-update (Mikado plan — cards for review) #7
2 changed files with 67 additions and 1 deletion
C2(hephd-self-update): impl verify hub-dropout resilience (+ client timeout)
Lock in the base-case guarantee that a self-updating hub (which restarts under its spokes) relies on. New sync_http test: a spoke whose hub is unreachable keeps serving and accepting writes, a sync attempt fails fast (returns Err rather than hanging or panicking), and when the hub returns, the accumulated ops reconcile with no special recovery. The verification surfaced one non-graceful path — the daemon's shared reqwest client had no timeout, so a black-hole hub (one that connects but never replies) could stall the sync/self-update loop. Give it a 30s timeout so 'the hub can vanish at any moment' holds even mid-request. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
commit
9213a943f5
|
|
@ -77,7 +77,14 @@ impl Daemon {
|
|||
ctx: Ctx {
|
||||
store: Arc::new(Mutex::new(store)),
|
||||
hub_url: None,
|
||||
http: reqwest::Client::new(),
|
||||
// Bound every hub request so a black-hole hub (one that accepts
|
||||
// a connection but never replies) can't stall the sync /
|
||||
// self-update loops — "the hub can vanish at any moment" is the
|
||||
// base case, including vanishing mid-request.
|
||||
http: reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(30))
|
||||
.build()
|
||||
.expect("building the daemon HTTP client"),
|
||||
auth: None,
|
||||
self_update: None,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -84,6 +84,65 @@ async fn a_node_propagates_a_to_hub_to_b() {
|
|||
assert_eq!(on_b.body.as_deref(), Some("shingles need work"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn spoke_survives_an_unreachable_hub_then_reconciles_when_it_returns() {
|
||||
// "The hub can vanish at any moment" is the base case, not a guarded edge:
|
||||
// a spoke whose hub is down keeps serving + accepting writes, and when the
|
||||
// hub returns its accumulated ops reconcile with no special recovery. This
|
||||
// is what makes a self-updating hub (which restarts under its spokes) safe.
|
||||
let http = reqwest::Client::builder()
|
||||
.timeout(std::time::Duration::from_secs(5)) // never hang the test
|
||||
.build()
|
||||
.unwrap();
|
||||
let (a, _ca, _da) = replica(1000);
|
||||
|
||||
// Hub down: work happens locally, and a sync attempt fails *fast* (Err — not
|
||||
// a panic, not a hang) and leaves the store untouched.
|
||||
let id = {
|
||||
let mut ga = a.lock().unwrap();
|
||||
ga.create_node(NewNode::doc(
|
||||
"Offline note",
|
||||
"written while the hub was down",
|
||||
))
|
||||
.unwrap()
|
||||
.id
|
||||
};
|
||||
let dead_hub = "http://127.0.0.1:1"; // nothing listens → connection refused
|
||||
assert!(
|
||||
sync::sync_once(a.clone(), dead_hub, &http, None)
|
||||
.await
|
||||
.is_err(),
|
||||
"sync against a dead hub should error, not hang or panic"
|
||||
);
|
||||
|
||||
// The spoke is unharmed: the note is intact and further writes still succeed.
|
||||
{
|
||||
let mut ga = a.lock().unwrap();
|
||||
assert_eq!(ga.get_node(&id).unwrap().unwrap().title, "Offline note");
|
||||
ga.create_node(NewNode::doc("Another", "still working offline"))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// The hub returns: the spoke pushes everything it accumulated while offline,
|
||||
// and a fresh replica pulls it — convergence resumes, no manual recovery.
|
||||
let hub_url = start_hub().await;
|
||||
let up = sync::sync_once(a.clone(), &hub_url, &http, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(up.pushed > 0, "spoke pushed nothing after the hub returned");
|
||||
let (b, _cb, _db) = replica(1000);
|
||||
sync::sync_once(b.clone(), &hub_url, &http, None)
|
||||
.await
|
||||
.unwrap();
|
||||
let on_b = b
|
||||
.lock()
|
||||
.unwrap()
|
||||
.get_node(&id)
|
||||
.unwrap()
|
||||
.expect("offline-authored node reached B after the hub recovered");
|
||||
assert_eq!(on_b.title, "Offline note");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn divergent_scalar_edits_converge_through_the_hub_with_a_conflict() {
|
||||
let hub_url = start_hub().await;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue