From 5d8ec45c5595989c27bc3ed9c1decc3f0dbf1356 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Sun, 31 May 2026 20:43:05 -0700 Subject: [PATCH] heph-core: full-text search (FTS5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Slice query-surface, part 2 (tech-spec §6). Migration v2 adds an FTS5 external-content table over nodes(title, body), kept in sync by insert/update/delete triggers (with a backfill for existing rows). - Store::search(query): owner-scoped, tombstones excluded, best-match first (FTS5 MATCH + rank). Exposed over RPC; `heph search` and `heph journal` CLI commands added. 3 search integration tests (title/body match, edits reflected via trigger, tombstone exclusion, all insert paths indexed). 79 tests green. This completes the local feature surface; the remaining slices are the distributed/auth/nvim layer. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/heph-core/src/sqlite/migrations.rs | 24 ++++++++- crates/heph-core/src/sqlite/mod.rs | 4 ++ crates/heph-core/src/sqlite/nodes.rs | 19 +++++++ crates/heph-core/src/store.rs | 4 ++ crates/heph-core/tests/search.rs | 65 +++++++++++++++++++++++ crates/heph/src/main.rs | 25 +++++++++ crates/hephd/src/rpc.rs | 9 ++++ 7 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 crates/heph-core/tests/search.rs diff --git a/crates/heph-core/src/sqlite/migrations.rs b/crates/heph-core/src/sqlite/migrations.rs index 62a7e55..a6ac4e2 100644 --- a/crates/heph-core/src/sqlite/migrations.rs +++ b/crates/heph-core/src/sqlite/migrations.rs @@ -10,7 +10,7 @@ use rusqlite::Connection; /// The ordered list of migrations. Never reorder or mutate a shipped entry — /// only append. -const MIGRATIONS: &[(i64, &str)] = &[(1, MIGRATION_0001)]; +const MIGRATIONS: &[(i64, &str)] = &[(1, MIGRATION_0001), (2, MIGRATION_0002)]; /// v1 — the base node graph, identity, and sync scaffolding (tech-spec §4.5). 
 const MIGRATION_0001: &str = r#"
@@ -93,6 +93,28 @@ CREATE TABLE conflicts (
 );
 "#;
 
+/// v2 — full-text search over title + body via FTS5 (external content over
+/// `nodes`), kept in sync by triggers (tech-spec §4.5).
+const MIGRATION_0002: &str = r#"
+CREATE VIRTUAL TABLE nodes_fts USING fts5(
+    title, body, content='nodes', content_rowid='rowid'
+);
+
+-- Index any rows that already exist.
+INSERT INTO nodes_fts(rowid, title, body) SELECT rowid, title, body FROM nodes;
+
+CREATE TRIGGER nodes_ai AFTER INSERT ON nodes BEGIN
+    INSERT INTO nodes_fts(rowid, title, body) VALUES (new.rowid, new.title, new.body);
+END;
+CREATE TRIGGER nodes_ad AFTER DELETE ON nodes BEGIN
+    INSERT INTO nodes_fts(nodes_fts, rowid, title, body) VALUES ('delete', old.rowid, old.title, old.body);
+END;
+CREATE TRIGGER nodes_au AFTER UPDATE ON nodes BEGIN
+    INSERT INTO nodes_fts(nodes_fts, rowid, title, body) VALUES ('delete', old.rowid, old.title, old.body);
+    INSERT INTO nodes_fts(rowid, title, body) VALUES (new.rowid, new.title, new.body);
+END;
+"#;
+
 /// Apply all pending migrations to `conn`.
pub fn migrate(conn: &Connection) -> Result<()> { let current: i64 = conn.query_row("PRAGMA user_version", [], |r| r.get(0))?; diff --git a/crates/heph-core/src/sqlite/mod.rs b/crates/heph-core/src/sqlite/mod.rs index f21e820..65abc82 100644 --- a/crates/heph-core/src/sqlite/mod.rs +++ b/crates/heph-core/src/sqlite/mod.rs @@ -167,6 +167,10 @@ impl Store for LocalStore { tasks::health(&self.conn, &self.owner_id) } + fn search(&self, query: &str) -> Result> { + nodes::search(&self.conn, &self.owner_id, query) + } + fn journal_open_or_create(&mut self, date: &str) -> Result { let now = self.clock.now_ms(); nodes::open_or_create_journal(&self.conn, &self.owner_id, now, date) diff --git a/crates/heph-core/src/sqlite/nodes.rs b/crates/heph-core/src/sqlite/nodes.rs index 5cfd48e..09de2be 100644 --- a/crates/heph-core/src/sqlite/nodes.rs +++ b/crates/heph-core/src/sqlite/nodes.rs @@ -180,6 +180,25 @@ pub(super) fn update( Ok(node) } +/// Full-text search over title + body, owner-scoped, excluding tombstoned +/// nodes, best-match first (tech-spec §6). `query` is FTS5 MATCH syntax. +pub(super) fn search(conn: &Connection, owner: &str, query: &str) -> Result> { + let sql = format!( + "SELECT {} FROM nodes n + JOIN nodes_fts f ON f.rowid = n.rowid + WHERE nodes_fts MATCH ?1 AND n.owner_id = ?2 AND n.tombstoned = 0 + ORDER BY rank", + COLUMNS + .split(", ") + .map(|c| format!("n.{c}")) + .collect::>() + .join(", ") + ); + let mut stmt = conn.prepare(&sql)?; + let rows = stmt.query_map((query, owner), from_row)?; + Ok(rows.collect::>>()?) +} + /// A node's aliases (wiki-link names), sorted. Empty until aliases are written. 
pub(super) fn aliases(conn: &Connection, id: &str) -> Result> { let mut stmt = conn.prepare("SELECT alias FROM aliases WHERE node_id = ?1 ORDER BY alias")?; diff --git a/crates/heph-core/src/store.rs b/crates/heph-core/src/store.rs index 8d2b2a2..2893565 100644 --- a/crates/heph-core/src/store.rs +++ b/crates/heph-core/src/store.rs @@ -75,6 +75,10 @@ pub trait Store { /// Working-set health — orange/active/on-deck/conflict counts (tech-spec §7). fn health(&self) -> Result; + /// Full-text search over title + body (FTS5), owner-scoped, best-match + /// first, tombstones excluded (tech-spec §6). `query` is FTS5 MATCH syntax. + fn search(&self, query: &str) -> Result>; + /// Open (creating if absent) the journal node for an ISO `date`. The id is /// deterministic in `(owner, date)` so offline replicas converge (§3.1). fn journal_open_or_create(&mut self, date: &str) -> Result; diff --git a/crates/heph-core/tests/search.rs b/crates/heph-core/tests/search.rs new file mode 100644 index 0000000..dac586c --- /dev/null +++ b/crates/heph-core/tests/search.rs @@ -0,0 +1,65 @@ +//! Full-text search over title + body via FTS5 (tech-spec §6, slice query-surface). + +use heph_core::{FixedClock, LocalStore, NewNode, NodeKind, Store}; + +fn store() -> LocalStore { + LocalStore::open_in_memory(Box::new(FixedClock(1_700_000_000_000))).unwrap() +} + +#[test] +fn search_matches_title_and_body() { + let mut s = store(); + let roof = s + .create_node(NewNode::doc( + "Roof repair", + "Called the contractor about shingles.", + )) + .unwrap(); + s.create_node(NewNode::doc("Garden", "Plant tomatoes in spring.")) + .unwrap(); + + // Body term. + let hits = s.search("contractor").unwrap(); + assert_eq!(hits.len(), 1); + assert_eq!(hits[0].id, roof.id); + + // Title term. + let hits = s.search("roof").unwrap(); + assert_eq!(hits.len(), 1); + assert_eq!(hits[0].id, roof.id); + + // No match. 
+    assert!(s.search("nonexistentword").unwrap().is_empty());
+}
+
+#[test]
+fn search_reflects_edits_and_excludes_tombstoned() {
+    let mut s = store();
+    let n = s.create_node(NewNode::doc("Notes", "alpha")).unwrap();
+
+    assert_eq!(s.search("alpha").unwrap().len(), 1);
+    assert!(s.search("bravo").unwrap().is_empty());
+
+    // Edit the body → FTS index follows via the update trigger.
+    s.update_node(&n.id, None, Some("bravo charlie".into()))
+        .unwrap();
+    assert!(s.search("alpha").unwrap().is_empty());
+    assert_eq!(s.search("bravo").unwrap().len(), 1);
+
+    // Tombstoned nodes drop out of results.
+    s.tombstone_node(&n.id).unwrap();
+    assert!(s.search("bravo").unwrap().is_empty());
+}
+
+#[test]
+fn search_indexes_all_node_insert_paths() {
+    // Nodes created through paths other than `create_node` (here a journal)
+    // are indexed too, since the FTS triggers fire on every nodes insert.
+    let mut s = store();
+    let j = s.journal_open_or_create("2026-05-31").unwrap();
+    s.update_node(&j.id, None, Some("dentist appointment".into()))
+        .unwrap();
+    let hits = s.search("dentist").unwrap();
+    assert_eq!(hits.len(), 1);
+    assert_eq!(hits[0].kind, NodeKind::Journal);
+}
diff --git a/crates/heph/src/main.rs b/crates/heph/src/main.rs
index 3ebb54c..ddccc11 100644
--- a/crates/heph/src/main.rs
+++ b/crates/heph/src/main.rs
@@ -66,6 +66,16 @@ enum Command {
         /// Node id.
         id: String,
     },
+    /// Full-text search over titles and bodies.
+    Search {
+        /// FTS5 query.
+        query: String,
+    },
+    /// Open (or create) the journal for an ISO date (YYYY-MM-DD).
+    Journal {
+        /// The ISO date.
+        date: String,
+    },
     /// Export the store to a directory tree of .md files.
     Export {
         /// Destination directory (created if needed).
@@ -123,6 +133,21 @@ fn main() -> Result<()> { let result = client.call("node.get", json!({ "id": id }))?; println!("{}", serde_json::to_string_pretty(&result)?); } + Command::Search { query } => { + let result = client.call("search", json!({ "query": query }))?; + let nodes: Vec = serde_json::from_value(result)?; + if nodes.is_empty() { + println!("No matches."); + } + for n in &nodes { + println!("{} [{}] {}", n.id, n.kind.as_str(), n.title); + } + } + Command::Journal { date } => { + let result = client.call("journal.open_or_create", json!({ "date": date }))?; + let node: Node = serde_json::from_value(result)?; + println!("Journal {} ({})", node.title, node.id); + } Command::Export { dir } => { let path = dir .to_str() diff --git a/crates/hephd/src/rpc.rs b/crates/hephd/src/rpc.rs index 9b28aa2..29c5b3f 100644 --- a/crates/hephd/src/rpc.rs +++ b/crates/hephd/src/rpc.rs @@ -158,6 +158,11 @@ struct JournalParams { date: String, } +#[derive(Deserialize)] +struct SearchParams { + query: String, +} + #[derive(Deserialize)] struct LinkParams { id: String, @@ -232,6 +237,10 @@ pub fn dispatch(store: &mut dyn Store, method: &str, params: Value) -> Result json!(store.health()?), + "search" => { + let p: SearchParams = parse(params)?; + json!(store.search(&p.query)?) + } "journal.open_or_create" => { let p: JournalParams = parse(params)?; json!(store.journal_open_or_create(&p.date)?)