heph-core: full-text search (FTS5)
Some checks failed
Build / validate (pull_request) Failing after 3s

Slice query-surface, part 2 (tech-spec §6). Migration v2 adds an FTS5
external-content table over nodes(title, body), kept in sync by
insert/update/delete triggers (with a backfill for existing rows).

- Store::search(query): owner-scoped, tombstones excluded, best-match
  first (FTS5 MATCH + rank). Exposed over RPC; `heph search` and
  `heph journal` CLI commands added.

3 search integration tests (title/body match, edits reflected via trigger,
tombstone exclusion, all insert paths indexed). 79 tests green. This
completes the local feature surface; the remaining slices are the
distributed/auth/nvim layer.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Erich Blume 2026-05-31 20:43:05 -07:00
commit 5d8ec45c55
7 changed files with 149 additions and 1 deletions

View file

@ -10,7 +10,7 @@ use rusqlite::Connection;
/// The ordered list of migrations. Never reorder or mutate a shipped entry —
/// only append.
const MIGRATIONS: &[(i64, &str)] = &[(1, MIGRATION_0001)];
const MIGRATIONS: &[(i64, &str)] = &[(1, MIGRATION_0001), (2, MIGRATION_0002)];
/// v1 — the base node graph, identity, and sync scaffolding (tech-spec §4.5).
const MIGRATION_0001: &str = r#"
@ -93,6 +93,28 @@ CREATE TABLE conflicts (
);
"#;
/// v2 — full-text search over title + body via FTS5 (external content over
/// `nodes`), kept in sync by triggers (tech-spec §4.5).
const MIGRATION_0002: &str = r#"
CREATE VIRTUAL TABLE nodes_fts USING fts5(
title, body, content='nodes', content_rowid='rowid'
);
-- Index any rows that already exist.
INSERT INTO nodes_fts(rowid, title, body) SELECT rowid, title, body FROM nodes;
CREATE TRIGGER nodes_ai AFTER INSERT ON nodes BEGIN
INSERT INTO nodes_fts(rowid, title, body) VALUES (new.rowid, new.title, new.body);
END;
CREATE TRIGGER nodes_ad AFTER DELETE ON nodes BEGIN
INSERT INTO nodes_fts(nodes_fts, rowid, title, body) VALUES ('delete', old.rowid, old.title, old.body);
END;
CREATE TRIGGER nodes_au AFTER UPDATE ON nodes BEGIN
INSERT INTO nodes_fts(nodes_fts, rowid, title, body) VALUES ('delete', old.rowid, old.title, old.body);
INSERT INTO nodes_fts(rowid, title, body) VALUES (new.rowid, new.title, new.body);
END;
"#;
/// Apply all pending migrations to `conn`.
pub fn migrate(conn: &Connection) -> Result<()> {
let current: i64 = conn.query_row("PRAGMA user_version", [], |r| r.get(0))?;

View file

@ -167,6 +167,10 @@ impl Store for LocalStore {
tasks::health(&self.conn, &self.owner_id)
}
fn search(&self, query: &str) -> Result<Vec<Node>> {
nodes::search(&self.conn, &self.owner_id, query)
}
fn journal_open_or_create(&mut self, date: &str) -> Result<Node> {
let now = self.clock.now_ms();
nodes::open_or_create_journal(&self.conn, &self.owner_id, now, date)

View file

@ -180,6 +180,25 @@ pub(super) fn update(
Ok(node)
}
/// Full-text search over title + body, owner-scoped, excluding tombstoned
/// nodes, best-match first (tech-spec §6). `query` is FTS5 MATCH syntax.
pub(super) fn search(conn: &Connection, owner: &str, query: &str) -> Result<Vec<Node>> {
let sql = format!(
"SELECT {} FROM nodes n
JOIN nodes_fts f ON f.rowid = n.rowid
WHERE nodes_fts MATCH ?1 AND n.owner_id = ?2 AND n.tombstoned = 0
ORDER BY rank",
COLUMNS
.split(", ")
.map(|c| format!("n.{c}"))
.collect::<Vec<_>>()
.join(", ")
);
let mut stmt = conn.prepare(&sql)?;
let rows = stmt.query_map((query, owner), from_row)?;
Ok(rows.collect::<rusqlite::Result<Vec<_>>>()?)
}
/// A node's aliases (wiki-link names), sorted. Empty until aliases are written.
pub(super) fn aliases(conn: &Connection, id: &str) -> Result<Vec<String>> {
let mut stmt = conn.prepare("SELECT alias FROM aliases WHERE node_id = ?1 ORDER BY alias")?;

View file

@ -75,6 +75,10 @@ pub trait Store {
/// Working-set health — orange/active/on-deck/conflict counts (tech-spec §7).
fn health(&self) -> Result<Health>;
/// Full-text search over title + body (FTS5), owner-scoped, best-match
/// first, tombstones excluded (tech-spec §6). `query` is FTS5 MATCH syntax.
fn search(&self, query: &str) -> Result<Vec<Node>>;
/// Open (creating if absent) the journal node for an ISO `date`. The id is
/// deterministic in `(owner, date)` so offline replicas converge (§3.1).
fn journal_open_or_create(&mut self, date: &str) -> Result<Node>;

View file

@ -0,0 +1,65 @@
//! Full-text search over title + body via FTS5 (tech-spec §6, slice query-surface).
use heph_core::{FixedClock, LocalStore, NewNode, NodeKind, Store};
fn store() -> LocalStore {
LocalStore::open_in_memory(Box::new(FixedClock(1_700_000_000_000))).unwrap()
}
#[test]
fn search_matches_title_and_body() {
let mut s = store();
let roof = s
.create_node(NewNode::doc(
"Roof repair",
"Called the contractor about shingles.",
))
.unwrap();
s.create_node(NewNode::doc("Garden", "Plant tomatoes in spring."))
.unwrap();
// Body term.
let hits = s.search("contractor").unwrap();
assert_eq!(hits.len(), 1);
assert_eq!(hits[0].id, roof.id);
// Title term.
let hits = s.search("roof").unwrap();
assert_eq!(hits.len(), 1);
assert_eq!(hits[0].id, roof.id);
// No match.
assert!(s.search("nonexistentword").unwrap().is_empty());
}
#[test]
fn search_reflects_edits_and_excludes_tombstoned() {
let mut s = store();
let n = s.create_node(NewNode::doc("Notes", "alpha")).unwrap();
assert_eq!(s.search("alpha").unwrap().len(), 1);
assert!(s.search("bravo").unwrap().is_empty());
// Edit the body → FTS index follows via the update trigger.
s.update_node(&n.id, None, Some("bravo charlie".into()))
.unwrap();
assert!(s.search("alpha").unwrap().is_empty());
assert_eq!(s.search("bravo").unwrap().len(), 1);
// Tombstoned nodes drop out of results.
s.tombstone_node(&n.id).unwrap();
assert!(s.search("bravo").unwrap().is_empty());
}
#[test]
fn search_indexes_all_node_insert_paths() {
// Nodes created through paths other than `create_node` (here a journal)
// are indexed too, since the FTS triggers fire on every nodes insert.
let mut s = store();
let j = s.journal_open_or_create("2026-05-31").unwrap();
s.update_node(&j.id, None, Some("dentist appointment".into()))
.unwrap();
let hits = s.search("dentist").unwrap();
assert_eq!(hits.len(), 1);
assert_eq!(hits[0].kind, NodeKind::Journal);
}

View file

@ -66,6 +66,16 @@ enum Command {
/// Node id.
id: String,
},
/// Full-text search over titles and bodies.
Search {
/// FTS5 query.
query: String,
},
/// Open (or create) the journal for an ISO date (YYYY-MM-DD).
Journal {
/// The ISO date.
date: String,
},
/// Export the store to a directory tree of .md files.
Export {
/// Destination directory (created if needed).
@ -123,6 +133,21 @@ fn main() -> Result<()> {
let result = client.call("node.get", json!({ "id": id }))?;
println!("{}", serde_json::to_string_pretty(&result)?);
}
Command::Search { query } => {
let result = client.call("search", json!({ "query": query }))?;
let nodes: Vec<Node> = serde_json::from_value(result)?;
if nodes.is_empty() {
println!("No matches.");
}
for n in &nodes {
println!("{} [{}] {}", n.id, n.kind.as_str(), n.title);
}
}
Command::Journal { date } => {
let result = client.call("journal.open_or_create", json!({ "date": date }))?;
let node: Node = serde_json::from_value(result)?;
println!("Journal {} ({})", node.title, node.id);
}
Command::Export { dir } => {
let path = dir
.to_str()

View file

@ -158,6 +158,11 @@ struct JournalParams {
date: String,
}
#[derive(Deserialize)]
struct SearchParams {
query: String,
}
#[derive(Deserialize)]
struct LinkParams {
id: String,
@ -232,6 +237,10 @@ pub fn dispatch(store: &mut dyn Store, method: &str, params: Value) -> Result<Va
json!(store.list(p.scope.as_deref(), p.attention, p.include_blue)?)
}
"health" => json!(store.health()?),
"search" => {
let p: SearchParams = parse(params)?;
json!(store.search(&p.query)?)
}
"journal.open_or_create" => {
let p: JournalParams = parse(params)?;
json!(store.journal_open_or_create(&p.date)?)