heph.nvim: rip out auto-spawn — connect-only plugin
All checks were successful
Build / validate (pull_request) Successful in 10m44s

The daemon is now an OS service (`heph daemon`); the plugin no longer spawns or
supervises one. Removes the managed-daemon machinery entirely.

- delete lua/heph/daemon.lua (spawn/ensure/stop_spawned/self-heal)
- init.lua: connect-only; probe `health` once and guide to `heph daemon start`
- rpc.lua: drop set_respawn + respawn-on-drop; a dropped connection just
  reconnects once (e.g. after `heph daemon restart`), never spawns
- config.lua: drop autostart/bin/db; stable socket fallback (data-dir, matches
  hephd::default_socket_path), keep $HEPH_SOCKET for dev isolation
- tests: spawn/wait_ready move into the e2e harness (test infra); rework
  managed_daemon_spec into a connect-only spec (connect / clean-fail / reconnect)

16 nvim e2e specs pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Erich Blume 2026-06-02 21:21:28 -07:00
commit cdd4d9f62a
6 changed files with 114 additions and 308 deletions

View file

@ -4,15 +4,8 @@ local M = {}
M.defaults = {
--- Path to hephd's unix socket. `nil` → `$HEPH_SOCKET`, else the daemon default.
--- The plugin is connect-only; run the daemon with `heph daemon start`.
socket = nil,
--- DB path for an autostarted local daemon. `nil` → `$HEPH_DB`, else hephd's default.
db = nil,
--- Plug-and-play: spawn (and manage) a local hephd when none is serving
--- `socket`. Set `false` when you run your own daemon (server/client): the
--- plugin then connects only, and warns if nothing is reachable.
autostart = true,
--- hephd binary for autostart (on PATH for an installed heph).
bin = "hephd",
--- Title of the home / index page (`:Heph home`).
home = "Home",
--- How many recent days the `:Heph journals` picker offers.
@ -22,27 +15,24 @@ M.defaults = {
}
--- Resolve the socket path: explicit opt, then `$HEPH_SOCKET`, then hephd's
--- default (`$XDG_RUNTIME_DIR/heph/hephd.sock`, temp-dir fallback). The env knob
--- lets a dev Neovim target a `mise run dev` daemon without touching real data.
--- default — `$XDG_RUNTIME_DIR/heph/hephd.sock`, else a **stable**
--- `<data-dir>/heph/hephd.sock` (matching `hephd::default_socket_path`; not a
--- temp dir, since the daemon is a persistent service). `$HEPH_SOCKET` lets a
--- dev Neovim target a `mise run dev` daemon without touching real data.
function M.resolve_socket(opt)
opt = (opt and #opt > 0) and opt or vim.env.HEPH_SOCKET
if opt and #opt > 0 then
return opt
end
local xdg = vim.env.XDG_RUNTIME_DIR
local base = (xdg and #xdg > 0) and xdg or (vim.env.TMPDIR or "/tmp")
return (base:gsub("/+$", "")) .. "/heph/hephd.sock"
end
--- Resolve the DB path for an autostarted daemon: explicit opt, then `$HEPH_DB`,
--- else nil (let hephd pick its default). Pairs with `resolve_socket` for dev
--- isolation.
function M.resolve_db(opt)
opt = (opt and #opt > 0) and opt or vim.env.HEPH_DB
if opt and #opt > 0 then
return opt
if xdg and #xdg > 0 then
return (xdg:gsub("/+$", "")) .. "/heph/hephd.sock"
end
return nil
local data = vim.env.XDG_DATA_HOME
if not (data and #data > 0) then
data = (vim.env.HOME or "") .. "/.local/share"
end
return (data:gsub("/+$", "")) .. "/heph/hephd.sock"
end
--- Apply the default keymaps (no-op when `opts.keymaps` is false).

View file

@ -1,139 +0,0 @@
--- Locate, spawn, and wait on a `hephd` daemon. Shared by optional autostart
--- and by the e2e harness (so test readiness uses the same definition the
--- plugin does).
local uv = vim.uv or vim.loop
local M = {}
-- The daemon THIS nvim spawned (nil if we connected to an existing one).
-- `{ handle, exited = { done }, socket, db, bin }`.
M._managed = nil
--- Spawn a `local`-mode hephd and return its process handle.
---
--- `opts.db` and `opts.socket` are forwarded as `--db` / `--socket` only when
--- set. `opts.bin` defaults to `hephd` (resolved via PATH). `opts.stderr` is
--- passed through as the child's third stdio slot. `opts.on_exit(code, signal)`
--- is invoked when the process exits, if provided.
---
--- Returns `{ handle, pid }`. Raises when the process cannot be started; the
--- message includes the reason libuv reported (e.g. a missing binary), which
--- `uv.spawn` returns in place of the pid on failure.
function M.spawn(opts)
  local bin = opts.bin or "hephd"

  local args = { "--mode", "local" }
  if opts.db then
    args[#args + 1] = "--db"
    args[#args + 1] = opts.db
  end
  if opts.socket then
    args[#args + 1] = "--socket"
    args[#args + 1] = opts.socket
  end

  -- On success the second result is the pid; on failure `handle` is nil and
  -- the second result is the error text.
  local handle, pid_or_err = uv.spawn(bin, {
    args = args,
    stdio = { nil, nil, opts.stderr },
  }, function(code, signal)
    if opts.on_exit then
      opts.on_exit(code, signal)
    end
  end)

  if not handle then
    error(("heph: failed to spawn hephd (bin=%s): %s"):format(bin, tostring(pid_or_err)))
  end
  return { handle = handle, pid = pid_or_err }
end
--- Block until `socket` exists on disk AND answers a `health` RPC, or until
--- `timeout` milliseconds (default 5000) have elapsed. Returns `true` on
--- success, otherwise `false, reason`.
---
--- A bare existence check would race the daemon's bind→accept, so liveness is
--- proven with a real round-trip on a throwaway session.
---
--- The polling is a plain Lua loop on purpose: the rpc round-trip uses
--- `vim.wait` internally, and running it from inside another `vim.wait`
--- predicate nests the waits and deadlocks Neovim (seen with a stale socket).
function M.wait_ready(socket, timeout)
  local rpc = require("heph.rpc")
  local budget_ms = timeout or 5000
  local stop_at = uv.hrtime() + budget_ms * 1e6 -- hrtime is in nanoseconds

  -- One probe: open a session, try `health`, always close the session.
  local function answers_health()
    local probe = rpc.new_session(socket)
    local alive = pcall(function()
      probe:call("health", vim.empty_dict(), { timeout = 200 })
    end)
    probe:close()
    return alive
  end

  while uv.hrtime() < stop_at do
    if uv.fs_stat(socket) ~= nil and answers_health() then
      return true
    end
    vim.wait(50) -- predicate-less ~50ms yield, so no nested wait
  end

  return false, "daemon not ready at " .. socket
end
--- Make sure a daemon is reachable at `opts.socket`.
---
--- 1. Probe first (`opts.probe_ms`, default 400ms): if anything is already
---    serving the socket, use it and spawn nothing.
--- 2. Otherwise, when `opts.autostart` is set, spawn a local hephd
---    (`opts.bin`, `opts.db`) and wait up to `opts.ready_ms` (default 5000ms)
---    for it to answer; on success record it in `M._managed`.
---
--- Returns `reachable, spawned_by_us`. Raises if a spawned daemon never
--- becomes ready (after sending it SIGTERM).
function M.ensure(opts)
  local already_up = M.wait_ready(opts.socket, opts.probe_ms or 400)
  if already_up then
    return true, false -- someone else's daemon: connect, don't spawn
  end
  if not opts.autostart then
    return false, false
  end

  local exited = { done = false }
  local child = M.spawn({
    bin = opts.bin,
    socket = opts.socket,
    db = opts.db,
    on_exit = function()
      exited.done = true
    end,
  })

  local ready, why = M.wait_ready(opts.socket, opts.ready_ms or 5000)
  if ready then
    M._managed = {
      handle = child.handle,
      exited = exited,
      socket = opts.socket,
      db = opts.db,
      bin = opts.bin,
    }
    return true, true
  end

  -- Never became ready: best-effort terminate the child, then fail loudly.
  pcall(function()
    if not child.handle:is_closing() then
      child.handle:kill("sigterm")
    end
  end)
  error("heph: spawned hephd but it never became ready: " .. tostring(why))
end
--- Whether this nvim owns a spawned daemon that has not exited yet.
--- Returns `false` when nothing was spawned (`M._managed` is nil).
function M.is_managed()
  local managed = M._managed
  if managed == nil then
    return false
  end
  return not managed.exited.done
end
--- Stop the daemon this nvim spawned (no-op if we connected to an existing one).
---
--- Teardown order: forget the record, SIGTERM the process and wait for its exit
--- callback, close the handle, then unlink the socket file. Every step after the
--- early return is wrapped in `pcall`, so a failure in one does not skip the rest.
function M.stop_spawned()
local m = M._managed
if not m then
return
end
-- Clear the record first so `is_managed()` reports false from here on.
M._managed = nil
if m.handle and not m.exited.done then
pcall(function()
m.handle:kill("sigterm")
end)
-- Wait up to 2000ms (polling every 20ms) for the exit callback to flip `done`.
vim.wait(2000, function()
return m.exited.done
end, 20)
end
-- Release the process handle unless it is already closing.
pcall(function()
if m.handle and not m.handle:is_closing() then
m.handle:close()
end
end)
-- hephd doesn't unlink its socket on SIGTERM; remove it so the next launch
-- doesn't probe a stale socket. (A crash still leaves one — wait_ready copes.)
pcall(function()
uv.fs_unlink(m.socket)
end)
end
return M

View file

@ -11,48 +11,29 @@ M.config = nil
--- Configure the plugin. `opts.socket` overrides the daemon socket path;
--- `opts.keymaps = false` disables the default keymaps. Idempotent.
---
--- The plugin is **connect-only** — it never spawns or supervises a `hephd`.
--- Run the daemon as an OS service with `heph daemon start` ([[run-the-daemon]]);
--- this just connects to it. If nothing is serving the socket, we notify once
--- with guidance and let later calls retry (a plain reconnect, never a spawn).
function M.setup(opts)
local cfg = vim.tbl_deep_extend("force", config.defaults, opts or {})
cfg.socket = config.resolve_socket(cfg.socket)
cfg.db = config.resolve_db(cfg.db)
M.config = cfg
local rpc = require("heph.rpc")
local daemon = require("heph.daemon")
rpc.setup(cfg.socket)
if cfg.autostart then
-- Plug-and-play: bring up a managed local daemon if none is serving, and
-- self-heal a dropped connection on later calls.
local ok = pcall(daemon.ensure, {
socket = cfg.socket,
db = cfg.db,
bin = cfg.bin,
autostart = true,
})
if not ok then
require("heph.util").notify(
"could not start hephd; will retry on first use",
vim.log.levels.WARN
)
end
rpc.set_respawn(function()
pcall(daemon.ensure, {
socket = cfg.socket,
db = cfg.db,
bin = cfg.bin,
autostart = true,
})
end)
else
-- Explicit architecture: connect only, never spawn over the user's daemon.
rpc.set_respawn(nil)
if not daemon.ensure({ socket = cfg.socket, autostart = false }) then
require("heph.util").notify(
"no hephd reachable at " .. cfg.socket .. " (autostart disabled)",
vim.log.levels.WARN
)
end
-- A cheap liveness probe so a missing daemon is reported up front, not as a
-- cryptic error on the first command.
local ok = pcall(function()
rpc.call("health", {})
end)
if not ok then
require("heph.util").notify(
"no hephd at " .. cfg.socket .. " — run `heph daemon start`",
vim.log.levels.WARN
)
end
config.apply_keymaps(cfg)

View file

@ -182,13 +182,6 @@ function M.session()
return M._default
end
--- Register a hook that (re)ensures the daemon — called once to self-heal a
--- dropped connection before a single retry. `nil` disables self-heal (used when
--- autostart is off, so a connect-only setup fails loudly instead of respawning).
---
--- `fn` is stored in `M._respawn` and replaces any previously registered hook.
function M.set_respawn(fn)
M._respawn = fn
end
local function is_connection_error(msg)
msg = tostring(msg)
return msg:find("connect", 1, true) ~= nil
@ -196,16 +189,15 @@ local function is_connection_error(msg)
or msg:find("timeout", 1, true) ~= nil
end
--- Blocking call on the default session. If the call fails because the
--- connection is dead and a respawn hook is set, ensure the daemon and retry
--- once (the prior owner releases the DB lock on exit, so a respawn can claim it).
--- Blocking call on the default session. The plugin is connect-only: on a
--- dropped connection we drop the dead session and **reconnect once** (e.g. the
--- daemon was restarted via `heph daemon restart`) — we never spawn a daemon.
function M.call(method, params, opts)
local ok, result = pcall(M.session().call, M.session(), method, params, opts)
if ok then
return result
end
if M._respawn and is_connection_error(result) then
pcall(M._respawn)
if is_connection_error(result) then
M.session():close() -- drop the dead connection so the retry reconnects
return M.session():call(method, params, opts)
end

View file

@ -3,11 +3,52 @@
--- Step builders (create doc/task, open, edit, save) are reusable across specs.
local rpc = require("heph.rpc")
local daemon = require("heph.daemon")
local uv = vim.uv or vim.loop
local M = {}
local counter = 0
--- Spawn a `local`-mode hephd against `opts.db` listening on `opts.socket`
--- (test infra — the plugin itself is connect-only; the daemon is normally an
--- OS service).
---
--- `opts.bin` is the binary to run, `opts.stderr` is passed as the child's third
--- stdio slot, and `opts.on_exit(code, signal)` is called on exit if provided.
---
--- Returns `{ handle, pid }`. Raises when the process cannot be started; the
--- message includes the reason libuv reported, which `uv.spawn` returns in
--- place of the pid on failure (so a bad binary path is obvious in test output).
local function spawn(opts)
  local handle, pid_or_err = uv.spawn(opts.bin, {
    args = { "--mode", "local", "--db", opts.db, "--socket", opts.socket },
    stdio = { nil, nil, opts.stderr },
  }, function(code, signal)
    if opts.on_exit then
      opts.on_exit(code, signal)
    end
  end)

  if not handle then
    error(
      ("heph-test: failed to spawn hephd (bin=%s): %s"):format(
        tostring(opts.bin),
        tostring(pid_or_err)
      )
    )
  end
  return { handle = handle, pid = pid_or_err }
end
--- Poll until `socket` exists and a `health` RPC on it succeeds, or until
--- `timeout` ms (default 5000) pass. Returns `true`, or `false, reason`.
---
--- Deliberately a plain Lua loop rather than a `vim.wait` predicate: the rpc
--- round-trip itself uses `vim.wait`, and nesting the two deadlocks.
local function wait_ready(socket, timeout)
  local give_up_at = uv.hrtime() + (timeout or 5000) * 1e6 -- ns

  while uv.hrtime() < give_up_at do
    local healthy = false
    if uv.fs_stat(socket) ~= nil then
      -- Throwaway session per attempt; closed whether or not the call worked.
      local probe = rpc.new_session(socket)
      healthy = pcall(function()
        probe:call("health", vim.empty_dict(), { timeout = 200 })
      end)
      probe:close()
    end
    if healthy then
      return true
    end
    vim.wait(50)
  end

  return false, "daemon not ready at " .. socket
end
M.wait_ready = wait_ready
local function repo_root()
-- ":p" makes this absolute regardless of how the runner was launched.
local here = vim.fn.fnamemodify(debug.getinfo(1, "S").source:sub(2), ":p")
@ -35,7 +76,7 @@ local function unique_dir()
end
--- A fresh temp dir + short socket/db paths, WITHOUT spawning a daemon (for
--- tests that drive the plugin's own autostart/lifecycle). `rm` removes it.
--- tests of the no-daemon-running case). `rm` removes it.
function M.tmp()
local dir = unique_dir()
return { dir = dir, sock = dir .. "/s", db = dir .. "/db", rm = function()
@ -45,12 +86,9 @@ function M.tmp()
end }
end
--- Start a fresh daemon and bind the plugin's rpc to it. Returns a `ctx` with:
--- `dir, sock, db, daemon, exited, q` (an isolated session for assertions).
function M.start()
local dir = unique_dir()
local sock = dir .. "/s"
local db = dir .. "/db"
--- Start a daemon on explicit paths and bind the plugin's rpc to it. Returns a
--- `ctx` with `dir, sock, db, daemon, exited, q` (an isolated assert session).
function M.start_on(dir, sock, db)
assert(#sock < 104, "socket path too long for sun_path: " .. sock)
local bin = M.hephd_bin()
assert(
@ -59,7 +97,7 @@ function M.start()
)
local exited = { done = false }
local d = daemon.spawn({
local d = spawn({
bin = bin,
db = db,
socket = sock,
@ -67,7 +105,7 @@ function M.start()
exited.done = true
end,
})
local ok, reason = daemon.wait_ready(sock, 5000)
local ok, reason = wait_ready(sock, 5000)
assert(ok, "daemon not ready: " .. tostring(reason))
rpc.setup(sock) -- the plugin's default session, used by buffers/commands
@ -81,6 +119,12 @@ function M.start()
}
end
--- Convenience wrapper around `M.start_on`: allocate a new temp dir, derive the
--- short socket (`<dir>/s`) and db (`<dir>/db`) paths inside it, and start a
--- daemon there. Returns whatever `M.start_on` returns.
function M.start()
  local root = unique_dir()
  local sock_path = root .. "/s"
  local db_path = root .. "/db"
  return M.start_on(root, sock_path, db_path)
end
--- Tear down: close sessions, delete heph:// buffers, reap the daemon, rm temp.
function M.stop(ctx)
if not ctx then
@ -92,7 +136,6 @@ function M.stop(ctx)
pcall(function()
rpc.close()
end)
rpc.set_respawn(nil) -- don't let a managed-daemon spec leak self-heal here
for _, b in ipairs(vim.api.nvim_list_bufs()) do
if vim.api.nvim_buf_get_name(b):match("^heph://") then
pcall(vim.api.nvim_buf_delete, b, { force = true })

View file

@ -1,108 +1,47 @@
-- The plugin-managed daemon lifecycle (tech-spec §8): plug-and-play autostart,
-- self-heal on a dropped connection, and connect-only when autostart is off.
-- The plugin is connect-only (tech-spec §8, [[design]] §4): it never spawns a
-- daemon — it connects to one run as an OS service (`heph daemon start`). These
-- specs cover connecting to a running daemon, a clean failure when none is
-- running, and reconnecting after the daemon is restarted.
local h = require("e2e.helpers")
describe("managed daemon", function()
local t
before_each(function()
t = h.tmp() -- temp paths; no daemon spawned by the harness
describe("connect-only daemon", function()
it("connects to a running daemon and works", function()
local ctx = h.start() -- harness starts a real daemon; binds the plugin to it
require("heph").setup({ socket = ctx.sock, keymaps = false })
assert.is_truthy(require("heph.rpc").call("health", {}))
h.stop(ctx)
end)
after_each(function()
pcall(function()
require("heph.daemon").stop_spawned()
end)
it("fails cleanly when no daemon is running (never spawns one)", function()
local t = h.tmp() -- temp socket path with nothing serving it
require("heph.rpc").setup(t.sock)
-- A call must fail loudly (connection error), not hang or spawn a daemon.
local ok = pcall(require("heph.rpc").call, "health", {})
assert.is_false(ok, "expected a connection failure with no daemon running")
pcall(function()
require("heph.rpc").close()
end)
require("heph.rpc").set_respawn(nil)
t.rm()
end)
it("autostart spawns a local daemon and connects plug-and-play", function()
require("heph").setup({
socket = t.sock,
db = t.db,
bin = h.hephd_bin(),
autostart = true,
keymaps = false,
})
assert.is_true(require("heph.daemon").is_managed())
-- A real call works because the plugin brought the daemon up itself.
assert.is_truthy(require("heph.rpc").call("health", {}))
end)
it("self-heals: respawns and reconnects when the daemon dies", function()
require("heph").setup({
socket = t.sock,
db = t.db,
bin = h.hephd_bin(),
autostart = true,
keymaps = false,
})
it("reconnects after the daemon is restarted under it", function()
local ctx = h.start()
require("heph").setup({ socket = ctx.sock, keymaps = false })
require("heph.rpc").call("health", {})
-- Kill the managed daemon out from under the plugin.
local m = require("heph.daemon")._managed
m.handle:kill("sigterm")
-- Kill the daemon, then start a fresh one on the SAME socket (as
-- `heph daemon restart` would). The next call should reconnect.
ctx.daemon.handle:kill("sigterm")
vim.wait(2000, function()
return m.exited.done
return ctx.exited.done
end, 20)
pcall(function()
vim.uv.fs_unlink(ctx.sock)
end)
-- The next call transparently respawns the daemon and succeeds.
local ctx2 = h.start_on(ctx.dir, ctx.sock, ctx.db)
assert.is_truthy(require("heph.rpc").call("health", {}))
assert.is_true(require("heph.daemon").is_managed())
end)
it("does not deadlock on a stale socket left by a crash (regression)", function()
-- Bring up a managed daemon, then HARD-kill it so no cleanup runs — leaving
-- a stale socket file with no listener (the second-launch crash scenario:
-- wait_ready ran the rpc probe inside a vim.wait predicate, nesting vim.wait
-- and freezing Neovim).
require("heph").setup({
socket = t.sock,
db = t.db,
bin = h.hephd_bin(),
autostart = true,
keymaps = false,
})
require("heph.rpc").call("health", {})
local m = require("heph.daemon")._managed
m.handle:kill("sigkill")
vim.wait(2000, function()
return m.exited.done
end, 20)
assert.is_truthy(vim.uv.fs_stat(t.sock), "precondition: a stale socket is present")
-- Probing the stale socket must RETURN promptly (not deadlock). The fix
-- returns in ~200ms; the bug froze here indefinitely.
local start = vim.uv.hrtime()
local ready = require("heph.daemon").wait_ready(t.sock, 200)
local elapsed_ms = (vim.uv.hrtime() - start) / 1e6
assert.is_false(ready)
assert.is_true(elapsed_ms < 2000, "wait_ready took " .. math.floor(elapsed_ms) .. "ms — possible deadlock")
-- A fresh autostart recovers despite the stale socket still being there.
require("heph").setup({
socket = t.sock,
db = t.db,
bin = h.hephd_bin(),
autostart = true,
keymaps = false,
})
assert.is_truthy(require("heph.rpc").call("health", {}))
assert.is_true(require("heph.daemon").is_managed())
end)
it("connect-only (autostart=false) errors when no daemon is running", function()
require("heph").setup({
socket = t.sock,
autostart = false,
keymaps = false,
})
assert.is_false(require("heph.daemon").is_managed())
-- No daemon, no autostart, no self-heal → a call fails loudly.
local ok = pcall(require("heph.rpc").call, "health", {})
assert.is_false(ok, "expected connect-only to fail with no daemon running")
h.stop(ctx2)
end)
end)