hephaestus/heph.nvim/tests/e2e/managed_daemon_spec.lua
Erich Blume acb3949896
Some checks failed
Build / validate (pull_request) Failing after 5m35s
heph.nvim: regression test for the stale-socket wait_ready deadlock
Recreates a crash-left stale socket (spawn a managed daemon, SIGKILL it) and
asserts wait_ready returns promptly (a deadlock would freeze the suite) and that
a fresh autostart recovers. Verified it fails (suite hangs) against the buggy
nested-vim.wait version.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 10:29:58 -07:00

108 lines
3.6 KiB
Lua

-- The plugin-managed daemon lifecycle (tech-spec §8): plug-and-play autostart,
-- self-heal on a dropped connection, and connect-only when autostart is off.
local h = require("e2e.helpers")
describe("managed daemon", function()
local t -- per-test fixture; the tests below read t.sock / t.db and call t.rm()
before_each(function()
  -- Allocate temp paths only. The harness deliberately spawns no daemon here,
  -- so every test drives the daemon lifecycle through the plugin itself.
  t = h.tmp()
end)
after_each(function()
  -- Invoke `require(module)[method]()` and ignore any error: a test may have
  -- already killed the daemon or dropped the connection, so these teardown
  -- steps are best-effort.
  local function best_effort(module, method)
    pcall(function()
      require(module)[method]()
    end)
  end

  best_effort("heph.daemon", "stop_spawned")
  best_effort("heph.rpc", "close")

  -- Not wrapped in pcall: an error in either step below fails the test.
  -- NOTE(review): presumably set_respawn(nil) clears the self-heal hook so it
  -- cannot leak into the next test — confirm against heph.rpc.
  require("heph.rpc").set_respawn(nil)
  t.rm()
end)
it("autostart spawns a local daemon and connects plug-and-play", function()
  local setup = require("heph").setup
  setup({
    socket = t.sock,
    db = t.db,
    bin = h.hephd_bin(),
    autostart = true,
    keymaps = false,
  })

  -- With autostart on, the plugin itself owns the daemon process.
  local daemon = require("heph.daemon")
  assert.is_true(daemon.is_managed())

  -- A real RPC round-trip succeeds without the test ever starting a daemon.
  local rpc = require("heph.rpc")
  assert.is_truthy(rpc.call("health", {}))
end)
it("self-heals: respawns and reconnects when the daemon dies", function()
  require("heph").setup({
    socket = t.sock,
    db = t.db,
    bin = h.hephd_bin(),
    autostart = true,
    keymaps = false,
  })
  -- Establish a working connection before pulling the daemon away.
  require("heph.rpc").call("health", {})

  -- Kill the managed daemon out from under the plugin.
  local m = require("heph.daemon")._managed
  assert.is_truthy(m, "precondition: autostart should leave a managed daemon record")
  m.handle:kill("sigterm")

  -- vim.wait returns true only if the predicate became truthy before the
  -- timeout. Assert it: if the daemon were still alive, the health call below
  -- would succeed against the ORIGINAL process and this test would pass
  -- without ever exercising the respawn path.
  local exited = vim.wait(2000, function()
    return m.exited.done
  end, 20)
  assert.is_true(exited, "precondition: managed daemon should have exited within 2s of SIGTERM")

  -- The next call transparently respawns the daemon and succeeds.
  assert.is_truthy(require("heph.rpc").call("health", {}))
  assert.is_true(require("heph.daemon").is_managed())
end)
it("does not deadlock on a stale socket left by a crash (regression)", function()
  -- Builds a FRESH options table per call, so a setup() that mutates its
  -- argument cannot leak state from the first setup into the second.
  local function autostart_opts()
    return {
      socket = t.sock,
      db = t.db,
      bin = h.hephd_bin(),
      autostart = true,
      keymaps = false,
    }
  end

  -- Bring up a managed daemon, then HARD-kill it so no cleanup runs — leaving
  -- a stale socket file with no listener (the second-launch crash scenario:
  -- wait_ready ran the rpc probe inside a vim.wait predicate, nesting vim.wait
  -- and freezing Neovim).
  require("heph").setup(autostart_opts())
  require("heph.rpc").call("health", {})
  local m = require("heph.daemon")._managed
  assert.is_truthy(m, "precondition: autostart should leave a managed daemon record")
  m.handle:kill("sigkill")

  -- Assert the daemon is really gone. Without this, a daemon that is still
  -- alive would surface later as a confusing `ready == true` failure instead
  -- of a broken precondition.
  local exited = vim.wait(2000, function()
    return m.exited.done
  end, 20)
  assert.is_true(exited, "precondition: managed daemon should have exited within 2s of SIGKILL")
  assert.is_truthy(vim.uv.fs_stat(t.sock), "precondition: a stale socket is present")

  -- Probing the stale socket must RETURN promptly (not deadlock). The fix
  -- returns in ~200ms; the bug froze here indefinitely.
  local start = vim.uv.hrtime()
  local ready = require("heph.daemon").wait_ready(t.sock, 200)
  local elapsed_ms = (vim.uv.hrtime() - start) / 1e6 -- hrtime is in nanoseconds
  assert.is_false(ready)
  assert.is_true(elapsed_ms < 2000, "wait_ready took " .. math.floor(elapsed_ms) .. "ms — possible deadlock")

  -- A fresh autostart recovers despite the stale socket still being there.
  require("heph").setup(autostart_opts())
  assert.is_truthy(require("heph.rpc").call("health", {}))
  assert.is_true(require("heph.daemon").is_managed())
end)
it("connect-only (autostart=false) errors when no daemon is running", function()
  local setup = require("heph").setup
  setup({ socket = t.sock, autostart = false, keymaps = false })

  -- With autostart off the plugin must not own any daemon process.
  local daemon = require("heph.daemon")
  assert.is_false(daemon.is_managed())

  -- Nothing is listening and nothing may be spawned or respawned, so the
  -- call has to raise rather than quietly succeed.
  local rpc = require("heph.rpc")
  local succeeded = pcall(rpc.call, "health", {})
  assert.is_false(succeeded, "expected connect-only to fail with no daemon running")
end)
end)