From acb394989603aa8a4280ee02d6a161d71371e55d Mon Sep 17 00:00:00 2001
From: Erich Blume
Date: Tue, 2 Jun 2026 10:29:58 -0700
Subject: [PATCH] heph.nvim: regression test for the stale-socket wait_ready deadlock

Recreates a crash-left stale socket (spawn a managed daemon, SIGKILL it) and
asserts wait_ready returns promptly (a deadlock would freeze the suite) and
that a fresh autostart recovers. Verified it fails (suite hangs) against the
buggy nested-vim.wait version.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 heph.nvim/tests/e2e/managed_daemon_spec.lua | 40 +++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/heph.nvim/tests/e2e/managed_daemon_spec.lua b/heph.nvim/tests/e2e/managed_daemon_spec.lua
index 4bd65e1..e170529 100644
--- a/heph.nvim/tests/e2e/managed_daemon_spec.lua
+++ b/heph.nvim/tests/e2e/managed_daemon_spec.lua
@@ -54,6 +54,46 @@ describe("managed daemon", function()
     assert.is_true(require("heph.daemon").is_managed())
   end)
 
+  it("does not deadlock on a stale socket left by a crash (regression)", function()
+    -- Bring up a managed daemon, then SIGKILL it so no cleanup runs, leaving a stale
+    -- socket file with no listener. This is the second-launch crash scenario, where
+    -- wait_ready ran the rpc probe inside a vim.wait predicate, nesting vim.wait and
+    -- freezing Neovim. NOTE(review): the vim.wait(2000, ...) exit wait is not asserted.
+    require("heph").setup({
+      socket = t.sock,
+      db = t.db,
+      bin = h.hephd_bin(),
+      autostart = true,
+      keymaps = false,
+    })
+    require("heph.rpc").call("health", {})
+    local m = require("heph.daemon")._managed
+    m.handle:kill("sigkill")
+    vim.wait(2000, function()
+      return m.exited.done
+    end, 20)
+    assert.is_truthy(vim.uv.fs_stat(t.sock), "precondition: a stale socket is present")
+
+    -- Probing the stale socket must RETURN promptly (not deadlock) and report not-ready.
+    -- The fix returns in ~200ms, well under the 2000ms bound; the bug froze indefinitely.
+    local start = vim.uv.hrtime()
+    local ready = require("heph.daemon").wait_ready(t.sock, 200)
+    local elapsed_ms = (vim.uv.hrtime() - start) / 1e6
+    assert.is_false(ready)
+    assert.is_true(elapsed_ms < 2000, "wait_ready took " .. math.floor(elapsed_ms) .. "ms — possible deadlock")
+
+    -- A fresh autostart must recover (health rpc + managed) despite the stale socket.
+    require("heph").setup({
+      socket = t.sock,
+      db = t.db,
+      bin = h.hephd_bin(),
+      autostart = true,
+      keymaps = false,
+    })
+    assert.is_truthy(require("heph.rpc").call("health", {}))
+    assert.is_true(require("heph.daemon").is_managed())
+  end)
+
   it("connect-only (autostart=false) errors when no daemon is running", function()
     require("heph").setup({
       socket = t.sock,