From b932a814caec526f536ec7ae663a85a470148b47 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Tue, 2 Jun 2026 10:23:42 -0700 Subject: [PATCH] heph.nvim: fix daemon.wait_ready deadlock on a stale socket MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wait_ready ran the rpc health probe inside a `vim.wait` predicate, and the probe itself uses `vim.wait` — nesting vim.wait inside another vim.wait's predicate deadlocks Neovim. It only bit when the socket file existed: the first launch's socket doesn't exist yet (probe short-circuits), but the second launch hit the stale socket left by the prior daemon and froze in setup(). - wait_ready now probes in a plain Lua loop (deadline via uv.hrtime + a bare vim.wait(50) yield) — never a vim.wait inside a vim.wait predicate. - stop_spawned now unlinks the socket on exit, so a clean exit leaves no stale socket (a crash still can — the wait_ready fix handles that too). Verified: two-launch repro no longer hangs; a crash-left stale socket recovers in ~460ms. 10 e2e specs green. Co-Authored-By: Claude Opus 4.8 (1M context) --- heph.nvim/lua/heph/daemon.lua | 42 ++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/heph.nvim/lua/heph/daemon.lua b/heph.nvim/lua/heph/daemon.lua index 7e4968f..9792b92 100644 --- a/heph.nvim/lua/heph/daemon.lua +++ b/heph.nvim/lua/heph/daemon.lua @@ -38,25 +38,30 @@ end --- Wait until `socket` both exists and accepts a real RPC (`health`). The --- existence check alone races the daemon's bind→accept, so we prove liveness ---- with a round-trip on a throwaway session. Returns `true`, or `false, reason`. +--- with a round-trip. Returns `true`, or `false, reason`. +--- +--- The probe runs in a **plain Lua loop**, never inside a `vim.wait` predicate: +--- the rpc round-trip itself uses `vim.wait`, and nesting `vim.wait` inside +--- another `vim.wait`'s predicate deadlocks Neovim (a stale socket made the +--- inner connect-wait re-enter and hang). function M.wait_ready(socket, timeout) timeout = timeout or 5000 - if not vim.wait(timeout, function() - return uv.fs_stat(socket) ~= nil - end, 20) then - return false, "socket never appeared: " .. socket + local rpc = require("heph.rpc") + local deadline = uv.hrtime() + timeout * 1e6 -- ns + while uv.hrtime() < deadline do + if uv.fs_stat(socket) ~= nil then + local session = rpc.new_session(socket) + local ok = pcall(function() + session:call("health", vim.empty_dict(), { timeout = 200 }) + end) + session:close() + if ok then + return true + end + end + vim.wait(50) -- yield ~50ms; no predicate, so not nested end - local session = require("heph.rpc").new_session(socket) - local ok = vim.wait(timeout, function() - return pcall(function() - session:call("health", vim.empty_dict(), { timeout = 200 }) - end) - end, 50) - session:close() - if not ok then - return false, "socket present but not accepting rpc: " .. socket - end - return true + return false, "daemon not ready at " .. socket end --- Ensure a daemon is reachable at `opts.socket`. If one is already serving the @@ -124,6 +129,11 @@ function M.stop_spawned() m.handle:close() end end) + -- hephd doesn't unlink its socket on SIGTERM; remove it so the next launch + -- doesn't probe a stale socket. (A crash still leaves one — wait_ready copes.) + pcall(function() + uv.fs_unlink(m.socket) + end) end return M