From 1ce381cb6e15ca1226feee1d6a0fa2c449f929b7 Mon Sep 17 00:00:00 2001 From: Erich Blume Date: Thu, 28 May 2026 14:36:33 -0700 Subject: [PATCH] C0: surface missing-log failures in runner-logs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `mise run runner-logs -j ` previously silently succeeded with no output when forgejo had no log for the task. Two layered causes: 1. zstdcat exits 0 even when the file is missing (writes "can't stat … -- ignored" to stderr). 2. ssh to indri runs fish, which silently drops the remote exit code so the subprocess returncode is always 0. Probe `test -f` over SSH and parse a stdout marker (EXISTS / MISSING) to detect the missing-log case, then report it explicitly with the indri path and a hint about action_task.log_in_storage = 0 so the operator knows where to look next. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../+runner-logs-missing-log.misc.md | 1 + mise-tasks/runner-logs | 25 ++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 docs/changelog.d/+runner-logs-missing-log.misc.md diff --git a/docs/changelog.d/+runner-logs-missing-log.misc.md b/docs/changelog.d/+runner-logs-missing-log.misc.md new file mode 100644 index 0000000..c06704a --- /dev/null +++ b/docs/changelog.d/+runner-logs-missing-log.misc.md @@ -0,0 +1 @@ +`mise run runner-logs -j ` now reports a clear error when the log file doesn't exist on indri (e.g. a runner crash that left `action_task.log_in_storage = 0`). Previously it printed only the header and exited 0, because `zstdcat` exits 0 with a "can't stat … -- ignored" stderr message and ssh+fish on indri swallows the remote exit code. diff --git a/mise-tasks/runner-logs b/mise-tasks/runner-logs index 3c5e8e3..0d3028b 100755 --- a/mise-tasks/runner-logs +++ b/mise-tasks/runner-logs @@ -229,12 +229,35 @@ def fetch_log(run_number: int, job_index: int, repo: str, token: str) -> None: hex_prefix = f"{task_id & 0xff:02x}" log_path = f"~/forgejo/data/actions_log/{repo}/{hex_prefix}/{task_id}.log.zst" + # indri's login shell (fish) silently swallows SSH exit codes, so we can't + # rely on returncode. zstdcat itself also exits 0 with a "can't stat ... + # -- ignored" stderr message when the file is missing. Detect missing logs + # by running `test -f` over SSH and parsing the marker line from stdout. + probe = subprocess.run( + ["ssh", "indri", f"test -f {log_path} && echo EXISTS || echo MISSING"], + capture_output=True, + text=True, + ) + marker = probe.stdout.strip().splitlines()[-1] if probe.stdout.strip() else "" + if marker != "EXISTS": + typer.echo( + f"Error: log not found for run #{run_number} job {job_index} (task {task_id})", + err=True, + ) + typer.echo(f"Path: indri:{log_path}", err=True) + typer.echo( + "The runner may have crashed before uploading its log buffer " + "(action_task.log_in_storage = 0).", + err=True, + ) + raise typer.Exit(1) + result = subprocess.run( ["ssh", "indri", f"zstdcat {log_path}"], capture_output=True, text=True, ) - if result.returncode != 0: + if result.returncode != 0 or not result.stdout: typer.echo( f"Error: could not read log for run #{run_number} job {job_index} (task {task_id})", err=True,