C0: surface missing-log failures in runner-logs
`mise run runner-logs <run> -j <n>` previously succeeded silently, with no output, when Forgejo had no log for the task. There were two layered causes:

1. zstdcat exits 0 even when the file is missing (it only writes "can't stat … -- ignored" to stderr).
2. ssh to indri runs fish, which silently drops the remote exit code, so the subprocess returncode is always 0.

Probe with `test -f` over SSH and parse a stdout marker (EXISTS / MISSING) to detect the missing-log case, then report it explicitly with the indri path and a hint about action_task.log_in_storage = 0 so the operator knows where to look next.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e703d25efe
commit
1ce381cb6e
2 changed files with 25 additions and 1 deletion
|
|
@@ -229,12 +229,35 @@ def fetch_log(run_number: int, job_index: int, repo: str, token: str) -> None:
|
|||
hex_prefix = f"{task_id & 0xff:02x}"
|
||||
log_path = f"~/forgejo/data/actions_log/{repo}/{hex_prefix}/{task_id}.log.zst"
|
||||
|
||||
# indri's login shell (fish) silently swallows SSH exit codes, so we can't
|
||||
# rely on returncode. zstdcat itself also exits 0 with a "can't stat ...
|
||||
# -- ignored" stderr message when the file is missing. Detect missing logs
|
||||
# by running `test -f` over SSH and parsing the marker line from stdout.
|
||||
probe = subprocess.run(
|
||||
["ssh", "indri", f"test -f {log_path} && echo EXISTS || echo MISSING"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
marker = probe.stdout.strip().splitlines()[-1] if probe.stdout.strip() else ""
|
||||
if marker != "EXISTS":
|
||||
typer.echo(
|
||||
f"Error: log not found for run #{run_number} job {job_index} (task {task_id})",
|
||||
err=True,
|
||||
)
|
||||
typer.echo(f"Path: indri:{log_path}", err=True)
|
||||
typer.echo(
|
||||
"The runner may have crashed before uploading its log buffer "
|
||||
"(action_task.log_in_storage = 0).",
|
||||
err=True,
|
||||
)
|
||||
raise typer.Exit(1)
|
||||
|
||||
result = subprocess.run(
|
||||
["ssh", "indri", f"zstdcat {log_path}"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
if result.returncode != 0 or not result.stdout:
|
||||
typer.echo(
|
||||
f"Error: could not read log for run #{run_number} job {job_index} (task {task_id})",
|
||||
err=True,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue