From d93f84958c993e1f42113f75f841f99c166e3f6d Mon Sep 17 00:00:00 2001
From: Erich Blume
Date: Wed, 3 Jun 2026 22:46:09 -0700
Subject: [PATCH] ci: cap CARGO_BUILD_JOBS at 1 to stop the DinD engine OOM-killing

The recent CI failures ("Cannot connect to the Docker daemon") are the
DinD build engine being OOM-killed mid-compile, not flakiness. Even 4
parallel rustc invocations spike memory too high on the runner;
serialize to jobs=1. Slower but survives. Temporary mitigation pending
more host RAM.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 .dagger/src/hephaestus_ci/main.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.dagger/src/hephaestus_ci/main.py b/.dagger/src/hephaestus_ci/main.py
index b485922..0c89604 100644
--- a/.dagger/src/hephaestus_ci/main.py
+++ b/.dagger/src/hephaestus_ci/main.py
@@ -15,9 +15,10 @@ class HephaestusCi:
             dag.container()
             .from_("rust:1-bookworm")
             .with_exec(["rustup", "component", "add", "clippy", "rustfmt"])
-            # Cap parallel rustc — unbounded (= ncpu) spikes memory on heavy
-            # crates and OOMs the build engine on a many-core runner.
-            .with_env_variable("CARGO_BUILD_JOBS", "4")
+            # Serialize compilation (jobs=1) — even 4 parallel rustc invocations
+            # spike memory on heavy crates and OOM-kill the DinD build engine.
+            # Slower, but the runner survives; revisit if the host gets more RAM.
+            .with_env_variable("CARGO_BUILD_JOBS", "1")
             .with_mounted_cache(
                 "/usr/local/cargo/registry",
                 dag.cache_volume("heph-cargo-registry"),