From 138eefe2b99146ef07201d3952a8276b5ec2c45e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 22 May 2026 11:50:47 -0400 Subject: [PATCH] Fixed failed to spawn thread: Os { code: 11, kind: WouldBlock } panics during validation-heavy scans. Kingfisher built two Tokio runtimes (main + artifact-fetcher) that each defaulted to 512 blocking threads, which combined with Rayon pools and per-call spawns could exceed the OS per-user thread limit (RLIMIT_NPROC, default 8000 on macOS). Both runtimes now cap their blocking pools at min(max(num_jobs * 8, 32), 256), and on Unix the soft RLIMIT_NPROC is raised to the hard limit at startup so users don't need to tune ulimit -u manually. --- CHANGELOG.md | 3 +++ Cargo.lock | 3 ++- Cargo.toml | 5 ++++- src/main.rs | 31 +++++++++++++++++++++++++++++++ src/scanner/runner.rs | 6 ++++-- src/util.rs | 26 ++++++++++++++++++++++++++ 6 files changed, 70 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d81983..58eb07c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [v1.101.0] +- Fixed `failed to spawn thread: Os { code: 11, kind: WouldBlock }` panics during validation-heavy scans. Kingfisher built two Tokio runtimes (main + artifact-fetcher) that each defaulted to 512 blocking threads, which combined with Rayon pools and per-call spawns could exceed the OS per-user thread limit (`RLIMIT_NPROC`, default 8000 on macOS). Both runtimes now cap their blocking pools at `min(max(num_jobs * 8, 32), 256)`, and on Unix the soft `RLIMIT_NPROC` is raised to the hard limit before Kingfisher starts its worker threads so users don't need to tune `ulimit -u` manually. + ## [v1.100.0] - Archive scanning now reaches inside Android/iOS app packages: added `apk`, `aab`, and `ipa` to the recognized ZIP-based archive formats so secrets embedded in APK/AAB/IPA contents (e.g. `classes*.dex`, `res/values/strings.xml`) are extracted and matched. 
- Git repository scans now extract archive blobs encountered in the object database, not just on the filesystem. Previously a `.zip`/`.jar`/`.apk`/`.tar.gz` committed to a repo was scanned as raw compressed bytes, so secrets inside it were invisible. The git enumerator fans each archive entry out as a synthetic `!` blob with the original commit metadata. Honors `--no-extract-archives` for opt-out. diff --git a/Cargo.lock b/Cargo.lock index 85f94fc..ad94549 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4960,7 +4960,7 @@ dependencies = [ [[package]] name = "kingfisher" -version = "1.100.0" +version = "1.101.0" dependencies = [ "anyhow", "asar", @@ -5023,6 +5023,7 @@ dependencies = [ "kingfisher-core", "kingfisher-rules", "kingfisher-scanner", + "libc", "liquid", "liquid-core", "lzma-rs", diff --git a/Cargo.toml b/Cargo.toml index e6876a9..ec096e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,7 +48,7 @@ http = "1.4" [package] name = "kingfisher" -version = "1.100.0" +version = "1.101.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -233,6 +233,9 @@ h2 = "0.4.13" version = "0.6" optional = true +[target.'cfg(unix)'.dependencies] +libc = "0.2" + [features] default = ["use-mimalloc"] use-mimalloc = [] diff --git a/src/main.rs b/src/main.rs index 4cda1d7..b89dc55 100644 --- a/src/main.rs +++ b/src/main.rs @@ -80,6 +80,7 @@ use kingfisher::{ rules_database::RulesDatabase, scanner::{load_and_record_rules, run_scan}, update::{check_for_update_async, rewrite_argv_for_reexec}, + util::tokio_blocking_threads_limit, validation::set_user_agent_suffix, }; use serde_json::json; @@ -102,6 +103,7 @@ use crate::cli::commands::{ }; fn main() -> anyhow::Result<()> { + raise_nproc_soft_limit(); color_backtrace::install(); // Run the real entry point on a thread with an explicit, larger stack so that @@ -122,6 +124,30 @@ enum AsyncMainOutcome { Reexec, } +/// Best-effort raise of the 
soft `RLIMIT_NPROC` (per-user thread/process cap) +/// to the current hard limit. Many users hit `pthread_create` failures +/// (`EAGAIN` / `WouldBlock`) under heavy validation because the default soft +/// limit on macOS is well below the hard limit. Failures here are intentionally +/// silent — this is a quality-of-life nudge, not a correctness requirement. +#[cfg(unix)] +fn raise_nproc_soft_limit() { + // SAFETY: getrlimit/setrlimit are async-signal-safe and take a properly + // sized `rlimit` we own. + unsafe { + let mut rl = libc::rlimit { rlim_cur: 0, rlim_max: 0 }; + if libc::getrlimit(libc::RLIMIT_NPROC, &mut rl) != 0 { + return; + } + if rl.rlim_cur < rl.rlim_max { + let new = libc::rlimit { rlim_cur: rl.rlim_max, rlim_max: rl.rlim_max }; + let _ = libc::setrlimit(libc::RLIMIT_NPROC, &new); + } + } +} + +#[cfg(not(unix))] +fn raise_nproc_soft_limit() {} + fn run() -> anyhow::Result<()> { // Rustls 0.23 requires an explicit crypto provider selection when multiple // providers are present in the dependency graph. @@ -160,8 +186,13 @@ fn run() -> anyhow::Result<()> { // Worker threads need larger stacks because async state machines (validation // pipeline) can produce large poll stack frames. 8 MiB is sufficient now that // the validators are split into separate async fns. + // Bound the blocking-thread pool. Tokio's default is 512 per runtime; the + // helper scales with --jobs but caps each runtime below that default so the + // main and artifact-fetcher runtimes cannot both grow huge blocking pools. 
+ let max_blocking = tokio_blocking_threads_limit(num_jobs); let runtime = Builder::new_multi_thread() .worker_threads(num_jobs) + .max_blocking_threads(max_blocking) .thread_stack_size(8 * 1024 * 1024) // 8 MiB per worker .enable_all() .build() diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 384e0d0..209ca20 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -42,7 +42,7 @@ use crate::{ run_secret_validation, save_docker_images, summary::{compute_scan_totals, print_scan_summary}, }, - util::set_redaction_enabled, + util::{set_redaction_enabled, tokio_blocking_threads_limit}, validation::CachedResponse, validation_rate_limit::ValidationRateLimiter, }; @@ -403,8 +403,10 @@ fn start_artifact_fetching( std::thread::Builder::new() .name("artifact-fetcher".to_string()) .spawn(move || -> Result<()> { + let workers = args.num_jobs.max(1); let rt = tokio::runtime::Builder::new_multi_thread() - .worker_threads(args.num_jobs.max(1)) + .worker_threads(workers) + .max_blocking_threads(tokio_blocking_threads_limit(workers)) .enable_all() .build() .context("Failed to build artifact-fetcher runtime")?; diff --git a/src/util.rs b/src/util.rs index 7c8e233..57a78bf 100644 --- a/src/util.rs +++ b/src/util.rs @@ -16,6 +16,22 @@ use rand::RngExt; static APP_SALT: LazyLock = LazyLock::new(|| generate_salt()); static REDACTION_ENABLED: AtomicBool = AtomicBool::new(false); +const MIN_TOKIO_BLOCKING_THREADS: usize = 32; +const TOKIO_BLOCKING_THREADS_PER_JOB: usize = 8; +const MAX_TOKIO_BLOCKING_THREADS: usize = 256; + +/// Per-runtime cap for Tokio's blocking thread pool. +/// +/// Tokio defaults to 512 blocking threads per runtime. Kingfisher can run the +/// main and artifact-fetcher runtimes at the same time, so keeping each runtime +/// below that default avoids runaway thread growth during validation-heavy scans. 
+pub fn tokio_blocking_threads_limit(num_jobs: usize) -> usize { + num_jobs + .saturating_mul(TOKIO_BLOCKING_THREADS_PER_JOB) + .max(MIN_TOKIO_BLOCKING_THREADS) + .min(MAX_TOKIO_BLOCKING_THREADS) +} + /// Interns a string once and returns a `'static` reference to it. pub fn intern(s: &str) -> &'static str { static INTERN: LazyLock> = LazyLock::new(|| DashSet::with_capacity(512)); @@ -156,6 +172,16 @@ mod tests { use super::{is_test_like_path, *}; + #[test] + fn tokio_blocking_threads_limit_scales_and_caps() { + assert_eq!(tokio_blocking_threads_limit(0), 32); + assert_eq!(tokio_blocking_threads_limit(1), 32); + assert_eq!(tokio_blocking_threads_limit(4), 32); + assert_eq!(tokio_blocking_threads_limit(8), 64); + assert_eq!(tokio_blocking_threads_limit(32), 256); + assert_eq!(tokio_blocking_threads_limit(usize::MAX), 256); + } + /// Paths that **should** be classified as test-like. #[test] fn test_is_test_like_path_positive() {