diff --git a/src/decompress.rs b/src/decompress.rs index 90963fa..4a85653 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -516,10 +516,6 @@ impl Write for CappedWriter { } } -fn stream_to_file(decoder: R, out_path: &Path) -> Result { - stream_to_file_capped(decoder, out_path, MAX_SINGLE_STREAM_DECOMPRESSED_BYTES) -} - fn stream_to_file_capped( mut decoder: R, out_path: &Path, @@ -537,10 +533,6 @@ fn stream_to_file_capped( Ok(CompressedContent::RawFile(out_path.to_owned())) } -fn stream_xz_to_file(path: &Path, out_path: &Path) -> Result { - stream_xz_to_file_capped(path, out_path, MAX_SINGLE_STREAM_DECOMPRESSED_BYTES) -} - fn stream_xz_to_file_capped(path: &Path, out_path: &Path, cap: u64) -> Result { let input = safe_open_for_read(path)?; let mut reader = BufReader::new(input); @@ -559,7 +551,11 @@ fn stream_xz_to_file_capped(path: &Path, out_path: &Path, cap: u64) -> Result) -> Result { +fn decompress_once_with_single_stream_cap( + path: &Path, + base_dir: Option<&Path>, + single_stream_cap: u64, +) -> Result { let extension = path.extension().and_then(|ext| ext.to_str()).map(|s| s.to_ascii_lowercase()); let mut file = safe_open_for_read(path)?; @@ -600,21 +596,21 @@ fn decompress_once(path: &Path, base_dir: Option<&Path>) -> Result { let out_path = make_output_path(path, base_dir, "decomp.tar"); let decoder = GzDecoder::new(BufReader::new(safe_open_for_read(path)?)); - return stream_to_file(decoder, &out_path); + return stream_to_file_capped(decoder, &out_path, single_stream_cap); } "bz2" | "bzip2" => { let out_path = make_output_path(path, base_dir, "decomp.tar"); let decoder = DecoderReader::new(BufReader::new(safe_open_for_read(path)?)); - return stream_to_file(decoder, &out_path); + return stream_to_file_capped(decoder, &out_path, single_stream_cap); } "xz" => { let out_path = make_output_path(path, base_dir, "decomp.tar"); - return stream_xz_to_file(path, &out_path); + return stream_xz_to_file_capped(path, &out_path, single_stream_cap); } "zlib" => { let out_path = make_output_path(path, base_dir, "decomp.tar"); let decoder = ZlibDecoder::new(BufReader::new(safe_open_for_read(path)?)); - return stream_to_file(decoder, &out_path); + return stream_to_file_capped(decoder, &out_path, single_stream_cap); } _ => {} } @@ -630,12 +626,21 @@ fn decompress_once(path: &Path, base_dir: Option<&Path>) -> Result) -> Result { + decompress_file_with_single_stream_cap(path, base_dir, MAX_SINGLE_STREAM_DECOMPRESSED_BYTES) +} + +pub fn decompress_file_with_single_stream_cap( + path: &Path, + base_dir: Option<&Path>, + single_stream_cap: u64, +) -> Result { let mut current_path: &Path = path; let mut owned_buf: Option; loop { let should_extract_tar = is_tar_wrapped_compression(current_path); - let content = decompress_once(current_path, base_dir)?; + let content = + decompress_once_with_single_stream_cap(current_path, base_dir, single_stream_cap)?; // If the step produced a single on-disk file that is itself a .tar, // recurse on that file. @@ -704,7 +709,7 @@ pub fn decompress_file_to_temp(path: &Path) -> Result<(CompressedContent, TempDi #[cfg(test)] mod tests { - use std::{fs::File, io::Write}; + use std::{fs::File, io::Write, path::Path}; use flate2::{Compression, write::GzEncoder}; use tar::Builder; @@ -712,10 +717,17 @@ mod tests { use zip::{CompressionMethod, ZipWriter, write::SimpleFileOptions}; use super::{ - CompressedContent, decompress_file_to_temp, decompress_once, - materialize_in_memory_archive_entries, + CompressedContent, decompress_file_to_temp, materialize_in_memory_archive_entries, }; + fn decompress_once(path: &Path, base_dir: Option<&Path>) -> anyhow::Result { + super::decompress_once_with_single_stream_cap( + path, + base_dir, + super::MAX_SINGLE_STREAM_DECOMPRESSED_BYTES, + ) + } + /// 1) Fully unpack: /// - 1st decompress `.gz` -- get a `.tar` file /// @@ -962,7 +974,7 @@ mod tests { use tar::Builder; use tempfile::tempdir; - use super::{CompressedContent, decompress_once}; + use super::CompressedContent; let tmp = tempdir()?; diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs index 805f977..d15db30 100644 --- a/src/scanner/docker.rs +++ b/src/scanner/docker.rs @@ -17,7 +17,13 @@ use sha2::{Digest, Sha256}; use tracing::debug; use walkdir::WalkDir; -use crate::decompress::decompress_file; +use crate::decompress::decompress_file_with_single_stream_cap; + +/// Docker/OCI image layers are often large tar streams. Keep this high enough +/// to avoid silently dropping scan coverage for normal base OS layers while +/// still bounding hostile compressed input. +// nosemgrep: this is the defensive cap — do not flag for missing-limit rules. +const MAX_DOCKER_SINGLE_STREAM_DECOMPRESSED_BYTES: u64 = 4 * 1024 * 1024 * 1024; fn helper_get_creds(helper: &str, registry: &str) -> Option<(String, String)> { fn run(bin: &str, registry: &str) -> Option<(String, String)> { @@ -321,7 +327,11 @@ fn extract_layer_archive(path: &Path, out_dir: &Path) -> Result<()> { &aliased_path }; - let result = decompress_file(layer_path, Some(out_dir)); + let result = decompress_file_with_single_stream_cap( + layer_path, + Some(out_dir), + MAX_DOCKER_SINGLE_STREAM_DECOMPRESSED_BYTES, + ); let cleanup_result = if layer_path != path && layer_path.exists() { std::fs::remove_file(layer_path) } else { @@ -344,7 +354,11 @@ fn extract_saved_archive_layers( pb: &ProgressBar, ) -> Result { pb.set_message("extracting layers"); - decompress_file(archive_path, Some(out_dir))?; + decompress_file_with_single_stream_cap( + archive_path, + Some(out_dir), + MAX_DOCKER_SINGLE_STREAM_DECOMPRESSED_BYTES, + )?; remove_tar_wrapped_intermediate(archive_path, out_dir)?; let layer_paths = collect_saved_archive_layers(out_dir)?; @@ -518,7 +532,11 @@ impl Docker { let tmp_path = out_dir.join(file_name); let mut tmp = std::fs::File::create(&tmp_path)?; tmp.write_all(&layer.data)?; - decompress_file(&tmp_path, Some(out_dir))?; + decompress_file_with_single_stream_cap( + &tmp_path, + Some(out_dir), + MAX_DOCKER_SINGLE_STREAM_DECOMPRESSED_BYTES, + )?; std::fs::remove_file(&tmp_path)?; pb.inc(1); }