updated docs

This commit is contained in:
Mick Grove 2026-05-28 21:01:44 -07:00
commit 31af4b4f6f
2 changed files with 52 additions and 22 deletions

View file

@ -516,10 +516,6 @@ impl<W: Write> Write for CappedWriter<W> {
} }
} }
/// Default-cap convenience wrapper around `stream_to_file_capped`.
///
/// Hands `decoder` and `out_path` straight through and supplies
/// `MAX_SINGLE_STREAM_DECOMPRESSED_BYTES` as the cap argument; the result of
/// the capped variant is returned unchanged.
fn stream_to_file<R>(decoder: R, out_path: &Path) -> Result<CompressedContent>
where
    R: Read,
{
    let default_cap = MAX_SINGLE_STREAM_DECOMPRESSED_BYTES;
    stream_to_file_capped(decoder, out_path, default_cap)
}
fn stream_to_file_capped<R: Read>( fn stream_to_file_capped<R: Read>(
mut decoder: R, mut decoder: R,
out_path: &Path, out_path: &Path,
@ -537,10 +533,6 @@ fn stream_to_file_capped<R: Read>(
Ok(CompressedContent::RawFile(out_path.to_owned())) Ok(CompressedContent::RawFile(out_path.to_owned()))
} }
/// Default-cap convenience wrapper around `stream_xz_to_file_capped`.
///
/// Passes `path` and `out_path` through untouched and uses
/// `MAX_SINGLE_STREAM_DECOMPRESSED_BYTES` as the cap; whatever the capped
/// variant returns is returned as-is.
fn stream_xz_to_file(path: &Path, out_path: &Path) -> Result<CompressedContent> {
    let default_cap = MAX_SINGLE_STREAM_DECOMPRESSED_BYTES;
    stream_xz_to_file_capped(path, out_path, default_cap)
}
fn stream_xz_to_file_capped(path: &Path, out_path: &Path, cap: u64) -> Result<CompressedContent> { fn stream_xz_to_file_capped(path: &Path, out_path: &Path, cap: u64) -> Result<CompressedContent> {
let input = safe_open_for_read(path)?; let input = safe_open_for_read(path)?;
let mut reader = BufReader::new(input); let mut reader = BufReader::new(input);
@ -559,7 +551,11 @@ fn stream_xz_to_file_capped(path: &Path, out_path: &Path, cap: u64) -> Result<Co
/* ─────────────────────────────────────────────────────────────── /* ───────────────────────────────────────────────────────────────
one *step* of decompression one *step* of decompression
*/ */
fn decompress_once(path: &Path, base_dir: Option<&Path>) -> Result<CompressedContent> { fn decompress_once_with_single_stream_cap(
path: &Path,
base_dir: Option<&Path>,
single_stream_cap: u64,
) -> Result<CompressedContent> {
let extension = path.extension().and_then(|ext| ext.to_str()).map(|s| s.to_ascii_lowercase()); let extension = path.extension().and_then(|ext| ext.to_str()).map(|s| s.to_ascii_lowercase());
let mut file = safe_open_for_read(path)?; let mut file = safe_open_for_read(path)?;
@ -600,21 +596,21 @@ fn decompress_once(path: &Path, base_dir: Option<&Path>) -> Result<CompressedCon
"gz" | "gzip" | "tgz" => { "gz" | "gzip" | "tgz" => {
let out_path = make_output_path(path, base_dir, "decomp.tar"); let out_path = make_output_path(path, base_dir, "decomp.tar");
let decoder = GzDecoder::new(BufReader::new(safe_open_for_read(path)?)); let decoder = GzDecoder::new(BufReader::new(safe_open_for_read(path)?));
return stream_to_file(decoder, &out_path); return stream_to_file_capped(decoder, &out_path, single_stream_cap);
} }
"bz2" | "bzip2" => { "bz2" | "bzip2" => {
let out_path = make_output_path(path, base_dir, "decomp.tar"); let out_path = make_output_path(path, base_dir, "decomp.tar");
let decoder = DecoderReader::new(BufReader::new(safe_open_for_read(path)?)); let decoder = DecoderReader::new(BufReader::new(safe_open_for_read(path)?));
return stream_to_file(decoder, &out_path); return stream_to_file_capped(decoder, &out_path, single_stream_cap);
} }
"xz" => { "xz" => {
let out_path = make_output_path(path, base_dir, "decomp.tar"); let out_path = make_output_path(path, base_dir, "decomp.tar");
return stream_xz_to_file(path, &out_path); return stream_xz_to_file_capped(path, &out_path, single_stream_cap);
} }
"zlib" => { "zlib" => {
let out_path = make_output_path(path, base_dir, "decomp.tar"); let out_path = make_output_path(path, base_dir, "decomp.tar");
let decoder = ZlibDecoder::new(BufReader::new(safe_open_for_read(path)?)); let decoder = ZlibDecoder::new(BufReader::new(safe_open_for_read(path)?));
return stream_to_file(decoder, &out_path); return stream_to_file_capped(decoder, &out_path, single_stream_cap);
} }
_ => {} _ => {}
} }
@ -630,12 +626,21 @@ fn decompress_once(path: &Path, base_dir: Option<&Path>) -> Result<CompressedCon
public entry point keeps peeling layers public entry point keeps peeling layers
*/ */
/// Decompresses `path` using the module's default single-stream cap.
///
/// Forwards `path` and `base_dir` unchanged to
/// `decompress_file_with_single_stream_cap`, passing
/// `MAX_SINGLE_STREAM_DECOMPRESSED_BYTES` as the cap, and returns that
/// function's result. Callers that need a different cap should call the
/// `_with_single_stream_cap` variant directly.
pub fn decompress_file(path: &Path, base_dir: Option<&Path>) -> Result<CompressedContent> { pub fn decompress_file(path: &Path, base_dir: Option<&Path>) -> Result<CompressedContent> {
decompress_file_with_single_stream_cap(path, base_dir, MAX_SINGLE_STREAM_DECOMPRESSED_BYTES)
}
pub fn decompress_file_with_single_stream_cap(
path: &Path,
base_dir: Option<&Path>,
single_stream_cap: u64,
) -> Result<CompressedContent> {
let mut current_path: &Path = path; let mut current_path: &Path = path;
let mut owned_buf: Option<PathBuf>; let mut owned_buf: Option<PathBuf>;
loop { loop {
let should_extract_tar = is_tar_wrapped_compression(current_path); let should_extract_tar = is_tar_wrapped_compression(current_path);
let content = decompress_once(current_path, base_dir)?; let content =
decompress_once_with_single_stream_cap(current_path, base_dir, single_stream_cap)?;
// If the step produced a single on-disk file that is itself a .tar, // If the step produced a single on-disk file that is itself a .tar,
// recurse on that file. // recurse on that file.
@ -704,7 +709,7 @@ pub fn decompress_file_to_temp(path: &Path) -> Result<(CompressedContent, TempDi
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::{fs::File, io::Write}; use std::{fs::File, io::Write, path::Path};
use flate2::{Compression, write::GzEncoder}; use flate2::{Compression, write::GzEncoder};
use tar::Builder; use tar::Builder;
@ -712,10 +717,17 @@ mod tests {
use zip::{CompressionMethod, ZipWriter, write::SimpleFileOptions}; use zip::{CompressionMethod, ZipWriter, write::SimpleFileOptions};
use super::{ use super::{
CompressedContent, decompress_file_to_temp, decompress_once, CompressedContent, decompress_file_to_temp, materialize_in_memory_archive_entries,
materialize_in_memory_archive_entries,
}; };
/// Test-local stand-in for the former `decompress_once` helper.
///
/// Calls `super::decompress_once_with_single_stream_cap` with the parent
/// module's `MAX_SINGLE_STREAM_DECOMPRESSED_BYTES`, so tests in this module
/// can keep using the two-argument form.
fn decompress_once(path: &Path, base_dir: Option<&Path>) -> anyhow::Result<CompressedContent> {
    let default_cap = super::MAX_SINGLE_STREAM_DECOMPRESSED_BYTES;
    super::decompress_once_with_single_stream_cap(path, base_dir, default_cap)
}
/// 1) Fully unpack: /// 1) Fully unpack:
/// - 1st decompress `.gz` -- get a `.tar` file /// - 1st decompress `.gz` -- get a `.tar` file
/// ///
@ -962,7 +974,7 @@ mod tests {
use tar::Builder; use tar::Builder;
use tempfile::tempdir; use tempfile::tempdir;
use super::{CompressedContent, decompress_once}; use super::CompressedContent;
let tmp = tempdir()?; let tmp = tempdir()?;

View file

@ -17,7 +17,13 @@ use sha2::{Digest, Sha256};
use tracing::debug; use tracing::debug;
use walkdir::WalkDir; use walkdir::WalkDir;
use crate::decompress::decompress_file; use crate::decompress::decompress_file_with_single_stream_cap;
/// Single-stream decompression cap (4 GiB) passed to
/// `decompress_file_with_single_stream_cap` by the Docker extraction paths in
/// this module.
///
/// Docker/OCI image layers are frequently large tar streams, so the limit is
/// kept generous enough that ordinary base-OS layers are not silently dropped
/// from scan coverage, while hostile compressed input still stays bounded.
// nosemgrep: this is the defensive cap — do not flag for missing-limit rules.
const MAX_DOCKER_SINGLE_STREAM_DECOMPRESSED_BYTES: u64 = 4 * (1 << 30);
fn helper_get_creds(helper: &str, registry: &str) -> Option<(String, String)> { fn helper_get_creds(helper: &str, registry: &str) -> Option<(String, String)> {
fn run(bin: &str, registry: &str) -> Option<(String, String)> { fn run(bin: &str, registry: &str) -> Option<(String, String)> {
@ -321,7 +327,11 @@ fn extract_layer_archive(path: &Path, out_dir: &Path) -> Result<()> {
&aliased_path &aliased_path
}; };
let result = decompress_file(layer_path, Some(out_dir)); let result = decompress_file_with_single_stream_cap(
layer_path,
Some(out_dir),
MAX_DOCKER_SINGLE_STREAM_DECOMPRESSED_BYTES,
);
let cleanup_result = if layer_path != path && layer_path.exists() { let cleanup_result = if layer_path != path && layer_path.exists() {
std::fs::remove_file(layer_path) std::fs::remove_file(layer_path)
} else { } else {
@ -344,7 +354,11 @@ fn extract_saved_archive_layers(
pb: &ProgressBar, pb: &ProgressBar,
) -> Result<usize> { ) -> Result<usize> {
pb.set_message("extracting layers"); pb.set_message("extracting layers");
decompress_file(archive_path, Some(out_dir))?; decompress_file_with_single_stream_cap(
archive_path,
Some(out_dir),
MAX_DOCKER_SINGLE_STREAM_DECOMPRESSED_BYTES,
)?;
remove_tar_wrapped_intermediate(archive_path, out_dir)?; remove_tar_wrapped_intermediate(archive_path, out_dir)?;
let layer_paths = collect_saved_archive_layers(out_dir)?; let layer_paths = collect_saved_archive_layers(out_dir)?;
@ -518,7 +532,11 @@ impl Docker {
let tmp_path = out_dir.join(file_name); let tmp_path = out_dir.join(file_name);
let mut tmp = std::fs::File::create(&tmp_path)?; let mut tmp = std::fs::File::create(&tmp_path)?;
tmp.write_all(&layer.data)?; tmp.write_all(&layer.data)?;
decompress_file(&tmp_path, Some(out_dir))?; decompress_file_with_single_stream_cap(
&tmp_path,
Some(out_dir),
MAX_DOCKER_SINGLE_STREAM_DECOMPRESSED_BYTES,
)?;
std::fs::remove_file(&tmp_path)?; std::fs::remove_file(&tmp_path)?;
pb.inc(1); pb.inc(1);
} }