diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d2024e..8df3ba5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [v1.102.0] - Added 3 detection and validation rules for Cognition Devin API credentials: `kingfisher.devin.1` (legacy personal keys, `apk_user_` prefix), `kingfisher.devin.2` (legacy service keys, `apk_` prefix), and `kingfisher.devin.3` (v3 service-user tokens, `cog_` prefix / RFC 4648 base32). Live validation uses `GET /v1/sessions` for `apk_*` keys and `GET /v3/self` for `cog_` tokens. +- Added `kingfisher scan docker --archive <PATH>` for scanning saved Docker/OCI image archives directly, including OCI-layout `docker save` output and compressed tar archives. ## [v1.101.0] - Fixed asymmetric JWT validation panics by using a single `jsonwebtoken` crypto backend and adding RS256 regression coverage. Thanks @AgentEnder. [#386](https://github.com/mongodb/kingfisher/pull/386) diff --git a/README.md b/README.md index 00fe70c..721c7f4 100644 --- a/README.md +++ b/README.md @@ -227,10 +227,11 @@ kingfisher scan s3 bucket-name --prefix path/ kingfisher scan gcs bucket-name --prefix path/ ``` -### 14: Scan a Docker image +### 14: Scan a Docker image or saved image archive ```bash kingfisher scan docker ghcr.io/org/image:latest +kingfisher scan docker --archive image.tar ``` ### 15: Scan Jira issues @@ -660,6 +661,9 @@ kingfisher scan gcs bucket-name # Scan Docker image kingfisher scan docker ghcr.io/owasp/wrongsecrets/wrongsecrets-master:latest-master +# Scan Docker image archive produced by docker save +kingfisher scan docker --archive image.tar + # Scan GitHub organization kingfisher scan github --organization my-org diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index 06e3fcd..acd6d6c 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -9,6 +9,7 @@ All notable changes to this project will be documented in this file. 
## [v1.102.0] - Added 3 detection and validation rules for Cognition Devin API credentials: `kingfisher.devin.1` (legacy personal keys, `apk_user_` prefix), `kingfisher.devin.2` (legacy service keys, `apk_` prefix), and `kingfisher.devin.3` (v3 service-user tokens, `cog_` prefix / RFC 4648 base32). Live validation uses `GET /v1/sessions` for `apk_*` keys and `GET /v3/self` for `cog_` tokens. +- Added `kingfisher scan docker --archive <PATH>` for scanning saved Docker/OCI image archives directly, including OCI-layout `docker save` output and compressed tar archives. ## [v1.101.0] - Fixed asymmetric JWT validation panics by using a single `jsonwebtoken` crypto backend and adding RS256 regression coverage. Thanks @AgentEnder. [#386](https://github.com/mongodb/kingfisher/pull/386) diff --git a/docs-site/docs/usage/basic-scanning.md b/docs-site/docs/usage/basic-scanning.md index 4e73176..0130378 100644 --- a/docs-site/docs/usage/basic-scanning.md +++ b/docs-site/docs/usage/basic-scanning.md @@ -665,6 +665,10 @@ kingfisher scan gcs cloud-samples-data --prefix "storage/" Kingfisher will first try to use any locally available image, then fall back to pulling via OCI. +To skip local image lookup, registry access, and `docker save`, scan a Docker or OCI image archive +directly with `--archive`. Archive inputs include files produced by `docker save`. Supported archive +formats are `.tar`, `.tar.gz`, `.tar.gzip`, `.tgz`, `.tar.bz2`, `.tar.bzip2`, and `.tar.xz`. + Authentication happens *in this order*: 1. **`KF_DOCKER_TOKEN`** env var @@ -688,6 +692,12 @@ kingfisher scan docker some-private-registry.dkr.ecr.us-east-1.amazonaws.com/bas # 3) Or rely on your Docker CLI login/keychain: # (e.g. 
aws ecr get-login-password … | docker login …) kingfisher scan docker private.registry.example.com/my-image:tag + +# 4) Scan a Docker image archive created by docker save: +docker save ghcr.io/owasp/wrongsecrets/wrongsecrets-master:latest-master -o image.tar +kingfisher scan docker --archive image.tar +gzip -k image.tar +kingfisher scan docker --archive image.tar.gz ``` --- diff --git a/docs-site/docs/usage/configuration.md b/docs-site/docs/usage/configuration.md index c9ea0d6..63b2e6c 100644 --- a/docs-site/docs/usage/configuration.md +++ b/docs-site/docs/usage/configuration.md @@ -179,7 +179,7 @@ invocation is scanning, not project policy: - positional paths, `--git-url` - `--github-user` / `--github-org`, `--gitlab-user` / `--gitlab-group` and the equivalent Gitea / Bitbucket / Azure / Hugging Face flags -- `--s3-bucket`, `--gcs-bucket`, `--docker-image` +- `--s3-bucket`, `--gcs-bucket`, `--docker-image`, Docker `--archive` - `--jira-url`, `--confluence-url`, `--slack-query`, `--teams-query`, `--postman-*` diff --git a/docs-site/docs/usage/integrations.md b/docs-site/docs/usage/integrations.md index 634fecc..11954af 100644 --- a/docs-site/docs/usage/integrations.md +++ b/docs-site/docs/usage/integrations.md @@ -103,6 +103,10 @@ kingfisher scan gcs cloud-samples-data --prefix "storage/" Kingfisher will first try to use any locally available image, then fall back to pulling via OCI. +To skip local image lookup, registry access, and `docker save`, scan a Docker or OCI image archive +directly with `--archive`. Archive inputs include files produced by `docker save`. Supported archive +formats are `.tar`, `.tar.gz`, `.tar.gzip`, `.tgz`, `.tar.bz2`, `.tar.bzip2`, and `.tar.xz`. + Authentication happens *in this order*: 1. **`KF_DOCKER_TOKEN`** env var @@ -128,6 +132,12 @@ kingfisher scan docker some-private-registry.dkr.ecr.us-east-1.amazonaws.com/bas # 3) Or rely on your Docker CLI login/keychain: # (e.g. 
aws ecr get-login-password … | docker login …) kingfisher scan docker private.registry.example.com/my-image:tag + +# 4) Scan a Docker image archive created by docker save: +docker save ghcr.io/owasp/wrongsecrets/wrongsecrets-master:latest-master -o image.tar +kingfisher scan docker --archive image.tar +gzip -k image.tar +kingfisher scan docker --archive image.tar.gz ``` > **Deprecated** diff --git a/docs/CONFIG.md b/docs/CONFIG.md index d4ffd84..3297aec 100644 --- a/docs/CONFIG.md +++ b/docs/CONFIG.md @@ -123,7 +123,7 @@ invocation is scanning, not project policy: - positional paths, `--git-url` - `--github-user` / `--github-org`, `--gitlab-user` / `--gitlab-group` and the equivalent Gitea / Bitbucket / Azure / Hugging Face flags -- `--s3-bucket`, `--gcs-bucket`, `--docker-image` +- `--s3-bucket`, `--gcs-bucket`, `--docker-image`, Docker `--archive` - `--jira-url`, `--confluence-url`, `--slack-query`, `--teams-query`, `--postman-*` diff --git a/docs/INTEGRATIONS.md b/docs/INTEGRATIONS.md index ebe2729..e05c117 100644 --- a/docs/INTEGRATIONS.md +++ b/docs/INTEGRATIONS.md @@ -100,6 +100,10 @@ kingfisher scan gcs cloud-samples-data --prefix "storage/" Kingfisher will first try to use any locally available image, then fall back to pulling via OCI. +To skip local image lookup, registry access, and `docker save`, scan a Docker or OCI image archive +directly with `--archive`. Archive inputs include files produced by `docker save`. Supported archive +formats are `.tar`, `.tar.gz`, `.tar.gzip`, `.tgz`, `.tar.bz2`, `.tar.bzip2`, and `.tar.xz`. + Authentication happens *in this order*: 1. **`KF_DOCKER_TOKEN`** env var @@ -125,6 +129,12 @@ kingfisher scan docker some-private-registry.dkr.ecr.us-east-1.amazonaws.com/bas # 3) Or rely on your Docker CLI login/keychain: # (e.g. 
aws ecr get-login-password … | docker login …) kingfisher scan docker private.registry.example.com/my-image:tag + +# 4) Scan a Docker image archive created by docker save: +docker save ghcr.io/owasp/wrongsecrets/wrongsecrets-master:latest-master -o image.tar +kingfisher scan docker --archive image.tar +gzip -k image.tar +kingfisher scan docker --archive image.tar.gz ``` > **Deprecated** diff --git a/docs/USAGE.md b/docs/USAGE.md index 74f6ed4..eae52f4 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -660,6 +660,10 @@ kingfisher scan gcs cloud-samples-data --prefix "storage/" Kingfisher will first try to use any locally available image, then fall back to pulling via OCI. +To skip local image lookup, registry access, and `docker save`, scan a Docker or OCI image archive +directly with `--archive`. Archive inputs include files produced by `docker save`. Supported archive +formats are `.tar`, `.tar.gz`, `.tar.gzip`, `.tgz`, `.tar.bz2`, `.tar.bzip2`, and `.tar.xz`. + Authentication happens *in this order*: 1. **`KF_DOCKER_TOKEN`** env var @@ -683,6 +687,13 @@ kingfisher scan docker some-private-registry.dkr.ecr.us-east-1.amazonaws.com/bas # 3) Or rely on your Docker CLI login/keychain: # (e.g. 
aws ecr get-login-password … | docker login …) kingfisher scan docker private.registry.example.com/my-image:tag + +# 4) Scan a Docker image archive created by docker save: +docker pull ghcr.io/owasp/wrongsecrets/wrongsecrets-master:latest-master +docker save ghcr.io/owasp/wrongsecrets/wrongsecrets-master:latest-master -o image.tar +kingfisher scan docker --archive image.tar +gzip -k image.tar +kingfisher scan docker --archive image.tar.gz ``` --- diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 8abc316..574d329 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -365,6 +365,10 @@ pub struct InputSpecifierArgs { #[arg(long = "docker-image", hide = true)] pub docker_image: Vec, + /// Docker image archives provided by the docker subcommand + #[arg(skip)] + pub docker_archive: Vec, + /// Select how to clone Git repositories #[arg(long, default_value_t=GitCloneMode::Bare, alias="git-clone-mode")] pub git_clone: GitCloneMode, @@ -474,6 +478,7 @@ impl InputSpecifierArgs { || self.s3_bucket.is_some() || self.gcs_bucket.is_some() || !self.docker_image.is_empty() + || !self.docker_archive.is_empty() } /// Return true when any flag has been set that schedules artifact @@ -492,6 +497,7 @@ impl InputSpecifierArgs { || !self.postman_environments.is_empty() || self.postman_all || !self.docker_image.is_empty() + || !self.docker_archive.is_empty() } /// Emit deprecation warnings for legacy top-level provider flags. 
diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs index 721cbed..d1d4f1d 100644 --- a/src/cli/commands/scan.rs +++ b/src/cli/commands/scan.rs @@ -613,10 +613,13 @@ impl ScanCommandArgs { None } ScanInputCommand::Docker(args) => { - if args.images.is_empty() { - bail!("Provide at least one image when using the docker subcommand"); + if args.images.is_empty() && args.archives.is_empty() { + bail!( + "Provide at least one image or --archive path when using the docker subcommand" + ); } scan_args.input_specifier_args.docker_image = args.images; + scan_args.input_specifier_args.docker_archive = args.archives; None } }; @@ -1040,6 +1043,10 @@ pub struct GcsScanArgs { #[derive(Args, Debug, Clone)] pub struct DockerScanArgs { /// Docker or OCI images to scan - #[arg(value_name = "IMAGE", num_args = 1..)] + #[arg(value_name = "IMAGE")] pub images: Vec, + + /// Docker image archive files to scan, such as files produced by docker save + #[arg(long = "archive", value_name = "PATH", value_hint = ValueHint::FilePath)] + pub archives: Vec, } diff --git a/src/direct_validate.rs b/src/direct_validate.rs index 0e98315..512bcee 100644 --- a/src/direct_validate.rs +++ b/src/direct_validate.rs @@ -1039,6 +1039,7 @@ pub(crate) fn create_minimal_scan_args() -> crate::cli::commands::scan::ScanArgs postman_include_mocks_monitors: false, postman_api_url: Url::parse("https://api.getpostman.com/").unwrap(), docker_image: Vec::new(), + docker_archive: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, commit_metadata: true, diff --git a/src/main.rs b/src/main.rs index b7617b2..313834c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1136,8 +1136,17 @@ fn describe_scan_target(args: &InputSpecifierArgs) -> Option { if let Some(b) = &args.gcs_bucket { return Some(format!("gs://{}{}", b, args.gcs_prefix.as_deref().unwrap_or(""))); } - if !args.docker_image.is_empty() { - return Some(format!("docker: {}", join_brief(&args.docker_image, "images"))); + if 
!args.docker_image.is_empty() || !args.docker_archive.is_empty() { + let mut docker_targets = Vec::new(); + if !args.docker_image.is_empty() { + docker_targets.push(join_brief(&args.docker_image, "images")); + } + if !args.docker_archive.is_empty() { + let archives = + args.docker_archive.iter().map(|p| p.display().to_string()).collect::>(); + docker_targets.push(join_brief(&archives, "archives")); + } + return Some(format!("docker: {}", docker_targets.join(", "))); } if let Some(u) = &args.jira_url { return Some(format!("jira: {}", u)); @@ -1675,6 +1684,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { postman_api_url: Url::parse("https://api.getpostman.com/").unwrap(), // Docker image scanning docker_image: Vec::new(), + docker_archive: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, diff --git a/src/reporter.rs b/src/reporter.rs index 0b6b280..5c3536d 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -1402,6 +1402,9 @@ fn derive_scan_target(args: &cli::commands::scan::ScanArgs) -> Option { for image in &input_args.docker_image { targets.push(format!("docker://{image}")); } + for archive in &input_args.docker_archive { + targets.push(format!("docker-archive://{}", archive.display())); + } if input_args.jira_url.is_some() { targets.push("jira".to_string()); } @@ -1818,6 +1821,7 @@ mod tests { gcs_prefix: None, gcs_service_account: None, docker_image: Vec::new(), + docker_archive: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, commit_metadata: true, diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index b650b90..323c82f 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -185,6 +185,7 @@ mod tests { gcs_service_account: None, docker_image: Vec::new(), + docker_archive: Vec::new(), // clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs index 
278996f..805f977 100644 --- a/src/scanner/docker.rs +++ b/src/scanner/docker.rs @@ -1,7 +1,8 @@ +use std::collections::HashSet; use std::env; use std::fs::File; use std::io::{Read, Write}; -use std::path::{Path, PathBuf}; +use std::path::{Component, Path, PathBuf}; use std::process::{Command, Stdio}; use std::time::Duration; @@ -67,6 +68,296 @@ fn image_dir_name(reference: &str) -> String { name } +fn archive_dir_name(path: &Path) -> String { + image_dir_name(&path.display().to_string()) +} + +fn progress_bar(use_progress: bool) -> ProgressBar { + if use_progress { + let style = + ProgressStyle::with_template("{spinner} {msg} {pos}/{len}").expect("progress template"); + let pb = ProgressBar::new(0).with_style(style); + pb.enable_steady_tick(Duration::from_millis(100)); + pb + } else { + ProgressBar::hidden() + } +} + +fn tar_wrapped_intermediate_path(archive_path: &Path, out_dir: &Path) -> Option { + let filename = archive_path.file_name()?.to_str()?.to_ascii_lowercase(); + let is_tar_wrapped = filename.ends_with(".tgz") + || filename.ends_with(".tar.gz") + || filename.ends_with(".tar.gzip") + || filename.ends_with(".tar.bz2") + || filename.ends_with(".tar.bzip2") + || filename.ends_with(".tar.xz"); + + if !is_tar_wrapped { + return None; + } + + let stem = archive_path.file_stem()?; + Some(out_dir.join(stem).with_extension("decomp.tar")) +} + +fn is_safe_relative_path(path: &Path) -> bool { + !path.is_absolute() + && path + .components() + .all(|component| matches!(component, Component::Normal(_) | Component::CurDir)) +} + +fn push_manifest_layer( + out_dir: &Path, + layer_path: &str, + layer_paths: &mut Vec, + seen: &mut HashSet, +) -> Result<()> { + let relative_path = Path::new(layer_path); + if !is_safe_relative_path(relative_path) { + return Err(anyhow!("unsafe Docker archive layer path {layer_path}")); + } + + let path = out_dir.join(relative_path); + if !path.is_file() { + return Err(anyhow!("Docker archive layer {} was not found", path.display())); + } + + 
if seen.insert(path.clone()) { + layer_paths.push(path); + } + Ok(()) +} + +fn collect_docker_manifest_layers( + out_dir: &Path, + layer_paths: &mut Vec, + seen: &mut HashSet, +) -> Result<()> { + let manifest_path = out_dir.join("manifest.json"); + if !manifest_path.is_file() { + return Ok(()); + } + + let manifest: Value = serde_json::from_reader(File::open(&manifest_path)?) + .with_context(|| format!("parsing {}", manifest_path.display()))?; + if let Some(images) = manifest.as_array() { + for image in images { + if let Some(layers) = image.get("Layers").and_then(|v| v.as_array()) { + for layer in layers { + if let Some(layer_path) = layer.as_str() { + push_manifest_layer(out_dir, layer_path, layer_paths, seen)?; + } + } + } + } + } + + Ok(()) +} + +fn blob_path_from_digest(out_dir: &Path, digest: &str) -> Option { + let (algorithm, value) = digest.split_once(':')?; + let relative_path = Path::new("blobs").join(algorithm).join(value); + if is_safe_relative_path(&relative_path) { Some(out_dir.join(relative_path)) } else { None } +} + +fn collect_oci_layers_from_value( + out_dir: &Path, + value: &Value, + layer_paths: &mut Vec, + seen_layers: &mut HashSet, + seen_manifests: &mut HashSet, +) -> Result<()> { + if let Some(layers) = value.get("layers").and_then(|v| v.as_array()) { + for layer in layers { + if let Some(digest) = layer.get("digest").and_then(|v| v.as_str()) { + let path = blob_path_from_digest(out_dir, digest) + .ok_or_else(|| anyhow!("invalid OCI layer digest {digest}"))?; + if !path.is_file() { + return Err(anyhow!("OCI layer blob {} was not found", path.display())); + } + if seen_layers.insert(path.clone()) { + layer_paths.push(path); + } + } + } + } + + if let Some(manifests) = value.get("manifests").and_then(|v| v.as_array()) { + for manifest in manifests { + let is_attestation = manifest + .get("annotations") + .and_then(|v| v.get("vnd.docker.reference.type")) + .and_then(|v| v.as_str()) + == Some("attestation-manifest"); + let is_unknown_platform 
= + manifest.get("platform").and_then(|v| v.get("os")).and_then(|v| v.as_str()) + == Some("unknown"); + if is_attestation || is_unknown_platform { + continue; + } + + if let Some(digest) = manifest.get("digest").and_then(|v| v.as_str()) { + let path = blob_path_from_digest(out_dir, digest) + .ok_or_else(|| anyhow!("invalid OCI manifest digest {digest}"))?; + if !path.is_file() || !seen_manifests.insert(path.clone()) { + continue; + } + let manifest_value: Value = serde_json::from_reader(File::open(&path)?) + .with_context(|| format!("parsing OCI manifest {}", path.display()))?; + collect_oci_layers_from_value( + out_dir, + &manifest_value, + layer_paths, + seen_layers, + seen_manifests, + )?; + } + } + } + + Ok(()) +} + +fn collect_oci_layout_layers( + out_dir: &Path, + layer_paths: &mut Vec, + seen_layers: &mut HashSet, +) -> Result<()> { + let index_path = out_dir.join("index.json"); + if !index_path.is_file() { + return Ok(()); + } + + let index: Value = serde_json::from_reader(File::open(&index_path)?) 
+ .with_context(|| format!("parsing {}", index_path.display()))?; + let mut seen_manifests = HashSet::new(); + collect_oci_layers_from_value(out_dir, &index, layer_paths, seen_layers, &mut seen_manifests) +} + +fn collect_saved_archive_layers(out_dir: &Path) -> Result> { + let mut layer_paths = Vec::new(); + let mut seen = HashSet::new(); + + for entry in WalkDir::new(out_dir) { + let entry = entry?; + if entry.file_name() == "layer.tar" { + let path = entry.path().to_path_buf(); + if seen.insert(path.clone()) { + layer_paths.push(path); + } + } + } + + if layer_paths.is_empty() { + collect_docker_manifest_layers(out_dir, &mut layer_paths, &mut seen)?; + } + if layer_paths.is_empty() { + collect_oci_layout_layers(out_dir, &mut layer_paths, &mut seen)?; + } + + Ok(layer_paths) +} + +fn sha256_file(path: &Path) -> Result { + let mut file = File::open(path)?; + let mut hasher = Sha256::new(); + let mut buf = [0_u8; 16 * 1024]; + loop { + let read = file.read(&mut buf)?; + if read == 0 { + break; + } + hasher.update(&buf[..read]); + } + Ok(hex::encode(hasher.finalize())) +} + +fn extension_for_extensionless_layer(path: &Path) -> Result<&'static str> { + let mut file = File::open(path)?; + let mut buf = [0_u8; 512]; + let len = file.read(&mut buf)?; + + if len >= 2 && buf[0] == 0x1f && buf[1] == 0x8b { + return Ok("tar.gz"); + } + if len >= 262 && &buf[257..262] == b"ustar" { + return Ok("tar"); + } + + Err(anyhow!("unsupported Docker archive layer compression for {}", path.display())) +} + +fn link_or_copy_layer(source: &Path, dest: &Path) -> Result<()> { + match std::fs::hard_link(source, dest) { + Ok(()) => Ok(()), + Err(_) => { + std::fs::copy(source, dest)?; + Ok(()) + } + } +} + +fn remove_tar_wrapped_intermediate(path: &Path, out_dir: &Path) -> Result<()> { + if let Some(intermediate) = tar_wrapped_intermediate_path(path, out_dir) + && intermediate.exists() + { + std::fs::remove_file(intermediate)?; + } + Ok(()) +} + +fn extract_layer_archive(path: &Path, 
out_dir: &Path) -> Result<()> { + let aliased_path; + let layer_path = if path.extension().is_some() { + path + } else { + let ext = extension_for_extensionless_layer(path)?; + let digest = sha256_file(path)?; + aliased_path = out_dir.join(format!("layer_{digest}.{ext}")); + link_or_copy_layer(path, &aliased_path)?; + &aliased_path + }; + + let result = decompress_file(layer_path, Some(out_dir)); + let cleanup_result = if layer_path != path && layer_path.exists() { + std::fs::remove_file(layer_path) + } else { + Ok(()) + }; + result?; + cleanup_result?; + remove_tar_wrapped_intermediate(layer_path, out_dir)?; + + if path.starts_with(out_dir) && path.exists() { + std::fs::remove_file(path)?; + } + + Ok(()) +} + +fn extract_saved_archive_layers( + archive_path: &Path, + out_dir: &Path, + pb: &ProgressBar, +) -> Result { + pb.set_message("extracting layers"); + decompress_file(archive_path, Some(out_dir))?; + remove_tar_wrapped_intermediate(archive_path, out_dir)?; + + let layer_paths = collect_saved_archive_layers(out_dir)?; + + pb.set_length(layer_paths.len() as u64); + for p in &layer_paths { + extract_layer_archive(p, out_dir)?; + pb.inc(1); + } + + Ok(layer_paths.len()) +} + fn creds_from_docker_config(registry: &str) -> Option<(String, String)> { let config_dir = env::var("DOCKER_CONFIG") .map(PathBuf::from) @@ -140,15 +431,7 @@ impl Docker { return Err(anyhow!("image not local")); } - let pb = if use_progress { - let style = ProgressStyle::with_template("{spinner} {msg} {pos}/{len}") - .expect("progress template"); - let pb = ProgressBar::new(0).with_style(style); - pb.enable_steady_tick(Duration::from_millis(100)); - pb - } else { - ProgressBar::hidden() - }; + let pb = progress_bar(use_progress); pb.set_message(format!("saving local {image}")); std::fs::create_dir_all(out_dir)?; @@ -162,43 +445,35 @@ impl Docker { return Err(anyhow!("failed to save local image")); } - pb.set_message("extracting layers"); - decompress_file(&tar_path, Some(out_dir))?; - - let 
mut layer_paths = Vec::new(); - for entry in WalkDir::new(out_dir) { - let entry = entry?; - if entry.file_name() == "layer.tar" { - layer_paths.push(entry.path().to_path_buf()); - } - } - - pb.set_length(layer_paths.len() as u64); - for p in layer_paths { - let mut file = File::open(&p)?; - let mut hasher = Sha256::new(); - let mut buf = [0_u8; 16 * 1024]; - loop { - let read = file.read(&mut buf)?; - if read == 0 { - break; - } - hasher.update(&buf[..read]); - } - let digest = hex::encode(hasher.finalize()); - - let new_path = out_dir.join(format!("layer_{digest}.tar")); - std::fs::rename(&p, &new_path)?; - // extract layer contents so inner filenames appear in scan results - decompress_file(&new_path, Some(out_dir))?; - std::fs::remove_file(&new_path)?; - pb.inc(1); - } + extract_saved_archive_layers(&tar_path, out_dir, &pb)?; pb.finish_with_message(format!("saved {image}")); Ok(()) } + pub fn save_archive_to_dir( + &self, + archive_path: &Path, + out_dir: &Path, + use_progress: bool, + ) -> Result<()> { + let pb = progress_bar(use_progress); + pb.set_message(format!("extracting {}", archive_path.display())); + + std::fs::create_dir_all(out_dir)?; + let layer_count = extract_saved_archive_layers(archive_path, out_dir, &pb)?; + if layer_count == 0 { + pb.finish_with_message("no docker layers found"); + return Err(anyhow!( + "archive {} did not contain Docker image layers", + archive_path.display() + )); + } + + pb.finish_with_message(format!("extracted {}", archive_path.display())); + Ok(()) + } + pub async fn save_image_to_dir( &self, image: &str, @@ -211,16 +486,8 @@ impl Docker { let reference: Reference = image.parse().with_context(|| format!("invalid image reference {image}"))?; debug!("Pulling {image}"); - let pb = if use_progress { - let style = ProgressStyle::with_template("{spinner} {msg} {pos}/{len}") - .expect("progress template"); - let pb = ProgressBar::new(0).with_style(style); - pb.enable_steady_tick(Duration::from_millis(100)); - 
pb.set_message(format!("pulling {image}")); - pb - } else { - ProgressBar::hidden() - }; + let pb = progress_bar(use_progress); + pb.set_message(format!("pulling {image}")); let client = Client::new(ClientConfig { platform_resolver: Some(Box::new(linux_amd64_resolver)), ..Default::default() @@ -281,12 +548,193 @@ pub async fn save_docker_images( Ok(dirs) } +pub fn save_docker_archives( + archives: &[PathBuf], + clone_root: &Path, + use_progress: bool, +) -> Result> { + let docker = Docker::new(); + let mut dirs = Vec::new(); + + for archive in archives { + let dir_name = archive_dir_name(archive); + let out_dir = clone_root.join(format!("docker_archive_{dir_name}")); + docker + .save_archive_to_dir(archive, &out_dir, use_progress) + .with_context(|| format!("extracting docker archive {}", archive.display()))?; + dirs.push((out_dir, archive.display().to_string())); + } + + Ok(dirs) +} + #[cfg(test)] mod tests { use super::*; + use flate2::{Compression, write::GzEncoder}; + use tempfile::tempdir; #[test] fn docker_struct_new() { let _ = Docker::new(); } + + fn append_bytes(tar: &mut tar::Builder, path: &str, data: &[u8]) -> Result<()> { + let mut hdr = tar::Header::new_gnu(); + hdr.set_size(data.len() as u64); + hdr.set_mode(0o644); + hdr.set_cksum(); + tar.append_data(&mut hdr, path, data)?; + Ok(()) + } + + fn build_layer_tar() -> Result> { + let mut layer = Vec::new(); + { + let mut tar = tar::Builder::new(&mut layer); + append_bytes( + &mut tar, + "app/secret.txt", + b"token=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\n", + )?; + tar.finish()?; + } + Ok(layer) + } + + fn build_docker_archive(path: &Path, gzip: bool) -> Result<()> { + let layer = build_layer_tar()?; + let file = File::create(path)?; + + if gzip { + let gz = GzEncoder::new(file, Compression::default()); + let mut tar = tar::Builder::new(gz); + append_bytes(&mut tar, "manifest.json", br#"[{"Layers":["abc/layer.tar"]}]"#)?; + append_bytes(&mut tar, "abc/layer.tar", &layer)?; + 
tar.into_inner()?.finish()?; + } else { + let mut tar = tar::Builder::new(file); + append_bytes(&mut tar, "manifest.json", br#"[{"Layers":["abc/layer.tar"]}]"#)?; + append_bytes(&mut tar, "abc/layer.tar", &layer)?; + tar.finish()?; + } + + Ok(()) + } + + fn build_oci_layout_archive(path: &Path) -> Result<()> { + let layer = build_layer_tar()?; + let file = File::create(path)?; + let gz = GzEncoder::new(Vec::new(), Compression::default()); + let mut layer_tar = tar::Builder::new(gz); + append_bytes( + &mut layer_tar, + "app/secret.txt", + b"token=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\n", + )?; + let compressed_layer = layer_tar.into_inner()?.finish()?; + + let mut tar = tar::Builder::new(file); + append_bytes(&mut tar, "oci-layout", br#"{"imageLayoutVersion":"1.0.0"}"#)?; + append_bytes( + &mut tar, + "manifest.json", + br#"[{"Config":"blobs/sha256/config","Layers":["blobs/sha256/layer"]}]"#, + )?; + append_bytes(&mut tar, "blobs/sha256/config", br#"{}"#)?; + append_bytes(&mut tar, "blobs/sha256/layer", &compressed_layer)?; + append_bytes(&mut tar, "blobs/sha256/unused", &layer)?; + tar.finish()?; + Ok(()) + } + + fn build_pure_oci_archive(path: &Path) -> Result<()> { + let file = File::create(path)?; + let gz = GzEncoder::new(Vec::new(), Compression::default()); + let mut layer_tar = tar::Builder::new(gz); + append_bytes( + &mut layer_tar, + "app/secret.txt", + b"token=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\n", + )?; + let compressed_layer = layer_tar.into_inner()?.finish()?; + + let mut tar = tar::Builder::new(file); + append_bytes(&mut tar, "oci-layout", br#"{"imageLayoutVersion":"1.0.0"}"#)?; + append_bytes( + &mut tar, + "index.json", + 
br#"{"schemaVersion":2,"manifests":[{"mediaType":"application/vnd.oci.image.manifest.v1+json","digest":"sha256:manifest","platform":{"os":"linux","architecture":"amd64"}},{"mediaType":"application/vnd.oci.image.manifest.v1+json","digest":"sha256:attestation","platform":{"os":"unknown","architecture":"unknown"},"annotations":{"vnd.docker.reference.type":"attestation-manifest"}}]}"#, + )?; + append_bytes( + &mut tar, + "blobs/sha256/manifest", + br#"{"schemaVersion":2,"layers":[{"mediaType":"application/vnd.oci.image.layer.v1.tar+gzip","digest":"sha256:layer"}]}"#, + )?; + append_bytes( + &mut tar, + "blobs/sha256/attestation", + br#"{"schemaVersion":2,"layers":[{"mediaType":"application/vnd.in-toto+json","digest":"sha256:attestation-layer"}]}"#, + )?; + append_bytes(&mut tar, "blobs/sha256/layer", &compressed_layer)?; + append_bytes(&mut tar, "blobs/sha256/attestation-layer", br#"{"predicate":{}}"#)?; + tar.finish()?; + Ok(()) + } + + #[test] + fn save_archive_to_dir_extracts_docker_archive() -> Result<()> { + let dir = tempdir()?; + let archive = dir.path().join("image.tar"); + let out = dir.path().join("out"); + build_docker_archive(&archive, false)?; + + Docker::new().save_archive_to_dir(&archive, &out, false)?; + + assert!(out.join("app/secret.txt").exists()); + Ok(()) + } + + #[test] + fn save_archive_to_dir_extracts_gzipped_docker_archive() -> Result<()> { + let dir = tempdir()?; + let archive = dir.path().join("image.tar.gz"); + let out = dir.path().join("out"); + build_docker_archive(&archive, true)?; + + Docker::new().save_archive_to_dir(&archive, &out, false)?; + + assert!(out.join("app/secret.txt").exists()); + assert!(!out.join("image.decomp.tar").exists()); + Ok(()) + } + + #[test] + fn save_archive_to_dir_extracts_oci_layout_archive() -> Result<()> { + let dir = tempdir()?; + let archive = dir.path().join("image.tar"); + let out = dir.path().join("out"); + build_oci_layout_archive(&archive)?; + + Docker::new().save_archive_to_dir(&archive, &out, 
false)?; + + assert!(out.join("app/secret.txt").exists()); + assert!(!out.join("blobs/sha256/layer").exists()); + Ok(()) + } + + #[test] + fn save_archive_to_dir_extracts_pure_oci_archive() -> Result<()> { + let dir = tempdir()?; + let archive = dir.path().join("image.tar"); + let out = dir.path().join("out"); + build_pure_oci_archive(&archive)?; + + Docker::new().save_archive_to_dir(&archive, &out, false)?; + + assert!(out.join("app/secret.txt").exists()); + assert!(out.join("blobs/sha256/attestation-layer").exists()); + assert!(!out.join("blobs/sha256/layer").exists()); + Ok(()) + } } diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index 918136e..9f6c0b3 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -1,5 +1,5 @@ //! Public façade for the scanner subsystem. -pub(crate) use docker::save_docker_images; +pub(crate) use docker::{save_docker_archives, save_docker_images}; pub(crate) use enumerate::enumerate_filesystem_inputs; pub(crate) use repos::{ clone_or_update_git_repos_streaming, enumerate_azure_repos, enumerate_bitbucket_repos, diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 209ca20..80db193 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -39,7 +39,7 @@ use crate::{ fetch_gcs_objects, fetch_git_host_artifacts, fetch_jira_issues, fetch_postman_resources, fetch_s3_objects, fetch_slack_messages, fetch_teams_messages, }, - run_secret_validation, save_docker_images, + run_secret_validation, save_docker_archives, save_docker_images, summary::{compute_scan_totals, print_scan_summary}, }, util::{set_redaction_enabled, tokio_blocking_threads_limit}, @@ -495,21 +495,31 @@ async fn fetch_all_artifacts( } } - if !args.input_specifier_args.docker_image.is_empty() { + if !args.input_specifier_args.docker_image.is_empty() + || !args.input_specifier_args.docker_archive.is_empty() + { let clone_root = { let ds = datastore.lock().unwrap(); ds.clone_root() }; - let docker_dirs = save_docker_images( - 
&args.input_specifier_args.docker_image, + let mut docker_dirs = Vec::new(); + docker_dirs.extend( + save_docker_images( + &args.input_specifier_args.docker_image, + &clone_root, + progress_enabled, + ) + .await?, + ); + docker_dirs.extend(save_docker_archives( + &args.input_specifier_args.docker_archive, &clone_root, progress_enabled, - ) - .await?; - for (dir, img) in docker_dirs { + )?); + for (dir, source) in docker_dirs { { let mut ds = datastore.lock().unwrap(); - ds.register_docker_image(dir.clone(), img); + ds.register_docker_image(dir.clone(), source); } if !push(dir, &out_tx) { return Ok(()); diff --git a/tests/cli_subcommands.rs b/tests/cli_subcommands.rs index 9d5b7c3..fe230fd 100644 --- a/tests/cli_subcommands.rs +++ b/tests/cli_subcommands.rs @@ -833,6 +833,32 @@ mod huggingface { } } +// ============================================================================= +// Docker Scan Subcommand Tests +// ============================================================================= + +mod docker { + use super::*; + + #[test] + fn scan_docker_help_includes_archive() { + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args(["scan", "docker", "--help"]) + .assert() + .success() + .stdout(contains("Scan Docker or OCI images").and(contains("--archive"))); + } + + #[test] + fn scan_docker_requires_image_or_archive() { + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args(["scan", "docker", "--no-update-check"]) + .assert() + .failure() + .stderr(contains("image or --archive")); + } +} + // ============================================================================= // Cross-Platform Tests // ============================================================================= diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs index 9ddd1d3..bf86bde 100644 --- a/tests/int_allowlist.rs +++ b/tests/int_allowlist.rs @@ -143,6 +143,7 @@ fn run_skiplist(skip_regex: Vec, skip_skipword: Vec) -> Result Result<()> { gcs_prefix: None, 
gcs_service_account: None, docker_image: Vec::new(), + docker_archive: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, commit_metadata: true, diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index fdc7270..1b0a48e 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -147,6 +147,7 @@ rules: gcs_service_account: None, // Docker image scanning docker_image: Vec::new(), + docker_archive: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_github.rs b/tests/int_github.rs index ca938c2..8f43793 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -134,6 +134,7 @@ fn test_github_remote_scan() -> Result<()> { gcs_service_account: None, // Docker image scanning docker_image: Vec::new(), + docker_archive: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index d39572e..3927c6d 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -133,6 +133,7 @@ fn test_gitlab_remote_scan() -> Result<()> { gcs_service_account: None, // Docker image scanning docker_image: Vec::new(), + docker_archive: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, commit_metadata: true, @@ -324,6 +325,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { role_arn: None, aws_local_profile: None, docker_image: Vec::new(), + docker_archive: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::None, commit_metadata: true, diff --git a/tests/int_postman.rs b/tests/int_postman.rs index 2909f6c..fe76cf8 100644 --- a/tests/int_postman.rs +++ b/tests/int_postman.rs @@ -205,6 +205,7 @@ async fn test_scan_postman_all() -> Result<()> { gcs_prefix: None, gcs_service_account: None, docker_image: Vec::new(), + docker_archive: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, commit_metadata: 
true, diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 9a7867e..b70607c 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -111,6 +111,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> { gcs_prefix: None, gcs_service_account: None, docker_image: Vec::new(), + docker_archive: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, commit_metadata: true, diff --git a/tests/int_slack.rs b/tests/int_slack.rs index 083a2b7..96a4409 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -116,6 +116,7 @@ impl TestContext { gcs_service_account: None, max_results: 10, docker_image: Vec::new(), + docker_archive: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, commit_metadata: true, @@ -292,6 +293,7 @@ async fn test_scan_slack_messages() -> Result<()> { gcs_prefix: None, gcs_service_account: None, docker_image: Vec::new(), + docker_archive: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, commit_metadata: true, diff --git a/tests/int_teams.rs b/tests/int_teams.rs index 6996c77..20d4ac9 100644 --- a/tests/int_teams.rs +++ b/tests/int_teams.rs @@ -152,6 +152,7 @@ async fn test_scan_teams_messages() -> Result<()> { gcs_prefix: None, gcs_service_account: None, docker_image: Vec::new(), + docker_archive: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, commit_metadata: true, diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 2977283..681efa8 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -190,6 +190,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> { gcs_service_account: None, // Docker image scanning docker_image: Vec::new(), + docker_archive: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 491b3eb..88bcada 100644 
--- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -133,6 +133,7 @@ impl TestContext { gcs_service_account: None, // Docker image scanning docker_image: Vec::new(), + docker_archive: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, @@ -291,6 +292,7 @@ impl TestContext { aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), + docker_archive: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/smoke_docker.rs b/tests/smoke_docker.rs index 3bd4307..2e13205 100644 --- a/tests/smoke_docker.rs +++ b/tests/smoke_docker.rs @@ -1,4 +1,39 @@ -use std::process::Command; +use std::{fs::File, io::Write, path::Path, process::Command}; + +use assert_cmd::prelude::*; +use flate2::{Compression, write::GzEncoder}; +use predicates::prelude::*; + +fn append_bytes<W: Write>(tar: &mut tar::Builder<W>, path: &str, data: &[u8]) -> anyhow::Result<()> { + let mut header = tar::Header::new_gnu(); + header.set_size(data.len() as u64); + header.set_mode(0o644); + header.set_cksum(); + tar.append_data(&mut header, path, data)?; + Ok(()) +} + +fn build_docker_archive(path: &Path, github_pat: &str) -> anyhow::Result<()> { + let mut layer = GzEncoder::new(Vec::new(), Compression::default()); + { + let mut tar = tar::Builder::new(&mut layer); + append_bytes(&mut tar, "app/secret.txt", format!("token={github_pat}\n").as_bytes())?; + tar.finish()?; + } + let layer = layer.finish()?; + + let mut tar = tar::Builder::new(File::create(path)?); + append_bytes(&mut tar, "oci-layout", br#"{"imageLayoutVersion":"1.0.0"}"#)?; + append_bytes( + &mut tar, + "manifest.json", + br#"[{"Config":"blobs/sha256/config","Layers":["blobs/sha256/layer"]}]"#, + )?; + append_bytes(&mut tar, "blobs/sha256/config", br#"{}"#)?; + append_bytes(&mut tar, "blobs/sha256/layer", &layer)?; + tar.finish()?; + Ok(()) +} #[test] fn smoke_scan_docker_image() -> 
anyhow::Result<()> { @@ -23,3 +58,33 @@ fn smoke_scan_docker_image() -> anyhow::Result<()> { assert!(stdout.contains("Not Attempted")); Ok(()) } + +#[test] +fn smoke_scan_docker_archive() -> anyhow::Result<()> { + let dir = tempfile::tempdir()?; + let archive = dir.path().join("image.tar"); + let github_pat = "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"; + build_docker_archive(&archive, github_pat)?; + + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args([ + "scan", + "docker", + "--archive", + archive.to_str().unwrap(), + "--confidence=low", + "--format", + "json", + "--rule", + "kingfisher.github.2", + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout( + predicate::str::contains(github_pat).and(predicate::str::contains("app/secret.txt")), + ); + + Ok(()) +}