diff --git a/CHANGELOG.md b/CHANGELOG.md index f21fc28..5556a43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this project will be documented in this file. +## [1.27.0] +- Added Buildkite rule +- Added support for scanning Docker images via `--docker-image` + ## [1.26.0] - Added rule for ElevenLabs - Added support for scanning Jira issues via a given JQL (Jira Query Language) diff --git a/Cargo.toml b/Cargo.toml index eff7a26..79595e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.26.0" +version = "1.27.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -186,6 +186,8 @@ globset = "0.4.16" jsonwebtoken = "9.3.1" ipnet = "2.11.0" jira_query = "1.6.0" +oci-client = { version = "0.15", default-features = false, features = ["rustls-tls"] } +walkdir = "2.5.0" [dependencies.tikv-jemallocator] version = "0.6" diff --git a/README.md b/README.md index 8d398fa..8dab5ad 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,13 @@ Kingfisher is a blazingly fast secret‑scanning and validation tool built in Ru Kingfisher originated as a fork of [Nosey Parker](https://github.com/praetorian-inc/noseyparker) by Praetorian Security, Inc, and is built atop their incredible work and the work contributed by the Nosey Parker community. Kingfisher extends Nosey Parker by: -1. Validating secrets in real time via cloud-provider APIs -2. Enhancing regex-based detection with source-code parsing for improved accuracy -3. Adding GitLab repository scanning support -4. Providing Jira scanning capabilities -5. Introducing a baseline feature that suppresses known secrets and reports only newly introduced ones -5. Offering native Windows environment support +1. **Validating secrets** in real time via cloud-provider APIs +2. 
Enhancing regex-based detection with **source-code parsing** for improved accuracy +3. Adding **GitLab** repository scanning support +4. Adding support for scanning **Docker** images via `--docker-image` +5. Providing **Jira** scanning capabilities +6. Introducing a baseline feature that suppresses known secrets and reports only newly introduced ones +7. Offering native **Windows** support **MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) @@ -28,6 +29,7 @@ Kingfisher extends Nosey Parker by: - **Built-In Validation**: Hundreds of built-in detection rules, many with live-credential validators that call the relevant service APIs (AWS, Azure, GCP, Stripe, etc.) to confirm a secret is active. You can extend or override the library by adding YAML-defined rules on the command line—see [docs/RULES.md](/docs/RULES.md) for details - **Git History Scanning**: Scan local repos, remote GitHub/GitLab orgs/users, or arbitrary GitHub/GitLab repos - **Jira Scanning**: Scan issues returned from a JQL search using `--jira-url` and `--jql` +- **Docker Image Scanning**: Scan public or private docker images via `--docker-image` - **Baseline Support:** Generate and manage baseline files to ignore known secrets and report only newly introduced ones. See ([docs/BASELINE.md](docs/BASELINE.md)) for details. # Getting Started @@ -195,6 +197,7 @@ kingfisher scan /path/to/repo --format sarif --output findings.sarif ```bash cat /path/to/file.py | kingfisher scan - + ``` ### Scan using a rule _family_ with one flag @@ -243,8 +246,35 @@ kingfisher scan ./my-project \ --exclude tests \ -v ``` +## Scanning Docker Images ---- +Kingfisher will first try to use any locally available image, then fall back to pulling via OCI. + +Authentication happens *in this order*: + +1. 
**`KF_DOCKER_TOKEN`** env var + - If it contains `user:pass`, it’s used as Basic auth + - Otherwise it’s sent as a Bearer token +2. **Docker CLI credentials** + - Checks `credHelpers` (per-registry) and `credsStore` in `~/.docker/config.json`. + - Falls back to the legacy `auths` → `auth` (base64) entries. +3. **Anonymous** (no credentials) + + +```bash +# 1) Scan public or already-pulled image +kingfisher scan --docker-image ghcr.io/owasp/wrongsecrets/wrongsecrets-master:latest-master + +# 2) For private registries, explicitly set KF_DOCKER_TOKEN: +# - Basic auth: "user:pass" +# - Bearer only: "TOKEN" +export KF_DOCKER_TOKEN="AWS:$(aws ecr get-login-password --region us-east-1)" +kingfisher scan --docker-image some-private-registry.dkr.ecr.us-east-1.amazonaws.com/base/amazonlinux2023:latest + +# 3) Or rely on your Docker CLI login/keychain: +# (e.g. aws ecr get-login-password … | docker login …) +kingfisher scan --docker-image private.registry.example.com/my-image:tag +``` ## Scanning GitHub @@ -320,6 +350,7 @@ KF_JIRA_TOKEN="token" kingfisher scan \ | `KF_GITHUB_TOKEN` | GitHub Personal Access Token | | `KF_GITLAB_TOKEN` | GitLab Personal Access Token | | `KF_JIRA_TOKEN` | Jira API token | +| `KF_DOCKER_TOKEN` | Docker registry token (`user:pass` or bearer token). If unset, credentials from the Docker keychain are used | Set them temporarily per command: @@ -356,12 +387,20 @@ _If no token is provided Kingfisher still works for public repositories._ Run the provided helper script to add a hook that scans staged files before each commit: ```bash -./install-precommit-hook.sh +# local (current repo only ─ default) +./install-precommit-hook.sh ``` This creates `.git/hooks/pre-commit` that scans the files staged for commit with `kingfisher scan --no-update-check` and blocks the commit if any secrets are found. 
+```bash +# global (every repo on this machine) +./install-precommit-hook.sh --global +``` + +Installs a global pre-commit hook in Git's global hooks directory (`core.hooksPath`, which the script sets to `$HOME/.git-hooks` if it is not already configured); for every Git repository you use, it runs `kingfisher scan --no-update-check` on the staged files and cancels the commit if any secrets are detected. + ### Install a Pre-Receive Hook To check incoming pushes on a server-side repository, install the pre-receive hook: diff --git a/data/rules/buildkite.yml b/data/rules/buildkite.yml new file mode 100644 index 0000000..3728e98 --- /dev/null +++ b/data/rules/buildkite.yml @@ -0,0 +1,31 @@ +rules: + - name: Buildkite API Key + id: kingfisher.buildkite.1 + pattern: | + (?xi) + \b + ( + bkua_[a-z0-9]{40} + ) + \b + min_entropy: 3.5 + confidence: medium + examples: + - bkua_3c7019c2e4b6e76fe2e8bdde7c154e3c1a211743 + references: + - https://buildkite.com/docs/apis/rest-api/access-token + validation: + type: Http + content: + request: + method: GET + url: https://api.buildkite.com/v2/access-token + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: ['"uuid"', '"user"'] + \ No newline at end of file diff --git a/data/rules/datadog.yml b/data/rules/datadog.yml index c851ed8..718b282 100644 --- a/data/rules/datadog.yml +++ b/data/rules/datadog.yml @@ -43,12 +43,11 @@ rules: id: kingfisher.datadog.2 pattern: | (?xi) - \b - (?: - dd[_-]?\w{0,8}[_-]?(?:key|secret) | - datadog - ) - (?:.|[\n\r]){0,64}? + \b + datadog + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
\b ( [a-z0-9]{40} @@ -57,7 +56,7 @@ rules: min_entropy: 3.3 confidence: medium examples: - - dd_secret_key-3c0c3965368a6b10f7640dbda46abfdca981c2d3 + - datadog_secret_key-3c0c3965368a6b10f7640dbda46abfdca981c2d3 - datadog_token = BzHpkcs7LujMb3Q1vLRRjbpBNxxYV0ousumYoKJS references: - https://docs.datadoghq.com/account_management/api-app-keys/ \ No newline at end of file diff --git a/install-precommit-hook.sh b/install-precommit-hook.sh old mode 100644 new mode 100755 index 6a6283f..7a81ba6 --- a/install-precommit-hook.sh +++ b/install-precommit-hook.sh @@ -1,17 +1,54 @@ #!/usr/bin/env bash +# +# Install a Git pre-commit hook that runs `kingfisher scan`. +# --global → install once for all repos using core.hooksPath +# --force → overwrite an existing pre-commit hook +# set -euo pipefail -HOOK_DIR="$(git rev-parse --git-dir)/hooks" +MODE="local" +FORCE=0 + +while [[ $# -gt 0 ]]; do + case "$1" in + -g|--global) MODE="global" ;; + -f|--force) FORCE=1 ;; + -h|--help) + echo "Usage: $0 [--global] [--force]" && exit 0 + ;; + *) echo "Unknown flag: $1" >&2; exit 1 ;; + esac + shift +done + +if [[ "$MODE" == "local" ]]; then + # ensure we're inside a Git repo + REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) \ + || { echo "Not inside a Git repository" >&2; exit 1; } + + HOOK_DIR="$(git rev-parse --git-dir)/hooks" +else + # global: honour existing core.hooksPath or default to ~/.git-hooks + HOOK_DIR=$(git config --global --get core.hooksPath || echo "$HOME/.git-hooks") + mkdir -p "$HOOK_DIR" + + # if the user hasn’t set core.hooksPath, do it now + if ! git config --global --get core.hooksPath >/dev/null; then + git config --global core.hooksPath "$HOOK_DIR" + echo "Set git config --global core.hooksPath to $HOOK_DIR" + fi +fi + HOOK_PATH="$HOOK_DIR/pre-commit" -if [ -e "$HOOK_PATH" ]; then - echo "Error: $HOOK_PATH already exists. Move or remove the existing hook to continue." 
>&2 +if [[ -e "$HOOK_PATH" && $FORCE -eq 0 ]]; then + echo "Error: $HOOK_PATH already exists. Use --force to overwrite." >&2 exit 1 fi -cat > "$HOOK_PATH" <<'HOOK' +cat >"$HOOK_PATH" <<'HOOK' #!/usr/bin/env bash -# Pre-commit hook to run Kingfisher scan on staged changes +# Git pre-commit hook to run Kingfisher on staged changes set -euo pipefail if ! command -v kingfisher >/dev/null 2>&1; then @@ -22,11 +59,11 @@ fi git diff --cached --name-only -z | \ xargs -0 --no-run-if-empty kingfisher scan --no-update-check status=$? -if [ "$status" -ne 0 ]; then +if [[ $status -ne 0 ]]; then echo "Kingfisher detected secrets in staged files. Commit aborted." >&2 - exit "$status" + exit $status fi HOOK chmod +x "$HOOK_PATH" -echo "Pre-commit hook installed to $HOOK_PATH" +echo "Pre-commit hook installed to $HOOK_PATH ($MODE mode)" diff --git a/install-prereceive-hook.sh b/install-prereceive-hook.sh old mode 100644 new mode 100755 diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index c7a59bc..f698d87 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -26,7 +26,8 @@ pub struct InputSpecifierArgs { "git_url", "all_github_organizations", "all_gitlab_groups", - "jira_url" + "jira_url", + "docker_image" ]), value_hint = ValueHint::AnyPath )] @@ -97,6 +98,11 @@ pub struct InputSpecifierArgs { #[arg(long, default_value_t = 100)] pub max_results: usize, + /// Docker/OCI images to scan (no local Docker required) + #[arg(long = "docker-image")] + pub docker_image: Vec, + + /// Select how to clone Git repositories #[arg(long, default_value_t=GitCloneMode::Bare, alias="git-clone-mode")] pub git_clone: GitCloneMode, diff --git a/src/decompress.rs b/src/decompress.rs index 88a11ae..8bc43aa 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -256,12 +256,28 @@ fn make_output_path(path: &Path, base: Option<&Path>, extension: &str) -> PathBu } } -/* ───────────────────────────────────────────────────────────── */ pub fn 
decompress_file_to_temp(path: &Path) -> Result<(CompressedContent, TempDir)> { let temp_dir = tempdir()?; - let content = decompress_file(path, Some(temp_dir.path()))?; + let mut content = decompress_file(path, Some(temp_dir.path()))?; - if let CompressedContent::Archive(ref files) = content { + // if let CompressedContent::Archive(ref files) = content { + let mut prefix_for_replace = None; + if let Some(stem) = path.file_stem() { + let candidate = temp_dir.path().join(stem).with_extension("decomp.tar"); + prefix_for_replace = Some(candidate); + } + + if let CompressedContent::Archive(ref mut files) = content { + if let Some(prefix) = &prefix_for_replace { + let prefix_str = prefix.display().to_string(); + for (name, _) in files.iter_mut() { + if let Some(rest) = name.strip_prefix(&prefix_str) { + if let Some((_, suffix)) = rest.split_once('!') { + *name = format!("{}!{}", path.display(), suffix); + } + } + } + } for (name, data) in files { let rel = name.split_once('!').map(|(_, sub)| sub).unwrap_or(name); let p = temp_dir.path().join(rel.replace('\\', "/")); @@ -270,6 +286,17 @@ pub fn decompress_file_to_temp(path: &Path) -> Result<(CompressedContent, TempDi } fs::write(p, data)?; } + } else if let CompressedContent::ArchiveFiles(ref mut entries) = content { + if let Some(prefix) = &prefix_for_replace { + let prefix_str = prefix.display().to_string(); + for (name, _) in entries.iter_mut() { + if let Some(rest) = name.strip_prefix(&prefix_str) { + if let Some((_, suffix)) = rest.split_once('!') { + *name = format!("{}!{}", path.display(), suffix); + } + } + } + } } Ok((content, temp_dir)) } diff --git a/src/findings_store.rs b/src/findings_store.rs index 7d3cd76..5972490 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -52,6 +52,7 @@ pub struct FindingsStore { bloom_items: usize, blob_meta: FxHashMap>, origin_meta: FxHashMap>, + docker_images: FxHashMap, } impl FindingsStore { pub fn new(clone_dir: PathBuf) -> Self { @@ -69,6 +70,7 @@ impl 
FindingsStore { clone_dir, seen_bloom, bloom_items: 0, + docker_images: FxHashMap::default(), } } @@ -286,6 +288,13 @@ impl FindingsStore { self.clone_dir.clone() } + pub fn register_docker_image(&mut self, dir: PathBuf, image: String) { + self.docker_images.insert(dir, image); + } + + pub fn docker_images(&self) -> &FxHashMap { + &self.docker_images + } pub fn get_finding_data_iter( &self, diff --git a/src/main.rs b/src/main.rs index 3b0d444..a85fb48 100644 --- a/src/main.rs +++ b/src/main.rs @@ -281,7 +281,9 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, diff --git a/src/reporter.rs b/src/reporter.rs index f6a3331..ca69c97 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -140,6 +140,21 @@ impl DetailsReporter { } } + fn docker_display_path(&self, path: &std::path::Path) -> Option { + let ds = self.datastore.lock().ok()?; + for (dir, image) in ds.docker_images().iter() { + if path.starts_with(dir) { + let rel = path.strip_prefix(dir).ok()?; + let mut rel_str = rel.display().to_string(); + rel_str = rel_str.replace(".decomp.tar!", ".tar.gz | "); + rel_str = rel_str.replace(".tar!", ".tar | "); + rel_str = rel_str.replace('!', " | "); + return Some(format!("{} | {}", image, rel_str)); + } + } + None + } + fn gather_findings(&self) -> Result> { let metadata_list = self.get_finding_data()?; let all_matches = self.get_filtered_matches()?; diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 31123b6..6916337 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -103,6 +103,8 @@ impl DetailsReporter { if let Origin::File(e) = origin { if let Some(url) = self.jira_issue_url(&e.path, args) { Some(url) + } else if let Some(mapped) = self.docker_display_path(&e.path) { + Some(mapped) } else { 
Some(e.path.display().to_string()) } @@ -252,6 +254,8 @@ impl DetailsReporter { if let Origin::File(e) = origin { if let Some(url) = self.jira_issue_url(&e.path, args) { Some(url) + } else if let Some(mapped) = self.docker_display_path(&e.path) { + Some(mapped) } else { Some(e.path.display().to_string()) } @@ -428,7 +432,9 @@ mod tests { // Jira options jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index 0b4a46c..ffa7cf0 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -216,6 +216,8 @@ impl<'a> Display for PrettyFinding<'a> { Origin::File(e) => { let display_path = if let Some(url) = reporter.jira_issue_url(&e.path, args) { url + } else if let Some(mapped) = reporter.docker_display_path(&e.path) { + mapped } else { e.path.display().to_string() }; @@ -344,7 +346,9 @@ fn test_pretty_format_with_nan_entropy_panics() { // Jira options jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, 
diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs
new file mode 100644
index 0000000..775e687
--- /dev/null
+++ b/src/scanner/docker.rs
@@ -0,0 +1,346 @@
+use std::env;
+use std::fs::File;
+use std::io::{Read, Write};
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+use std::time::Duration;
+
+use anyhow::{anyhow, Context, Result};
+use base64::Engine;
+use indicatif::{ProgressBar, ProgressStyle};
+use oci_client::client::{linux_amd64_resolver, Client, ClientConfig};
+use oci_client::secrets::RegistryAuth;
+use oci_client::Reference;
+use serde_json::Value;
+use sha2::{Digest, Sha256};
+use tracing::debug;
+use walkdir::WalkDir;
+
+use crate::decompress::decompress_file;
+
+/// Key under which Docker stores Docker Hub credentials in `config.json`.
+const DOCKER_HUB_AUTH_KEY: &str = "https://index.docker.io/v1/";
+
+/// Runs `docker-credential-<helper> get` for `registry` and returns `(username, secret)`.
+///
+/// The registry name is written to the helper's stdin and the `Username` / `Secret`
+/// fields of its JSON reply are returned. Any failure (helper cannot be spawned,
+/// non-zero exit, unparsable reply) yields `None` so the caller can try the next source.
+fn helper_get_creds(helper: &str, registry: &str) -> Option<(String, String)> {
+    fn run(bin: &str, registry: &str) -> Option<(String, String)> {
+        let mut child = Command::new(bin)
+            .arg("get")
+            .stdin(Stdio::piped())
+            .stdout(Stdio::piped())
+            .stderr(Stdio::null())
+            .spawn()
+            .ok()?;
+        if let Some(stdin) = child.stdin.as_mut() {
+            // Best effort: a helper that rejects the input reports it via its exit status.
+            let _ = stdin.write_all(format!("{registry}\n").as_bytes());
+        }
+        let output = child.wait_with_output().ok()?;
+        if !output.status.success() {
+            return None;
+        }
+        let reply: Value = serde_json::from_slice(&output.stdout).ok()?;
+        let user = reply.get("Username")?.as_str()?.to_owned();
+        let secret = reply.get("Secret")?.as_str()?.to_owned();
+        Some((user, secret))
+    }
+
+    run(&format!("docker-credential-{helper}"), registry).or_else(|| {
+        // `keychain` is retried under the `osxkeychain` binary name.
+        (helper == "keychain")
+            .then(|| run("docker-credential-osxkeychain", registry))
+            .flatten()
+    })
+}
+
+/// Turn `registry.example.com/foo/bar:latest` into something like
+/// `registry.example.com_foo_bar_latest_4d3c9e83`.
+fn image_dir_name(reference: &str) -> String {
+    // Flatten path and tag separators so the name stays readable.
+    let readable = reference.replace(['/', ':'], "_");
+    // A truncated SHA-256 of the original reference keeps names apart even when
+    // flattening makes two references look alike (`a/b` vs `a_b`).
+    let digest = hex::encode(Sha256::digest(reference.as_bytes()));
+    format!("{readable}_{}", &digest[..8])
+}
+
+/// Returns the Docker Hub config key when `registry` is one of Docker Hub's host names.
+///
+/// Docker records Hub logins under `https://index.docker.io/v1/`, which never equals
+/// the bare registry host, so Hub images need this extra lookup key.
+fn docker_hub_alias(registry: &str) -> Option<&'static str> {
+    matches!(registry, "docker.io" | "index.docker.io" | "registry-1.docker.io")
+        .then_some(DOCKER_HUB_AUTH_KEY)
+}
+
+/// Looks up `(username, password)` for `registry` in the Docker CLI configuration.
+///
+/// The config directory is `$DOCKER_CONFIG`, else `~/.docker` (`HOME`, or `USERPROFILE`
+/// when `HOME` is unset, as is usual on Windows). Sources are tried in order:
+/// per-registry `credHelpers`, the global `credsStore`, then base64 `auths` entries.
+fn creds_from_docker_config(registry: &str) -> Option<(String, String)> {
+    let config_dir = env::var_os("DOCKER_CONFIG").map(PathBuf::from).or_else(|| {
+        env::var_os("HOME")
+            .or_else(|| env::var_os("USERPROFILE"))
+            .map(|home| PathBuf::from(home).join(".docker"))
+    })?;
+    let content = std::fs::read_to_string(config_dir.join("config.json")).ok()?;
+    let json: Value = serde_json::from_str(&content).ok()?;
+
+    let hub_alias = docker_hub_alias(registry);
+    let servers: Vec<&str> = std::iter::once(registry).chain(hub_alias).collect();
+
+    for server in &servers {
+        let helper = json
+            .get("credHelpers")
+            .and_then(|v| v.get(*server))
+            .and_then(Value::as_str);
+        if let Some(creds) = helper.and_then(|h| helper_get_creds(h, server)) {
+            return Some(creds);
+        }
+    }
+    if let Some(store) = json.get("credsStore").and_then(Value::as_str) {
+        if let Some(creds) = servers.iter().find_map(|s| helper_get_creds(store, s)) {
+            return Some(creds);
+        }
+    }
+
+    // Legacy inline entries: `auth` holds base64 of `user:password`.
+    let auths = json.get("auths").and_then(Value::as_object)?;
+    let candidates =
+        [registry.to_owned(), format!("https://{registry}"), format!("http://{registry}")];
+    let entry = candidates
+        .iter()
+        .map(String::as_str)
+        .chain(hub_alias)
+        .find_map(|key| auths.get(key))?;
+    let encoded = entry.get("auth").and_then(Value::as_str)?;
+    let decoded = base64::engine::general_purpose::STANDARD.decode(encoded).ok()?;
+    let cred = String::from_utf8(decoded).ok()?;
+    let (user, pass) = cred.split_once(':')?;
+    Some((user.to_owned(), pass.to_owned()))
+}
+
+/// Chooses registry credentials for `reference`.
+///
+/// A non-blank `KF_DOCKER_TOKEN` wins: `user:pass` becomes Basic auth, anything else a
+/// Bearer token. Otherwise Docker CLI credentials are used, falling back to anonymous.
+fn registry_auth(reference: &Reference) -> RegistryAuth {
+    // A blank variable is treated as unset instead of being sent as an empty bearer token.
+    let token = env::var("KF_DOCKER_TOKEN").ok().filter(|t| !t.trim().is_empty());
+    if let Some(token) = token {
+        return match token.split_once(':') {
+            Some((user, pass)) => RegistryAuth::Basic(user.to_owned(), pass.to_owned()),
+            None => RegistryAuth::Bearer(token),
+        };
+    }
+    match creds_from_docker_config(reference.registry()) {
+        Some((user, pass)) => RegistryAuth::Basic(user, pass),
+        None => RegistryAuth::Anonymous,
+    }
+}
+
+/// Spinner with a `pos/len` counter, or a hidden bar when progress output is disabled.
+fn layer_progress(use_progress: bool) -> ProgressBar {
+    if !use_progress {
+        return ProgressBar::hidden();
+    }
+    let style = ProgressStyle::with_template("{spinner} {msg} {pos}/{len}")
+        .expect("progress template");
+    let pb = ProgressBar::new(0).with_style(style);
+    pb.enable_steady_tick(Duration::from_millis(100));
+    pb
+}
+
+/// Lower-case hex SHA-256 of the file at `path`, read in fixed-size chunks so a large
+/// layer is never held in memory all at once.
+fn file_sha256_hex(path: &Path) -> Result<String> {
+    let mut file = File::open(path)?;
+    let mut hasher = Sha256::new();
+    let mut buf = [0u8; 64 * 1024];
+    loop {
+        let n = match file.read(&mut buf) {
+            Ok(0) => break,
+            Ok(n) => n,
+            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
+            Err(e) => return Err(e.into()),
+        };
+        hasher.update(&buf[..n]);
+    }
+    Ok(hex::encode(hasher.finalize()))
+}
+
+/// Exports container images to a directory tree that the filesystem scanner can walk.
+#[derive(Debug, Default)]
+pub struct Docker;
+
+impl Docker {
+    pub fn new() -> Self {
+        Docker
+    }
+
+    /// Exports `image` from the local Docker daemon into `out_dir` and unpacks its layers.
+    ///
+    /// Fails with "image not local" when `docker image inspect` does not succeed, which
+    /// also covers a missing `docker` binary.
+    fn try_save_local_image(&self, image: &str, out_dir: &Path, use_progress: bool) -> Result<()> {
+        let inspect = Command::new("docker")
+            .args(["image", "inspect", image])
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .status();
+        if !matches!(inspect, Ok(s) if s.success()) {
+            return Err(anyhow!("image not local"));
+        }
+
+        let pb = layer_progress(use_progress);
+        pb.set_message(format!("saving local {image}"));
+
+        std::fs::create_dir_all(out_dir)?;
+        let tar_path = out_dir.join("local_image.tar");
+        let status = Command::new("docker")
+            .args(["image", "save", image, "-o"])
+            // Pass the path as an OS string so non-UTF-8 directories are not mangled.
+            .arg(&tar_path)
+            .status()
+            .context("running docker save")?;
+        if !status.success() {
+            pb.finish_with_message("docker save failed");
+            return Err(anyhow!("failed to save local image"));
+        }
+
+        pb.set_message("extracting layers");
+        // NOTE(review): `local_image.tar` stays in `out_dir` afterwards, unlike the
+        // per-layer tars removed below — confirm that is intended.
+        decompress_file(&tar_path, Some(out_dir))?;
+
+        let mut layer_paths = Vec::new();
+        for entry in WalkDir::new(out_dir) {
+            let entry = entry?;
+            if entry.file_name() == "layer.tar" {
+                layer_paths.push(entry.into_path());
+            }
+        }
+
+        pb.set_length(layer_paths.len() as u64);
+        for layer in layer_paths {
+            let digest = file_sha256_hex(&layer)?;
+            let renamed = out_dir.join(format!("layer_{digest}.tar"));
+            std::fs::rename(&layer, &renamed)?;
+            // extract layer contents so inner filenames appear in scan results
+            decompress_file(&renamed, Some(out_dir))?;
+            std::fs::remove_file(&renamed)?;
+            pb.inc(1);
+        }
+
+        pb.finish_with_message(format!("saved {image}"));
+        Ok(())
+    }
+
+    /// Materialises `image` under `out_dir`, preferring a local copy over a registry pull.
+    ///
+    /// When the local export fails for any reason the image is pulled with the OCI client
+    /// (linux/amd64 resolver) and every layer is unpacked into `out_dir`.
+    pub async fn save_image_to_dir(
+        &self,
+        image: &str,
+        out_dir: &Path,
+        use_progress: bool,
+    ) -> Result<()> {
+        match self.try_save_local_image(image, out_dir, use_progress) {
+            Ok(()) => return Ok(()),
+            // Keep the reason visible in debug logs instead of discarding it.
+            Err(err) => debug!("local export of {image} failed: {err:#}"),
+        }
+        let reference: Reference =
+            image.parse().with_context(|| format!("invalid image reference {image}"))?;
+        debug!("Pulling {image}");
+        let pb = layer_progress(use_progress);
+        pb.set_message(format!("pulling {image}"));
+        let client = Client::new(ClientConfig {
+            platform_resolver: Some(Box::new(linux_amd64_resolver)),
+            ..Default::default()
+        });
+        let auth = registry_auth(&reference);
+        let accepted = vec![
+            oci_client::manifest::IMAGE_LAYER_MEDIA_TYPE,
+            oci_client::manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE,
+            oci_client::manifest::IMAGE_DOCKER_LAYER_TAR_MEDIA_TYPE,
+            oci_client::manifest::IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE,
+        ];
+        let pulled = client.pull(&reference, &auth, accepted).await?;
+        pb.set_length(pulled.layers.len() as u64);
+        pb.set_message("extracting layers");
+
+        std::fs::create_dir_all(out_dir)?;
+        for layer in pulled.layers {
+            let ext = match layer.media_type.as_str() {
+                oci_client::manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE
+                | oci_client::manifest::IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE => "tar.gz",
+                oci_client::manifest::IMAGE_LAYER_MEDIA_TYPE
+                | oci_client::manifest::IMAGE_DOCKER_LAYER_TAR_MEDIA_TYPE => "tar",
+                _ => "bin",
+            };
+            // A digest may carry an `algo:` prefix; ':' is not valid in Windows file names.
+            let digest = layer.sha256_digest().replace(':', "_");
+            let layer_path = out_dir.join(format!("layer_{digest}.{ext}"));
+            std::fs::write(&layer_path, &layer.data)?;
+            decompress_file(&layer_path, Some(out_dir))?;
+            std::fs::remove_file(&layer_path)?;
+            pb.inc(1);
+        }
+        pb.finish_with_message(format!("saved {image}"));
+        Ok(())
+    }
+}
+
+/// Saves every image in `images` into its own `docker_<name>` directory under
+/// `clone_root` and returns each output directory paired with its image reference.
+pub async fn save_docker_images(
+    images: &[String],
+    clone_root: &Path,
+    use_progress: bool,
+) -> Result<Vec<(PathBuf, String)>> {
+    let docker = Docker::new();
+    let mut dirs = Vec::with_capacity(images.len());
+
+    for image in images {
+        let dir_name = image_dir_name(image);
+        let out_dir = clone_root.join(format!("docker_{dir_name}"));
+        docker
+            .save_image_to_dir(image, &out_dir, use_progress)
+            .await
+            .with_context(|| format!("saving image {image}"))?;
+        dirs.push((out_dir, image.clone()));
+    }
+
+    Ok(dirs)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn docker_struct_new() {
+        let _ = Docker::new();
+    }
+
+    #[test]
+    fn image_dir_name_is_flat_and_distinct() {
+        let a = image_dir_name("registry.example.com/foo/bar:latest");
+        assert!(a.starts_with("registry.example.com_foo_bar_latest_"));
+        assert!(!a.contains('/') && !a.contains(':'));
+        assert_ne!(image_dir_name("a/b"), image_dir_name("a_b"));
+    }
+
+    #[test]
+    fn docker_hub_hosts_map_to_legacy_key() {
+        assert_eq!(docker_hub_alias("docker.io"), Some(DOCKER_HUB_AUTH_KEY));
+        assert_eq!(docker_hub_alias("ghcr.io"), None);
+    }
+}
\ No 
newline at end of file diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index 0b4423f..fff9440 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -3,7 +3,9 @@ pub(crate) use enumerate::enumerate_filesystem_inputs; pub(crate) use repos::{clone_or_update_git_repos, enumerate_github_repos}; pub use runner::{load_and_record_rules, run_async_scan, run_scan}; pub(crate) use validation::run_secret_validation; +pub(crate) use docker::save_docker_images; +mod docker; mod enumerate; mod processing; mod repos; diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 99ad466..f1271cf 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -19,7 +19,7 @@ use crate::{ scanner::{ clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos, repos::{enumerate_gitlab_repos, fetch_jira_issues}, - run_secret_validation, + run_secret_validation, save_docker_images, summary::print_scan_summary, }, }; @@ -68,6 +68,27 @@ pub async fn run_async_scan( let jira_dirs = fetch_jira_issues(args, global_args, &datastore).await?; input_roots.extend(jira_dirs); + // Save Docker images if specified + if !args.input_specifier_args.docker_image.is_empty() { + let clone_root = { + let ds = datastore.lock().unwrap(); + ds.clone_root() + }; + let docker_dirs = save_docker_images( + &args.input_specifier_args.docker_image, + &clone_root, + progress_enabled, + ) + .await?; + for (dir, img) in docker_dirs { + { + let mut ds = datastore.lock().unwrap(); + ds.register_docker_image(dir.clone(), img); + } + input_roots.push(dir); + } + } + if input_roots.is_empty() { bail!("No inputs to scan"); } diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 4c4975c..2763ebd 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -81,7 +81,9 @@ rules: jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: 
GitHistoryMode::Full, diff --git a/tests/int_github.rs b/tests/int_github.rs index c8256c8..d4f7f25 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -68,7 +68,9 @@ fn test_github_remote_scan() -> Result<()> { jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 6ec6e19..67b1bc3 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -67,7 +67,9 @@ fn test_gitlab_remote_scan() -> Result<()> { jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, scan_nested_repos: true, diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 933c068..3e21947 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -124,7 +124,9 @@ async fn test_validation_cache_and_depvars() -> Result<()> { jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 0da7868..ad78192 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -67,7 +67,9 @@ impl TestContext { jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, @@ -135,7 +137,9 @@ impl TestContext { jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: 
GitHistoryMode::Full, diff --git a/tests/smoke_docker.rs b/tests/smoke_docker.rs new file mode 100644 index 0000000..2fd61bc --- /dev/null +++ b/tests/smoke_docker.rs @@ -0,0 +1,20 @@ +use assert_cmd::prelude::*; +use predicates::prelude::*; +use std::process::Command; + +#[test] +fn smoke_scan_docker_image() -> anyhow::Result<()> { + Command::cargo_bin("kingfisher")? + .args([ + "scan", + "--docker-image", + "ghcr.io/owasp/wrongsecrets/wrongsecrets-master:latest-master", + "--format", + "json", + "--no-update-check", + ]) + .assert() + .code(205) + .stdout(predicate::str::contains("Active Credential")); + Ok(()) +} \ No newline at end of file