From 96ab0d4b59305116b2f4229cf4aaabcf96fe1067 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 2 Aug 2025 20:40:16 -0700 Subject: [PATCH 1/7] -Added support for scanning AWS S3 buckets via --s3-bucket and optional --s3-prefix - Added --role-arn and --aws-local-profile flags for S3 authentication alongside KF_AWS_KEY/KF_AWS_SECRET --- CHANGELOG.md | 4 ++ Cargo.toml | 5 +- README.md | 10 ++++ src/cli/commands/inputs.rs | 20 +++++++- src/findings_store.rs | 10 ++++ src/lib.rs | 1 + src/main.rs | 5 ++ src/reporter.rs | 11 +++++ src/reporter/json_format.rs | 22 ++++++--- src/reporter/pretty_format.rs | 25 ++++++++-- src/reporter/sarif_format.rs | 23 ++++++++-- src/s3.rs | 86 +++++++++++++++++++++++++++++++++++ src/scanner/repos.rs | 73 +++++++++++++++++++++++++++-- src/scanner/runner.rs | 43 ++++++++++++++---- tests/int_dedup.rs | 5 ++ tests/int_github.rs | 5 ++ tests/int_gitlab.rs | 5 ++ tests/int_slack.rs | 9 ++++ tests/int_validation_cache.rs | 5 ++ tests/int_vulnerable_files.rs | 10 ++++ 20 files changed, 347 insertions(+), 30 deletions(-) create mode 100644 src/s3.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 350a3fb..07607fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this project will be documented in this file. 
+## [1.32.0] +- Added support for scanning AWS S3 buckets via `--s3-bucket` and optional `--s3-prefix` +- Added `--role-arn` and `--aws-local-profile` flags for S3 authentication alongside `KF_AWS_KEY`/`KF_AWS_SECRET` +- ## [1.31.0] - New rules: Telegram bot token, OpenWeatherMap, Apify, Groq - New OpenAI detectors added (@joshlarsen) diff --git a/Cargo.toml b/Cargo.toml index de43779..425db27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.31.0" +version = "1.32.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -186,6 +186,7 @@ oci-client = { version = "0.15", default-features = false, features = ["rustls-t walkdir = "2.5.0" p256 = "0.13.2" ed25519-dalek = { version = "2.2", features = ["pkcs8"] } +aws-sdk-s3 = "1.100.0" [dependencies.tikv-jemallocator] version = "0.6" @@ -207,7 +208,7 @@ rand_chacha = "0.9.0" [profile.release] debug = false -strip = "debuginfo" +strip = true #"debuginfo" opt-level = 3 # Maximum optimization for performance lto = true # Enable Link Time Optimization codegen-units = 1 # Optimize for size but slower compilation diff --git a/README.md b/README.md index a837351..4af4295 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co - **Docker images**: public or private via `--docker-image` - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` - **Slack messages**: query‑based scans with `--slack-query` + - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, or `--aws-local-profile` - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and 
Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) @@ -109,6 +110,15 @@ docker run --rm \ ghcr.io/mongodb/kingfisher:latest \ scan --git-url https://github.com/org/private_repo.git +# Scan an S3 bucket +# Credentials can come from KF_AWS_KEY/KF_AWS_SECRET, --role-arn, or --aws-local-profile +docker run --rm \ + -e KF_AWS_KEY=AKIA... \ + -e KF_AWS_SECRET=g5nYW... \ + ghcr.io/mongodb/kingfisher:latest \ + scan --s3-bucket bucket-name + + # Scan and write a JSON report locally # Here we: # 1. Mount $PWD → /proj diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 8a1c23d..ea38722 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -28,7 +28,8 @@ pub struct InputSpecifierArgs { "all_gitlab_groups", "jira_url", "docker_image", - "slack_query" + "slack_query", + "s3_bucket" ]), value_hint = ValueHint::AnyPath )] @@ -107,6 +108,23 @@ pub struct InputSpecifierArgs { #[arg(long, default_value_t = 100)] pub max_results: usize, + /// Scan the specified S3 bucket + #[arg(long)] + pub s3_bucket: Option, + + /// Optional prefix within the S3 bucket + #[arg(long, requires = "s3_bucket")] + pub s3_prefix: Option, + + /// AWS IAM role ARN to assume for S3 access + #[arg(long, requires = "s3_bucket")] + pub role_arn: Option, + + /// Use credentials from a local AWS profile in ~/.aws/config + #[arg(long, requires = "s3_bucket")] + pub aws_local_profile: Option, + + /// Docker/OCI images to scan (no local Docker required) #[arg(long = "docker-image")] pub docker_image: Vec, diff --git a/src/findings_store.rs b/src/findings_store.rs index 93e9f1c..a1c94d4 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -54,6 +54,7 @@ pub struct FindingsStore { origin_meta: FxHashMap>, docker_images: FxHashMap, slack_links: FxHashMap, + s3_buckets: FxHashMap, } impl FindingsStore { pub fn new(clone_dir: PathBuf) -> Self { @@ -73,6 +74,7 @@ impl 
FindingsStore { bloom_items: 0, docker_images: FxHashMap::default(), slack_links: FxHashMap::default(), + s3_buckets: FxHashMap::default(), } } @@ -306,6 +308,14 @@ impl FindingsStore { &self.slack_links } + pub fn register_s3_bucket(&mut self, dir: PathBuf, bucket: String) { + self.s3_buckets.insert(dir, bucket); + } + + pub fn s3_buckets(&self) -> &FxHashMap { + &self.s3_buckets + } + pub fn get_finding_data_iter( &self, ) -> impl Iterator + '_ { diff --git a/src/lib.rs b/src/lib.rs index 85bc57c..90d0451 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,7 @@ pub mod rule_profiling; pub mod rules; pub mod rules_database; pub mod safe_list; +pub mod s3; pub mod scanner; pub mod scanner_pool; pub mod serde_utils; diff --git a/src/main.rs b/src/main.rs index 9c30b92..73c77a5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -286,6 +286,11 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { jira_url: None, jql: None, max_results: 100, + + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Slack query slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/src/reporter.rs b/src/reporter.rs index 210da31..ad0efe9 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -141,6 +141,17 @@ impl DetailsReporter { ds.slack_links().get(path).cloned() } + fn s3_display_path(&self, path: &std::path::Path) -> Option { + let ds = self.datastore.lock().ok()?; + for (dir, bucket) in ds.s3_buckets().iter() { + if path.starts_with(dir) { + let rel = path.strip_prefix(dir).ok()?; + return Some(format!("s3://{}/{}", bucket, rel.display())); + } + } + None + } + fn docker_display_path(&self, path: &std::path::Path) -> Option { let ds = self.datastore.lock().ok()?; for (dir, image) in ds.docker_images().iter() { diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 5533b55..9fcb1ec 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -99,20 +99,22 @@ impl 
DetailsReporter { let file_path = rm .origin .iter() - .find_map(|origin| { - if let Origin::File(e) = origin { + .find_map(|origin| match origin { + Origin::File(e) => { if let Some(url) = self.jira_issue_url(&e.path, args) { Some(url) } else if let Some(url) = self.slack_message_url(&e.path) { Some(url) + } else if let Some(mapped) = self.s3_display_path(&e.path) { + Some(mapped) } else if let Some(mapped) = self.docker_display_path(&e.path) { Some(mapped) } else { Some(e.path.display().to_string()) } - } else { - None - } + } + Origin::Extended(e) => e.path().map(|p| p.display().to_string()), + _ => None, }) .unwrap_or_default(); @@ -258,11 +260,15 @@ impl DetailsReporter { Some(url) } else if let Some(url) = self.slack_message_url(&e.path) { Some(url) + } else if let Some(mapped) = self.s3_display_path(&e.path) { + Some(mapped) } else if let Some(mapped) = self.docker_display_path(&e.path) { Some(mapped) } else { Some(e.path.display().to_string()) } + } else if let Origin::Extended(e) = origin { + e.path().map(|p| p.display().to_string()) } else { None } @@ -437,10 +443,14 @@ mod tests { jira_url: None, jql: None, max_results: 100, - // Docker image scanning // Slack options slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, docker_image: Vec::new(), // clone / history options diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index 62dd354..942e7ad 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -218,6 +218,8 @@ impl<'a> Display for PrettyFinding<'a> { url } else if let Some(url) = reporter.slack_message_url(&e.path) { url + } else if let Some(mapped) = reporter.s3_display_path(&e.path) { + mapped } else if let Some(mapped) = reporter.docker_display_path(&e.path) { mapped } else { @@ -233,13 +235,23 @@ impl<'a> Display for PrettyFinding<'a> { } )?; } + Origin::Extended(e) => { + if 
let Some(p) = e.path() { + let display_path = p.display().to_string(); + writeln!( + f, + " |Path..........: {}", + if rm.validation_success { + reporter.style_active_creds(&display_path).to_string() + } else { + display_path + } + )?; + } + } Origin::GitRepo(e) => { reporter.write_git_metadata(f, e, args, source_span.start.line)?; } - Origin::Extended(e) => { - writeln!(f, " |Extended......: {}", reporter.style_metadata(e).to_string())?; - // Convert StyledObject to String - } } } Ok(()) @@ -353,6 +365,11 @@ fn test_pretty_format_with_nan_entropy_panics() { // Slack options slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), // git clone / history options diff --git a/src/reporter/sarif_format.rs b/src/reporter/sarif_format.rs index 5829bba..033d37c 100644 --- a/src/reporter/sarif_format.rs +++ b/src/reporter/sarif_format.rs @@ -75,6 +75,8 @@ impl DetailsReporter { url } else if let Some(url) = self.slack_message_url(&e.path) { url + } else if let Some(mapped) = self.s3_display_path(&e.path) { + mapped } else { e.path.display().to_string() }; @@ -82,6 +84,16 @@ impl DetailsReporter { sarif::ArtifactLocationBuilder::default().uri(uri).build().ok()?, ); } + Origin::Extended(e) => { + if let Some(p) = e.path() { + artifact_locations.push( + sarif::ArtifactLocationBuilder::default() + .uri(p.display().to_string()) + .build() + .ok()?, + ); + } + } Origin::GitRepo(e) => { // Extract and store Git metadata if let Some(git_metadata) = self.extract_git_metadata(e, source_span) { @@ -111,7 +123,6 @@ impl DetailsReporter { ); } } - Origin::Extended(_) => (), } } @@ -212,11 +223,18 @@ impl DetailsReporter { url } else if let Some(url) = self.slack_message_url(&e.path) { url + } else if let Some(mapped) = self.s3_display_path(&e.path) { + mapped } else { e.path.display().to_string() }; 
msg.push_str(&format!("Location: {}\n", uri)); } + Origin::Extended(e) => { + if let Some(p) = e.path() { + msg.push_str(&format!("Location: {}\n", p.display())); + } + } Origin::GitRepo(e) => { if let Some(cs) = &e.first_commit { let repo_url = get_repo_url(&e.repo_path) @@ -235,9 +253,6 @@ impl DetailsReporter { msg.push_str(&format!("File: {}", cs.blob_path)); } } - Origin::Extended(e) => { - msg.push_str(&format!("Extended: {}\n", e)); - } } msg } else { diff --git a/src/s3.rs b/src/s3.rs new file mode 100644 index 0000000..5f35dde --- /dev/null +++ b/src/s3.rs @@ -0,0 +1,86 @@ +use anyhow::{Context, Result}; +use aws_config::{meta::region::RegionProviderChain, BehaviorVersion}; +use aws_credential_types::Credentials; +use aws_sdk_s3::Client; + +/// Visit all objects in the given S3 bucket (optionally under a prefix), +/// calling `visitor` with each object's key and bytes. +pub async fn visit_bucket_objects( + bucket: &str, + prefix: Option<&str>, + role_arn: Option<&str>, + profile: Option<&str>, + mut visitor: F, +) -> Result<()> +where + F: FnMut(String, Vec) -> Result<()>, +{ + let mut config_loader = aws_config::defaults(BehaviorVersion::latest()); + + if let Some(profile) = profile { + config_loader = config_loader.profile_name(profile); + } + + // If explicit credentials are provided via KF_AWS_KEY/KF_AWS_SECRET use them + if let (Ok(key), Ok(secret)) = (std::env::var("KF_AWS_KEY"), std::env::var("KF_AWS_SECRET")) { + let creds = Credentials::new(key, secret, None, None, "kf_env"); + config_loader = config_loader.credentials_provider(creds); + } + + // Resolve region using the default chain, falling back to us-east-1 + let region_provider = RegionProviderChain::default_provider().or_else("us-east-1"); + let base_config = config_loader.region(region_provider).load().await; + + let client = if let Some(role) = role_arn { + let assume_role = aws_config::sts::AssumeRoleProvider::builder(role.to_string()) + .session_name("kingfisher") + 
.configure(&base_config) + .build() + .await; + let conf = aws_sdk_s3::config::Builder::from(&base_config) + .credentials_provider(assume_role) + .build(); + Client::from_conf(conf) + } else { + Client::new(&base_config) + }; + + let mut continuation_token = None; + + loop { + let mut req = client.list_objects_v2().bucket(bucket.to_string()); + if let Some(p) = prefix { + req = req.prefix(p.to_string()); + } + if let Some(token) = continuation_token.clone() { + req = req.continuation_token(token); + } + + let resp = req.send().await.context("Failed to list objects in bucket")?; + + if let Some(objects) = resp.contents { + for obj in objects { + if let Some(key) = obj.key { + let get_resp = client + .get_object() + .bucket(bucket) + .key(&key) + .send() + .await + .with_context(|| format!("Failed to fetch object {key}"))?; + let data = + get_resp.body.collect().await.context("Failed to read S3 object body")?; + visitor(key, data.into_bytes().to_vec())?; + } + } + } + + if resp.is_truncated.unwrap_or(false) { + continuation_token = resp.next_continuation_token; + } else { + break; + } + } + + Ok(()) +} \ No newline at end of file diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 9d944ea..735e381 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -8,6 +8,7 @@ use indicatif::{HumanCount, ProgressBar, ProgressStyle}; use tokio::time::Duration; use tracing::{debug, error, info}; +use crate::blob::BlobIdMap; use crate::{ blob::BlobMetadata, cli::{ @@ -21,10 +22,15 @@ use crate::{ git_binary::{CloneMode, Git}, git_url::GitUrl, github, gitlab, jira, - matcher::Match, - origin::OriginSet, - slack, PathBuf, + matcher::{Match, Matcher, MatcherStats}, + origin::{Origin, OriginSet}, + rules_database::RulesDatabase, + s3, + scanner::processing::BlobProcessor, + scanner_pool::ScannerPool, + slack, guesser::Guesser, PathBuf, }; + pub type DatastoreMessage = (OriginSet, BlobMetadata, Vec<(Option, Match)>); pub fn clone_or_update_git_repos( @@ -284,3 +290,64 
@@ pub async fn fetch_slack_messages( } Ok(vec![output_dir]) } + + +pub async fn fetch_s3_objects( + args: &scan::ScanArgs, + datastore: &Arc>, + rules_db: &RulesDatabase, + matcher_stats: &Mutex, + enable_profiling: bool, + shared_profiler: Arc, +) -> Result<()> { + let Some(bucket) = args.input_specifier_args.s3_bucket.as_deref() else { + return Ok(()); + }; + let prefix = args.input_specifier_args.s3_prefix.as_deref(); + let role_arn = args.input_specifier_args.role_arn.as_deref(); + let profile = args.input_specifier_args.aws_local_profile.as_deref(); + + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); + let seen_blobs = BlobIdMap::new(); + let matcher = Matcher::new( + rules_db, + scanner_pool, + &seen_blobs, + Some(matcher_stats), + enable_profiling, + Some(shared_profiler.clone()), + )?; + let guesser = Guesser::new().expect("should be able to create filetype guesser"); + let mut processor = BlobProcessor { matcher, guesser }; + let bucket_name = bucket.to_string(); + + s3::visit_bucket_objects(bucket, prefix, role_arn, profile, |key, bytes| { + let origin = OriginSet::new( + Origin::from_extended(serde_json::json!({ + "path": format!("s3://{}/{}", bucket_name, key) + })), + Vec::new(), + ); + let blob = crate::blob::Blob::from_bytes(bytes); + + if let Some((origin, blob_md, scored_matches)) = processor.run(origin, blob, args.no_dedup)? 
{ + // Wrap origin & metadata once: + let origin_arc = Arc::new(origin); + let blob_arc = Arc::new(blob_md); + + // Now build a batch of exactly one FindingsStoreMessage per Match + let mut batch = Vec::with_capacity(scored_matches.len()); + for (_score, m) in scored_matches { + batch.push((origin_arc.clone(), blob_arc.clone(), m)); + } + + // Call record with the right type + let added = datastore.lock().unwrap().record(batch, !args.no_dedup); + debug!("Added {} new S3 blobs", added); + } + Ok(()) + }) + .await?; + + Ok(()) +} \ No newline at end of file diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 0a880da..f8dae87 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -18,7 +18,9 @@ use crate::{ rules_database::RulesDatabase, scanner::{ clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos, - repos::{enumerate_gitlab_repos, fetch_jira_issues, fetch_slack_messages}, + repos::{ + enumerate_gitlab_repos, fetch_jira_issues, fetch_s3_objects, fetch_slack_messages, + }, run_secret_validation, save_docker_images, summary::print_scan_summary, }, @@ -72,6 +74,7 @@ pub async fn run_async_scan( let slack_dirs = fetch_slack_messages(args, global_args, &datastore).await?; input_roots.extend(slack_dirs); + // Save Docker images if specified if !args.input_specifier_args.docker_image.is_empty() { let clone_root = { @@ -93,22 +96,42 @@ pub async fn run_async_scan( } } - if input_roots.is_empty() { - bail!("No inputs to scan"); - } + // if input_roots.is_empty() { + // bail!("No inputs to scan"); + // } let shared_profiler = Arc::new(ConcurrentRuleProfiler::new()); let enable_profiling = args.rule_stats; let matcher_stats = Mutex::new(MatcherStats::default()); - let _inputs = enumerate_filesystem_inputs( + + // Fetch S3 objects if requested (scanned immediately) + fetch_s3_objects( args, - datastore.clone(), - &input_roots, - progress_enabled, + &datastore, rules_db, + &matcher_stats, enable_profiling, 
Arc::clone(&shared_profiler), - &matcher_stats, - )?; + ) + .await?; + + let has_s3 = args.input_specifier_args.s3_bucket.is_some(); + if input_roots.is_empty() && !has_s3 { + bail!("No inputs to scan"); + } + + if !input_roots.is_empty() { + let _inputs = enumerate_filesystem_inputs( + args, + datastore.clone(), + &input_roots, + progress_enabled, + rules_db, + enable_profiling, + Arc::clone(&shared_profiler), + &matcher_stats, + )?; + } + if !args.no_dedup { // Final deduplication step before validation (or before reporting) diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index c42967f..0c93023 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -84,6 +84,11 @@ rules: max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), // git clone / history options diff --git a/tests/int_github.rs b/tests/int_github.rs index 4bda269..2892b91 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -71,6 +71,11 @@ fn test_github_remote_scan() -> Result<()> { max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), // git clone / history options diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index e53bbd8..0b55799 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -70,6 +70,11 @@ fn test_gitlab_remote_scan() -> Result<()> { max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), git_clone: GitCloneMode::Bare, diff --git a/tests/int_slack.rs b/tests/int_slack.rs index 699dad9..d22b8f0 100644 --- 
a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -59,6 +59,10 @@ impl TestContext { jql: None, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, max_results: 10, docker_image: Vec::new(), git_clone: GitCloneMode::Bare, @@ -147,6 +151,11 @@ async fn test_scan_slack_messages() -> Result<()> { slack_query: Some("test".into()), slack_api_url: Url::parse(&format!("{}/", server.uri()))?, max_results: 10, + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, docker_image: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index ae8dd50..a7ab9ea 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -127,6 +127,11 @@ async fn test_validation_cache_and_depvars() -> Result<()> { max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), // git clone / history options diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 187427e..abeb6f1 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -70,6 +70,11 @@ impl TestContext { max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), // git clone / history options @@ -142,6 +147,11 @@ impl TestContext { max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), 
// git clone / history options From ef6ba415f25f952c0f429089c99070c2b066cb43 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 3 Aug 2025 08:13:08 -0700 Subject: [PATCH 2/7] improving s3 bucket scanning feature --- Cargo.toml | 1 + src/s3.rs | 144 ++++++++++++------ .../{ => baseline}/archive/kfArchiveTest.7z | Bin .../{ => baseline}/archive/kfArchiveTest.tar | Bin .../archive/kfArchiveTest.tar.bz2 | Bin .../archive/kfArchiveTest.tar.gz | Bin .../archive/kfArchiveTest.tar.lz4 | Bin .../archive/kfArchiveTest.tar.xz | Bin .../{ => baseline}/archive/kfArchiveTest.zip | Bin .../archive/kfArchiveTest_zip_inside.zip | Bin .../{ => baseline}/archive/makeArchives.sh | 0 testdata/{ => baseline}/archive/template.zip | Bin 12 files changed, 102 insertions(+), 43 deletions(-) rename testdata/{ => baseline}/archive/kfArchiveTest.7z (100%) rename testdata/{ => baseline}/archive/kfArchiveTest.tar (100%) rename testdata/{ => baseline}/archive/kfArchiveTest.tar.bz2 (100%) rename testdata/{ => baseline}/archive/kfArchiveTest.tar.gz (100%) rename testdata/{ => baseline}/archive/kfArchiveTest.tar.lz4 (100%) rename testdata/{ => baseline}/archive/kfArchiveTest.tar.xz (100%) rename testdata/{ => baseline}/archive/kfArchiveTest.zip (100%) rename testdata/{ => baseline}/archive/kfArchiveTest_zip_inside.zip (100%) rename testdata/{ => baseline}/archive/makeArchives.sh (100%) rename testdata/{ => baseline}/archive/template.zip (100%) diff --git a/Cargo.toml b/Cargo.toml index 425db27..686812d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -187,6 +187,7 @@ walkdir = "2.5.0" p256 = "0.13.2" ed25519-dalek = { version = "2.2", features = ["pkcs8"] } aws-sdk-s3 = "1.100.0" +aws-smithy-http = "0.62.2" [dependencies.tikv-jemallocator] version = "0.6" diff --git a/src/s3.rs b/src/s3.rs index 5f35dde..7a180f4 100644 --- a/src/s3.rs +++ b/src/s3.rs @@ -1,10 +1,14 @@ use anyhow::{Context, Result}; -use aws_config::{meta::region::RegionProviderChain, BehaviorVersion}; +use aws_config::{defaults, 
meta::region::RegionProviderChain, BehaviorVersion, ConfigLoader}; use aws_credential_types::Credentials; -use aws_sdk_s3::Client; +use aws_sdk_s3::{ + Client, + operation::list_objects_v2::ListObjectsV2Error, // modeled service error + error::ProvideErrorMetadata, // for .code() +}; +use aws_types::region::Region; +use reqwest; // HTTP client for HEAD fallback -/// Visit all objects in the given S3 bucket (optionally under a prefix), -/// calling `visitor` with each object's key and bytes. pub async fn visit_bucket_objects( bucket: &str, prefix: Option<&str>, @@ -15,66 +19,120 @@ pub async fn visit_bucket_objects( where F: FnMut(String, Vec) -> Result<()>, { - let mut config_loader = aws_config::defaults(BehaviorVersion::latest()); + // Helper to build ConfigLoader with profile/creds/no_credentials + let build_loader = || { + let mut loader = defaults(BehaviorVersion::latest()); + if let Some(p) = profile { + loader = loader.profile_name(p); + } + if let (Ok(k), Ok(s)) = (std::env::var("KF_AWS_KEY"), std::env::var("KF_AWS_SECRET")) { + loader = loader.credentials_provider(Credentials::new(k, s, None, None, "kf_env")); + } + if profile.is_none() && std::env::var("KF_AWS_KEY").is_err() && role_arn.is_none() { + loader = loader.no_credentials(); + } + loader + }; - if let Some(profile) = profile { - config_loader = config_loader.profile_name(profile); - } - - // If explicit credentials are provided via KF_AWS_KEY/KF_AWS_SECRET use them - if let (Ok(key), Ok(secret)) = (std::env::var("KF_AWS_KEY"), std::env::var("KF_AWS_SECRET")) { - let creds = Credentials::new(key, secret, None, None, "kf_env"); - config_loader = config_loader.credentials_provider(creds); - } - - // Resolve region using the default chain, falling back to us-east-1 - let region_provider = RegionProviderChain::default_provider().or_else("us-east-1"); - let base_config = config_loader.region(region_provider).load().await; - - let client = if let Some(role) =
role_arn { - let assume_role = aws_config::sts::AssumeRoleProvider::builder(role.to_string()) + // Initial client in default→us-east-1 + let default_region = RegionProviderChain::default_provider().or_else("us-east-1"); + let mut config = build_loader().region(default_region).load().await; + let mut client = if let Some(role) = role_arn { + let assume = aws_config::sts::AssumeRoleProvider::builder(role.to_string()) .session_name("kingfisher") - .configure(&base_config) + .configure(&config) .build() .await; - let conf = aws_sdk_s3::config::Builder::from(&base_config) - .credentials_provider(assume_role) + let conf = aws_sdk_s3::config::Builder::from(&config) + .credentials_provider(assume) .build(); Client::from_conf(conf) } else { - Client::new(&base_config) + Client::new(&config) }; - let mut continuation_token = None; - + let mut continuation_token: Option = None; loop { - let mut req = client.list_objects_v2().bucket(bucket.to_string()); + let mut req = client.list_objects_v2().bucket(bucket); if let Some(p) = prefix { - req = req.prefix(p.to_string()); + req = req.prefix(p); } - if let Some(token) = continuation_token.clone() { + if let Some(ref token) = continuation_token { req = req.continuation_token(token); } - let resp = req.send().await.context("Failed to list objects in bucket")?; + let resp = match req.send().await { + Ok(r) => r, - if let Some(objects) = resp.contents { - for obj in objects { - if let Some(key) = obj.key { - let get_resp = client - .get_object() - .bucket(bucket) - .key(&key) + // On error, extract the modeled service error + Err(err) => { + let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError + + // If the bucket must be addressed at another region...
+ if svc_err.code() == Some("PermanentRedirect") { + // HEAD request to get x-amz-bucket-region header + let url = format!("https://{bucket}.s3.amazonaws.com"); + let head = reqwest::Client::new() + .head(&url) .send() .await - .with_context(|| format!("Failed to fetch object {key}"))?; - let data = - get_resp.body.collect().await.context("Failed to read S3 object body")?; - visitor(key, data.into_bytes().to_vec())?; + .context("Failed to HEAD bucket for region")?; + let region_str = head + .headers() + .get("x-amz-bucket-region") + .and_then(|v| v.to_str().ok()) + .unwrap_or("us-east-1") + .to_string(); + + // Rebuild client in the correct region + let override_region = RegionProviderChain::first_try(Region::new(region_str)) + .or_else("us-east-1"); + config = build_loader().region(override_region).load().await; + client = if let Some(r) = role_arn { + let assume = aws_config::sts::AssumeRoleProvider::builder(r.to_string()) + .session_name("kingfisher") + .configure(&config) + .build() + .await; + let conf = aws_sdk_s3::config::Builder::from(&config) + .credentials_provider(assume) + .build(); + Client::from_conf(conf) + } else { + Client::new(&config) + }; + + // Reset pagination and retry list + continuation_token = None; + continue; } + + // Any other error is fatal + return Err(svc_err).context("Failed to list objects in bucket"); + } + }; + + // Process objects + for obj in resp.contents.unwrap_or_default() { + if let Some(key) = obj.key { + let data = client + .get_object() + .bucket(bucket) + .key(&key) + .send() + .await + .with_context(|| format!("Failed to fetch object {}", key))? + .body + .collect() + .await + .context("Failed to read S3 object body")? 
+ .into_bytes() + .to_vec(); + visitor(key, data)?; } } + // Continue or finish pagination if resp.is_truncated.unwrap_or(false) { continuation_token = resp.next_continuation_token; } else { @@ -83,4 +141,4 @@ where } Ok(()) -} \ No newline at end of file +} diff --git a/testdata/archive/kfArchiveTest.7z b/testdata/baseline/archive/kfArchiveTest.7z similarity index 100% rename from testdata/archive/kfArchiveTest.7z rename to testdata/baseline/archive/kfArchiveTest.7z diff --git a/testdata/archive/kfArchiveTest.tar b/testdata/baseline/archive/kfArchiveTest.tar similarity index 100% rename from testdata/archive/kfArchiveTest.tar rename to testdata/baseline/archive/kfArchiveTest.tar diff --git a/testdata/archive/kfArchiveTest.tar.bz2 b/testdata/baseline/archive/kfArchiveTest.tar.bz2 similarity index 100% rename from testdata/archive/kfArchiveTest.tar.bz2 rename to testdata/baseline/archive/kfArchiveTest.tar.bz2 diff --git a/testdata/archive/kfArchiveTest.tar.gz b/testdata/baseline/archive/kfArchiveTest.tar.gz similarity index 100% rename from testdata/archive/kfArchiveTest.tar.gz rename to testdata/baseline/archive/kfArchiveTest.tar.gz diff --git a/testdata/archive/kfArchiveTest.tar.lz4 b/testdata/baseline/archive/kfArchiveTest.tar.lz4 similarity index 100% rename from testdata/archive/kfArchiveTest.tar.lz4 rename to testdata/baseline/archive/kfArchiveTest.tar.lz4 diff --git a/testdata/archive/kfArchiveTest.tar.xz b/testdata/baseline/archive/kfArchiveTest.tar.xz similarity index 100% rename from testdata/archive/kfArchiveTest.tar.xz rename to testdata/baseline/archive/kfArchiveTest.tar.xz diff --git a/testdata/archive/kfArchiveTest.zip b/testdata/baseline/archive/kfArchiveTest.zip similarity index 100% rename from testdata/archive/kfArchiveTest.zip rename to testdata/baseline/archive/kfArchiveTest.zip diff --git a/testdata/archive/kfArchiveTest_zip_inside.zip b/testdata/baseline/archive/kfArchiveTest_zip_inside.zip similarity index 100% rename from 
testdata/archive/kfArchiveTest_zip_inside.zip rename to testdata/baseline/archive/kfArchiveTest_zip_inside.zip diff --git a/testdata/archive/makeArchives.sh b/testdata/baseline/archive/makeArchives.sh similarity index 100% rename from testdata/archive/makeArchives.sh rename to testdata/baseline/archive/makeArchives.sh diff --git a/testdata/archive/template.zip b/testdata/baseline/archive/template.zip similarity index 100% rename from testdata/archive/template.zip rename to testdata/baseline/archive/template.zip From f8789607e7262ac650c2c38ca44e9e221d02b7c7 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 3 Aug 2025 08:14:32 -0700 Subject: [PATCH 3/7] improving s3 bucket scanning feature --- .../{baseline => }/archive/kfArchiveTest.7z | Bin .../{baseline => }/archive/kfArchiveTest.tar | Bin .../archive/kfArchiveTest.tar.bz2 | Bin .../archive/kfArchiveTest.tar.gz | Bin .../archive/kfArchiveTest.tar.lz4 | Bin .../archive/kfArchiveTest.tar.xz | Bin .../{baseline => }/archive/kfArchiveTest.zip | Bin .../archive/kfArchiveTest_zip_inside.zip | Bin .../{baseline => }/archive/makeArchives.sh | 0 testdata/{baseline => }/archive/template.zip | Bin testdata/baseline/baseline_test.go | 95 ------------------ 11 files changed, 95 deletions(-) rename testdata/{baseline => }/archive/kfArchiveTest.7z (100%) rename testdata/{baseline => }/archive/kfArchiveTest.tar (100%) rename testdata/{baseline => }/archive/kfArchiveTest.tar.bz2 (100%) rename testdata/{baseline => }/archive/kfArchiveTest.tar.gz (100%) rename testdata/{baseline => }/archive/kfArchiveTest.tar.lz4 (100%) rename testdata/{baseline => }/archive/kfArchiveTest.tar.xz (100%) rename testdata/{baseline => }/archive/kfArchiveTest.zip (100%) rename testdata/{baseline => }/archive/kfArchiveTest_zip_inside.zip (100%) rename testdata/{baseline => }/archive/makeArchives.sh (100%) rename testdata/{baseline => }/archive/template.zip (100%) delete mode 100644 testdata/baseline/baseline_test.go diff --git 
a/testdata/baseline/archive/kfArchiveTest.7z b/testdata/archive/kfArchiveTest.7z similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.7z rename to testdata/archive/kfArchiveTest.7z diff --git a/testdata/baseline/archive/kfArchiveTest.tar b/testdata/archive/kfArchiveTest.tar similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.tar rename to testdata/archive/kfArchiveTest.tar diff --git a/testdata/baseline/archive/kfArchiveTest.tar.bz2 b/testdata/archive/kfArchiveTest.tar.bz2 similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.tar.bz2 rename to testdata/archive/kfArchiveTest.tar.bz2 diff --git a/testdata/baseline/archive/kfArchiveTest.tar.gz b/testdata/archive/kfArchiveTest.tar.gz similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.tar.gz rename to testdata/archive/kfArchiveTest.tar.gz diff --git a/testdata/baseline/archive/kfArchiveTest.tar.lz4 b/testdata/archive/kfArchiveTest.tar.lz4 similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.tar.lz4 rename to testdata/archive/kfArchiveTest.tar.lz4 diff --git a/testdata/baseline/archive/kfArchiveTest.tar.xz b/testdata/archive/kfArchiveTest.tar.xz similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.tar.xz rename to testdata/archive/kfArchiveTest.tar.xz diff --git a/testdata/baseline/archive/kfArchiveTest.zip b/testdata/archive/kfArchiveTest.zip similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.zip rename to testdata/archive/kfArchiveTest.zip diff --git a/testdata/baseline/archive/kfArchiveTest_zip_inside.zip b/testdata/archive/kfArchiveTest_zip_inside.zip similarity index 100% rename from testdata/baseline/archive/kfArchiveTest_zip_inside.zip rename to testdata/archive/kfArchiveTest_zip_inside.zip diff --git a/testdata/baseline/archive/makeArchives.sh b/testdata/archive/makeArchives.sh similarity index 100% rename from testdata/baseline/archive/makeArchives.sh rename to 
testdata/archive/makeArchives.sh diff --git a/testdata/baseline/archive/template.zip b/testdata/archive/template.zip similarity index 100% rename from testdata/baseline/archive/template.zip rename to testdata/archive/template.zip diff --git a/testdata/baseline/baseline_test.go b/testdata/baseline/baseline_test.go deleted file mode 100644 index 00e7770..0000000 --- a/testdata/baseline/baseline_test.go +++ /dev/null @@ -1,95 +0,0 @@ -package core - -import ( - "io/ioutil" - "os" - "path" - "path/filepath" - "runtime" - "testing" - - "github.com/10gen/kingfisher/core" -) - -func rootDir() string { - _, b, _, _ := runtime.Caller(0) - return filepath.Dir(path.Dir(b)) -} - -func NewTestSession(baselineFilename string) (*core.Session, error) { - session := core.PrepareTestSession() - session.Testing = true - session.ReqScanMode = core.LocalFiles - session.Options.ValidateSecrets = true - session.Options.BaselineFilename = baselineFilename - session.Options.KingfisherTempDir = core.GetTempDir() - core.GlobalSessionRef = session - session.InitializeTargetModeClient() - return session, nil -} - -func beginTesting(t *testing.T, testfile string, expectedSkippedFindings, expectedFindingsSuppressKingfisher int) { - rootdir := rootDir() - testfilePath := filepath.Join(rootdir, testfile) - _, filename := filepath.Split(testfilePath) - - byteBaseLine := []byte(`FileContent: - matches: [] -FilePaths: - matches: [] -ExactFindings: - matches: - - filepath: testdata/ruby_vulnerable.rb - findinghash: 701c302855ecc97e8415c44f37123bc2ca0c3343bd87028682aaaeaa90568084 - linenum: 40 - lastupdated: Tue Apr 16 13:04:10 PDT 2024 - - filepath: testdata/ruby_vulnerable.rb - findinghash: 065d1e2faeae9328ca8b2f2754afa6c196d3ef2da2720dabca7e5161d67a6ca1 - linenum: 40 - lastupdated: Tue Apr 16 13:04:10 PDT 2024 -`) - - // Write byteBaseline to a file in a temp directory and give yaml extension - tempFile, err := ioutil.TempFile("", "baseline-*.yaml") - if err != nil { - t.Fatal(err) - } - defer 
os.Remove(tempFile.Name()) // Clean up the file after test - - if _, err := tempFile.Write(byteBaseLine); err != nil { - t.Fatal(err) - } - if err := tempFile.Close(); err != nil { - t.Fatal(err) - } - - sess, err := NewTestSession(tempFile.Name()) - if err != nil { - t.Fatal(err) - } - - matchFile := core.NewMatchFile(testfilePath, sess, nil) - core.BeginFileAnalysis(matchFile) - if sess.Stats.SkippedFindings != expectedSkippedFindings { - core.PrintSessionStats(sess) - t.Errorf("Expected %d findings, got %d -- file: <%s>", expectedSkippedFindings, sess.Stats.SkippedFindings, filename) - } -} - -func TestBaselineFeature(t *testing.T) { - - tests := []struct { - fileName string - expectedSkippedFindings int - expectedFindingsSuppressKingfisher int - }{ - {"ruby_vulnerable.rb", 3, 0}, - } - - for _, tt := range tests { - t.Run(tt.fileName, func(t *testing.T) { - beginTesting(t, tt.fileName, tt.expectedSkippedFindings, tt.expectedFindingsSuppressKingfisher) - }) - } - -} From 9a3d27f88136b5ac9903f5ab9f16f0f07a895b42 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 3 Aug 2025 08:56:22 -0700 Subject: [PATCH 4/7] added integration test --- tests/int_s3.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/int_s3.rs diff --git a/tests/int_s3.rs b/tests/int_s3.rs new file mode 100644 index 0000000..948734c --- /dev/null +++ b/tests/int_s3.rs @@ -0,0 +1,21 @@ +use anyhow::Result; +use kingfisher::s3::visit_bucket_objects; + +#[tokio::test] +async fn test_visit_public_bucket() -> Result<()> { + let mut objects = Vec::new(); + visit_bucket_objects("wikisum", None, None, None, |key, data| { + objects.push((key, data)); + Ok(()) + }) + .await?; + + assert!(objects.iter().any(|(k, _)| k == "README.txt"), "README object not found"); + let creds = objects.iter().find(|(k, _)| k == "README.txt").expect("README object"); + let body = std::str::from_utf8(&creds.1)?; + assert!( + body.contains("This dataset provides how-to articles"), + "expected 
README file" + ); + Ok(()) +} \ No newline at end of file From 10d604418b2b9541cb33ff345f2b585e697c20d6 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 3 Aug 2025 09:45:52 -0700 Subject: [PATCH 5/7] improved integration test and updated README --- README.md | 55 +++++++++++++++++++++++++++++++++++++++++++++++-- src/s3.rs | 6 +++--- tests/int_s3.rs | 17 ++++++++++----- 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4af4295..2297709 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co ## What Kingfisher Adds - **Live validation** via cloud-provider APIs - **Language-aware detection** (source-code parsing) for ~20 languages -- **Extra targets**: GitLab repos, Docker images, Jira issues, and Slack messages +- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages - **Baseline mode**: ignore known secrets, flag only new ones - **Native Windows** binary @@ -26,7 +26,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co - **Docker images**: public or private via `--docker-image` - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` - **Slack messages**: query‑based scans with `--slack-query` - - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, or `--aws-local-profile` + - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) @@ -274,6 +274,57 @@ kingfisher 
scan ./my-project \ --exclude tests \ -v ``` +## Scan an S3 bucket +You can scan S3 objects directly: + +```bash +kingfisher scan --s3-bucket bucket-name [--s3-prefix path/] +``` + +Credential resolution happens in this order: + +1. `KF_AWS_KEY` and `KF_AWS_SECRET` environment variables +2. `--aws-local-profile` pointing to a profile in `~/.aws/config` (works with AWS SSO) +3. anonymous access for public buckets + +If `--role-arn` is supplied, the credentials from steps 1–2 are used to assume that role. + +Examples: + +```bash +# using explicit keys +export KF_AWS_KEY=AKIA... +export KF_AWS_SECRET=g5nYW... +kingfisher scan --s3-bucket some-example-bucket + +# the same scan as a single command, without exporting the variables: +KF_AWS_KEY=AKIA... KF_AWS_SECRET=g5nYW... kingfisher scan --s3-bucket some-example-bucket + +# using a local profile (e.g., SSO) defined in your AWS config file (~/.aws/config) +kingfisher scan --s3-bucket some-example-bucket --aws-local-profile myprofile + +# anonymous scan of a public bucket, using an object prefix to scan only a subset of the S3 bucket +kingfisher scan \ + --s3-bucket awsglue-datasets \ + --s3-prefix examples/us-legislators/all + +# assuming a role when scanning +kingfisher scan --s3-bucket some-example-bucket \ + --role-arn arn:aws:iam::123456789012:role/MyRole + +# anonymous scan of a public bucket (no keys or profile configured) +kingfisher scan --s3-bucket some-example-bucket +``` + +Docker example: + +```bash +docker run --rm \ + -e KF_AWS_KEY=AKIA... \ + -e KF_AWS_SECRET=g5nYW... \ + ghcr.io/mongodb/kingfisher:latest \ + scan --s3-bucket bucket-name +``` ## Scanning Docker Images Kingfisher will first try to use any locally available image, then fall back to pulling via OCI. 
diff --git a/src/s3.rs b/src/s3.rs index 7a180f4..ed18a52 100644 --- a/src/s3.rs +++ b/src/s3.rs @@ -1,10 +1,10 @@ use anyhow::{Context, Result}; -use aws_config::{defaults, meta::region::RegionProviderChain, BehaviorVersion, ConfigLoader}; +use aws_config::{defaults, meta::region::RegionProviderChain, BehaviorVersion}; use aws_credential_types::Credentials; use aws_sdk_s3::{ Client, operation::list_objects_v2::ListObjectsV2Error, // modeled service error - error::ProvideErrorMetadata, // for .code() :contentReference[oaicite:8]{index=8} + error::ProvideErrorMetadata, // for .code() }; use aws_types::region::Region; use reqwest; // HTTP client for HEAD fallback @@ -66,7 +66,7 @@ where // On error, extract the modeled service error Err(err) => { - let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError :contentReference[oaicite:9]{index=9} + let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError // If the bucket must be addressed at another region... 
if svc_err.code() == Some("PermanentRedirect") { diff --git a/tests/int_s3.rs b/tests/int_s3.rs index 948734c..c44afe8 100644 --- a/tests/int_s3.rs +++ b/tests/int_s3.rs @@ -4,18 +4,25 @@ use kingfisher::s3::visit_bucket_objects; #[tokio::test] async fn test_visit_public_bucket() -> Result<()> { let mut objects = Vec::new(); - visit_bucket_objects("wikisum", None, None, None, |key, data| { + visit_bucket_objects("awsglue-datasets", Some("examples/us-legislators/all/"), None, None, |key, data| { objects.push((key, data)); Ok(()) }) .await?; - assert!(objects.iter().any(|(k, _)| k == "README.txt"), "README object not found"); - let creds = objects.iter().find(|(k, _)| k == "README.txt").expect("README object"); + assert!( + objects.iter().any(|(k, _)| k.ends_with("events.json")), + "events.json object not found" + ); + let creds = objects + .iter() + .find(|(k, _)| k.ends_with("events.json")) + .expect("events.json object"); + let body = std::str::from_utf8(&creds.1)?; assert!( - body.contains("This dataset provides how-to articles"), - "expected README file" + body.contains("Q4450263"), + "expected events.json file" ); Ok(()) } \ No newline at end of file From 141640ef4a1732f8c4ac47375a42499bfe9a7d17 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 3 Aug 2025 10:35:52 -0700 Subject: [PATCH 6/7] Update src/scanner/runner.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/scanner/runner.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index f8dae87..63f7bee 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -96,9 +96,6 @@ pub async fn run_async_scan( } } - // if input_roots.is_empty() { - // bail!("No inputs to scan"); - // } let shared_profiler = Arc::new(ConcurrentRuleProfiler::new()); let enable_profiling = args.rule_stats; let matcher_stats = Mutex::new(MatcherStats::default()); From 459d4d0ef0560bb9b15841863ed0932167b39c97 Mon Sep 17 00:00:00 2001 From: Mick Grove 
Date: Sun, 3 Aug 2025 10:37:02 -0700 Subject: [PATCH 7/7] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- CHANGELOG.md | 1 - Cargo.toml | 1 - 2 files changed, 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07607fe..2c92a92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,6 @@ All notable changes to this project will be documented in this file. ## [1.32.0] - Added support for scanning AWS S3 buckets via `--s3-bucket` and optional `--s3-prefix` - Added `--role-arn` and `--aws-local-profile` flags for S3 authentication alongside `KF_AWS_KEY`/`KF_AWS_SECRET` -- ## [1.31.0] - New rules: Telegram bot token, OpenWeatherMap, Apify, Groq - New OpenAI detectors added (@joshlarsen) diff --git a/Cargo.toml b/Cargo.toml index 686812d..425db27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -187,7 +187,6 @@ walkdir = "2.5.0" p256 = "0.13.2" ed25519-dalek = { version = "2.2", features = ["pkcs8"] } aws-sdk-s3 = "1.100.0" -aws-smithy-http = "0.62.2" [dependencies.tikv-jemallocator] version = "0.6"