From 10d604418b2b9541cb33ff345f2b585e697c20d6 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 3 Aug 2025 09:45:52 -0700 Subject: [PATCH] improved integration test and updated README --- README.md | 55 +++++++++++++++++++++++++++++++++++++++++++++++-- src/s3.rs | 6 +++--- tests/int_s3.rs | 17 ++++++++++----- 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4af4295..2297709 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co ## What Kingfisher Adds - **Live validation** via cloud-provider APIs - **Language-aware detection** (source-code parsing) for ~20 languages -- **Extra targets**: GitLab repos, Docker images, Jira issues, and Slack messages +- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages - **Baseline mode**: ignore known secrets, flag only new ones - **Native Windows** binary @@ -26,7 +26,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co - **Docker images**: public or private via `--docker-image` - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` - **Slack messages**: query‑based scans with `--slack-query` - - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, or `--aws-local-profile` + - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) @@ -274,6 +274,57 @@ kingfisher scan ./my-project \ --exclude tests \ -v ``` +## Scan an S3 
bucket +You can scan S3 objects directly: + +```bash +kingfisher scan --s3-bucket bucket-name [--s3-prefix path/] +``` + +Credential resolution happens in this order: + +1. `KF_AWS_KEY` and `KF_AWS_SECRET` environment variables +2. `--aws-local-profile` pointing to a profile in `~/.aws/config` (works with AWS SSO) +3. anonymous access for public buckets + +If `--role-arn` is supplied, the credentials from steps 1–2 are used to assume that role. + +Examples: + +```bash +# using explicit keys +export KF_AWS_KEY=AKIA... +export KF_AWS_SECRET=g5nYW... +kingfisher scan --s3-bucket some-example-bucket + +# The above can also be run as: +KF_AWS_KEY=AKIA... KF_AWS_SECRET=g5nYW... kingfisher scan --s3-bucket some-example-bucket + +# using a local profile (e.g., SSO) that exists in your AWS config (~/.aws/config) +kingfisher scan --s3-bucket some-example-bucket --aws-local-profile myprofile + +# anonymous scan of a bucket, while providing an object prefix to scan only a subset of the S3 bucket +kingfisher scan \ + --s3-bucket awsglue-datasets \ + --s3-prefix examples/us-legislators/all + +# assuming a role when scanning +kingfisher scan --s3-bucket some-example-bucket \ + --role-arn arn:aws:iam::123456789012:role/MyRole + +# anonymous scan of a public bucket +kingfisher scan --s3-bucket some-example-bucket +``` + +Docker example: + +```bash +docker run --rm \ + -e KF_AWS_KEY=AKIA... \ + -e KF_AWS_SECRET=g5nYW... \ + ghcr.io/mongodb/kingfisher:latest \ + scan --s3-bucket bucket-name +``` ## Scanning Docker Images Kingfisher will first try to use any locally available image, then fall back to pulling via OCI. 
diff --git a/src/s3.rs b/src/s3.rs index 7a180f4..ed18a52 100644 --- a/src/s3.rs +++ b/src/s3.rs @@ -1,10 +1,10 @@ use anyhow::{Context, Result}; -use aws_config::{defaults, meta::region::RegionProviderChain, BehaviorVersion, ConfigLoader}; +use aws_config::{defaults, meta::region::RegionProviderChain, BehaviorVersion}; use aws_credential_types::Credentials; use aws_sdk_s3::{ Client, operation::list_objects_v2::ListObjectsV2Error, // modeled service error - error::ProvideErrorMetadata, // for .code() :contentReference[oaicite:8]{index=8} + error::ProvideErrorMetadata, // for .code() }; use aws_types::region::Region; use reqwest; // HTTP client for HEAD fallback @@ -66,7 +66,7 @@ where // On error, extract the modeled service error Err(err) => { - let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError :contentReference[oaicite:9]{index=9} + let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError // If the bucket must be addressed at another region... 
if svc_err.code() == Some("PermanentRedirect") { diff --git a/tests/int_s3.rs b/tests/int_s3.rs index 948734c..c44afe8 100644 --- a/tests/int_s3.rs +++ b/tests/int_s3.rs @@ -4,18 +4,25 @@ use kingfisher::s3::visit_bucket_objects; #[tokio::test] async fn test_visit_public_bucket() -> Result<()> { let mut objects = Vec::new(); - visit_bucket_objects("wikisum", None, None, None, |key, data| { + visit_bucket_objects("awsglue-datasets", Some("examples/us-legislators/all/"), None, None, |key, data| { objects.push((key, data)); Ok(()) }) .await?; - assert!(objects.iter().any(|(k, _)| k == "README.txt"), "README object not found"); - let creds = objects.iter().find(|(k, _)| k == "README.txt").expect("README object"); + assert!( + objects.iter().any(|(k, _)| k.ends_with("events.json")), + "events.json object not found" + ); + let creds = objects + .iter() + .find(|(k, _)| k.ends_with("events.json")) + .expect("events.json object"); + let body = std::str::from_utf8(&creds.1)?; assert!( - body.contains("This dataset provides how-to articles"), - "expected README file" + body.contains("Q4450263"), + "expected events.json file" ); Ok(()) } \ No newline at end of file