forked from mirrors/kingfisher
Merge pull request #70 from micksmix/main
Added support for scanning s3 buckets
This commit is contained in:
commit
caee82dc1b
22 changed files with 481 additions and 126 deletions
|
|
@ -2,6 +2,9 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [1.32.0]
|
||||
- Added support for scanning AWS S3 buckets via `--s3-bucket` and optional `--s3-prefix`
|
||||
- Added `--role-arn` and `--aws-local-profile` flags for S3 authentication alongside `KF_AWS_KEY`/`KF_AWS_SECRET`
|
||||
## [1.31.0]
|
||||
- New rules: Telegram bot token, OpenWeatherMap, Apify, Groq
|
||||
- New OpenAI detectors added (@joshlarsen)
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ publish = false
|
|||
|
||||
[package]
|
||||
name = "kingfisher"
|
||||
version = "1.31.0"
|
||||
version = "1.32.0"
|
||||
description = "MongoDB's blazingly fast secret scanning and validation tool"
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
|
@ -186,6 +186,7 @@ oci-client = { version = "0.15", default-features = false, features = ["rustls-t
|
|||
walkdir = "2.5.0"
|
||||
p256 = "0.13.2"
|
||||
ed25519-dalek = { version = "2.2", features = ["pkcs8"] }
|
||||
aws-sdk-s3 = "1.100.0"
|
||||
|
||||
[dependencies.tikv-jemallocator]
|
||||
version = "0.6"
|
||||
|
|
@ -207,7 +208,7 @@ rand_chacha = "0.9.0"
|
|||
|
||||
[profile.release]
|
||||
debug = false
|
||||
strip = "debuginfo"
|
||||
strip = true #"debuginfo"
|
||||
opt-level = 3 # Maximum optimization for performance
|
||||
lto = true # Enable Link Time Optimization
|
||||
codegen-units = 1 # Single codegen unit: better optimization, but slower compilation
|
||||
|
|
|
|||
63
README.md
63
README.md
|
|
@ -13,7 +13,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co
|
|||
## What Kingfisher Adds
|
||||
- **Live validation** via cloud-provider APIs
|
||||
- **Language-aware detection** (source-code parsing) for ~20 languages
|
||||
- **Extra targets**: GitLab repos, Docker images, Jira issues, and Slack messages
|
||||
- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages
|
||||
- **Baseline mode**: ignore known secrets, flag only new ones
|
||||
- **Native Windows** binary
|
||||
|
||||
|
|
@ -26,6 +26,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co
|
|||
- **Docker images**: public or private via `--docker-image`
|
||||
- **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql`
|
||||
- **Slack messages**: query‑based scans with `--slack-query`
|
||||
- **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous
|
||||
- **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md))
|
||||
|
||||
**Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation)
|
||||
|
|
@ -109,6 +110,15 @@ docker run --rm \
|
|||
ghcr.io/mongodb/kingfisher:latest \
|
||||
scan --git-url https://github.com/org/private_repo.git
|
||||
|
||||
# Scan an S3 bucket
|
||||
# Credentials can come from KF_AWS_KEY/KF_AWS_SECRET, --role-arn, or --aws-local-profile
|
||||
docker run --rm \
|
||||
-e KF_AWS_KEY=AKIA... \
|
||||
-e KF_AWS_SECRET=g5nYW... \
|
||||
ghcr.io/mongodb/kingfisher:latest \
|
||||
scan --s3-bucket bucket-name
|
||||
|
||||
|
||||
# Scan and write a JSON report locally
|
||||
# Here we:
|
||||
# 1. Mount $PWD → /proj
|
||||
|
|
@ -264,6 +274,57 @@ kingfisher scan ./my-project \
|
|||
--exclude tests \
|
||||
-v
|
||||
```
|
||||
## Scan an S3 bucket
|
||||
You can scan S3 objects directly:
|
||||
|
||||
```bash
|
||||
kingfisher scan --s3-bucket bucket-name [--s3-prefix path/]
|
||||
```
|
||||
|
||||
Credential resolution happens in this order:
|
||||
|
||||
1. `KF_AWS_KEY` and `KF_AWS_SECRET` environment variables
|
||||
2. `--aws-local-profile` pointing to a profile in `~/.aws/config` (works with AWS SSO)
|
||||
3. anonymous access for public buckets
|
||||
|
||||
If `--role-arn` is supplied, the credentials from steps 1–2 are used to assume that role.
|
||||
|
||||
Examples:
|
||||
|
||||
```bash
|
||||
# using explicit keys
|
||||
export KF_AWS_KEY=AKIA...
|
||||
export KF_AWS_SECRET=g5nYW...
|
||||
kingfisher scan --s3-bucket some-example-bucket
|
||||
|
||||
# Above can also be run as:
|
||||
KF_AWS_KEY=AKIA... KF_AWS_SECRET=g5nYW... kingfisher scan --s3-bucket some-example-bucket
|
||||
|
||||
# using a local profile (e.g., SSO) defined in your AWS config file (~/.aws/config)
|
||||
kingfisher scan --s3-bucket some-example-bucket --aws-local-profile myprofile
|
||||
|
||||
# anonymous scan of a bucket, using an object prefix to scan only a subset of the S3 bucket
|
||||
kingfisher scan \
|
||||
--s3-bucket awsglue-datasets \
|
||||
--s3-prefix examples/us-legislators/all
|
||||
|
||||
# assuming a role when scanning
|
||||
kingfisher scan --s3-bucket some-example-bucket \
|
||||
--role-arn arn:aws:iam::123456789012:role/MyRole
|
||||
|
||||
# anonymous scan of a public bucket
|
||||
kingfisher scan --s3-bucket some-example-bucket
|
||||
```
|
||||
|
||||
Docker example:
|
||||
|
||||
```bash
|
||||
docker run --rm \
|
||||
-e KF_AWS_KEY=AKIA... \
|
||||
-e KF_AWS_SECRET=g5nYW... \
|
||||
ghcr.io/mongodb/kingfisher:latest \
|
||||
scan --s3-bucket bucket-name
|
||||
```
|
||||
## Scanning Docker Images
|
||||
|
||||
Kingfisher will first try to use any locally available image, then fall back to pulling via OCI.
|
||||
|
|
|
|||
|
|
@ -28,7 +28,8 @@ pub struct InputSpecifierArgs {
|
|||
"all_gitlab_groups",
|
||||
"jira_url",
|
||||
"docker_image",
|
||||
"slack_query"
|
||||
"slack_query",
|
||||
"s3_bucket"
|
||||
]),
|
||||
value_hint = ValueHint::AnyPath
|
||||
)]
|
||||
|
|
@ -107,6 +108,23 @@ pub struct InputSpecifierArgs {
|
|||
#[arg(long, default_value_t = 100)]
|
||||
pub max_results: usize,
|
||||
|
||||
/// Scan the specified S3 bucket
|
||||
#[arg(long)]
|
||||
pub s3_bucket: Option<String>,
|
||||
|
||||
/// Optional prefix within the S3 bucket
|
||||
#[arg(long, requires = "s3_bucket")]
|
||||
pub s3_prefix: Option<String>,
|
||||
|
||||
/// AWS IAM role ARN to assume for S3 access
|
||||
#[arg(long, requires = "s3_bucket")]
|
||||
pub role_arn: Option<String>,
|
||||
|
||||
/// Use credentials from a local AWS profile in ~/.aws/config
|
||||
#[arg(long, requires = "s3_bucket")]
|
||||
pub aws_local_profile: Option<String>,
|
||||
|
||||
|
||||
/// Docker/OCI images to scan (no local Docker required)
|
||||
#[arg(long = "docker-image")]
|
||||
pub docker_image: Vec<String>,
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ pub struct FindingsStore {
|
|||
origin_meta: FxHashMap<u64, Arc<OriginSet>>,
|
||||
docker_images: FxHashMap<PathBuf, String>,
|
||||
slack_links: FxHashMap<PathBuf, String>,
|
||||
s3_buckets: FxHashMap<PathBuf, String>,
|
||||
}
|
||||
impl FindingsStore {
|
||||
pub fn new(clone_dir: PathBuf) -> Self {
|
||||
|
|
@ -73,6 +74,7 @@ impl FindingsStore {
|
|||
bloom_items: 0,
|
||||
docker_images: FxHashMap::default(),
|
||||
slack_links: FxHashMap::default(),
|
||||
s3_buckets: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -306,6 +308,14 @@ impl FindingsStore {
|
|||
&self.slack_links
|
||||
}
|
||||
|
||||
/// Associates the local directory `dir` with the S3 bucket name `bucket`.
///
/// An existing entry for the same `dir` is overwritten.
pub fn register_s3_bucket(&mut self, dir: PathBuf, bucket: String) {
    // The previously registered bucket (if any) is intentionally discarded.
    let _replaced = self.s3_buckets.insert(dir, bucket);
}
|
||||
|
||||
pub fn s3_buckets(&self) -> &FxHashMap<PathBuf, String> {
|
||||
&self.s3_buckets
|
||||
}
|
||||
|
||||
pub fn get_finding_data_iter(
|
||||
&self,
|
||||
) -> impl Iterator<Item = finding_data::FindingMetadata> + '_ {
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ pub mod rule_profiling;
|
|||
pub mod rules;
|
||||
pub mod rules_database;
|
||||
pub mod safe_list;
|
||||
pub mod s3;
|
||||
pub mod scanner;
|
||||
pub mod scanner_pool;
|
||||
pub mod serde_utils;
|
||||
|
|
|
|||
|
|
@ -286,6 +286,11 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
|
|||
jira_url: None,
|
||||
jql: None,
|
||||
max_results: 100,
|
||||
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Slack query
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
|
|
|
|||
|
|
@ -141,6 +141,17 @@ impl DetailsReporter {
|
|||
ds.slack_links().get(path).cloned()
|
||||
}
|
||||
|
||||
fn s3_display_path(&self, path: &std::path::Path) -> Option<String> {
|
||||
let ds = self.datastore.lock().ok()?;
|
||||
for (dir, bucket) in ds.s3_buckets().iter() {
|
||||
if path.starts_with(dir) {
|
||||
let rel = path.strip_prefix(dir).ok()?;
|
||||
return Some(format!("s3://{}/{}", bucket, rel.display()));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn docker_display_path(&self, path: &std::path::Path) -> Option<String> {
|
||||
let ds = self.datastore.lock().ok()?;
|
||||
for (dir, image) in ds.docker_images().iter() {
|
||||
|
|
|
|||
|
|
@ -99,20 +99,22 @@ impl DetailsReporter {
|
|||
let file_path = rm
|
||||
.origin
|
||||
.iter()
|
||||
.find_map(|origin| {
|
||||
if let Origin::File(e) = origin {
|
||||
.find_map(|origin| match origin {
|
||||
Origin::File(e) => {
|
||||
if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
Some(url)
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
Some(url)
|
||||
} else if let Some(mapped) = self.s3_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else if let Some(mapped) = self.docker_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else {
|
||||
Some(e.path.display().to_string())
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
Origin::Extended(e) => e.path().map(|p| p.display().to_string()),
|
||||
_ => None,
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
|
|
@ -258,11 +260,15 @@ impl DetailsReporter {
|
|||
Some(url)
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
Some(url)
|
||||
} else if let Some(mapped) = self.s3_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else if let Some(mapped) = self.docker_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else {
|
||||
Some(e.path.display().to_string())
|
||||
}
|
||||
} else if let Origin::Extended(e) = origin {
|
||||
e.path().map(|p| p.display().to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
|
@ -437,10 +443,14 @@ mod tests {
|
|||
jira_url: None,
|
||||
jql: None,
|
||||
max_results: 100,
|
||||
// Docker image scanning
|
||||
// Slack options
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
|
||||
docker_image: Vec::new(),
|
||||
// clone / history options
|
||||
|
|
|
|||
|
|
@ -218,6 +218,8 @@ impl<'a> Display for PrettyFinding<'a> {
|
|||
url
|
||||
} else if let Some(url) = reporter.slack_message_url(&e.path) {
|
||||
url
|
||||
} else if let Some(mapped) = reporter.s3_display_path(&e.path) {
|
||||
mapped
|
||||
} else if let Some(mapped) = reporter.docker_display_path(&e.path) {
|
||||
mapped
|
||||
} else {
|
||||
|
|
@ -233,13 +235,23 @@ impl<'a> Display for PrettyFinding<'a> {
|
|||
}
|
||||
)?;
|
||||
}
|
||||
Origin::Extended(e) => {
|
||||
if let Some(p) = e.path() {
|
||||
let display_path = p.display().to_string();
|
||||
writeln!(
|
||||
f,
|
||||
" |Path..........: {}",
|
||||
if rm.validation_success {
|
||||
reporter.style_active_creds(&display_path).to_string()
|
||||
} else {
|
||||
display_path
|
||||
}
|
||||
)?;
|
||||
}
|
||||
}
|
||||
Origin::GitRepo(e) => {
|
||||
reporter.write_git_metadata(f, e, args, source_span.start.line)?;
|
||||
}
|
||||
Origin::Extended(e) => {
|
||||
writeln!(f, " |Extended......: {}", reporter.style_metadata(e).to_string())?;
|
||||
// Convert StyledObject to String
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
|
|
@ -353,6 +365,11 @@ fn test_pretty_format_with_nan_entropy_panics() {
|
|||
// Slack options
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
|
|
|
|||
|
|
@ -75,6 +75,8 @@ impl DetailsReporter {
|
|||
url
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
url
|
||||
} else if let Some(mapped) = self.s3_display_path(&e.path) {
|
||||
mapped
|
||||
} else {
|
||||
e.path.display().to_string()
|
||||
};
|
||||
|
|
@ -82,6 +84,16 @@ impl DetailsReporter {
|
|||
sarif::ArtifactLocationBuilder::default().uri(uri).build().ok()?,
|
||||
);
|
||||
}
|
||||
Origin::Extended(e) => {
|
||||
if let Some(p) = e.path() {
|
||||
artifact_locations.push(
|
||||
sarif::ArtifactLocationBuilder::default()
|
||||
.uri(p.display().to_string())
|
||||
.build()
|
||||
.ok()?,
|
||||
);
|
||||
}
|
||||
}
|
||||
Origin::GitRepo(e) => {
|
||||
// Extract and store Git metadata
|
||||
if let Some(git_metadata) = self.extract_git_metadata(e, source_span) {
|
||||
|
|
@ -111,7 +123,6 @@ impl DetailsReporter {
|
|||
);
|
||||
}
|
||||
}
|
||||
Origin::Extended(_) => (),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -212,11 +223,18 @@ impl DetailsReporter {
|
|||
url
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
url
|
||||
} else if let Some(mapped) = self.s3_display_path(&e.path) {
|
||||
mapped
|
||||
} else {
|
||||
e.path.display().to_string()
|
||||
};
|
||||
msg.push_str(&format!("Location: {}\n", uri));
|
||||
}
|
||||
Origin::Extended(e) => {
|
||||
if let Some(p) = e.path() {
|
||||
msg.push_str(&format!("Location: {}\n", p.display()));
|
||||
}
|
||||
}
|
||||
Origin::GitRepo(e) => {
|
||||
if let Some(cs) = &e.first_commit {
|
||||
let repo_url = get_repo_url(&e.repo_path)
|
||||
|
|
@ -235,9 +253,6 @@ impl DetailsReporter {
|
|||
msg.push_str(&format!("File: {}", cs.blob_path));
|
||||
}
|
||||
}
|
||||
Origin::Extended(e) => {
|
||||
msg.push_str(&format!("Extended: {}\n", e));
|
||||
}
|
||||
}
|
||||
msg
|
||||
} else {
|
||||
|
|
|
|||
144
src/s3.rs
Normal file
144
src/s3.rs
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
use anyhow::{Context, Result};
|
||||
use aws_config::{defaults, meta::region::RegionProviderChain, BehaviorVersion};
|
||||
use aws_credential_types::Credentials;
|
||||
use aws_sdk_s3::{
|
||||
Client,
|
||||
operation::list_objects_v2::ListObjectsV2Error, // modeled service error
|
||||
error::ProvideErrorMetadata, // for .code()
|
||||
};
|
||||
use aws_types::region::Region;
|
||||
use reqwest; // HTTP client for HEAD fallback
|
||||
|
||||
pub async fn visit_bucket_objects<F>(
|
||||
bucket: &str,
|
||||
prefix: Option<&str>,
|
||||
role_arn: Option<&str>,
|
||||
profile: Option<&str>,
|
||||
mut visitor: F,
|
||||
) -> Result<()>
|
||||
where
|
||||
F: FnMut(String, Vec<u8>) -> Result<()>,
|
||||
{
|
||||
// Helper to build ConfigLoader with profile/creds/no_credentials
|
||||
let build_loader = || {
|
||||
let mut loader = defaults(BehaviorVersion::latest());
|
||||
if let Some(p) = profile {
|
||||
loader = loader.profile_name(p);
|
||||
}
|
||||
if let (Ok(k), Ok(s)) = (std::env::var("KF_AWS_KEY"), std::env::var("KF_AWS_SECRET")) {
|
||||
loader = loader.credentials_provider(Credentials::new(k, s, None, None, "kf_env"));
|
||||
}
|
||||
if profile.is_none() && std::env::var("KF_AWS_KEY").is_err() && role_arn.is_none() {
|
||||
loader = loader.no_credentials();
|
||||
}
|
||||
loader
|
||||
};
|
||||
|
||||
// Initial client in default→us-east-1
|
||||
let default_region = RegionProviderChain::default_provider().or_else("us-east-1");
|
||||
let mut config = build_loader().region(default_region).load().await;
|
||||
let mut client = if let Some(role) = role_arn {
|
||||
let assume = aws_config::sts::AssumeRoleProvider::builder(role.to_string())
|
||||
.session_name("kingfisher")
|
||||
.configure(&config)
|
||||
.build()
|
||||
.await;
|
||||
let conf = aws_sdk_s3::config::Builder::from(&config)
|
||||
.credentials_provider(assume)
|
||||
.build();
|
||||
Client::from_conf(conf)
|
||||
} else {
|
||||
Client::new(&config)
|
||||
};
|
||||
|
||||
let mut continuation_token: Option<String> = None;
|
||||
loop {
|
||||
let mut req = client.list_objects_v2().bucket(bucket);
|
||||
if let Some(p) = prefix {
|
||||
req = req.prefix(p);
|
||||
}
|
||||
if let Some(ref token) = continuation_token {
|
||||
req = req.continuation_token(token);
|
||||
}
|
||||
|
||||
let resp = match req.send().await {
|
||||
Ok(r) => r,
|
||||
|
||||
// On error, extract the modeled service error
|
||||
Err(err) => {
|
||||
let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError
|
||||
|
||||
// If the bucket must be addressed at another region...
|
||||
if svc_err.code() == Some("PermanentRedirect") {
|
||||
// HEAD request to get x-amz-bucket-region header
|
||||
let url = format!("https://{bucket}.s3.amazonaws.com");
|
||||
let head = reqwest::Client::new()
|
||||
.head(&url)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to HEAD bucket for region")?;
|
||||
let region_str = head
|
||||
.headers()
|
||||
.get("x-amz-bucket-region")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("us-east-1")
|
||||
.to_string();
|
||||
|
||||
// Rebuild client in the correct region
|
||||
let override_region = RegionProviderChain::first_try(Region::new(region_str))
|
||||
.or_else("us-east-1");
|
||||
config = build_loader().region(override_region).load().await;
|
||||
client = if let Some(r) = role_arn {
|
||||
let assume = aws_config::sts::AssumeRoleProvider::builder(r.to_string())
|
||||
.session_name("kingfisher")
|
||||
.configure(&config)
|
||||
.build()
|
||||
.await;
|
||||
let conf = aws_sdk_s3::config::Builder::from(&config)
|
||||
.credentials_provider(assume)
|
||||
.build();
|
||||
Client::from_conf(conf)
|
||||
} else {
|
||||
Client::new(&config)
|
||||
};
|
||||
|
||||
// Reset pagination and retry list
|
||||
continuation_token = None;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Any other error is fatal
|
||||
return Err(svc_err).context("Failed to list objects in bucket");
|
||||
}
|
||||
};
|
||||
|
||||
// Process objects
|
||||
for obj in resp.contents.unwrap_or_default() {
|
||||
if let Some(key) = obj.key {
|
||||
let data = client
|
||||
.get_object()
|
||||
.bucket(bucket)
|
||||
.key(&key)
|
||||
.send()
|
||||
.await
|
||||
.with_context(|| format!("Failed to fetch object {}", key))?
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.context("Failed to read S3 object body")?
|
||||
.into_bytes()
|
||||
.to_vec();
|
||||
visitor(key, data)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Continue or finish pagination
|
||||
if resp.is_truncated.unwrap_or(false) {
|
||||
continuation_token = resp.next_continuation_token;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -8,6 +8,7 @@ use indicatif::{HumanCount, ProgressBar, ProgressStyle};
|
|||
use tokio::time::Duration;
|
||||
use tracing::{debug, error, info};
|
||||
|
||||
use crate::blob::BlobIdMap;
|
||||
use crate::{
|
||||
blob::BlobMetadata,
|
||||
cli::{
|
||||
|
|
@ -21,10 +22,15 @@ use crate::{
|
|||
git_binary::{CloneMode, Git},
|
||||
git_url::GitUrl,
|
||||
github, gitlab, jira,
|
||||
matcher::Match,
|
||||
origin::OriginSet,
|
||||
slack, PathBuf,
|
||||
matcher::{Match, Matcher, MatcherStats},
|
||||
origin::{Origin, OriginSet},
|
||||
rules_database::RulesDatabase,
|
||||
s3,
|
||||
scanner::processing::BlobProcessor,
|
||||
scanner_pool::ScannerPool,
|
||||
slack, guesser::Guesser, PathBuf,
|
||||
};
|
||||
|
||||
pub type DatastoreMessage = (OriginSet, BlobMetadata, Vec<(Option<f64>, Match)>);
|
||||
|
||||
pub fn clone_or_update_git_repos(
|
||||
|
|
@ -284,3 +290,64 @@ pub async fn fetch_slack_messages(
|
|||
}
|
||||
Ok(vec![output_dir])
|
||||
}
|
||||
|
||||
|
||||
pub async fn fetch_s3_objects(
|
||||
args: &scan::ScanArgs,
|
||||
datastore: &Arc<Mutex<findings_store::FindingsStore>>,
|
||||
rules_db: &RulesDatabase,
|
||||
matcher_stats: &Mutex<MatcherStats>,
|
||||
enable_profiling: bool,
|
||||
shared_profiler: Arc<crate::rule_profiling::ConcurrentRuleProfiler>,
|
||||
) -> Result<()> {
|
||||
let Some(bucket) = args.input_specifier_args.s3_bucket.as_deref() else {
|
||||
return Ok(());
|
||||
};
|
||||
let prefix = args.input_specifier_args.s3_prefix.as_deref();
|
||||
let role_arn = args.input_specifier_args.role_arn.as_deref();
|
||||
let profile = args.input_specifier_args.aws_local_profile.as_deref();
|
||||
|
||||
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
|
||||
let seen_blobs = BlobIdMap::new();
|
||||
let matcher = Matcher::new(
|
||||
rules_db,
|
||||
scanner_pool,
|
||||
&seen_blobs,
|
||||
Some(matcher_stats),
|
||||
enable_profiling,
|
||||
Some(shared_profiler.clone()),
|
||||
)?;
|
||||
let guesser = Guesser::new().expect("should be able to create filetype guesser");
|
||||
let mut processor = BlobProcessor { matcher, guesser };
|
||||
let bucket_name = bucket.to_string();
|
||||
|
||||
s3::visit_bucket_objects(bucket, prefix, role_arn, profile, |key, bytes| {
|
||||
let origin = OriginSet::new(
|
||||
Origin::from_extended(serde_json::json!({
|
||||
"path": format!("s3://{}/{}", bucket_name, key)
|
||||
})),
|
||||
Vec::new(),
|
||||
);
|
||||
let blob = crate::blob::Blob::from_bytes(bytes);
|
||||
|
||||
if let Some((origin, blob_md, scored_matches)) = processor.run(origin, blob, args.no_dedup)? {
|
||||
// Wrap origin & metadata once:
|
||||
let origin_arc = Arc::new(origin);
|
||||
let blob_arc = Arc::new(blob_md);
|
||||
|
||||
// Now build a batch of exactly one FindingsStoreMessage per Match
|
||||
let mut batch = Vec::with_capacity(scored_matches.len());
|
||||
for (_score, m) in scored_matches {
|
||||
batch.push((origin_arc.clone(), blob_arc.clone(), m));
|
||||
}
|
||||
|
||||
// Call record with the right type
|
||||
let added = datastore.lock().unwrap().record(batch, !args.no_dedup);
|
||||
debug!("Added {} new S3 blobs", added);
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -18,7 +18,9 @@ use crate::{
|
|||
rules_database::RulesDatabase,
|
||||
scanner::{
|
||||
clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos,
|
||||
repos::{enumerate_gitlab_repos, fetch_jira_issues, fetch_slack_messages},
|
||||
repos::{
|
||||
enumerate_gitlab_repos, fetch_jira_issues, fetch_s3_objects, fetch_slack_messages,
|
||||
},
|
||||
run_secret_validation, save_docker_images,
|
||||
summary::print_scan_summary,
|
||||
},
|
||||
|
|
@ -72,6 +74,7 @@ pub async fn run_async_scan(
|
|||
let slack_dirs = fetch_slack_messages(args, global_args, &datastore).await?;
|
||||
input_roots.extend(slack_dirs);
|
||||
|
||||
|
||||
// Save Docker images if specified
|
||||
if !args.input_specifier_args.docker_image.is_empty() {
|
||||
let clone_root = {
|
||||
|
|
@ -93,22 +96,39 @@ pub async fn run_async_scan(
|
|||
}
|
||||
}
|
||||
|
||||
if input_roots.is_empty() {
|
||||
bail!("No inputs to scan");
|
||||
}
|
||||
let shared_profiler = Arc::new(ConcurrentRuleProfiler::new());
|
||||
let enable_profiling = args.rule_stats;
|
||||
let matcher_stats = Mutex::new(MatcherStats::default());
|
||||
let _inputs = enumerate_filesystem_inputs(
|
||||
|
||||
// Fetch S3 objects if requested (scanned immediately)
|
||||
fetch_s3_objects(
|
||||
args,
|
||||
datastore.clone(),
|
||||
&input_roots,
|
||||
progress_enabled,
|
||||
&datastore,
|
||||
rules_db,
|
||||
&matcher_stats,
|
||||
enable_profiling,
|
||||
Arc::clone(&shared_profiler),
|
||||
&matcher_stats,
|
||||
)?;
|
||||
)
|
||||
.await?;
|
||||
|
||||
let has_s3 = args.input_specifier_args.s3_bucket.is_some();
|
||||
if input_roots.is_empty() && !has_s3 {
|
||||
bail!("No inputs to scan");
|
||||
}
|
||||
|
||||
if !input_roots.is_empty() {
|
||||
let _inputs = enumerate_filesystem_inputs(
|
||||
args,
|
||||
datastore.clone(),
|
||||
&input_roots,
|
||||
progress_enabled,
|
||||
rules_db,
|
||||
enable_profiling,
|
||||
Arc::clone(&shared_profiler),
|
||||
&matcher_stats,
|
||||
)?;
|
||||
}
|
||||
|
||||
|
||||
if !args.no_dedup {
|
||||
// Final deduplication step before validation (or before reporting)
|
||||
|
|
|
|||
95
testdata/baseline/baseline_test.go
vendored
95
testdata/baseline/baseline_test.go
vendored
|
|
@ -1,95 +0,0 @@
|
|||
package core
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"testing"
|
||||
|
||||
"github.com/10gen/kingfisher/core"
|
||||
)
|
||||
|
||||
func rootDir() string {
|
||||
_, b, _, _ := runtime.Caller(0)
|
||||
return filepath.Dir(path.Dir(b))
|
||||
}
|
||||
|
||||
func NewTestSession(baselineFilename string) (*core.Session, error) {
|
||||
session := core.PrepareTestSession()
|
||||
session.Testing = true
|
||||
session.ReqScanMode = core.LocalFiles
|
||||
session.Options.ValidateSecrets = true
|
||||
session.Options.BaselineFilename = baselineFilename
|
||||
session.Options.KingfisherTempDir = core.GetTempDir()
|
||||
core.GlobalSessionRef = session
|
||||
session.InitializeTargetModeClient()
|
||||
return session, nil
|
||||
}
|
||||
|
||||
func beginTesting(t *testing.T, testfile string, expectedSkippedFindings, expectedFindingsSuppressKingfisher int) {
|
||||
rootdir := rootDir()
|
||||
testfilePath := filepath.Join(rootdir, testfile)
|
||||
_, filename := filepath.Split(testfilePath)
|
||||
|
||||
byteBaseLine := []byte(`FileContent:
|
||||
matches: []
|
||||
FilePaths:
|
||||
matches: []
|
||||
ExactFindings:
|
||||
matches:
|
||||
- filepath: testdata/ruby_vulnerable.rb
|
||||
findinghash: 701c302855ecc97e8415c44f37123bc2ca0c3343bd87028682aaaeaa90568084
|
||||
linenum: 40
|
||||
lastupdated: Tue Apr 16 13:04:10 PDT 2024
|
||||
- filepath: testdata/ruby_vulnerable.rb
|
||||
findinghash: 065d1e2faeae9328ca8b2f2754afa6c196d3ef2da2720dabca7e5161d67a6ca1
|
||||
linenum: 40
|
||||
lastupdated: Tue Apr 16 13:04:10 PDT 2024
|
||||
`)
|
||||
|
||||
// Write byteBaseline to a file in a temp directory and give yaml extension
|
||||
tempFile, err := ioutil.TempFile("", "baseline-*.yaml")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer os.Remove(tempFile.Name()) // Clean up the file after test
|
||||
|
||||
if _, err := tempFile.Write(byteBaseLine); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := tempFile.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
sess, err := NewTestSession(tempFile.Name())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
matchFile := core.NewMatchFile(testfilePath, sess, nil)
|
||||
core.BeginFileAnalysis(matchFile)
|
||||
if sess.Stats.SkippedFindings != expectedSkippedFindings {
|
||||
core.PrintSessionStats(sess)
|
||||
t.Errorf("Expected %d findings, got %d -- file: <%s>", expectedSkippedFindings, sess.Stats.SkippedFindings, filename)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBaselineFeature(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
fileName string
|
||||
expectedSkippedFindings int
|
||||
expectedFindingsSuppressKingfisher int
|
||||
}{
|
||||
{"ruby_vulnerable.rb", 3, 0},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.fileName, func(t *testing.T) {
|
||||
beginTesting(t, tt.fileName, tt.expectedSkippedFindings, tt.expectedFindingsSuppressKingfisher)
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -84,6 +84,11 @@ rules:
|
|||
max_results: 100,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
|
|
|
|||
|
|
@ -71,6 +71,11 @@ fn test_github_remote_scan() -> Result<()> {
|
|||
max_results: 100,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
|
|
|
|||
|
|
@ -70,6 +70,11 @@ fn test_gitlab_remote_scan() -> Result<()> {
|
|||
max_results: 100,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
git_clone: GitCloneMode::Bare,
|
||||
|
|
|
|||
28
tests/int_s3.rs
Normal file
28
tests/int_s3.rs
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
use anyhow::Result;
|
||||
use kingfisher::s3::visit_bucket_objects;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_visit_public_bucket() -> Result<()> {
|
||||
let mut objects = Vec::new();
|
||||
visit_bucket_objects("awsglue-datasets", Some("examples/us-legislators/all/"), None, None, |key, data| {
|
||||
objects.push((key, data));
|
||||
Ok(())
|
||||
})
|
||||
.await?;
|
||||
|
||||
assert!(
|
||||
objects.iter().any(|(k, _)| k.ends_with("events.json")),
|
||||
"events.json object not found"
|
||||
);
|
||||
let creds = objects
|
||||
.iter()
|
||||
.find(|(k, _)| k.ends_with("events.json"))
|
||||
.expect("events.json object");
|
||||
|
||||
let body = std::str::from_utf8(&creds.1)?;
|
||||
assert!(
|
||||
body.contains("Q4450263"),
|
||||
"expected events.json file"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -59,6 +59,10 @@ impl TestContext {
|
|||
jql: None,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
max_results: 10,
|
||||
docker_image: Vec::new(),
|
||||
git_clone: GitCloneMode::Bare,
|
||||
|
|
@ -147,6 +151,11 @@ async fn test_scan_slack_messages() -> Result<()> {
|
|||
slack_query: Some("test".into()),
|
||||
slack_api_url: Url::parse(&format!("{}/", server.uri()))?,
|
||||
max_results: 10,
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
docker_image: Vec::new(),
|
||||
git_clone: GitCloneMode::Bare,
|
||||
git_history: GitHistoryMode::Full,
|
||||
|
|
|
|||
|
|
@ -127,6 +127,11 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
|
|||
max_results: 100,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
|
|
|
|||
|
|
@ -70,6 +70,11 @@ impl TestContext {
|
|||
max_results: 100,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
|
|
@ -142,6 +147,11 @@ impl TestContext {
|
|||
max_results: 100,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue