forked from mirrors/kingfisher
commit
b2a239dfbc
27 changed files with 835 additions and 1367 deletions
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
|
|
@ -24,6 +24,8 @@ jobs:
|
|||
run: make ubuntu-arm64
|
||||
- name: Run tests
|
||||
run: make tests
|
||||
env:
|
||||
CARGO_BUILD_JOBS: 1
|
||||
|
||||
macos-arm64:
|
||||
name: macOS arm64
|
||||
|
|
|
|||
|
|
@ -2,6 +2,12 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [1.32.0]
|
||||
- Added support for scanning AWS S3 buckets via `--s3-bucket` and optional `--s3-prefix`
|
||||
- Added `--role-arn` and `--aws-local-profile` flags for S3 authentication alongside `KF_AWS_KEY`/`KF_AWS_SECRET`
|
||||
- Added progress bar for scanning S3 buckets
|
||||
- Refactored output reporting and formatting logic
|
||||
|
||||
## [1.31.0]
|
||||
- New rules: Telegram bot token, OpenWeatherMap, Apify, Groq
|
||||
- New OpenAI detectors added (@joshlarsen)
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ publish = false
|
|||
|
||||
[package]
|
||||
name = "kingfisher"
|
||||
version = "1.31.0"
|
||||
version = "1.32.0"
|
||||
description = "MongoDB's blazingly fast secret scanning and validation tool"
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
|
@ -186,6 +186,7 @@ oci-client = { version = "0.15", default-features = false, features = ["rustls-t
|
|||
walkdir = "2.5.0"
|
||||
p256 = "0.13.2"
|
||||
ed25519-dalek = { version = "2.2", features = ["pkcs8"] }
|
||||
aws-sdk-s3 = "1.100.0"
|
||||
|
||||
[dependencies.tikv-jemallocator]
|
||||
version = "0.6"
|
||||
|
|
@ -207,7 +208,7 @@ rand_chacha = "0.9.0"
|
|||
|
||||
[profile.release]
|
||||
debug = false
|
||||
strip = "debuginfo"
|
||||
strip = true #"debuginfo"
|
||||
opt-level = 3 # Maximum optimization for performance
|
||||
lto = true # Enable Link Time Optimization
|
||||
codegen-units = 1 # Single codegen unit: better optimization at the cost of slower compilation
|
||||
|
|
|
|||
63
README.md
63
README.md
|
|
@ -13,7 +13,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co
|
|||
## What Kingfisher Adds
|
||||
- **Live validation** via cloud-provider APIs
|
||||
- **Language-aware detection** (source-code parsing) for ~20 languages
|
||||
- **Extra targets**: GitLab repos, Docker images, Jira issues, and Slack messages
|
||||
- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages
|
||||
- **Baseline mode**: ignore known secrets, flag only new ones
|
||||
- **Native Windows** binary
|
||||
|
||||
|
|
@ -26,6 +26,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co
|
|||
- **Docker images**: public or private via `--docker-image`
|
||||
- **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql`
|
||||
- **Slack messages**: query‑based scans with `--slack-query`
|
||||
- **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous
|
||||
- **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md))
|
||||
|
||||
**Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation)
|
||||
|
|
@ -109,6 +110,15 @@ docker run --rm \
|
|||
ghcr.io/mongodb/kingfisher:latest \
|
||||
scan --git-url https://github.com/org/private_repo.git
|
||||
|
||||
# Scan an S3 bucket
|
||||
# Credentials can come from KF_AWS_KEY/KF_AWS_SECRET, --role-arn, or --aws-local-profile
|
||||
docker run --rm \
|
||||
-e KF_AWS_KEY=AKIA... \
|
||||
-e KF_AWS_SECRET=g5nYW... \
|
||||
ghcr.io/mongodb/kingfisher:latest \
|
||||
scan --s3-bucket bucket-name
|
||||
|
||||
|
||||
# Scan and write a JSON report locally
|
||||
# Here we:
|
||||
# 1. Mount $PWD → /proj
|
||||
|
|
@ -264,6 +274,57 @@ kingfisher scan ./my-project \
|
|||
--exclude tests \
|
||||
-v
|
||||
```
|
||||
## Scan an S3 bucket
|
||||
You can scan S3 objects directly:
|
||||
|
||||
```bash
|
||||
kingfisher scan --s3-bucket bucket-name [--s3-prefix path/]
|
||||
```
|
||||
|
||||
Credential resolution happens in this order:
|
||||
|
||||
1. `KF_AWS_KEY` and `KF_AWS_SECRET` environment variables
|
||||
2. `--aws-local-profile` pointing to a profile in `~/.aws/config` (works with AWS SSO)
|
||||
3. anonymous access for public buckets
|
||||
|
||||
If `--role-arn` is supplied, the credentials from steps 1–2 are used to assume that role.
|
||||
|
||||
Examples:
|
||||
|
||||
```bash
|
||||
# using explicit keys
|
||||
export KF_AWS_KEY=AKIA...
|
||||
export KF_AWS_SECRET=g5nYW...
|
||||
kingfisher scan --s3-bucket some-example-bucket
|
||||
|
||||
# Above can also be run as:
|
||||
KF_AWS_KEY=AKIA... KF_AWS_SECRET=g5nYW... kingfisher scan --s3-bucket some-example-bucket
|
||||
|
||||
# using a local profile (e.g., SSO) defined in your AWS config file (~/.aws/config)
|
||||
kingfisher scan --s3-bucket some-example-bucket --aws-local-profile default
|
||||
|
||||
# anonymous scan of a bucket, with an object prefix so that only a subset of the S3 bucket is scanned
|
||||
kingfisher scan \
|
||||
--s3-bucket awsglue-datasets \
|
||||
--s3-prefix examples/us-legislators/all
|
||||
|
||||
# assuming a role when scanning
|
||||
kingfisher scan --s3-bucket some-example-bucket \
|
||||
--role-arn arn:aws:iam::123456789012:role/MyRole
|
||||
|
||||
# anonymous scan of a public bucket
|
||||
kingfisher scan --s3-bucket some-example-bucket
|
||||
```
|
||||
|
||||
Docker example:
|
||||
|
||||
```bash
|
||||
docker run --rm \
|
||||
-e KF_AWS_KEY=AKIA... \
|
||||
-e KF_AWS_SECRET=g5nYW... \
|
||||
ghcr.io/mongodb/kingfisher:latest \
|
||||
scan --s3-bucket bucket-name
|
||||
```
|
||||
## Scanning Docker Images
|
||||
|
||||
Kingfisher will first try to use any locally available image, then fall back to pulling via OCI.
|
||||
|
|
|
|||
|
|
@ -20,6 +20,6 @@ rules:
|
|||
[a-z0-9\/._~-]*
|
||||
)?
|
||||
min_entropy: 3.0
|
||||
confidence: low
|
||||
confidence: medium
|
||||
examples:
|
||||
- https://eaRIWNkE:qyOIhJiM@j2LYY414Q5cCYD
|
||||
|
|
@ -96,7 +96,6 @@ rules:
|
|||
["']
|
||||
min_entropy: 3.3
|
||||
confidence: low
|
||||
categories: [fuzzy, generic, secret]
|
||||
examples:
|
||||
- |
|
||||
password = "super$ecret"
|
||||
|
|
|
|||
21
data/rules/vmware.yml
Normal file
21
data/rules/vmware.yml
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
rules:
|
||||
- name: Credentials in Connect-VIServer Invocation
|
||||
id: kingfisher.vmware.1
|
||||
pattern: |
|
||||
(?xi)
|
||||
Connect-VIServer
|
||||
.{0,50}
|
||||
-User \s+ (\S{3,30}) \s+ (?# username )
|
||||
.{0,50}
|
||||
-Password \s+ (\S{3,30}) (?# password )
|
||||
|
||||
examples:
|
||||
- 'Connect-VIServer -Server 192.168.1.51 -User administrator@vSphere.local -Password VMware1!'
|
||||
- |
|
||||
#Set-PowerCLIConfiguration -InvalidCertificateAction:Ignore
|
||||
Connect-VIServer "$endpoint" -User "$username" -Password "$password" | Out-Null
|
||||
- 'Connect-VIServer $ESXiHost.EsxiHost -user $ESXiUser -password $ESXipass'
|
||||
- '$null = connect-viserver vc.lab.local -user administrator@vsphere.local -password VMware1!'
|
||||
|
||||
references:
|
||||
- https://developer.broadcom.com/powercli/latest/vmware.vimautomation.core/commands/connect-viserver
|
||||
|
|
@ -28,7 +28,8 @@ pub struct InputSpecifierArgs {
|
|||
"all_gitlab_groups",
|
||||
"jira_url",
|
||||
"docker_image",
|
||||
"slack_query"
|
||||
"slack_query",
|
||||
"s3_bucket"
|
||||
]),
|
||||
value_hint = ValueHint::AnyPath
|
||||
)]
|
||||
|
|
@ -107,6 +108,22 @@ pub struct InputSpecifierArgs {
|
|||
#[arg(long, default_value_t = 100)]
|
||||
pub max_results: usize,
|
||||
|
||||
/// Scan the specified S3 bucket
|
||||
#[arg(long)]
|
||||
pub s3_bucket: Option<String>,
|
||||
|
||||
/// Optional prefix within the S3 bucket
|
||||
#[arg(long, requires = "s3_bucket")]
|
||||
pub s3_prefix: Option<String>,
|
||||
|
||||
/// AWS IAM role ARN to assume for S3 access
|
||||
#[arg(long, requires = "s3_bucket")]
|
||||
pub role_arn: Option<String>,
|
||||
|
||||
/// Use credentials from a local AWS profile in ~/.aws/config
|
||||
#[arg(long, requires = "s3_bucket")]
|
||||
pub aws_local_profile: Option<String>,
|
||||
|
||||
/// Docker/OCI images to scan (no local Docker required)
|
||||
#[arg(long = "docker-image")]
|
||||
pub docker_image: Vec<String>,
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ pub struct FindingsStore {
|
|||
origin_meta: FxHashMap<u64, Arc<OriginSet>>,
|
||||
docker_images: FxHashMap<PathBuf, String>,
|
||||
slack_links: FxHashMap<PathBuf, String>,
|
||||
s3_buckets: FxHashMap<PathBuf, String>,
|
||||
}
|
||||
impl FindingsStore {
|
||||
pub fn new(clone_dir: PathBuf) -> Self {
|
||||
|
|
@ -73,6 +74,7 @@ impl FindingsStore {
|
|||
bloom_items: 0,
|
||||
docker_images: FxHashMap::default(),
|
||||
slack_links: FxHashMap::default(),
|
||||
s3_buckets: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -306,6 +308,14 @@ impl FindingsStore {
|
|||
&self.slack_links
|
||||
}
|
||||
|
||||
/// Records that local directory `dir` corresponds to the S3 bucket `bucket`.
///
/// The pair is stored in the `s3_buckets` map keyed by `dir`; registering the
/// same directory again replaces the previously stored bucket name.
pub fn register_s3_bucket(&mut self, dir: PathBuf, bucket: String) {
|
||||
self.s3_buckets.insert(dir, bucket);
|
||||
}
|
||||
|
||||
/// Returns the map of registered local directories to their S3 bucket names,
/// as populated by `register_s3_bucket`.
pub fn s3_buckets(&self) -> &FxHashMap<PathBuf, String> {
|
||||
&self.s3_buckets
|
||||
}
|
||||
|
||||
pub fn get_finding_data_iter(
|
||||
&self,
|
||||
) -> impl Iterator<Item = finding_data::FindingMetadata> + '_ {
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ pub mod rule_loader;
|
|||
pub mod rule_profiling;
|
||||
pub mod rules;
|
||||
pub mod rules_database;
|
||||
pub mod s3;
|
||||
pub mod safe_list;
|
||||
pub mod scanner;
|
||||
pub mod scanner_pool;
|
||||
|
|
|
|||
|
|
@ -286,6 +286,11 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
|
|||
jira_url: None,
|
||||
jql: None,
|
||||
max_results: 100,
|
||||
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Slack query
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
|
|
|
|||
287
src/reporter.rs
287
src/reporter.rs
|
|
@ -5,12 +5,12 @@ use std::{
|
|||
|
||||
use anyhow::Result;
|
||||
use http::StatusCode;
|
||||
use indenter::indented;
|
||||
use schemars::JsonSchema;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
blob::BlobMetadata,
|
||||
bstring_escape::Escaped,
|
||||
cli,
|
||||
cli::global::GlobalArgs,
|
||||
finding_data, findings_store,
|
||||
|
|
@ -23,7 +23,7 @@ mod json_format;
|
|||
mod pretty_format;
|
||||
mod sarif_format;
|
||||
pub mod styles;
|
||||
use std::{hash::Hash, io::IsTerminal};
|
||||
use std::io::IsTerminal;
|
||||
|
||||
use styles::{StyledObject, Styles};
|
||||
|
||||
|
|
@ -141,6 +141,17 @@ impl DetailsReporter {
|
|||
ds.slack_links().get(path).cloned()
|
||||
}
|
||||
|
||||
fn s3_display_path(&self, path: &std::path::Path) -> Option<String> {
|
||||
let ds = self.datastore.lock().ok()?;
|
||||
for (dir, bucket) in ds.s3_buckets().iter() {
|
||||
if path.starts_with(dir) {
|
||||
let rel = path.strip_prefix(dir).ok()?;
|
||||
return Some(format!("s3://{}/{}", bucket, rel.display()));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn docker_display_path(&self, path: &std::path::Path) -> Option<String> {
|
||||
let ds = self.datastore.lock().ok()?;
|
||||
for (dir, image) in ds.docker_images().iter() {
|
||||
|
|
@ -156,19 +167,6 @@ impl DetailsReporter {
|
|||
None
|
||||
}
|
||||
|
||||
fn gather_findings(&self) -> Result<Vec<Finding>> {
|
||||
let metadata_list = self.get_finding_data()?;
|
||||
let all_matches = self.get_filtered_matches()?;
|
||||
let mut findings = Vec::new();
|
||||
for md in metadata_list {
|
||||
// Filter matches that belong to this metadata if needed
|
||||
let matches_for_md =
|
||||
all_matches.iter().filter(|m| m.m.rule_name == md.rule_name).cloned().collect();
|
||||
findings.push(Finding::new(md.clone(), matches_for_md));
|
||||
}
|
||||
Ok(findings)
|
||||
}
|
||||
|
||||
fn process_matches(&self, only_valid: bool, filter_visible: bool) -> Result<Vec<ReportMatch>> {
|
||||
let datastore = self.datastore.lock().unwrap();
|
||||
Ok(datastore
|
||||
|
|
@ -215,38 +213,6 @@ impl DetailsReporter {
|
|||
.collect())
|
||||
}
|
||||
|
||||
// fn process_matches(&self, only_valid: bool) -> Result<Vec<ReportMatch>> {
|
||||
// let datastore = self.datastore.lock().unwrap();
|
||||
// Ok(datastore
|
||||
// .get_matches()
|
||||
// .iter()
|
||||
// .filter(|msg| {
|
||||
// let (_origin, _blob_metadata, match_item) = &***msg;
|
||||
// if only_valid {
|
||||
// match_item.validation_success
|
||||
// && match_item.validation_response_status != StatusCode::CONTINUE.as_u16()
|
||||
// && match_item.visible
|
||||
// } else {
|
||||
// match_item.visible
|
||||
// }
|
||||
// })
|
||||
// .map(|msg| {
|
||||
// let (origin, blob_metadata, match_item) = &**msg;
|
||||
// ReportMatch {
|
||||
// origin: origin.clone(),
|
||||
// blob_metadata: blob_metadata.clone(),
|
||||
// m: match_item.clone(),
|
||||
// comment: None,
|
||||
// visible: match_item.visible,
|
||||
// match_confidence: match_item.rule_confidence,
|
||||
// validation_response_body: match_item.validation_response_body.clone(),
|
||||
// validation_response_status: match_item.validation_response_status,
|
||||
// validation_success: match_item.validation_success,
|
||||
// }
|
||||
// })
|
||||
// .collect())
|
||||
// }
|
||||
|
||||
pub fn get_filtered_matches(&self) -> Result<Vec<ReportMatch>> {
|
||||
self.process_matches(self.only_valid, true)
|
||||
}
|
||||
|
|
@ -255,24 +221,164 @@ impl DetailsReporter {
|
|||
self.process_matches(only_valid.unwrap_or(self.only_valid), false)
|
||||
}
|
||||
|
||||
fn get_finding_data(&self) -> Result<Vec<finding_data::FindingMetadata>> {
|
||||
let datastore = self.datastore.lock().unwrap();
|
||||
Ok(datastore
|
||||
.get_finding_data_iter()
|
||||
.filter(|metadata| {
|
||||
if self.only_valid {
|
||||
datastore.get_matches().iter().any(|msg| {
|
||||
let (_, _, match_item) = &**msg;
|
||||
match_item.rule_name == metadata.rule_name
|
||||
&& match_item.validation_success
|
||||
&& match_item.validation_response_status
|
||||
!= StatusCode::CONTINUE.as_u16()
|
||||
})
|
||||
pub fn deduplicate_matches(
|
||||
&self,
|
||||
matches: Vec<ReportMatch>,
|
||||
no_dedup: bool,
|
||||
) -> Vec<ReportMatch> {
|
||||
if no_dedup {
|
||||
return matches;
|
||||
}
|
||||
|
||||
use std::collections::HashMap;
|
||||
let mut by_fp: HashMap<u64, ReportMatch> = HashMap::new();
|
||||
|
||||
for rm in matches {
|
||||
let fp = rm.m.finding_fingerprint;
|
||||
if let Some(existing) = by_fp.get_mut(&fp) {
|
||||
// merge origin sets (keep first origin, append the rest)
|
||||
for o in rm.origin.iter() {
|
||||
if !existing.origin.iter().any(|e| e == o) {
|
||||
existing.origin = OriginSet::new(
|
||||
existing.origin.first().clone(),
|
||||
existing
|
||||
.origin
|
||||
.iter()
|
||||
.skip(1)
|
||||
.cloned()
|
||||
.chain(std::iter::once(o.clone()))
|
||||
.collect(),
|
||||
);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
by_fp.insert(fp, rm);
|
||||
}
|
||||
by_fp.into_values().collect()
|
||||
}
|
||||
|
||||
fn matches_for_output(&self, args: &cli::commands::scan::ScanArgs) -> Result<Vec<ReportMatch>> {
|
||||
let mut matches = self.get_filtered_matches()?;
|
||||
if !args.no_dedup {
|
||||
matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
}
|
||||
if args.no_dedup {
|
||||
let mut expanded = Vec::new();
|
||||
for rm in matches {
|
||||
if rm.origin.len() > 1 {
|
||||
for origin in rm.origin.iter() {
|
||||
let mut single = rm.clone();
|
||||
single.origin = OriginSet::new(origin.clone(), Vec::new());
|
||||
expanded.push(single);
|
||||
}
|
||||
} else {
|
||||
true
|
||||
expanded.push(rm);
|
||||
}
|
||||
}
|
||||
matches = expanded;
|
||||
}
|
||||
Ok(matches)
|
||||
}
|
||||
|
||||
pub fn build_finding_record(
|
||||
&self,
|
||||
rm: &ReportMatch,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> FindingReporterRecord {
|
||||
let source_span = &rm.m.location.source_span;
|
||||
let line_num = source_span.start.line;
|
||||
|
||||
let snippet = Escaped(
|
||||
rm.m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rm.m.groups.captures.get(0))
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
.to_string();
|
||||
|
||||
let validation_status = if rm.validation_success {
|
||||
"Active Credential".to_string()
|
||||
} else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() {
|
||||
"Not Attempted".to_string()
|
||||
} else {
|
||||
"Inactive Credential".to_string()
|
||||
};
|
||||
|
||||
const MAX_RESPONSE_LENGTH: usize = 512;
|
||||
let truncated_body: String =
|
||||
rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect();
|
||||
let ellipsis =
|
||||
if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" };
|
||||
let response_body = format!("{}{}", truncated_body, ellipsis);
|
||||
|
||||
let git_metadata_val = rm
|
||||
.origin
|
||||
.iter()
|
||||
.filter_map(|origin| {
|
||||
if let Origin::GitRepo(e) = origin {
|
||||
self.extract_git_metadata(e, source_span)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
.next();
|
||||
|
||||
let file_path = rm
|
||||
.origin
|
||||
.iter()
|
||||
.find_map(|origin| match origin {
|
||||
Origin::File(e) => {
|
||||
if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
Some(url)
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
Some(url)
|
||||
} else if let Some(mapped) = self.s3_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else if let Some(mapped) = self.docker_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else {
|
||||
Some(e.path.display().to_string())
|
||||
}
|
||||
}
|
||||
Origin::Extended(e) => e.path().map(|p| p.display().to_string()),
|
||||
_ => None,
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
FindingReporterRecord {
|
||||
rule: RuleMetadata {
|
||||
name: rm.m.rule_name.to_string(),
|
||||
id: rm.m.rule_text_id.to_string(),
|
||||
},
|
||||
finding: FindingRecordData {
|
||||
snippet,
|
||||
fingerprint: rm.m.finding_fingerprint.to_string(),
|
||||
confidence: rm.match_confidence.to_string(),
|
||||
entropy: format!("{:.2}", rm.m.calculated_entropy),
|
||||
validation: ValidationInfo { status: validation_status, response: response_body },
|
||||
language: rm
|
||||
.blob_metadata
|
||||
.language
|
||||
.clone()
|
||||
.unwrap_or_else(|| "Unknown".to_string()),
|
||||
line: line_num as u32,
|
||||
column_start: source_span.start.column as u32,
|
||||
column_end: source_span.end.column as u32,
|
||||
path: file_path,
|
||||
git_metadata: git_metadata_val,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_finding_records(
|
||||
&self,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<Vec<FindingReporterRecord>> {
|
||||
let matches = self.matches_for_output(args)?;
|
||||
Ok(matches.iter().map(|rm| self.build_finding_record(rm, args)).collect())
|
||||
}
|
||||
|
||||
fn style_finding_heading<D>(&self, val: D) -> StyledObject<D> {
|
||||
|
|
@ -336,13 +442,7 @@ impl Reportable for DetailsReporter {
|
|||
}
|
||||
}
|
||||
}
|
||||
/// A group of matches that all have the same rule and capture group content
|
||||
#[derive(Serialize, JsonSchema)]
|
||||
pub(crate) struct Finding {
|
||||
#[serde(flatten)]
|
||||
metadata: finding_data::FindingMetadata,
|
||||
matches: Vec<ReportMatch>,
|
||||
}
|
||||
|
||||
/// A match produced by one of kingfisher's rules.
|
||||
/// This corresponds to a single location.
|
||||
#[derive(Serialize, JsonSchema, Clone)]
|
||||
|
|
@ -355,18 +455,14 @@ pub struct ReportMatch {
|
|||
#[serde(flatten)]
|
||||
pub m: Match,
|
||||
|
||||
/// An optional score assigned to the match
|
||||
// #[validate(range(min = 0.0, max = 1.0))]
|
||||
// score: Option<f64>,
|
||||
|
||||
/// An optional comment assigned to the match
|
||||
pub comment: Option<String>,
|
||||
|
||||
/// The confidence level of the match
|
||||
pub match_confidence: Confidence,
|
||||
|
||||
/// Whether the match is visible in the output
|
||||
pub visible: bool,
|
||||
/// An optional status assigned to the match
|
||||
// status: Option<finding_data::Status>,
|
||||
|
||||
/// Validation Body
|
||||
pub validation_response_body: String,
|
||||
|
|
@ -377,6 +473,41 @@ pub struct ReportMatch {
|
|||
/// Validation Success
|
||||
pub validation_success: bool,
|
||||
}
|
||||
|
||||
/// One reported finding: the rule that matched plus the details of the match.
/// Instances are produced by `DetailsReporter::build_finding_record`.
#[derive(Serialize, JsonSchema, Clone, Debug)]
|
||||
pub struct FindingReporterRecord {
|
||||
/// Name and id of the rule that produced the finding.
pub rule: RuleMetadata,
|
||||
/// Details of the matched content, its location and its validation result.
pub finding: FindingRecordData,
|
||||
}
|
||||
|
||||
/// Identifies the rule behind a finding.
#[derive(Serialize, JsonSchema, Clone, Debug)]
|
||||
pub struct RuleMetadata {
|
||||
/// Rule name (filled from the match's `rule_name`).
pub name: String,
|
||||
/// Rule id (filled from the match's `rule_text_id`).
pub id: String,
|
||||
}
|
||||
|
||||
/// Outcome of validating a finding.
#[derive(Serialize, JsonSchema, Clone, Debug)]
|
||||
pub struct ValidationInfo {
|
||||
/// Status label; `build_finding_record` sets it to "Active Credential",
/// "Not Attempted" or "Inactive Credential".
pub status: String,
|
||||
/// Validation response body, shortened by `build_finding_record` to a
/// bounded number of characters.
pub response: String,
|
||||
}
|
||||
|
||||
/// Details of a single finding as filled in by `build_finding_record`.
#[derive(Serialize, JsonSchema, Clone, Debug)]
|
||||
pub struct FindingRecordData {
|
||||
/// Escaped text of the matched capture.
pub snippet: String,
|
||||
/// The match's finding fingerprint rendered as a string.
pub fingerprint: String,
|
||||
/// The match confidence rendered as a string.
pub confidence: String,
|
||||
/// Calculated entropy formatted with two decimal places.
pub entropy: String,
|
||||
/// Validation status and response for the finding.
pub validation: ValidationInfo,
|
||||
/// Language recorded in the blob metadata, or "Unknown" when absent.
pub language: String,
|
||||
/// Line on which the match starts.
pub line: u32,
|
||||
/// Column at which the match starts.
pub column_start: u32,
|
||||
/// Column at which the match ends.
pub column_end: u32,
|
||||
/// Display path or URL of the finding's origin; empty when none is available.
pub path: String,
|
||||
// Left out of the serialized output entirely when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
/// Git metadata for the finding, when a git-repo origin provides it.
pub git_metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
impl From<finding_data::FindingDataEntry> for ReportMatch {
|
||||
fn from(e: finding_data::FindingDataEntry) -> Self {
|
||||
ReportMatch {
|
||||
|
|
@ -392,8 +523,4 @@ impl From<finding_data::FindingDataEntry> for ReportMatch {
|
|||
}
|
||||
}
|
||||
}
|
||||
impl Finding {
|
||||
fn new(metadata: finding_data::FindingMetadata, matches: Vec<ReportMatch>) -> Self {
|
||||
Self { metadata, matches }
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,90 +1,17 @@
|
|||
use bson::Document;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::*;
|
||||
|
||||
impl DetailsReporter {
|
||||
/// Formats findings as BSON and writes them to the provided writer.
|
||||
/// Each finding record is serialized to its own BSON document and written in sequence.
|
||||
pub fn bson_format<W: std::io::Write>(
|
||||
&self,
|
||||
mut writer: W,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
// Get filtered matches
|
||||
let mut matches = self.get_filtered_matches()?;
|
||||
|
||||
// Apply deduplication only if requested
|
||||
if !args.no_dedup {
|
||||
matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
}
|
||||
|
||||
let mut bson_findings = Vec::new();
|
||||
|
||||
// For each match, handle it based on the no_dedup flag
|
||||
for rm in matches {
|
||||
if args.no_dedup && rm.origin.len() > 1 {
|
||||
// For no_dedup and multiple origins, create separate findings for each origin
|
||||
for origin in rm.origin.iter() {
|
||||
// Create a single-origin version of this match
|
||||
let single_origin_rm = ReportMatch {
|
||||
origin: OriginSet::new(origin.clone(), Vec::new()),
|
||||
blob_metadata: rm.blob_metadata.clone(),
|
||||
m: rm.m.clone(),
|
||||
comment: rm.comment.clone(),
|
||||
visible: rm.visible,
|
||||
match_confidence: rm.match_confidence,
|
||||
validation_response_body: rm.validation_response_body.clone(),
|
||||
validation_response_status: rm.validation_response_status,
|
||||
validation_success: rm.validation_success,
|
||||
};
|
||||
|
||||
// Process to JSON first, then convert to BSON
|
||||
let json_finding = self.process_match_to_json(&single_origin_rm, args)?;
|
||||
if let Ok(bson_doc) = json_to_bson_document(&json_finding) {
|
||||
bson_findings.push(bson_doc);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Process normally for deduped matches or matches with only one origin
|
||||
let json_finding = self.process_match_to_json(&rm, args)?;
|
||||
if let Ok(bson_doc) = json_to_bson_document(&json_finding) {
|
||||
bson_findings.push(bson_doc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write each BSON document
|
||||
for doc in bson_findings {
|
||||
let records = self.build_finding_records(args)?;
|
||||
for record in records {
|
||||
let doc = bson::to_document(&record)?;
|
||||
doc.to_writer(&mut writer)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// pub fn bson_format<W: std::io::Write>(
|
||||
// &self,
|
||||
// mut writer: W,
|
||||
// args: &cli::commands::scan::ScanArgs,
|
||||
// ) -> Result<()> {
|
||||
// let findings = self.gather_json_findings(args)?;
|
||||
|
||||
// // Print the full JSON for each finding
|
||||
// for finding in &findings {
|
||||
// println!("Full JSON:\n{}", serde_json::to_string_pretty(finding)?);
|
||||
// }
|
||||
|
||||
// let bson_findings: Vec<Document> = findings
|
||||
// .into_iter()
|
||||
// .filter_map(|finding| json_to_bson_document(&finding).ok())
|
||||
// .collect();
|
||||
// for doc in bson_findings {
|
||||
// doc.to_writer(&mut writer)?;
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
}
|
||||
|
||||
fn json_to_bson_document(json: &Value) -> Result<Document> {
|
||||
match bson::to_bson(json)? {
|
||||
bson::Bson::Document(doc) => Ok(doc),
|
||||
_ => Err(anyhow::anyhow!("Failed to convert JSON to BSON document")),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,409 +1,60 @@
|
|||
use http::StatusCode;
|
||||
use serde_json::json;
|
||||
|
||||
use super::*;
|
||||
use crate::bstring_escape::Escaped;
|
||||
|
||||
impl DetailsReporter {
|
||||
pub fn deduplicate_matches(
|
||||
&self,
|
||||
matches: Vec<ReportMatch>,
|
||||
no_dedup: bool,
|
||||
) -> Vec<ReportMatch> {
|
||||
if no_dedup {
|
||||
return matches;
|
||||
}
|
||||
|
||||
use std::collections::HashMap;
|
||||
let mut by_fp: HashMap<u64, ReportMatch> = HashMap::new();
|
||||
|
||||
for rm in matches {
|
||||
let fp = rm.m.finding_fingerprint;
|
||||
if let Some(existing) = by_fp.get_mut(&fp) {
|
||||
// merge origin sets (keep first origin, append the rest)
|
||||
for o in rm.origin.iter() {
|
||||
if !existing.origin.iter().any(|e| e == o) {
|
||||
existing.origin = OriginSet::new(
|
||||
existing.origin.first().clone(),
|
||||
existing
|
||||
.origin
|
||||
.iter()
|
||||
.skip(1)
|
||||
.cloned()
|
||||
.chain(std::iter::once(o.clone()))
|
||||
.collect(),
|
||||
);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
by_fp.insert(fp, rm);
|
||||
}
|
||||
by_fp.into_values().collect()
|
||||
}
|
||||
|
||||
pub fn gather_json_findings(
|
||||
&self,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<Vec<serde_json::Value>> {
|
||||
let mut matches = self.get_filtered_matches()?;
|
||||
if !args.no_dedup {
|
||||
matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
}
|
||||
|
||||
let mut json_findings = Vec::new();
|
||||
for rm in matches {
|
||||
let source_span = &rm.m.location.source_span;
|
||||
let line_num = source_span.start.line;
|
||||
|
||||
let snippet = Escaped(
|
||||
rm.m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rm.m.groups.captures.get(0))
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
.to_string();
|
||||
|
||||
let validation_status = if rm.validation_success {
|
||||
"Active Credential"
|
||||
} else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() {
|
||||
"Not Attempted"
|
||||
} else {
|
||||
"Inactive Credential"
|
||||
};
|
||||
|
||||
const MAX_RESPONSE_LENGTH: usize = 512;
|
||||
let truncated_body: String =
|
||||
rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect();
|
||||
let ellipsis =
|
||||
if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" };
|
||||
let response_body = format!("{}{}", truncated_body, ellipsis);
|
||||
|
||||
// Call extract_git_metadata on each GitRepo origin and take the first non-null result.
|
||||
let git_metadata_val = rm
|
||||
.origin
|
||||
.iter()
|
||||
.filter_map(|origin| {
|
||||
if let Origin::GitRepo(e) = origin {
|
||||
self.extract_git_metadata(e, source_span)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.next()
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
|
||||
// Collect a file path from an Origin::File, if available.
|
||||
let file_path = rm
|
||||
.origin
|
||||
.iter()
|
||||
.find_map(|origin| {
|
||||
if let Origin::File(e) = origin {
|
||||
if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
Some(url)
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
Some(url)
|
||||
} else if let Some(mapped) = self.docker_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else {
|
||||
Some(e.path.display().to_string())
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let match_json = json!({
|
||||
"rule": {
|
||||
"name": rm.m.rule_name,
|
||||
"id": rm.m.rule_text_id,
|
||||
},
|
||||
"finding": {
|
||||
"snippet": snippet,
|
||||
"fingerprint": rm.m.finding_fingerprint.to_string(),
|
||||
"confidence": rm.match_confidence.to_string(),
|
||||
"entropy": format!("{:.2}", rm.m.calculated_entropy),
|
||||
"validation": {
|
||||
"status": validation_status,
|
||||
"response": response_body,
|
||||
},
|
||||
"language": rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()),
|
||||
"line": line_num,
|
||||
"column_start": source_span.start.column,
|
||||
"column_end": source_span.end.column,
|
||||
"path": file_path,
|
||||
"git_metadata": git_metadata_val
|
||||
}
|
||||
});
|
||||
|
||||
let finding_json = json!({
|
||||
"id": rm.m.rule_text_id,
|
||||
"matches": [ match_json ]
|
||||
});
|
||||
json_findings.push(finding_json);
|
||||
}
|
||||
Ok(json_findings)
|
||||
}
|
||||
pub fn json_format<W: std::io::Write>(
|
||||
&self,
|
||||
mut writer: W,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
// Get filtered matches
|
||||
let mut matches = self.get_filtered_matches()?;
|
||||
|
||||
// Apply deduplication only if requested
|
||||
if !args.no_dedup {
|
||||
matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
}
|
||||
|
||||
// For each match, handle it based on the no_dedup flag
|
||||
for rm in matches {
|
||||
if args.no_dedup && rm.origin.len() > 1 {
|
||||
// For no_dedup and multiple origins, create separate findings for each origin
|
||||
for origin in rm.origin.iter() {
|
||||
// Create a single-origin version of this match
|
||||
let single_origin_rm = ReportMatch {
|
||||
origin: OriginSet::new(origin.clone(), Vec::new()),
|
||||
blob_metadata: rm.blob_metadata.clone(),
|
||||
m: rm.m.clone(),
|
||||
comment: rm.comment.clone(),
|
||||
visible: rm.visible,
|
||||
match_confidence: rm.match_confidence,
|
||||
validation_response_body: rm.validation_response_body.clone(),
|
||||
validation_response_status: rm.validation_response_status,
|
||||
validation_success: rm.validation_success,
|
||||
};
|
||||
|
||||
// Process this single-origin match into a JSON finding
|
||||
let json_finding = self.process_match_to_json(&single_origin_rm, args)?;
|
||||
findings.push(json_finding);
|
||||
}
|
||||
} else {
|
||||
// Process normally for deduped matches or matches with only one origin
|
||||
let json_finding = self.process_match_to_json(&rm, args)?;
|
||||
findings.push(json_finding);
|
||||
}
|
||||
}
|
||||
|
||||
// Write the JSON output
|
||||
if !findings.is_empty() {
|
||||
serde_json::to_writer_pretty(&mut writer, &findings)?;
|
||||
let records = self.build_finding_records(args)?;
|
||||
if !records.is_empty() {
|
||||
serde_json::to_writer_pretty(&mut writer, &records)?;
|
||||
writeln!(writer)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Add a helper method to convert a ReportMatch to a JSON finding
|
||||
pub fn process_match_to_json(
|
||||
&self,
|
||||
rm: &ReportMatch,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<serde_json::Value> {
|
||||
// Extract the relevant data from the match as you already do in your current implementation
|
||||
let source_span = &rm.m.location.source_span;
|
||||
let line_num = source_span.start.line;
|
||||
|
||||
let snippet = Escaped(
|
||||
rm.m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rm.m.groups.captures.get(0))
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
.to_string();
|
||||
|
||||
let validation_status = if rm.validation_success {
|
||||
"Active Credential"
|
||||
} else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() {
|
||||
"Not Attempted"
|
||||
} else {
|
||||
"Inactive Credential"
|
||||
};
|
||||
|
||||
const MAX_RESPONSE_LENGTH: usize = 512;
|
||||
let truncated_body: String =
|
||||
rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect();
|
||||
let ellipsis =
|
||||
if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" };
|
||||
let response_body = format!("{}{}", truncated_body, ellipsis);
|
||||
|
||||
// Call extract_git_metadata on each GitRepo origin and take the first non-null result.
|
||||
let git_metadata_val = rm
|
||||
.origin
|
||||
.iter()
|
||||
.filter_map(|origin| {
|
||||
if let Origin::GitRepo(e) = origin {
|
||||
self.extract_git_metadata(e, source_span)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.next()
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
|
||||
// Collect a file path from an Origin::File, if available.
|
||||
let file_path = rm
|
||||
.origin
|
||||
.iter()
|
||||
.find_map(|origin| {
|
||||
if let Origin::File(e) = origin {
|
||||
if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
Some(url)
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
Some(url)
|
||||
} else if let Some(mapped) = self.docker_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else {
|
||||
Some(e.path.display().to_string())
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let match_json = json!({
|
||||
"rule": {
|
||||
"name": rm.m.rule_name,
|
||||
"id": rm.m.rule_text_id,
|
||||
},
|
||||
"finding": {
|
||||
"snippet": snippet,
|
||||
"fingerprint": rm.m.finding_fingerprint.to_string(),
|
||||
"confidence": rm.match_confidence.to_string(),
|
||||
"entropy": format!("{:.2}", rm.m.calculated_entropy),
|
||||
"validation": {
|
||||
"status": validation_status,
|
||||
"response": response_body,
|
||||
},
|
||||
"language": rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()),
|
||||
"line": line_num,
|
||||
"column_start": source_span.start.column,
|
||||
"column_end": source_span.end.column,
|
||||
"path": file_path,
|
||||
"git_metadata": git_metadata_val
|
||||
}
|
||||
});
|
||||
|
||||
let finding_json = json!({
|
||||
"id": rm.m.rule_text_id,
|
||||
"matches": [ match_json ]
|
||||
});
|
||||
|
||||
Ok(finding_json)
|
||||
}
|
||||
// // Modified JSON format to pass args to gather_json_findings
|
||||
// pub fn json_format<W: std::io::Write>(
|
||||
// &self,
|
||||
// mut writer: W,
|
||||
// args: &cli::commands::scan::ScanArgs,
|
||||
// ) -> Result<()> {
|
||||
// let findings = self.gather_json_findings(args)?;
|
||||
// if !findings.is_empty() {
|
||||
// serde_json::to_writer_pretty(&mut writer, &findings)?;
|
||||
// writeln!(writer)?;
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
pub fn jsonl_format<W: std::io::Write>(
|
||||
&self,
|
||||
mut writer: W,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
// Get filtered matches
|
||||
let mut matches = self.get_filtered_matches()?;
|
||||
|
||||
// Apply deduplication only if requested
|
||||
if !args.no_dedup {
|
||||
matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
}
|
||||
|
||||
// For each match, handle it based on the no_dedup flag
|
||||
for rm in matches {
|
||||
if args.no_dedup && rm.origin.len() > 1 {
|
||||
// For no_dedup and multiple origins, create separate findings for each origin
|
||||
for origin in rm.origin.iter() {
|
||||
// Create a single-origin version of this match
|
||||
let single_origin_rm = ReportMatch {
|
||||
origin: OriginSet::new(origin.clone(), Vec::new()),
|
||||
blob_metadata: rm.blob_metadata.clone(),
|
||||
m: rm.m.clone(),
|
||||
comment: rm.comment.clone(),
|
||||
visible: rm.visible,
|
||||
match_confidence: rm.match_confidence,
|
||||
validation_response_body: rm.validation_response_body.clone(),
|
||||
validation_response_status: rm.validation_response_status,
|
||||
validation_success: rm.validation_success,
|
||||
};
|
||||
|
||||
// Process this single-origin match into a JSON finding and write it
|
||||
let json_finding = self.process_match_to_json(&single_origin_rm, args)?;
|
||||
serde_json::to_writer(&mut writer, &json_finding)?;
|
||||
writeln!(writer)?;
|
||||
}
|
||||
} else {
|
||||
// Process normally for deduped matches or matches with only one origin
|
||||
let json_finding = self.process_match_to_json(&rm, args)?;
|
||||
serde_json::to_writer(&mut writer, &json_finding)?;
|
||||
writeln!(writer)?;
|
||||
}
|
||||
let records = self.build_finding_records(args)?;
|
||||
for record in records {
|
||||
serde_json::to_writer(&mut writer, &record)?;
|
||||
writeln!(writer)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// // Modified JSONL format to pass args to gather_json_findings
|
||||
// pub fn jsonl_format<W: std::io::Write>(
|
||||
// &self,
|
||||
// mut writer: W,
|
||||
// args: &cli::commands::scan::ScanArgs,
|
||||
// ) -> Result<()> {
|
||||
// let findings = self.gather_json_findings(args)?;
|
||||
// for finding in findings {
|
||||
// serde_json::to_writer(&mut writer, &finding)?;
|
||||
// writeln!(writer)?;
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::cli::commands::github::GitCloneMode;
|
||||
use crate::cli::commands::github::GitHistoryMode;
|
||||
use crate::cli::commands::rules::RuleSpecifierArgs;
|
||||
use crate::matcher::{SerializableCapture, SerializableCaptures};
|
||||
use crate::util::intern;
|
||||
use crate::{
|
||||
blob::BlobId,
|
||||
cli::commands::github::GitHubRepoType,
|
||||
cli::commands::inputs::ContentFilteringArgs,
|
||||
cli::commands::inputs::InputSpecifierArgs,
|
||||
cli::commands::output::{OutputArgs, ReportOutputFormat},
|
||||
cli::commands::scan::ConfidenceLevel,
|
||||
findings_store::FindingsStore,
|
||||
location::{Location, OffsetSpan, SourcePoint, SourceSpan},
|
||||
matcher::Match,
|
||||
origin::Origin,
|
||||
reporter::styles::Styles,
|
||||
};
|
||||
use std::{
|
||||
io::Cursor,
|
||||
path::PathBuf,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
use serde_json::Value;
|
||||
use url::Url;
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
blob::BlobId,
|
||||
cli::commands::{
|
||||
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
|
||||
inputs::{ContentFilteringArgs, InputSpecifierArgs},
|
||||
output::OutputArgs,
|
||||
rules::RuleSpecifierArgs,
|
||||
scan::ConfidenceLevel,
|
||||
},
|
||||
findings_store::FindingsStore,
|
||||
location::{Location, OffsetSpan, SourcePoint, SourceSpan},
|
||||
matcher::{Match, SerializableCapture, SerializableCaptures},
|
||||
origin::{Origin, OriginSet},
|
||||
reporter::{ReportMatch, Styles},
|
||||
rules::rule::Confidence,
|
||||
util::intern,
|
||||
};
|
||||
|
||||
fn create_default_args() -> cli::commands::scan::ScanArgs {
|
||||
use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope
|
||||
|
||||
|
|
@ -437,10 +88,14 @@ mod tests {
|
|||
jira_url: None,
|
||||
jql: None,
|
||||
max_results: 100,
|
||||
// Docker image scanning
|
||||
// Slack options
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
|
||||
docker_image: Vec::new(),
|
||||
// clone / history options
|
||||
|
|
@ -470,7 +125,6 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
// Helper function to create a mock Match
|
||||
fn create_mock_match(
|
||||
rule_name: &str,
|
||||
rule_text_id: &str,
|
||||
|
|
@ -498,7 +152,7 @@ mod tests {
|
|||
finding_fingerprint: 0123,
|
||||
rule_finding_fingerprint: intern(rule_finding_fingerprint),
|
||||
rule_text_id: intern(rule_text_id),
|
||||
rule_name: intern(rule_name), //.to_string(),
|
||||
rule_name: intern(rule_name),
|
||||
rule_confidence: Confidence::Medium,
|
||||
validation_response_body: "validation response".to_string(),
|
||||
validation_response_status: 200,
|
||||
|
|
@ -508,10 +162,8 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
// Helper function to create a mock DetailsReporter
|
||||
fn setup_mock_reporter(matches: Vec<ReportMatch>) -> DetailsReporter {
|
||||
let mut datastore = FindingsStore::new(PathBuf::from("/tmp"));
|
||||
// Create mock origin and blob metadata for the first test match
|
||||
if !matches.is_empty() {
|
||||
let blob_metadata = BlobMetadata {
|
||||
id: BlobId::new(b"mock_blob"),
|
||||
|
|
@ -521,16 +173,14 @@ mod tests {
|
|||
language: Some("Rust".to_string()),
|
||||
};
|
||||
let dedup = true;
|
||||
// Add matches to datastore
|
||||
for m in matches.clone() {
|
||||
datastore.record(
|
||||
vec![(
|
||||
Arc::new(OriginSet::new(
|
||||
// OriginSet -- Arc<…>
|
||||
Origin::from_file(PathBuf::from("/mock/path/file.rs")),
|
||||
vec![],
|
||||
)),
|
||||
Arc::new(blob_metadata.clone()), // BlobMetadata -- Arc<…>
|
||||
Arc::new(blob_metadata.clone()),
|
||||
m.m.clone(),
|
||||
)],
|
||||
dedup,
|
||||
|
|
@ -543,9 +193,9 @@ mod tests {
|
|||
only_valid: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_json_format() -> Result<()> {
|
||||
// Create a mock match with successful validation
|
||||
let mock_match =
|
||||
create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true);
|
||||
let matches = vec![ReportMatch {
|
||||
|
|
@ -567,72 +217,17 @@ mod tests {
|
|||
}];
|
||||
let reporter = setup_mock_reporter(matches);
|
||||
let mut output = Cursor::new(Vec::new());
|
||||
// Call the json_format method
|
||||
reporter.json_format(&mut output, &create_default_args())?;
|
||||
// Parse and validate JSON output
|
||||
let json_output: Vec<Value> = serde_json::from_slice(&output.into_inner())?;
|
||||
let json_output: Vec<serde_json::Value> = serde_json::from_slice(&output.into_inner())?;
|
||||
assert!(!json_output.is_empty(), "JSON output should not be empty");
|
||||
let first_finding = &json_output[0];
|
||||
assert!(first_finding.get("id").is_some(), "Finding should have an 'id'");
|
||||
assert!(first_finding.get("matches").is_some(), "Finding should have 'matches'");
|
||||
// Validate the structure of the first match
|
||||
let matches = first_finding.get("matches").unwrap().as_array().unwrap();
|
||||
let first_match = &matches[0];
|
||||
assert_eq!(first_match.get("rule").unwrap().get("name").unwrap(), "MockRule");
|
||||
assert_eq!(first_match.get("finding").unwrap().get("language").unwrap(), "Rust");
|
||||
let first = &json_output[0];
|
||||
assert_eq!(first["rule"]["name"], "MockRule");
|
||||
assert_eq!(first["finding"]["language"], "Rust");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// #[test]
|
||||
// fn test_jsonl_format() -> Result<()> {
|
||||
// // Create a mock match with successful validation
|
||||
// let mock_match =
|
||||
// create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true);
|
||||
// let matches = vec![ReportMatch {
|
||||
// origin: OriginSet::new(
|
||||
// Origin::from_file(PathBuf::from("/mock/path/file.rs")),
|
||||
// vec![],
|
||||
// ),
|
||||
// blob_metadata: BlobMetadata {
|
||||
// id: BlobId::new(b"mock_blob"),
|
||||
// num_bytes: 1024,
|
||||
// mime_essence: Some("text/plain".to_string()),
|
||||
// charset: Some("UTF-8".to_string()),
|
||||
// language: Some("Rust".to_string()),
|
||||
// },
|
||||
// m: mock_match,
|
||||
// comment: None,
|
||||
// match_confidence: Confidence::Medium,
|
||||
// visible: true,
|
||||
// validation_response_body: "validation response".to_string(),
|
||||
// validation_response_status: 200,
|
||||
// validation_success: true,
|
||||
// }];
|
||||
// let reporter = setup_mock_reporter(matches);
|
||||
// let mut output = Cursor::new(Vec::new());
|
||||
// // Call the jsonl_format method
|
||||
// reporter.jsonl_format(&mut output, &create_default_args())?;
|
||||
// // Split output into lines and validate
|
||||
// let jsonl_output = String::from_utf8(output.into_inner())?;
|
||||
// let lines: Vec<&str> = jsonl_output.lines().collect();
|
||||
// assert!(!lines.is_empty(), "JSONL output should not be empty");
|
||||
// for line in &lines {
|
||||
// let json_value: serde_json::Value = serde_json::from_str(line)?;
|
||||
// assert!(
|
||||
// json_value.get("rule_name").is_some(),
|
||||
// "Each line should have a 'rule_name'"
|
||||
// );
|
||||
// assert!(
|
||||
// json_value.get("matches").is_some(),
|
||||
// "Each line should have 'matches'"
|
||||
// );
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
#[test]
|
||||
fn test_validation_status_in_json() -> Result<()> {
|
||||
// Test validation status in JSON output
|
||||
let test_cases = vec![(true, "Active Credential"), (false, "Inactive Credential")];
|
||||
for (validation_success, expected_status) in test_cases {
|
||||
let mock_match = create_mock_match(
|
||||
|
|
@ -663,23 +258,11 @@ mod tests {
|
|||
}];
|
||||
let reporter = setup_mock_reporter(matches);
|
||||
let mut output = Cursor::new(Vec::new());
|
||||
// Call the json_format method
|
||||
reporter.json_format(&mut output, &create_default_args())?;
|
||||
// Parse and validate JSON output
|
||||
let json_output: Vec<Value> = serde_json::from_slice(&output.into_inner())?;
|
||||
let json_output: Vec<serde_json::Value> = serde_json::from_slice(&output.into_inner())?;
|
||||
assert!(!json_output.is_empty(), "JSON output should not be empty");
|
||||
let first_finding = &json_output[0];
|
||||
let matches = first_finding.get("matches").unwrap().as_array().unwrap();
|
||||
let first_match = &matches[0];
|
||||
let validation_status = first_match
|
||||
.get("finding")
|
||||
.unwrap()
|
||||
.get("validation")
|
||||
.unwrap()
|
||||
.get("status")
|
||||
.unwrap()
|
||||
.as_str()
|
||||
.unwrap();
|
||||
let first = &json_output[0];
|
||||
let validation_status = first["finding"]["validation"]["status"].as_str().unwrap();
|
||||
assert_eq!(validation_status, expected_status);
|
||||
}
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -1,396 +1,126 @@
|
|||
use std::fmt::{Display, Formatter, Result as FmtResult};
|
||||
|
||||
use http::StatusCode;
|
||||
use indenter::indented;
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
bstring_escape::Escaped,
|
||||
origin::{get_repo_url, GitRepoOrigin},
|
||||
};
|
||||
|
||||
impl DetailsReporter {
|
||||
// Modified pretty format to use deduplicate_matches helper
|
||||
pub fn pretty_format<W: std::io::Write>(
|
||||
&self,
|
||||
mut writer: W,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
let mut matches = self.get_filtered_matches()?;
|
||||
let num_findings = matches.len();
|
||||
|
||||
if !args.no_dedup {
|
||||
matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
}
|
||||
|
||||
for (index, rm) in matches.into_iter().enumerate() {
|
||||
// When no_dedup is true, we'll handle each origin separately
|
||||
if args.no_dedup && rm.origin.len() > 1 {
|
||||
// For each origin, create a separate "finding"
|
||||
for origin in rm.origin.iter() {
|
||||
// Create a new ReportMatch with just this single origin
|
||||
let single_origin_rm = ReportMatch {
|
||||
origin: OriginSet::new(origin.clone(), Vec::new()),
|
||||
blob_metadata: rm.blob_metadata.clone(),
|
||||
m: rm.m.clone(),
|
||||
comment: rm.comment.clone(),
|
||||
visible: rm.visible,
|
||||
match_confidence: rm.match_confidence,
|
||||
validation_response_body: rm.validation_response_body.clone(),
|
||||
validation_response_status: rm.validation_response_status,
|
||||
validation_success: rm.validation_success,
|
||||
};
|
||||
|
||||
self.write_finding(
|
||||
&mut writer,
|
||||
&single_origin_rm,
|
||||
index + 1,
|
||||
num_findings,
|
||||
args,
|
||||
)?;
|
||||
}
|
||||
} else {
|
||||
// Normal processing for deduped matches or matches with only one origin
|
||||
self.write_finding(&mut writer, &rm, index + 1, num_findings, args)?;
|
||||
}
|
||||
let records = self.build_finding_records(args)?;
|
||||
let num_findings = records.len();
|
||||
for (index, record) in records.iter().enumerate() {
|
||||
self.write_finding_record(&mut writer, record, index + 1, num_findings)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_finding<W: std::io::Write>(
|
||||
fn write_finding_record<W: std::io::Write>(
|
||||
&self,
|
||||
writer: &mut W,
|
||||
rm: &ReportMatch,
|
||||
record: &FindingReporterRecord,
|
||||
_finding_num: usize,
|
||||
_num_findings: usize,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
let lock_icon = if rm.validation_success { "🔓 " } else { "" };
|
||||
let is_active = record.finding.validation.status == "Active Credential";
|
||||
let lock_icon = if is_active { "🔓 " } else { "" };
|
||||
let formatted_heading = format!(
|
||||
"{}{} => [{}]",
|
||||
lock_icon,
|
||||
rm.m.rule_name.to_uppercase(),
|
||||
rm.m.rule_text_id.to_uppercase()
|
||||
record.rule.name.to_uppercase(),
|
||||
record.rule.id.to_uppercase()
|
||||
);
|
||||
if rm.validation_success {
|
||||
if is_active {
|
||||
writeln!(writer, "{}", self.style_finding_active_heading(formatted_heading))?;
|
||||
} else {
|
||||
writeln!(writer, "{}", self.style_finding_heading(formatted_heading))?;
|
||||
}
|
||||
writeln!(writer, "{}", PrettyFinding(self, rm, args))?;
|
||||
writeln!(writer, "{}", PrettyFindingRecord(self, record))?;
|
||||
writeln!(writer)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_git_metadata(
|
||||
fn write_git_metadata_value(
|
||||
&self,
|
||||
f: &mut Formatter<'_>,
|
||||
e: &GitRepoOrigin,
|
||||
_args: &cli::commands::scan::ScanArgs,
|
||||
line_num: usize,
|
||||
git: &serde_json::Value,
|
||||
) -> FmtResult {
|
||||
// Check if this is a remote git scan
|
||||
// let mut is_remote_git_scan = !args.input_specifier_args.git_url.is_empty();
|
||||
// let mut git_url_string = String::new();
|
||||
let repo_url = get_repo_url(&e.repo_path)
|
||||
.unwrap_or_else(|_| e.repo_path.to_string_lossy().to_string().into());
|
||||
let mut git_url_string = repo_url.clone();
|
||||
if git_url_string.ends_with(".git") {
|
||||
git_url_string = git_url_string.strip_suffix(".git").unwrap().to_string().into();
|
||||
let repo_url = git["repository_url"].as_str().unwrap_or("");
|
||||
writeln!(f, " |Git Repo......: {}", self.style_metadata(repo_url))?;
|
||||
if let Some(commit) = git.get("commit") {
|
||||
if let Some(url) = commit.get("url").and_then(|v| v.as_str()) {
|
||||
writeln!(f, " |__Commit......: {}", self.style_metadata(url))?;
|
||||
}
|
||||
if let Some(committer) = commit.get("committer") {
|
||||
let name = committer.get("name").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let email = committer.get("email").and_then(|v| v.as_str()).unwrap_or("");
|
||||
writeln!(indented(f).with_str(" |__"), "Committer...: {} <{}>", name, email)?;
|
||||
}
|
||||
if let Some(date) = commit.get("date").and_then(|v| v.as_str()) {
|
||||
writeln!(indented(f).with_str(" |__"), "Date........: {}", date)?;
|
||||
}
|
||||
}
|
||||
writeln!(f, " |Git Repo......: {}", self.style_metadata(&git_url_string),)?;
|
||||
if let Some(cs) = &e.first_commit {
|
||||
let cmd = &cs.commit_metadata;
|
||||
|
||||
let atime =
|
||||
cmd.committer_timestamp.format(gix::date::time::format::SHORT.clone()).to_string();
|
||||
|
||||
let commit_id = &cmd.commit_id;
|
||||
let commit_url = format!("{}/commit/{}", &git_url_string, commit_id);
|
||||
// Write Commit Information
|
||||
writeln!(f, " |__Commit......: {}", self.style_metadata(&commit_url))?;
|
||||
writeln!(
|
||||
indented(f).with_str(" |__"),
|
||||
"Committer...: {} <{}>",
|
||||
cmd.committer_name,
|
||||
cmd.committer_email
|
||||
)?;
|
||||
writeln!(indented(f).with_str(" |__"), "Date........: {}", atime)?;
|
||||
// writeln!(indented(f).with_str(" |__"), "Summary.....: {}", msg)?;
|
||||
writeln!(indented(f).with_str(" |__"), "Path........: {}", cs.blob_path)?;
|
||||
// Construct Git Command
|
||||
let git_link =
|
||||
format!("{}/blob/{}/{}#L{}", &git_url_string, commit_id, cs.blob_path, line_num);
|
||||
let git_command =
|
||||
format!("git -C {} show {}:{}", e.repo_path.display(), commit_id, cs.blob_path);
|
||||
writeln!(
|
||||
indented(f).with_str(" |__"),
|
||||
"Git Link....: {}",
|
||||
self.style_metadata(&git_link)
|
||||
)?;
|
||||
writeln!(
|
||||
indented(f).with_str(" |__"),
|
||||
"Git Command.: {}",
|
||||
self.style_metadata(&git_command)
|
||||
)?;
|
||||
if let Some(file) = git.get("file") {
|
||||
if let Some(path) = file.get("path").and_then(|v| v.as_str()) {
|
||||
writeln!(indented(f).with_str(" |__"), "Path........: {}", path)?;
|
||||
}
|
||||
if let Some(url) = file.get("url").and_then(|v| v.as_str()) {
|
||||
writeln!(
|
||||
indented(f).with_str(" |__"),
|
||||
"Git Link....: {}",
|
||||
self.style_metadata(url)
|
||||
)?;
|
||||
}
|
||||
if let Some(cmd) = file.get("git_command").and_then(|v| v.as_str()) {
|
||||
writeln!(
|
||||
indented(f).with_str(" |__"),
|
||||
"Git Command.: {}",
|
||||
self.style_metadata(cmd)
|
||||
)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
// pub struct PrettyFinding<'a>(&'a DetailsReporter, &'a Finding);
|
||||
/// `Display` adapter for one match in the pretty report.
///
/// Bundles, in order, the reporter, the `ReportMatch` to print, and the scan
/// arguments. It is constructed as `PrettyFinding(self, rm, args)` and written
/// with `writeln!`; its `fmt` implementation destructures the three fields.
pub struct PrettyFinding<'a>(
|
||||
&'a DetailsReporter,
|
||||
&'a ReportMatch,
|
||||
&'a cli::commands::scan::ScanArgs,
|
||||
);
|
||||
impl<'a> Display for PrettyFinding<'a> {
|
||||
|
||||
/// `Display` adapter for one `FindingReporterRecord` in the pretty report.
///
/// Field `0` is the reporter (used for styling) and field `1` is the record
/// whose `finding` fields are printed.
pub struct PrettyFindingRecord<'a>(&'a DetailsReporter, &'a FindingReporterRecord);
|
||||
|
||||
impl<'a> Display for PrettyFindingRecord<'a> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
|
||||
let PrettyFinding(reporter, rm, args) = self;
|
||||
// Use Box<dyn Fn(&str) -> String> to store the closure
|
||||
let style_fn: Box<dyn Fn(&str) -> String> = if rm.validation_success {
|
||||
Box::new(|s: &str| reporter.style_active_creds(s).to_string()) // Convert StyledObject
|
||||
// to String
|
||||
let reporter = self.0;
|
||||
let record = self.1;
|
||||
let is_active = record.finding.validation.status == "Active Credential";
|
||||
let style_fn: Box<dyn Fn(&str) -> String> = if is_active {
|
||||
Box::new(|s| reporter.style_active_creds(s).to_string())
|
||||
} else {
|
||||
Box::new(|s: &str| reporter.style_match(s).to_string()) // Convert StyledObject to
|
||||
// String
|
||||
Box::new(|s| reporter.style_match(s).to_string())
|
||||
};
|
||||
let matching_finding =
|
||||
rm.m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rm.m.groups.captures.get(0))
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.unwrap_or(&[]);
|
||||
writeln!(f, " |Finding.......: {}", style_fn(&Escaped(matching_finding).to_string()))?;
|
||||
writeln!(f, " |Fingerprint...: {}", rm.m.finding_fingerprint)?;
|
||||
writeln!(f, " |Confidence....: {}", rm.match_confidence.to_string())?;
|
||||
writeln!(f, " |Entropy.......: {:.2}", rm.m.calculated_entropy)?;
|
||||
let validation_status = if rm.validation_response_status == StatusCode::CONTINUE.as_u16()
|
||||
|| rm.validation_response_status == StatusCode::PRECONDITION_REQUIRED.as_u16()
|
||||
{
|
||||
"Not Attempted".to_string()
|
||||
} else if rm.validation_success {
|
||||
"Active Credential".to_string()
|
||||
} else {
|
||||
"Inactive Credential".to_string()
|
||||
};
|
||||
writeln!(
|
||||
f,
|
||||
" |Validation....: {}",
|
||||
if rm.validation_success {
|
||||
reporter.style_finding_active_heading(&validation_status).to_string()
|
||||
// Convert StyledObject to String
|
||||
} else {
|
||||
(&validation_status).to_string()
|
||||
}
|
||||
)?;
|
||||
const MAX_RESPONSE_LENGTH: usize = 512;
|
||||
if rm.validation_response_status != StatusCode::CONTINUE.as_u16() {
|
||||
let truncated_body: String =
|
||||
rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect();
|
||||
let ellipsis =
|
||||
if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" };
|
||||
let finding = &record.finding;
|
||||
writeln!(f, " |Finding.......: {}", style_fn(&finding.snippet))?;
|
||||
writeln!(f, " |Fingerprint...: {}", finding.fingerprint)?;
|
||||
writeln!(f, " |Confidence....: {}", finding.confidence)?;
|
||||
writeln!(f, " |Entropy.......: {}", finding.entropy)?;
|
||||
if is_active {
|
||||
writeln!(
|
||||
f,
|
||||
" |__Response....: {}{}",
|
||||
if rm.validation_success {
|
||||
reporter.style_active_creds(&truncated_body).to_string() // Convert StyledObject
|
||||
// to String
|
||||
} else {
|
||||
reporter.style_metadata(&truncated_body).to_string() // Convert StyledObject to
|
||||
// String
|
||||
},
|
||||
ellipsis
|
||||
" |Validation....: {}",
|
||||
reporter.style_finding_active_heading(&finding.validation.status).to_string()
|
||||
)?;
|
||||
} else {
|
||||
writeln!(f, " |Validation....: {}", finding.validation.status)?;
|
||||
}
|
||||
writeln!(
|
||||
f,
|
||||
" |Language......: {}",
|
||||
rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string())
|
||||
)?;
|
||||
|
||||
let source_span = &rm.m.location.source_span;
|
||||
writeln!(f, " |Line Num......: {}", source_span.start.line)?;
|
||||
|
||||
//print all the other areas where this was seen
|
||||
for p in rm.origin.iter() {
|
||||
match p {
|
||||
Origin::File(e) => {
|
||||
let display_path = if let Some(url) = reporter.jira_issue_url(&e.path, args) {
|
||||
url
|
||||
} else if let Some(url) = reporter.slack_message_url(&e.path) {
|
||||
url
|
||||
} else if let Some(mapped) = reporter.docker_display_path(&e.path) {
|
||||
mapped
|
||||
} else {
|
||||
e.path.display().to_string()
|
||||
};
|
||||
writeln!(
|
||||
f,
|
||||
" |Path..........: {}",
|
||||
if rm.validation_success {
|
||||
reporter.style_active_creds(&display_path).to_string()
|
||||
} else {
|
||||
display_path
|
||||
}
|
||||
)?;
|
||||
}
|
||||
Origin::GitRepo(e) => {
|
||||
reporter.write_git_metadata(f, e, args, source_span.start.line)?;
|
||||
}
|
||||
Origin::Extended(e) => {
|
||||
writeln!(f, " |Extended......: {}", reporter.style_metadata(e).to_string())?;
|
||||
// Convert StyledObject to String
|
||||
}
|
||||
}
|
||||
if finding.validation.status != "Not Attempted" {
|
||||
writeln!(f, " |__Response....: {}", style_fn(&finding.validation.response))?;
|
||||
}
|
||||
writeln!(f, " |Language......: {}", finding.language)?;
|
||||
writeln!(f, " |Line Num......: {}", finding.line)?;
|
||||
writeln!(f, " |Path..........: {}", style_fn(&finding.path))?;
|
||||
if let Some(git) = &finding.git_metadata {
|
||||
reporter.write_git_metadata_value(f, git)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
// Smoke test: builds a `Match` whose `calculated_entropy` is NaN, creates a
// reporter and calls `pretty_format`, discarding the result. There is no
// `#[should_panic]` attribute and no assertion, so despite its name the test
// only fails if `pretty_format` itself panics.
fn test_pretty_format_with_nan_entropy_panics() {
|
||||
use std::{
|
||||
io::Cursor,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use http::StatusCode;
|
||||
use url::Url;
|
||||
|
||||
use crate::{
|
||||
blob::BlobMetadata,
|
||||
cli::commands::{
|
||||
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
|
||||
gitlab::GitLabRepoType,
|
||||
inputs::{ContentFilteringArgs, InputSpecifierArgs},
|
||||
output::{OutputArgs, ReportOutputFormat},
|
||||
rules::RuleSpecifierArgs,
|
||||
scan::{ConfidenceLevel, ScanArgs},
|
||||
},
|
||||
location::{Location, OffsetSpan, SourcePoint, SourceSpan},
|
||||
matcher::{Match, SerializableCaptures},
|
||||
origin::{Origin, OriginSet},
|
||||
reporter::{DetailsReporter, Styles},
|
||||
};
|
||||
|
||||
// Construct a fake match with NaN entropy
|
||||
let m = Match {
|
||||
rule_name: "dummy_rule".into(),
|
||||
rule_text_id: "dummy.id".into(),
|
||||
finding_fingerprint: 123456789,
|
||||
rule_finding_fingerprint: "abc".into(),
|
||||
location: Location {
|
||||
offset_span: OffsetSpan { start: 0, end: 1 },
|
||||
source_span: SourceSpan {
|
||||
start: SourcePoint { line: 1, column: 0 },
|
||||
end: SourcePoint { line: 1, column: 10 },
|
||||
},
|
||||
},
|
||||
blob_id: crate::blob::BlobId::default(),
|
||||
groups: SerializableCaptures { captures: vec![] },
|
||||
rule_confidence: crate::rules::rule::Confidence::Medium,
|
||||
validation_success: true,
|
||||
validation_response_status: StatusCode::OK.as_u16(),
|
||||
validation_response_body: "OK".into(),
|
||||
calculated_entropy: f32::NAN, // Here's the trigger
|
||||
visible: true,
|
||||
};
|
||||
|
||||
// NOTE(review): `_rm` is built but never referenced again; it is not recorded
// into `store` below, so `pretty_format` appears to run against a store with
// no findings and the NaN entropy value may never be formatted - confirm.
let _rm = crate::reporter::ReportMatch {
|
||||
origin: OriginSet::new(Origin::from_file("dummy.txt".into()), vec![]),
|
||||
blob_metadata: BlobMetadata {
|
||||
id: m.blob_id,
|
||||
num_bytes: 1,
|
||||
mime_essence: None,
|
||||
charset: None,
|
||||
language: Some("Rust".into()),
|
||||
},
|
||||
m,
|
||||
comment: None,
|
||||
visible: true,
|
||||
match_confidence: crate::rules::rule::Confidence::Medium,
|
||||
validation_response_body: "OK".into(),
|
||||
validation_response_status: StatusCode::OK.as_u16(),
|
||||
validation_success: true,
|
||||
};
|
||||
|
||||
// Reporter backed by a freshly created `FindingsStore`; nothing is recorded into it here.
let store = Arc::new(Mutex::new(crate::findings_store::FindingsStore::new(".".into())));
|
||||
let reporter =
|
||||
DetailsReporter { datastore: store, styles: Styles::new(false), only_valid: false };
|
||||
|
||||
// In-memory sink for the report plus a fully spelled-out `ScanArgs` using the pretty output format.
let mut buf = Cursor::new(Vec::new());
|
||||
let args = ScanArgs {
|
||||
// core execution / performance
|
||||
num_jobs: 1,
|
||||
no_dedup: false,
|
||||
|
||||
// rule selection
|
||||
rules: RuleSpecifierArgs {
|
||||
rules_path: Vec::new(),
|
||||
rule: vec!["all".into()],
|
||||
load_builtins: true,
|
||||
},
|
||||
|
||||
// input discovery
|
||||
input_specifier_args: InputSpecifierArgs {
|
||||
path_inputs: Vec::new(),
|
||||
git_url: Vec::new(),
|
||||
github_user: Vec::new(),
|
||||
github_organization: Vec::new(),
|
||||
all_github_organizations: false,
|
||||
github_api_url: url::Url::parse("https://api.github.com/").unwrap(),
|
||||
github_repo_type: GitHubRepoType::Source,
|
||||
// new GitLab defaults
|
||||
gitlab_user: Vec::new(),
|
||||
gitlab_group: Vec::new(),
|
||||
all_gitlab_groups: false,
|
||||
gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(),
|
||||
gitlab_repo_type: GitLabRepoType::Owner,
|
||||
// Jira options
|
||||
jira_url: None,
|
||||
jql: None,
|
||||
max_results: 100,
|
||||
|
||||
// Slack options
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
git_clone: GitCloneMode::Bare,
|
||||
git_history: GitHistoryMode::Full,
|
||||
scan_nested_repos: true,
|
||||
commit_metadata: true,
|
||||
},
|
||||
|
||||
// content filtering
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
max_file_size_mb: 25.0,
|
||||
no_extract_archives: false,
|
||||
extraction_depth: 2,
|
||||
exclude: Vec::new(), // Exclude patterns
|
||||
no_binary: true,
|
||||
},
|
||||
|
||||
// scanning behaviour
|
||||
confidence: ConfidenceLevel::Medium,
|
||||
no_validate: false,
|
||||
rule_stats: false,
|
||||
only_valid: false,
|
||||
min_entropy: None,
|
||||
redact: false,
|
||||
git_repo_timeout: 1800, // 30 minutes
|
||||
|
||||
// output
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
|
||||
// display
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
};
|
||||
|
||||
// Originally meant to panic if the entropy isn't checked for NaN; the
// returned `Result` is deliberately ignored (bound to `_result`).
|
||||
let _result = reporter.pretty_format(&mut buf, &args);
|
||||
// assert!(result.is_err() || result.is_ok(), "Should not crash"); // remove this line if panic
|
||||
// is expected pre-fix
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,259 +1,56 @@
|
|||
use std::collections::HashMap;
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
|
||||
use rayon::prelude::*;
|
||||
use serde_sarif::sarif;
|
||||
|
||||
use super::*;
|
||||
use crate::{bstring_escape::Escaped, defaults::get_builtin_rules, origin::get_repo_url};
|
||||
/// Identity of a single reported match location.
///
/// Used as a `HashMap` key so that matches sharing the same file path,
/// line, column range and snippet text are grouped into one entry.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
struct LocationKey {
    /// Blob path of the match's first origin, lossily converted to UTF-8
    /// (empty when the origin has no blob path).
    file_path: String,
    /// Line on which the match starts.
    line: usize,
    /// Column at which the match starts.
    column_start: usize,
    /// Column at which the match ends.
    column_end: usize,
    /// Escaped text of the matched capture.
    text: String,
}
|
||||
use crate::defaults::get_builtin_rules;
|
||||
|
||||
impl DetailsReporter {
|
||||
fn make_sarif_result(
|
||||
&self,
|
||||
finding: &Finding,
|
||||
no_dedup: bool,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<sarif::Result> {
|
||||
// Deduplicate exactly as in the JSON reporter
|
||||
// let matches = self.deduplicate_matches(finding.matches.clone(), no_dedup);
|
||||
// Deduplicate exactly as in the JSON reporter - but only if no_dedup is false
|
||||
let matches = if no_dedup {
|
||||
finding.matches.clone()
|
||||
} else {
|
||||
self.deduplicate_matches(finding.matches.clone(), no_dedup)
|
||||
};
|
||||
fn record_to_sarif_result(&self, record: &FindingReporterRecord) -> Result<sarif::Result> {
|
||||
let finding = &record.finding;
|
||||
let artifact_location =
|
||||
sarif::ArtifactLocationBuilder::default().uri(finding.path.clone()).build()?;
|
||||
let region = sarif::RegionBuilder::default()
|
||||
.start_line(finding.line as i64)
|
||||
.start_column(finding.column_start as i64)
|
||||
.end_line(finding.line as i64)
|
||||
.end_column(finding.column_end as i64)
|
||||
.snippet(
|
||||
sarif::ArtifactContentBuilder::default().text(finding.snippet.clone()).build()?,
|
||||
)
|
||||
.build()?;
|
||||
|
||||
let metadata = &finding.metadata;
|
||||
|
||||
let mut location_map: HashMap<LocationKey, Vec<(&OriginSet, &Match)>> = HashMap::new();
|
||||
for rm in &matches {
|
||||
let source_span = &rm.m.location.source_span;
|
||||
let snippet =
|
||||
rm.m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rm.m.groups.captures.get(0))
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.unwrap_or(&[]);
|
||||
let key = LocationKey {
|
||||
file_path: rm
|
||||
.origin
|
||||
.first()
|
||||
.blob_path()
|
||||
.map(|p| p.to_string_lossy().into_owned())
|
||||
.unwrap_or_default(),
|
||||
line: source_span.start.line,
|
||||
column_start: source_span.start.column,
|
||||
column_end: source_span.end.column,
|
||||
text: Escaped(snippet).to_string(),
|
||||
};
|
||||
location_map.entry(key).or_default().push((&rm.origin, &rm.m));
|
||||
let mut props = BTreeMap::new();
|
||||
props.insert("validation_status".to_string(), serde_json::json!(finding.validation.status));
|
||||
props.insert("entropy".to_string(), serde_json::json!(finding.entropy));
|
||||
if let Some(git) = &finding.git_metadata {
|
||||
props.insert("git_metadata".to_string(), git.clone());
|
||||
}
|
||||
let properties =
|
||||
sarif::PropertyBagBuilder::default().additional_properties(props).build()?;
|
||||
|
||||
let mut fpu64: u64 = 0;
|
||||
let location = sarif::LocationBuilder::default()
|
||||
.physical_location(
|
||||
sarif::PhysicalLocationBuilder::default()
|
||||
.artifact_location(artifact_location)
|
||||
.region(region)
|
||||
.build()?,
|
||||
)
|
||||
.properties(properties)
|
||||
.build()?;
|
||||
|
||||
let locations: Vec<sarif::Location> = location_map
|
||||
.into_iter()
|
||||
.filter_map(|(key, matches)| {
|
||||
let (prov, m) = matches[0];
|
||||
let source_span = &m.location.source_span;
|
||||
let mut artifact_locations = Vec::new();
|
||||
let mut git_metadata_list = Vec::new();
|
||||
|
||||
fpu64 = m.finding_fingerprint;
|
||||
|
||||
for p in prov.iter() {
|
||||
match p {
|
||||
Origin::File(e) => {
|
||||
let uri = if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
url
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
url
|
||||
} else {
|
||||
e.path.display().to_string()
|
||||
};
|
||||
artifact_locations.push(
|
||||
sarif::ArtifactLocationBuilder::default().uri(uri).build().ok()?,
|
||||
);
|
||||
}
|
||||
Origin::GitRepo(e) => {
|
||||
// Extract and store Git metadata
|
||||
if let Some(git_metadata) = self.extract_git_metadata(e, source_span) {
|
||||
git_metadata_list.push(git_metadata);
|
||||
}
|
||||
|
||||
// Build Git artifact location
|
||||
if let Some(cs) = &e.first_commit {
|
||||
let repo_url = get_repo_url(&e.repo_path)
|
||||
.unwrap_or_else(|_| {
|
||||
e.repo_path.to_string_lossy().to_string().into()
|
||||
})
|
||||
.trim_end_matches(".git")
|
||||
.to_string();
|
||||
let git_url = format!(
|
||||
"{}/blob/{}/{}#L{}",
|
||||
repo_url,
|
||||
cs.commit_metadata.commit_id,
|
||||
cs.blob_path,
|
||||
source_span.start.line
|
||||
);
|
||||
artifact_locations.push(
|
||||
sarif::ArtifactLocationBuilder::default()
|
||||
.uri(git_url)
|
||||
.build()
|
||||
.ok()?,
|
||||
);
|
||||
}
|
||||
}
|
||||
Origin::Extended(_) => (),
|
||||
}
|
||||
}
|
||||
|
||||
if artifact_locations.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let region = sarif::RegionBuilder::default()
|
||||
.start_line(key.line as i64)
|
||||
.start_column(key.column_start as i64)
|
||||
.end_line(key.line as i64)
|
||||
.end_column(key.column_end as i64)
|
||||
.snippet(sarif::ArtifactContentBuilder::default().text(key.text).build().ok()?)
|
||||
.build()
|
||||
.ok()?;
|
||||
|
||||
let logical_location = sarif::LogicalLocationBuilder::default()
|
||||
.kind("blob")
|
||||
.name(m.finding_fingerprint.to_string())
|
||||
.build()
|
||||
.ok()?;
|
||||
|
||||
let validation_status =
|
||||
if m.validation_response_status == StatusCode::CONTINUE.as_u16() {
|
||||
"Not Attempted"
|
||||
} else if m.validation_success {
|
||||
"Active Credential"
|
||||
} else {
|
||||
"Inactive Credential"
|
||||
};
|
||||
|
||||
// Build combined properties including Git metadata and fingerprint
|
||||
let mut props = std::collections::BTreeMap::new();
|
||||
props.insert("validation_status".to_string(), serde_json::json!(validation_status));
|
||||
|
||||
props.insert(
|
||||
"entropy".to_string(),
|
||||
serde_json::json!(format!("{:.2}", m.calculated_entropy)),
|
||||
);
|
||||
|
||||
// Add the fingerprint property from the match
|
||||
props.insert("fingerprint".to_string(), serde_json::json!(m.finding_fingerprint));
|
||||
|
||||
if !git_metadata_list.is_empty() {
|
||||
props.insert("git_metadata".to_string(), serde_json::json!(git_metadata_list));
|
||||
}
|
||||
|
||||
let properties = sarif::PropertyBagBuilder::default()
|
||||
.additional_properties(props)
|
||||
.build()
|
||||
.ok()?;
|
||||
|
||||
// Create locations for each artifact location
|
||||
let locations = artifact_locations
|
||||
.into_iter()
|
||||
.map(|artifact_location| {
|
||||
sarif::LocationBuilder::default()
|
||||
.physical_location(
|
||||
sarif::PhysicalLocationBuilder::default()
|
||||
.artifact_location(artifact_location)
|
||||
.region(region.clone())
|
||||
.build()
|
||||
.ok()?,
|
||||
)
|
||||
.logical_locations(vec![logical_location.clone()])
|
||||
.properties(properties.clone())
|
||||
.build()
|
||||
.ok()
|
||||
})
|
||||
.collect::<Option<Vec<_>>>()?;
|
||||
Some(locations)
|
||||
})
|
||||
.flatten()
|
||||
.collect();
|
||||
// let message = sarif::MessageBuilder::default()
|
||||
// .text(format!(
|
||||
// "Rule {} found {} unique {}.\nFirst blob id matched: {}",
|
||||
// metadata.rule_name,
|
||||
// locations.len(),
|
||||
// if locations.len() == 1 { "match" } else { "matches" },
|
||||
// first_match_blob_id
|
||||
// ))
|
||||
// .build()?;
|
||||
// Create detailed message from first location's information
|
||||
let detailed_msg = if let Some(first_match) = matches.first() {
|
||||
let mut msg = format!(
|
||||
"Rule {} found {} unique {}.\n",
|
||||
metadata.rule_name,
|
||||
locations.len(),
|
||||
if locations.len() == 1 { "match" } else { "matches" }
|
||||
);
|
||||
// Add file or Git information based on origin
|
||||
// Get first origin of first match - we know this exists
|
||||
let p = first_match.origin.first();
|
||||
match p {
|
||||
Origin::File(e) => {
|
||||
let uri = if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
url
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
url
|
||||
} else {
|
||||
e.path.display().to_string()
|
||||
};
|
||||
msg.push_str(&format!("Location: {}\n", uri));
|
||||
}
|
||||
Origin::GitRepo(e) => {
|
||||
if let Some(cs) = &e.first_commit {
|
||||
let repo_url = get_repo_url(&e.repo_path)
|
||||
.unwrap_or_else(|_| e.repo_path.to_string_lossy().to_string().into())
|
||||
.trim_end_matches(".git")
|
||||
.to_string();
|
||||
// Add commit and author information
|
||||
let cmd = &cs.commit_metadata;
|
||||
msg.push_str(&format!("Repository: {}\n", repo_url));
|
||||
msg.push_str(&format!("Commit: {}\n", cmd.commit_id));
|
||||
msg.push_str(&format!(
|
||||
"Committer: {} <{}>\n",
|
||||
String::from_utf8_lossy(&cmd.committer_name),
|
||||
String::from_utf8_lossy(&cmd.committer_email)
|
||||
));
|
||||
msg.push_str(&format!("File: {}", cs.blob_path));
|
||||
}
|
||||
}
|
||||
Origin::Extended(e) => {
|
||||
msg.push_str(&format!("Extended: {}\n", e));
|
||||
}
|
||||
}
|
||||
msg
|
||||
} else {
|
||||
format!("Rule {} found {} unique matches.", metadata.rule_name, locations.len(),)
|
||||
};
|
||||
let message = sarif::MessageBuilder::default().text(detailed_msg).build()?;
|
||||
let fingerprint_name = "fingerprint".to_string();
|
||||
let fingerprint = fpu64.to_string();
|
||||
let message = sarif::MessageBuilder::default()
|
||||
.text(format!("Rule {} matched {}", record.rule.name, finding.path))
|
||||
.build()?;
|
||||
|
||||
let result = sarif::ResultBuilder::default()
|
||||
.rule_id(&metadata.rule_name)
|
||||
.rule_id(&record.rule.name)
|
||||
.message(message)
|
||||
.kind(sarif::ResultKind::Review.to_string())
|
||||
.locations(locations)
|
||||
.locations(vec![location])
|
||||
.level(sarif::ResultLevel::Warning.to_string())
|
||||
.partial_fingerprints([(fingerprint_name, fingerprint)])
|
||||
.partial_fingerprints([("fingerprint".to_string(), finding.fingerprint.clone())])
|
||||
.build()?;
|
||||
Ok(result)
|
||||
}
|
||||
|
|
@ -261,54 +58,11 @@ impl DetailsReporter {
|
|||
pub fn sarif_format<W: std::io::Write>(
|
||||
&self,
|
||||
mut writer: W,
|
||||
no_dedup: bool,
|
||||
_no_dedup: bool,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
// Gather findings first
|
||||
let mut findings = self.gather_findings()?;
|
||||
|
||||
// If no_dedup is true, expand findings with multiple origins into separate findings
|
||||
if no_dedup {
|
||||
let mut expanded_findings = Vec::new();
|
||||
for finding in findings {
|
||||
// Check matches with multiple origins
|
||||
let matches_with_multiple_origins: Vec<_> =
|
||||
finding.matches.iter().filter(|rm| rm.origin.len() > 1).collect();
|
||||
|
||||
if !matches_with_multiple_origins.is_empty() {
|
||||
// For each match with multiple origins, create separate findings
|
||||
for rm in matches_with_multiple_origins {
|
||||
for origin in rm.origin.iter() {
|
||||
// Create a single-origin match
|
||||
let single_origin_rm = ReportMatch {
|
||||
origin: OriginSet::new(origin.clone(), Vec::new()),
|
||||
blob_metadata: rm.blob_metadata.clone(),
|
||||
m: rm.m.clone(),
|
||||
comment: rm.comment.clone(),
|
||||
visible: rm.visible,
|
||||
match_confidence: rm.match_confidence,
|
||||
validation_response_body: rm.validation_response_body.clone(),
|
||||
validation_response_status: rm.validation_response_status,
|
||||
validation_success: rm.validation_success,
|
||||
};
|
||||
|
||||
// Create a new finding with just this single-origin match
|
||||
let new_finding =
|
||||
Finding::new(finding.metadata.clone(), vec![single_origin_rm]);
|
||||
expanded_findings.push(new_finding);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If the finding has no matches with multiple origins, keep it as is
|
||||
expanded_findings.push(finding);
|
||||
}
|
||||
}
|
||||
findings = expanded_findings;
|
||||
}
|
||||
|
||||
// Filter only rules relevant to the findings
|
||||
let finding_rule_ids: std::collections::HashSet<_> =
|
||||
findings.iter().map(|f| f.metadata.rule_name.clone()).collect();
|
||||
let records = self.build_finding_records(args)?;
|
||||
let finding_rule_ids: HashSet<_> = records.iter().map(|r| r.rule.name.clone()).collect();
|
||||
let rules: Vec<sarif::ReportingDescriptor> = get_builtin_rules(None)?
|
||||
.iter_rules()
|
||||
.par_bridge()
|
||||
|
|
@ -351,10 +105,9 @@ impl DetailsReporter {
|
|||
)
|
||||
.build()?;
|
||||
|
||||
let sarif_results: Vec<sarif::Result> = findings
|
||||
.par_iter()
|
||||
.filter_map(|f| self.make_sarif_result(f, no_dedup, args).ok())
|
||||
.collect();
|
||||
let sarif_results: Vec<sarif::Result> =
|
||||
records.iter().filter_map(|r| self.record_to_sarif_result(r).ok()).collect();
|
||||
|
||||
let run = sarif::RunBuilder::default().tool(tool).results(sarif_results).build()?;
|
||||
let sarif = sarif::SarifBuilder::default()
|
||||
.version(sarif::Version::V2_1_0.to_string())
|
||||
|
|
|
|||
142
src/s3.rs
Normal file
142
src/s3.rs
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
use anyhow::{Context, Result};
|
||||
use aws_config::{defaults, meta::region::RegionProviderChain, BehaviorVersion};
|
||||
use aws_credential_types::Credentials;
|
||||
use aws_sdk_s3::{
|
||||
error::ProvideErrorMetadata, // for .code()
|
||||
operation::list_objects_v2::ListObjectsV2Error, // modeled service error
|
||||
Client,
|
||||
};
|
||||
use aws_types::region::Region;
|
||||
use reqwest; // HTTP client for HEAD fallback
|
||||
|
||||
pub async fn visit_bucket_objects<F>(
|
||||
bucket: &str,
|
||||
prefix: Option<&str>,
|
||||
role_arn: Option<&str>,
|
||||
profile: Option<&str>,
|
||||
mut visitor: F,
|
||||
) -> Result<()>
|
||||
where
|
||||
F: FnMut(String, Vec<u8>) -> Result<()>,
|
||||
{
|
||||
// Helper to build ConfigLoader with profile/creds/no_credentials
|
||||
let build_loader = || {
|
||||
let mut loader = defaults(BehaviorVersion::latest());
|
||||
if let Some(p) = profile {
|
||||
loader = loader.profile_name(p);
|
||||
}
|
||||
if let (Ok(k), Ok(s)) = (std::env::var("KF_AWS_KEY"), std::env::var("KF_AWS_SECRET")) {
|
||||
loader = loader.credentials_provider(Credentials::new(k, s, None, None, "kf_env"));
|
||||
}
|
||||
if profile.is_none() && std::env::var("KF_AWS_KEY").is_err() && role_arn.is_none() {
|
||||
loader = loader.no_credentials();
|
||||
}
|
||||
loader
|
||||
};
|
||||
|
||||
// Initial client in default→us-east-1
|
||||
let default_region = RegionProviderChain::default_provider().or_else("us-east-1");
|
||||
let mut config = build_loader().region(default_region).load().await;
|
||||
let mut client = if let Some(role) = role_arn {
|
||||
let assume = aws_config::sts::AssumeRoleProvider::builder(role.to_string())
|
||||
.session_name("kingfisher")
|
||||
.configure(&config)
|
||||
.build()
|
||||
.await;
|
||||
let conf = aws_sdk_s3::config::Builder::from(&config).credentials_provider(assume).build();
|
||||
Client::from_conf(conf)
|
||||
} else {
|
||||
Client::new(&config)
|
||||
};
|
||||
|
||||
let mut continuation_token: Option<String> = None;
|
||||
loop {
|
||||
let mut req = client.list_objects_v2().bucket(bucket);
|
||||
if let Some(p) = prefix {
|
||||
req = req.prefix(p);
|
||||
}
|
||||
if let Some(ref token) = continuation_token {
|
||||
req = req.continuation_token(token);
|
||||
}
|
||||
|
||||
let resp = match req.send().await {
|
||||
Ok(r) => r,
|
||||
|
||||
// On error, extract the modeled service error
|
||||
Err(err) => {
|
||||
let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError
|
||||
|
||||
// If the bucket must be addressed at another region...
|
||||
if svc_err.code() == Some("PermanentRedirect") {
|
||||
// HEAD request to get x-amz-bucket-region header
|
||||
let url = format!("https://{bucket}.s3.amazonaws.com");
|
||||
let head = reqwest::Client::new()
|
||||
.head(&url)
|
||||
.send()
|
||||
.await
|
||||
.context("Failed to HEAD bucket for region")?;
|
||||
let region_str = head
|
||||
.headers()
|
||||
.get("x-amz-bucket-region")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("us-east-1")
|
||||
.to_string();
|
||||
|
||||
// Rebuild client in the correct region
|
||||
let override_region = RegionProviderChain::first_try(Region::new(region_str))
|
||||
.or_else("us-east-1");
|
||||
config = build_loader().region(override_region).load().await;
|
||||
client = if let Some(r) = role_arn {
|
||||
let assume = aws_config::sts::AssumeRoleProvider::builder(r.to_string())
|
||||
.session_name("kingfisher")
|
||||
.configure(&config)
|
||||
.build()
|
||||
.await;
|
||||
let conf = aws_sdk_s3::config::Builder::from(&config)
|
||||
.credentials_provider(assume)
|
||||
.build();
|
||||
Client::from_conf(conf)
|
||||
} else {
|
||||
Client::new(&config)
|
||||
};
|
||||
|
||||
// Reset pagination and retry list
|
||||
continuation_token = None;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Any other error is fatal
|
||||
return Err(svc_err).context("Failed to list objects in bucket");
|
||||
}
|
||||
};
|
||||
|
||||
// Process objects
|
||||
for obj in resp.contents.unwrap_or_default() {
|
||||
if let Some(key) = obj.key {
|
||||
let data = client
|
||||
.get_object()
|
||||
.bucket(bucket)
|
||||
.key(&key)
|
||||
.send()
|
||||
.await
|
||||
.with_context(|| format!("Failed to fetch object {}", key))?
|
||||
.body
|
||||
.collect()
|
||||
.await
|
||||
.context("Failed to read S3 object body")?
|
||||
.into_bytes()
|
||||
.to_vec();
|
||||
visitor(key, data)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Continue or finish pagination
|
||||
if resp.is_truncated.unwrap_or(false) {
|
||||
continuation_token = resp.next_continuation_token;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -8,6 +8,7 @@ use indicatif::{HumanCount, ProgressBar, ProgressStyle};
|
|||
use tokio::time::Duration;
|
||||
use tracing::{debug, error, info};
|
||||
|
||||
use crate::blob::BlobIdMap;
|
||||
use crate::{
|
||||
blob::BlobMetadata,
|
||||
cli::{
|
||||
|
|
@ -20,11 +21,18 @@ use crate::{
|
|||
findings_store,
|
||||
git_binary::{CloneMode, Git},
|
||||
git_url::GitUrl,
|
||||
github, gitlab, jira,
|
||||
matcher::Match,
|
||||
origin::OriginSet,
|
||||
github, gitlab,
|
||||
guesser::Guesser,
|
||||
jira,
|
||||
matcher::{Match, Matcher, MatcherStats},
|
||||
origin::{Origin, OriginSet},
|
||||
rules_database::RulesDatabase,
|
||||
s3,
|
||||
scanner::processing::BlobProcessor,
|
||||
scanner_pool::ScannerPool,
|
||||
slack, PathBuf,
|
||||
};
|
||||
|
||||
pub type DatastoreMessage = (OriginSet, BlobMetadata, Vec<(Option<f64>, Match)>);
|
||||
|
||||
pub fn clone_or_update_git_repos(
|
||||
|
|
@ -284,3 +292,86 @@ pub async fn fetch_slack_messages(
|
|||
}
|
||||
Ok(vec![output_dir])
|
||||
}
|
||||
|
||||
pub async fn fetch_s3_objects(
|
||||
args: &scan::ScanArgs,
|
||||
datastore: &Arc<Mutex<findings_store::FindingsStore>>,
|
||||
rules_db: &RulesDatabase,
|
||||
matcher_stats: &Mutex<MatcherStats>,
|
||||
enable_profiling: bool,
|
||||
shared_profiler: Arc<crate::rule_profiling::ConcurrentRuleProfiler>,
|
||||
progress_enabled: bool,
|
||||
) -> Result<()> {
|
||||
let Some(bucket) = args.input_specifier_args.s3_bucket.as_deref() else {
|
||||
return Ok(());
|
||||
};
|
||||
let prefix = args.input_specifier_args.s3_prefix.as_deref();
|
||||
let role_arn = args.input_specifier_args.role_arn.as_deref();
|
||||
let profile = args.input_specifier_args.aws_local_profile.as_deref();
|
||||
|
||||
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
|
||||
let seen_blobs = BlobIdMap::new();
|
||||
let matcher = Matcher::new(
|
||||
rules_db,
|
||||
scanner_pool,
|
||||
&seen_blobs,
|
||||
Some(matcher_stats),
|
||||
enable_profiling,
|
||||
Some(shared_profiler.clone()),
|
||||
)?;
|
||||
let guesser = Guesser::new().expect("should be able to create filetype guesser");
|
||||
let mut processor = BlobProcessor { matcher, guesser };
|
||||
|
||||
let progress = if progress_enabled {
|
||||
let style =
|
||||
ProgressStyle::with_template("{spinner} {msg} ({pos} objects) [{elapsed_precise}]")
|
||||
.expect("progress bar style template should compile");
|
||||
let pb = ProgressBar::new_spinner().with_style(style).with_message("Fetching S3 objects");
|
||||
pb.enable_steady_tick(Duration::from_millis(500));
|
||||
pb
|
||||
} else {
|
||||
ProgressBar::hidden()
|
||||
};
|
||||
|
||||
let bucket_name = bucket.to_string();
|
||||
let pb = progress.clone();
|
||||
|
||||
|
||||
let bucket_name = bucket.to_string();
|
||||
|
||||
s3::visit_bucket_objects(bucket, prefix, role_arn, profile, move |key, bytes| {
|
||||
let origin = OriginSet::new(
|
||||
Origin::from_extended(serde_json::json!({
|
||||
"path": format!("s3://{}/{}", bucket_name, key)
|
||||
})),
|
||||
Vec::new(),
|
||||
);
|
||||
let blob = crate::blob::Blob::from_bytes(bytes);
|
||||
|
||||
if let Some((origin, blob_md, scored_matches)) =
|
||||
processor.run(origin, blob, args.no_dedup)?
|
||||
{
|
||||
// Wrap origin & metadata once:
|
||||
let origin_arc = Arc::new(origin);
|
||||
let blob_arc = Arc::new(blob_md);
|
||||
|
||||
// Now build a batch of exactly one FindingsStoreMessage per Match
|
||||
let mut batch = Vec::with_capacity(scored_matches.len());
|
||||
for (_score, m) in scored_matches {
|
||||
batch.push((origin_arc.clone(), blob_arc.clone(), m));
|
||||
}
|
||||
|
||||
// Call record with the right type
|
||||
let added = datastore.lock().unwrap().record(batch, !args.no_dedup);
|
||||
debug!("Added {} new S3 blobs", added);
|
||||
}
|
||||
pb.inc(1);
|
||||
Ok(())
|
||||
})
|
||||
.await?;
|
||||
|
||||
let total = progress.position();
|
||||
progress.finish_with_message(format!("Fetched {} S3 objects", total));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,9 @@ use crate::{
|
|||
rules_database::RulesDatabase,
|
||||
scanner::{
|
||||
clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos,
|
||||
repos::{enumerate_gitlab_repos, fetch_jira_issues, fetch_slack_messages},
|
||||
repos::{
|
||||
enumerate_gitlab_repos, fetch_jira_issues, fetch_s3_objects, fetch_slack_messages,
|
||||
},
|
||||
run_secret_validation, save_docker_images,
|
||||
summary::print_scan_summary,
|
||||
},
|
||||
|
|
@ -93,22 +95,39 @@ pub async fn run_async_scan(
|
|||
}
|
||||
}
|
||||
|
||||
if input_roots.is_empty() {
|
||||
bail!("No inputs to scan");
|
||||
}
|
||||
let shared_profiler = Arc::new(ConcurrentRuleProfiler::new());
|
||||
let enable_profiling = args.rule_stats;
|
||||
let matcher_stats = Mutex::new(MatcherStats::default());
|
||||
let _inputs = enumerate_filesystem_inputs(
|
||||
|
||||
// Fetch S3 objects if requested (scanned immediately)
|
||||
fetch_s3_objects(
|
||||
args,
|
||||
datastore.clone(),
|
||||
&input_roots,
|
||||
progress_enabled,
|
||||
&datastore,
|
||||
rules_db,
|
||||
&matcher_stats,
|
||||
enable_profiling,
|
||||
Arc::clone(&shared_profiler),
|
||||
&matcher_stats,
|
||||
)?;
|
||||
progress_enabled,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let has_s3 = args.input_specifier_args.s3_bucket.is_some();
|
||||
if input_roots.is_empty() && !has_s3 {
|
||||
bail!("No inputs to scan");
|
||||
}
|
||||
|
||||
if !input_roots.is_empty() {
|
||||
let _inputs = enumerate_filesystem_inputs(
|
||||
args,
|
||||
datastore.clone(),
|
||||
&input_roots,
|
||||
progress_enabled,
|
||||
rules_db,
|
||||
enable_profiling,
|
||||
Arc::clone(&shared_profiler),
|
||||
&matcher_stats,
|
||||
)?;
|
||||
}
|
||||
|
||||
if !args.no_dedup {
|
||||
// Final deduplication step before validation (or before reporting)
|
||||
|
|
|
|||
95
testdata/baseline/baseline_test.go
vendored
95
testdata/baseline/baseline_test.go
vendored
|
|
@ -1,95 +0,0 @@
|
|||
package core
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"testing"
|
||||
|
||||
"github.com/10gen/kingfisher/core"
|
||||
)
|
||||
|
||||
// rootDir returns the parent of the directory that contains this source
// file, as reported by runtime.Caller.
func rootDir() string {
	_, thisFile, _, _ := runtime.Caller(0)
	containingDir := path.Dir(thisFile)
	return filepath.Dir(containingDir)
}
|
||||
|
||||
func NewTestSession(baselineFilename string) (*core.Session, error) {
|
||||
session := core.PrepareTestSession()
|
||||
session.Testing = true
|
||||
session.ReqScanMode = core.LocalFiles
|
||||
session.Options.ValidateSecrets = true
|
||||
session.Options.BaselineFilename = baselineFilename
|
||||
session.Options.KingfisherTempDir = core.GetTempDir()
|
||||
core.GlobalSessionRef = session
|
||||
session.InitializeTargetModeClient()
|
||||
return session, nil
|
||||
}
|
||||
|
||||
func beginTesting(t *testing.T, testfile string, expectedSkippedFindings, expectedFindingsSuppressKingfisher int) {
|
||||
rootdir := rootDir()
|
||||
testfilePath := filepath.Join(rootdir, testfile)
|
||||
_, filename := filepath.Split(testfilePath)
|
||||
|
||||
byteBaseLine := []byte(`FileContent:
|
||||
matches: []
|
||||
FilePaths:
|
||||
matches: []
|
||||
ExactFindings:
|
||||
matches:
|
||||
- filepath: testdata/ruby_vulnerable.rb
|
||||
findinghash: 701c302855ecc97e8415c44f37123bc2ca0c3343bd87028682aaaeaa90568084
|
||||
linenum: 40
|
||||
lastupdated: Tue Apr 16 13:04:10 PDT 2024
|
||||
- filepath: testdata/ruby_vulnerable.rb
|
||||
findinghash: 065d1e2faeae9328ca8b2f2754afa6c196d3ef2da2720dabca7e5161d67a6ca1
|
||||
linenum: 40
|
||||
lastupdated: Tue Apr 16 13:04:10 PDT 2024
|
||||
`)
|
||||
|
||||
// Write byteBaseline to a file in a temp directory and give yaml extension
|
||||
tempFile, err := ioutil.TempFile("", "baseline-*.yaml")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer os.Remove(tempFile.Name()) // Clean up the file after test
|
||||
|
||||
if _, err := tempFile.Write(byteBaseLine); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := tempFile.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
sess, err := NewTestSession(tempFile.Name())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
matchFile := core.NewMatchFile(testfilePath, sess, nil)
|
||||
core.BeginFileAnalysis(matchFile)
|
||||
if sess.Stats.SkippedFindings != expectedSkippedFindings {
|
||||
core.PrintSessionStats(sess)
|
||||
t.Errorf("Expected %d findings, got %d -- file: <%s>", expectedSkippedFindings, sess.Stats.SkippedFindings, filename)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBaselineFeature(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
fileName string
|
||||
expectedSkippedFindings int
|
||||
expectedFindingsSuppressKingfisher int
|
||||
}{
|
||||
{"ruby_vulnerable.rb", 3, 0},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.fileName, func(t *testing.T) {
|
||||
beginTesting(t, tt.fileName, tt.expectedSkippedFindings, tt.expectedFindingsSuppressKingfisher)
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -84,6 +84,11 @@ rules:
|
|||
max_results: 100,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
|
|
|
|||
|
|
@ -71,6 +71,11 @@ fn test_github_remote_scan() -> Result<()> {
|
|||
max_results: 100,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
|
|
|
|||
|
|
@ -70,6 +70,11 @@ fn test_gitlab_remote_scan() -> Result<()> {
|
|||
max_results: 100,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
git_clone: GitCloneMode::Bare,
|
||||
|
|
|
|||
29
tests/int_s3.rs
Normal file
29
tests/int_s3.rs
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
use anyhow::Result;
|
||||
use kingfisher::s3::visit_bucket_objects;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_visit_public_bucket() -> Result<()> {
|
||||
let mut objects = Vec::new();
|
||||
visit_bucket_objects(
|
||||
"awsglue-datasets",
|
||||
Some("examples/us-legislators/all/"),
|
||||
None,
|
||||
None,
|
||||
|key, data| {
|
||||
objects.push((key, data));
|
||||
Ok(())
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert!(
|
||||
objects.iter().any(|(k, _)| k.ends_with("events.json")),
|
||||
"events.json object not found"
|
||||
);
|
||||
let creds =
|
||||
objects.iter().find(|(k, _)| k.ends_with("events.json")).expect("events.json object");
|
||||
|
||||
let body = std::str::from_utf8(&creds.1)?;
|
||||
assert!(body.contains("Q4450263"), "expected events.json file");
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -59,6 +59,10 @@ impl TestContext {
|
|||
jql: None,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
max_results: 10,
|
||||
docker_image: Vec::new(),
|
||||
git_clone: GitCloneMode::Bare,
|
||||
|
|
@ -147,6 +151,11 @@ async fn test_scan_slack_messages() -> Result<()> {
|
|||
slack_query: Some("test".into()),
|
||||
slack_api_url: Url::parse(&format!("{}/", server.uri()))?,
|
||||
max_results: 10,
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
docker_image: Vec::new(),
|
||||
git_clone: GitCloneMode::Bare,
|
||||
git_history: GitHistoryMode::Full,
|
||||
|
|
|
|||
|
|
@ -127,6 +127,11 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
|
|||
max_results: 100,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
|
|
|
|||
|
|
@ -70,6 +70,11 @@ impl TestContext {
|
|||
max_results: 100,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
|
|
@ -142,6 +147,11 @@ impl TestContext {
|
|||
max_results: 100,
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue