forked from mirrors/kingfisher
refactored output reporting and formatting logic
This commit is contained in:
parent
bbe1c38c93
commit
bc05c3e5f2
15 changed files with 1109 additions and 1275 deletions
|
|
@ -5,6 +5,8 @@ All notable changes to this project will be documented in this file.
|
|||
## [1.32.0]
|
||||
- Added support for scanning AWS S3 buckets via `--s3-bucket` and optional `--s3-prefix`
|
||||
- Added `--role-arn` and `--aws-local-profile` flags for S3 authentication alongside `KF_AWS_KEY`/`KF_AWS_SECRET`
|
||||
- Refactored output reporting and formatting logic
|
||||
|
||||
## [1.31.0]
|
||||
- New rules: Telegram bot token, OpenWeatherMap, Apify, Groq
|
||||
- New OpenAI detectors added (@joshlarsen)
|
||||
|
|
|
|||
|
|
@ -20,6 +20,6 @@ rules:
|
|||
[a-z0-9\/._~-]*
|
||||
)?
|
||||
min_entropy: 3.0
|
||||
confidence: low
|
||||
confidence: medium
|
||||
examples:
|
||||
- https://eaRIWNkE:qyOIhJiM@j2LYY414Q5cCYD
|
||||
|
|
@ -96,7 +96,6 @@ rules:
|
|||
["']
|
||||
min_entropy: 3.3
|
||||
confidence: low
|
||||
categories: [fuzzy, generic, secret]
|
||||
examples:
|
||||
- |
|
||||
password = "super$ecret"
|
||||
|
|
|
|||
719
f1.patch
Normal file
719
f1.patch
Normal file
|
|
@ -0,0 +1,719 @@
|
|||
diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs
|
||||
index 9fcb1ecdfe8decc60278848c4a7be43cc9ebee70..b600f9f65838e52ce5dc3d7bb3bb1a5d5ff2bcaf 100644
|
||||
--- a/src/reporter/json_format.rs
|
||||
+++ b/src/reporter/json_format.rs
|
||||
@@ -1,436 +1,80 @@
|
||||
-use http::StatusCode;
|
||||
-use serde_json::json;
|
||||
-
|
||||
use super::*;
|
||||
-use crate::bstring_escape::Escaped;
|
||||
|
||||
impl DetailsReporter {
|
||||
- pub fn deduplicate_matches(
|
||||
- &self,
|
||||
- matches: Vec<ReportMatch>,
|
||||
- no_dedup: bool,
|
||||
- ) -> Vec<ReportMatch> {
|
||||
- if no_dedup {
|
||||
- return matches;
|
||||
- }
|
||||
-
|
||||
- use std::collections::HashMap;
|
||||
- let mut by_fp: HashMap<u64, ReportMatch> = HashMap::new();
|
||||
-
|
||||
- for rm in matches {
|
||||
- let fp = rm.m.finding_fingerprint;
|
||||
- if let Some(existing) = by_fp.get_mut(&fp) {
|
||||
- // merge origin sets (keep first origin, append the rest)
|
||||
- for o in rm.origin.iter() {
|
||||
- if !existing.origin.iter().any(|e| e == o) {
|
||||
- existing.origin = OriginSet::new(
|
||||
- existing.origin.first().clone(),
|
||||
- existing
|
||||
- .origin
|
||||
- .iter()
|
||||
- .skip(1)
|
||||
- .cloned()
|
||||
- .chain(std::iter::once(o.clone()))
|
||||
- .collect(),
|
||||
- );
|
||||
- }
|
||||
- }
|
||||
- continue;
|
||||
- }
|
||||
- by_fp.insert(fp, rm);
|
||||
- }
|
||||
- by_fp.into_values().collect()
|
||||
- }
|
||||
-
|
||||
- pub fn gather_json_findings(
|
||||
- &self,
|
||||
- args: &cli::commands::scan::ScanArgs,
|
||||
- ) -> Result<Vec<serde_json::Value>> {
|
||||
- let mut matches = self.get_filtered_matches()?;
|
||||
- if !args.no_dedup {
|
||||
- matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
- }
|
||||
-
|
||||
- let mut json_findings = Vec::new();
|
||||
- for rm in matches {
|
||||
- let source_span = &rm.m.location.source_span;
|
||||
- let line_num = source_span.start.line;
|
||||
-
|
||||
- let snippet = Escaped(
|
||||
- rm.m.groups
|
||||
- .captures
|
||||
- .get(1)
|
||||
- .or_else(|| rm.m.groups.captures.get(0))
|
||||
- .map(|capture| capture.value.as_bytes())
|
||||
- .unwrap_or_default(),
|
||||
- )
|
||||
- .to_string();
|
||||
-
|
||||
- let validation_status = if rm.validation_success {
|
||||
- "Active Credential"
|
||||
- } else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() {
|
||||
- "Not Attempted"
|
||||
- } else {
|
||||
- "Inactive Credential"
|
||||
- };
|
||||
-
|
||||
- const MAX_RESPONSE_LENGTH: usize = 512;
|
||||
- let truncated_body: String =
|
||||
- rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect();
|
||||
- let ellipsis =
|
||||
- if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" };
|
||||
- let response_body = format!("{}{}", truncated_body, ellipsis);
|
||||
-
|
||||
- // Call extract_git_metadata on each GitRepo origin and take the first non-null result.
|
||||
- let git_metadata_val = rm
|
||||
- .origin
|
||||
- .iter()
|
||||
- .filter_map(|origin| {
|
||||
- if let Origin::GitRepo(e) = origin {
|
||||
- self.extract_git_metadata(e, source_span)
|
||||
- } else {
|
||||
- None
|
||||
- }
|
||||
- })
|
||||
- .next()
|
||||
- .unwrap_or(serde_json::Value::Null);
|
||||
-
|
||||
- // Collect a file path from an Origin::File, if available.
|
||||
- let file_path = rm
|
||||
- .origin
|
||||
- .iter()
|
||||
- .find_map(|origin| match origin {
|
||||
- Origin::File(e) => {
|
||||
- if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
- Some(url)
|
||||
- } else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
- Some(url)
|
||||
- } else if let Some(mapped) = self.s3_display_path(&e.path) {
|
||||
- Some(mapped)
|
||||
- } else if let Some(mapped) = self.docker_display_path(&e.path) {
|
||||
- Some(mapped)
|
||||
- } else {
|
||||
- Some(e.path.display().to_string())
|
||||
- }
|
||||
- }
|
||||
- Origin::Extended(e) => e.path().map(|p| p.display().to_string()),
|
||||
- _ => None,
|
||||
- })
|
||||
- .unwrap_or_default();
|
||||
-
|
||||
- let match_json = json!({
|
||||
- "rule": {
|
||||
- "name": rm.m.rule_name,
|
||||
- "id": rm.m.rule_text_id,
|
||||
- },
|
||||
- "finding": {
|
||||
- "snippet": snippet,
|
||||
- "fingerprint": rm.m.finding_fingerprint.to_string(),
|
||||
- "confidence": rm.match_confidence.to_string(),
|
||||
- "entropy": format!("{:.2}", rm.m.calculated_entropy),
|
||||
- "validation": {
|
||||
- "status": validation_status,
|
||||
- "response": response_body,
|
||||
- },
|
||||
- "language": rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()),
|
||||
- "line": line_num,
|
||||
- "column_start": source_span.start.column,
|
||||
- "column_end": source_span.end.column,
|
||||
- "path": file_path,
|
||||
- "git_metadata": git_metadata_val
|
||||
- }
|
||||
- });
|
||||
-
|
||||
- let finding_json = json!({
|
||||
- "id": rm.m.rule_text_id,
|
||||
- "matches": [ match_json ]
|
||||
- });
|
||||
- json_findings.push(finding_json);
|
||||
- }
|
||||
- Ok(json_findings)
|
||||
- }
|
||||
pub fn json_format<W: std::io::Write>(
|
||||
&self,
|
||||
mut writer: W,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
- let mut findings = Vec::new();
|
||||
-
|
||||
- // Get filtered matches
|
||||
- let mut matches = self.get_filtered_matches()?;
|
||||
-
|
||||
- // Apply deduplication only if requested
|
||||
- if !args.no_dedup {
|
||||
- matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
- }
|
||||
-
|
||||
- // For each match, handle it based on the no_dedup flag
|
||||
- for rm in matches {
|
||||
- if args.no_dedup && rm.origin.len() > 1 {
|
||||
- // For no_dedup and multiple origins, create separate findings for each origin
|
||||
- for origin in rm.origin.iter() {
|
||||
- // Create a single-origin version of this match
|
||||
- let single_origin_rm = ReportMatch {
|
||||
- origin: OriginSet::new(origin.clone(), Vec::new()),
|
||||
- blob_metadata: rm.blob_metadata.clone(),
|
||||
- m: rm.m.clone(),
|
||||
- comment: rm.comment.clone(),
|
||||
- visible: rm.visible,
|
||||
- match_confidence: rm.match_confidence,
|
||||
- validation_response_body: rm.validation_response_body.clone(),
|
||||
- validation_response_status: rm.validation_response_status,
|
||||
- validation_success: rm.validation_success,
|
||||
- };
|
||||
-
|
||||
- // Process this single-origin match into a JSON finding
|
||||
- let json_finding = self.process_match_to_json(&single_origin_rm, args)?;
|
||||
- findings.push(json_finding);
|
||||
- }
|
||||
- } else {
|
||||
- // Process normally for deduped matches or matches with only one origin
|
||||
- let json_finding = self.process_match_to_json(&rm, args)?;
|
||||
- findings.push(json_finding);
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- // Write the JSON output
|
||||
- if !findings.is_empty() {
|
||||
- serde_json::to_writer_pretty(&mut writer, &findings)?;
|
||||
+ let records = self.build_finding_records(args)?;
|
||||
+ if !records.is_empty() {
|
||||
+ serde_json::to_writer_pretty(&mut writer, &records)?;
|
||||
writeln!(writer)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
- // Add a helper method to convert a ReportMatch to a JSON finding
|
||||
- pub fn process_match_to_json(
|
||||
- &self,
|
||||
- rm: &ReportMatch,
|
||||
- args: &cli::commands::scan::ScanArgs,
|
||||
- ) -> Result<serde_json::Value> {
|
||||
- // Extract the relevant data from the match as you already do in your current implementation
|
||||
- let source_span = &rm.m.location.source_span;
|
||||
- let line_num = source_span.start.line;
|
||||
-
|
||||
- let snippet = Escaped(
|
||||
- rm.m.groups
|
||||
- .captures
|
||||
- .get(1)
|
||||
- .or_else(|| rm.m.groups.captures.get(0))
|
||||
- .map(|capture| capture.value.as_bytes())
|
||||
- .unwrap_or_default(),
|
||||
- )
|
||||
- .to_string();
|
||||
-
|
||||
- let validation_status = if rm.validation_success {
|
||||
- "Active Credential"
|
||||
- } else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() {
|
||||
- "Not Attempted"
|
||||
- } else {
|
||||
- "Inactive Credential"
|
||||
- };
|
||||
-
|
||||
- const MAX_RESPONSE_LENGTH: usize = 512;
|
||||
- let truncated_body: String =
|
||||
- rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect();
|
||||
- let ellipsis =
|
||||
- if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" };
|
||||
- let response_body = format!("{}{}", truncated_body, ellipsis);
|
||||
-
|
||||
- // Call extract_git_metadata on each GitRepo origin and take the first non-null result.
|
||||
- let git_metadata_val = rm
|
||||
- .origin
|
||||
- .iter()
|
||||
- .filter_map(|origin| {
|
||||
- if let Origin::GitRepo(e) = origin {
|
||||
- self.extract_git_metadata(e, source_span)
|
||||
- } else {
|
||||
- None
|
||||
- }
|
||||
- })
|
||||
- .next()
|
||||
- .unwrap_or(serde_json::Value::Null);
|
||||
-
|
||||
- // Collect a file path from an Origin::File, if available.
|
||||
- let file_path = rm
|
||||
- .origin
|
||||
- .iter()
|
||||
- .find_map(|origin| {
|
||||
- if let Origin::File(e) = origin {
|
||||
- if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
- Some(url)
|
||||
- } else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
- Some(url)
|
||||
- } else if let Some(mapped) = self.s3_display_path(&e.path) {
|
||||
- Some(mapped)
|
||||
- } else if let Some(mapped) = self.docker_display_path(&e.path) {
|
||||
- Some(mapped)
|
||||
- } else {
|
||||
- Some(e.path.display().to_string())
|
||||
- }
|
||||
- } else if let Origin::Extended(e) = origin {
|
||||
- e.path().map(|p| p.display().to_string())
|
||||
- } else {
|
||||
- None
|
||||
- }
|
||||
- })
|
||||
- .unwrap_or_default();
|
||||
-
|
||||
- let match_json = json!({
|
||||
- "rule": {
|
||||
- "name": rm.m.rule_name,
|
||||
- "id": rm.m.rule_text_id,
|
||||
- },
|
||||
- "finding": {
|
||||
- "snippet": snippet,
|
||||
- "fingerprint": rm.m.finding_fingerprint.to_string(),
|
||||
- "confidence": rm.match_confidence.to_string(),
|
||||
- "entropy": format!("{:.2}", rm.m.calculated_entropy),
|
||||
- "validation": {
|
||||
- "status": validation_status,
|
||||
- "response": response_body,
|
||||
- },
|
||||
- "language": rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()),
|
||||
- "line": line_num,
|
||||
- "column_start": source_span.start.column,
|
||||
- "column_end": source_span.end.column,
|
||||
- "path": file_path,
|
||||
- "git_metadata": git_metadata_val
|
||||
- }
|
||||
- });
|
||||
-
|
||||
- let finding_json = json!({
|
||||
- "id": rm.m.rule_text_id,
|
||||
- "matches": [ match_json ]
|
||||
- });
|
||||
-
|
||||
- Ok(finding_json)
|
||||
- }
|
||||
- // // Modified JSON format to pass args to gather_json_findings
|
||||
- // pub fn json_format<W: std::io::Write>(
|
||||
- // &self,
|
||||
- // mut writer: W,
|
||||
- // args: &cli::commands::scan::ScanArgs,
|
||||
- // ) -> Result<()> {
|
||||
- // let findings = self.gather_json_findings(args)?;
|
||||
- // if !findings.is_empty() {
|
||||
- // serde_json::to_writer_pretty(&mut writer, &findings)?;
|
||||
- // writeln!(writer)?;
|
||||
- // }
|
||||
- // Ok(())
|
||||
- // }
|
||||
-
|
||||
pub fn jsonl_format<W: std::io::Write>(
|
||||
&self,
|
||||
mut writer: W,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
- // Get filtered matches
|
||||
- let mut matches = self.get_filtered_matches()?;
|
||||
-
|
||||
- // Apply deduplication only if requested
|
||||
- if !args.no_dedup {
|
||||
- matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
- }
|
||||
-
|
||||
- // For each match, handle it based on the no_dedup flag
|
||||
- for rm in matches {
|
||||
- if args.no_dedup && rm.origin.len() > 1 {
|
||||
- // For no_dedup and multiple origins, create separate findings for each origin
|
||||
- for origin in rm.origin.iter() {
|
||||
- // Create a single-origin version of this match
|
||||
- let single_origin_rm = ReportMatch {
|
||||
- origin: OriginSet::new(origin.clone(), Vec::new()),
|
||||
- blob_metadata: rm.blob_metadata.clone(),
|
||||
- m: rm.m.clone(),
|
||||
- comment: rm.comment.clone(),
|
||||
- visible: rm.visible,
|
||||
- match_confidence: rm.match_confidence,
|
||||
- validation_response_body: rm.validation_response_body.clone(),
|
||||
- validation_response_status: rm.validation_response_status,
|
||||
- validation_success: rm.validation_success,
|
||||
- };
|
||||
-
|
||||
- // Process this single-origin match into a JSON finding and write it
|
||||
- let json_finding = self.process_match_to_json(&single_origin_rm, args)?;
|
||||
- serde_json::to_writer(&mut writer, &json_finding)?;
|
||||
- writeln!(writer)?;
|
||||
- }
|
||||
- } else {
|
||||
- // Process normally for deduped matches or matches with only one origin
|
||||
- let json_finding = self.process_match_to_json(&rm, args)?;
|
||||
- serde_json::to_writer(&mut writer, &json_finding)?;
|
||||
- writeln!(writer)?;
|
||||
- }
|
||||
+ let records = self.build_finding_records(args)?;
|
||||
+ for record in records {
|
||||
+ serde_json::to_writer(&mut writer, &record)?;
|
||||
+ writeln!(writer)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
- // // Modified JSONL format to pass args to gather_json_findings
|
||||
- // pub fn jsonl_format<W: std::io::Write>(
|
||||
- // &self,
|
||||
- // mut writer: W,
|
||||
- // args: &cli::commands::scan::ScanArgs,
|
||||
- // ) -> Result<()> {
|
||||
- // let findings = self.gather_json_findings(args)?;
|
||||
- // for finding in findings {
|
||||
- // serde_json::to_writer(&mut writer, &finding)?;
|
||||
- // writeln!(writer)?;
|
||||
- // }
|
||||
- // Ok(())
|
||||
- // }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
- use std::{
|
||||
- io::Cursor,
|
||||
- path::PathBuf,
|
||||
- sync::{Arc, Mutex},
|
||||
- };
|
||||
-
|
||||
- use anyhow::Result;
|
||||
- use serde_json::Value;
|
||||
- use url::Url;
|
||||
-
|
||||
use super::*;
|
||||
use crate::{
|
||||
blob::BlobId,
|
||||
- cli::commands::{
|
||||
- github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
|
||||
- inputs::{ContentFilteringArgs, InputSpecifierArgs},
|
||||
- output::OutputArgs,
|
||||
- rules::RuleSpecifierArgs,
|
||||
- scan::ConfidenceLevel,
|
||||
+ cli::commands::github::GitHubRepoType,
|
||||
+ cli::commands::output::{OutputArgs, ReportOutputFormat},
|
||||
+ cli::commands::scan::{
|
||||
+ ConfidenceLevel, ContentFilteringArgs, GitCloneMode, GitHistoryMode,
|
||||
+ InputSpecifierArgs, RuleSpecifierArgs,
|
||||
},
|
||||
findings_store::FindingsStore,
|
||||
location::{Location, OffsetSpan, SourcePoint, SourceSpan},
|
||||
- matcher::{Match, SerializableCapture, SerializableCaptures},
|
||||
- origin::{Origin, OriginSet},
|
||||
- reporter::{ReportMatch, Styles},
|
||||
- rules::rule::Confidence,
|
||||
- util::intern,
|
||||
+ matcher::serializable::{SerializableCapture, SerializableCaptures},
|
||||
+ matcher::Match,
|
||||
+ origin::Origin,
|
||||
+ reporter::styles::Styles,
|
||||
+ scanner::test_utils::intern,
|
||||
};
|
||||
+ use std::{
|
||||
+ io::Cursor,
|
||||
+ path::PathBuf,
|
||||
+ sync::{Arc, Mutex},
|
||||
+ };
|
||||
+ use url::Url;
|
||||
|
||||
fn create_default_args() -> cli::commands::scan::ScanArgs {
|
||||
use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope
|
||||
|
||||
cli::commands::scan::ScanArgs {
|
||||
num_jobs: 1,
|
||||
no_dedup: false,
|
||||
rules: RuleSpecifierArgs {
|
||||
rules_path: Vec::new(),
|
||||
rule: vec!["all".into()],
|
||||
load_builtins: true,
|
||||
},
|
||||
input_specifier_args: InputSpecifierArgs {
|
||||
// local path / git URL inputs
|
||||
path_inputs: Vec::new(),
|
||||
git_url: Vec::new(),
|
||||
|
||||
// GitHub
|
||||
github_user: Vec::new(),
|
||||
github_organization: Vec::new(),
|
||||
all_github_organizations: false,
|
||||
github_api_url: Url::parse("https://api.github.com/").unwrap(),
|
||||
github_repo_type: GitHubRepoType::Source,
|
||||
|
||||
// GitLab
|
||||
diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs
|
||||
index 9fcb1ecdfe8decc60278848c4a7be43cc9ebee70..b600f9f65838e52ce5dc3d7bb3bb1a5d5ff2bcaf 100644
|
||||
--- a/src/reporter/json_format.rs
|
||||
+++ b/src/reporter/json_format.rs
|
||||
@@ -458,240 +102,168 @@ mod tests {
|
||||
git_history: GitHistoryMode::Full,
|
||||
scan_nested_repos: true,
|
||||
commit_metadata: true,
|
||||
},
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
max_file_size_mb: 25.0,
|
||||
no_extract_archives: false,
|
||||
extraction_depth: 2,
|
||||
exclude: Vec::new(), // Exclude patterns
|
||||
no_binary: true,
|
||||
},
|
||||
confidence: ConfidenceLevel::Medium,
|
||||
no_validate: false,
|
||||
rule_stats: false,
|
||||
only_valid: false,
|
||||
min_entropy: None,
|
||||
redact: false,
|
||||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
}
|
||||
}
|
||||
|
||||
- // Helper function to create a mock Match
|
||||
fn create_mock_match(
|
||||
rule_name: &str,
|
||||
rule_text_id: &str,
|
||||
rule_finding_fingerprint: &str,
|
||||
validation_success: bool,
|
||||
) -> Match {
|
||||
Match {
|
||||
location: Location {
|
||||
offset_span: OffsetSpan { start: 10, end: 20 },
|
||||
source_span: SourceSpan {
|
||||
start: SourcePoint { line: 5, column: 10 },
|
||||
end: SourcePoint { line: 5, column: 20 },
|
||||
},
|
||||
},
|
||||
groups: SerializableCaptures {
|
||||
captures: vec![SerializableCapture {
|
||||
name: Some("token".to_string()),
|
||||
match_number: 1,
|
||||
start: 10,
|
||||
end: 20,
|
||||
value: "mock_token".into(),
|
||||
}],
|
||||
},
|
||||
blob_id: BlobId::new(b"mock_blob"),
|
||||
finding_fingerprint: 0123,
|
||||
rule_finding_fingerprint: intern(rule_finding_fingerprint),
|
||||
rule_text_id: intern(rule_text_id),
|
||||
- rule_name: intern(rule_name), //.to_string(),
|
||||
+ rule_name: intern(rule_name),
|
||||
rule_confidence: Confidence::Medium,
|
||||
validation_response_body: "validation response".to_string(),
|
||||
validation_response_status: 200,
|
||||
validation_success,
|
||||
calculated_entropy: 4.5,
|
||||
visible: true,
|
||||
}
|
||||
}
|
||||
|
||||
- // Helper function to create a mock DetailsReporter
|
||||
fn setup_mock_reporter(matches: Vec<ReportMatch>) -> DetailsReporter {
|
||||
let mut datastore = FindingsStore::new(PathBuf::from("/tmp"));
|
||||
- // Create mock origin and blob metadata for the first test match
|
||||
if !matches.is_empty() {
|
||||
let blob_metadata = BlobMetadata {
|
||||
id: BlobId::new(b"mock_blob"),
|
||||
num_bytes: 1024,
|
||||
mime_essence: Some("text/plain".to_string()),
|
||||
charset: Some("UTF-8".to_string()),
|
||||
language: Some("Rust".to_string()),
|
||||
};
|
||||
let dedup = true;
|
||||
- // Add matches to datastore
|
||||
for m in matches.clone() {
|
||||
datastore.record(
|
||||
vec![(
|
||||
Arc::new(OriginSet::new(
|
||||
- // OriginSet -- Arc<…>
|
||||
Origin::from_file(PathBuf::from("/mock/path/file.rs")),
|
||||
vec![],
|
||||
)),
|
||||
- Arc::new(blob_metadata.clone()), // BlobMetadata -- Arc<…>
|
||||
+ Arc::new(blob_metadata.clone()),
|
||||
m.m.clone(),
|
||||
)],
|
||||
dedup,
|
||||
);
|
||||
}
|
||||
}
|
||||
DetailsReporter {
|
||||
datastore: Arc::new(Mutex::new(datastore)),
|
||||
styles: Styles::new(false),
|
||||
only_valid: false,
|
||||
}
|
||||
}
|
||||
+
|
||||
#[test]
|
||||
fn test_json_format() -> Result<()> {
|
||||
- // Create a mock match with successful validation
|
||||
let mock_match =
|
||||
create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true);
|
||||
let matches = vec![ReportMatch {
|
||||
origin: OriginSet::new(Origin::from_file(PathBuf::from("/mock/path/file.rs")), vec![]),
|
||||
blob_metadata: BlobMetadata {
|
||||
id: BlobId::new(b"mock_blob"),
|
||||
num_bytes: 1024,
|
||||
mime_essence: Some("text/plain".to_string()),
|
||||
charset: Some("UTF-8".to_string()),
|
||||
language: Some("Rust".to_string()),
|
||||
},
|
||||
m: mock_match,
|
||||
comment: None,
|
||||
match_confidence: Confidence::Medium,
|
||||
visible: true,
|
||||
validation_response_body: "validation response".to_string(),
|
||||
validation_response_status: 200,
|
||||
validation_success: true,
|
||||
}];
|
||||
let reporter = setup_mock_reporter(matches);
|
||||
let mut output = Cursor::new(Vec::new());
|
||||
- // Call the json_format method
|
||||
reporter.json_format(&mut output, &create_default_args())?;
|
||||
- // Parse and validate JSON output
|
||||
- let json_output: Vec<Value> = serde_json::from_slice(&output.into_inner())?;
|
||||
+ let json_output: Vec<serde_json::Value> = serde_json::from_slice(&output.into_inner())?;
|
||||
assert!(!json_output.is_empty(), "JSON output should not be empty");
|
||||
- let first_finding = &json_output[0];
|
||||
- assert!(first_finding.get("id").is_some(), "Finding should have an 'id'");
|
||||
- assert!(first_finding.get("matches").is_some(), "Finding should have 'matches'");
|
||||
- // Validate the structure of the first match
|
||||
- let matches = first_finding.get("matches").unwrap().as_array().unwrap();
|
||||
- let first_match = &matches[0];
|
||||
- assert_eq!(first_match.get("rule").unwrap().get("name").unwrap(), "MockRule");
|
||||
- assert_eq!(first_match.get("finding").unwrap().get("language").unwrap(), "Rust");
|
||||
+ let first = &json_output[0];
|
||||
+ assert_eq!(first["rule"]["name"], "MockRule");
|
||||
+ assert_eq!(first["finding"]["language"], "Rust");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
- // #[test]
|
||||
- // fn test_jsonl_format() -> Result<()> {
|
||||
- // // Create a mock match with successful validation
|
||||
- // let mock_match =
|
||||
- // create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true);
|
||||
- // let matches = vec![ReportMatch {
|
||||
- // origin: OriginSet::new(
|
||||
- // Origin::from_file(PathBuf::from("/mock/path/file.rs")),
|
||||
- // vec![],
|
||||
- // ),
|
||||
- // blob_metadata: BlobMetadata {
|
||||
- // id: BlobId::new(b"mock_blob"),
|
||||
- // num_bytes: 1024,
|
||||
- // mime_essence: Some("text/plain".to_string()),
|
||||
- // charset: Some("UTF-8".to_string()),
|
||||
- // language: Some("Rust".to_string()),
|
||||
- // },
|
||||
- // m: mock_match,
|
||||
- // comment: None,
|
||||
- // match_confidence: Confidence::Medium,
|
||||
- // visible: true,
|
||||
- // validation_response_body: "validation response".to_string(),
|
||||
- // validation_response_status: 200,
|
||||
- // validation_success: true,
|
||||
- // }];
|
||||
- // let reporter = setup_mock_reporter(matches);
|
||||
- // let mut output = Cursor::new(Vec::new());
|
||||
- // // Call the jsonl_format method
|
||||
- // reporter.jsonl_format(&mut output, &create_default_args())?;
|
||||
- // // Split output into lines and validate
|
||||
- // let jsonl_output = String::from_utf8(output.into_inner())?;
|
||||
- // let lines: Vec<&str> = jsonl_output.lines().collect();
|
||||
- // assert!(!lines.is_empty(), "JSONL output should not be empty");
|
||||
- // for line in &lines {
|
||||
- // let json_value: serde_json::Value = serde_json::from_str(line)?;
|
||||
- // assert!(
|
||||
- // json_value.get("rule_name").is_some(),
|
||||
- // "Each line should have a 'rule_name'"
|
||||
- // );
|
||||
- // assert!(
|
||||
- // json_value.get("matches").is_some(),
|
||||
- // "Each line should have 'matches'"
|
||||
- // );
|
||||
- // }
|
||||
- // Ok(())
|
||||
- // }
|
||||
-
|
||||
#[test]
|
||||
fn test_validation_status_in_json() -> Result<()> {
|
||||
- // Test validation status in JSON output
|
||||
let test_cases = vec![(true, "Active Credential"), (false, "Inactive Credential")];
|
||||
for (validation_success, expected_status) in test_cases {
|
||||
let mock_match = create_mock_match(
|
||||
"MockRule",
|
||||
"mock_rule_1",
|
||||
"mock_finding_fingerprint",
|
||||
validation_success,
|
||||
);
|
||||
let matches = vec![ReportMatch {
|
||||
origin: OriginSet::new(
|
||||
Origin::from_file(PathBuf::from("/mock/path/file.rs")),
|
||||
vec![],
|
||||
),
|
||||
blob_metadata: BlobMetadata {
|
||||
id: BlobId::new(b"mock_blob"),
|
||||
num_bytes: 1024,
|
||||
mime_essence: Some("text/plain".to_string()),
|
||||
charset: Some("UTF-8".to_string()),
|
||||
language: Some("Rust".to_string()),
|
||||
},
|
||||
m: mock_match,
|
||||
comment: None,
|
||||
match_confidence: Confidence::Medium,
|
||||
visible: true,
|
||||
validation_response_body: "validation response".to_string(),
|
||||
validation_response_status: 200,
|
||||
validation_success,
|
||||
}];
|
||||
let reporter = setup_mock_reporter(matches);
|
||||
let mut output = Cursor::new(Vec::new());
|
||||
- // Call the json_format method
|
||||
reporter.json_format(&mut output, &create_default_args())?;
|
||||
- // Parse and validate JSON output
|
||||
- let json_output: Vec<Value> = serde_json::from_slice(&output.into_inner())?;
|
||||
+ let json_output: Vec<serde_json::Value> = serde_json::from_slice(&output.into_inner())?;
|
||||
assert!(!json_output.is_empty(), "JSON output should not be empty");
|
||||
- let first_finding = &json_output[0];
|
||||
- let matches = first_finding.get("matches").unwrap().as_array().unwrap();
|
||||
- let first_match = &matches[0];
|
||||
- let validation_status = first_match
|
||||
- .get("finding")
|
||||
- .unwrap()
|
||||
- .get("validation")
|
||||
- .unwrap()
|
||||
- .get("status")
|
||||
- .unwrap()
|
||||
- .as_str()
|
||||
- .unwrap();
|
||||
+ let first = &json_output[0];
|
||||
+ let validation_status = first["finding"]["validation"]["status"].as_str().unwrap();
|
||||
assert_eq!(validation_status, expected_status);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
@ -124,7 +124,6 @@ pub struct InputSpecifierArgs {
|
|||
#[arg(long, requires = "s3_bucket")]
|
||||
pub aws_local_profile: Option<String>,
|
||||
|
||||
|
||||
/// Docker/OCI images to scan (no local Docker required)
|
||||
#[arg(long = "docker-image")]
|
||||
pub docker_image: Vec<String>,
|
||||
|
|
|
|||
|
|
@ -29,8 +29,8 @@ pub mod rule_loader;
|
|||
pub mod rule_profiling;
|
||||
pub mod rules;
|
||||
pub mod rules_database;
|
||||
pub mod safe_list;
|
||||
pub mod s3;
|
||||
pub mod safe_list;
|
||||
pub mod scanner;
|
||||
pub mod scanner_pool;
|
||||
pub mod serde_utils;
|
||||
|
|
|
|||
229
src/reporter.rs
229
src/reporter.rs
|
|
@ -5,12 +5,12 @@ use std::{
|
|||
|
||||
use anyhow::Result;
|
||||
use http::StatusCode;
|
||||
use indenter::indented;
|
||||
use schemars::JsonSchema;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{
|
||||
blob::BlobMetadata,
|
||||
bstring_escape::Escaped,
|
||||
cli,
|
||||
cli::global::GlobalArgs,
|
||||
finding_data, findings_store,
|
||||
|
|
@ -226,38 +226,6 @@ impl DetailsReporter {
|
|||
.collect())
|
||||
}
|
||||
|
||||
// fn process_matches(&self, only_valid: bool) -> Result<Vec<ReportMatch>> {
|
||||
// let datastore = self.datastore.lock().unwrap();
|
||||
// Ok(datastore
|
||||
// .get_matches()
|
||||
// .iter()
|
||||
// .filter(|msg| {
|
||||
// let (_origin, _blob_metadata, match_item) = &***msg;
|
||||
// if only_valid {
|
||||
// match_item.validation_success
|
||||
// && match_item.validation_response_status != StatusCode::CONTINUE.as_u16()
|
||||
// && match_item.visible
|
||||
// } else {
|
||||
// match_item.visible
|
||||
// }
|
||||
// })
|
||||
// .map(|msg| {
|
||||
// let (origin, blob_metadata, match_item) = &**msg;
|
||||
// ReportMatch {
|
||||
// origin: origin.clone(),
|
||||
// blob_metadata: blob_metadata.clone(),
|
||||
// m: match_item.clone(),
|
||||
// comment: None,
|
||||
// visible: match_item.visible,
|
||||
// match_confidence: match_item.rule_confidence,
|
||||
// validation_response_body: match_item.validation_response_body.clone(),
|
||||
// validation_response_status: match_item.validation_response_status,
|
||||
// validation_success: match_item.validation_success,
|
||||
// }
|
||||
// })
|
||||
// .collect())
|
||||
// }
|
||||
|
||||
/// Returns the report matches produced by `process_matches`, called with this
/// reporter's `only_valid` setting and `true` as the second argument.
// NOTE(review): the meaning of the second `process_matches` argument is not visible
// in this hunk; confirm against its definition before relying on it.
pub fn get_filtered_matches(&self) -> Result<Vec<ReportMatch>> {
|
||||
self.process_matches(self.only_valid, true)
|
||||
}
|
||||
|
|
@ -266,6 +234,166 @@ impl DetailsReporter {
|
|||
self.process_matches(only_valid.unwrap_or(self.only_valid), false)
|
||||
}
|
||||
|
||||
pub fn deduplicate_matches(
|
||||
&self,
|
||||
matches: Vec<ReportMatch>,
|
||||
no_dedup: bool,
|
||||
) -> Vec<ReportMatch> {
|
||||
if no_dedup {
|
||||
return matches;
|
||||
}
|
||||
|
||||
use std::collections::HashMap;
|
||||
let mut by_fp: HashMap<u64, ReportMatch> = HashMap::new();
|
||||
|
||||
for rm in matches {
|
||||
let fp = rm.m.finding_fingerprint;
|
||||
if let Some(existing) = by_fp.get_mut(&fp) {
|
||||
// merge origin sets (keep first origin, append the rest)
|
||||
for o in rm.origin.iter() {
|
||||
if !existing.origin.iter().any(|e| e == o) {
|
||||
existing.origin = OriginSet::new(
|
||||
existing.origin.first().clone(),
|
||||
existing
|
||||
.origin
|
||||
.iter()
|
||||
.skip(1)
|
||||
.cloned()
|
||||
.chain(std::iter::once(o.clone()))
|
||||
.collect(),
|
||||
);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
by_fp.insert(fp, rm);
|
||||
}
|
||||
by_fp.into_values().collect()
|
||||
}
|
||||
|
||||
fn matches_for_output(&self, args: &cli::commands::scan::ScanArgs) -> Result<Vec<ReportMatch>> {
|
||||
let mut matches = self.get_filtered_matches()?;
|
||||
if !args.no_dedup {
|
||||
matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
}
|
||||
if args.no_dedup {
|
||||
let mut expanded = Vec::new();
|
||||
for rm in matches {
|
||||
if rm.origin.len() > 1 {
|
||||
for origin in rm.origin.iter() {
|
||||
let mut single = rm.clone();
|
||||
single.origin = OriginSet::new(origin.clone(), Vec::new());
|
||||
expanded.push(single);
|
||||
}
|
||||
} else {
|
||||
expanded.push(rm);
|
||||
}
|
||||
}
|
||||
matches = expanded;
|
||||
}
|
||||
Ok(matches)
|
||||
}
|
||||
|
||||
pub fn build_finding_record(
|
||||
&self,
|
||||
rm: &ReportMatch,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> FindingReporterRecord {
|
||||
let source_span = &rm.m.location.source_span;
|
||||
let line_num = source_span.start.line;
|
||||
|
||||
let snippet = Escaped(
|
||||
rm.m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rm.m.groups.captures.get(0))
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
.to_string();
|
||||
|
||||
let validation_status = if rm.validation_success {
|
||||
"Active Credential".to_string()
|
||||
} else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() {
|
||||
"Not Attempted".to_string()
|
||||
} else {
|
||||
"Inactive Credential".to_string()
|
||||
};
|
||||
|
||||
const MAX_RESPONSE_LENGTH: usize = 512;
|
||||
let truncated_body: String =
|
||||
rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect();
|
||||
let ellipsis =
|
||||
if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" };
|
||||
let response_body = format!("{}{}", truncated_body, ellipsis);
|
||||
|
||||
let git_metadata_val = rm
|
||||
.origin
|
||||
.iter()
|
||||
.filter_map(|origin| {
|
||||
if let Origin::GitRepo(e) = origin {
|
||||
self.extract_git_metadata(e, source_span)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.next();
|
||||
|
||||
let file_path = rm
|
||||
.origin
|
||||
.iter()
|
||||
.find_map(|origin| match origin {
|
||||
Origin::File(e) => {
|
||||
if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
Some(url)
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
Some(url)
|
||||
} else if let Some(mapped) = self.s3_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else if let Some(mapped) = self.docker_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else {
|
||||
Some(e.path.display().to_string())
|
||||
}
|
||||
}
|
||||
Origin::Extended(e) => e.path().map(|p| p.display().to_string()),
|
||||
_ => None,
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
FindingReporterRecord {
|
||||
rule: RuleMetadata {
|
||||
name: rm.m.rule_name.to_string().clone(),
|
||||
id: rm.m.rule_text_id.to_string().clone(),
|
||||
},
|
||||
finding: FindingRecordData {
|
||||
snippet,
|
||||
fingerprint: rm.m.finding_fingerprint.to_string(),
|
||||
confidence: rm.match_confidence.to_string(),
|
||||
entropy: format!("{:.2}", rm.m.calculated_entropy),
|
||||
validation: ValidationInfo { status: validation_status, response: response_body },
|
||||
language: rm
|
||||
.blob_metadata
|
||||
.language
|
||||
.clone()
|
||||
.unwrap_or_else(|| "Unknown".to_string()),
|
||||
line: line_num as u32,
|
||||
column_start: source_span.start.column as u32,
|
||||
column_end: source_span.end.column as u32,
|
||||
path: file_path,
|
||||
git_metadata: git_metadata_val,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_finding_records(
|
||||
&self,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<Vec<FindingReporterRecord>> {
|
||||
let matches = self.matches_for_output(args)?;
|
||||
Ok(matches.iter().map(|rm| self.build_finding_record(rm, args)).collect())
|
||||
}
|
||||
|
||||
fn get_finding_data(&self) -> Result<Vec<finding_data::FindingMetadata>> {
|
||||
let datastore = self.datastore.lock().unwrap();
|
||||
Ok(datastore
|
||||
|
|
@ -388,6 +516,41 @@ pub struct ReportMatch {
|
|||
/// Validation Success
|
||||
pub validation_success: bool,
|
||||
}
|
||||
|
||||
// One reported finding: the rule that produced it plus the finding details.
// Plain `//` comments are used on purpose: this type derives `JsonSchema`.
#[derive(Serialize, JsonSchema, Clone, Debug)]
|
||||
pub struct FindingReporterRecord {
|
||||
// Name and id of the matching rule.
pub rule: RuleMetadata,
|
||||
// Snippet, location, validation outcome and related data for the match.
pub finding: FindingRecordData,
|
||||
}
|
||||
|
||||
// Identifies the rule behind a finding.
#[derive(Serialize, JsonSchema, Clone, Debug)]
|
||||
pub struct RuleMetadata {
|
||||
// Rule name (filled from the match's `rule_name`).
pub name: String,
|
||||
// Rule id (filled from the match's `rule_text_id`).
pub id: String,
|
||||
}
|
||||
|
||||
// Outcome of validating a finding.
#[derive(Serialize, JsonSchema, Clone, Debug)]
|
||||
pub struct ValidationInfo {
|
||||
// One of "Active Credential", "Not Attempted" or "Inactive Credential".
pub status: String,
|
||||
// Validation response body, truncated by the record builder.
pub response: String,
|
||||
}
|
||||
|
||||
// Details of a single finding as written by the report formats.
#[derive(Serialize, JsonSchema, Clone, Debug)]
|
||||
pub struct FindingRecordData {
|
||||
// Escaped text of the matched capture.
pub snippet: String,
|
||||
// Finding fingerprint rendered as a string.
pub fingerprint: String,
|
||||
// Match confidence rendered as a string.
pub confidence: String,
|
||||
// Calculated entropy formatted with two decimals.
pub entropy: String,
|
||||
// Validation status and (truncated) response.
pub validation: ValidationInfo,
|
||||
// Language of the blob, or "Unknown" when not detected.
pub language: String,
|
||||
// Line of the start of the match.
pub line: u32,
|
||||
// Column where the match starts.
pub column_start: u32,
|
||||
// Column where the match ends.
pub column_end: u32,
|
||||
// Display path or URL of the origin; empty when no file-like origin exists.
pub path: String,
|
||||
// Left out of the serialized output entirely when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
// Git metadata for the finding, when a git origin provided any.
pub git_metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
impl From<finding_data::FindingDataEntry> for ReportMatch {
|
||||
fn from(e: finding_data::FindingDataEntry) -> Self {
|
||||
ReportMatch {
|
||||
|
|
|
|||
|
|
@ -1,90 +1,17 @@
|
|||
use bson::Document;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::*;
|
||||
|
||||
impl DetailsReporter {
|
||||
/// Formats findings as BSON and writes them to the provided writer.
|
||||
/// For testing purposes, prints the full JSON for each finding before converting.
|
||||
pub fn bson_format<W: std::io::Write>(
|
||||
&self,
|
||||
mut writer: W,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
// Get filtered matches
|
||||
let mut matches = self.get_filtered_matches()?;
|
||||
|
||||
// Apply deduplication only if requested
|
||||
if !args.no_dedup {
|
||||
matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
}
|
||||
|
||||
let mut bson_findings = Vec::new();
|
||||
|
||||
// For each match, handle it based on the no_dedup flag
|
||||
for rm in matches {
|
||||
if args.no_dedup && rm.origin.len() > 1 {
|
||||
// For no_dedup and multiple origins, create separate findings for each origin
|
||||
for origin in rm.origin.iter() {
|
||||
// Create a single-origin version of this match
|
||||
let single_origin_rm = ReportMatch {
|
||||
origin: OriginSet::new(origin.clone(), Vec::new()),
|
||||
blob_metadata: rm.blob_metadata.clone(),
|
||||
m: rm.m.clone(),
|
||||
comment: rm.comment.clone(),
|
||||
visible: rm.visible,
|
||||
match_confidence: rm.match_confidence,
|
||||
validation_response_body: rm.validation_response_body.clone(),
|
||||
validation_response_status: rm.validation_response_status,
|
||||
validation_success: rm.validation_success,
|
||||
};
|
||||
|
||||
// Process to JSON first, then convert to BSON
|
||||
let json_finding = self.process_match_to_json(&single_origin_rm, args)?;
|
||||
if let Ok(bson_doc) = json_to_bson_document(&json_finding) {
|
||||
bson_findings.push(bson_doc);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Process normally for deduped matches or matches with only one origin
|
||||
let json_finding = self.process_match_to_json(&rm, args)?;
|
||||
if let Ok(bson_doc) = json_to_bson_document(&json_finding) {
|
||||
bson_findings.push(bson_doc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write each BSON document
|
||||
for doc in bson_findings {
|
||||
let records = self.build_finding_records(args)?;
|
||||
for record in records {
|
||||
let doc = bson::to_document(&record)?;
|
||||
doc.to_writer(&mut writer)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// pub fn bson_format<W: std::io::Write>(
|
||||
// &self,
|
||||
// mut writer: W,
|
||||
// args: &cli::commands::scan::ScanArgs,
|
||||
// ) -> Result<()> {
|
||||
// let findings = self.gather_json_findings(args)?;
|
||||
|
||||
// // Print the full JSON for each finding
|
||||
// for finding in &findings {
|
||||
// println!("Full JSON:\n{}", serde_json::to_string_pretty(finding)?);
|
||||
// }
|
||||
|
||||
// let bson_findings: Vec<Document> = findings
|
||||
// .into_iter()
|
||||
// .filter_map(|finding| json_to_bson_document(&finding).ok())
|
||||
// .collect();
|
||||
// for doc in bson_findings {
|
||||
// doc.to_writer(&mut writer)?;
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
}
|
||||
|
||||
fn json_to_bson_document(json: &Value) -> Result<Document> {
|
||||
match bson::to_bson(json)? {
|
||||
bson::Bson::Document(doc) => Ok(doc),
|
||||
_ => Err(anyhow::anyhow!("Failed to convert JSON to BSON document")),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,415 +1,61 @@
|
|||
use http::StatusCode;
|
||||
use serde_json::json;
|
||||
|
||||
use super::*;
|
||||
use crate::bstring_escape::Escaped;
|
||||
use serde_json::Value;
|
||||
|
||||
impl DetailsReporter {
|
||||
pub fn deduplicate_matches(
|
||||
&self,
|
||||
matches: Vec<ReportMatch>,
|
||||
no_dedup: bool,
|
||||
) -> Vec<ReportMatch> {
|
||||
if no_dedup {
|
||||
return matches;
|
||||
}
|
||||
|
||||
use std::collections::HashMap;
|
||||
let mut by_fp: HashMap<u64, ReportMatch> = HashMap::new();
|
||||
|
||||
for rm in matches {
|
||||
let fp = rm.m.finding_fingerprint;
|
||||
if let Some(existing) = by_fp.get_mut(&fp) {
|
||||
// merge origin sets (keep first origin, append the rest)
|
||||
for o in rm.origin.iter() {
|
||||
if !existing.origin.iter().any(|e| e == o) {
|
||||
existing.origin = OriginSet::new(
|
||||
existing.origin.first().clone(),
|
||||
existing
|
||||
.origin
|
||||
.iter()
|
||||
.skip(1)
|
||||
.cloned()
|
||||
.chain(std::iter::once(o.clone()))
|
||||
.collect(),
|
||||
);
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
by_fp.insert(fp, rm);
|
||||
}
|
||||
by_fp.into_values().collect()
|
||||
}
|
||||
|
||||
pub fn gather_json_findings(
|
||||
&self,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<Vec<serde_json::Value>> {
|
||||
let mut matches = self.get_filtered_matches()?;
|
||||
if !args.no_dedup {
|
||||
matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
}
|
||||
|
||||
let mut json_findings = Vec::new();
|
||||
for rm in matches {
|
||||
let source_span = &rm.m.location.source_span;
|
||||
let line_num = source_span.start.line;
|
||||
|
||||
let snippet = Escaped(
|
||||
rm.m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rm.m.groups.captures.get(0))
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
.to_string();
|
||||
|
||||
let validation_status = if rm.validation_success {
|
||||
"Active Credential"
|
||||
} else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() {
|
||||
"Not Attempted"
|
||||
} else {
|
||||
"Inactive Credential"
|
||||
};
|
||||
|
||||
const MAX_RESPONSE_LENGTH: usize = 512;
|
||||
let truncated_body: String =
|
||||
rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect();
|
||||
let ellipsis =
|
||||
if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" };
|
||||
let response_body = format!("{}{}", truncated_body, ellipsis);
|
||||
|
||||
// Call extract_git_metadata on each GitRepo origin and take the first non-null result.
|
||||
let git_metadata_val = rm
|
||||
.origin
|
||||
.iter()
|
||||
.filter_map(|origin| {
|
||||
if let Origin::GitRepo(e) = origin {
|
||||
self.extract_git_metadata(e, source_span)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.next()
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
|
||||
// Collect a file path from an Origin::File, if available.
|
||||
let file_path = rm
|
||||
.origin
|
||||
.iter()
|
||||
.find_map(|origin| match origin {
|
||||
Origin::File(e) => {
|
||||
if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
Some(url)
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
Some(url)
|
||||
} else if let Some(mapped) = self.s3_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else if let Some(mapped) = self.docker_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else {
|
||||
Some(e.path.display().to_string())
|
||||
}
|
||||
}
|
||||
Origin::Extended(e) => e.path().map(|p| p.display().to_string()),
|
||||
_ => None,
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let match_json = json!({
|
||||
"rule": {
|
||||
"name": rm.m.rule_name,
|
||||
"id": rm.m.rule_text_id,
|
||||
},
|
||||
"finding": {
|
||||
"snippet": snippet,
|
||||
"fingerprint": rm.m.finding_fingerprint.to_string(),
|
||||
"confidence": rm.match_confidence.to_string(),
|
||||
"entropy": format!("{:.2}", rm.m.calculated_entropy),
|
||||
"validation": {
|
||||
"status": validation_status,
|
||||
"response": response_body,
|
||||
},
|
||||
"language": rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()),
|
||||
"line": line_num,
|
||||
"column_start": source_span.start.column,
|
||||
"column_end": source_span.end.column,
|
||||
"path": file_path,
|
||||
"git_metadata": git_metadata_val
|
||||
}
|
||||
});
|
||||
|
||||
let finding_json = json!({
|
||||
"id": rm.m.rule_text_id,
|
||||
"matches": [ match_json ]
|
||||
});
|
||||
json_findings.push(finding_json);
|
||||
}
|
||||
Ok(json_findings)
|
||||
}
|
||||
pub fn json_format<W: std::io::Write>(
|
||||
&self,
|
||||
mut writer: W,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
let mut findings = Vec::new();
|
||||
|
||||
// Get filtered matches
|
||||
let mut matches = self.get_filtered_matches()?;
|
||||
|
||||
// Apply deduplication only if requested
|
||||
if !args.no_dedup {
|
||||
matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
}
|
||||
|
||||
// For each match, handle it based on the no_dedup flag
|
||||
for rm in matches {
|
||||
if args.no_dedup && rm.origin.len() > 1 {
|
||||
// For no_dedup and multiple origins, create separate findings for each origin
|
||||
for origin in rm.origin.iter() {
|
||||
// Create a single-origin version of this match
|
||||
let single_origin_rm = ReportMatch {
|
||||
origin: OriginSet::new(origin.clone(), Vec::new()),
|
||||
blob_metadata: rm.blob_metadata.clone(),
|
||||
m: rm.m.clone(),
|
||||
comment: rm.comment.clone(),
|
||||
visible: rm.visible,
|
||||
match_confidence: rm.match_confidence,
|
||||
validation_response_body: rm.validation_response_body.clone(),
|
||||
validation_response_status: rm.validation_response_status,
|
||||
validation_success: rm.validation_success,
|
||||
};
|
||||
|
||||
// Process this single-origin match into a JSON finding
|
||||
let json_finding = self.process_match_to_json(&single_origin_rm, args)?;
|
||||
findings.push(json_finding);
|
||||
}
|
||||
} else {
|
||||
// Process normally for deduped matches or matches with only one origin
|
||||
let json_finding = self.process_match_to_json(&rm, args)?;
|
||||
findings.push(json_finding);
|
||||
}
|
||||
}
|
||||
|
||||
// Write the JSON output
|
||||
if !findings.is_empty() {
|
||||
serde_json::to_writer_pretty(&mut writer, &findings)?;
|
||||
let records = self.build_finding_records(args)?;
|
||||
if !records.is_empty() {
|
||||
serde_json::to_writer_pretty(&mut writer, &records)?;
|
||||
writeln!(writer)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Add a helper method to convert a ReportMatch to a JSON finding
|
||||
pub fn process_match_to_json(
|
||||
&self,
|
||||
rm: &ReportMatch,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<serde_json::Value> {
|
||||
// Extract the relevant data from the match as you already do in your current implementation
|
||||
let source_span = &rm.m.location.source_span;
|
||||
let line_num = source_span.start.line;
|
||||
|
||||
let snippet = Escaped(
|
||||
rm.m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rm.m.groups.captures.get(0))
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
.to_string();
|
||||
|
||||
let validation_status = if rm.validation_success {
|
||||
"Active Credential"
|
||||
} else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() {
|
||||
"Not Attempted"
|
||||
} else {
|
||||
"Inactive Credential"
|
||||
};
|
||||
|
||||
const MAX_RESPONSE_LENGTH: usize = 512;
|
||||
let truncated_body: String =
|
||||
rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect();
|
||||
let ellipsis =
|
||||
if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" };
|
||||
let response_body = format!("{}{}", truncated_body, ellipsis);
|
||||
|
||||
// Call extract_git_metadata on each GitRepo origin and take the first non-null result.
|
||||
let git_metadata_val = rm
|
||||
.origin
|
||||
.iter()
|
||||
.filter_map(|origin| {
|
||||
if let Origin::GitRepo(e) = origin {
|
||||
self.extract_git_metadata(e, source_span)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.next()
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
|
||||
// Collect a file path from an Origin::File, if available.
|
||||
let file_path = rm
|
||||
.origin
|
||||
.iter()
|
||||
.find_map(|origin| {
|
||||
if let Origin::File(e) = origin {
|
||||
if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
Some(url)
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
Some(url)
|
||||
} else if let Some(mapped) = self.s3_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else if let Some(mapped) = self.docker_display_path(&e.path) {
|
||||
Some(mapped)
|
||||
} else {
|
||||
Some(e.path.display().to_string())
|
||||
}
|
||||
} else if let Origin::Extended(e) = origin {
|
||||
e.path().map(|p| p.display().to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let match_json = json!({
|
||||
"rule": {
|
||||
"name": rm.m.rule_name,
|
||||
"id": rm.m.rule_text_id,
|
||||
},
|
||||
"finding": {
|
||||
"snippet": snippet,
|
||||
"fingerprint": rm.m.finding_fingerprint.to_string(),
|
||||
"confidence": rm.match_confidence.to_string(),
|
||||
"entropy": format!("{:.2}", rm.m.calculated_entropy),
|
||||
"validation": {
|
||||
"status": validation_status,
|
||||
"response": response_body,
|
||||
},
|
||||
"language": rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()),
|
||||
"line": line_num,
|
||||
"column_start": source_span.start.column,
|
||||
"column_end": source_span.end.column,
|
||||
"path": file_path,
|
||||
"git_metadata": git_metadata_val
|
||||
}
|
||||
});
|
||||
|
||||
let finding_json = json!({
|
||||
"id": rm.m.rule_text_id,
|
||||
"matches": [ match_json ]
|
||||
});
|
||||
|
||||
Ok(finding_json)
|
||||
}
|
||||
// // Modified JSON format to pass args to gather_json_findings
|
||||
// pub fn json_format<W: std::io::Write>(
|
||||
// &self,
|
||||
// mut writer: W,
|
||||
// args: &cli::commands::scan::ScanArgs,
|
||||
// ) -> Result<()> {
|
||||
// let findings = self.gather_json_findings(args)?;
|
||||
// if !findings.is_empty() {
|
||||
// serde_json::to_writer_pretty(&mut writer, &findings)?;
|
||||
// writeln!(writer)?;
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
pub fn jsonl_format<W: std::io::Write>(
|
||||
&self,
|
||||
mut writer: W,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
// Get filtered matches
|
||||
let mut matches = self.get_filtered_matches()?;
|
||||
|
||||
// Apply deduplication only if requested
|
||||
if !args.no_dedup {
|
||||
matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
}
|
||||
|
||||
// For each match, handle it based on the no_dedup flag
|
||||
for rm in matches {
|
||||
if args.no_dedup && rm.origin.len() > 1 {
|
||||
// For no_dedup and multiple origins, create separate findings for each origin
|
||||
for origin in rm.origin.iter() {
|
||||
// Create a single-origin version of this match
|
||||
let single_origin_rm = ReportMatch {
|
||||
origin: OriginSet::new(origin.clone(), Vec::new()),
|
||||
blob_metadata: rm.blob_metadata.clone(),
|
||||
m: rm.m.clone(),
|
||||
comment: rm.comment.clone(),
|
||||
visible: rm.visible,
|
||||
match_confidence: rm.match_confidence,
|
||||
validation_response_body: rm.validation_response_body.clone(),
|
||||
validation_response_status: rm.validation_response_status,
|
||||
validation_success: rm.validation_success,
|
||||
};
|
||||
|
||||
// Process this single-origin match into a JSON finding and write it
|
||||
let json_finding = self.process_match_to_json(&single_origin_rm, args)?;
|
||||
serde_json::to_writer(&mut writer, &json_finding)?;
|
||||
writeln!(writer)?;
|
||||
}
|
||||
} else {
|
||||
// Process normally for deduped matches or matches with only one origin
|
||||
let json_finding = self.process_match_to_json(&rm, args)?;
|
||||
serde_json::to_writer(&mut writer, &json_finding)?;
|
||||
writeln!(writer)?;
|
||||
}
|
||||
let records = self.build_finding_records(args)?;
|
||||
for record in records {
|
||||
serde_json::to_writer(&mut writer, &record)?;
|
||||
writeln!(writer)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// // Modified JSONL format to pass args to gather_json_findings
|
||||
// pub fn jsonl_format<W: std::io::Write>(
|
||||
// &self,
|
||||
// mut writer: W,
|
||||
// args: &cli::commands::scan::ScanArgs,
|
||||
// ) -> Result<()> {
|
||||
// let findings = self.gather_json_findings(args)?;
|
||||
// for finding in findings {
|
||||
// serde_json::to_writer(&mut writer, &finding)?;
|
||||
// writeln!(writer)?;
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::cli::commands::github::GitCloneMode;
|
||||
use crate::cli::commands::github::GitHistoryMode;
|
||||
use crate::cli::commands::rules::RuleSpecifierArgs;
|
||||
use crate::matcher::{SerializableCapture, SerializableCaptures};
|
||||
use crate::util::intern;
|
||||
use crate::{
|
||||
blob::BlobId,
|
||||
cli::commands::github::GitHubRepoType,
|
||||
cli::commands::inputs::ContentFilteringArgs,
|
||||
cli::commands::inputs::InputSpecifierArgs,
|
||||
cli::commands::output::{OutputArgs, ReportOutputFormat},
|
||||
cli::commands::scan::ConfidenceLevel,
|
||||
findings_store::FindingsStore,
|
||||
location::{Location, OffsetSpan, SourcePoint, SourceSpan},
|
||||
matcher::Match,
|
||||
origin::Origin,
|
||||
reporter::styles::Styles,
|
||||
};
|
||||
use std::{
|
||||
io::Cursor,
|
||||
path::PathBuf,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
use serde_json::Value;
|
||||
use url::Url;
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
blob::BlobId,
|
||||
cli::commands::{
|
||||
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
|
||||
inputs::{ContentFilteringArgs, InputSpecifierArgs},
|
||||
output::OutputArgs,
|
||||
rules::RuleSpecifierArgs,
|
||||
scan::ConfidenceLevel,
|
||||
},
|
||||
findings_store::FindingsStore,
|
||||
location::{Location, OffsetSpan, SourcePoint, SourceSpan},
|
||||
matcher::{Match, SerializableCapture, SerializableCaptures},
|
||||
origin::{Origin, OriginSet},
|
||||
reporter::{ReportMatch, Styles},
|
||||
rules::rule::Confidence,
|
||||
util::intern,
|
||||
};
|
||||
|
||||
fn create_default_args() -> cli::commands::scan::ScanArgs {
|
||||
use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope
|
||||
|
||||
|
|
@ -480,7 +126,6 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
// Helper function to create a mock Match
|
||||
fn create_mock_match(
|
||||
rule_name: &str,
|
||||
rule_text_id: &str,
|
||||
|
|
@ -508,7 +153,7 @@ mod tests {
|
|||
finding_fingerprint: 0123,
|
||||
rule_finding_fingerprint: intern(rule_finding_fingerprint),
|
||||
rule_text_id: intern(rule_text_id),
|
||||
rule_name: intern(rule_name), //.to_string(),
|
||||
rule_name: intern(rule_name),
|
||||
rule_confidence: Confidence::Medium,
|
||||
validation_response_body: "validation response".to_string(),
|
||||
validation_response_status: 200,
|
||||
|
|
@ -518,10 +163,8 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
// Helper function to create a mock DetailsReporter
|
||||
fn setup_mock_reporter(matches: Vec<ReportMatch>) -> DetailsReporter {
|
||||
let mut datastore = FindingsStore::new(PathBuf::from("/tmp"));
|
||||
// Create mock origin and blob metadata for the first test match
|
||||
if !matches.is_empty() {
|
||||
let blob_metadata = BlobMetadata {
|
||||
id: BlobId::new(b"mock_blob"),
|
||||
|
|
@ -531,16 +174,14 @@ mod tests {
|
|||
language: Some("Rust".to_string()),
|
||||
};
|
||||
let dedup = true;
|
||||
// Add matches to datastore
|
||||
for m in matches.clone() {
|
||||
datastore.record(
|
||||
vec![(
|
||||
Arc::new(OriginSet::new(
|
||||
// OriginSet -- Arc<…>
|
||||
Origin::from_file(PathBuf::from("/mock/path/file.rs")),
|
||||
vec![],
|
||||
)),
|
||||
Arc::new(blob_metadata.clone()), // BlobMetadata -- Arc<…>
|
||||
Arc::new(blob_metadata.clone()),
|
||||
m.m.clone(),
|
||||
)],
|
||||
dedup,
|
||||
|
|
@ -553,9 +194,9 @@ mod tests {
|
|||
only_valid: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_json_format() -> Result<()> {
|
||||
// Create a mock match with successful validation
|
||||
let mock_match =
|
||||
create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true);
|
||||
let matches = vec![ReportMatch {
|
||||
|
|
@ -577,72 +218,17 @@ mod tests {
|
|||
}];
|
||||
let reporter = setup_mock_reporter(matches);
|
||||
let mut output = Cursor::new(Vec::new());
|
||||
// Call the json_format method
|
||||
reporter.json_format(&mut output, &create_default_args())?;
|
||||
// Parse and validate JSON output
|
||||
let json_output: Vec<Value> = serde_json::from_slice(&output.into_inner())?;
|
||||
let json_output: Vec<serde_json::Value> = serde_json::from_slice(&output.into_inner())?;
|
||||
assert!(!json_output.is_empty(), "JSON output should not be empty");
|
||||
let first_finding = &json_output[0];
|
||||
assert!(first_finding.get("id").is_some(), "Finding should have an 'id'");
|
||||
assert!(first_finding.get("matches").is_some(), "Finding should have 'matches'");
|
||||
// Validate the structure of the first match
|
||||
let matches = first_finding.get("matches").unwrap().as_array().unwrap();
|
||||
let first_match = &matches[0];
|
||||
assert_eq!(first_match.get("rule").unwrap().get("name").unwrap(), "MockRule");
|
||||
assert_eq!(first_match.get("finding").unwrap().get("language").unwrap(), "Rust");
|
||||
let first = &json_output[0];
|
||||
assert_eq!(first["rule"]["name"], "MockRule");
|
||||
assert_eq!(first["finding"]["language"], "Rust");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// #[test]
|
||||
// fn test_jsonl_format() -> Result<()> {
|
||||
// // Create a mock match with successful validation
|
||||
// let mock_match =
|
||||
// create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true);
|
||||
// let matches = vec![ReportMatch {
|
||||
// origin: OriginSet::new(
|
||||
// Origin::from_file(PathBuf::from("/mock/path/file.rs")),
|
||||
// vec![],
|
||||
// ),
|
||||
// blob_metadata: BlobMetadata {
|
||||
// id: BlobId::new(b"mock_blob"),
|
||||
// num_bytes: 1024,
|
||||
// mime_essence: Some("text/plain".to_string()),
|
||||
// charset: Some("UTF-8".to_string()),
|
||||
// language: Some("Rust".to_string()),
|
||||
// },
|
||||
// m: mock_match,
|
||||
// comment: None,
|
||||
// match_confidence: Confidence::Medium,
|
||||
// visible: true,
|
||||
// validation_response_body: "validation response".to_string(),
|
||||
// validation_response_status: 200,
|
||||
// validation_success: true,
|
||||
// }];
|
||||
// let reporter = setup_mock_reporter(matches);
|
||||
// let mut output = Cursor::new(Vec::new());
|
||||
// // Call the jsonl_format method
|
||||
// reporter.jsonl_format(&mut output, &create_default_args())?;
|
||||
// // Split output into lines and validate
|
||||
// let jsonl_output = String::from_utf8(output.into_inner())?;
|
||||
// let lines: Vec<&str> = jsonl_output.lines().collect();
|
||||
// assert!(!lines.is_empty(), "JSONL output should not be empty");
|
||||
// for line in &lines {
|
||||
// let json_value: serde_json::Value = serde_json::from_str(line)?;
|
||||
// assert!(
|
||||
// json_value.get("rule_name").is_some(),
|
||||
// "Each line should have a 'rule_name'"
|
||||
// );
|
||||
// assert!(
|
||||
// json_value.get("matches").is_some(),
|
||||
// "Each line should have 'matches'"
|
||||
// );
|
||||
// }
|
||||
// Ok(())
|
||||
// }
|
||||
|
||||
#[test]
|
||||
fn test_validation_status_in_json() -> Result<()> {
|
||||
// Test validation status in JSON output
|
||||
let test_cases = vec![(true, "Active Credential"), (false, "Inactive Credential")];
|
||||
for (validation_success, expected_status) in test_cases {
|
||||
let mock_match = create_mock_match(
|
||||
|
|
@ -673,23 +259,11 @@ mod tests {
|
|||
}];
|
||||
let reporter = setup_mock_reporter(matches);
|
||||
let mut output = Cursor::new(Vec::new());
|
||||
// Call the json_format method
|
||||
reporter.json_format(&mut output, &create_default_args())?;
|
||||
// Parse and validate JSON output
|
||||
let json_output: Vec<Value> = serde_json::from_slice(&output.into_inner())?;
|
||||
let json_output: Vec<serde_json::Value> = serde_json::from_slice(&output.into_inner())?;
|
||||
assert!(!json_output.is_empty(), "JSON output should not be empty");
|
||||
let first_finding = &json_output[0];
|
||||
let matches = first_finding.get("matches").unwrap().as_array().unwrap();
|
||||
let first_match = &matches[0];
|
||||
let validation_status = first_match
|
||||
.get("finding")
|
||||
.unwrap()
|
||||
.get("validation")
|
||||
.unwrap()
|
||||
.get("status")
|
||||
.unwrap()
|
||||
.as_str()
|
||||
.unwrap();
|
||||
let first = &json_output[0];
|
||||
let validation_status = first["finding"]["validation"]["status"].as_str().unwrap();
|
||||
assert_eq!(validation_status, expected_status);
|
||||
}
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -1,413 +1,126 @@
|
|||
use std::fmt::{Display, Formatter, Result as FmtResult};
|
||||
|
||||
use http::StatusCode;
|
||||
use indenter::indented;
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
bstring_escape::Escaped,
|
||||
origin::{get_repo_url, GitRepoOrigin},
|
||||
};
|
||||
|
||||
impl DetailsReporter {
|
||||
// Modified pretty format to use deduplicate_matches helper
|
||||
pub fn pretty_format<W: std::io::Write>(
|
||||
&self,
|
||||
mut writer: W,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
let mut matches = self.get_filtered_matches()?;
|
||||
let num_findings = matches.len();
|
||||
|
||||
if !args.no_dedup {
|
||||
matches = self.deduplicate_matches(matches, args.no_dedup);
|
||||
}
|
||||
|
||||
for (index, rm) in matches.into_iter().enumerate() {
|
||||
// When no_dedup is true, we'll handle each origin separately
|
||||
if args.no_dedup && rm.origin.len() > 1 {
|
||||
// For each origin, create a separate "finding"
|
||||
for origin in rm.origin.iter() {
|
||||
// Create a new ReportMatch with just this single origin
|
||||
let single_origin_rm = ReportMatch {
|
||||
origin: OriginSet::new(origin.clone(), Vec::new()),
|
||||
blob_metadata: rm.blob_metadata.clone(),
|
||||
m: rm.m.clone(),
|
||||
comment: rm.comment.clone(),
|
||||
visible: rm.visible,
|
||||
match_confidence: rm.match_confidence,
|
||||
validation_response_body: rm.validation_response_body.clone(),
|
||||
validation_response_status: rm.validation_response_status,
|
||||
validation_success: rm.validation_success,
|
||||
};
|
||||
|
||||
self.write_finding(
|
||||
&mut writer,
|
||||
&single_origin_rm,
|
||||
index + 1,
|
||||
num_findings,
|
||||
args,
|
||||
)?;
|
||||
}
|
||||
} else {
|
||||
// Normal processing for deduped matches or matches with only one origin
|
||||
self.write_finding(&mut writer, &rm, index + 1, num_findings, args)?;
|
||||
}
|
||||
let records = self.build_finding_records(args)?;
|
||||
let num_findings = records.len();
|
||||
for (index, record) in records.iter().enumerate() {
|
||||
self.write_finding_record(&mut writer, record, index + 1, num_findings)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_finding<W: std::io::Write>(
|
||||
fn write_finding_record<W: std::io::Write>(
|
||||
&self,
|
||||
writer: &mut W,
|
||||
rm: &ReportMatch,
|
||||
record: &FindingReporterRecord,
|
||||
_finding_num: usize,
|
||||
_num_findings: usize,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
let lock_icon = if rm.validation_success { "🔓 " } else { "" };
|
||||
let is_active = record.finding.validation.status == "Active Credential";
|
||||
let lock_icon = if is_active { "🔓 " } else { "" };
|
||||
let formatted_heading = format!(
|
||||
"{}{} => [{}]",
|
||||
lock_icon,
|
||||
rm.m.rule_name.to_uppercase(),
|
||||
rm.m.rule_text_id.to_uppercase()
|
||||
record.rule.name.to_uppercase(),
|
||||
record.rule.id.to_uppercase()
|
||||
);
|
||||
if rm.validation_success {
|
||||
if is_active {
|
||||
writeln!(writer, "{}", self.style_finding_active_heading(formatted_heading))?;
|
||||
} else {
|
||||
writeln!(writer, "{}", self.style_finding_heading(formatted_heading))?;
|
||||
}
|
||||
writeln!(writer, "{}", PrettyFinding(self, rm, args))?;
|
||||
writeln!(writer, "{}", PrettyFindingRecord(self, record))?;
|
||||
writeln!(writer)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_git_metadata(
|
||||
fn write_git_metadata_value(
|
||||
&self,
|
||||
f: &mut Formatter<'_>,
|
||||
e: &GitRepoOrigin,
|
||||
_args: &cli::commands::scan::ScanArgs,
|
||||
line_num: usize,
|
||||
git: &serde_json::Value,
|
||||
) -> FmtResult {
|
||||
// Check if this is a remote git scan
|
||||
// let mut is_remote_git_scan = !args.input_specifier_args.git_url.is_empty();
|
||||
// let mut git_url_string = String::new();
|
||||
let repo_url = get_repo_url(&e.repo_path)
|
||||
.unwrap_or_else(|_| e.repo_path.to_string_lossy().to_string().into());
|
||||
let mut git_url_string = repo_url.clone();
|
||||
if git_url_string.ends_with(".git") {
|
||||
git_url_string = git_url_string.strip_suffix(".git").unwrap().to_string().into();
|
||||
let repo_url = git["repository_url"].as_str().unwrap_or("");
|
||||
writeln!(f, " |Git Repo......: {}", self.style_metadata(repo_url))?;
|
||||
if let Some(commit) = git.get("commit") {
|
||||
if let Some(url) = commit.get("url").and_then(|v| v.as_str()) {
|
||||
writeln!(f, " |__Commit......: {}", self.style_metadata(url))?;
|
||||
}
|
||||
if let Some(committer) = commit.get("committer") {
|
||||
let name = committer.get("name").and_then(|v| v.as_str()).unwrap_or("");
|
||||
let email = committer.get("email").and_then(|v| v.as_str()).unwrap_or("");
|
||||
writeln!(indented(f).with_str(" |__"), "Committer...: {} <{}>", name, email)?;
|
||||
}
|
||||
if let Some(date) = commit.get("date").and_then(|v| v.as_str()) {
|
||||
writeln!(indented(f).with_str(" |__"), "Date........: {}", date)?;
|
||||
}
|
||||
}
|
||||
writeln!(f, " |Git Repo......: {}", self.style_metadata(&git_url_string),)?;
|
||||
if let Some(cs) = &e.first_commit {
|
||||
let cmd = &cs.commit_metadata;
|
||||
|
||||
let atime =
|
||||
cmd.committer_timestamp.format(gix::date::time::format::SHORT.clone()).to_string();
|
||||
|
||||
let commit_id = &cmd.commit_id;
|
||||
let commit_url = format!("{}/commit/{}", &git_url_string, commit_id);
|
||||
// Write Commit Information
|
||||
writeln!(f, " |__Commit......: {}", self.style_metadata(&commit_url))?;
|
||||
writeln!(
|
||||
indented(f).with_str(" |__"),
|
||||
"Committer...: {} <{}>",
|
||||
cmd.committer_name,
|
||||
cmd.committer_email
|
||||
)?;
|
||||
writeln!(indented(f).with_str(" |__"), "Date........: {}", atime)?;
|
||||
// writeln!(indented(f).with_str(" |__"), "Summary.....: {}", msg)?;
|
||||
writeln!(indented(f).with_str(" |__"), "Path........: {}", cs.blob_path)?;
|
||||
// Construct Git Command
|
||||
let git_link =
|
||||
format!("{}/blob/{}/{}#L{}", &git_url_string, commit_id, cs.blob_path, line_num);
|
||||
let git_command =
|
||||
format!("git -C {} show {}:{}", e.repo_path.display(), commit_id, cs.blob_path);
|
||||
writeln!(
|
||||
indented(f).with_str(" |__"),
|
||||
"Git Link....: {}",
|
||||
self.style_metadata(&git_link)
|
||||
)?;
|
||||
writeln!(
|
||||
indented(f).with_str(" |__"),
|
||||
"Git Command.: {}",
|
||||
self.style_metadata(&git_command)
|
||||
)?;
|
||||
if let Some(file) = git.get("file") {
|
||||
if let Some(path) = file.get("path").and_then(|v| v.as_str()) {
|
||||
writeln!(indented(f).with_str(" |__"), "Path........: {}", path)?;
|
||||
}
|
||||
if let Some(url) = file.get("url").and_then(|v| v.as_str()) {
|
||||
writeln!(
|
||||
indented(f).with_str(" |__"),
|
||||
"Git Link....: {}",
|
||||
self.style_metadata(url)
|
||||
)?;
|
||||
}
|
||||
if let Some(cmd) = file.get("git_command").and_then(|v| v.as_str()) {
|
||||
writeln!(
|
||||
indented(f).with_str(" |__"),
|
||||
"Git Command.: {}",
|
||||
self.style_metadata(cmd)
|
||||
)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
// pub struct PrettyFinding<'a>(&'a DetailsReporter, &'a Finding);
|
||||
pub struct PrettyFinding<'a>(
|
||||
&'a DetailsReporter,
|
||||
&'a ReportMatch,
|
||||
&'a cli::commands::scan::ScanArgs,
|
||||
);
|
||||
impl<'a> Display for PrettyFinding<'a> {
|
||||
|
||||
pub struct PrettyFindingRecord<'a>(&'a DetailsReporter, &'a FindingReporterRecord);
|
||||
|
||||
impl<'a> Display for PrettyFindingRecord<'a> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
|
||||
let PrettyFinding(reporter, rm, args) = self;
|
||||
// Use Box<dyn Fn(&str) -> String> to store the closure
|
||||
let style_fn: Box<dyn Fn(&str) -> String> = if rm.validation_success {
|
||||
Box::new(|s: &str| reporter.style_active_creds(s).to_string()) // Convert StyledObject
|
||||
// to String
|
||||
let reporter = self.0;
|
||||
let record = self.1;
|
||||
let is_active = record.finding.validation.status == "Active Credential";
|
||||
let style_fn: Box<dyn Fn(&str) -> String> = if is_active {
|
||||
Box::new(|s| reporter.style_active_creds(s).to_string())
|
||||
} else {
|
||||
Box::new(|s: &str| reporter.style_match(s).to_string()) // Convert StyledObject to
|
||||
// String
|
||||
Box::new(|s| reporter.style_match(s).to_string())
|
||||
};
|
||||
let matching_finding =
|
||||
rm.m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rm.m.groups.captures.get(0))
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.unwrap_or(&[]);
|
||||
writeln!(f, " |Finding.......: {}", style_fn(&Escaped(matching_finding).to_string()))?;
|
||||
writeln!(f, " |Fingerprint...: {}", rm.m.finding_fingerprint)?;
|
||||
writeln!(f, " |Confidence....: {}", rm.match_confidence.to_string())?;
|
||||
writeln!(f, " |Entropy.......: {:.2}", rm.m.calculated_entropy)?;
|
||||
let validation_status = if rm.validation_response_status == StatusCode::CONTINUE.as_u16()
|
||||
|| rm.validation_response_status == StatusCode::PRECONDITION_REQUIRED.as_u16()
|
||||
{
|
||||
"Not Attempted".to_string()
|
||||
} else if rm.validation_success {
|
||||
"Active Credential".to_string()
|
||||
} else {
|
||||
"Inactive Credential".to_string()
|
||||
};
|
||||
writeln!(
|
||||
f,
|
||||
" |Validation....: {}",
|
||||
if rm.validation_success {
|
||||
reporter.style_finding_active_heading(&validation_status).to_string()
|
||||
// Convert StyledObject to String
|
||||
} else {
|
||||
(&validation_status).to_string()
|
||||
}
|
||||
)?;
|
||||
const MAX_RESPONSE_LENGTH: usize = 512;
|
||||
if rm.validation_response_status != StatusCode::CONTINUE.as_u16() {
|
||||
let truncated_body: String =
|
||||
rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect();
|
||||
let ellipsis =
|
||||
if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" };
|
||||
let finding = &record.finding;
|
||||
writeln!(f, " |Finding.......: {}", style_fn(&finding.snippet))?;
|
||||
writeln!(f, " |Fingerprint...: {}", finding.fingerprint)?;
|
||||
writeln!(f, " |Confidence....: {}", finding.confidence)?;
|
||||
writeln!(f, " |Entropy.......: {}", finding.entropy)?;
|
||||
if is_active {
|
||||
writeln!(
|
||||
f,
|
||||
" |__Response....: {}{}",
|
||||
if rm.validation_success {
|
||||
reporter.style_active_creds(&truncated_body).to_string() // Convert StyledObject
|
||||
// to String
|
||||
} else {
|
||||
reporter.style_metadata(&truncated_body).to_string() // Convert StyledObject to
|
||||
// String
|
||||
},
|
||||
ellipsis
|
||||
" |Validation....: {}",
|
||||
reporter.style_finding_active_heading(&finding.validation.status).to_string()
|
||||
)?;
|
||||
} else {
|
||||
writeln!(f, " |Validation....: {}", finding.validation.status)?;
|
||||
}
|
||||
writeln!(
|
||||
f,
|
||||
" |Language......: {}",
|
||||
rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string())
|
||||
)?;
|
||||
|
||||
let source_span = &rm.m.location.source_span;
|
||||
writeln!(f, " |Line Num......: {}", source_span.start.line)?;
|
||||
|
||||
//print all the other areas where this was seen
|
||||
for p in rm.origin.iter() {
|
||||
match p {
|
||||
Origin::File(e) => {
|
||||
let display_path = if let Some(url) = reporter.jira_issue_url(&e.path, args) {
|
||||
url
|
||||
} else if let Some(url) = reporter.slack_message_url(&e.path) {
|
||||
url
|
||||
} else if let Some(mapped) = reporter.s3_display_path(&e.path) {
|
||||
mapped
|
||||
} else if let Some(mapped) = reporter.docker_display_path(&e.path) {
|
||||
mapped
|
||||
} else {
|
||||
e.path.display().to_string()
|
||||
};
|
||||
writeln!(
|
||||
f,
|
||||
" |Path..........: {}",
|
||||
if rm.validation_success {
|
||||
reporter.style_active_creds(&display_path).to_string()
|
||||
} else {
|
||||
display_path
|
||||
}
|
||||
)?;
|
||||
}
|
||||
Origin::Extended(e) => {
|
||||
if let Some(p) = e.path() {
|
||||
let display_path = p.display().to_string();
|
||||
writeln!(
|
||||
f,
|
||||
" |Path..........: {}",
|
||||
if rm.validation_success {
|
||||
reporter.style_active_creds(&display_path).to_string()
|
||||
} else {
|
||||
display_path
|
||||
}
|
||||
)?;
|
||||
}
|
||||
}
|
||||
Origin::GitRepo(e) => {
|
||||
reporter.write_git_metadata(f, e, args, source_span.start.line)?;
|
||||
}
|
||||
}
|
||||
if finding.validation.status != "Not Attempted" {
|
||||
writeln!(f, " |__Response....: {}", style_fn(&finding.validation.response))?;
|
||||
}
|
||||
writeln!(f, " |Language......: {}", finding.language)?;
|
||||
writeln!(f, " |Line Num......: {}", finding.line)?;
|
||||
writeln!(f, " |Path..........: {}", style_fn(&finding.path))?;
|
||||
if let Some(git) = &finding.git_metadata {
|
||||
reporter.write_git_metadata_value(f, git)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pretty_format_with_nan_entropy_panics() {
|
||||
use std::{
|
||||
io::Cursor,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
|
||||
use http::StatusCode;
|
||||
use url::Url;
|
||||
|
||||
use crate::{
|
||||
blob::BlobMetadata,
|
||||
cli::commands::{
|
||||
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
|
||||
gitlab::GitLabRepoType,
|
||||
inputs::{ContentFilteringArgs, InputSpecifierArgs},
|
||||
output::{OutputArgs, ReportOutputFormat},
|
||||
rules::RuleSpecifierArgs,
|
||||
scan::{ConfidenceLevel, ScanArgs},
|
||||
},
|
||||
location::{Location, OffsetSpan, SourcePoint, SourceSpan},
|
||||
matcher::{Match, SerializableCaptures},
|
||||
origin::{Origin, OriginSet},
|
||||
reporter::{DetailsReporter, Styles},
|
||||
};
|
||||
|
||||
// Construct a fake match with NaN entropy
|
||||
let m = Match {
|
||||
rule_name: "dummy_rule".into(),
|
||||
rule_text_id: "dummy.id".into(),
|
||||
finding_fingerprint: 123456789,
|
||||
rule_finding_fingerprint: "abc".into(),
|
||||
location: Location {
|
||||
offset_span: OffsetSpan { start: 0, end: 1 },
|
||||
source_span: SourceSpan {
|
||||
start: SourcePoint { line: 1, column: 0 },
|
||||
end: SourcePoint { line: 1, column: 10 },
|
||||
},
|
||||
},
|
||||
blob_id: crate::blob::BlobId::default(),
|
||||
groups: SerializableCaptures { captures: vec![] },
|
||||
rule_confidence: crate::rules::rule::Confidence::Medium,
|
||||
validation_success: true,
|
||||
validation_response_status: StatusCode::OK.as_u16(),
|
||||
validation_response_body: "OK".into(),
|
||||
calculated_entropy: f32::NAN, // Here's the trigger
|
||||
visible: true,
|
||||
};
|
||||
|
||||
let _rm = crate::reporter::ReportMatch {
|
||||
origin: OriginSet::new(Origin::from_file("dummy.txt".into()), vec![]),
|
||||
blob_metadata: BlobMetadata {
|
||||
id: m.blob_id,
|
||||
num_bytes: 1,
|
||||
mime_essence: None,
|
||||
charset: None,
|
||||
language: Some("Rust".into()),
|
||||
},
|
||||
m,
|
||||
comment: None,
|
||||
visible: true,
|
||||
match_confidence: crate::rules::rule::Confidence::Medium,
|
||||
validation_response_body: "OK".into(),
|
||||
validation_response_status: StatusCode::OK.as_u16(),
|
||||
validation_success: true,
|
||||
};
|
||||
|
||||
let store = Arc::new(Mutex::new(crate::findings_store::FindingsStore::new(".".into())));
|
||||
let reporter =
|
||||
DetailsReporter { datastore: store, styles: Styles::new(false), only_valid: false };
|
||||
|
||||
let mut buf = Cursor::new(Vec::new());
|
||||
let args = ScanArgs {
|
||||
// core execution / performance
|
||||
num_jobs: 1,
|
||||
no_dedup: false,
|
||||
|
||||
// rule selection
|
||||
rules: RuleSpecifierArgs {
|
||||
rules_path: Vec::new(),
|
||||
rule: vec!["all".into()],
|
||||
load_builtins: true,
|
||||
},
|
||||
|
||||
// input discovery
|
||||
input_specifier_args: InputSpecifierArgs {
|
||||
path_inputs: Vec::new(),
|
||||
git_url: Vec::new(),
|
||||
github_user: Vec::new(),
|
||||
github_organization: Vec::new(),
|
||||
all_github_organizations: false,
|
||||
github_api_url: url::Url::parse("https://api.github.com/").unwrap(),
|
||||
github_repo_type: GitHubRepoType::Source,
|
||||
// new GitLab defaults
|
||||
gitlab_user: Vec::new(),
|
||||
gitlab_group: Vec::new(),
|
||||
all_gitlab_groups: false,
|
||||
gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(),
|
||||
gitlab_repo_type: GitLabRepoType::Owner,
|
||||
// Jira options
|
||||
jira_url: None,
|
||||
jql: None,
|
||||
max_results: 100,
|
||||
|
||||
// Slack options
|
||||
slack_query: None,
|
||||
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
|
||||
// s3
|
||||
s3_bucket: None,
|
||||
s3_prefix: None,
|
||||
role_arn: None,
|
||||
aws_local_profile: None,
|
||||
// Docker image scanning
|
||||
docker_image: Vec::new(),
|
||||
// git clone / history options
|
||||
git_clone: GitCloneMode::Bare,
|
||||
git_history: GitHistoryMode::Full,
|
||||
scan_nested_repos: true,
|
||||
commit_metadata: true,
|
||||
},
|
||||
|
||||
// content filtering
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
max_file_size_mb: 25.0,
|
||||
no_extract_archives: false,
|
||||
extraction_depth: 2,
|
||||
exclude: Vec::new(), // Exclude patterns
|
||||
no_binary: true,
|
||||
},
|
||||
|
||||
// scanning behaviour
|
||||
confidence: ConfidenceLevel::Medium,
|
||||
no_validate: false,
|
||||
rule_stats: false,
|
||||
only_valid: false,
|
||||
min_entropy: None,
|
||||
redact: false,
|
||||
git_repo_timeout: 1800, // 30 minutes
|
||||
|
||||
// output
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
|
||||
// display
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
};
|
||||
|
||||
// This will panic if the entropy isn't checked for NaN
|
||||
let _result = reporter.pretty_format(&mut buf, &args);
|
||||
// assert!(result.is_err() || result.is_ok(), "Should not crash"); // remove this line if panic
|
||||
// is expected pre-fix
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,274 +1,56 @@
|
|||
use std::collections::HashMap;
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
|
||||
use rayon::prelude::*;
|
||||
use serde_sarif::sarif;
|
||||
|
||||
use super::*;
|
||||
use crate::{bstring_escape::Escaped, defaults::get_builtin_rules, origin::get_repo_url};
|
||||
#[derive(Hash, Eq, PartialEq)]
|
||||
struct LocationKey {
|
||||
file_path: String,
|
||||
line: usize,
|
||||
column_start: usize,
|
||||
column_end: usize,
|
||||
text: String,
|
||||
}
|
||||
use crate::defaults::get_builtin_rules;
|
||||
|
||||
impl DetailsReporter {
|
||||
fn make_sarif_result(
|
||||
&self,
|
||||
finding: &Finding,
|
||||
no_dedup: bool,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<sarif::Result> {
|
||||
// Deduplicate exactly as in the JSON reporter
|
||||
// let matches = self.deduplicate_matches(finding.matches.clone(), no_dedup);
|
||||
// Deduplicate exactly as in the JSON reporter - but only if no_dedup is false
|
||||
let matches = if no_dedup {
|
||||
finding.matches.clone()
|
||||
} else {
|
||||
self.deduplicate_matches(finding.matches.clone(), no_dedup)
|
||||
};
|
||||
fn record_to_sarif_result(&self, record: &FindingReporterRecord) -> Result<sarif::Result> {
|
||||
let finding = &record.finding;
|
||||
let artifact_location =
|
||||
sarif::ArtifactLocationBuilder::default().uri(finding.path.clone()).build()?;
|
||||
let region = sarif::RegionBuilder::default()
|
||||
.start_line(finding.line as i64)
|
||||
.start_column(finding.column_start as i64)
|
||||
.end_line(finding.line as i64)
|
||||
.end_column(finding.column_end as i64)
|
||||
.snippet(
|
||||
sarif::ArtifactContentBuilder::default().text(finding.snippet.clone()).build()?,
|
||||
)
|
||||
.build()?;
|
||||
|
||||
let metadata = &finding.metadata;
|
||||
|
||||
let mut location_map: HashMap<LocationKey, Vec<(&OriginSet, &Match)>> = HashMap::new();
|
||||
for rm in &matches {
|
||||
let source_span = &rm.m.location.source_span;
|
||||
let snippet =
|
||||
rm.m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rm.m.groups.captures.get(0))
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.unwrap_or(&[]);
|
||||
let key = LocationKey {
|
||||
file_path: rm
|
||||
.origin
|
||||
.first()
|
||||
.blob_path()
|
||||
.map(|p| p.to_string_lossy().into_owned())
|
||||
.unwrap_or_default(),
|
||||
line: source_span.start.line,
|
||||
column_start: source_span.start.column,
|
||||
column_end: source_span.end.column,
|
||||
text: Escaped(snippet).to_string(),
|
||||
};
|
||||
location_map.entry(key).or_default().push((&rm.origin, &rm.m));
|
||||
let mut props = BTreeMap::new();
|
||||
props.insert("validation_status".to_string(), serde_json::json!(finding.validation.status));
|
||||
props.insert("entropy".to_string(), serde_json::json!(finding.entropy));
|
||||
if let Some(git) = &finding.git_metadata {
|
||||
props.insert("git_metadata".to_string(), git.clone());
|
||||
}
|
||||
let properties =
|
||||
sarif::PropertyBagBuilder::default().additional_properties(props).build()?;
|
||||
|
||||
let mut fpu64: u64 = 0;
|
||||
let location = sarif::LocationBuilder::default()
|
||||
.physical_location(
|
||||
sarif::PhysicalLocationBuilder::default()
|
||||
.artifact_location(artifact_location)
|
||||
.region(region)
|
||||
.build()?,
|
||||
)
|
||||
.properties(properties)
|
||||
.build()?;
|
||||
|
||||
let locations: Vec<sarif::Location> = location_map
|
||||
.into_iter()
|
||||
.filter_map(|(key, matches)| {
|
||||
let (prov, m) = matches[0];
|
||||
let source_span = &m.location.source_span;
|
||||
let mut artifact_locations = Vec::new();
|
||||
let mut git_metadata_list = Vec::new();
|
||||
|
||||
fpu64 = m.finding_fingerprint;
|
||||
|
||||
for p in prov.iter() {
|
||||
match p {
|
||||
Origin::File(e) => {
|
||||
let uri = if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
url
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
url
|
||||
} else if let Some(mapped) = self.s3_display_path(&e.path) {
|
||||
mapped
|
||||
} else {
|
||||
e.path.display().to_string()
|
||||
};
|
||||
artifact_locations.push(
|
||||
sarif::ArtifactLocationBuilder::default().uri(uri).build().ok()?,
|
||||
);
|
||||
}
|
||||
Origin::Extended(e) => {
|
||||
if let Some(p) = e.path() {
|
||||
artifact_locations.push(
|
||||
sarif::ArtifactLocationBuilder::default()
|
||||
.uri(p.display().to_string())
|
||||
.build()
|
||||
.ok()?,
|
||||
);
|
||||
}
|
||||
}
|
||||
Origin::GitRepo(e) => {
|
||||
// Extract and store Git metadata
|
||||
if let Some(git_metadata) = self.extract_git_metadata(e, source_span) {
|
||||
git_metadata_list.push(git_metadata);
|
||||
}
|
||||
|
||||
// Build Git artifact location
|
||||
if let Some(cs) = &e.first_commit {
|
||||
let repo_url = get_repo_url(&e.repo_path)
|
||||
.unwrap_or_else(|_| {
|
||||
e.repo_path.to_string_lossy().to_string().into()
|
||||
})
|
||||
.trim_end_matches(".git")
|
||||
.to_string();
|
||||
let git_url = format!(
|
||||
"{}/blob/{}/{}#L{}",
|
||||
repo_url,
|
||||
cs.commit_metadata.commit_id,
|
||||
cs.blob_path,
|
||||
source_span.start.line
|
||||
);
|
||||
artifact_locations.push(
|
||||
sarif::ArtifactLocationBuilder::default()
|
||||
.uri(git_url)
|
||||
.build()
|
||||
.ok()?,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if artifact_locations.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let region = sarif::RegionBuilder::default()
|
||||
.start_line(key.line as i64)
|
||||
.start_column(key.column_start as i64)
|
||||
.end_line(key.line as i64)
|
||||
.end_column(key.column_end as i64)
|
||||
.snippet(sarif::ArtifactContentBuilder::default().text(key.text).build().ok()?)
|
||||
.build()
|
||||
.ok()?;
|
||||
|
||||
let logical_location = sarif::LogicalLocationBuilder::default()
|
||||
.kind("blob")
|
||||
.name(m.finding_fingerprint.to_string())
|
||||
.build()
|
||||
.ok()?;
|
||||
|
||||
let validation_status =
|
||||
if m.validation_response_status == StatusCode::CONTINUE.as_u16() {
|
||||
"Not Attempted"
|
||||
} else if m.validation_success {
|
||||
"Active Credential"
|
||||
} else {
|
||||
"Inactive Credential"
|
||||
};
|
||||
|
||||
// Build combined properties including Git metadata and fingerprint
|
||||
let mut props = std::collections::BTreeMap::new();
|
||||
props.insert("validation_status".to_string(), serde_json::json!(validation_status));
|
||||
|
||||
props.insert(
|
||||
"entropy".to_string(),
|
||||
serde_json::json!(format!("{:.2}", m.calculated_entropy)),
|
||||
);
|
||||
|
||||
// Add the fingerprint property from the match
|
||||
props.insert("fingerprint".to_string(), serde_json::json!(m.finding_fingerprint));
|
||||
|
||||
if !git_metadata_list.is_empty() {
|
||||
props.insert("git_metadata".to_string(), serde_json::json!(git_metadata_list));
|
||||
}
|
||||
|
||||
let properties = sarif::PropertyBagBuilder::default()
|
||||
.additional_properties(props)
|
||||
.build()
|
||||
.ok()?;
|
||||
|
||||
// Create locations for each artifact location
|
||||
let locations = artifact_locations
|
||||
.into_iter()
|
||||
.map(|artifact_location| {
|
||||
sarif::LocationBuilder::default()
|
||||
.physical_location(
|
||||
sarif::PhysicalLocationBuilder::default()
|
||||
.artifact_location(artifact_location)
|
||||
.region(region.clone())
|
||||
.build()
|
||||
.ok()?,
|
||||
)
|
||||
.logical_locations(vec![logical_location.clone()])
|
||||
.properties(properties.clone())
|
||||
.build()
|
||||
.ok()
|
||||
})
|
||||
.collect::<Option<Vec<_>>>()?;
|
||||
Some(locations)
|
||||
})
|
||||
.flatten()
|
||||
.collect();
|
||||
// let message = sarif::MessageBuilder::default()
|
||||
// .text(format!(
|
||||
// "Rule {} found {} unique {}.\nFirst blob id matched: {}",
|
||||
// metadata.rule_name,
|
||||
// locations.len(),
|
||||
// if locations.len() == 1 { "match" } else { "matches" },
|
||||
// first_match_blob_id
|
||||
// ))
|
||||
// .build()?;
|
||||
// Create detailed message from first location's information
|
||||
let detailed_msg = if let Some(first_match) = matches.first() {
|
||||
let mut msg = format!(
|
||||
"Rule {} found {} unique {}.\n",
|
||||
metadata.rule_name,
|
||||
locations.len(),
|
||||
if locations.len() == 1 { "match" } else { "matches" }
|
||||
);
|
||||
// Add file or Git information based on origin
|
||||
// Get first origin of first match - we know this exists
|
||||
let p = first_match.origin.first();
|
||||
match p {
|
||||
Origin::File(e) => {
|
||||
let uri = if let Some(url) = self.jira_issue_url(&e.path, args) {
|
||||
url
|
||||
} else if let Some(url) = self.slack_message_url(&e.path) {
|
||||
url
|
||||
} else if let Some(mapped) = self.s3_display_path(&e.path) {
|
||||
mapped
|
||||
} else {
|
||||
e.path.display().to_string()
|
||||
};
|
||||
msg.push_str(&format!("Location: {}\n", uri));
|
||||
}
|
||||
Origin::Extended(e) => {
|
||||
if let Some(p) = e.path() {
|
||||
msg.push_str(&format!("Location: {}\n", p.display()));
|
||||
}
|
||||
}
|
||||
Origin::GitRepo(e) => {
|
||||
if let Some(cs) = &e.first_commit {
|
||||
let repo_url = get_repo_url(&e.repo_path)
|
||||
.unwrap_or_else(|_| e.repo_path.to_string_lossy().to_string().into())
|
||||
.trim_end_matches(".git")
|
||||
.to_string();
|
||||
// Add commit and author information
|
||||
let cmd = &cs.commit_metadata;
|
||||
msg.push_str(&format!("Repository: {}\n", repo_url));
|
||||
msg.push_str(&format!("Commit: {}\n", cmd.commit_id));
|
||||
msg.push_str(&format!(
|
||||
"Committer: {} <{}>\n",
|
||||
String::from_utf8_lossy(&cmd.committer_name),
|
||||
String::from_utf8_lossy(&cmd.committer_email)
|
||||
));
|
||||
msg.push_str(&format!("File: {}", cs.blob_path));
|
||||
}
|
||||
}
|
||||
}
|
||||
msg
|
||||
} else {
|
||||
format!("Rule {} found {} unique matches.", metadata.rule_name, locations.len(),)
|
||||
};
|
||||
let message = sarif::MessageBuilder::default().text(detailed_msg).build()?;
|
||||
let fingerprint_name = "fingerprint".to_string();
|
||||
let fingerprint = fpu64.to_string();
|
||||
let message = sarif::MessageBuilder::default()
|
||||
.text(format!("Rule {} matched {}", record.rule.name, finding.path))
|
||||
.build()?;
|
||||
|
||||
let result = sarif::ResultBuilder::default()
|
||||
.rule_id(&metadata.rule_name)
|
||||
.rule_id(&record.rule.name)
|
||||
.message(message)
|
||||
.kind(sarif::ResultKind::Review.to_string())
|
||||
.locations(locations)
|
||||
.locations(vec![location])
|
||||
.level(sarif::ResultLevel::Warning.to_string())
|
||||
.partial_fingerprints([(fingerprint_name, fingerprint)])
|
||||
.partial_fingerprints([("fingerprint".to_string(), finding.fingerprint.clone())])
|
||||
.build()?;
|
||||
Ok(result)
|
||||
}
|
||||
|
|
@ -276,54 +58,11 @@ impl DetailsReporter {
|
|||
pub fn sarif_format<W: std::io::Write>(
|
||||
&self,
|
||||
mut writer: W,
|
||||
no_dedup: bool,
|
||||
_no_dedup: bool,
|
||||
args: &cli::commands::scan::ScanArgs,
|
||||
) -> Result<()> {
|
||||
// Gather findings first
|
||||
let mut findings = self.gather_findings()?;
|
||||
|
||||
// If no_dedup is true, expand findings with multiple origins into separate findings
|
||||
if no_dedup {
|
||||
let mut expanded_findings = Vec::new();
|
||||
for finding in findings {
|
||||
// Check matches with multiple origins
|
||||
let matches_with_multiple_origins: Vec<_> =
|
||||
finding.matches.iter().filter(|rm| rm.origin.len() > 1).collect();
|
||||
|
||||
if !matches_with_multiple_origins.is_empty() {
|
||||
// For each match with multiple origins, create separate findings
|
||||
for rm in matches_with_multiple_origins {
|
||||
for origin in rm.origin.iter() {
|
||||
// Create a single-origin match
|
||||
let single_origin_rm = ReportMatch {
|
||||
origin: OriginSet::new(origin.clone(), Vec::new()),
|
||||
blob_metadata: rm.blob_metadata.clone(),
|
||||
m: rm.m.clone(),
|
||||
comment: rm.comment.clone(),
|
||||
visible: rm.visible,
|
||||
match_confidence: rm.match_confidence,
|
||||
validation_response_body: rm.validation_response_body.clone(),
|
||||
validation_response_status: rm.validation_response_status,
|
||||
validation_success: rm.validation_success,
|
||||
};
|
||||
|
||||
// Create a new finding with just this single-origin match
|
||||
let new_finding =
|
||||
Finding::new(finding.metadata.clone(), vec![single_origin_rm]);
|
||||
expanded_findings.push(new_finding);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If the finding has no matches with multiple origins, keep it as is
|
||||
expanded_findings.push(finding);
|
||||
}
|
||||
}
|
||||
findings = expanded_findings;
|
||||
}
|
||||
|
||||
// Filter only rules relevant to the findings
|
||||
let finding_rule_ids: std::collections::HashSet<_> =
|
||||
findings.iter().map(|f| f.metadata.rule_name.clone()).collect();
|
||||
let records = self.build_finding_records(args)?;
|
||||
let finding_rule_ids: HashSet<_> = records.iter().map(|r| r.rule.name.clone()).collect();
|
||||
let rules: Vec<sarif::ReportingDescriptor> = get_builtin_rules(None)?
|
||||
.iter_rules()
|
||||
.par_bridge()
|
||||
|
|
@ -366,10 +105,9 @@ impl DetailsReporter {
|
|||
)
|
||||
.build()?;
|
||||
|
||||
let sarif_results: Vec<sarif::Result> = findings
|
||||
.par_iter()
|
||||
.filter_map(|f| self.make_sarif_result(f, no_dedup, args).ok())
|
||||
.collect();
|
||||
let sarif_results: Vec<sarif::Result> =
|
||||
records.iter().filter_map(|r| self.record_to_sarif_result(r).ok()).collect();
|
||||
|
||||
let run = sarif::RunBuilder::default().tool(tool).results(sarif_results).build()?;
|
||||
let sarif = sarif::SarifBuilder::default()
|
||||
.version(sarif::Version::V2_1_0.to_string())
|
||||
|
|
|
|||
12
src/s3.rs
12
src/s3.rs
|
|
@ -2,12 +2,12 @@ use anyhow::{Context, Result};
|
|||
use aws_config::{defaults, meta::region::RegionProviderChain, BehaviorVersion};
|
||||
use aws_credential_types::Credentials;
|
||||
use aws_sdk_s3::{
|
||||
error::ProvideErrorMetadata, // for .code()
|
||||
operation::list_objects_v2::ListObjectsV2Error, // modeled service error
|
||||
Client,
|
||||
operation::list_objects_v2::ListObjectsV2Error, // modeled service error
|
||||
error::ProvideErrorMetadata, // for .code()
|
||||
};
|
||||
use aws_types::region::Region;
|
||||
use reqwest; // HTTP client for HEAD fallback
|
||||
use reqwest; // HTTP client for HEAD fallback
|
||||
|
||||
pub async fn visit_bucket_objects<F>(
|
||||
bucket: &str,
|
||||
|
|
@ -43,9 +43,7 @@ where
|
|||
.configure(&config)
|
||||
.build()
|
||||
.await;
|
||||
let conf = aws_sdk_s3::config::Builder::from(&config)
|
||||
.credentials_provider(assume)
|
||||
.build();
|
||||
let conf = aws_sdk_s3::config::Builder::from(&config).credentials_provider(assume).build();
|
||||
Client::from_conf(conf)
|
||||
} else {
|
||||
Client::new(&config)
|
||||
|
|
@ -66,7 +64,7 @@ where
|
|||
|
||||
// On error, extract the modeled service error
|
||||
Err(err) => {
|
||||
let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError
|
||||
let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError
|
||||
|
||||
// If the bucket must be addressed at another region...
|
||||
if svc_err.code() == Some("PermanentRedirect") {
|
||||
|
|
|
|||
|
|
@ -21,14 +21,16 @@ use crate::{
|
|||
findings_store,
|
||||
git_binary::{CloneMode, Git},
|
||||
git_url::GitUrl,
|
||||
github, gitlab, jira,
|
||||
github, gitlab,
|
||||
guesser::Guesser,
|
||||
jira,
|
||||
matcher::{Match, Matcher, MatcherStats},
|
||||
origin::{Origin, OriginSet},
|
||||
rules_database::RulesDatabase,
|
||||
s3,
|
||||
scanner::processing::BlobProcessor,
|
||||
scanner_pool::ScannerPool,
|
||||
slack, guesser::Guesser, PathBuf,
|
||||
slack, PathBuf,
|
||||
};
|
||||
|
||||
pub type DatastoreMessage = (OriginSet, BlobMetadata, Vec<(Option<f64>, Match)>);
|
||||
|
|
@ -291,7 +293,6 @@ pub async fn fetch_slack_messages(
|
|||
Ok(vec![output_dir])
|
||||
}
|
||||
|
||||
|
||||
pub async fn fetch_s3_objects(
|
||||
args: &scan::ScanArgs,
|
||||
datastore: &Arc<Mutex<findings_store::FindingsStore>>,
|
||||
|
|
@ -330,10 +331,12 @@ pub async fn fetch_s3_objects(
|
|||
);
|
||||
let blob = crate::blob::Blob::from_bytes(bytes);
|
||||
|
||||
if let Some((origin, blob_md, scored_matches)) = processor.run(origin, blob, args.no_dedup)? {
|
||||
if let Some((origin, blob_md, scored_matches)) =
|
||||
processor.run(origin, blob, args.no_dedup)?
|
||||
{
|
||||
// Wrap origin & metadata once:
|
||||
let origin_arc = Arc::new(origin);
|
||||
let blob_arc = Arc::new(blob_md);
|
||||
let blob_arc = Arc::new(blob_md);
|
||||
|
||||
// Now build a batch of exactly one FindingsStoreMessage per Match
|
||||
let mut batch = Vec::with_capacity(scored_matches.len());
|
||||
|
|
@ -350,4 +353,4 @@ pub async fn fetch_s3_objects(
|
|||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,7 +74,6 @@ pub async fn run_async_scan(
|
|||
let slack_dirs = fetch_slack_messages(args, global_args, &datastore).await?;
|
||||
input_roots.extend(slack_dirs);
|
||||
|
||||
|
||||
// Save Docker images if specified
|
||||
if !args.input_specifier_args.docker_image.is_empty() {
|
||||
let clone_root = {
|
||||
|
|
@ -129,7 +128,6 @@ pub async fn run_async_scan(
|
|||
)?;
|
||||
}
|
||||
|
||||
|
||||
if !args.no_dedup {
|
||||
// Final deduplication step before validation (or before reporting)
|
||||
let reporter = crate::reporter::DetailsReporter {
|
||||
|
|
|
|||
|
|
@ -4,25 +4,26 @@ use kingfisher::s3::visit_bucket_objects;
|
|||
#[tokio::test]
|
||||
async fn test_visit_public_bucket() -> Result<()> {
|
||||
let mut objects = Vec::new();
|
||||
visit_bucket_objects("awsglue-datasets", Some("examples/us-legislators/all/"), None, None, |key, data| {
|
||||
objects.push((key, data));
|
||||
Ok(())
|
||||
})
|
||||
visit_bucket_objects(
|
||||
"awsglue-datasets",
|
||||
Some("examples/us-legislators/all/"),
|
||||
None,
|
||||
None,
|
||||
|key, data| {
|
||||
objects.push((key, data));
|
||||
Ok(())
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert!(
|
||||
objects.iter().any(|(k, _)| k.ends_with("events.json")),
|
||||
"events.json object not found"
|
||||
);
|
||||
let creds = objects
|
||||
.iter()
|
||||
.find(|(k, _)| k.ends_with("events.json"))
|
||||
.expect("events.json object");
|
||||
let creds =
|
||||
objects.iter().find(|(k, _)| k.ends_with("events.json")).expect("events.json object");
|
||||
|
||||
let body = std::str::from_utf8(&creds.1)?;
|
||||
assert!(
|
||||
body.contains("Q4450263"),
|
||||
"expected events.json file"
|
||||
);
|
||||
assert!(body.contains("Q4450263"), "expected events.json file");
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue