diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ff7097..4201640 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.35.0] - Remote scans with `--git-history=none` now clone repositories with a working tree and scan the current files instead of erroring with "No inputs to scan". +- Fixed `--redact` being ignored: the flag is now passed through to the matcher, so secret values in findings are redacted ## [1.34.0] - Use system TLS root certificates to support self-hosted GitLab instances with internal CAs diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index 0e0c795..87a8011 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -183,7 +183,7 @@ pub fn enumerate_filesystem_inputs( return Ok(()); } progress.inc(blob.len().try_into().unwrap()); - match processor.run(origin, blob, args.no_dedup) { + match processor.run(origin, blob, args.no_dedup, args.redact) { Ok(None) => { // nothing to record } diff --git a/src/scanner/processing.rs b/src/scanner/processing.rs index 8c36514..e1551b0 100644 --- a/src/scanner/processing.rs +++ b/src/scanner/processing.rs @@ -25,11 +25,12 @@ impl<'a> BlobProcessor<'a> { origin: OriginSet, blob: Blob, no_dedup: bool, + redact: bool, ) -> Result> { let blob_id = blob.id.hex(); let _span = debug_span!("matcher", blob_id).entered(); let t1 = Instant::now(); - let res = self.matcher.scan_blob(&blob, &origin, None, false, no_dedup)?; + let res = self.matcher.scan_blob(&blob, &origin, None, redact, no_dedup)?; let scan_us = t1.elapsed().as_micros(); match res { // blob already seen, but with no matches; nothing to do! diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index c3bb8ba..7bd6b8d 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -349,7 +349,7 @@ pub async fn fetch_s3_objects( let blob = crate::blob::Blob::from_bytes(bytes); if let Some((origin, blob_md, scored_matches)) = - processor.run(origin, blob, args.no_dedup)? 
+ processor.run(origin, blob, args.no_dedup, args.redact)? { // Wrap origin & metadata once: let origin_arc = Arc::new(origin);
diff --git a/tests/int_redact.rs b/tests/int_redact.rs
new file mode 100644
index 0000000..5d72c61
--- /dev/null
+++ b/tests/int_redact.rs
@@ -0,0 +1,125 @@
+// Integration test to ensure --redact replaces secret values with hashes
+use std::{
+    path::PathBuf,
+    sync::{Arc, Mutex},
+};
+
+use anyhow::Result;
+use kingfisher::{
+    cli::{
+        commands::{
+            github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
+            gitlab::GitLabRepoType,
+            inputs::{ContentFilteringArgs, InputSpecifierArgs},
+            output::{OutputArgs, ReportOutputFormat},
+            rules::RuleSpecifierArgs,
+            scan::{ConfidenceLevel, ScanArgs},
+        },
+        global::{AdvancedArgs, GlobalArgs, Mode},
+    },
+    findings_store::FindingsStore,
+    rule_loader::RuleLoader,
+    rules_database::RulesDatabase,
+    scanner::run_async_scan,
+};
+use tempfile::TempDir;
+use url::Url;
+
+/// Scans `testdata/generic_secrets.py` with `redact: true` and asserts that every
+/// capture of every recorded match starts with `[REDACTED:` instead of exposing
+/// the raw value.
+#[tokio::test]
+async fn test_redact_hashes_finding_values() -> Result<()> {
+    let temp_dir = TempDir::new()?;
+
+    let scan_args = ScanArgs {
+        num_jobs: 2,
+        rules: RuleSpecifierArgs {
+            rules_path: Vec::new(),
+            rule: vec!["all".into()],
+            load_builtins: true,
+        },
+        input_specifier_args: InputSpecifierArgs {
+            path_inputs: vec![PathBuf::from("testdata/generic_secrets.py")],
+            git_url: Vec::new(),
+            github_user: Vec::new(),
+            github_organization: Vec::new(),
+            all_github_organizations: false,
+            github_api_url: Url::parse("https://api.github.com/")?,
+            github_repo_type: GitHubRepoType::Source,
+            gitlab_user: Vec::new(),
+            gitlab_group: Vec::new(),
+            all_gitlab_groups: false,
+            gitlab_api_url: Url::parse("https://gitlab.com/")?,
+            gitlab_repo_type: GitLabRepoType::Owner,
+            jira_url: None,
+            jql: None,
+            max_results: 100,
+            slack_query: None,
+            slack_api_url: Url::parse("https://slack.com/api/")?,
+            s3_bucket: None,
+            s3_prefix: None,
+            role_arn: None,
+            aws_local_profile: None,
+            docker_image: Vec::new(),
+            git_clone: GitCloneMode::Bare,
+            git_history: GitHistoryMode::Full,
+            scan_nested_repos: true,
+            commit_metadata: true,
+        },
+        content_filtering_args: ContentFilteringArgs {
+            max_file_size_mb: 25.0,
+            extraction_depth: 2,
+            no_binary: true,
+            no_extract_archives: false,
+            exclude: Vec::new(),
+        },
+        confidence: ConfidenceLevel::Low,
+        no_validate: true,
+        rule_stats: false,
+        only_valid: false,
+        min_entropy: Some(0.0),
+        // The behaviour under test: captured values must come back redacted.
+        redact: true,
+        git_repo_timeout: 1800,
+        output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
+        no_dedup: true,
+        snippet_length: 256,
+        baseline_file: None,
+        manage_baseline: false,
+    };
+
+    let global_args = GlobalArgs {
+        verbose: 0,
+        quiet: true,
+        color: Mode::Never,
+        no_update_check: false,
+        self_update: false,
+        progress: Mode::Never,
+        ignore_certs: false,
+        advanced: AdvancedArgs { rlimit_nofile: 16384 },
+    };
+
+    let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?;
+    let resolved = loaded.resolve_enabled_rules()?;
+    let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?;
+
+    let datastore = Arc::new(Mutex::new(FindingsStore::new(temp_dir.path().to_path_buf())));
+    run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &rules_db).await?;
+
+    let ds = datastore.lock().expect("findings store mutex poisoned");
+    let matches = ds.get_matches();
+    assert!(!matches.is_empty(), "scan produced no matches to check for redaction");
+    for m_arc in matches {
+        let captures = &m_arc.2.groups.captures;
+        // `all` is vacuously true on an empty list, so require a capture first.
+        assert!(!captures.is_empty(), "match recorded without any captures");
+        // A single raw value would leak the secret, so every capture must be redacted.
+        assert!(
+            captures.iter().all(|cap| cap.value.starts_with("[REDACTED:")),
+            "match contains a capture that was not redacted"
+        );
+    }
+
+    Ok(())
+}