From ec1e3a34323cc319de060bd748ee0449d4eaae67 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 23 Nov 2025 11:32:27 -0800 Subject: [PATCH] - Fixed deduplication to consider rule identifiers so overlapping patterns are not merged before validation - Moved 'URI with Username and Secret' to a low-confidence rule --- CHANGELOG.md | 2 ++ src/reporter.rs | 8 ++--- tests/fingerprint_dedup.rs | 68 +++++++++++++++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50d8a40..47712e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ All notable changes to this project will be documented in this file. ## [v1.67.0] - Added checksum to GitLab rule +- Fixed deduplication to consider rule identifiers so overlapping patterns are not merged before validation +- Moved "URI with Username and Secret" to a low-confidence rule ## [v1.66.0] - Updating to support Bitbucket App Passwords diff --git a/src/reporter.rs b/src/reporter.rs index 48f53d3..4cabba4 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -333,11 +333,11 @@ impl DetailsReporter { } use std::collections::HashMap; - let mut by_fp: HashMap = HashMap::new(); + let mut by_fp: HashMap<(u64, String), ReportMatch> = HashMap::new(); for rm in matches { - let fp = rm.m.finding_fingerprint; - if let Some(existing) = by_fp.get_mut(&fp) { + let key = (rm.m.finding_fingerprint, rm.m.rule.id().to_string()); + if let Some(existing) = by_fp.get_mut(&key) { // merge origin sets (keep first origin, append the rest) for o in rm.origin.iter() { if !existing.origin.iter().any(|e| e == o) { @@ -355,7 +355,7 @@ impl DetailsReporter { } continue; } - by_fp.insert(fp, rm); + by_fp.insert(key, rm); } by_fp.into_values().collect() } diff --git a/tests/fingerprint_dedup.rs b/tests/fingerprint_dedup.rs index 5771cc4..dee2fd3 100644 --- a/tests/fingerprint_dedup.rs +++ b/tests/fingerprint_dedup.rs @@ -20,10 +20,10 @@ use kingfisher::{ use smallvec::smallvec; // ---- helpers ------------------------------------------------------------------------------- -fn make_match(fp: u64) -> Match { +fn make_match(fp: u64, rule_id: &str) -> Match { let syntax = RuleSyntax { name: "Example Rule".to_string(), - id: "RULE.1".to_string(), + id: rule_id.to_string(), pattern: "dummy".to_string(), min_entropy: 0.0, confidence: Confidence::Medium, @@ -99,8 +99,8 @@ fn git_origin(commit_id: &str) -> OriginSet { #[test] fn reporter_deduplicates_across_git_commits() -> Result<()> { // Build two matches with the same fingerprint. - let m1 = make_match(0xBADC0FFE); - let m2 = make_match(0xBADC0FFE); + let m1 = make_match(0xBADC0FFE, "RULE.1"); + let m2 = make_match(0xBADC0FFE, "RULE.1"); // Different commit ids -- old dedup logic *fails* to merge them. let origin_a = git_origin("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); @@ -156,3 +156,63 @@ fn reporter_deduplicates_across_git_commits() -> Result<()> { Ok(()) } + +#[test] +fn dedup_preserves_distinct_rules_with_same_fingerprint() -> Result<()> { + let shared_fp = 0xDEADC0DE; + let m1 = make_match(shared_fp, "RULE.OPENAI"); + let m2 = make_match(shared_fp, "RULE.DEEPSEEK"); + + let origin = git_origin("cccccccccccccccccccccccccccccccccccccccc"); + + let reporter = DetailsReporter { + datastore: Arc::new(Mutex::new(FindingsStore::new(PathBuf::from("/tmp")))), + styles: Styles::new(false), + only_valid: false, + }; + + let matches = vec![ + ReportMatch { + origin: origin.clone(), + blob_metadata: BlobMetadata { + id: BlobId::new(b"dummy"), + num_bytes: 10, + mime_essence: None, + language: None, + }, + m: m1, + comment: None, + match_confidence: Confidence::Medium, + visible: true, + validation_response_body: String::new(), + validation_response_status: 0, + validation_success: false, + }, + ReportMatch { + origin, + blob_metadata: BlobMetadata { + id: BlobId::new(b"dummy"), + num_bytes: 10, + mime_essence: None, + language: None, + }, + m: m2, + comment: None, + match_confidence: Confidence::Medium, + visible: true, + validation_response_body: String::new(), + validation_response_status: 0, + validation_success: false, + }, + ]; + + let deduped = reporter.deduplicate_matches(matches, /* no_dedup= */ false); + + assert_eq!( + deduped.len(), + 2, + "matches from distinct rules must not be deduplicated" + ); + + Ok(()) +}