diff --git a/crates/kingfisher-rules/data/rules/redis.yml b/crates/kingfisher-rules/data/rules/redis.yml index 9c92f9b..3d6d5b8 100644 --- a/crates/kingfisher-rules/data/rules/redis.yml +++ b/crates/kingfisher-rules/data/rules/redis.yml @@ -90,7 +90,7 @@ rules: - "localhost" min_entropy: 3.0 - confidence: medium + confidence: low examples: - 'REDIS_PASSWORD="EXAMPLEp4ssw0rd123"' - 'redis_password=MyS3cur3R3d1sK3y' diff --git a/crates/kingfisher-rules/src/lib.rs b/crates/kingfisher-rules/src/lib.rs index 2c8164a..8455999 100644 --- a/crates/kingfisher-rules/src/lib.rs +++ b/crates/kingfisher-rules/src/lib.rs @@ -29,7 +29,6 @@ pub use rules::{Rules, RulesError}; // Re-export RulesDatabase pub use rules_database::{ format_regex_pattern, RuleDetectionProfileKind, RuleMatchProfile, RulesDatabase, - TreeSitterFallbackPolicy, }; // Re-export defaults diff --git a/crates/kingfisher-rules/src/rules_database.rs b/crates/kingfisher-rules/src/rules_database.rs index ca703a6..3a28fac 100644 --- a/crates/kingfisher-rules/src/rules_database.rs +++ b/crates/kingfisher-rules/src/rules_database.rs @@ -7,12 +7,6 @@ use vectorscan_rs::{BlockDatabase, Flag, Pattern}; use crate::rule::{Rule, RULE_COMMENTS_PATTERN}; -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum TreeSitterFallbackPolicy { - KeepRawWhenUnavailable, - SuppressWhenUnavailable, -} - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum RuleDetectionProfileKind { SelfIdentifying, @@ -22,7 +16,6 @@ pub enum RuleDetectionProfileKind { #[derive(Debug, Clone, PartialEq, Eq)] pub struct RuleMatchProfile { pub kind: RuleDetectionProfileKind, - pub fallback_policy: TreeSitterFallbackPolicy, pub reason_codes: Vec<&'static str>, } @@ -86,7 +79,6 @@ impl RulesDatabase { reason_codes.push("self_identifying_prefix"); return RuleMatchProfile { kind: RuleDetectionProfileKind::SelfIdentifying, - fallback_policy: TreeSitterFallbackPolicy::KeepRawWhenUnavailable, reason_codes, }; } @@ -133,27 +125,14 @@ impl RulesDatabase { if !is_context_dependent { return RuleMatchProfile { kind: RuleDetectionProfileKind::SelfIdentifying, - fallback_policy: TreeSitterFallbackPolicy::KeepRawWhenUnavailable, reason_codes, }; } - - let fallback_policy = if has_depends_on { - reason_codes.push("depends_on_keep_when_unavailable"); - TreeSitterFallbackPolicy::KeepRawWhenUnavailable - } else if looks_generic_token && has_distance_operator { - reason_codes.push("strict_fallback_suppress_when_unavailable"); - TreeSitterFallbackPolicy::SuppressWhenUnavailable - } else { - reason_codes.push("fallback_keep_when_unavailable"); - TreeSitterFallbackPolicy::KeepRawWhenUnavailable - }; - - RuleMatchProfile { - kind: RuleDetectionProfileKind::ContextDependent, - fallback_policy, - reason_codes, + if looks_generic_token && has_distance_operator { + reason_codes.push("strict_contextual_shape"); } + + RuleMatchProfile { kind: RuleDetectionProfileKind::ContextDependent, reason_codes } } pub fn get_rule_by_finding_fingerprint(&self, finding_fingerprint: &str) -> Option> { @@ -454,7 +433,6 @@ mod test_rule_match_profiles { mk_rule("kingfisher.circleci.1", r"(?x)\b(CCIPAT_[A-Za-z0-9]{22}_[a-z0-9]{40})\b"); let profile = RulesDatabase::classify_rule_profile(&rule); assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying); - assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::KeepRawWhenUnavailable); assert!(profile.reason_codes.contains(&"self_identifying_prefix")); } @@ -466,8 +444,8 @@ mod test_rule_match_profiles { ); let profile = RulesDatabase::classify_rule_profile(&rule); assert_eq!(profile.kind, RuleDetectionProfileKind::ContextDependent); - assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::SuppressWhenUnavailable); assert!(profile.reason_codes.contains(&"generic_token_shape")); + assert!(profile.reason_codes.contains(&"strict_contextual_shape")); } #[test] @@ -506,7 +484,6 @@ mod test_rule_match_profiles { let profile = RulesDatabase::classify_rule_profile(&rule); assert_eq!(profile.kind, RuleDetectionProfileKind::ContextDependent); - assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::KeepRawWhenUnavailable); - assert!(profile.reason_codes.contains(&"depends_on_keep_when_unavailable")); + assert!(profile.reason_codes.contains(&"depends_on_rule")); } } diff --git a/docs/TREE_SITTER.md b/docs/TREE_SITTER.md index 8ba4413..999826a 100644 --- a/docs/TREE_SITTER.md +++ b/docs/TREE_SITTER.md @@ -18,7 +18,7 @@ The goal is to confirm that a regex hit appears in a plausible code assignment/c 2. `Matcher::scan_blob` performs the primary regex scan and other filtering. 3. `maybe_apply_tree_sitter_verification` runs near the end of `scan_blob`. 4. Only candidate matches are checked against Tree-sitter extracted text. -5. Matches that fail verification can be dropped, depending on rule profile and fallback policy. +5. Matches that fail verification are dropped for context-dependent rules. ## Size and Mode Gates @@ -38,9 +38,9 @@ Tree-sitter verification is only applied to matches that are: - Classified as `ContextDependent` by rule profiling. - Not base64-derived findings (`is_base64 == false`). -Classification and fallback policy come from rule profiles in `kingfisher-rules`: +Classification comes from rule profiles in `kingfisher-rules`: -- `SelfIdentifying`: usually keep raw regex result. +- `SelfIdentifying`: keep raw regex result. - `ContextDependent`: may require Tree-sitter confirmation. ## How Verification Works @@ -61,14 +61,9 @@ When Tree-sitter is available: If no extracted fragment verifies the secret, that candidate finding is removed. -## Fallback Behavior When Tree-sitter Is Unavailable +## Behavior When Tree-sitter Is Unavailable -If Tree-sitter cannot run (size/mode/language/parse errors), behavior is rule-driven: - -- `KeepRawWhenUnavailable`: keep the regex finding. -- `SuppressWhenUnavailable`: drop the finding. - -`SuppressWhenUnavailable` is used for stricter generic-context patterns where false positives are likely without syntax-aware confirmation. +If Tree-sitter cannot run (size/mode/language/parse errors), Kingfisher keeps the original regex finding. ## Supported Languages in This Path @@ -100,6 +95,6 @@ Tree-sitter in Kingfisher is a conditional verifier, not the primary detector: - Regex finds candidates quickly. - Rule profiling decides which candidates need context verification. - Tree-sitter confirms contextual plausibility from parsed syntax. -- Fallback policy determines what to do when verification cannot run. +- If verification cannot run, scan results fall back to the regex pass. This keeps scanning fast while reducing noisy matches for context-dependent secret patterns. diff --git a/src/matcher/mod.rs b/src/matcher/mod.rs index 89cc2a8..a1e862c 100644 --- a/src/matcher/mod.rs +++ b/src/matcher/mod.rs @@ -446,7 +446,6 @@ fn maybe_apply_tree_sitter_verification<'a>( let Some(rule_idx) = match_rule_indices.get(idx).copied() else { continue; }; - let profile = &profiles[rule_idx]; let match_secret = matches[idx].matching_input; let re = &rules_db.anchored_regexes()[rule_idx]; @@ -462,7 +461,6 @@ fn maybe_apply_tree_sitter_verification<'a>( None => { // Tree-sitter is an optional precision layer. If parser context // is unavailable, always fall back to the original regex match. - let _ = profile.fallback_policy; } } } diff --git a/src/rules_database.rs b/src/rules_database.rs index 396fdf9..c37b0f4 100644 --- a/src/rules_database.rs +++ b/src/rules_database.rs @@ -4,5 +4,4 @@ pub use kingfisher_rules::rules_database::{ format_regex_pattern, RuleDetectionProfileKind, RuleMatchProfile, RulesDatabase, - TreeSitterFallbackPolicy, };