This commit is contained in:
Mick Grove 2026-03-11 21:36:30 -07:00
commit db67105221
6 changed files with 13 additions and 45 deletions

View file

@ -90,7 +90,7 @@ rules:
- "localhost"
min_entropy: 3.0
confidence: medium
confidence: low
examples:
- 'REDIS_PASSWORD="EXAMPLEp4ssw0rd123"'
- 'redis_password=MyS3cur3R3d1sK3y'

View file

@ -29,7 +29,6 @@ pub use rules::{Rules, RulesError};
// Re-export RulesDatabase
pub use rules_database::{
format_regex_pattern, RuleDetectionProfileKind, RuleMatchProfile, RulesDatabase,
TreeSitterFallbackPolicy,
};
// Re-export defaults

View file

@ -7,12 +7,6 @@ use vectorscan_rs::{BlockDatabase, Flag, Pattern};
use crate::rule::{Rule, RULE_COMMENTS_PATTERN};
/// Per-rule policy for what happens to a regex match when Tree-sitter
/// verification cannot run for the blob being scanned.
///
/// NOTE(review): the scanner call site visible in this change reads the policy
/// and discards it (`let _ = profile.fallback_policy;`), falling back to the
/// original regex match — confirm whether anything still acts on this value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TreeSitterFallbackPolicy {
/// Keep the raw regex finding when Tree-sitter is unavailable.
KeepRawWhenUnavailable,
/// Drop the finding when Tree-sitter is unavailable; selected for rules that
/// look like a generic token and use a distance operator.
SuppressWhenUnavailable,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleDetectionProfileKind {
SelfIdentifying,
@ -22,7 +16,6 @@ pub enum RuleDetectionProfileKind {
/// Classification result for a single rule, as returned by
/// `RulesDatabase::classify_rule_profile`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RuleMatchProfile {
/// Whether the rule was classified as self-identifying or context-dependent.
pub kind: RuleDetectionProfileKind,
/// What to do with a match for this rule when Tree-sitter is unavailable.
pub fallback_policy: TreeSitterFallbackPolicy,
/// Static labels recording why this classification was chosen
/// (for example `"self_identifying_prefix"` or `"depends_on_rule"`).
pub reason_codes: Vec<&'static str>,
}
@ -86,7 +79,6 @@ impl RulesDatabase {
reason_codes.push("self_identifying_prefix");
return RuleMatchProfile {
kind: RuleDetectionProfileKind::SelfIdentifying,
fallback_policy: TreeSitterFallbackPolicy::KeepRawWhenUnavailable,
reason_codes,
};
}
@ -133,27 +125,14 @@ impl RulesDatabase {
if !is_context_dependent {
return RuleMatchProfile {
kind: RuleDetectionProfileKind::SelfIdentifying,
fallback_policy: TreeSitterFallbackPolicy::KeepRawWhenUnavailable,
reason_codes,
};
}
let fallback_policy = if has_depends_on {
reason_codes.push("depends_on_keep_when_unavailable");
TreeSitterFallbackPolicy::KeepRawWhenUnavailable
} else if looks_generic_token && has_distance_operator {
reason_codes.push("strict_fallback_suppress_when_unavailable");
TreeSitterFallbackPolicy::SuppressWhenUnavailable
} else {
reason_codes.push("fallback_keep_when_unavailable");
TreeSitterFallbackPolicy::KeepRawWhenUnavailable
};
RuleMatchProfile {
kind: RuleDetectionProfileKind::ContextDependent,
fallback_policy,
reason_codes,
if looks_generic_token && has_distance_operator {
reason_codes.push("strict_contextual_shape");
}
RuleMatchProfile { kind: RuleDetectionProfileKind::ContextDependent, reason_codes }
}
pub fn get_rule_by_finding_fingerprint(&self, finding_fingerprint: &str) -> Option<Arc<Rule>> {
@ -454,7 +433,6 @@ mod test_rule_match_profiles {
mk_rule("kingfisher.circleci.1", r"(?x)\b(CCIPAT_[A-Za-z0-9]{22}_[a-z0-9]{40})\b");
let profile = RulesDatabase::classify_rule_profile(&rule);
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::KeepRawWhenUnavailable);
assert!(profile.reason_codes.contains(&"self_identifying_prefix"));
}
@ -466,8 +444,8 @@ mod test_rule_match_profiles {
);
let profile = RulesDatabase::classify_rule_profile(&rule);
assert_eq!(profile.kind, RuleDetectionProfileKind::ContextDependent);
assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::SuppressWhenUnavailable);
assert!(profile.reason_codes.contains(&"generic_token_shape"));
assert!(profile.reason_codes.contains(&"strict_contextual_shape"));
}
#[test]
@ -506,7 +484,6 @@ mod test_rule_match_profiles {
let profile = RulesDatabase::classify_rule_profile(&rule);
assert_eq!(profile.kind, RuleDetectionProfileKind::ContextDependent);
assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::KeepRawWhenUnavailable);
assert!(profile.reason_codes.contains(&"depends_on_keep_when_unavailable"));
assert!(profile.reason_codes.contains(&"depends_on_rule"));
}
}

View file

@ -18,7 +18,7 @@ The goal is to confirm that a regex hit appears in a plausible code assignment/c
2. `Matcher::scan_blob` performs the primary regex scan and other filtering.
3. `maybe_apply_tree_sitter_verification` runs near the end of `scan_blob`.
4. Only candidate matches are checked against Tree-sitter extracted text.
5. Matches that fail verification can be dropped, depending on rule profile and fallback policy.
5. Matches that fail verification are dropped for context-dependent rules.
## Size and Mode Gates
@ -38,9 +38,9 @@ Tree-sitter verification is only applied to matches that are:
- Classified as `ContextDependent` by rule profiling.
- Not base64-derived findings (`is_base64 == false`).
Classification and fallback policy come from rule profiles in `kingfisher-rules`:
Classification comes from rule profiles in `kingfisher-rules`:
- `SelfIdentifying`: usually keep raw regex result.
- `SelfIdentifying`: keep the raw regex result.
- `ContextDependent`: may require Tree-sitter confirmation.
## How Verification Works
@ -61,14 +61,9 @@ When Tree-sitter is available:
If no extracted fragment verifies the secret, that candidate finding is removed.
## Fallback Behavior When Tree-sitter Is Unavailable
## Behavior When Tree-sitter Is Unavailable
If Tree-sitter cannot run (size/mode/language/parse errors), behavior is rule-driven:
- `KeepRawWhenUnavailable`: keep the regex finding.
- `SuppressWhenUnavailable`: drop the finding.
`SuppressWhenUnavailable` is used for stricter generic-context patterns where false positives are likely without syntax-aware confirmation.
If Tree-sitter cannot run (size/mode/language/parse errors), Kingfisher keeps the original regex finding.
## Supported Languages in This Path
@ -100,6 +95,6 @@ Tree-sitter in Kingfisher is a conditional verifier, not the primary detector:
- Regex finds candidates quickly.
- Rule profiling decides which candidates need context verification.
- Tree-sitter confirms contextual plausibility from parsed syntax.
- Fallback policy determines what to do when verification cannot run.
- If verification cannot run, scan results fall back to the regex pass.
This keeps scanning fast while reducing noisy matches for context-dependent secret patterns.

View file

@ -446,7 +446,6 @@ fn maybe_apply_tree_sitter_verification<'a>(
let Some(rule_idx) = match_rule_indices.get(idx).copied() else {
continue;
};
let profile = &profiles[rule_idx];
let match_secret = matches[idx].matching_input;
let re = &rules_db.anchored_regexes()[rule_idx];
@ -462,7 +461,6 @@ fn maybe_apply_tree_sitter_verification<'a>(
None => {
// Tree-sitter is an optional precision layer. If parser context
// is unavailable, always fall back to the original regex match.
let _ = profile.fallback_policy;
}
}
}

View file

@ -4,5 +4,4 @@
pub use kingfisher_rules::rules_database::{
format_regex_pattern, RuleDetectionProfileKind, RuleMatchProfile, RulesDatabase,
TreeSitterFallbackPolicy,
};