forked from mirrors/kingfisher
v1.88.0
This commit is contained in:
parent
b99cbf9f50
commit
db67105221
6 changed files with 13 additions and 45 deletions
|
|
@ -90,7 +90,7 @@ rules:
|
|||
- "localhost"
|
||||
|
||||
min_entropy: 3.0
|
||||
confidence: medium
|
||||
confidence: low
|
||||
examples:
|
||||
- 'REDIS_PASSWORD="EXAMPLEp4ssw0rd123"'
|
||||
- 'redis_password=MyS3cur3R3d1sK3y'
|
||||
|
|
|
|||
|
|
@ -29,7 +29,6 @@ pub use rules::{Rules, RulesError};
|
|||
// Re-export RulesDatabase
|
||||
pub use rules_database::{
|
||||
format_regex_pattern, RuleDetectionProfileKind, RuleMatchProfile, RulesDatabase,
|
||||
TreeSitterFallbackPolicy,
|
||||
};
|
||||
|
||||
// Re-export defaults
|
||||
|
|
|
|||
|
|
@ -7,12 +7,6 @@ use vectorscan_rs::{BlockDatabase, Flag, Pattern};
|
|||
|
||||
use crate::rule::{Rule, RULE_COMMENTS_PATTERN};
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum TreeSitterFallbackPolicy {
|
||||
KeepRawWhenUnavailable,
|
||||
SuppressWhenUnavailable,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum RuleDetectionProfileKind {
|
||||
SelfIdentifying,
|
||||
|
|
@ -22,7 +16,6 @@ pub enum RuleDetectionProfileKind {
|
|||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct RuleMatchProfile {
|
||||
pub kind: RuleDetectionProfileKind,
|
||||
pub fallback_policy: TreeSitterFallbackPolicy,
|
||||
pub reason_codes: Vec<&'static str>,
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +79,6 @@ impl RulesDatabase {
|
|||
reason_codes.push("self_identifying_prefix");
|
||||
return RuleMatchProfile {
|
||||
kind: RuleDetectionProfileKind::SelfIdentifying,
|
||||
fallback_policy: TreeSitterFallbackPolicy::KeepRawWhenUnavailable,
|
||||
reason_codes,
|
||||
};
|
||||
}
|
||||
|
|
@ -133,27 +125,14 @@ impl RulesDatabase {
|
|||
if !is_context_dependent {
|
||||
return RuleMatchProfile {
|
||||
kind: RuleDetectionProfileKind::SelfIdentifying,
|
||||
fallback_policy: TreeSitterFallbackPolicy::KeepRawWhenUnavailable,
|
||||
reason_codes,
|
||||
};
|
||||
}
|
||||
|
||||
let fallback_policy = if has_depends_on {
|
||||
reason_codes.push("depends_on_keep_when_unavailable");
|
||||
TreeSitterFallbackPolicy::KeepRawWhenUnavailable
|
||||
} else if looks_generic_token && has_distance_operator {
|
||||
reason_codes.push("strict_fallback_suppress_when_unavailable");
|
||||
TreeSitterFallbackPolicy::SuppressWhenUnavailable
|
||||
} else {
|
||||
reason_codes.push("fallback_keep_when_unavailable");
|
||||
TreeSitterFallbackPolicy::KeepRawWhenUnavailable
|
||||
};
|
||||
|
||||
RuleMatchProfile {
|
||||
kind: RuleDetectionProfileKind::ContextDependent,
|
||||
fallback_policy,
|
||||
reason_codes,
|
||||
if looks_generic_token && has_distance_operator {
|
||||
reason_codes.push("strict_contextual_shape");
|
||||
}
|
||||
|
||||
RuleMatchProfile { kind: RuleDetectionProfileKind::ContextDependent, reason_codes }
|
||||
}
|
||||
|
||||
pub fn get_rule_by_finding_fingerprint(&self, finding_fingerprint: &str) -> Option<Arc<Rule>> {
|
||||
|
|
@ -454,7 +433,6 @@ mod test_rule_match_profiles {
|
|||
mk_rule("kingfisher.circleci.1", r"(?x)\b(CCIPAT_[A-Za-z0-9]{22}_[a-z0-9]{40})\b");
|
||||
let profile = RulesDatabase::classify_rule_profile(&rule);
|
||||
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
|
||||
assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::KeepRawWhenUnavailable);
|
||||
assert!(profile.reason_codes.contains(&"self_identifying_prefix"));
|
||||
}
|
||||
|
||||
|
|
@ -466,8 +444,8 @@ mod test_rule_match_profiles {
|
|||
);
|
||||
let profile = RulesDatabase::classify_rule_profile(&rule);
|
||||
assert_eq!(profile.kind, RuleDetectionProfileKind::ContextDependent);
|
||||
assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::SuppressWhenUnavailable);
|
||||
assert!(profile.reason_codes.contains(&"generic_token_shape"));
|
||||
assert!(profile.reason_codes.contains(&"strict_contextual_shape"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -506,7 +484,6 @@ mod test_rule_match_profiles {
|
|||
|
||||
let profile = RulesDatabase::classify_rule_profile(&rule);
|
||||
assert_eq!(profile.kind, RuleDetectionProfileKind::ContextDependent);
|
||||
assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::KeepRawWhenUnavailable);
|
||||
assert!(profile.reason_codes.contains(&"depends_on_keep_when_unavailable"));
|
||||
assert!(profile.reason_codes.contains(&"depends_on_rule"));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ The goal is to confirm that a regex hit appears in a plausible code assignment/c
|
|||
2. `Matcher::scan_blob` performs the primary regex scan and other filtering.
|
||||
3. `maybe_apply_tree_sitter_verification` runs near the end of `scan_blob`.
|
||||
4. Only candidate matches are checked against Tree-sitter extracted text.
|
||||
5. Matches that fail verification can be dropped, depending on rule profile and fallback policy.
|
||||
5. Matches that fail verification are dropped for context-dependent rules.
|
||||
|
||||
## Size and Mode Gates
|
||||
|
||||
|
|
@ -38,9 +38,9 @@ Tree-sitter verification is only applied to matches that are:
|
|||
- Classified as `ContextDependent` by rule profiling.
|
||||
- Not base64-derived findings (`is_base64 == false`).
|
||||
|
||||
Classification and fallback policy come from rule profiles in `kingfisher-rules`:
|
||||
Classification comes from rule profiles in `kingfisher-rules`:
|
||||
|
||||
- `SelfIdentifying`: usually keep raw regex result.
|
||||
- `SelfIdentifying`: keep raw regex result.
|
||||
- `ContextDependent`: may require Tree-sitter confirmation.
|
||||
|
||||
## How Verification Works
|
||||
|
|
@ -61,14 +61,9 @@ When Tree-sitter is available:
|
|||
|
||||
If no extracted fragment verifies the secret, that candidate finding is removed.
|
||||
|
||||
## Fallback Behavior When Tree-sitter Is Unavailable
|
||||
## Behavior When Tree-sitter Is Unavailable
|
||||
|
||||
If Tree-sitter cannot run (size/mode/language/parse errors), behavior is rule-driven:
|
||||
|
||||
- `KeepRawWhenUnavailable`: keep the regex finding.
|
||||
- `SuppressWhenUnavailable`: drop the finding.
|
||||
|
||||
`SuppressWhenUnavailable` is used for stricter generic-context patterns where false positives are likely without syntax-aware confirmation.
|
||||
If Tree-sitter cannot run (size/mode/language/parse errors), Kingfisher keeps the original regex finding.
|
||||
|
||||
## Supported Languages in This Path
|
||||
|
||||
|
|
@ -100,6 +95,6 @@ Tree-sitter in Kingfisher is a conditional verifier, not the primary detector:
|
|||
- Regex finds candidates quickly.
|
||||
- Rule profiling decides which candidates need context verification.
|
||||
- Tree-sitter confirms contextual plausibility from parsed syntax.
|
||||
- Fallback policy determines what to do when verification cannot run.
|
||||
- If verification cannot run, scan results fall back to the regex pass.
|
||||
|
||||
This keeps scanning fast while reducing noisy matches for context-dependent secret patterns.
|
||||
|
|
|
|||
|
|
@ -446,7 +446,6 @@ fn maybe_apply_tree_sitter_verification<'a>(
|
|||
let Some(rule_idx) = match_rule_indices.get(idx).copied() else {
|
||||
continue;
|
||||
};
|
||||
let profile = &profiles[rule_idx];
|
||||
let match_secret = matches[idx].matching_input;
|
||||
let re = &rules_db.anchored_regexes()[rule_idx];
|
||||
|
||||
|
|
@ -462,7 +461,6 @@ fn maybe_apply_tree_sitter_verification<'a>(
|
|||
None => {
|
||||
// Tree-sitter is an optional precision layer. If parser context
|
||||
// is unavailable, always fall back to the original regex match.
|
||||
let _ = profile.fallback_policy;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,5 +4,4 @@
|
|||
|
||||
pub use kingfisher_rules::rules_database::{
|
||||
format_regex_pattern, RuleDetectionProfileKind, RuleMatchProfile, RulesDatabase,
|
||||
TreeSitterFallbackPolicy,
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue