forked from mirrors/kingfisher
Added an optional exclude_words list to PatternRequirements so matches containing case-insensitive placeholder words are filtered out, with accompanying tests to cover the new behavior.
This commit is contained in:
parent
a3e426e6ee
commit
bd8bc09d0e
4 changed files with 149 additions and 6 deletions
|
|
@ -2,10 +2,11 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [Unrelease]
|
||||
## [Unreleased]
|
||||
- pattern_requirements for rules — Post-regex character-class gating to cut false positives without lookarounds. Authors can now require minimum counts of digits, uppercase, lowercase, and special characters, with an optional custom special-char set. Why? Hyperscan doesn’t support lookaheads/behinds, so many "must contain X and Y" checks had to be baked into the regex (hurting readability) or were impossible. `pattern_requirements` applies lightweight, in-memory checks after a match is found, keeping patterns fast and clean.
|
||||
- updated rules with support for `pattern_requirements`
|
||||
- Updated many rules with `pattern_requirements`
|
||||
- Automatically set `--no-dedup` whenever `--manage-baseline` is supplied so baseline management retains every occurrence of a finding
|
||||
- Added an optional `exclude_words` list to `PatternRequirements`: any match that contains one of the listed placeholder words (compared case-insensitively) is filtered out. Includes tests covering the new behavior.
|
||||
|
||||
## [v1.61.0]
|
||||
- Fixed local filesystem scans to keep `open_path_as_is` enabled when opening Git repositories and only disable it for diff-based scans.
|
||||
|
|
|
|||
|
|
@ -38,12 +38,14 @@ rules:
|
|||
- rule_id: kingfisher.aws.id
|
||||
variable: AKID # referenced as {{ AKID }}
|
||||
|
||||
pattern_requirements: # (optional) character type requirements
|
||||
pattern_requirements: # (optional) character/word requirements
|
||||
min_digits: 1 # require at least 1 digit
|
||||
min_uppercase: 1 # require at least 1 uppercase letter
|
||||
min_lowercase: 1 # require at least 1 lowercase letter
|
||||
min_special_chars: 1 # require at least 1 special character
|
||||
special_chars: "!@#$%^&*()" # optional: custom special character set
|
||||
exclude_words: # optional: drop matches containing these words
|
||||
- test
|
||||
|
||||
validation: # (optional) live validation
|
||||
type: Http
|
||||
|
|
@ -78,7 +80,7 @@ rules:
|
|||
| examples | Good matches; used for testing |
|
||||
| visible | false to hide non‑secret captures (e.g. IDs) |
|
||||
| depends_on_rule | Chain rules: use captures from one rule in another's validation |
|
||||
| pattern_requirements | Require specific character types (digits, uppercase, lowercase, special) |
|
||||
| pattern_requirements | Require character types and/or exclude placeholder words from matches |
|
||||
| validation | Configure HTTP, AWS, GCP, etc. checks to verify live validity |
|
||||
|
||||
|
||||
|
|
@ -264,10 +266,15 @@ pattern_requirements:
|
|||
min_lowercase: 1 # Require at least 1 lowercase letter (a-z)
|
||||
min_special_chars: 1 # Require at least 1 special character
|
||||
special_chars: "!@#$%^&*" # Optional: define which characters are "special"
|
||||
exclude_words: # Optional: reject matches containing any of these (case-insensitive)
|
||||
- test
|
||||
- demo
|
||||
```
|
||||
|
||||
All fields are optional. If `special_chars` is not specified, the default set includes: `` !@#$%^&*()_+-=[]{}|;:'",.<>?/\`~ ``
|
||||
|
||||
`exclude_words` performs a case-insensitive substring check. Each entry is trimmed of surrounding whitespace first, and entries that are empty after trimming are ignored. If any remaining entry appears anywhere within the match, the match is discarded. This is helpful for dropping known dummy tokens such as "test" or "demo" that otherwise satisfy the regex.
|
||||
|
||||
### Example: Secure API Key
|
||||
|
||||
```yaml
|
||||
|
|
@ -288,6 +295,8 @@ rules:
|
|||
min_uppercase: 1 # Must contain at least 1 uppercase letter
|
||||
min_lowercase: 1 # Must contain at least 1 lowercase letter
|
||||
min_special_chars: 1 # Must contain at least 1 special character
|
||||
exclude_words:
|
||||
- test
|
||||
examples:
|
||||
- api_key = "MyS3cur3K3y!2024"
|
||||
- api-key: "Abc123!@#Token"
|
||||
|
|
@ -298,6 +307,25 @@ In this example:
|
|||
- The `pattern_requirements` filters out matches that don't have at least one of each required type
|
||||
- A match like `"abcdefghijklmnopqrst"` would be rejected (no uppercase, no digit, no special)
|
||||
- A match like `"Abc123!SecureToken"` would be accepted (has all required types)
|
||||
- A match like `"Test123!SecureToken"` would be rejected because it contains the excluded word `test`
|
||||
|
||||
### Example: Excluding Dummy Values
|
||||
|
||||
```yaml
|
||||
rules:
|
||||
- name: Token without placeholders
|
||||
id: custom.token.2
|
||||
pattern: |-
|
||||
(?i)token[:=]\s*([A-Za-z0-9]{12,})
|
||||
pattern_requirements:
|
||||
exclude_words:
|
||||
- placeholder
|
||||
- sample
|
||||
examples:
|
||||
- 'token: REALVALUE1234'
|
||||
negative_examples:
|
||||
- token=SAMPLETOKEN9999 # matches the regex, then dropped by exclude_words ("sample")
|
||||
```
|
||||
|
||||
### Example: Custom Special Characters
|
||||
|
||||
|
|
|
|||
|
|
@ -1004,7 +1004,9 @@ mod test {
|
|||
use crate::{
|
||||
blob::{Blob, BlobIdMap},
|
||||
origin::{Origin, OriginSet},
|
||||
rules::rule::{DependsOnRule, HttpRequest, HttpValidation, RuleSyntax, Validation},
|
||||
rules::rule::{
|
||||
DependsOnRule, HttpRequest, HttpValidation, PatternRequirements, RuleSyntax, Validation,
|
||||
},
|
||||
};
|
||||
|
||||
proptest! {
|
||||
|
|
@ -1138,6 +1140,51 @@ mod test {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Scans a small input through a `Matcher` built from one rule whose
/// `pattern_requirements` carries an `exclude_words` list, and asserts that
/// only the candidate without the excluded word is left in the raw matches.
///
/// The rule's regex is `prefix` followed by ASCII letters; the input contains
/// two such candidates, `prefixgood` and `prefixtest`.
#[test]
|
||||
fn test_pattern_requirements_exclude_words_filters_matches() -> Result<()> {
|
||||
let rules = vec![Rule::new(RuleSyntax {
|
||||
id: "test.exclude".to_string(),
|
||||
name: "exclude words".to_string(),
|
||||
pattern: "(?P<token>prefix[A-Za-z]+)".to_string(),
|
||||
confidence: crate::rules::rule::Confidence::Medium,
|
||||
// NOTE(review): presumably 0.0 keeps entropy from filtering either candidate — confirm.
min_entropy: 0.0,
|
||||
visible: true,
|
||||
examples: vec![],
|
||||
negative_examples: vec![],
|
||||
references: vec![],
|
||||
validation: None,
|
||||
depends_on_rule: vec![],
|
||||
// Only `exclude_words` is set, so no character-count requirement is configured here.
pattern_requirements: Some(PatternRequirements {
|
||||
min_digits: None,
|
||||
min_uppercase: None,
|
||||
min_lowercase: None,
|
||||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
// Upper-case on purpose: the input below is lower-case, so the second
// candidate is only excluded if the comparison ignores case.
exclude_words: Some(vec!["TEST".to_string()]),
|
||||
}),
|
||||
})];
|
||||
|
||||
let rules_db = RulesDatabase::from_rules(rules)?;
|
||||
// Two regex candidates: `prefixgood` (clean) and `prefixtest` (contains the excluded word).
let input = b"prefixgood prefixtest";
|
||||
let seen_blobs: BlobIdMap<bool> = BlobIdMap::new();
|
||||
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
|
||||
let mut matcher =
|
||||
Matcher::new(&rules_db, scanner_pool, &seen_blobs, None, false, None, &[], false)?;
|
||||
|
||||
matcher.scan_bytes_raw(input, "fname")?;
|
||||
|
||||
// Inspect the raw matches the scan left behind: exactly one should remain.
let matches = &matcher.user_data.raw_matches_scratch;
|
||||
assert_eq!(matches.len(), 1, "exclude_words should drop filtered matches");
|
||||
// The surviving match's byte range must cover the clean candidate.
let RawMatch { start_idx, end_idx, .. } = matches[0];
|
||||
assert_eq!(
|
||||
&input[start_idx as usize..end_idx as usize],
|
||||
b"prefixgood",
|
||||
"remaining match should be the non-excluded token",
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// additional deterministic unit-tests
|
||||
// ---------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -70,6 +70,9 @@ pub struct PatternRequirements {
|
|||
/// Custom set of characters to consider as "special" (defaults to common punctuation)
|
||||
#[serde(default)]
|
||||
pub special_chars: Option<String>,
|
||||
/// Words that should cause the match to be excluded when present (case-insensitive)
|
||||
#[serde(default)]
|
||||
pub exclude_words: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl PatternRequirements {
|
||||
|
|
@ -108,13 +111,33 @@ impl PatternRequirements {
|
|||
|
||||
// Check special character requirement
|
||||
if let Some(min_special) = self.min_special_chars {
|
||||
let special_chars = self.special_chars.as_deref().unwrap_or(Self::DEFAULT_SPECIAL_CHARS);
|
||||
let special_chars =
|
||||
self.special_chars.as_deref().unwrap_or(Self::DEFAULT_SPECIAL_CHARS);
|
||||
let special_count = s.chars().filter(|c| special_chars.contains(*c)).count();
|
||||
if special_count < min_special {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check exclude words requirement
|
||||
if let Some(exclude_words) = self.exclude_words.as_ref() {
|
||||
let lowercase_input = s.to_lowercase();
|
||||
if exclude_words
|
||||
.iter()
|
||||
.filter_map(|word| {
|
||||
let trimmed = word.trim();
|
||||
if trimmed.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(trimmed.to_lowercase())
|
||||
}
|
||||
})
|
||||
.any(|word| lowercase_input.contains(&word))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
|
|
@ -526,6 +549,7 @@ mod tests {
|
|||
min_lowercase: None,
|
||||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: has 3 digits
|
||||
|
|
@ -546,6 +570,7 @@ mod tests {
|
|||
min_lowercase: None,
|
||||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: has 3 uppercase
|
||||
|
|
@ -566,6 +591,7 @@ mod tests {
|
|||
min_lowercase: Some(2),
|
||||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: has 3 lowercase
|
||||
|
|
@ -586,6 +612,7 @@ mod tests {
|
|||
min_lowercase: None,
|
||||
min_special_chars: Some(2),
|
||||
special_chars: None, // uses default
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: has 2 special chars
|
||||
|
|
@ -606,6 +633,7 @@ mod tests {
|
|||
min_lowercase: None,
|
||||
min_special_chars: Some(2),
|
||||
special_chars: Some("$%^".to_string()),
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: has 2 custom special chars
|
||||
|
|
@ -626,6 +654,7 @@ mod tests {
|
|||
min_lowercase: Some(1),
|
||||
min_special_chars: Some(1),
|
||||
special_chars: None,
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: has all requirements
|
||||
|
|
@ -644,6 +673,43 @@ mod tests {
|
|||
assert!(!reqs.validate(b"Abc1"));
|
||||
}
|
||||
|
||||
/// Checks `PatternRequirements::validate` when only `exclude_words` is set:
/// inputs containing a listed word in any letter case are rejected, and inputs
/// containing none of the words are accepted.
#[test]
|
||||
fn test_pattern_requirements_exclude_words() {
|
||||
let reqs = PatternRequirements {
|
||||
min_digits: None,
|
||||
min_uppercase: None,
|
||||
min_lowercase: None,
|
||||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
// One entry is lower-case and one is mixed-case, so both the configured words
// and the inputs below vary in case.
exclude_words: Some(vec!["test".to_string(), "Demo".to_string()]),
|
||||
};
|
||||
|
||||
// Should fail: contains "test" (case-insensitive)
|
||||
assert!(!reqs.validate(b"MyTestToken"));
|
||||
|
||||
// Should fail: contains "demo" (case-insensitive)
|
||||
assert!(!reqs.validate(b"example-demo-value"));
|
||||
|
||||
// Should pass: does not contain excluded words
|
||||
assert!(reqs.validate(b"example-value"));
|
||||
}
|
||||
|
||||
/// Checks that whitespace-only and empty `exclude_words` entries do not
/// reject anything, while a real entry in the same list still does.
#[test]
|
||||
fn test_pattern_requirements_exclude_words_ignores_empty_entries() {
|
||||
let reqs = PatternRequirements {
|
||||
min_digits: None,
|
||||
min_uppercase: None,
|
||||
min_lowercase: None,
|
||||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
// The first two entries are blank. `str::contains("")` is always true, so if
// blank entries were not skipped, every input would be rejected.
exclude_words: Some(vec![" ".to_string(), "".to_string(), "BLOCK".to_string()]),
|
||||
};
|
||||
|
||||
// Should fail only when non-empty exclusion matches
// ("needs-blocking" contains "block", differing only in case from "BLOCK").
|
||||
assert!(!reqs.validate(b"needs-blocking"));
|
||||
// No non-blank entry occurs in "allowed", so it must pass.
assert!(reqs.validate(b"allowed"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pattern_requirements_none() {
|
||||
let reqs = PatternRequirements {
|
||||
|
|
@ -652,6 +718,7 @@ mod tests {
|
|||
min_lowercase: None,
|
||||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: no requirements
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue