Added an optional exclude_words list to PatternRequirements so matches containing case-insensitive placeholder words are filtered out, with accompanying tests to cover the new behavior.

This commit is contained in:
Mick Grove 2025-11-05 17:19:11 -08:00
commit dc02abac63
35 changed files with 299 additions and 108 deletions

View file

@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
## [Unreleased]
- Added `pattern_requirements` for rules. Enables post-regex character-class checks (digits, uppercase, lowercase, specials) to reduce false positives without lookarounds. Provides lightweight, in-memory validation after matches, keeping patterns fast and readable.
- Added an optional `exclude_words` list to `PatternRequirements` so matches containing case-insensitive placeholder words are filtered out, with accompanying tests to cover the new behavior.
- Added an optional `ignore_if_contains` list to `PatternRequirements` within the Rules structure, so matches containing case-insensitive placeholder words are filtered out, with accompanying tests to cover the new behavior.
- Updated many rules with `pattern_requirements`
- Automatically set `--no-dedup` whenever `--manage-baseline` is supplied so baseline management retains every occurrence of a finding

View file

@ -323,6 +323,21 @@ However, you may want to add your own custom rules, or modify a detection to bet
First, review [docs/RULES.md](/docs/RULES.md) to learn how to create custom Kingfisher rules.
### Pattern requirements and placeholder filtering
Every rule can declare optional `pattern_requirements` to enforce additional character checks after a regex matches. Each field
is independent:
- `min_digits`, `min_uppercase`, `min_lowercase`, and `min_special_chars` enforce complexity thresholds.
- `special_chars` lets you override the set of characters counted as "special" when `min_special_chars` is used.
- `ignore_if_contains` lists case-insensitive substrings that should cause a match to be discarded (for example, to drop
`test`, `demo`, or `localhost` values). Kingfisher still accepts the legacy `exclude_words` key as an alias when loading
existing rule files.
When a match is skipped because of `ignore_if_contains`, Kingfisher logs the event at the `DEBUG` level alongside the rule that
was evaluated. If you need to keep those matches for a particular scan, pass `--no-ignore-if-contains` to `kingfisher scan` to
disable the substring filter without editing any rule files.
Once you've done that, you can define your custom rules in a YAML file and provide them to Kingfisher at runtime --- no recompiling required!
# 🎉 Usage
@ -1168,6 +1183,8 @@ leaves the default unchanged.
- `--skip-aws-account-file <FILE>`: Load AWS account numbers to skip from a file (one account per line; `#` comments allowed)
- `--ignore-comment <DIRECTIVE>`: Honor additional inline directives from other scanners (repeatable; e.g. `--ignore-comment "gitleaks:allow"`)
- `--no-ignore`: Disable inline directives entirely so every match is reported
- `--no-ignore-if-contains`: Disable the `ignore_if_contains` filter in rules so matches containing placeholder words still produce findings
## Understanding `--confidence`
The `--confidence` flag sets a minimum confidence threshold, not an exact match.

View file

@ -9,7 +9,7 @@ rules:
(
[a-z0-9/+=]{372}
)
\b
(?:[^A-Za-z0-9/+=]|$)
pattern_requirements:
min_digits: 2
min_uppercase: 1

View file

@ -51,7 +51,7 @@ rules:
confidence: medium
examples:
- bitbucket_key=HedmnK9h6KD_eh9KK8FlI9ahUc8WfaNZ4gulbrtN2ouV
- bitbucket_secret=kd8j2h4jf9s8mf6l4k9j2h4jf9s8mf6l4k9j2h4jf9s8mf6l
- bitbucket_secret=kd8j2h4jf9s8mf6l4k9j2h4jf9s8mf6l4k9j2h4jf9s8
validation:
type: Http
content:

View file

@ -32,12 +32,12 @@ rules:
(
[A-Z0-9\+/]{64}
)
\b
(?:[^A-Za-z0-9/+=]|$)
min_entropy: 3.3
confidence: medium
examples:
- confluent secret=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890ab
- kafka_token=ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcdefghijklmnopqrstuvwxyzABCD
- kafka_token=ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcdefghijklmnopqrstuvwxyzAB
references:
- https://docs.confluent.io/cloud/current/api.html#tag/API-Keys-(iamv2)/operation/getIamV2ApiKey
validation:

View file

@ -3,7 +3,6 @@ rules:
id: kingfisher.coze.1
pattern: |
(?xi)
\b
coze
(?:.|[\n\r]){0,32}?
\b
@ -37,6 +36,6 @@ rules:
- https://www.coze.com/docs/developer_guides/coze_api_overview
- https://www.coze.com/docs/developer_guides/retrieve_files
examples:
- "pat_DlOG7fNcVfmw8cYhPWNcdfwrjjzwDr9EkV8EBjzHdgRWU2DzqHC1pPe0x590NN5f"
- "pat_93QiTdIvZGuRCFcfGTQJJ1VIYZ9dNHanX88wKoMojwMk3tX5tKqfFtxUp0ux8CjI"
- "pat_WvUTLYq5yZyaqegkyLSxXJMjXAJotjYEuC1sqT8daFlfwM3BiaRVJIZsER42DnhV"
- "key_coze = pat_DlOG7fNcVfmw8cYhPWNcdfwrjjzwDr9EkV8EBjzHdgRWU2DzqHC1pPe0x590NN5f"
- "coze_token = pat_93QiTdIvZGuRCFcfGTQJJ1VIYZ9dNHanX88wKoMojwMk3tX5tKqfFtxUp0ux8CjI"
- "coze-key: pat_WvUTLYq5yZyaqegkyLSxXJMjXAJotjYEuC1sqT8daFlfwM3BiaRVJIZsER42DnhV"

View file

@ -6,7 +6,7 @@ rules:
\b
(
EZ[AT]K
[A-Z0-9]{54}
[A-Za-z0-9]{54}
)
\b
pattern_requirements:

View file

@ -10,7 +10,7 @@ rules:
)
\b
pattern_requirements:
min_digits: 4
min_digits: 2
min_entropy: 3.3
confidence: low
examples:

View file

@ -18,7 +18,7 @@ rules:
examples:
- "intercom_access_token: dG9rOvI0NmJlMTA5XzQwM2NfNDVlM184MjQzXzkwMDnmOTE1NGIyONoxOjA="
- ic_token = "g1ZsclJXTjNfc1pBSzJDemE0eFVDU0U5c25CeDN4Vm9hQ2Zac0hXemZHNGVDPQ=="
- ic_token = "g1ZsclJXTjNfc1pBSzJDemE0eFVDU0U5c25CeDN4Vm9hQ2Zac0hXemZHNPQ=="
references:
- https://developers.intercom.com/docs/build-an-integration/learn-more/rest-apis

View file

@ -82,7 +82,7 @@ rules:
)
\b
pattern_requirements:
exclude_words:
ignore_if_contains:
- "@localhost"
- "@127.0.0.1"
min_entropy: 3

View file

@ -6,6 +6,10 @@ rules:
(?: User | User\ Id | UserId | Uid) \s*=\s* ([^\s;]{3,100}) \s* ;
[\ \t]* .{0,10} [\ \t]*
(?: Password | Pwd) \s*=\s* ([^\t\ ;]{3,100}) \s* (?: [;] | $)
pattern_requirements:
ignore_if_contains:
- "localhost"
- "127.0.0.1"
min_entropy: 3.3
confidence: medium
examples:

View file

@ -24,6 +24,10 @@ rules:
(?:
\d+
)
pattern_requirements:
ignore_if_contains:
- "@localhost"
- "@127.0.0.1"
min_entropy: 3.3
confidence: medium
examples:

View file

@ -9,7 +9,7 @@ rules:
(
6l[c-f][a-z0-9_-].{36}
)
\b
(?:[^A-Za-z0-9/]|$)
pattern_requirements:
min_digits: 3
min_entropy: 3

View file

@ -50,7 +50,7 @@ rules:
confidence: medium
examples:
- sntrys_eyJpYXQiOjE2OTA4ODAwMDAsInJlZ2lvbl91cmwiOiJodHRwczovL3NlbnRyeS5pby9vcmdzL215LW9yZy8ifQ==_abcdefghijklmnopqrstuvwx1234567890abcdefabc
- sntrys_eyJpYXQiOiIxNjkwODgwMDAwIiwicmVnaW9uX3VybCI6Imh0dHBzOi8vc2VudHJ5LmlvLyJ9_abcdABCD1234567890abcdABCD1234567890abcdABCD
- sntrys_eyJpYXQiOiIxNjkwODgwMDAwIiwicmVnaW9uX3VybCI6Imh0dHBzOi8vc2VudHJ5LmlvLyJ9_abcdABCD1234567890abcdABCD1234567890abcdABC
references:
- https://docs.sentry.io/api/auth/
validation:

View file

@ -16,7 +16,7 @@ rules:
min_entropy: 3.3
confidence: medium
examples:
- square EAAA7h9fL9zQJR8P0eAioAf9239345rDA2349bQ8edUA9FgA5JojdsF3A9f6nKLmn
- square EAAA7h9fL9zQJR8P0eAioAf9239345rDA2349bQ8edUA9FgA5JojdsF3A9f6nKLm
- square EAAAvlYh9H7dZwC9ash2hrHjtlL5D2srERGK5OM6F2nvle23he3NzA60PAeFXNHj
validation:
type: Http

View file

@ -3,7 +3,6 @@ rules:
id: kingfisher.twitch.1
pattern: |
(?xi)
\b
twitch
(?:.|[\n\r]){0,32}?
\b
@ -19,7 +18,7 @@ rules:
confidence: medium
examples:
- TWITCH_TOKEN=abcdefghijklmnopqrstuvwx123456
- "twitch_api_token: '0123456789abcdefghijklmnopqrstuv'"
- "twitch_api_token: '0123456789abcdefghijklmnopqrst'"
references:
- https://dev.twitch.tv/docs/authentication/validate-tokens/
validation:

View file

@ -44,7 +44,7 @@ rules:
min_lowercase: 1 # require at least 1 lowercase letter
min_special_chars: 1 # require at least 1 special character
special_chars: "!@#$%^&*()" # optional: custom special character set
exclude_words: # optional: drop matches containing these words
ignore_if_contains: # optional: drop matches containing these words
- test
validation: # (optional) live validation
@ -266,14 +266,16 @@ pattern_requirements:
min_lowercase: 1 # Require at least 1 lowercase letter (a-z)
min_special_chars: 1 # Require at least 1 special character
special_chars: "!@#$%^&*" # Optional: define which characters are "special"
exclude_words: # Optional: reject matches containing any of these (case-insensitive)
ignore_if_contains: # Optional: reject matches containing any of these (case-insensitive)
- test
- demo
```
All fields are optional. If `special_chars` is not specified, the default set includes: ``!@#$%^&*()_+-=[]{}|;:'",.<>?/\`~``
`exclude_words` performs a case-insensitive substring check. If any entry (after trimming whitespace) appears within the match, the match is discarded. This is helpful for dropping known dummy tokens such as "test" or "demo" that otherwise satisfy the regex.
`ignore_if_contains` performs a case-insensitive substring check. If any entry (after trimming whitespace) appears within the match, the match is discarded. This is helpful for dropping known dummy tokens such as "test" or "demo" that otherwise satisfy the regex.
When this filter removes a match it is logged at the `DEBUG` level so you can see exactly which substring caused the skip. If you need to keep every match even when one of these substrings appears, pass `--no-ignore-if-contains` to `kingfisher scan`. The flag disables this post-processing step without changing the rule definitions.
### Example: Secure API Key
@ -295,7 +297,7 @@ rules:
min_uppercase: 1 # Must contain at least 1 uppercase letter
min_lowercase: 1 # Must contain at least 1 lowercase letter
min_special_chars: 1 # Must contain at least 1 special character
exclude_words:
ignore_if_contains:
- test
examples:
- api_key = "MyS3cur3K3y!2024"
@ -307,7 +309,7 @@ In this example:
- The `pattern_requirements` filters out matches that don't have at least one of each required type
- A match like `"abcdefghijklmnopqrst"` would be rejected (no uppercase, no digit, no special)
- A match like `"Abc123!SecureToken"` would be accepted (has all required types)
- A match like `"Test123!SecureToken"` would be rejected because it contains the excluded word `test`
- A match like `"Test123!SecureToken"` would be rejected because it contains the `ignore_if_contains` term `test`
### Example: Excluding Dummy Values
@ -318,13 +320,13 @@ rules:
pattern: |-
(?i)token[:=]\s*([A-Za-z0-9]{12,})
pattern_requirements:
exclude_words:
ignore_if_contains:
- placeholder
- sample
examples:
- token: "REALVALUE1234"
negative_examples:
- token = "SAMPLETOKEN9999" # dropped by exclude_words
- token = "SAMPLETOKEN9999" # dropped by ignore_if_contains
```
### Example: Custom Special Characters

View file

@ -144,6 +144,10 @@ pub struct ScanArgs {
/// Disable inline ignore directives entirely
#[arg(long = "no-ignore", default_value_t = false)]
pub no_inline_ignore: bool,
/// Disable rule-level `ignore_if_contains` filtering for pattern requirements
#[arg(long = "no-ignore-if-contains", default_value_t = false)]
pub no_ignore_if_contains: bool,
}
/// Confidence levels for findings

View file

@ -446,6 +446,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_base64: false,
no_inline_ignore: false,
no_ignore_if_contains: false,
}
}
/// Run the rules check command

View file

@ -29,7 +29,7 @@ use crate::{
parser,
parser::{Checker, Language},
rule_profiling::{ConcurrentRuleProfiler, RuleStats, RuleTimer},
rules::rule::Rule,
rules::rule::{PatternValidationResult, Rule},
rules_database::RulesDatabase,
safe_list::{is_safe_match, is_user_match},
scanner_pool::ScannerPool,
@ -203,6 +203,9 @@ pub struct Matcher<'a> {
/// Configuration that controls inline ignore directives
inline_ignore_config: InlineIgnoreConfig,
/// Whether matches should honour `ignore_if_contains` requirements.
respect_ignore_if_contains: bool,
}
/// This `Drop` implementation updates the `global_stats` with the local stats
impl<'a> Drop for Matcher<'a> {
@ -232,6 +235,7 @@ impl<'a> Matcher<'a> {
shared_profiler: Option<Arc<ConcurrentRuleProfiler>>,
extra_ignore_directives: &[String],
disable_inline_ignores: bool,
respect_ignore_if_contains: bool,
) -> Result<Self> {
// Changed: removed `with_capacity(16384)` so we don't pre-allocate a large Vec
let raw_matches_scratch = Vec::new();
@ -258,6 +262,7 @@ impl<'a> Matcher<'a> {
} else {
InlineIgnoreConfig::new(extra_ignore_directives)
},
respect_ignore_if_contains,
})
}
@ -414,6 +419,7 @@ impl<'a> Matcher<'a> {
redact,
&filename,
self.profiler.as_ref(),
self.respect_ignore_if_contains,
&self.inline_ignore_config,
);
}
@ -439,6 +445,7 @@ impl<'a> Matcher<'a> {
redact,
&filename,
self.profiler.as_ref(),
self.respect_ignore_if_contains,
&self.inline_ignore_config,
);
}
@ -470,6 +477,7 @@ impl<'a> Matcher<'a> {
redact,
&filename,
self.profiler.as_ref(),
self.respect_ignore_if_contains,
&self.inline_ignore_config,
);
}
@ -574,6 +582,7 @@ fn filter_match<'b>(
redact: bool,
filename: &str,
profiler: Option<&Arc<ConcurrentRuleProfiler>>,
respect_ignore_if_contains: bool,
inline_ignore_config: &InlineIgnoreConfig,
) {
let mut timer =
@ -605,12 +614,22 @@ fn filter_match<'b>(
// Check character requirements if specified
if let Some(char_reqs) = rule.pattern_requirements() {
if !char_reqs.validate(mi_bytes) {
debug!(
"Skipping match that does not meet character requirements for rule {}",
rule.id()
);
continue;
match char_reqs.validate(mi_bytes, respect_ignore_if_contains) {
PatternValidationResult::Passed => {}
PatternValidationResult::Failed => {
debug!(
"Skipping match that does not meet character requirements for rule {}",
rule.id()
);
continue;
}
PatternValidationResult::IgnoredBySubstring { matched_term } => {
debug!(
"Skipping match for rule {} because it contains ignored term {matched_term}",
rule.id()
);
continue;
}
}
}
@ -1056,6 +1075,7 @@ mod test {
None,
&[],
false,
true,
)
.unwrap();
@ -1131,6 +1151,7 @@ mod test {
None, // Pass the shared profiler
&[],
false,
true,
)?;
matcher.scan_bytes_raw(input.as_bytes(), "fname")?;
assert_eq!(
@ -1141,7 +1162,7 @@ mod test {
}
#[test]
fn test_pattern_requirements_exclude_words_filters_matches() -> Result<()> {
fn test_pattern_requirements_ignore_if_contains_filters_matches() -> Result<()> {
let rules = vec![Rule::new(RuleSyntax {
id: "test.exclude".to_string(),
name: "exclude words".to_string(),
@ -1160,7 +1181,7 @@ mod test {
min_lowercase: None,
min_special_chars: None,
special_chars: None,
exclude_words: Some(vec!["TEST".to_string()]),
ignore_if_contains: Some(vec!["TEST".to_string()]),
}),
})];
@ -1168,8 +1189,17 @@ mod test {
let input = b"prefixgood prefixtest";
let seen_blobs: BlobIdMap<bool> = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
let mut matcher =
Matcher::new(&rules_db, scanner_pool, &seen_blobs, None, false, None, &[], false)?;
let mut matcher = Matcher::new(
&rules_db,
scanner_pool,
&seen_blobs,
None,
false,
None,
&[],
false,
true,
)?;
let blob = Blob::from_bytes(input.to_vec());
let origin = OriginSet::from(Origin::from_file(PathBuf::from("exclude.txt")));
@ -1184,16 +1214,75 @@ mod test {
}
};
assert_eq!(matches.len(), 1, "exclude_words should drop filtered matches");
assert_eq!(matches.len(), 1, "ignore_if_contains should drop filtered matches");
assert_eq!(
matches[0].matching_input,
b"prefixgood",
matches[0].matching_input, b"prefixgood",
"remaining match should be the non-excluded token",
);
Ok(())
}
/// A `Matcher` constructed with `respect_ignore_if_contains = false` must keep matches
/// that the rule's `ignore_if_contains` list would otherwise drop.
#[test]
fn test_pattern_requirements_ignore_if_contains_can_be_disabled_in_matcher() -> Result<()> {
    // Only the substring filter is configured; every character-class threshold is unset.
    let requirements = PatternRequirements {
        min_digits: None,
        min_uppercase: None,
        min_lowercase: None,
        min_special_chars: None,
        special_chars: None,
        ignore_if_contains: Some(vec!["TEST".to_string()]),
    };
    let rule = Rule::new(RuleSyntax {
        id: "test.exclude".to_string(),
        name: "exclude words".to_string(),
        pattern: "(?P<token>prefix[A-Za-z]+)".to_string(),
        confidence: crate::rules::rule::Confidence::Medium,
        min_entropy: 0.0,
        visible: true,
        examples: vec![],
        negative_examples: vec![],
        references: vec![],
        validation: None,
        depends_on_rule: vec![],
        pattern_requirements: Some(requirements),
    });
    let rules_db = RulesDatabase::from_rules(vec![rule])?;

    let seen_blobs: BlobIdMap<bool> = BlobIdMap::new();
    let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
    // The final argument switches the substring filter off for this matcher.
    let respect_ignore_if_contains = false;
    let mut matcher = Matcher::new(
        &rules_db,
        scanner_pool,
        &seen_blobs,
        None,
        false,
        None,
        &[],
        false,
        respect_ignore_if_contains,
    )?;

    // Two candidate tokens: the second one contains the ignored term "test".
    let blob = Blob::from_bytes(b"prefixgood prefixtest".to_vec());
    let origin = OriginSet::from(Origin::from_file(PathBuf::from("exclude-disabled.txt")));

    let scan_result = matcher.scan_blob(&blob, &origin, None, false, false, false)?;
    let matches = match scan_result {
        ScanResult::New(found) => found,
        ScanResult::SeenWithMatches => panic!(
            "unexpected scan result: blob should not be considered previously seen with matches"
        ),
        ScanResult::SeenSansMatches => panic!(
            "unexpected scan result: blob should not be considered previously seen without matches"
        ),
    };

    assert_eq!(matches.len(), 2, "disabling ignore_if_contains should keep all matches");
    Ok(())
}
// ---------------------------------------------------------------------
// additional deterministic unit-tests
@ -1274,7 +1363,8 @@ mod test {
let rules_db = RulesDatabase::from_rules(vec![rule])?;
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
let mut m = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?;
let mut m =
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
let buf = b"dup dup"; // two literal hits, same rule
@ -1312,7 +1402,7 @@ mod test {
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
let mut matcher =
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?;
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
let blob = Blob::from_bytes(b"let key = \"secret_token\" # kingfisher:ignore".to_vec());
let origin = OriginSet::from(Origin::from_file(PathBuf::from("inline.txt")));
@ -1345,7 +1435,7 @@ mod test {
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
let mut matcher =
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?;
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
let blob = Blob::from_bytes(
br#"let data = """
@ -1390,7 +1480,7 @@ line2
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
let mut matcher =
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?;
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
let matches_without_compat =
match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
ScanResult::New(matches) => matches.len(),
@ -1402,7 +1492,7 @@ line2
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
let extra = vec![String::from("gitleaks:allow")];
let mut matcher =
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &extra, false)?;
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &extra, false, true)?;
match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
ScanResult::New(matches) => assert!(matches.is_empty()),
_ => panic!("unexpected scan result"),

View file

@ -807,6 +807,7 @@ mod tests {
skip_aws_account: Vec::new(),
skip_aws_account_file: None,
no_inline_ignore: false,
no_ignore_if_contains: false,
}
}

View file

@ -180,6 +180,7 @@ mod tests {
skip_aws_account_file: None,
no_base64: false,
no_inline_ignore: false,
no_ignore_if_contains: false,
}
}

View file

@ -72,7 +72,7 @@ pub struct PatternRequirements {
pub special_chars: Option<String>,
/// Words that should cause the match to be excluded when present (case-insensitive)
#[serde(default)]
pub exclude_words: Option<Vec<String>>,
pub ignore_if_contains: Option<Vec<String>>,
}
impl PatternRequirements {
@ -80,8 +80,13 @@ impl PatternRequirements {
const DEFAULT_SPECIAL_CHARS: &'static str = "!@#$%^&*()_+-=[]{}|;:'\",.<>?/\\`~";
/// Validates whether the given byte slice meets the character requirements.
/// Returns true if all requirements are met, false otherwise.
pub fn validate(&self, input: &[u8]) -> bool {
/// Returns the validation outcome, including whether the match should be ignored
/// due to `ignore_if_contains` entries when that behaviour is enabled.
pub fn validate(
&self,
input: &[u8],
respect_ignore_if_contains: bool,
) -> PatternValidationResult {
// Convert to string (lossy for non-UTF8)
let s = String::from_utf8_lossy(input);
@ -89,7 +94,7 @@ impl PatternRequirements {
if let Some(min_digits) = self.min_digits {
let digit_count = s.chars().filter(|c| c.is_ascii_digit()).count();
if digit_count < min_digits {
return false;
return PatternValidationResult::Failed;
}
}
@ -97,7 +102,7 @@ impl PatternRequirements {
if let Some(min_uppercase) = self.min_uppercase {
let uppercase_count = s.chars().filter(|c| c.is_ascii_uppercase()).count();
if uppercase_count < min_uppercase {
return false;
return PatternValidationResult::Failed;
}
}
@ -105,7 +110,7 @@ impl PatternRequirements {
if let Some(min_lowercase) = self.min_lowercase {
let lowercase_count = s.chars().filter(|c| c.is_ascii_lowercase()).count();
if lowercase_count < min_lowercase {
return false;
return PatternValidationResult::Failed;
}
}
@ -115,33 +120,52 @@ impl PatternRequirements {
self.special_chars.as_deref().unwrap_or(Self::DEFAULT_SPECIAL_CHARS);
let special_count = s.chars().filter(|c| special_chars.contains(*c)).count();
if special_count < min_special {
return false;
return PatternValidationResult::Failed;
}
}
// Check exclude words requirement
if let Some(exclude_words) = self.exclude_words.as_ref() {
let lowercase_input = s.to_lowercase();
if exclude_words
.iter()
.filter_map(|word| {
let trimmed = word.trim();
if trimmed.is_empty() {
None
} else {
Some(trimmed.to_lowercase())
}
})
.any(|word| lowercase_input.contains(&word))
{
return false;
// Check ignore-if-contains requirement
if respect_ignore_if_contains {
if let Some(ignore_terms) = self.ignore_if_contains.as_ref() {
let lowercase_input = s.to_lowercase();
if let Some(matched_term) = ignore_terms
.iter()
.filter_map(|term| {
let trimmed = term.trim();
if trimmed.is_empty() {
None
} else {
Some((trimmed, trimmed.to_lowercase()))
}
})
.find_map(|(original, lowered)| {
if lowercase_input.contains(&lowered) {
Some(original.to_string())
} else {
None
}
})
{
return PatternValidationResult::IgnoredBySubstring { matched_term };
}
}
}
true
PatternValidationResult::Passed
}
}
/// Result of validating [`PatternRequirements`] against a potential match.
///
/// Returned by `PatternRequirements::validate` so callers can tell a match that
/// failed a character-class threshold apart from one that was dropped because it
/// contains an `ignore_if_contains` term.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PatternValidationResult {
    /// All requirements are satisfied and the match should be kept.
    Passed,
    /// A minimum digit, uppercase, lowercase, or special-character count was not met.
    Failed,
    /// The match contains one of the `ignore_if_contains` substrings and should be skipped.
    IgnoredBySubstring {
        /// The configured term (whitespace-trimmed, original casing) found in the match.
        matched_term: String,
    },
}
/// Configuration for HTTP validation. This contains a request configuration
/// and an optional multipart configuration.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
@ -549,17 +573,17 @@ mod tests {
min_lowercase: None,
min_special_chars: None,
special_chars: None,
exclude_words: None,
ignore_if_contains: None,
};
// Should pass: has 3 digits
assert!(reqs.validate(b"abc123def"));
assert!(matches!(reqs.validate(b"abc123def", true), PatternValidationResult::Passed));
// Should fail: only 1 digit
assert!(!reqs.validate(b"abc1def"));
assert!(matches!(reqs.validate(b"abc1def", true), PatternValidationResult::Failed));
// Should fail: no digits
assert!(!reqs.validate(b"abcdef"));
assert!(matches!(reqs.validate(b"abcdef", true), PatternValidationResult::Failed));
}
#[test]
@ -570,17 +594,17 @@ mod tests {
min_lowercase: None,
min_special_chars: None,
special_chars: None,
exclude_words: None,
ignore_if_contains: None,
};
// Should pass: has 3 uppercase
assert!(reqs.validate(b"ABCdef"));
assert!(matches!(reqs.validate(b"ABCdef", true), PatternValidationResult::Passed));
// Should fail: only 1 uppercase
assert!(!reqs.validate(b"Adef"));
assert!(matches!(reqs.validate(b"Adef", true), PatternValidationResult::Failed));
// Should fail: no uppercase
assert!(!reqs.validate(b"abcdef"));
assert!(matches!(reqs.validate(b"abcdef", true), PatternValidationResult::Failed));
}
#[test]
@ -591,17 +615,17 @@ mod tests {
min_lowercase: Some(2),
min_special_chars: None,
special_chars: None,
exclude_words: None,
ignore_if_contains: None,
};
// Should pass: has 3 lowercase
assert!(reqs.validate(b"ABCdef"));
assert!(matches!(reqs.validate(b"ABCdef", true), PatternValidationResult::Passed));
// Should fail: only 1 lowercase
assert!(!reqs.validate(b"ABCd"));
assert!(matches!(reqs.validate(b"ABCd", true), PatternValidationResult::Failed));
// Should fail: no lowercase
assert!(!reqs.validate(b"ABC123"));
assert!(matches!(reqs.validate(b"ABC123", true), PatternValidationResult::Failed));
}
#[test]
@ -612,17 +636,17 @@ mod tests {
min_lowercase: None,
min_special_chars: Some(2),
special_chars: None, // uses default
exclude_words: None,
ignore_if_contains: None,
};
// Should pass: has 2 special chars
assert!(reqs.validate(b"abc!@def"));
assert!(matches!(reqs.validate(b"abc!@def", true), PatternValidationResult::Passed));
// Should fail: only 1 special char
assert!(!reqs.validate(b"abc!def"));
assert!(matches!(reqs.validate(b"abc!def", true), PatternValidationResult::Failed));
// Should fail: no special chars
assert!(!reqs.validate(b"abcdef"));
assert!(matches!(reqs.validate(b"abcdef", true), PatternValidationResult::Failed));
}
#[test]
@ -633,17 +657,17 @@ mod tests {
min_lowercase: None,
min_special_chars: Some(2),
special_chars: Some("$%^".to_string()),
exclude_words: None,
ignore_if_contains: None,
};
// Should pass: has 2 custom special chars
assert!(reqs.validate(b"abc$%def"));
assert!(matches!(reqs.validate(b"abc$%def", true), PatternValidationResult::Passed));
// Should fail: has special chars but not the custom ones
assert!(!reqs.validate(b"abc!@def"));
assert!(matches!(reqs.validate(b"abc!@def", true), PatternValidationResult::Failed));
// Should fail: only 1 custom special char
assert!(!reqs.validate(b"abc$def"));
assert!(matches!(reqs.validate(b"abc$def", true), PatternValidationResult::Failed));
}
#[test]
@ -654,60 +678,90 @@ mod tests {
min_lowercase: Some(1),
min_special_chars: Some(1),
special_chars: None,
exclude_words: None,
ignore_if_contains: None,
};
// Should pass: has all requirements
assert!(reqs.validate(b"Abc1!"));
assert!(matches!(reqs.validate(b"Abc1!", true), PatternValidationResult::Passed));
// Should fail: missing digit
assert!(!reqs.validate(b"Abc!"));
assert!(matches!(reqs.validate(b"Abc!", true), PatternValidationResult::Failed));
// Should fail: missing uppercase
assert!(!reqs.validate(b"abc1!"));
assert!(matches!(reqs.validate(b"abc1!", true), PatternValidationResult::Failed));
// Should fail: missing lowercase
assert!(!reqs.validate(b"ABC1!"));
assert!(matches!(reqs.validate(b"ABC1!", true), PatternValidationResult::Failed));
// Should fail: missing special
assert!(!reqs.validate(b"Abc1"));
assert!(matches!(reqs.validate(b"Abc1", true), PatternValidationResult::Failed));
}
#[test]
fn test_pattern_requirements_exclude_words() {
fn test_pattern_requirements_ignore_if_contains() {
let reqs = PatternRequirements {
min_digits: None,
min_uppercase: None,
min_lowercase: None,
min_special_chars: None,
special_chars: None,
exclude_words: Some(vec!["test".to_string(), "Demo".to_string()]),
ignore_if_contains: Some(vec!["test".to_string(), "Demo".to_string()]),
};
// Should fail: contains "test" (case-insensitive)
assert!(!reqs.validate(b"MyTestToken"));
assert!(matches!(
reqs.validate(b"MyTestToken", true),
PatternValidationResult::IgnoredBySubstring { .. }
));
// Should fail: contains "demo" (case-insensitive)
assert!(!reqs.validate(b"example-demo-value"));
assert!(matches!(
reqs.validate(b"example-demo-value", true),
PatternValidationResult::IgnoredBySubstring { .. }
));
// Should pass: does not contain excluded words
assert!(reqs.validate(b"example-value"));
assert!(matches!(reqs.validate(b"example-value", true), PatternValidationResult::Passed));
}
#[test]
fn test_pattern_requirements_exclude_words_ignores_empty_entries() {
fn test_pattern_requirements_ignore_if_contains_ignores_empty_entries() {
let reqs = PatternRequirements {
min_digits: None,
min_uppercase: None,
min_lowercase: None,
min_special_chars: None,
special_chars: None,
exclude_words: Some(vec![" ".to_string(), "".to_string(), "BLOCK".to_string()]),
ignore_if_contains: Some(vec![" ".to_string(), "".to_string(), "BLOCK".to_string()]),
};
// Should fail only when non-empty exclusion matches
assert!(!reqs.validate(b"needs-blocking"));
assert!(reqs.validate(b"allowed"));
assert!(matches!(
reqs.validate(b"needs-blocking", true),
PatternValidationResult::IgnoredBySubstring { .. }
));
assert!(matches!(reqs.validate(b"allowed", true), PatternValidationResult::Passed));
}
/// The `respect_ignore_if_contains` argument of `validate` toggles the substring filter:
/// the same input is reported as ignored when it is `true` and passes when it is `false`.
#[test]
fn test_pattern_requirements_ignore_if_contains_can_be_disabled() {
    let reqs = PatternRequirements {
        min_digits: None,
        min_uppercase: None,
        min_lowercase: None,
        min_special_chars: None,
        special_chars: None,
        ignore_if_contains: Some(vec!["ignoreme".to_string()]),
    };
    let input = b"value-ignoreme";

    // With ignoring enabled, the match is skipped and the offending term is reported.
    assert_eq!(
        reqs.validate(input, true),
        PatternValidationResult::IgnoredBySubstring { matched_term: "ignoreme".to_string() }
    );

    // With ignoring disabled, the same input passes requirements.
    assert_eq!(reqs.validate(input, false), PatternValidationResult::Passed);
}
#[test]
@ -718,12 +772,12 @@ mod tests {
min_lowercase: None,
min_special_chars: None,
special_chars: None,
exclude_words: None,
ignore_if_contains: None,
};
// Should pass: no requirements
assert!(reqs.validate(b"anything"));
assert!(reqs.validate(b"123"));
assert!(reqs.validate(b"!@#"));
assert!(matches!(reqs.validate(b"anything", true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"123", true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"!@#", true), PatternValidationResult::Passed));
}
}

View file

@ -169,6 +169,7 @@ pub fn enumerate_filesystem_inputs(
Some(shared_profiler),
&args.extra_ignore_comments,
args.no_inline_ignore,
!args.no_ignore_if_contains,
)?;
let blob_processor_init_time = Mutex::new(t1.elapsed());
let make_blob_processor = || -> BlobProcessor {

View file

@ -683,6 +683,7 @@ pub async fn fetch_s3_objects(
Some(shared_profiler.clone()),
&args.extra_ignore_comments,
args.no_inline_ignore,
!args.no_ignore_if_contains,
)?;
let mut processor = BlobProcessor { matcher };
@ -764,6 +765,7 @@ pub async fn fetch_gcs_objects(
Some(shared_profiler.clone()),
&args.extra_ignore_comments,
args.no_inline_ignore,
!args.no_ignore_if_contains,
)?;
let mut processor = BlobProcessor { matcher };

View file

@ -148,6 +148,7 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
skip_aws_account_file: None,
no_base64: false,
no_inline_ignore: false,
no_ignore_if_contains: false,
};
let global_args = GlobalArgs {

View file

@ -148,6 +148,7 @@ fn test_bitbucket_remote_scan() -> Result<()> {
no_base64: false,
extra_ignore_comments: Vec::new(),
no_inline_ignore: false,
no_ignore_if_contains: false,
};
let global_args = GlobalArgs {

View file

@ -168,6 +168,7 @@ rules:
no_base64: false,
extra_ignore_comments: Vec::new(),
no_inline_ignore: false,
no_ignore_if_contains: false,
};
let global_args = GlobalArgs {

View file

@ -155,6 +155,7 @@ fn test_github_remote_scan() -> Result<()> {
no_base64: false,
extra_ignore_comments: Vec::new(),
no_inline_ignore: false,
no_ignore_if_contains: false,
};
// Create global arguments
let global_args = GlobalArgs {

View file

@ -153,6 +153,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
skip_aws_account_file: None,
no_base64: false,
no_inline_ignore: false,
no_ignore_if_contains: false,
};
let global_args = GlobalArgs {
@ -304,6 +305,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
no_base64: false,
extra_ignore_comments: Vec::new(),
no_inline_ignore: false,
no_ignore_if_contains: false,
};
let global_args = GlobalArgs {

View file

@ -131,6 +131,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
no_base64: false,
extra_ignore_comments: Vec::new(),
no_inline_ignore: false,
no_ignore_if_contains: false,
};
let global_args = GlobalArgs {

View file

@ -139,6 +139,7 @@ impl TestContext {
skip_aws_account_file: None,
no_base64: false,
no_inline_ignore: false,
no_ignore_if_contains: false,
};
let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?;
@ -278,6 +279,7 @@ async fn test_scan_slack_messages() -> Result<()> {
no_base64: false,
extra_ignore_comments: Vec::new(),
no_inline_ignore: false,
no_ignore_if_contains: false,
};
let global_args = GlobalArgs {

View file

@ -211,6 +211,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
no_base64: false,
extra_ignore_comments: Vec::new(),
no_inline_ignore: false,
no_ignore_if_contains: false,
};
/* --------------------------------------------------------- *

View file

@ -154,6 +154,7 @@ impl TestContext {
no_base64: false,
extra_ignore_comments: Vec::new(),
no_inline_ignore: false,
no_ignore_if_contains: false,
};
let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules)
@ -281,6 +282,7 @@ impl TestContext {
skip_aws_account_file: None,
no_base64: false,
no_inline_ignore: false,
no_ignore_if_contains: false,
};
let global_args = GlobalArgs {

View file

@ -1,9 +1,9 @@
use std::fs;
use assert_cmd::Command;
use clap::Parser;
use predicates::prelude::*;
use tempfile::tempdir;
use clap::Parser;
const GH_PAT: &str = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa";