Added an optional exclude_words list to PatternRequirements so matches containing case-insensitive placeholder words are filtered out, with accompanying tests to cover the new behavior.

2025-11-05 17:19:11 -08:00 · 2025-11-05 17:19:11 -08:00 · dc02abac63
commit dc02abac63
parent 046ac6a052
35 changed files with 299 additions and 108 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.

 ## [Unreleased]
 - Added `pattern_requirements` for rules. Enables post-regex character-class checks (digits, uppercase, lowercase, specials) to reduce false positives without lookarounds. Provides lightweight, in-memory validation after matches, keeping patterns fast and readable.
- Added an optional `exclude_words` list to `PatternRequirements` so matches containing case-insensitive placeholder words are filtered out, with accompanying tests to cover the new behavior.
+- Added an optional `ignore_if_contains` list to a rule's `pattern_requirements` so that matches containing any of the listed placeholder substrings (case-insensitive) are filtered out, with accompanying tests to cover the new behavior.
 - Updated many rules with `pattern_requirements`
 - Automatically set `--no-dedup` whenever `--manage-baseline` is supplied so baseline management retains every occurrence of a finding

--- a/README.md
+++ b/README.md
@ -323,6 +323,21 @@ However, you may want to add your own custom rules, or modify a detection to bet

 First, review [docs/RULES.md](/docs/RULES.md) to learn how to create custom Kingfisher rules.

+### Pattern requirements and placeholder filtering
+
+Every rule can declare optional `pattern_requirements` to enforce additional character checks after a regex matches. Each field
+is independent:
+
+- `min_digits`, `min_uppercase`, `min_lowercase`, and `min_special_chars` enforce complexity thresholds.
+- `special_chars` lets you override the set of characters counted as "special" when `min_special_chars` is used.
+- `ignore_if_contains` lists case-insensitive substrings that should cause a match to be discarded (for example, to drop
+  `test`, `demo`, or `localhost` values). Kingfisher still accepts the legacy `exclude_words` key as an alias when loading
+  existing rule files.
+
+When a match is skipped because of `ignore_if_contains`, Kingfisher logs the event at the `DEBUG` level alongside the rule that
+was evaluated. If you need to keep those matches for a particular scan, pass `--no-ignore-if-contains` to `kingfisher scan` to
+disable the substring filter without editing any rule files.
+
 Once you've done that, you can provide your custom rules (defined in a YAML file) and provide it to Kingfisher at runtime --- no recompiling required!

 # 🎉 Usage
@ -1168,6 +1183,8 @@ leaves the default unchanged.
 - `--skip-aws-account-file <FILE>`: Load AWS account numbers to skip from a file (one account per line; `#` comments allowed)
 - `--ignore-comment <DIRECTIVE>`: Honor additional inline directives from other scanners (repeatable; e.g. `--ignore-comment "gitleaks:allow"`)
 - `--no-ignore`: Disable inline directives entirely so every match is reported
+- `--no-ignore-if-contains`: Disable the rule-level `ignore_if_contains` filter so matches containing placeholder words are still reported as findings
+
 ## Understanding `--confidence`

 The `--confidence` flag sets a minimum confidence threshold, not an exact match.
--- a/data/rules/aiven.yml
+++ b/data/rules/aiven.yml
@ -9,7 +9,7 @@ rules:
      (
        [a-z0-9/+=]{372}      
      )
-      \b
+      (?:[^A-Za-z0-9/+=]|$)
    pattern_requirements:
      min_digits: 2
      min_uppercase: 1
--- a/data/rules/bitbucket.yml
+++ b/data/rules/bitbucket.yml
@ -51,7 +51,7 @@ rules:
    confidence: medium
    examples:
      - bitbucket_key=HedmnK9h6KD_eh9KK8FlI9ahUc8WfaNZ4gulbrtN2ouV
-      - bitbucket_secret=kd8j2h4jf9s8mf6l4k9j2h4jf9s8mf6l4k9j2h4jf9s8mf6l
+      - bitbucket_secret=kd8j2h4jf9s8mf6l4k9j2h4jf9s8mf6l4k9j2h4jf9s8
    validation:
      type: Http
      content:
--- a/data/rules/confluent.yml
+++ b/data/rules/confluent.yml
@ -32,12 +32,12 @@ rules:
      (
        [A-Z0-9\+/]{64}
      )
-      \b
+      (?:[^A-Za-z0-9/+=]|$)
    min_entropy: 3.3
    confidence: medium
    examples:
      - confluent secret=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890ab
-      - kafka_token=ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcdefghijklmnopqrstuvwxyzABCD
+      - kafka_token=ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcdefghijklmnopqrstuvwxyzAB
    references:
      - https://docs.confluent.io/cloud/current/api.html#tag/API-Keys-(iamv2)/operation/getIamV2ApiKey
    validation:
--- a/data/rules/coze.yml
+++ b/data/rules/coze.yml
@ -3,7 +3,6 @@ rules:
    id: kingfisher.coze.1
    pattern: |
      (?xi)
-      \b
      coze
      (?:.|[\n\r]){0,32}?
      \b
@ -37,6 +36,6 @@ rules:
      - https://www.coze.com/docs/developer_guides/coze_api_overview
      - https://www.coze.com/docs/developer_guides/retrieve_files
    examples:
-      - "pat_DlOG7fNcVfmw8cYhPWNcdfwrjjzwDr9EkV8EBjzHdgRWU2DzqHC1pPe0x590NN5f"
-      - "pat_93QiTdIvZGuRCFcfGTQJJ1VIYZ9dNHanX88wKoMojwMk3tX5tKqfFtxUp0ux8CjI"
-      - "pat_WvUTLYq5yZyaqegkyLSxXJMjXAJotjYEuC1sqT8daFlfwM3BiaRVJIZsER42DnhV"
+      - "key_coze = pat_DlOG7fNcVfmw8cYhPWNcdfwrjjzwDr9EkV8EBjzHdgRWU2DzqHC1pPe0x590NN5f"
+      - "coze_token = pat_93QiTdIvZGuRCFcfGTQJJ1VIYZ9dNHanX88wKoMojwMk3tX5tKqfFtxUp0ux8CjI"
+      - "coze-key: pat_WvUTLYq5yZyaqegkyLSxXJMjXAJotjYEuC1sqT8daFlfwM3BiaRVJIZsER42DnhV"
--- a/data/rules/easypost.yml
+++ b/data/rules/easypost.yml
@ -6,7 +6,7 @@ rules:
      \b
      (
        EZ[AT]K
-        [A-Z0-9]{54}
+        [A-Za-z0-9]{54}
      )
      \b
    pattern_requirements:
--- a/data/rules/generic.yml
+++ b/data/rules/generic.yml
@ -10,7 +10,7 @@ rules:
      )
      \b
    pattern_requirements:
-      min_digits: 4
+      min_digits: 2
    min_entropy: 3.3
    confidence: low
    examples:
--- a/data/rules/intercom.yml
+++ b/data/rules/intercom.yml
@ -18,7 +18,7 @@ rules:

    examples:
      - "intercom_access_token: dG9rOvI0NmJlMTA5XzQwM2NfNDVlM184MjQzXzkwMDnmOTE1NGIyONoxOjA="
-      - ic_token = "g1ZsclJXTjNfc1pBSzJDemE0eFVDU0U5c25CeDN4Vm9hQ2Zac0hXemZHNGVDPQ=="
+      - ic_token = "g1ZsclJXTjNfc1pBSzJDemE0eFVDU0U5c25CeDN4Vm9hQ2Zac0hXemZHNPQ=="

    references:
      - https://developers.intercom.com/docs/build-an-integration/learn-more/rest-apis
--- a/data/rules/mongodb.yml
+++ b/data/rules/mongodb.yml
@ -82,7 +82,7 @@ rules:
      )
      \b
    pattern_requirements:
-      exclude_words:
+      ignore_if_contains:
        - "@localhost"
        - "@127.0.0.1"
    min_entropy: 3
--- a/data/rules/odbc.yml
+++ b/data/rules/odbc.yml
@ -6,6 +6,10 @@ rules:
      (?: User | User\ Id | UserId | Uid) \s*=\s* ([^\s;]{3,100}) \s* ;
      [\ \t]* .{0,10} [\ \t]* 
      (?: Password | Pwd) \s*=\s* ([^\t\ ;]{3,100}) \s* (?: [;] | $)
+    pattern_requirements:
+      ignore_if_contains:
+        - "localhost"
+        - "127.0.0.1"
    min_entropy: 3.3
    confidence: medium
    examples:
--- a/data/rules/postgres.yml
+++ b/data/rules/postgres.yml
@ -24,6 +24,10 @@ rules:
      (?:
        \d+
      )
+    pattern_requirements:
+      ignore_if_contains:
+        - "@localhost"
+        - "@127.0.0.1"
    min_entropy: 3.3
    confidence: medium
    examples:
--- a/data/rules/recaptcha.yml
+++ b/data/rules/recaptcha.yml
@ -9,7 +9,7 @@ rules:
      (
        6l[c-f][a-z0-9_-].{36}
      )
-      \b
+      (?:[^A-Za-z0-9/]|$)
    pattern_requirements:
      min_digits: 3
    min_entropy: 3
--- a/data/rules/sentry.yml
+++ b/data/rules/sentry.yml
@ -50,7 +50,7 @@ rules:
    confidence: medium
    examples:
      - sntrys_eyJpYXQiOjE2OTA4ODAwMDAsInJlZ2lvbl91cmwiOiJodHRwczovL3NlbnRyeS5pby9vcmdzL215LW9yZy8ifQ==_abcdefghijklmnopqrstuvwx1234567890abcdefabc
-      - sntrys_eyJpYXQiOiIxNjkwODgwMDAwIiwicmVnaW9uX3VybCI6Imh0dHBzOi8vc2VudHJ5LmlvLyJ9_abcdABCD1234567890abcdABCD1234567890abcdABCD
+      - sntrys_eyJpYXQiOiIxNjkwODgwMDAwIiwicmVnaW9uX3VybCI6Imh0dHBzOi8vc2VudHJ5LmlvLyJ9_abcdABCD1234567890abcdABCD1234567890abcdABC
    references:
      - https://docs.sentry.io/api/auth/
    validation:
--- a/data/rules/square.yml
+++ b/data/rules/square.yml
@ -16,7 +16,7 @@ rules:
    min_entropy: 3.3
    confidence: medium
    examples:
-      - square EAAA7h9fL9zQJR8P0eAioAf9239345rDA2349bQ8edUA9FgA5JojdsF3A9f6nKLmn
+      - square EAAA7h9fL9zQJR8P0eAioAf9239345rDA2349bQ8edUA9FgA5JojdsF3A9f6nKLm
      - square EAAAvlYh9H7dZwC9ash2hrHjtlL5D2srERGK5OM6F2nvle23he3NzA60PAeFXNHj
    validation:
      type: Http
--- a/data/rules/twitch.yml
+++ b/data/rules/twitch.yml
@ -3,7 +3,6 @@ rules:
    id: kingfisher.twitch.1
    pattern: |
      (?xi)
-      \b
      twitch
      (?:.|[\n\r]){0,32}?
      \b
@ -19,7 +18,7 @@ rules:
    confidence: medium
    examples:
      - TWITCH_TOKEN=abcdefghijklmnopqrstuvwx123456
-      - "twitch_api_token: '0123456789abcdefghijklmnopqrstuv'"
+      - "twitch_api_token: '0123456789abcdefghijklmnopqrst'"
    references:
      - https://dev.twitch.tv/docs/authentication/validate-tokens/
    validation:
--- a/docs/RULES.md
+++ b/docs/RULES.md
@ -44,7 +44,7 @@ rules:
      min_lowercase: 1              # require at least 1 lowercase letter
      min_special_chars: 1          # require at least 1 special character
      special_chars: "!@#$%^&*()"   # optional: custom special character set
-      exclude_words:                # optional: drop matches containing these words
+      ignore_if_contains:           # optional: drop matches containing these substrings
        - test

    validation:                     # (optional) live validation
@ -266,14 +266,16 @@ pattern_requirements:
  min_lowercase: 1           # Require at least 1 lowercase letter (a-z)
  min_special_chars: 1       # Require at least 1 special character
  special_chars: "!@#$%^&*"  # Optional: define which characters are "special"
-  exclude_words:             # Optional: reject matches containing any of these (case-insensitive)
+  ignore_if_contains:        # Optional: reject matches containing any of these (case-insensitive)
    - test
    - demo
 ```

 All fields are optional. If `special_chars` is not specified, the default set includes: `!@#$%^&*()_+-=[]{}|;:'",.<>?/\`~`

-`exclude_words` performs a case-insensitive substring check. If any entry (after trimming whitespace) appears within the match, the match is discarded. This is helpful for dropping known dummy tokens such as "test" or "demo" that otherwise satisfy the regex.
+`ignore_if_contains` performs a case-insensitive substring check. If any entry (after trimming whitespace) appears within the match, the match is discarded. This is helpful for dropping known dummy tokens such as "test" or "demo" that otherwise satisfy the regex.
+
+When this filter removes a match it is logged at the `DEBUG` level so you can see exactly which substring caused the skip. If you need to keep every match even when one of these substrings appears, pass `--no-ignore-if-contains` to `kingfisher scan`. The flag disables this post-processing step without changing the rule definitions.

 ### Example: Secure API Key

@ -295,7 +297,7 @@ rules:
      min_uppercase: 1        # Must contain at least 1 uppercase letter
      min_lowercase: 1        # Must contain at least 1 lowercase letter
      min_special_chars: 1    # Must contain at least 1 special character
-      exclude_words:
+      ignore_if_contains:
        - test
    examples:
      - api_key = "MyS3cur3K3y!2024"
@ -307,7 +309,7 @@ In this example:
 - The `pattern_requirements` filters out matches that don't have at least one of each required type
 - A match like `"abcdefghijklmnopqrst"` would be rejected (no uppercase, no digit, no special)
 - A match like `"Abc123!SecureToken"` would be accepted (has all required types)
- A match like `"Test123!SecureToken"` would be rejected because it contains the excluded word `test`
+- A match like `"Test123!SecureToken"` would be rejected because it contains the `ignore_if_contains` term `test`

 ### Example: Excluding Dummy Values

@ -318,13 +320,13 @@ rules:
    pattern: |-
      (?i)token[:=]\s*([A-Za-z0-9]{12,})
    pattern_requirements:
-      exclude_words:
+      ignore_if_contains:
        - placeholder
        - sample
    examples:
      - token: "REALVALUE1234"
    negative_examples:
-      - token = "SAMPLETOKEN9999"  # dropped by exclude_words
+      - token = "SAMPLETOKEN9999"  # dropped by ignore_if_contains
 ```

 ### Example: Custom Special Characters
--- a/src/cli/commands/scan.rs
+++ b/src/cli/commands/scan.rs
@ -144,6 +144,10 @@ pub struct ScanArgs {
    /// Disable inline ignore directives entirely
    #[arg(long = "no-ignore", default_value_t = false)]
    pub no_inline_ignore: bool,
+
+    /// Disable rule-level `ignore_if_contains` filtering for pattern requirements
+    #[arg(long = "no-ignore-if-contains", default_value_t = false)]
+    pub no_ignore_if_contains: bool,
 }

 /// Confidence levels for findings
--- a/src/main.rs
+++ b/src/main.rs
@ -446,6 +446,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
        output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
        no_base64: false,
        no_inline_ignore: false,
+        no_ignore_if_contains: false,
    }
 }
 /// Run the rules check command
--- a/src/matcher.rs
+++ b/src/matcher.rs
@ -29,7 +29,7 @@ use crate::{
    parser,
    parser::{Checker, Language},
    rule_profiling::{ConcurrentRuleProfiler, RuleStats, RuleTimer},
-    rules::rule::Rule,
+    rules::rule::{PatternValidationResult, Rule},
    rules_database::RulesDatabase,
    safe_list::{is_safe_match, is_user_match},
    scanner_pool::ScannerPool,
@ -203,6 +203,9 @@ pub struct Matcher<'a> {

    /// Configuration that controls inline ignore directives
    inline_ignore_config: InlineIgnoreConfig,
+
+    /// Whether matches should honour `ignore_if_contains` requirements.
+    respect_ignore_if_contains: bool,
 }
 /// This `Drop` implementation updates the `global_stats` with the local stats
 impl<'a> Drop for Matcher<'a> {
@ -232,6 +235,7 @@ impl<'a> Matcher<'a> {
        shared_profiler: Option<Arc<ConcurrentRuleProfiler>>,
        extra_ignore_directives: &[String],
        disable_inline_ignores: bool,
+        respect_ignore_if_contains: bool,
    ) -> Result<Self> {
        // Changed: removed `with_capacity(16384)` so we don't pre-allocate a large Vec
        let raw_matches_scratch = Vec::new();
@ -258,6 +262,7 @@ impl<'a> Matcher<'a> {
            } else {
                InlineIgnoreConfig::new(extra_ignore_directives)
            },
+            respect_ignore_if_contains,
        })
    }

@ -414,6 +419,7 @@ impl<'a> Matcher<'a> {
                redact,
                &filename,
                self.profiler.as_ref(),
+                self.respect_ignore_if_contains,
                &self.inline_ignore_config,
            );
        }
@ -439,6 +445,7 @@ impl<'a> Matcher<'a> {
                            redact,
                            &filename,
                            self.profiler.as_ref(),
+                            self.respect_ignore_if_contains,
                            &self.inline_ignore_config,
                        );
                    }
@ -470,6 +477,7 @@ impl<'a> Matcher<'a> {
                        redact,
                        &filename,
                        self.profiler.as_ref(),
+                        self.respect_ignore_if_contains,
                        &self.inline_ignore_config,
                    );
                }
@ -574,6 +582,7 @@ fn filter_match<'b>(
    redact: bool,
    filename: &str,
    profiler: Option<&Arc<ConcurrentRuleProfiler>>,
+    respect_ignore_if_contains: bool,
    inline_ignore_config: &InlineIgnoreConfig,
 ) {
    let mut timer =
@ -605,12 +614,22 @@ fn filter_match<'b>(

        // Check character requirements if specified
        if let Some(char_reqs) = rule.pattern_requirements() {
-            if !char_reqs.validate(mi_bytes) {
-                debug!(
-                    "Skipping match that does not meet character requirements for rule {}",
-                    rule.id()
-                );
-                continue;
+            match char_reqs.validate(mi_bytes, respect_ignore_if_contains) {
+                PatternValidationResult::Passed => {}
+                PatternValidationResult::Failed => {
+                    debug!(
+                        "Skipping match that does not meet character requirements for rule {}",
+                        rule.id()
+                    );
+                    continue;
+                }
+                PatternValidationResult::IgnoredBySubstring { matched_term } => {
+                    debug!(
+                        "Skipping match for rule {} because it contains ignored term {matched_term}",
+                        rule.id()
+                    );
+                    continue;
+                }
            }
        }

@ -1056,6 +1075,7 @@ mod test {
                None,
                &[],
                false,
+                true,
            )
            .unwrap();

@ -1131,6 +1151,7 @@ mod test {
            None, // Pass the shared profiler
            &[],
            false,
+            true,
        )?;
        matcher.scan_bytes_raw(input.as_bytes(), "fname")?;
        assert_eq!(
@ -1141,7 +1162,7 @@ mod test {
    }

    #[test]
-    fn test_pattern_requirements_exclude_words_filters_matches() -> Result<()> {
+    fn test_pattern_requirements_ignore_if_contains_filters_matches() -> Result<()> {
        let rules = vec![Rule::new(RuleSyntax {
            id: "test.exclude".to_string(),
            name: "exclude words".to_string(),
@ -1160,7 +1181,7 @@ mod test {
                min_lowercase: None,
                min_special_chars: None,
                special_chars: None,
-                exclude_words: Some(vec!["TEST".to_string()]),
+                ignore_if_contains: Some(vec!["TEST".to_string()]),
            }),
        })];

@ -1168,8 +1189,17 @@ mod test {
        let input = b"prefixgood prefixtest";
        let seen_blobs: BlobIdMap<bool> = BlobIdMap::new();
        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
-        let mut matcher =
-            Matcher::new(&rules_db, scanner_pool, &seen_blobs, None, false, None, &[], false)?;
+        let mut matcher = Matcher::new(
+            &rules_db,
+            scanner_pool,
+            &seen_blobs,
+            None,
+            false,
+            None,
+            &[],
+            false,
+            true,
+        )?;

        let blob = Blob::from_bytes(input.to_vec());
        let origin = OriginSet::from(Origin::from_file(PathBuf::from("exclude.txt")));
@ -1184,16 +1214,75 @@ mod test {
            }
        };

-        assert_eq!(matches.len(), 1, "exclude_words should drop filtered matches");
+        assert_eq!(matches.len(), 1, "ignore_if_contains should drop filtered matches");
        assert_eq!(
-            matches[0].matching_input,
-            b"prefixgood",
+            matches[0].matching_input, b"prefixgood",
            "remaining match should be the non-excluded token",
        );

        Ok(())
    }

+    #[test]
+    fn test_pattern_requirements_ignore_if_contains_can_be_disabled_in_matcher() -> Result<()> {
+        let rules = vec![Rule::new(RuleSyntax {
+            id: "test.exclude".to_string(),
+            name: "exclude words".to_string(),
+            pattern: "(?P<token>prefix[A-Za-z]+)".to_string(),
+            confidence: crate::rules::rule::Confidence::Medium,
+            min_entropy: 0.0,
+            visible: true,
+            examples: vec![],
+            negative_examples: vec![],
+            references: vec![],
+            validation: None,
+            depends_on_rule: vec![],
+            pattern_requirements: Some(PatternRequirements {
+                min_digits: None,
+                min_uppercase: None,
+                min_lowercase: None,
+                min_special_chars: None,
+                special_chars: None,
+                ignore_if_contains: Some(vec!["TEST".to_string()]),
+            }),
+        })];
+
+        let rules_db = RulesDatabase::from_rules(rules)?;
+        let input = b"prefixgood prefixtest";
+        let seen_blobs: BlobIdMap<bool> = BlobIdMap::new();
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        let mut matcher = Matcher::new(
+            &rules_db,
+            scanner_pool,
+            &seen_blobs,
+            None,
+            false,
+            None,
+            &[],
+            false,
+            false,
+        )?;
+
+        let blob = Blob::from_bytes(input.to_vec());
+        let origin = OriginSet::from(Origin::from_file(PathBuf::from("exclude-disabled.txt")));
+
+        let matches = match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
+            ScanResult::New(matches) => matches,
+            ScanResult::SeenWithMatches => {
+                panic!(
+                    "unexpected scan result: blob should not be considered previously seen with matches"
+                )
+            }
+            ScanResult::SeenSansMatches => {
+                panic!(
+                    "unexpected scan result: blob should not be considered previously seen without matches"
+                )
+            }
+        };
+
+        assert_eq!(matches.len(), 2, "disabling ignore_if_contains should keep all matches");
+        Ok(())
+    }

    // ---------------------------------------------------------------------
    // additional deterministic unit-tests
@ -1274,7 +1363,8 @@ mod test {
        let rules_db = RulesDatabase::from_rules(vec![rule])?;
        let seen = BlobIdMap::new();
        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
-        let mut m = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?;
+        let mut m =
+            Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;

        let buf = b"dup dup"; // two literal hits, same rule

@ -1312,7 +1402,7 @@ mod test {
        let seen = BlobIdMap::new();
        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
        let mut matcher =
-            Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?;
+            Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;

        let blob = Blob::from_bytes(b"let key = \"secret_token\" # kingfisher:ignore".to_vec());
        let origin = OriginSet::from(Origin::from_file(PathBuf::from("inline.txt")));
@ -1345,7 +1435,7 @@ mod test {
        let seen = BlobIdMap::new();
        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
        let mut matcher =
-            Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?;
+            Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;

        let blob = Blob::from_bytes(
            br#"let data = """
@ -1390,7 +1480,7 @@ line2
        let seen = BlobIdMap::new();
        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
        let mut matcher =
-            Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?;
+            Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
        let matches_without_compat =
            match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
                ScanResult::New(matches) => matches.len(),
@ -1402,7 +1492,7 @@ line2
        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
        let extra = vec![String::from("gitleaks:allow")];
        let mut matcher =
-            Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &extra, false)?;
+            Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &extra, false, true)?;
        match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
            ScanResult::New(matches) => assert!(matches.is_empty()),
            _ => panic!("unexpected scan result"),
--- a/src/reporter.rs
+++ b/src/reporter.rs
@ -807,6 +807,7 @@ mod tests {
            skip_aws_account: Vec::new(),
            skip_aws_account_file: None,
            no_inline_ignore: false,
+            no_ignore_if_contains: false,
        }
    }

--- a/src/reporter/json_format.rs
+++ b/src/reporter/json_format.rs
@ -180,6 +180,7 @@ mod tests {
            skip_aws_account_file: None,
            no_base64: false,
            no_inline_ignore: false,
+            no_ignore_if_contains: false,
        }
    }

--- a/src/rules/rule.rs
+++ b/src/rules/rule.rs
@ -72,7 +72,7 @@ pub struct PatternRequirements {
    pub special_chars: Option<String>,
    /// Words that should cause the match to be excluded when present (case-insensitive)
    #[serde(default)]
-    pub exclude_words: Option<Vec<String>>,
+    pub ignore_if_contains: Option<Vec<String>>,
 }

 impl PatternRequirements {
@ -80,8 +80,13 @@ impl PatternRequirements {
    const DEFAULT_SPECIAL_CHARS: &'static str = "!@#$%^&*()_+-=[]{}|;:'\",.<>?/\\`~";

    /// Validates whether the given byte slice meets the character requirements.
-    /// Returns true if all requirements are met, false otherwise.
-    pub fn validate(&self, input: &[u8]) -> bool {
+    /// Returns the validation outcome, including whether the match should be ignored
+    /// due to `ignore_if_contains` entries when that behaviour is enabled.
+    pub fn validate(
+        &self,
+        input: &[u8],
+        respect_ignore_if_contains: bool,
+    ) -> PatternValidationResult {
        // Convert to string (lossy for non-UTF8)
        let s = String::from_utf8_lossy(input);

@ -89,7 +94,7 @@ impl PatternRequirements {
        if let Some(min_digits) = self.min_digits {
            let digit_count = s.chars().filter(|c| c.is_ascii_digit()).count();
            if digit_count < min_digits {
-                return false;
+                return PatternValidationResult::Failed;
            }
        }

@ -97,7 +102,7 @@ impl PatternRequirements {
        if let Some(min_uppercase) = self.min_uppercase {
            let uppercase_count = s.chars().filter(|c| c.is_ascii_uppercase()).count();
            if uppercase_count < min_uppercase {
-                return false;
+                return PatternValidationResult::Failed;
            }
        }

@ -105,7 +110,7 @@ impl PatternRequirements {
        if let Some(min_lowercase) = self.min_lowercase {
            let lowercase_count = s.chars().filter(|c| c.is_ascii_lowercase()).count();
            if lowercase_count < min_lowercase {
-                return false;
+                return PatternValidationResult::Failed;
            }
        }

@ -115,33 +120,52 @@ impl PatternRequirements {
                self.special_chars.as_deref().unwrap_or(Self::DEFAULT_SPECIAL_CHARS);
            let special_count = s.chars().filter(|c| special_chars.contains(*c)).count();
            if special_count < min_special {
-                return false;
+                return PatternValidationResult::Failed;
            }
        }

-        // Check exclude words requirement
-        if let Some(exclude_words) = self.exclude_words.as_ref() {
-            let lowercase_input = s.to_lowercase();
-            if exclude_words
-                .iter()
-                .filter_map(|word| {
-                    let trimmed = word.trim();
-                    if trimmed.is_empty() {
-                        None
-                    } else {
-                        Some(trimmed.to_lowercase())
-                    }
-                })
-                .any(|word| lowercase_input.contains(&word))
-            {
-                return false;
+        // Check ignore-if-contains requirement
+        if respect_ignore_if_contains {
+            if let Some(ignore_terms) = self.ignore_if_contains.as_ref() {
+                let lowercase_input = s.to_lowercase();
+                if let Some(matched_term) = ignore_terms
+                    .iter()
+                    .filter_map(|term| {
+                        let trimmed = term.trim();
+                        if trimmed.is_empty() {
+                            None
+                        } else {
+                            Some((trimmed, trimmed.to_lowercase()))
+                        }
+                    })
+                    .find_map(|(original, lowered)| {
+                        if lowercase_input.contains(&lowered) {
+                            Some(original.to_string())
+                        } else {
+                            None
+                        }
+                    })
+                {
+                    return PatternValidationResult::IgnoredBySubstring { matched_term };
+                }
            }
        }

-        true
+        PatternValidationResult::Passed
    }
 }

+/// Result of validating [`PatternRequirements`] against a potential match.
+#[derive(Debug, PartialEq, Eq)]
+pub enum PatternValidationResult {
+    /// All requirements are satisfied and the match should be kept.
+    Passed,
+    /// Requirements were not satisfied.
+    Failed,
+    /// The match contains one of the `ignore_if_contains` substrings and should be skipped.
+    IgnoredBySubstring { matched_term: String },
+}
+
 /// Configuration for HTTP validation. This contains a request configuration
 /// and an optional multipart configuration.
 #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
@ -549,17 +573,17 @@ mod tests {
            min_lowercase: None,
            min_special_chars: None,
            special_chars: None,
-            exclude_words: None,
+            ignore_if_contains: None,
        };

        // Should pass: has 3 digits
-        assert!(reqs.validate(b"abc123def"));
+        assert!(matches!(reqs.validate(b"abc123def", true), PatternValidationResult::Passed));

        // Should fail: only 1 digit
-        assert!(!reqs.validate(b"abc1def"));
+        assert!(matches!(reqs.validate(b"abc1def", true), PatternValidationResult::Failed));

        // Should fail: no digits
-        assert!(!reqs.validate(b"abcdef"));
+        assert!(matches!(reqs.validate(b"abcdef", true), PatternValidationResult::Failed));
    }

    #[test]
@ -570,17 +594,17 @@ mod tests {
            min_lowercase: None,
            min_special_chars: None,
            special_chars: None,
-            exclude_words: None,
+            ignore_if_contains: None,
        };

        // Should pass: has 3 uppercase
-        assert!(reqs.validate(b"ABCdef"));
+        assert!(matches!(reqs.validate(b"ABCdef", true), PatternValidationResult::Passed));

        // Should fail: only 1 uppercase
-        assert!(!reqs.validate(b"Adef"));
+        assert!(matches!(reqs.validate(b"Adef", true), PatternValidationResult::Failed));

        // Should fail: no uppercase
-        assert!(!reqs.validate(b"abcdef"));
+        assert!(matches!(reqs.validate(b"abcdef", true), PatternValidationResult::Failed));
    }

    #[test]
@ -591,17 +615,17 @@ mod tests {
            min_lowercase: Some(2),
            min_special_chars: None,
            special_chars: None,
-            exclude_words: None,
+            ignore_if_contains: None,
        };

        // Should pass: has 3 lowercase
-        assert!(reqs.validate(b"ABCdef"));
+        assert!(matches!(reqs.validate(b"ABCdef", true), PatternValidationResult::Passed));

        // Should fail: only 1 lowercase
-        assert!(!reqs.validate(b"ABCd"));
+        assert!(matches!(reqs.validate(b"ABCd", true), PatternValidationResult::Failed));

        // Should fail: no lowercase
-        assert!(!reqs.validate(b"ABC123"));
+        assert!(matches!(reqs.validate(b"ABC123", true), PatternValidationResult::Failed));
    }

    #[test]
@ -612,17 +636,17 @@ mod tests {
            min_lowercase: None,
            min_special_chars: Some(2),
            special_chars: None, // uses default
-            exclude_words: None,
+            ignore_if_contains: None,
        };

        // Should pass: has 2 special chars
-        assert!(reqs.validate(b"abc!@def"));
+        assert!(matches!(reqs.validate(b"abc!@def", true), PatternValidationResult::Passed));

        // Should fail: only 1 special char
-        assert!(!reqs.validate(b"abc!def"));
+        assert!(matches!(reqs.validate(b"abc!def", true), PatternValidationResult::Failed));

        // Should fail: no special chars
-        assert!(!reqs.validate(b"abcdef"));
+        assert!(matches!(reqs.validate(b"abcdef", true), PatternValidationResult::Failed));
    }

    #[test]
@ -633,17 +657,17 @@ mod tests {
            min_lowercase: None,
            min_special_chars: Some(2),
            special_chars: Some("$%^".to_string()),
-            exclude_words: None,
+            ignore_if_contains: None,
        };

        // Should pass: has 2 custom special chars
-        assert!(reqs.validate(b"abc$%def"));
+        assert!(matches!(reqs.validate(b"abc$%def", true), PatternValidationResult::Passed));

        // Should fail: has special chars but not the custom ones
-        assert!(!reqs.validate(b"abc!@def"));
+        assert!(matches!(reqs.validate(b"abc!@def", true), PatternValidationResult::Failed));

        // Should fail: only 1 custom special char
-        assert!(!reqs.validate(b"abc$def"));
+        assert!(matches!(reqs.validate(b"abc$def", true), PatternValidationResult::Failed));
    }

    #[test]
@ -654,60 +678,90 @@ mod tests {
            min_lowercase: Some(1),
            min_special_chars: Some(1),
            special_chars: None,
-            exclude_words: None,
+            ignore_if_contains: None,
        };

        // Should pass: has all requirements
-        assert!(reqs.validate(b"Abc1!"));
+        assert!(matches!(reqs.validate(b"Abc1!", true), PatternValidationResult::Passed));

        // Should fail: missing digit
-        assert!(!reqs.validate(b"Abc!"));
+        assert!(matches!(reqs.validate(b"Abc!", true), PatternValidationResult::Failed));

        // Should fail: missing uppercase
-        assert!(!reqs.validate(b"abc1!"));
+        assert!(matches!(reqs.validate(b"abc1!", true), PatternValidationResult::Failed));

        // Should fail: missing lowercase
-        assert!(!reqs.validate(b"ABC1!"));
+        assert!(matches!(reqs.validate(b"ABC1!", true), PatternValidationResult::Failed));

        // Should fail: missing special
-        assert!(!reqs.validate(b"Abc1"));
+        assert!(matches!(reqs.validate(b"Abc1", true), PatternValidationResult::Failed));
    }

    #[test]
-    fn test_pattern_requirements_exclude_words() {
+    fn test_pattern_requirements_ignore_if_contains() {
        let reqs = PatternRequirements {
            min_digits: None,
            min_uppercase: None,
            min_lowercase: None,
            min_special_chars: None,
            special_chars: None,
-            exclude_words: Some(vec!["test".to_string(), "Demo".to_string()]),
+            ignore_if_contains: Some(vec!["test".to_string(), "Demo".to_string()]),
        };

        // Should fail: contains "test" (case-insensitive)
-        assert!(!reqs.validate(b"MyTestToken"));
+        assert!(matches!(
+            reqs.validate(b"MyTestToken", true),
+            PatternValidationResult::IgnoredBySubstring { .. }
+        ));

        // Should fail: contains "demo" (case-insensitive)
-        assert!(!reqs.validate(b"example-demo-value"));
+        assert!(matches!(
+            reqs.validate(b"example-demo-value", true),
+            PatternValidationResult::IgnoredBySubstring { .. }
+        ));

        // Should pass: does not contain excluded words
-        assert!(reqs.validate(b"example-value"));
+        assert!(matches!(reqs.validate(b"example-value", true), PatternValidationResult::Passed));
    }

    #[test]
-    fn test_pattern_requirements_exclude_words_ignores_empty_entries() {
+    fn test_pattern_requirements_ignore_if_contains_ignores_empty_entries() {
        let reqs = PatternRequirements {
            min_digits: None,
            min_uppercase: None,
            min_lowercase: None,
            min_special_chars: None,
            special_chars: None,
-            exclude_words: Some(vec![" ".to_string(), "".to_string(), "BLOCK".to_string()]),
+            ignore_if_contains: Some(vec![" ".to_string(), "".to_string(), "BLOCK".to_string()]),
        };

        // Should fail only when non-empty exclusion matches
-        assert!(!reqs.validate(b"needs-blocking"));
-        assert!(reqs.validate(b"allowed"));
+        assert!(matches!(
+            reqs.validate(b"needs-blocking", true),
+            PatternValidationResult::IgnoredBySubstring { .. }
+        ));
+        assert!(matches!(reqs.validate(b"allowed", true), PatternValidationResult::Passed));
+    }
+
+    #[test]
+    fn test_pattern_requirements_ignore_if_contains_can_be_disabled() {
+        let reqs = PatternRequirements {
+            min_digits: None,
+            min_uppercase: None,
+            min_lowercase: None,
+            min_special_chars: None,
+            special_chars: None,
+            ignore_if_contains: Some(vec!["ignoreme".to_string()]),
+        };
+
+        // With ignoring enabled, the match is skipped
+        assert!(matches!(
+            reqs.validate(b"value-ignoreme", true),
+            PatternValidationResult::IgnoredBySubstring { .. }
+        ));
+
+        // With ignoring disabled, the same input passes requirements
+        assert!(matches!(reqs.validate(b"value-ignoreme", false), PatternValidationResult::Passed));
    }

    #[test]
@ -718,12 +772,12 @@ mod tests {
            min_lowercase: None,
            min_special_chars: None,
            special_chars: None,
-            exclude_words: None,
+            ignore_if_contains: None,
        };

        // Should pass: no requirements
-        assert!(reqs.validate(b"anything"));
-        assert!(reqs.validate(b"123"));
-        assert!(reqs.validate(b"!@#"));
+        assert!(matches!(reqs.validate(b"anything", true), PatternValidationResult::Passed));
+        assert!(matches!(reqs.validate(b"123", true), PatternValidationResult::Passed));
+        assert!(matches!(reqs.validate(b"!@#", true), PatternValidationResult::Passed));
    }
 }
--- a/src/scanner/enumerate.rs
+++ b/src/scanner/enumerate.rs
@ -169,6 +169,7 @@ pub fn enumerate_filesystem_inputs(
        Some(shared_profiler),
        &args.extra_ignore_comments,
        args.no_inline_ignore,
+        !args.no_ignore_if_contains,
    )?;
    let blob_processor_init_time = Mutex::new(t1.elapsed());
    let make_blob_processor = || -> BlobProcessor {
--- a/src/scanner/repos.rs
+++ b/src/scanner/repos.rs
@ -683,6 +683,7 @@ pub async fn fetch_s3_objects(
        Some(shared_profiler.clone()),
        &args.extra_ignore_comments,
        args.no_inline_ignore,
+        !args.no_ignore_if_contains,
    )?;
    let mut processor = BlobProcessor { matcher };

@ -764,6 +765,7 @@ pub async fn fetch_gcs_objects(
        Some(shared_profiler.clone()),
        &args.extra_ignore_comments,
        args.no_inline_ignore,
+        !args.no_ignore_if_contains,
    )?;
    let mut processor = BlobProcessor { matcher };

--- a/tests/int_allowlist.rs
+++ b/tests/int_allowlist.rs
@ -148,6 +148,7 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
        skip_aws_account_file: None,
        no_base64: false,
        no_inline_ignore: false,
+        no_ignore_if_contains: false,
    };

    let global_args = GlobalArgs {
--- a/tests/int_bitbucket.rs
+++ b/tests/int_bitbucket.rs
@ -148,6 +148,7 @@ fn test_bitbucket_remote_scan() -> Result<()> {
        no_base64: false,
        extra_ignore_comments: Vec::new(),
        no_inline_ignore: false,
+        no_ignore_if_contains: false,
    };

    let global_args = GlobalArgs {
--- a/tests/int_dedup.rs
+++ b/tests/int_dedup.rs
@ -168,6 +168,7 @@ rules:
        no_base64: false,
        extra_ignore_comments: Vec::new(),
        no_inline_ignore: false,
+        no_ignore_if_contains: false,
    };

    let global_args = GlobalArgs {
--- a/tests/int_github.rs
+++ b/tests/int_github.rs
@ -155,6 +155,7 @@ fn test_github_remote_scan() -> Result<()> {
        no_base64: false,
        extra_ignore_comments: Vec::new(),
        no_inline_ignore: false,
+        no_ignore_if_contains: false,
    };
    // Create global arguments
    let global_args = GlobalArgs {
--- a/tests/int_gitlab.rs
+++ b/tests/int_gitlab.rs
@ -153,6 +153,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
        skip_aws_account_file: None,
        no_base64: false,
        no_inline_ignore: false,
+        no_ignore_if_contains: false,
    };

    let global_args = GlobalArgs {
@ -304,6 +305,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
        no_base64: false,
        extra_ignore_comments: Vec::new(),
        no_inline_ignore: false,
+        no_ignore_if_contains: false,
    };

    let global_args = GlobalArgs {
--- a/tests/int_redact.rs
+++ b/tests/int_redact.rs
@ -131,6 +131,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
        no_base64: false,
        extra_ignore_comments: Vec::new(),
        no_inline_ignore: false,
+        no_ignore_if_contains: false,
    };

    let global_args = GlobalArgs {
--- a/tests/int_slack.rs
+++ b/tests/int_slack.rs
@ -139,6 +139,7 @@ impl TestContext {
            skip_aws_account_file: None,
            no_base64: false,
            no_inline_ignore: false,
+            no_ignore_if_contains: false,
        };

        let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?;
@ -278,6 +279,7 @@ async fn test_scan_slack_messages() -> Result<()> {
        no_base64: false,
        extra_ignore_comments: Vec::new(),
        no_inline_ignore: false,
+        no_ignore_if_contains: false,
    };

    let global_args = GlobalArgs {
--- a/tests/int_validation_cache.rs
+++ b/tests/int_validation_cache.rs
@ -211,6 +211,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
        no_base64: false,
        extra_ignore_comments: Vec::new(),
        no_inline_ignore: false,
+        no_ignore_if_contains: false,
    };

    /* --------------------------------------------------------- *
--- a/tests/int_vulnerable_files.rs
+++ b/tests/int_vulnerable_files.rs
@ -154,6 +154,7 @@ impl TestContext {
            no_base64: false,
            extra_ignore_comments: Vec::new(),
            no_inline_ignore: false,
+            no_ignore_if_contains: false,
        };

        let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules)
@ -281,6 +282,7 @@ impl TestContext {
            skip_aws_account_file: None,
            no_base64: false,
            no_inline_ignore: false,
+            no_ignore_if_contains: false,
        };

        let global_args = GlobalArgs {
--- a/tests/smoke_baseline.rs
+++ b/tests/smoke_baseline.rs
@ -1,9 +1,9 @@
 use std::fs;

 use assert_cmd::Command;
+use clap::Parser;
 use predicates::prelude::*;
 use tempfile::tempdir;
-use clap::Parser;

 const GH_PAT: &str = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa";