- Added kingfisher:ignore (or kingfisher:allow) to silence a finding inline within a file

- Added: to reuse existing inline directives from other scanners, pass --compat-ignore-comments to also accept NOSONAR, kics-scan ignore, gitleaks:allow and trufflehog:ignore
2025-10-09 17:59:10 -07:00 · 2025-10-09 17:59:10 -07:00 · caf766b731
commit caf766b731
parent dbb97bdcf3
23 changed files with 2924 additions and 5 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -2,6 +2,10 @@

 All notable changes to this project will be documented in this file.

+## [Unreleased]
+- Added `kingfisher:ignore` (or `kingfisher:allow`) inline comment directives to silence a finding from within the scanned file
+- Added `--compat-ignore-comments` to also honor inline directives from other scanners (`gitleaks:allow` and `trufflehog:ignore`)
+
 ## [v1.56.0]
 - Fixed tree-sitter scanning bug where passing --no-base64 caused errors to be printed when the file type couldn’t be determined

--- a/README.md
+++ b/README.md
@ -117,6 +117,7 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md))
  - [Notable Scan Options](#notable-scan-options)
  - [Understanding `--confidence`](#understanding---confidence)
    - [Ignore known false positives](#ignore-known-false-positives)
+    - [Inline ignore directives](#inline-ignore-directives)
  - [Finding Fingerprint](#finding-fingerprint)
  - [Rule Performance Profiling](#rule-performance-profiling)
  - [CLI Options](#cli-options)
@ -962,6 +963,7 @@ leaves the default unchanged.
 - `--manage-baseline`: Create or update the baseline file with current findings
 - `--skip-regex <PATTERN>`: Ignore findings whose text matches this regex (repeatable)
 - `--skip-word <WORD>`: Ignore findings containing this case-insensitive word (repeatable)
+- `--compat-ignore-comments`: Honor inline directives from other scanners (treat `gitleaks:allow` and `trufflehog:ignore` like native suppressions)
 ## Understanding `--confidence`

 The `--confidence` flag sets a minimum confidence threshold, not an exact match.
@ -972,7 +974,21 @@ The `--confidence` flag sets a minimum confidence threshold, not an exact match.

 ### Ignore known false positives

-Use `--skip-regex` and `--skip-word` to suppress findings you know are benign. Both flags may be provided multiple times and are tested against the secret value **and** the full match context. 
+Use `--skip-regex` and `--skip-word` to suppress findings you know are benign. Both flags may be provided multiple times and are tested against the secret value **and** the full match context.
+
+### Inline ignore directives
+
+Add `kingfisher:ignore` (or `kingfisher:allow`) to a trailing comment on the same line as a finding to silence it. A directive in a comment on the line directly above the finding is honored as well. Multi-line strings may also be ignored by placing the directive on the closing delimiter line **or** on the next logical line after the string:
+
+```python
+API_KEY = """
+line 1
+line 2
+"""
+# kingfisher:ignore
+```
+
+Kingfisher recognizes the common comment markers `#`, `//`, `/* */`, `--`, and `*`-prefixed block-comment lines regardless of file type, so you can use this suppression style across languages. To reuse existing inline directives from other scanners, pass `--compat-ignore-comments` to also accept `gitleaks:allow` and `trufflehog:ignore`.

 Patterns passed to `--skip-regex` must be Rust-compatible regular expressions, which you can test at [regex101](https://regex101.com)

--- a/README.md.orig
+++ b/README.md.orig
--- a/src/cli/commands/scan.rs
+++ b/src/cli/commands/scan.rs
@ -118,6 +118,10 @@ pub struct ScanArgs {
    /// Skipwords to allow-list secret matches (case-insensitive, repeatable)
    #[arg(long = "skip-word", value_name = "WORD")]
    pub skip_word: Vec<String>,
+
+    /// Also recognise `gitleaks:allow` and `trufflehog:ignore` inline directives
+    #[arg(long = "compat-ignore-comments", default_value_t = false)]
+    pub compat_ignore_comments: bool,
 }

 /// Confidence levels for findings
--- a/src/inline_ignore.rs
+++ b/src/inline_ignore.rs
@ -0,0 +1,342 @@
+use crate::location::OffsetSpan;
+
+/// Configuration for inline ignore directives.
+///
+/// Holds the directive tokens that suppress a finding when they appear in a
+/// comment near the match. The derived `Default` yields an empty token list,
+/// in which case `should_ignore` always returns `false`.
+#[derive(Clone, Debug, Default)]
+pub struct InlineIgnoreConfig {
+    /// Directive tokens. They are searched for in an ASCII-lowercased copy of
+    /// each candidate line, so every token must itself be lowercase.
+    tokens: Vec<&'static str>,
+}
+
+impl InlineIgnoreConfig {
+    /// Create a new configuration.
+    ///
+    /// The native `kingfisher:ignore` and `kingfisher:allow` tokens are always
+    /// registered.
+    ///
+    /// * `include_external_syntax` - when true, also recognise the comment
+    ///   directives used by other scanners such as Gitleaks and Trufflehog
+    ///   (`gitleaks:allow` and `trufflehog:ignore`).
+    pub fn new(include_external_syntax: bool) -> Self {
+        let mut tokens = vec!["kingfisher:ignore", "kingfisher:allow"];
+        if include_external_syntax {
+            tokens.extend(["gitleaks:allow", "trufflehog:ignore"]);
+        }
+        Self { tokens }
+    }
+
+    /// Returns `true` when at least one directive token is configured.
+    #[inline]
+    fn has_tokens(&self) -> bool {
+        !self.tokens.is_empty()
+    }
+
+    /// Returns `true` when the provided blob slice contains an inline ignore
+    /// directive that should suppress a finding for the given span.
+    ///
+    /// Lines are examined in this order, returning as soon as a directive is
+    /// found:
+    ///
+    /// 1. the line containing `span.start`;
+    /// 2. the lines above it, walking upwards while each visited line is
+    ///    "skippable" (see `should_skip_for_directive_search`);
+    /// 3. the line containing the last byte of the span, when that is a
+    ///    different line from (1);
+    /// 4. the lines below it, walking downwards under the same skip rule.
+    ///
+    /// In (2) and (4) a line is tested for a directive *before* the skip rule
+    /// is applied, so the nearest non-empty neighbouring line is always
+    /// checked even when it is ordinary code.
+    pub fn should_ignore(&self, blob_bytes: &[u8], span: &OffsetSpan) -> bool {
+        if !self.has_tokens() {
+            return false;
+        }
+
+        // (1) The line on which the match starts.
+        let (start_line_start, start_line_end) = line_bounds(blob_bytes, span.start);
+        if start_line_end > start_line_start {
+            let start_line = &blob_bytes[start_line_start..start_line_end];
+            if line_has_directive(start_line, &self.tokens) {
+                return true;
+            }
+        }
+
+        // Scan backwards to allow directives that appear before the start of a
+        // multi-line string or value. This mirrors tools like Gitleaks where
+        // the ignore directive is often placed immediately above the secret.
+        let mut cursor = start_line_start;
+        while cursor > 0 {
+            // `cursor` is the first byte of a line, so `cursor - 1` is the
+            // newline that terminates the previous line; `line_bounds` on that
+            // index yields the previous line without its newline.
+            let previous_index = cursor.saturating_sub(1);
+            let (prev_start, prev_end) = line_bounds(blob_bytes, previous_index);
+            // A completely empty previous line stops the upward scan (unlike
+            // the downward scan below, which steps over empty lines).
+            if prev_end <= prev_start {
+                break;
+            }
+
+            let prev_line = &blob_bytes[prev_start..prev_end];
+            if line_has_directive(prev_line, &self.tokens) {
+                return true;
+            }
+
+            // Only keep climbing past whitespace-only or quote-delimiter lines.
+            if !should_skip_for_directive_search(prev_line) {
+                break;
+            }
+
+            if prev_start == 0 {
+                break;
+            }
+
+            cursor = prev_start;
+        }
+
+        // (3) `span.end` is used as an exclusive bound here, so the last
+        // matched byte sits at `span.end - 1`.
+        let end_index = if span.end == 0 { 0 } else { span.end - 1 };
+        let (closing_line_start, closing_line_end) =
+            line_bounds(blob_bytes, end_index.min(blob_bytes.len()));
+        // Skip the check when the match ends on the line already tested in (1).
+        if closing_line_end > closing_line_start
+            && (closing_line_start != start_line_start || closing_line_end != start_line_end)
+        {
+            let closing_line = &blob_bytes[closing_line_start..closing_line_end];
+            if line_has_directive(closing_line, &self.tokens) {
+                return true;
+            }
+        }
+
+        // Also consider lines after the match so that multi-line strings can be
+        // ignored when the directive appears after the closing delimiter (a
+        // common pattern in languages like Python).
+        let mut cursor = closing_line_end;
+        while cursor < blob_bytes.len() {
+            // Step over newline bytes; this also skips fully empty lines.
+            if blob_bytes[cursor] == b'\n' {
+                cursor += 1;
+                continue;
+            }
+
+            let (_, next_end) = line_bounds(blob_bytes, cursor);
+            if next_end <= cursor {
+                break;
+            }
+
+            let next_line = &blob_bytes[cursor..next_end];
+            if line_has_directive(next_line, &self.tokens) {
+                return true;
+            }
+
+            if !should_skip_for_directive_search(next_line) {
+                break;
+            }
+
+            cursor = next_end;
+        }
+
+        false
+    }
+}
+
+/// Decides whether a directive search may step over `line` and keep looking.
+///
+/// After trimming ASCII whitespace, a line is skippable when it is empty,
+/// consists solely of one repeated quote character (`"`, `'` or `` ` ``), or
+/// ends with a run of at least three identical quote characters.
+fn should_skip_for_directive_search(line: &[u8]) -> bool {
+    let body = trim_ascii_whitespace(line);
+
+    match body.first() {
+        // Blank (or whitespace-only) line.
+        None => true,
+        Some(&lead) => {
+            let only_quotes =
+                matches!(lead, b'"' | b'\'' | b'`') && body.iter().all(|&b| b == lead);
+            only_quotes || ends_with_multiline_delimiter(body)
+        }
+    }
+}
+
+/// Returns `true` when `trimmed` ends with at least three identical quote
+/// characters (`"`, `'` or `` ` ``), e.g. the `"""` that opens or closes a
+/// multi-line string.
+fn ends_with_multiline_delimiter(trimmed: &[u8]) -> bool {
+    match trimmed.last().copied() {
+        Some(quote) if matches!(quote, b'"' | b'\'' | b'`') => {
+            // Length of the run of `quote` bytes at the end of the slice.
+            let run = trimmed.iter().rev().take_while(|&&b| b == quote).count();
+            run >= 3
+        }
+        _ => false,
+    }
+}
+
+/// Returns the sub-slice of `line` with leading and trailing ASCII whitespace
+/// removed. A line that is empty or all whitespace yields an empty slice.
+fn trim_ascii_whitespace(line: &[u8]) -> &[u8] {
+    match line.iter().position(|b| !b.is_ascii_whitespace()) {
+        // Nothing but whitespace (or nothing at all).
+        None => &line[line.len()..],
+        Some(first) => {
+            // A non-whitespace byte exists, so the reverse search always
+            // succeeds; `first` is only a formal fallback.
+            let last = line
+                .iter()
+                .rposition(|b| !b.is_ascii_whitespace())
+                .unwrap_or(first);
+            &line[first..=last]
+        }
+    }
+}
+
+/// Returns the `(start, end)` byte offsets of the line that contains `index`.
+///
+/// `start` is the offset just after the preceding `\n` (or 0) and `end` is the
+/// offset of the terminating `\n` (or `bytes.len()`), so `bytes[start..end]`
+/// never includes a newline. `index` is clamped to `bytes.len()`; when `index`
+/// points at a `\n`, the line that newline terminates is returned.
+fn line_bounds(bytes: &[u8], index: usize) -> (usize, usize) {
+    let pivot = index.min(bytes.len());
+
+    let start = bytes[..pivot]
+        .iter()
+        .rposition(|&b| b == b'\n')
+        .map_or(0, |newline| newline + 1);
+    let end = bytes[pivot..]
+        .iter()
+        .position(|&b| b == b'\n')
+        .map_or(bytes.len(), |offset| pivot + offset);
+
+    (start, end)
+}
+
+/// Returns `true` when `line` contains any of `tokens` directly after a
+/// recognised comment marker.
+///
+/// Matching is ASCII case-insensitive on the line side: tokens are searched
+/// for in a lowercased copy of `line`, so the tokens themselves must be
+/// lowercase. Every occurrence of every token is tried until one passes
+/// `is_comment_prefix`.
+fn line_has_directive(line: &[u8], tokens: &[&'static str]) -> bool {
+    if line.is_empty() {
+        return false;
+    }
+
+    let haystack: Vec<u8> = line.iter().map(u8::to_ascii_lowercase).collect();
+
+    tokens.iter().any(|token| {
+        let needle = token.as_bytes();
+        let mut from = 0;
+        while from < haystack.len() {
+            match memchr::memmem::find(&haystack[from..], needle) {
+                Some(relative) => {
+                    let at = from + relative;
+                    // Offsets are identical in `line` and its lowercased copy.
+                    if is_comment_prefix(line, at) {
+                        return true;
+                    }
+                    from = at + needle.len();
+                }
+                None => break,
+            }
+        }
+        false
+    })
+}
+
+/// Reports whether the text immediately before `token_index` (ignoring ASCII
+/// whitespace) ends in a recognised comment marker.
+///
+/// Accepted markers are `#`, `//`, `/*`, `*/` and `--`, each of which must
+/// either start the line or be preceded by ASCII whitespace, plus a lone `*`
+/// that has nothing but whitespace before it (a block-comment continuation
+/// line). Returns `false` when `token_index` is 0 or lies past the end of
+/// `line`.
+fn is_comment_prefix(line: &[u8], token_index: usize) -> bool {
+    // An empty line is covered too: every index is then either 0 or too large.
+    if token_index == 0 || token_index > line.len() {
+        return false;
+    }
+
+    // Length of the text before the token once trailing whitespace is dropped.
+    let prefix_len = line[..token_index]
+        .iter()
+        .rposition(|b| !b.is_ascii_whitespace())
+        .map_or(0, |last| last + 1);
+    if prefix_len == 0 {
+        return false;
+    }
+
+    // True when `bytes` is empty or ends in ASCII whitespace.
+    let at_boundary = |bytes: &[u8]| bytes.last().map_or(true, |b| b.is_ascii_whitespace());
+
+    match &line[..prefix_len] {
+        [before @ .., b'#'] => at_boundary(before),
+        [before @ .., b'/', b'/']
+        | [before @ .., b'*', b'/']
+        | [before @ .., b'-', b'-']
+        | [before @ .., b'/', b'*'] => at_boundary(before),
+        // A bare `*` only counts when it is the first non-blank byte.
+        [before @ .., b'*'] => before.iter().all(u8::is_ascii_whitespace),
+        _ => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{
+        is_comment_prefix, line_bounds, line_has_directive, should_skip_for_directive_search,
+        trim_ascii_whitespace, InlineIgnoreConfig,
+    };
+    use crate::location::OffsetSpan;
+
+    /// Each offset passed to `is_comment_prefix` is the byte index at which
+    /// the directive token starts in the sample line.
+    #[test]
+    fn detects_comment_prefixes() {
+        assert!(is_comment_prefix(b"// kingfisher:ignore", 3));
+        assert!(is_comment_prefix(b"  # kingfisher:ignore", 4));
+        assert!(is_comment_prefix(b"value /* kingfisher:ignore */", 9));
+        assert!(is_comment_prefix(b"value -- kingfisher:ignore", 9));
+        assert!(is_comment_prefix(b" * kingfisher:ignore", 3));
+        // The `//` of a URL scheme is not preceded by whitespace, so it must
+        // not be mistaken for a comment marker.
+        assert!(!is_comment_prefix(b"http://kingfisher:ignore", 7));
+    }
+
+    /// `line_bounds` returns newline-exclusive ranges and clamps the index.
+    #[test]
+    fn bounds_cover_expected_ranges() {
+        let data = b"one\ntwo\nthree";
+        assert_eq!(line_bounds(data, 0), (0, 3));
+        assert_eq!(line_bounds(data, 4), (4, 7));
+        assert_eq!(line_bounds(data, data.len()), (8, 13));
+    }
+
+    /// A token only counts when it follows a comment marker on the line.
+    #[test]
+    fn detects_directives_in_lines() {
+        let tokens = ["kingfisher:ignore", "kingfisher:allow"];
+        assert!(line_has_directive(b"secret # kingfisher:ignore", &tokens));
+        assert!(!line_has_directive(b"secret", &tokens));
+    }
+
+    /// A leading `*` (block-comment continuation line) is a valid marker.
+    #[test]
+    fn respects_multiline_block_comment_prefix() {
+        let tokens = ["kingfisher:ignore"];
+        assert!(line_has_directive(b" * kingfisher:ignore", &tokens));
+    }
+
+    /// Directive placed after the closing delimiter; the match span includes
+    /// the newline that follows its last line.
+    #[test]
+    fn ignores_multi_line_string_with_trailing_comment() {
+        let blob = b"let secret = \"\"\"\nline1\nline2\n\"\"\"\n# kingfisher:ignore\n";
+        let matched = b"line1\nline2\n";
+        let start = blob
+            .windows(matched.len())
+            .position(|window| window == matched)
+            .expect("match bytes present");
+        let span = OffsetSpan::from_range(start..start + matched.len());
+        let config = InlineIgnoreConfig::new(false);
+        assert!(config.should_ignore(blob, &span));
+    }
+
+    /// Same layout, but the match span stops before the trailing newline.
+    #[test]
+    fn ignores_multiline_without_trailing_newline() {
+        let blob = b"let secret = \"\"\"\nline1\nline2\n\"\"\"\n# kingfisher:ignore\n";
+        let matched = b"line1\nline2";
+        let start = blob
+            .windows(matched.len())
+            .position(|window| window == matched)
+            .expect("match bytes present");
+        let span = OffsetSpan::from_range(start..start + matched.len());
+        let config = InlineIgnoreConfig::new(false);
+        assert!(config.should_ignore(blob, &span));
+    }
+
+    /// Directive placed above the statement that opens the multi-line string.
+    #[test]
+    fn ignores_multiline_with_directive_before_secret() {
+        let blob = b"// kingfisher:ignore\nlet secret = \"\"\"\nline1\nline2\n\"\"\"\n";
+        let matched = b"line1\nline2\n";
+        let start = blob
+            .windows(matched.len())
+            .position(|window| window == matched)
+            .expect("match bytes present");
+        let span = OffsetSpan::from_range(start..start + matched.len());
+        let config = InlineIgnoreConfig::new(false);
+        assert!(config.should_ignore(blob, &span));
+    }
+
+    #[test]
+    fn trim_ascii_whitespace_returns_inner_slice() {
+        assert_eq!(trim_ascii_whitespace(b"  abc  "), b"abc");
+        assert!(trim_ascii_whitespace(b"   ").is_empty());
+    }
+
+    /// Lines made only of quote delimiters, or ending in a triple quote, may
+    /// be stepped over while searching for a directive.
+    #[test]
+    fn skips_lines_with_only_delimiters() {
+        assert!(should_skip_for_directive_search(b"\"\"\""));
+        assert!(should_skip_for_directive_search(b"   \"\"\"   "));
+        assert!(should_skip_for_directive_search(b"let secret = \"\"\""));
+        assert!(!should_skip_for_directive_search(b"value"));
+    }
+}
--- a/src/lib.rs
+++ b/src/lib.rs
@ -21,6 +21,7 @@ pub mod git_url;
 pub mod gitea;
 pub mod github;
 pub mod gitlab;
+pub mod inline_ignore;
 pub mod jira;
 pub mod liquid_filters;
 pub mod location;
--- a/src/main.rs
+++ b/src/main.rs
@ -416,6 +416,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
            since_commit: None,
            branch: None,
        },
+        compat_ignore_comments: false,
        content_filtering_args: ContentFilteringArgs {
            max_file_size_mb: 25.0,
            no_extract_archives: true,
--- a/src/matcher.rs
+++ b/src/matcher.rs
@ -23,6 +23,7 @@ use xxhash_rust::xxh3::xxh3_64;
 use crate::{
    blob::{Blob, BlobId, BlobIdMap},
    entropy::calculate_shannon_entropy,
+    inline_ignore::InlineIgnoreConfig,
    location::{Location, LocationMapping, OffsetSpan, SourcePoint, SourceSpan},
    origin::OriginSet,
    parser,
@ -199,6 +200,9 @@ pub struct Matcher<'a> {

    /// Rule profiler for measuring performance of individual rules
    profiler: Option<Arc<ConcurrentRuleProfiler>>,
+
+    /// Configuration that controls inline ignore directives
+    inline_ignore_config: InlineIgnoreConfig,
 }
 /// This `Drop` implementation updates the `global_stats` with the local stats
 impl<'a> Drop for Matcher<'a> {
@ -226,6 +230,7 @@ impl<'a> Matcher<'a> {
        global_stats: Option<&'a Mutex<MatcherStats>>,
        enable_profiling: bool,
        shared_profiler: Option<Arc<ConcurrentRuleProfiler>>,
+        include_external_ignore_syntax: bool,
    ) -> Result<Self> {
        // Changed: removed `with_capacity(16384)` so we don't pre-allocate a large Vec
        let raw_matches_scratch = Vec::new();
@ -247,6 +252,7 @@ impl<'a> Matcher<'a> {
            seen_blobs,
            user_data,
            profiler,
+            inline_ignore_config: InlineIgnoreConfig::new(include_external_ignore_syntax),
        })
    }

@ -403,6 +409,7 @@ impl<'a> Matcher<'a> {
                redact,
                &filename,
                self.profiler.as_ref(),
+                &self.inline_ignore_config,
            );
        }
        // If tree-sitter produced base64-decoded matches, try them against all rules
@ -427,6 +434,7 @@ impl<'a> Matcher<'a> {
                            redact,
                            &filename,
                            self.profiler.as_ref(),
+                            &self.inline_ignore_config,
                        );
                    }
                }
@ -457,6 +465,7 @@ impl<'a> Matcher<'a> {
                        redact,
                        &filename,
                        self.profiler.as_ref(),
+                        &self.inline_ignore_config,
                    );
                }
                if depth + 1 < MAX_B64_DEPTH {
@ -560,6 +569,7 @@ fn filter_match<'b>(
    redact: bool,
    filename: &str,
    profiler: Option<&Arc<ConcurrentRuleProfiler>>,
+    inline_ignore_config: &InlineIgnoreConfig,
 ) {
    let mut timer =
        profiler.map(|p| RuleTimer::new(p, rule.id(), rule.name(), &rule.syntax.pattern, filename));
@ -590,6 +600,10 @@ fn filter_match<'b>(
        let matching_input_offset_span = OffsetSpan::from_range(
            (start + matching_input.start())..(start + matching_input.end()),
        );
+        if inline_ignore_config.should_ignore(blob_bytes, &matching_input_offset_span) {
+            debug!("Skipping match due to inline ignore directive");
+            continue;
+        }
        let match_key = compute_match_key(
            matching_input.as_bytes(),
            rule.id().as_bytes(),
@ -961,7 +975,7 @@ pub fn compute_finding_fingerprint(
 // -------------------------------------------------------------------------------------------------
 #[cfg(test)]
 mod test {
-    use std::collections::BTreeMap;
+    use std::{collections::BTreeMap, path::PathBuf};

    use pretty_assertions::assert_eq;
    // ---------------------------------------------------------------------
@ -970,7 +984,11 @@ mod test {
    use proptest::prelude::*;

    use super::*;
-    use crate::rules::rule::{DependsOnRule, HttpRequest, HttpValidation, RuleSyntax, Validation};
+    use crate::{
+        blob::{Blob, BlobIdMap},
+        origin::{Origin, OriginSet},
+        rules::rule::{DependsOnRule, HttpRequest, HttpValidation, RuleSyntax, Validation},
+    };

    proptest! {
        #[test]
@ -1009,7 +1027,7 @@ mod test {
            let rules_db  = RulesDatabase::from_rules(vec![rule]).unwrap();
            let seen      = BlobIdMap::new();
            let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
-            let mut m     = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None).unwrap();
+            let mut m     = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, false).unwrap();

            // ── run the scan ──────────────────────────────────────────────
            m.scan_bytes_raw(&noise, "buf").unwrap();
@ -1080,6 +1098,7 @@ mod test {
            None,
            enable_rule_profiling,
            None, // Pass the shared profiler
+            false,
        )?;
        matcher.scan_bytes_raw(input.as_bytes(), "fname")?;
        assert_eq!(
@ -1167,7 +1186,7 @@ mod test {
        let rules_db = RulesDatabase::from_rules(vec![rule])?;
        let seen = BlobIdMap::new();
        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
-        let mut m = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None)?;
+        let mut m = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, false)?;

        let buf = b"dup dup"; // two literal hits, same rule

@ -1184,4 +1203,115 @@ mod test {
        assert_eq!(second_len, 2);
        Ok(())
    }
+
+    /// A match that shares its line with a trailing `# kingfisher:ignore`
+    /// comment must be dropped: scanning the blob is expected to yield
+    /// `ScanResult::New` with no matches.
+    #[test]
+    fn inline_comment_skips_match() -> Result<()> {
+        // Literal-pattern rule with zero minimum entropy.
+        let rule = Rule::new(RuleSyntax {
+            id: "inline.ignore".into(),
+            name: "inline".into(),
+            pattern: "secret_token".into(),
+            confidence: crate::rules::rule::Confidence::Low,
+            min_entropy: 0.0,
+            visible: true,
+            examples: vec![],
+            negative_examples: vec![],
+            references: vec![],
+            validation: None::<Validation>,
+            depends_on_rule: vec![],
+        });
+        let rules_db = RulesDatabase::from_rules(vec![rule])?;
+        let seen = BlobIdMap::new();
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        // Final argument `false`: only the native kingfisher directives apply.
+        let mut matcher = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, false)?;
+
+        let blob = Blob::from_bytes(b"let key = \"secret_token\" # kingfisher:ignore".to_vec());
+        let origin = OriginSet::from(Origin::from_file(PathBuf::from("inline.txt")));
+
+        match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
+            ScanResult::New(matches) => assert!(matches.is_empty()),
+            _ => panic!("unexpected scan result"),
+        }
+
+        Ok(())
+    }
+
+    /// A match spanning two lines inside a triple-quoted string must be
+    /// dropped when `# kingfisher:ignore` appears on the line after the
+    /// closing delimiter.
+    #[test]
+    fn inline_comment_after_multiline_secret_skips_match() -> Result<()> {
+        // The pattern crosses the newline between `line1` and `line2`.
+        let rule = Rule::new(RuleSyntax {
+            id: "inline.multiline".into(),
+            name: "inline multiline".into(),
+            pattern: "line1\\s+line2".into(),
+            confidence: crate::rules::rule::Confidence::Low,
+            min_entropy: 0.0,
+            visible: true,
+            examples: vec![],
+            negative_examples: vec![],
+            references: vec![],
+            validation: None::<Validation>,
+            depends_on_rule: vec![],
+        });
+        let rules_db = RulesDatabase::from_rules(vec![rule])?;
+        let seen = BlobIdMap::new();
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        let mut matcher = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, false)?;
+
+        let blob = Blob::from_bytes(
+            br#"let data = """
+line1
+line2
+"""
+# kingfisher:ignore
+"#
+            .to_vec(),
+        );
+        let origin = OriginSet::from(Origin::from_file(PathBuf::from("multiline.txt")));
+
+        match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
+            ScanResult::New(matches) => assert!(matches.is_empty()),
+            _ => panic!("unexpected scan result"),
+        }
+
+        Ok(())
+    }
+
+    /// The last argument of `Matcher::new` decides whether a
+    /// `# gitleaks:allow` comment suppresses a finding: with `false` the
+    /// match is reported, with `true` it is dropped.
+    #[test]
+    fn compat_flag_controls_external_directives() -> Result<()> {
+        let rule = Rule::new(RuleSyntax {
+            id: "inline.compat".into(),
+            name: "inline compat".into(),
+            pattern: "supersecret123".into(),
+            confidence: crate::rules::rule::Confidence::Low,
+            min_entropy: 0.0,
+            visible: true,
+            examples: vec![],
+            negative_examples: vec![],
+            references: vec![],
+            validation: None::<Validation>,
+            depends_on_rule: vec![],
+        });
+        let rules_db = RulesDatabase::from_rules(vec![rule])?;
+
+        let blob = Blob::from_bytes(b"token = \"supersecret123\" # gitleaks:allow".to_vec());
+        let origin = OriginSet::from(Origin::from_file(PathBuf::from("compat.txt")));
+
+        // Compat mode off: the foreign directive is not recognised.
+        let seen = BlobIdMap::new();
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        let mut matcher = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, false)?;
+        let matches_without_compat =
+            match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
+                ScanResult::New(matches) => matches.len(),
+                _ => panic!("unexpected scan result"),
+            };
+        assert_eq!(matches_without_compat, 1, "directive should be ignored without compat flag");
+
+        // Compat mode on, with a fresh `seen` map and matcher for the same blob.
+        let seen = BlobIdMap::new();
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        let mut matcher = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, true)?;
+        match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
+            ScanResult::New(matches) => assert!(matches.is_empty()),
+            _ => panic!("unexpected scan result"),
+        }
+
+        Ok(())
+    }
 }
--- a/src/matcher.rs.orig
+++ b/src/matcher.rs.orig
--- a/src/matcher.rs.rej
+++ b/src/matcher.rs.rej
@ -0,0 +1,52 @@
+@@ -1,50 +1,51 @@
+ use std::{
+     hash::{Hash, Hasher},
+     str,
+     sync::{Arc, Mutex},
+ };
+ 
+ use anyhow::Result;
+ use base64::{engine::general_purpose, Engine};
+ use bstr::BString;
+ use http::StatusCode;
+ use regex::bytes::Regex;
+ use rustc_hash::{FxHashMap, FxHashSet, FxHasher};
+ use schemars::{
+     gen::SchemaGenerator,
+     schema::{ArrayValidation, InstanceType, Schema},
+     JsonSchema,
+ };
+ use serde::{Deserialize, Serialize};
+ use smallvec::SmallVec;
+ use tracing::debug;
+ use xxhash_rust::xxh3::xxh3_64;
+ 
+ use crate::{
+     blob::{Blob, BlobId, BlobIdMap},
+     entropy::calculate_shannon_entropy,
+    inline_ignore::InlineIgnoreConfig,
+     location::{Location, LocationMapping, OffsetSpan, SourcePoint, SourceSpan},
+     origin::OriginSet,
+     parser,
+     parser::{Checker, Language},
+     rule_profiling::{ConcurrentRuleProfiler, RuleStats, RuleTimer},
+     rules::rule::Rule,
+     rules_database::RulesDatabase,
+     safe_list::{is_safe_match, is_user_match},
+     scanner_pool::ScannerPool,
+     snippet::Base64BString,
+     util::{intern, redact_value},
+ };
+ 
+ const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment
+ const CHUNK_OVERLAP: usize = 64 * 1024; // 64 KiB overlap to catch boundary matches
+ const BASE64_SCAN_LIMIT: usize = 64 * 1024 * 1024; // skip expensive Base64 pass on huge blobs
+ const TREE_SITTER_SCAN_LIMIT: usize = 64 * 1024; // only run tree-sitter on blobs ≤64 KiB
+ 
+ // -------------------------------------------------------------------------------------------------
+ // RawMatch
+ // -------------------------------------------------------------------------------------------------
+ /// A raw match, as recorded by a callback to Vectorscan.
+ ///
+ /// When matching with Vectorscan, we simply collect all matches into a
+ /// preallocated `Vec`, and then go through them all after scanning is complete.
--- a/src/reporter.rs
+++ b/src/reporter.rs
@ -842,6 +842,7 @@ mod tests {
                since_commit: None,
                branch: None,
            },
+            compat_ignore_comments: false,
            content_filtering_args: ContentFilteringArgs {
                max_file_size_mb: 256.0,
                exclude: Vec::new(),
--- a/src/reporter/json_format.rs
+++ b/src/reporter/json_format.rs
@ -143,6 +143,7 @@ mod tests {
                since_commit: None,
                branch: None,
            },
+            compat_ignore_comments: false,
            content_filtering_args: ContentFilteringArgs {
                max_file_size_mb: 25.0,
                no_extract_archives: false,
--- a/src/scanner/enumerate.rs
+++ b/src/scanner/enumerate.rs
@ -144,6 +144,7 @@ pub fn enumerate_filesystem_inputs(
        Some(&matcher_stats),
        enable_profiling,
        Some(shared_profiler),
+        args.compat_ignore_comments,
    )?;
    let blob_processor_init_time = Mutex::new(t1.elapsed());
    let make_blob_processor = || -> BlobProcessor {
--- a/src/scanner/repos.rs
+++ b/src/scanner/repos.rs
@ -622,6 +622,7 @@ pub async fn fetch_s3_objects(
        Some(matcher_stats),
        enable_profiling,
        Some(shared_profiler.clone()),
+        args.compat_ignore_comments,
    )?;
    let mut processor = BlobProcessor { matcher };

--- a/tests/int_allowlist.rs
+++ b/tests/int_allowlist.rs
@ -112,6 +112,7 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
            since_commit: None,
            branch: None,
        },
+        compat_ignore_comments: false,
        content_filtering_args: ContentFilteringArgs {
            max_file_size_mb: 5.0,
            exclude: Vec::new(),
--- a/tests/int_bitbucket.rs
+++ b/tests/int_bitbucket.rs
@ -111,6 +111,7 @@ fn test_bitbucket_remote_scan() -> Result<()> {
            since_commit: None,
            branch: None,
        },
+        compat_ignore_comments: false,
        content_filtering_args: ContentFilteringArgs {
            max_file_size_mb: 25.0,
            no_extract_archives: false,
--- a/tests/int_dedup.rs
+++ b/tests/int_dedup.rs
@ -131,6 +131,7 @@ rules:
            since_commit: None,
            branch: None,
        },
+        compat_ignore_comments: false,
        content_filtering_args: ContentFilteringArgs {
            max_file_size_mb: 5.0,
            extraction_depth: 1,
--- a/tests/int_github.rs
+++ b/tests/int_github.rs
@ -118,6 +118,7 @@ fn test_github_remote_scan() -> Result<()> {
            since_commit: None,
            branch: None,
        },
+        compat_ignore_comments: false,
        content_filtering_args: ContentFilteringArgs {
            max_file_size_mb: 25.0,
            no_extract_archives: false,
--- a/tests/int_gitlab.rs
+++ b/tests/int_gitlab.rs
@ -116,6 +116,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
            since_commit: None,
            branch: None,
        },
+        compat_ignore_comments: false,
        content_filtering_args: ContentFilteringArgs {
            max_file_size_mb: 25.0,
            no_extract_archives: false,
@ -251,6 +252,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
            since_commit: None,
            branch: None,
        },
+        compat_ignore_comments: false,
        content_filtering_args: ContentFilteringArgs {
            max_file_size_mb: 25.0,
            no_extract_archives: false,
--- a/tests/int_redact.rs
+++ b/tests/int_redact.rs
@ -95,6 +95,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
            since_commit: None,
            branch: None,
        },
+        compat_ignore_comments: false,
        content_filtering_args: ContentFilteringArgs {
            max_file_size_mb: 25.0,
            extraction_depth: 2,
--- a/tests/int_slack.rs
+++ b/tests/int_slack.rs
@ -102,6 +102,7 @@ impl TestContext {
                since_commit: None,
                branch: None,
            },
+            compat_ignore_comments: false,
            content_filtering_args: ContentFilteringArgs {
                max_file_size_mb: 25.0,
                extraction_depth: 2,
@ -225,6 +226,7 @@ async fn test_scan_slack_messages() -> Result<()> {
            since_commit: None,
            branch: None,
        },
+        compat_ignore_comments: false,
        content_filtering_args: ContentFilteringArgs {
            max_file_size_mb: 25.0,
            extraction_depth: 2,
--- a/tests/int_validation_cache.rs
+++ b/tests/int_validation_cache.rs
@ -174,6 +174,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
            since_commit: None,
            branch: None,
        },
+        compat_ignore_comments: false,
        content_filtering_args: ContentFilteringArgs {
            max_file_size_mb: 25.0,
            extraction_depth: 2,
--- a/tests/int_vulnerable_files.rs
+++ b/tests/int_vulnerable_files.rs
@ -117,6 +117,7 @@ impl TestContext {
                since_commit: None,
                branch: None,
            },
+            compat_ignore_comments: false,
            content_filtering_args: ContentFilteringArgs {
                max_file_size_mb: 25.0,
                extraction_depth: 2,
@ -227,6 +228,7 @@ impl TestContext {
                since_commit: None,
                branch: None,
            },
+            compat_ignore_comments: false,
            content_filtering_args: ContentFilteringArgs {
                max_file_size_mb: 25.0,
                extraction_depth: 2,