- Added kingfisher:ignore (or kingfisher:allow) to silence a finding inline within a file

- Added: to reuse existing inline directives from other scanners, pass --compat-ignore-comments to also accept NOSONAR, kics-scan ignore,  gitleaks:allow and trufflehog:ignore
This commit is contained in:
Mick Grove 2025-10-09 17:59:10 -07:00
commit caf766b731
23 changed files with 2924 additions and 5 deletions

View file

@ -2,6 +2,10 @@
All notable changes to this project will be documented in this file.
## [Unreleased]
- Added kingfisher:ignore (or kingfisher:allow) to silence a finding inline within a file
- Added `--compat-ignore-comments`: pass it to reuse existing inline directives from other scanners, so that `NOSONAR`, `kics-scan ignore`, `gitleaks:allow` and `trufflehog:ignore` are also accepted
## [v1.56.0]
- Fixed tree-sitter scanning bug where passing --no-base64 caused errors to be printed when the file type couldn't be determined

View file

@ -117,6 +117,7 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md))
- [Notable Scan Options](#notable-scan-options)
- [Understanding `--confidence`](#understanding---confidence)
- [Ignore known false positives](#ignore-known-false-positives)
- [Inline ignore directives](#inline-ignore-directives)
- [Finding Fingerprint](#finding-fingerprint)
- [Rule Performance Profiling](#rule-performance-profiling)
- [CLI Options](#cli-options)
@ -962,6 +963,7 @@ leaves the default unchanged.
- `--manage-baseline`: Create or update the baseline file with current findings
- `--skip-regex <PATTERN>`: Ignore findings whose text matches this regex (repeatable)
- `--skip-word <WORD>`: Ignore findings containing this case-insensitive word (repeatable)
- `--compat-ignore-comments`: Honor inline directives from other scanners (treat `gitleaks:allow` and `trufflehog:ignore` like native suppressions)
## Understanding `--confidence`
The `--confidence` flag sets a minimum confidence threshold, not an exact match.
@ -972,7 +974,21 @@ The `--confidence` flag sets a minimum confidence threshold, not an exact match.
### Ignore known false positives
Use `--skip-regex` and `--skip-word` to suppress findings you know are benign. Both flags may be provided multiple times and are tested against the secret value **and** the full match context.
Use `--skip-regex` and `--skip-word` to suppress findings you know are benign. Both flags may be provided multiple times and are tested against the secret value **and** the full match context.
### Inline ignore directives
Add `kingfisher:ignore` (or `kingfisher:allow`) to a trailing comment on the same line as a finding to silence it. Multi-line strings may also be ignored by placing the directive on the closing delimiter line **or** on the next logical line after the string:
```python
API_KEY = """
line 1
line 2
"""
# kingfisher:ignore
```
Kingfisher recognizes comment markers for the host language, including `#`, `//`, `/* */`, `--`, and `*`-prefixed block comments, so you can use this suppression style across languages. To reuse existing inline directives from other scanners, pass `--compat-ignore-comments` to also accept `NOSONAR`, `kics-scan ignore`, `gitleaks:allow` and `trufflehog:ignore`.
Patterns passed to `--skip-regex` must be Rust-compatible regular expressions, which you can test at [regex101](https://regex101.com).

1036
README.md.orig Normal file

File diff suppressed because it is too large Load diff

View file

@ -118,6 +118,10 @@ pub struct ScanArgs {
/// Skipwords to allow-list secret matches (case-insensitive, repeatable)
#[arg(long = "skip-word", value_name = "WORD")]
pub skip_word: Vec<String>,
/// Also recognise `gitleaks:allow` and `trufflehog:ignore` inline directives
#[arg(long = "compat-ignore-comments", default_value_t = false)]
pub compat_ignore_comments: bool,
}
/// Confidence levels for findings

342
src/inline_ignore.rs Normal file
View file

@ -0,0 +1,342 @@
use crate::location::OffsetSpan;
/// Configuration for inline ignore directives.
///
/// Holds the set of directive tokens that, when found directly after a
/// comment marker, suppress a finding. A value built through the derived
/// `Default` has no tokens at all, in which case `should_ignore` always
/// returns `false`; use `InlineIgnoreConfig::new` to get the native
/// `kingfisher:*` directives.
#[derive(Clone, Debug, Default)]
pub struct InlineIgnoreConfig {
/// Directive tokens to search for. They are matched against an
/// ASCII-lowercased copy of each line, so they are stored in lowercase.
tokens: Vec<&'static str>,
}
impl InlineIgnoreConfig {
    /// Create a new configuration.
    ///
    /// The native `kingfisher:ignore` and `kingfisher:allow` directives are
    /// always recognised.
    ///
    /// * `include_external_syntax` - when true, also recognise the comment
    ///   directives used by other scanners: `gitleaks:allow`,
    ///   `trufflehog:ignore`, `NOSONAR` and `kics-scan ignore`.
    pub fn new(include_external_syntax: bool) -> Self {
        let mut tokens = vec!["kingfisher:ignore", "kingfisher:allow"];
        if include_external_syntax {
            // Lines are ASCII-lowercased before the token search, so every
            // token must be spelled in lowercase here (hence `nosonar`).
            tokens.extend(["gitleaks:allow", "trufflehog:ignore", "nosonar", "kics-scan ignore"]);
        }
        Self { tokens }
    }

    /// Returns `true` when at least one directive token is configured.
    #[inline]
    fn has_tokens(&self) -> bool {
        !self.tokens.is_empty()
    }

    /// Returns `true` when the provided blob slice contains an inline ignore
    /// directive that should suppress a finding for the given span.
    ///
    /// Lines are inspected in this order, and the first directive found wins:
    ///
    /// 1. the line on which the span starts;
    /// 2. the lines above it, walking upwards while the lines are "skippable"
    ///    (whitespace-only, made only of quote characters, or ending in a
    ///    triple-quote style delimiter);
    /// 3. the line on which the span ends, when it differs from the first;
    /// 4. the lines below it, walking downwards under the same skip rule.
    pub fn should_ignore(&self, blob_bytes: &[u8], span: &OffsetSpan) -> bool {
        if !self.has_tokens() {
            return false;
        }

        let (start_line_start, start_line_end) = line_bounds(blob_bytes, span.start);
        if start_line_end > start_line_start {
            let start_line = &blob_bytes[start_line_start..start_line_end];
            if line_has_directive(start_line, &self.tokens) {
                return true;
            }
        }

        // Scan backwards to allow directives that appear before the start of a
        // multi-line string or value. This mirrors tools like Gitleaks where
        // the ignore directive is often placed immediately above the secret.
        let mut cursor = start_line_start;
        while cursor > 0 {
            // `cursor - 1` is the newline that terminates the previous line,
            // so `line_bounds` yields that previous line without its newline.
            let previous_index = cursor - 1;
            let (prev_start, prev_end) = line_bounds(blob_bytes, previous_index);
            if prev_end <= prev_start {
                // A completely empty line ends the upward search.
                break;
            }
            let prev_line = &blob_bytes[prev_start..prev_end];
            if line_has_directive(prev_line, &self.tokens) {
                return true;
            }
            if !should_skip_for_directive_search(prev_line) {
                break;
            }
            if prev_start == 0 {
                break;
            }
            cursor = prev_start;
        }

        // `span.end` is exclusive; step back one byte to land on the last
        // byte that belongs to the match.
        let end_index = span.end.saturating_sub(1);
        let (closing_line_start, closing_line_end) =
            line_bounds(blob_bytes, end_index.min(blob_bytes.len()));
        if closing_line_end > closing_line_start
            && (closing_line_start != start_line_start || closing_line_end != start_line_end)
        {
            let closing_line = &blob_bytes[closing_line_start..closing_line_end];
            if line_has_directive(closing_line, &self.tokens) {
                return true;
            }
        }

        // Also consider lines after the match so that multi-line strings can be
        // ignored when the directive appears after the closing delimiter (a
        // common pattern in languages like Python).
        let mut cursor = closing_line_end;
        while cursor < blob_bytes.len() {
            if blob_bytes[cursor] == b'\n' {
                // Step over line terminators (and therefore empty lines).
                cursor += 1;
                continue;
            }
            let (_, next_end) = line_bounds(blob_bytes, cursor);
            if next_end <= cursor {
                break;
            }
            let next_line = &blob_bytes[cursor..next_end];
            if line_has_directive(next_line, &self.tokens) {
                return true;
            }
            if !should_skip_for_directive_search(next_line) {
                break;
            }
            cursor = next_end;
        }

        false
    }
}
/// Decides whether the directive search may step over `line` and keep looking
/// at the next one.
///
/// A line is skippable when, after trimming ASCII whitespace, it is empty,
/// consists solely of one repeated quote character (`"`, `'` or `` ` ``), or
/// ends in a run of at least three identical quote characters.
fn should_skip_for_directive_search(line: &[u8]) -> bool {
    let content = trim_ascii_whitespace(line);
    match content.split_first() {
        // Nothing but whitespace on this line.
        None => true,
        Some((&lead, rest)) => {
            let only_quotes =
                matches!(lead, b'"' | b'\'' | b'`') && rest.iter().all(|&b| b == lead);
            only_quotes || ends_with_multiline_delimiter(content)
        }
    }
}
/// Returns `true` when `trimmed` ends with three or more copies of the same
/// quote character (`"`, `'` or `` ` ``), e.g. a `"""` delimiter.
///
/// The caller is expected to have removed trailing whitespace already.
fn ends_with_multiline_delimiter(trimmed: &[u8]) -> bool {
    match trimmed {
        // A trailing run of length >= 3 exists exactly when the final three
        // bytes are equal; only quote characters qualify.
        [.., first, second, third] => {
            matches!(*third, b'"' | b'\'' | b'`') && first == third && second == third
        }
        _ => false,
    }
}
/// Returns the sub-slice of `line` with leading and trailing ASCII whitespace
/// removed. A line made only of whitespace yields an empty slice.
fn trim_ascii_whitespace(line: &[u8]) -> &[u8] {
    let is_content = |b: &u8| !b.is_ascii_whitespace();
    match (line.iter().position(is_content), line.iter().rposition(is_content)) {
        (Some(first), Some(last)) => &line[first..=last],
        // No non-whitespace byte anywhere.
        _ => &line[line.len()..],
    }
}
/// Computes the `(start, end)` byte range of the line containing `index`.
///
/// `start` is the offset just after the preceding `\n` (or 0) and `end` is the
/// offset of the next `\n` at or after `index` (or `bytes.len()`), so the
/// range never includes the newline itself. An `index` past the end of
/// `bytes` is clamped to `bytes.len()`.
fn line_bounds(bytes: &[u8], index: usize) -> (usize, usize) {
    let pivot = index.min(bytes.len());
    let (before, from_pivot) = bytes.split_at(pivot);

    let start = before
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1);
    let end = from_pivot
        .iter()
        .position(|&b| b == b'\n')
        .map_or(bytes.len(), |offset| pivot + offset);

    (start, end)
}
/// Reports whether `line` contains any of `tokens` positioned directly after
/// a comment marker.
///
/// The search is ASCII case-insensitive on the line side: the line is
/// lowercased before looking for each token, so tokens are expected to be
/// lowercase. Every occurrence of a token is tried until one is accepted by
/// `is_comment_prefix`.
fn line_has_directive(line: &[u8], tokens: &[&'static str]) -> bool {
    if line.is_empty() {
        return false;
    }

    let haystack = line.to_ascii_lowercase();
    tokens.iter().any(|token| {
        let needle = token.as_bytes();
        let mut from = 0;
        while from < haystack.len() {
            match memchr::memmem::find(&haystack[from..], needle) {
                Some(relative) => {
                    let at = from + relative;
                    // Offsets are identical in `line` and its lowercased copy.
                    if is_comment_prefix(line, at) {
                        return true;
                    }
                    from = at + needle.len();
                }
                None => return false,
            }
        }
        false
    })
}
/// Returns `true` when the text immediately before `token_index` in `line`
/// (ignoring whitespace between the two) is a comment opener.
///
/// Recognised openers:
///
/// * `#`, `//`, `--` and `/*`, each of which must sit at the start of the line
///   or be preceded by ASCII whitespace (so the `//` in `http://` is rejected);
/// * a lone `*` with nothing but whitespace before it, i.e. the continuation
///   line of a block comment.
///
/// A comment *terminator* (`*/`) is deliberately not accepted: whatever
/// follows it lies outside the comment and is not a directive.
///
/// Returns `false` when `token_index` is 0 (nothing can precede the token) or
/// lies beyond the end of `line`.
fn is_comment_prefix(line: &[u8], token_index: usize) -> bool {
    if token_index == 0 || token_index > line.len() {
        return false;
    }

    // The marker counts only at the start of the line or after whitespace.
    let at_boundary = |prefix: &[u8]| prefix.last().map_or(true, |c| c.is_ascii_whitespace());

    // Drop the whitespace separating the marker from the token.
    let before_token = &line[..token_index];
    let marker_end = match before_token.iter().rposition(|b| !b.is_ascii_whitespace()) {
        Some(last) => last + 1,
        None => return false,
    };

    match &before_token[..marker_end] {
        [head @ .., b'#'] => at_boundary(head),
        [head @ .., b'/', b'/'] | [head @ .., b'-', b'-'] | [head @ .., b'/', b'*'] => {
            at_boundary(head)
        }
        // `*` on its own: only valid as the leading marker of the line.
        [head @ .., b'*'] => head.iter().all(|c| c.is_ascii_whitespace()),
        _ => false,
    }
}
#[cfg(test)]
mod tests {
    use super::{
        is_comment_prefix, line_bounds, line_has_directive, should_skip_for_directive_search,
        trim_ascii_whitespace, InlineIgnoreConfig,
    };
    use crate::location::OffsetSpan;

    /// Every offset passed to `is_comment_prefix` is the index of the first
    /// byte of the directive token, i.e. the length of the text before it.
    #[test]
    fn detects_comment_prefixes() {
        assert!(is_comment_prefix(b"// kingfisher:ignore", 3));
        assert!(is_comment_prefix(b" # kingfisher:ignore", 3));
        assert!(is_comment_prefix(b"value /* kingfisher:ignore */", 9));
        assert!(is_comment_prefix(b"value -- kingfisher:ignore", 9));
        assert!(is_comment_prefix(b" * kingfisher:ignore", 3));
        // The `//` of a URL scheme is glued to preceding text and must not count.
        assert!(!is_comment_prefix(b"http://kingfisher:ignore", 7));
    }

    #[test]
    fn bounds_cover_expected_ranges() {
        let data = b"one\ntwo\nthree";
        assert_eq!(line_bounds(data, 0), (0, 3));
        assert_eq!(line_bounds(data, 4), (4, 7));
        assert_eq!(line_bounds(data, data.len()), (8, 13));
    }

    #[test]
    fn detects_directives_in_lines() {
        let tokens = ["kingfisher:ignore", "kingfisher:allow"];
        assert!(line_has_directive(b"secret # kingfisher:ignore", &tokens));
        assert!(!line_has_directive(b"secret", &tokens));
    }

    #[test]
    fn respects_multiline_block_comment_prefix() {
        let tokens = ["kingfisher:ignore"];
        assert!(line_has_directive(b" * kingfisher:ignore", &tokens));
    }

    #[test]
    fn ignores_multi_line_string_with_trailing_comment() {
        let blob = b"let secret = \"\"\"\nline1\nline2\n\"\"\"\n# kingfisher:ignore\n";
        let matched = b"line1\nline2\n";
        let start = blob
            .windows(matched.len())
            .position(|window| window == matched)
            .expect("match bytes present");
        let span = OffsetSpan::from_range(start..start + matched.len());
        let config = InlineIgnoreConfig::new(false);
        assert!(config.should_ignore(blob, &span));
    }

    #[test]
    fn ignores_multiline_without_trailing_newline() {
        let blob = b"let secret = \"\"\"\nline1\nline2\n\"\"\"\n# kingfisher:ignore\n";
        let matched = b"line1\nline2";
        let start = blob
            .windows(matched.len())
            .position(|window| window == matched)
            .expect("match bytes present");
        let span = OffsetSpan::from_range(start..start + matched.len());
        let config = InlineIgnoreConfig::new(false);
        assert!(config.should_ignore(blob, &span));
    }

    #[test]
    fn ignores_multiline_with_directive_before_secret() {
        let blob = b"// kingfisher:ignore\nlet secret = \"\"\"\nline1\nline2\n\"\"\"\n";
        let matched = b"line1\nline2\n";
        let start = blob
            .windows(matched.len())
            .position(|window| window == matched)
            .expect("match bytes present");
        let span = OffsetSpan::from_range(start..start + matched.len());
        let config = InlineIgnoreConfig::new(false);
        assert!(config.should_ignore(blob, &span));
    }

    #[test]
    fn trim_ascii_whitespace_returns_inner_slice() {
        assert_eq!(trim_ascii_whitespace(b" abc "), b"abc");
        assert!(trim_ascii_whitespace(b" ").is_empty());
    }

    #[test]
    fn skips_lines_with_only_delimiters() {
        assert!(should_skip_for_directive_search(b"\"\"\""));
        assert!(should_skip_for_directive_search(b" \"\"\" "));
        assert!(should_skip_for_directive_search(b"let secret = \"\"\""));
        assert!(!should_skip_for_directive_search(b"value"));
    }
}

View file

@ -21,6 +21,7 @@ pub mod git_url;
pub mod gitea;
pub mod github;
pub mod gitlab;
pub mod inline_ignore;
pub mod jira;
pub mod liquid_filters;
pub mod location;

View file

@ -416,6 +416,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
no_extract_archives: true,

View file

@ -23,6 +23,7 @@ use xxhash_rust::xxh3::xxh3_64;
use crate::{
blob::{Blob, BlobId, BlobIdMap},
entropy::calculate_shannon_entropy,
inline_ignore::InlineIgnoreConfig,
location::{Location, LocationMapping, OffsetSpan, SourcePoint, SourceSpan},
origin::OriginSet,
parser,
@ -199,6 +200,9 @@ pub struct Matcher<'a> {
/// Rule profiler for measuring performance of individual rules
profiler: Option<Arc<ConcurrentRuleProfiler>>,
/// Configuration that controls inline ignore directives
inline_ignore_config: InlineIgnoreConfig,
}
/// This `Drop` implementation updates the `global_stats` with the local stats
impl<'a> Drop for Matcher<'a> {
@ -226,6 +230,7 @@ impl<'a> Matcher<'a> {
global_stats: Option<&'a Mutex<MatcherStats>>,
enable_profiling: bool,
shared_profiler: Option<Arc<ConcurrentRuleProfiler>>,
include_external_ignore_syntax: bool,
) -> Result<Self> {
// Changed: removed `with_capacity(16384)` so we don't pre-allocate a large Vec
let raw_matches_scratch = Vec::new();
@ -247,6 +252,7 @@ impl<'a> Matcher<'a> {
seen_blobs,
user_data,
profiler,
inline_ignore_config: InlineIgnoreConfig::new(include_external_ignore_syntax),
})
}
@ -403,6 +409,7 @@ impl<'a> Matcher<'a> {
redact,
&filename,
self.profiler.as_ref(),
&self.inline_ignore_config,
);
}
// If tree-sitter produced base64-decoded matches, try them against all rules
@ -427,6 +434,7 @@ impl<'a> Matcher<'a> {
redact,
&filename,
self.profiler.as_ref(),
&self.inline_ignore_config,
);
}
}
@ -457,6 +465,7 @@ impl<'a> Matcher<'a> {
redact,
&filename,
self.profiler.as_ref(),
&self.inline_ignore_config,
);
}
if depth + 1 < MAX_B64_DEPTH {
@ -560,6 +569,7 @@ fn filter_match<'b>(
redact: bool,
filename: &str,
profiler: Option<&Arc<ConcurrentRuleProfiler>>,
inline_ignore_config: &InlineIgnoreConfig,
) {
let mut timer =
profiler.map(|p| RuleTimer::new(p, rule.id(), rule.name(), &rule.syntax.pattern, filename));
@ -590,6 +600,10 @@ fn filter_match<'b>(
let matching_input_offset_span = OffsetSpan::from_range(
(start + matching_input.start())..(start + matching_input.end()),
);
if inline_ignore_config.should_ignore(blob_bytes, &matching_input_offset_span) {
debug!("Skipping match due to inline ignore directive");
continue;
}
let match_key = compute_match_key(
matching_input.as_bytes(),
rule.id().as_bytes(),
@ -961,7 +975,7 @@ pub fn compute_finding_fingerprint(
// -------------------------------------------------------------------------------------------------
#[cfg(test)]
mod test {
use std::collections::BTreeMap;
use std::{collections::BTreeMap, path::PathBuf};
use pretty_assertions::assert_eq;
// ---------------------------------------------------------------------
@ -970,7 +984,11 @@ mod test {
use proptest::prelude::*;
use super::*;
use crate::rules::rule::{DependsOnRule, HttpRequest, HttpValidation, RuleSyntax, Validation};
use crate::{
blob::{Blob, BlobIdMap},
origin::{Origin, OriginSet},
rules::rule::{DependsOnRule, HttpRequest, HttpValidation, RuleSyntax, Validation},
};
proptest! {
#[test]
@ -1009,7 +1027,7 @@ mod test {
let rules_db = RulesDatabase::from_rules(vec![rule]).unwrap();
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
let mut m = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None).unwrap();
let mut m = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, false).unwrap();
// ── run the scan ──────────────────────────────────────────────
m.scan_bytes_raw(&noise, "buf").unwrap();
@ -1080,6 +1098,7 @@ mod test {
None,
enable_rule_profiling,
None, // Pass the shared profiler
false,
)?;
matcher.scan_bytes_raw(input.as_bytes(), "fname")?;
assert_eq!(
@ -1167,7 +1186,7 @@ mod test {
let rules_db = RulesDatabase::from_rules(vec![rule])?;
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
let mut m = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None)?;
let mut m = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, false)?;
let buf = b"dup dup"; // two literal hits, same rule
@ -1184,4 +1203,115 @@ mod test {
assert_eq!(second_len, 2);
Ok(())
}
// A match that shares its line with a trailing `# kingfisher:ignore` comment
// must not be reported.
#[test]
fn inline_comment_skips_match() -> Result<()> {
// Literal pattern with zero minimum entropy, so only the inline directive
// can be responsible for the match being dropped.
let rule = Rule::new(RuleSyntax {
id: "inline.ignore".into(),
name: "inline".into(),
pattern: "secret_token".into(),
confidence: crate::rules::rule::Confidence::Low,
min_entropy: 0.0,
visible: true,
examples: vec![],
negative_examples: vec![],
references: vec![],
validation: None::<Validation>,
depends_on_rule: vec![],
});
let rules_db = RulesDatabase::from_rules(vec![rule])?;
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
// Final argument `false`: external (compat) directive syntax is not enabled.
let mut matcher = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, false)?;
let blob = Blob::from_bytes(b"let key = \"secret_token\" # kingfisher:ignore".to_vec());
let origin = OriginSet::from(Origin::from_file(PathBuf::from("inline.txt")));
match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
ScanResult::New(matches) => assert!(matches.is_empty()),
_ => panic!("unexpected scan result"),
}
Ok(())
}
// A match spanning several lines of a triple-quoted string must not be
// reported when `# kingfisher:ignore` sits on the line after the closing
// delimiter.
#[test]
fn inline_comment_after_multiline_secret_skips_match() -> Result<()> {
// `\s+` lets the pattern match across the newline between the two lines.
let rule = Rule::new(RuleSyntax {
id: "inline.multiline".into(),
name: "inline multiline".into(),
pattern: "line1\\s+line2".into(),
confidence: crate::rules::rule::Confidence::Low,
min_entropy: 0.0,
visible: true,
examples: vec![],
negative_examples: vec![],
references: vec![],
validation: None::<Validation>,
depends_on_rule: vec![],
});
let rules_db = RulesDatabase::from_rules(vec![rule])?;
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
// Final argument `false`: external (compat) directive syntax is not enabled.
let mut matcher = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, false)?;
// Raw byte string so the embedded `"""` delimiters need no escaping.
let blob = Blob::from_bytes(
br#"let data = """
line1
line2
"""
# kingfisher:ignore
"#
.to_vec(),
);
let origin = OriginSet::from(Origin::from_file(PathBuf::from("multiline.txt")));
match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
ScanResult::New(matches) => assert!(matches.is_empty()),
_ => panic!("unexpected scan result"),
}
Ok(())
}
// The `gitleaks:allow` directive must only suppress a match when the matcher
// is built with external (compat) directive syntax enabled.
#[test]
fn compat_flag_controls_external_directives() -> Result<()> {
let rule = Rule::new(RuleSyntax {
id: "inline.compat".into(),
name: "inline compat".into(),
pattern: "supersecret123".into(),
confidence: crate::rules::rule::Confidence::Low,
min_entropy: 0.0,
visible: true,
examples: vec![],
negative_examples: vec![],
references: vec![],
validation: None::<Validation>,
depends_on_rule: vec![],
});
let rules_db = RulesDatabase::from_rules(vec![rule])?;
let blob = Blob::from_bytes(b"token = \"supersecret123\" # gitleaks:allow".to_vec());
let origin = OriginSet::from(Origin::from_file(PathBuf::from("compat.txt")));
// First pass: compat syntax disabled (last argument `false`), so the
// foreign directive is not honoured and the match is reported.
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
let mut matcher = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, false)?;
let matches_without_compat =
match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
ScanResult::New(matches) => matches.len(),
_ => panic!("unexpected scan result"),
};
assert_eq!(matches_without_compat, 1, "directive should be ignored without compat flag");
// Second pass: a fresh seen-blob map and matcher with compat syntax
// enabled (last argument `true`); the same blob now yields no matches.
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
let mut matcher = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, true)?;
match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
ScanResult::New(matches) => assert!(matches.is_empty()),
_ => panic!("unexpected scan result"),
}
Ok(())
}
}

1317
src/matcher.rs.orig Normal file

File diff suppressed because it is too large Load diff

52
src/matcher.rs.rej Normal file
View file

@ -0,0 +1,52 @@
@@ -1,50 +1,51 @@
use std::{
hash::{Hash, Hasher},
str,
sync::{Arc, Mutex},
};
use anyhow::Result;
use base64::{engine::general_purpose, Engine};
use bstr::BString;
use http::StatusCode;
use regex::bytes::Regex;
use rustc_hash::{FxHashMap, FxHashSet, FxHasher};
use schemars::{
gen::SchemaGenerator,
schema::{ArrayValidation, InstanceType, Schema},
JsonSchema,
};
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use tracing::debug;
use xxhash_rust::xxh3::xxh3_64;
use crate::{
blob::{Blob, BlobId, BlobIdMap},
entropy::calculate_shannon_entropy,
+ inline_ignore::InlineIgnoreConfig,
location::{Location, LocationMapping, OffsetSpan, SourcePoint, SourceSpan},
origin::OriginSet,
parser,
parser::{Checker, Language},
rule_profiling::{ConcurrentRuleProfiler, RuleStats, RuleTimer},
rules::rule::Rule,
rules_database::RulesDatabase,
safe_list::{is_safe_match, is_user_match},
scanner_pool::ScannerPool,
snippet::Base64BString,
util::{intern, redact_value},
};
const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment
const CHUNK_OVERLAP: usize = 64 * 1024; // 64 KiB overlap to catch boundary matches
const BASE64_SCAN_LIMIT: usize = 64 * 1024 * 1024; // skip expensive Base64 pass on huge blobs
const TREE_SITTER_SCAN_LIMIT: usize = 64 * 1024; // only run tree-sitter on blobs ≤64 KiB
// -------------------------------------------------------------------------------------------------
// RawMatch
// -------------------------------------------------------------------------------------------------
/// A raw match, as recorded by a callback to Vectorscan.
///
/// When matching with Vectorscan, we simply collect all matches into a
/// preallocated `Vec`, and then go through them all after scanning is complete.

View file

@ -842,6 +842,7 @@ mod tests {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 256.0,
exclude: Vec::new(),

View file

@ -143,6 +143,7 @@ mod tests {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
no_extract_archives: false,

View file

@ -144,6 +144,7 @@ pub fn enumerate_filesystem_inputs(
Some(&matcher_stats),
enable_profiling,
Some(shared_profiler),
args.compat_ignore_comments,
)?;
let blob_processor_init_time = Mutex::new(t1.elapsed());
let make_blob_processor = || -> BlobProcessor {

View file

@ -622,6 +622,7 @@ pub async fn fetch_s3_objects(
Some(matcher_stats),
enable_profiling,
Some(shared_profiler.clone()),
args.compat_ignore_comments,
)?;
let mut processor = BlobProcessor { matcher };

View file

@ -112,6 +112,7 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 5.0,
exclude: Vec::new(),

View file

@ -111,6 +111,7 @@ fn test_bitbucket_remote_scan() -> Result<()> {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
no_extract_archives: false,

View file

@ -131,6 +131,7 @@ rules:
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 5.0,
extraction_depth: 1,

View file

@ -118,6 +118,7 @@ fn test_github_remote_scan() -> Result<()> {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
no_extract_archives: false,

View file

@ -116,6 +116,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
no_extract_archives: false,
@ -251,6 +252,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
no_extract_archives: false,

View file

@ -95,6 +95,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
extraction_depth: 2,

View file

@ -102,6 +102,7 @@ impl TestContext {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
extraction_depth: 2,
@ -225,6 +226,7 @@ async fn test_scan_slack_messages() -> Result<()> {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
extraction_depth: 2,

View file

@ -174,6 +174,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
extraction_depth: 2,

View file

@ -117,6 +117,7 @@ impl TestContext {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
extraction_depth: 2,
@ -227,6 +228,7 @@ impl TestContext {
since_commit: None,
branch: None,
},
compat_ignore_comments: false,
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
extraction_depth: 2,