diff --git a/CHANGELOG.md b/CHANGELOG.md index 1278533..1be8150 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to this project will be documented in this file. +## [v1.57.0] +- Added inline ignore directive detection: a suppression token anywhere on the finding's line or on the surrounding lines silences the finding, including multi-line values +- Added a `--no-ignore` CLI flag to disable inline directives when you need every potential secret reported +- Added a repeatable `--ignore-comment <DIRECTIVE>` flag to reuse inline directives from other scanners (for example `NOSONAR`, `kics-scan ignore`, `gitleaks:allow`, etc.) +- Update-check messages now respect the user's color settings by using the same color helper as the main reporter, so output is consistent and no ANSI codes are emitted when color is disabled + ## [v1.56.0] - Fixed tree-sitter scanning bug where passing --no-base64 caused errors to be printed when the file type couldn’t be determined diff --git a/Cargo.toml b/Cargo.toml index 37a2e32..852194d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.56.0" +version = "1.57.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/README.md b/README.md index 14faccf..d77d01e 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,7 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md)) - [Notable Scan Options](#notable-scan-options) - [Understanding `--confidence`](#understanding---confidence) - [Ignore known false positives](#ignore-known-false-positives) + - [Inline ignore directives](#inline-ignore-directives) - [Finding Fingerprint](#finding-fingerprint) - [Rule Performance Profiling](#rule-performance-profiling) - [CLI Options](#cli-options) @@ -962,6 +963,8 @@ leaves the default unchanged. 
- `--manage-baseline`: Create or update the baseline file with current findings - `--skip-regex `: Ignore findings whose text matches this regex (repeatable) - `--skip-word `: Ignore findings containing this case-insensitive word (repeatable) +- `--ignore-comment <DIRECTIVE>`: Honor additional inline directives from other scanners (repeatable; e.g. `--ignore-comment "gitleaks:allow"`) +- `--no-ignore`: Disable inline directives entirely so every match is reported ## Understanding `--confidence` The `--confidence` flag sets a minimum confidence threshold, not an exact match. @@ -972,7 +975,7 @@ The `--confidence` flag sets a minimum confidence threshold, not an exact match. ### Ignore known false positives -Use `--skip-regex` and `--skip-word` to suppress findings you know are benign. Both flags may be provided multiple times and are tested against the secret value **and** the full match context. +Use `--skip-regex` and `--skip-word` to suppress findings you know are benign. Both flags may be provided multiple times and are tested against the secret value **and** the full match context. With `--skip-regex`, these should be Rust compatible regular expressions, which you can test out at [regex101](https://regex101.com) @@ -993,6 +996,22 @@ kingfisher scan \ If a `--skip-regex` regular expression fails to compile, the scan aborts with an error so that typos are caught early. +### Inline ignore directives + +Add `kingfisher:ignore` anywhere on the same line as a finding to silence it. Multi-line strings and PEM-style blocks may also be ignored by placing the directive on the closing delimiter line (for example, `""" # kingfisher:ignore`), on the next logical line after the string, **or** on a comment immediately before the value: + +```python +# kingfisher:ignore +API_KEY = """ +line 1 +line 2 +""" +# kingfisher:ignore +``` + +Kingfisher searches the surrounding lines for these tokens without requiring language-specific comment markers. 
To reuse existing inline directives from other scanners, add them with repeatable `--ignore-comment` flags (for example `--ignore-comment "gitleaks:allow" --ignore-comment "NOSONAR"`). Use `--no-ignore` when you want to disable inline suppressions entirely. + + ## Finding Fingerprint The document below details the four-field formula (rule SHA-1, origin label, start & end offsets) hashed with XXH3-64 to create Kingfisher’s 64-bit finding fingerprint, and explains how this ID powers safe deduplication; plus how `--no-dedup` can be used shows every raw match. diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs index 255417f..78312d8 100644 --- a/src/cli/commands/scan.rs +++ b/src/cli/commands/scan.rs @@ -118,6 +118,14 @@ pub struct ScanArgs { /// Skipwords to allow-list secret matches (case-insensitive, repeatable) #[arg(long = "skip-word", value_name = "WORD")] pub skip_word: Vec<String>, + + /// Additional inline ignore directives to recognise (repeatable) + #[arg(long = "ignore-comment", value_name = "DIRECTIVE")] + pub extra_ignore_comments: Vec<String>, + + /// Disable inline ignore directives entirely + #[arg(long = "no-ignore", default_value_t = false)] + pub no_inline_ignore: bool, } /// Confidence levels for findings diff --git a/src/inline_ignore.rs b/src/inline_ignore.rs new file mode 100644 index 0000000..22f47ea --- /dev/null +++ b/src/inline_ignore.rs @@ -0,0 +1,423 @@ +use crate::location::OffsetSpan; + +/// Configuration for inline ignore directives. +/// +/// Holds the lower-cased directive tokens to search for. The derived `Default` has no tokens, +/// which behaves like [`InlineIgnoreConfig::disabled`]; use [`InlineIgnoreConfig::new`] to get +/// the built-in `kingfisher:ignore` directive. +#[derive(Clone, Debug, Default)] +pub struct InlineIgnoreConfig { + tokens: Vec<Vec<u8>>, +} + +impl InlineIgnoreConfig { + /// Create a new configuration. + /// + /// * `additional_tokens` - inline ignore directives supplied by the user. 
+ pub fn new(additional_tokens: &[String]) -> Self { + let mut tokens = vec![b"kingfisher:ignore".to_vec()]; + + for token in additional_tokens { + let trimmed = token.trim(); + if trimmed.is_empty() { + continue; + } + + // Directives are stored lower-cased; duplicates are dropped. + let lowered = trimmed.to_ascii_lowercase().into_bytes(); + if tokens.contains(&lowered) { + continue; + } + + tokens.push(lowered); + } + + Self { tokens } + } + + /// Return a configuration with inline ignores disabled. + pub fn disabled() -> Self { + Self { tokens: Vec::new() } + } + + /// Whether at least one directive token is configured. + #[inline] + fn has_tokens(&self) -> bool { + !self.tokens.is_empty() + } + + /// Returns `true` when an inline ignore directive should suppress the finding at `span`. + /// + /// `span` holds byte offsets into `blob_bytes`. Lines are checked in this order: the line + /// containing `span.start`, the lines above it, the line containing the last matched byte, + /// and the lines below it. The walks above and below only step over blank lines and lines + /// accepted by `should_skip_for_directive_search`, and stop at the first other line. + /// Always returns `false` when no directive tokens are configured. + pub fn should_ignore(&self, blob_bytes: &[u8], span: &OffsetSpan) -> bool { + if !self.has_tokens() { + return false; + } + + let (start_line_start, start_line_end) = line_bounds(blob_bytes, span.start); + if start_line_end > start_line_start { + let start_line = &blob_bytes[start_line_start..start_line_end]; + if line_has_directive(start_line, &self.tokens) { + return true; + } + } + + // Scan backwards to allow directives that appear before the start of a + // multi-line string or value. This mirrors tools like Gitleaks where + // the ignore directive is often placed immediately above the secret. + // Empty lines are stepped over here just as the forward scan below does, + // so LF and CRLF files behave the same.
+ let mut cursor = start_line_start; + while cursor > 0 { + // `cursor` is the first byte of a line, so `cursor - 1` is the newline that + // terminates the previous line; `line_bounds` resolves it to that line. + let (prev_start, prev_end) = line_bounds(blob_bytes, cursor - 1); + let prev_line = &blob_bytes[prev_start..prev_end]; + if line_has_directive(prev_line, &self.tokens) { + return true; + } + + if !should_skip_for_directive_search(prev_line) { + break; + } + + // Reaching offset 0 ends the loop via the `cursor > 0` condition. + cursor = prev_start; + } + + let end_index = span.end.saturating_sub(1); + let (closing_line_start, closing_line_end) = + line_bounds(blob_bytes, end_index.min(blob_bytes.len())); + if closing_line_end > closing_line_start + && (closing_line_start != start_line_start || closing_line_end != start_line_end) + { + let closing_line = &blob_bytes[closing_line_start..closing_line_end]; + if line_has_directive(closing_line, &self.tokens) { + return true; + } + } + + // Also consider lines after the match so that multi-line strings can be + // ignored when the directive appears after the closing delimiter (a + // common pattern in languages like Python).
+ let mut cursor = closing_line_end; + while cursor < blob_bytes.len() { + if blob_bytes[cursor] == b'\n' { + cursor += 1; + continue; + } + + let (_, next_end) = line_bounds(blob_bytes, cursor); + if next_end <= cursor { + break; + } + + let next_line = &blob_bytes[cursor..next_end]; + if line_has_directive(next_line, &self.tokens) { + return true; + } + + if !should_skip_for_directive_search(next_line) { + break; + } + + cursor = next_end; + } + + false + } +} + +/// Returns `true` for lines the directive search may step over: blank lines, lines made only of +/// one repeated quote character, lines ending in a triple-quote style delimiter, PEM boundary +/// lines, and lines that look like encoded payload. +fn should_skip_for_directive_search(line: &[u8]) -> bool { + let trimmed = trim_ascii_whitespace(line); + if trimmed.is_empty() { + return true; + } + + if trimmed.iter().all(|&b| b == trimmed[0]) && matches!(trimmed[0], b'"' | b'\'' | b'`') { + return true; + } + + if ends_with_multiline_delimiter(trimmed) { + return true; + } + + if looks_like_pem_boundary(trimmed) { + return true; + } + + if looks_like_encoded_secret_body(trimmed) { + return true; + } + + false +} + +/// Returns `true` when `trimmed` ends with at least three identical quote characters +/// (`"`, `'` or a backtick). +fn ends_with_multiline_delimiter(trimmed: &[u8]) -> bool { + if trimmed.len() < 3 { + return false; + } + + let last = *trimmed.last().unwrap(); + if !matches!(last, b'"' | b'\'' | b'`') { + return false; + } + + let count = trimmed.iter().rev().take_while(|&&ch| ch == last).count(); + + count >= 3 +} + +/// Returns `true` when `trimmed` starts with `-----BEGIN ` or `-----END `. +fn looks_like_pem_boundary(trimmed: &[u8]) -> bool { + trimmed.starts_with(b"-----BEGIN ") || trimmed.starts_with(b"-----END ") +} + +/// Heuristic for lines of at least `MIN_LEN` bytes that look like encoded payload +/// (base64-, hex- or base32-style alphabets, or mostly such characters). +fn looks_like_encoded_secret_body(trimmed: &[u8]) -> bool { + const MIN_LEN: usize = 16; + + if trimmed.len() < MIN_LEN { + return false; + } + + let is_base64ish = trimmed.iter().all(|&b| { + matches!( + b, + b'A'..=b'Z' + | b'a'..=b'z' + | b'0'..=b'9' + | b'+' + | b'/' + | b'=' + | b'-' + | b'_' + ) + }); + if is_base64ish { + return true; + } + + let is_hexish = trimmed.iter().all(|&b| matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')); + if is_hexish { + return true; + } + + let is_base32ish = trimmed.iter().all(|&b| matches!(b, b'A'..=b'Z' | b'2'..=b'7' | b'=')); + if is_base32ish { + return true; +
} + + // Allow directives to be placed after payloads that mix a high percentage of + // alpha-numeric characters commonly seen in encoded data (e.g. cryptographic + // material that includes punctuation like ':' or '.') without risking + // accidentally skipping regular source lines. + let allowed = |b: u8| { + matches!( + b, + b'A'..=b'Z' + | b'a'..=b'z' + | b'0'..=b'9' + | b'+' + | b'/' + | b'=' + | b'-' + | b'_' + | b':' + | b'.' + ) + }; + + let allowed_count = trimmed.iter().copied().filter(|&b| allowed(b)).count(); + allowed_count * 10 >= trimmed.len() * 9 +} + +/// Returns `line` without leading and trailing ASCII whitespace. +fn trim_ascii_whitespace(line: &[u8]) -> &[u8] { + let mut start = 0; + while start < line.len() && line[start].is_ascii_whitespace() { + start += 1; + } + + let mut end = line.len(); + while end > start && line[end - 1].is_ascii_whitespace() { + end -= 1; + } + + &line[start..end] +} + +/// Returns the `(start, end)` byte offsets of the line containing `index`, excluding the +/// terminating `\n`. `index` is clamped to `bytes.len()`; when it points at a `\n`, the line +/// ended by that newline is returned. Empty input yields `(0, 0)`. +fn line_bounds(bytes: &[u8], index: usize) -> (usize, usize) { + if bytes.is_empty() { + return (0, 0); + } + let mut start = index.min(bytes.len()); + while start > 0 && bytes[start - 1] != b'\n' { + start -= 1; + } + let mut end = index.min(bytes.len()); + while end < bytes.len() && bytes[end] != b'\n' { + end += 1; + } + (start, end) +} + +/// Returns `true` when `line` contains any of `tokens`, ignoring ASCII case in `line`. +/// `tokens` are expected to be lower-case already, as produced by `InlineIgnoreConfig::new`. +fn line_has_directive(line: &[u8], tokens: &[Vec<u8>]) -> bool { + if line.is_empty() { + return false; + } + + // Lower-case a copy of the line once and search it for every token. + let lowercase = line.to_ascii_lowercase(); + + tokens.iter().any(|token| memchr::memmem::find(&lowercase, token.as_slice()).is_some()) +} + +#[cfg(test)] +mod tests { + use super::{ + line_bounds, line_has_directive, should_skip_for_directive_search, trim_ascii_whitespace, + InlineIgnoreConfig, + }; + use crate::location::OffsetSpan; + + #[test] + fn bounds_cover_expected_ranges() { + let data = b"one\ntwo\nthree"; + assert_eq!(line_bounds(data, 0), (0, 3)); + assert_eq!(line_bounds(data, 4), (4, 7)); + assert_eq!(line_bounds(data, data.len()), (8, 13)); + } + + #[test] + fn detects_directives_in_lines() { + let 
tokens = vec![b"kingfisher:ignore".to_vec()]; + assert!(line_has_directive(b"secret # kingfisher:ignore", &tokens)); + assert!(line_has_directive(b"kingfisher:ignore before value", &tokens)); + assert!(line_has_directive(b"value // Gitleaks:Allow", &[b"gitleaks:allow".to_vec()])); + assert!(!line_has_directive(b"secret", &tokens)); + } + + #[test] + fn respects_multiline_block_comment_prefix() { + let tokens = vec![b"kingfisher:ignore".to_vec()]; + assert!(line_has_directive(b" * kingfisher:ignore", &tokens)); + } + + #[test] + fn ignores_multi_line_string_with_trailing_comment() { + let blob = b"let secret = \"\"\"\nline1\nline2\n\"\"\"\n# kingfisher:ignore\n"; + let matched = b"line1\nline2\n"; + let start = blob + .windows(matched.len()) + .position(|window| window == matched) + .expect("match bytes present"); + let span = OffsetSpan::from_range(start..start + matched.len()); + let config = InlineIgnoreConfig::new(&[]); + assert!(config.should_ignore(blob, &span)); + } + + #[test] + fn ignores_multiline_with_directive_on_closing_line() { + let blob = b"api_key = \"\"\"\nline1\nline2\n\"\"\" // kingfisher:ignore\n"; + let matched = b"line1\nline2\n"; + let start = blob + .windows(matched.len()) + .position(|window| window == matched) + .expect("match bytes present"); + let span = OffsetSpan::from_range(start..start + matched.len()); + let config = InlineIgnoreConfig::new(&[]); + assert!(config.should_ignore(blob, &span)); + } + + #[test] + fn ignores_pem_with_directive_before_block() { + let blob = b"// kingfisher:ignore\napi_key = \"\"\"\n-----BEGIN RSA PRIVATE KEY-----\nMIICWwIBAAKBgQC7\n-----END RSA PRIVATE KEY-----\n\"\"\"\n"; + let matched = b"MIICWwIBAAKBgQC7\n"; + let start = blob + .windows(matched.len()) + .position(|window| window == matched) + .expect("match bytes present"); + let span = OffsetSpan::from_range(start..start + matched.len()); + let config = InlineIgnoreConfig::new(&[]); + assert!(config.should_ignore(blob, &span)); + } + + #[test] + fn 
ignores_multiline_hex_payload_with_directive() { + let blob = b"# kingfisher:ignore\nsecret = \"\"\"\n00112233445566778899aabbccddeeff\nffeeddccbbaa99887766554433221100\n\"\"\"\n"; + let matched = b"00112233445566778899aabbccddeeff\nffeeddccbbaa99887766554433221100\n"; + let start = blob + .windows(matched.len()) + .position(|window| window == matched) + .expect("match bytes present"); + let span = OffsetSpan::from_range(start..start + matched.len()); + let config = InlineIgnoreConfig::new(&[]); + assert!(config.should_ignore(blob, &span)); + } + + #[test] + fn ignores_multiline_base32_payload_with_directive_after_block() { + let blob = + b"secret = \"\"\"\nMFRGGZDFMZTWQ2LK\nONSWG4TFOQ======\n\"\"\"\n// kingfisher:ignore\n"; + let matched = b"MFRGGZDFMZTWQ2LK\nONSWG4TFOQ======\n"; + let start = blob + .windows(matched.len()) + .position(|window| window == matched) + .expect("match bytes present"); + let span = OffsetSpan::from_range(start..start + matched.len()); + let config = InlineIgnoreConfig::new(&[]); + assert!(config.should_ignore(blob, &span)); + } + + #[test] + fn ignores_multiline_without_trailing_newline() { + let blob = b"let secret = \"\"\"\nline1\nline2\n\"\"\"\n# kingfisher:ignore\n"; + let matched = b"line1\nline2"; + let start = blob + .windows(matched.len()) + .position(|window| window == matched) + .expect("match bytes present"); + let span = OffsetSpan::from_range(start..start + matched.len()); + let config = InlineIgnoreConfig::new(&[]); + assert!(config.should_ignore(blob, &span)); + } + + #[test] + fn ignores_multiline_with_directive_before_secret() { + let blob = b"// kingfisher:ignore\nlet secret = \"\"\"\nline1\nline2\n\"\"\"\n"; + let matched = b"line1\nline2\n"; + let start = blob + .windows(matched.len()) + .position(|window| window == matched) + .expect("match bytes present"); + let span = OffsetSpan::from_range(start..start + matched.len()); + let config = InlineIgnoreConfig::new(&[]); + assert!(config.should_ignore(blob, &span)); + } 
+ + #[test] + fn trim_ascii_whitespace_returns_inner_slice() { + assert_eq!(trim_ascii_whitespace(b" abc "), b"abc"); + assert!(trim_ascii_whitespace(b" ").is_empty()); + } + + #[test] + fn skips_lines_with_only_delimiters() { + assert!(should_skip_for_directive_search(b"\"\"\"")); + assert!(should_skip_for_directive_search(b" \"\"\" ")); + assert!(should_skip_for_directive_search(b"let secret = \"\"\"")); + assert!(!should_skip_for_directive_search(b"value")); + assert!(should_skip_for_directive_search(b"-----BEGIN RSA PRIVATE KEY-----")); + assert!(should_skip_for_directive_search(b"MIICWwIBAAKBgQC7")); + assert!(should_skip_for_directive_search(b"0011223344556677")); + assert!(should_skip_for_directive_search(b"MFRGGZDFMZTWQ2LK")); + } + + #[test] + fn disabled_config_never_ignores() { + let blob = b"let secret = 'value' # kingfisher:ignore"; + let matched = b"value"; + let start = blob + .windows(matched.len()) + .position(|window| window == matched) + .expect("match bytes present"); + let span = OffsetSpan::from_range(start..start + matched.len()); + let config = InlineIgnoreConfig::disabled(); + assert!(!config.should_ignore(blob, &span)); + } +} diff --git a/src/lib.rs b/src/lib.rs index 3ceed02..9fc8106 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,6 +21,7 @@ pub mod git_url; pub mod gitea; pub mod github; pub mod gitlab; +pub mod inline_ignore; pub mod jira; pub mod liquid_filters; pub mod location; diff --git a/src/main.rs b/src/main.rs index b6bb1fd..c0f6f9a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -416,6 +416,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { since_commit: None, branch: None, }, + extra_ignore_comments: Vec::new(), content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, no_extract_archives: true, @@ -437,6 +438,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { skip_word: Vec::new(), output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_base64: false, 
+ no_inline_ignore: false, } } /// Run the rules check command diff --git a/src/matcher.rs b/src/matcher.rs index 102820b..f93576a 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -23,6 +23,7 @@ use xxhash_rust::xxh3::xxh3_64; use crate::{ blob::{Blob, BlobId, BlobIdMap}, entropy::calculate_shannon_entropy, + inline_ignore::InlineIgnoreConfig, location::{Location, LocationMapping, OffsetSpan, SourcePoint, SourceSpan}, origin::OriginSet, parser, @@ -199,6 +200,9 @@ pub struct Matcher<'a> { /// Rule profiler for measuring performance of individual rules profiler: Option>, + + /// Configuration that controls inline ignore directives + inline_ignore_config: InlineIgnoreConfig, } /// This `Drop` implementation updates the `global_stats` with the local stats impl<'a> Drop for Matcher<'a> { @@ -226,6 +230,8 @@ impl<'a> Matcher<'a> { global_stats: Option<&'a Mutex>, enable_profiling: bool, shared_profiler: Option>, + extra_ignore_directives: &[String], + disable_inline_ignores: bool, ) -> Result { // Changed: removed `with_capacity(16384)` so we don't pre-allocate a large Vec let raw_matches_scratch = Vec::new(); @@ -247,6 +253,11 @@ impl<'a> Matcher<'a> { seen_blobs, user_data, profiler, + inline_ignore_config: if disable_inline_ignores { + InlineIgnoreConfig::disabled() + } else { + InlineIgnoreConfig::new(extra_ignore_directives) + }, }) } @@ -403,6 +414,7 @@ impl<'a> Matcher<'a> { redact, &filename, self.profiler.as_ref(), + &self.inline_ignore_config, ); } // If tree-sitter produced base64-decoded matches, try them against all rules @@ -427,6 +439,7 @@ impl<'a> Matcher<'a> { redact, &filename, self.profiler.as_ref(), + &self.inline_ignore_config, ); } } @@ -457,6 +470,7 @@ impl<'a> Matcher<'a> { redact, &filename, self.profiler.as_ref(), + &self.inline_ignore_config, ); } if depth + 1 < MAX_B64_DEPTH { @@ -560,6 +574,7 @@ fn filter_match<'b>( redact: bool, filename: &str, profiler: Option<&Arc>, + inline_ignore_config: &InlineIgnoreConfig, ) { let mut timer = 
profiler.map(|p| RuleTimer::new(p, rule.id(), rule.name(), &rule.syntax.pattern, filename)); @@ -590,6 +605,10 @@ fn filter_match<'b>( let matching_input_offset_span = OffsetSpan::from_range( (start + matching_input.start())..(start + matching_input.end()), ); + if inline_ignore_config.should_ignore(blob_bytes, &matching_input_offset_span) { + debug!("Skipping match due to inline ignore directive"); + continue; + } let match_key = compute_match_key( matching_input.as_bytes(), rule.id().as_bytes(), @@ -961,7 +980,7 @@ pub fn compute_finding_fingerprint( // ------------------------------------------------------------------------------------------------- #[cfg(test)] mod test { - use std::collections::BTreeMap; + use std::{collections::BTreeMap, path::PathBuf}; use pretty_assertions::assert_eq; // --------------------------------------------------------------------- @@ -970,7 +989,11 @@ mod test { use proptest::prelude::*; use super::*; - use crate::rules::rule::{DependsOnRule, HttpRequest, HttpValidation, RuleSyntax, Validation}; + use crate::{ + blob::{Blob, BlobIdMap}, + origin::{Origin, OriginSet}, + rules::rule::{DependsOnRule, HttpRequest, HttpValidation, RuleSyntax, Validation}, + }; proptest! 
{ #[test] @@ -1009,7 +1032,17 @@ mod test { let rules_db = RulesDatabase::from_rules(vec![rule]).unwrap(); let seen = BlobIdMap::new(); let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); - let mut m = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None).unwrap(); + let mut m = Matcher::new( + &rules_db, + scanner_pool, + &seen, + None, + false, + None, + &[], + false, + ) + .unwrap(); // ── run the scan ────────────────────────────────────────────── m.scan_bytes_raw(&noise, "buf").unwrap(); @@ -1080,6 +1113,8 @@ mod test { None, enable_rule_profiling, None, // Pass the shared profiler + &[], + false, )?; matcher.scan_bytes_raw(input.as_bytes(), "fname")?; assert_eq!( @@ -1167,7 +1202,7 @@ mod test { let rules_db = RulesDatabase::from_rules(vec![rule])?; let seen = BlobIdMap::new(); let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); - let mut m = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None)?; + let mut m = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?; let buf = b"dup dup"; // two literal hits, same rule @@ -1184,4 +1219,120 @@ mod test { assert_eq!(second_len, 2); Ok(()) } + + #[test] + fn inline_comment_skips_match() -> Result<()> { + let rule = Rule::new(RuleSyntax { + id: "inline.ignore".into(), + name: "inline".into(), + pattern: "secret_token".into(), + confidence: crate::rules::rule::Confidence::Low, + min_entropy: 0.0, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None::, + depends_on_rule: vec![], + }); + let rules_db = RulesDatabase::from_rules(vec![rule])?; + let seen = BlobIdMap::new(); + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); + let mut matcher = + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?; + + let blob = Blob::from_bytes(b"let key = \"secret_token\" # kingfisher:ignore".to_vec()); + let origin = 
OriginSet::from(Origin::from_file(PathBuf::from("inline.txt"))); + + match matcher.scan_blob(&blob, &origin, None, false, false, false)? { + ScanResult::New(matches) => assert!(matches.is_empty()), + _ => panic!("unexpected scan result"), + } + + Ok(()) + } + + #[test] + fn inline_comment_after_multiline_secret_skips_match() -> Result<()> { + let rule = Rule::new(RuleSyntax { + id: "inline.multiline".into(), + name: "inline multiline".into(), + pattern: "line1\\s+line2".into(), + confidence: crate::rules::rule::Confidence::Low, + min_entropy: 0.0, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None::, + depends_on_rule: vec![], + }); + let rules_db = RulesDatabase::from_rules(vec![rule])?; + let seen = BlobIdMap::new(); + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); + let mut matcher = + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?; + + let blob = Blob::from_bytes( + br#"let data = """ +line1 +line2 +""" +# kingfisher:ignore +"# + .to_vec(), + ); + let origin = OriginSet::from(Origin::from_file(PathBuf::from("multiline.txt"))); + + match matcher.scan_blob(&blob, &origin, None, false, false, false)? 
{ + ScanResult::New(matches) => assert!(matches.is_empty()), + _ => panic!("unexpected scan result"), + } + + Ok(()) + } + + #[test] + fn compat_flag_controls_external_directives() -> Result<()> { + let rule = Rule::new(RuleSyntax { + id: "inline.compat".into(), + name: "inline compat".into(), + pattern: "supersecret123".into(), + confidence: crate::rules::rule::Confidence::Low, + min_entropy: 0.0, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None::, + depends_on_rule: vec![], + }); + let rules_db = RulesDatabase::from_rules(vec![rule])?; + + let blob = Blob::from_bytes(b"token = \"supersecret123\" # gitleaks:allow".to_vec()); + let origin = OriginSet::from(Origin::from_file(PathBuf::from("compat.txt"))); + + let seen = BlobIdMap::new(); + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); + let mut matcher = + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?; + let matches_without_compat = + match matcher.scan_blob(&blob, &origin, None, false, false, false)? { + ScanResult::New(matches) => matches.len(), + _ => panic!("unexpected scan result"), + }; + assert_eq!(matches_without_compat, 1, "directive should be ignored without compat flag"); + + let seen = BlobIdMap::new(); + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); + let extra = vec![String::from("gitleaks:allow")]; + let mut matcher = + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &extra, false)?; + match matcher.scan_blob(&blob, &origin, None, false, false, false)? 
{ + ScanResult::New(matches) => assert!(matches.is_empty()), + _ => panic!("unexpected scan result"), + } + + Ok(()) + } } diff --git a/src/reporter.rs b/src/reporter.rs index 73bc541..52a491c 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -842,6 +842,7 @@ mod tests { since_commit: None, branch: None, }, + extra_ignore_comments: Vec::new(), content_filtering_args: ContentFilteringArgs { max_file_size_mb: 256.0, exclude: Vec::new(), @@ -863,6 +864,7 @@ mod tests { manage_baseline: false, skip_regex: Vec::new(), skip_word: Vec::new(), + no_inline_ignore: false, }; let record = reporter.build_finding_record(&report_match, &scan_args); diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 8b4f59c..02dc20c 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -143,6 +143,7 @@ mod tests { since_commit: None, branch: None, }, + extra_ignore_comments: Vec::new(), content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, no_extract_archives: false, @@ -163,6 +164,7 @@ mod tests { skip_regex: Vec::new(), skip_word: Vec::new(), no_base64: false, + no_inline_ignore: false, } } diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index df56427..46a2b4d 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -144,6 +144,8 @@ pub fn enumerate_filesystem_inputs( Some(&matcher_stats), enable_profiling, Some(shared_profiler), + &args.extra_ignore_comments, + args.no_inline_ignore, )?; let blob_processor_init_time = Mutex::new(t1.elapsed()); let make_blob_processor = || -> BlobProcessor { diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index eb4ad10..fb6577a 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -622,6 +622,8 @@ pub async fn fetch_s3_objects( Some(matcher_stats), enable_profiling, Some(shared_profiler.clone()), + &args.extra_ignore_comments, + args.no_inline_ignore, )?; let mut processor = BlobProcessor { matcher }; diff --git a/src/update.rs 
b/src/update.rs index db8d9cb..30e57b4 100644 --- a/src/update.rs +++ b/src/update.rs @@ -36,12 +36,13 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt return None; } - // Decide once whether we want coloured output. - let use_color = std::io::stderr().is_terminal() && !global_args.quiet; + // Respect the user's color preferences when printing update + // by delegating to the same helper used by the main reporter logic. This keeps + // the update checker in sync with the rest of the application and avoids + // emitting raw ANSI escape codes when colour output has been disabled. + let use_color = !global_args.quiet && global_args.use_color(std::io::stderr()); let styles = Styles::new(use_color); - info!("{}", "Checking for updates…"); - let mut builder = Update::configure(); builder .repo_owner("mongodb") diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs index 72bd950..3947548 100644 --- a/tests/int_allowlist.rs +++ b/tests/int_allowlist.rs @@ -112,6 +112,7 @@ fn run_skiplist(skip_regex: Vec, skip_skipword: Vec) -> Result, skip_skipword: Vec) -> Result Result<()> { skip_regex: Vec::new(), skip_word: Vec::new(), no_base64: false, + extra_ignore_comments: Vec::new(), + no_inline_ignore: false, }; let global_args = GlobalArgs { diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index cd83a7f..e86c589 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -152,6 +152,8 @@ rules: skip_regex: Vec::new(), skip_word: Vec::new(), no_base64: false, + extra_ignore_comments: Vec::new(), + no_inline_ignore: false, }; let global_args = GlobalArgs { diff --git a/tests/int_github.rs b/tests/int_github.rs index 06c67a7..dcf936a 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -139,6 +139,8 @@ fn test_github_remote_scan() -> Result<()> { skip_regex: Vec::new(), skip_word: Vec::new(), no_base64: false, + extra_ignore_comments: Vec::new(), + no_inline_ignore: false, }; // Create global arguments let global_args = 
GlobalArgs { diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index e55655a..7c2e0b9 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -116,6 +116,7 @@ fn test_gitlab_remote_scan() -> Result<()> { since_commit: None, branch: None, }, + extra_ignore_comments: Vec::new(), content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, no_extract_archives: false, @@ -137,6 +138,7 @@ fn test_gitlab_remote_scan() -> Result<()> { skip_regex: Vec::new(), skip_word: Vec::new(), no_base64: false, + no_inline_ignore: false, }; let global_args = GlobalArgs { @@ -272,6 +274,8 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { skip_regex: Vec::new(), skip_word: Vec::new(), no_base64: false, + extra_ignore_comments: Vec::new(), + no_inline_ignore: false, }; let global_args = GlobalArgs { diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 48247af..7d7accb 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -116,6 +116,8 @@ async fn test_redact_hashes_finding_values() -> Result<()> { skip_regex: Vec::new(), skip_word: Vec::new(), no_base64: false, + extra_ignore_comments: Vec::new(), + no_inline_ignore: false, }; let global_args = GlobalArgs { diff --git a/tests/int_slack.rs b/tests/int_slack.rs index 2575a3c..5f2d01a 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -102,6 +102,7 @@ impl TestContext { since_commit: None, branch: None, }, + extra_ignore_comments: Vec::new(), content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, extraction_depth: 2, @@ -123,6 +124,7 @@ impl TestContext { skip_regex: Vec::new(), skip_word: Vec::new(), no_base64: false, + no_inline_ignore: false, }; let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; @@ -246,6 +248,8 @@ async fn test_scan_slack_messages() -> Result<()> { skip_regex: Vec::new(), skip_word: Vec::new(), no_base64: false, + extra_ignore_comments: Vec::new(), + no_inline_ignore: false, }; let global_args = GlobalArgs { diff --git 
a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index ea1c809..74cf6e4 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -195,6 +195,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> { skip_regex: Vec::new(), skip_word: Vec::new(), no_base64: false, + extra_ignore_comments: Vec::new(), + no_inline_ignore: false, }; /* --------------------------------------------------------- * diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index b87d721..464377a 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -138,6 +138,8 @@ impl TestContext { skip_regex: Vec::new(), skip_word: Vec::new(), no_base64: false, + extra_ignore_comments: Vec::new(), + no_inline_ignore: false, }; let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules) @@ -227,6 +229,7 @@ impl TestContext { since_commit: None, branch: None, }, + extra_ignore_comments: Vec::new(), content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, extraction_depth: 2, @@ -248,6 +251,7 @@ impl TestContext { skip_regex: Vec::new(), skip_word: Vec::new(), no_base64: false, + no_inline_ignore: false, }; let global_args = GlobalArgs {