use crate::location::OffsetSpan; /// Configuration for inline ignore directives. #[derive(Clone, Debug, Default)] pub struct InlineIgnoreConfig { tokens: Vec>, } impl InlineIgnoreConfig { /// Create a new configuration. /// /// * `additional_tokens` - inline ignore directives supplied by the user. pub fn new(additional_tokens: &[String]) -> Self { let mut tokens = vec![b"kingfisher:ignore".to_vec()]; for token in additional_tokens { let trimmed = token.trim(); if trimmed.is_empty() { continue; } let lowered = trimmed.to_ascii_lowercase().into_bytes(); if tokens.iter().any(|existing| existing == &lowered) { continue; } tokens.push(lowered); } Self { tokens } } /// Return a configuration with inline ignores disabled. pub fn disabled() -> Self { Self { tokens: Vec::new() } } #[inline] fn has_tokens(&self) -> bool { !self.tokens.is_empty() } /// Returns `true` when the provided blob slice contains an inline ignore /// directive that should suppress a finding for the given span. pub fn should_ignore(&self, blob_bytes: &[u8], span: &OffsetSpan) -> bool { if !self.has_tokens() { return false; } let (start_line_start, start_line_end) = line_bounds(blob_bytes, span.start); if start_line_end > start_line_start { let start_line = &blob_bytes[start_line_start..start_line_end]; if line_has_directive(start_line, &self.tokens) { return true; } } // Scan backwards to allow directives that appear before the start of a // multi-line string or value. This mirrors tools like Gitleaks where // the ignore directive is often placed immediately above the secret. let mut cursor = start_line_start; while cursor > 0 { let previous_index = cursor.saturating_sub(1); let (prev_start, prev_end) = line_bounds(blob_bytes, previous_index); if prev_end <= prev_start { break; } let prev_line = &blob_bytes[prev_start..prev_end]; if line_has_directive(prev_line, &self.tokens) { return true; } if !should_skip_for_directive_search(prev_line) { break; } if prev_start == 0 { break; } cursor = prev_start; } let end_index = if span.end == 0 { 0 } else { span.end - 1 }; let (closing_line_start, closing_line_end) = line_bounds(blob_bytes, end_index.min(blob_bytes.len())); if closing_line_end > closing_line_start && (closing_line_start != start_line_start || closing_line_end != start_line_end) { let closing_line = &blob_bytes[closing_line_start..closing_line_end]; if line_has_directive(closing_line, &self.tokens) { return true; } } // Also consider lines after the match so that multi-line strings can be // ignored when the directive appears after the closing delimiter (a // common pattern in languages like Python). let mut cursor = closing_line_end; while cursor < blob_bytes.len() { if blob_bytes[cursor] == b'\n' { cursor += 1; continue; } let (_, next_end) = line_bounds(blob_bytes, cursor); if next_end <= cursor { break; } let next_line = &blob_bytes[cursor..next_end]; if line_has_directive(next_line, &self.tokens) { return true; } if !should_skip_for_directive_search(next_line) { break; } cursor = next_end; } false } } fn should_skip_for_directive_search(line: &[u8]) -> bool { let trimmed = trim_ascii_whitespace(line); if trimmed.is_empty() { return true; } if trimmed.iter().all(|&b| b == trimmed[0]) && matches!(trimmed[0], b'"' | b'\'' | b'`') { return true; } if ends_with_multiline_delimiter(trimmed) { return true; } if looks_like_pem_boundary(trimmed) { return true; } if looks_like_encoded_secret_body(trimmed) { return true; } false } fn ends_with_multiline_delimiter(trimmed: &[u8]) -> bool { if trimmed.len() < 3 { return false; } let last = *trimmed.last().unwrap(); if !matches!(last, b'"' | b'\'' | b'`') { return false; } let count = trimmed.iter().rev().take_while(|&&ch| ch == last).count(); count >= 3 } fn looks_like_pem_boundary(trimmed: &[u8]) -> bool { trimmed.starts_with(b"-----BEGIN ") || trimmed.starts_with(b"-----END ") } fn looks_like_encoded_secret_body(trimmed: &[u8]) -> bool { const MIN_LEN: usize = 16; if trimmed.len() < MIN_LEN { return false; } let is_base64ish = trimmed.iter().all(|&b| { matches!( b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/' | b'=' | b'-' | b'_' ) }); if is_base64ish { return true; } let is_hexish = trimmed.iter().all(|&b| matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')); if is_hexish { return true; } let is_base32ish = trimmed.iter().all(|&b| matches!(b, b'A'..=b'Z' | b'2'..=b'7' | b'=')); if is_base32ish { return true; } // Allow directives to be placed after payloads that mix a high percentage of // alpha-numeric characters commonly seen in encoded data (e.g. cryptographic // material that includes punctuation like ':' or '.') without risking // accidentally skipping regular source lines. let allowed = |b: u8| { matches!( b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/' | b'=' | b'-' | b'_' | b':' | b'.' ) }; let allowed_count = trimmed.iter().copied().filter(|&b| allowed(b)).count(); allowed_count * 10 >= trimmed.len() * 9 } fn trim_ascii_whitespace(line: &[u8]) -> &[u8] { let mut start = 0; while start < line.len() && line[start].is_ascii_whitespace() { start += 1; } let mut end = line.len(); while end > start && line[end - 1].is_ascii_whitespace() { end -= 1; } &line[start..end] } fn line_bounds(bytes: &[u8], index: usize) -> (usize, usize) { if bytes.is_empty() { return (0, 0); } let mut start = index.min(bytes.len()); while start > 0 && bytes[start - 1] != b'\n' { start -= 1; } let mut end = index.min(bytes.len()); while end < bytes.len() && bytes[end] != b'\n' { end += 1; } (start, end) } fn line_has_directive(line: &[u8], tokens: &[Vec]) -> bool { if line.is_empty() { return false; } let mut lowercase = line.to_vec(); lowercase.iter_mut().for_each(|b| *b = b.to_ascii_lowercase()); tokens.iter().any(|token| memchr::memmem::find(&lowercase, token.as_slice()).is_some()) } #[cfg(test)] mod tests { use super::{ line_bounds, line_has_directive, should_skip_for_directive_search, trim_ascii_whitespace, InlineIgnoreConfig, }; use crate::location::OffsetSpan; #[test] fn bounds_cover_expected_ranges() { let data = b"one\ntwo\nthree"; assert_eq!(line_bounds(data, 0), (0, 3)); assert_eq!(line_bounds(data, 4), (4, 7)); assert_eq!(line_bounds(data, data.len()), (8, 13)); } #[test] fn detects_directives_in_lines() { let tokens = vec![b"kingfisher:ignore".to_vec()]; assert!(line_has_directive(b"secret # kingfisher:ignore", &tokens)); assert!(line_has_directive(b"kingfisher:ignore before value", &tokens)); assert!(line_has_directive(b"value // Gitleaks:Allow", &[b"gitleaks:allow".to_vec()])); assert!(!line_has_directive(b"secret", &tokens)); } #[test] fn respects_multiline_block_comment_prefix() { let tokens = vec![b"kingfisher:ignore".to_vec()]; assert!(line_has_directive(b" * kingfisher:ignore", &tokens)); } #[test] fn ignores_multi_line_string_with_trailing_comment() { let blob = b"let secret = \"\"\"\nline1\nline2\n\"\"\"\n# kingfisher:ignore\n"; let matched = b"line1\nline2\n"; let start = blob .windows(matched.len()) .position(|window| window == matched) .expect("match bytes present"); let span = OffsetSpan::from_range(start..start + matched.len()); let config = InlineIgnoreConfig::new(&[]); assert!(config.should_ignore(blob, &span)); } #[test] fn ignores_multiline_with_directive_on_closing_line() { let blob = b"api_key = \"\"\"\nline1\nline2\n\"\"\" // kingfisher:ignore\n"; let matched = b"line1\nline2\n"; let start = blob .windows(matched.len()) .position(|window| window == matched) .expect("match bytes present"); let span = OffsetSpan::from_range(start..start + matched.len()); let config = InlineIgnoreConfig::new(&[]); assert!(config.should_ignore(blob, &span)); } #[test] fn ignores_pem_with_directive_before_block() { let blob = b"// kingfisher:ignore\napi_key = \"\"\"\n-----BEGIN RSA PRIVATE KEY-----\nMIICWwIBAAKBgQC7\n-----END RSA PRIVATE KEY-----\n\"\"\"\n"; let matched = b"MIICWwIBAAKBgQC7\n"; let start = blob .windows(matched.len()) .position(|window| window == matched) .expect("match bytes present"); let span = OffsetSpan::from_range(start..start + matched.len()); let config = InlineIgnoreConfig::new(&[]); assert!(config.should_ignore(blob, &span)); } #[test] fn ignores_multiline_hex_payload_with_directive() { let blob = b"# kingfisher:ignore\nsecret = \"\"\"\n00112233445566778899aabbccddeeff\nffeeddccbbaa99887766554433221100\n\"\"\"\n"; let matched = b"00112233445566778899aabbccddeeff\nffeeddccbbaa99887766554433221100\n"; let start = blob .windows(matched.len()) .position(|window| window == matched) .expect("match bytes present"); let span = OffsetSpan::from_range(start..start + matched.len()); let config = InlineIgnoreConfig::new(&[]); assert!(config.should_ignore(blob, &span)); } #[test] fn ignores_multiline_base32_payload_with_directive_after_block() { let blob = b"secret = \"\"\"\nMFRGGZDFMZTWQ2LK\nONSWG4TFOQ======\n\"\"\"\n// kingfisher:ignore\n"; let matched = b"MFRGGZDFMZTWQ2LK\nONSWG4TFOQ======\n"; let start = blob .windows(matched.len()) .position(|window| window == matched) .expect("match bytes present"); let span = OffsetSpan::from_range(start..start + matched.len()); let config = InlineIgnoreConfig::new(&[]); assert!(config.should_ignore(blob, &span)); } #[test] fn ignores_multiline_without_trailing_newline() { let blob = b"let secret = \"\"\"\nline1\nline2\n\"\"\"\n# kingfisher:ignore\n"; let matched = b"line1\nline2"; let start = blob .windows(matched.len()) .position(|window| window == matched) .expect("match bytes present"); let span = OffsetSpan::from_range(start..start + matched.len()); let config = InlineIgnoreConfig::new(&[]); assert!(config.should_ignore(blob, &span)); } #[test] fn ignores_multiline_with_directive_before_secret() { let blob = b"// kingfisher:ignore\nlet secret = \"\"\"\nline1\nline2\n\"\"\"\n"; let matched = b"line1\nline2\n"; let start = blob .windows(matched.len()) .position(|window| window == matched) .expect("match bytes present"); let span = OffsetSpan::from_range(start..start + matched.len()); let config = InlineIgnoreConfig::new(&[]); assert!(config.should_ignore(blob, &span)); } #[test] fn ignores_single_line_secret_with_directive_on_previous_line() { let blob = b"# safe-secret\n123456\n"; let matched = b"123456"; let start = blob .windows(matched.len()) .position(|window| window == matched) .expect("match bytes present"); let span = OffsetSpan::from_range(start..start + matched.len()); let config = InlineIgnoreConfig::new(&["safe-secret".to_string()]); assert!(config.should_ignore(blob, &span)); } #[test] fn trim_ascii_whitespace_returns_inner_slice() { assert_eq!(trim_ascii_whitespace(b" abc "), b"abc"); assert!(trim_ascii_whitespace(b" ").is_empty()); } #[test] fn skips_lines_with_only_delimiters() { assert!(should_skip_for_directive_search(b"\"\"\"")); assert!(should_skip_for_directive_search(b" \"\"\" ")); assert!(should_skip_for_directive_search(b"let secret = \"\"\"")); assert!(!should_skip_for_directive_search(b"value")); assert!(should_skip_for_directive_search(b"-----BEGIN RSA PRIVATE KEY-----")); assert!(should_skip_for_directive_search(b"MIICWwIBAAKBgQC7")); assert!(should_skip_for_directive_search(b"0011223344556677")); assert!(should_skip_for_directive_search(b"MFRGGZDFMZTWQ2LK")); } #[test] fn disabled_config_never_ignores() { let blob = b"let secret = 'value' # kingfisher:ignore"; let matched = b"value"; let start = blob .windows(matched.len()) .position(|window| window == matched) .expect("match bytes present"); let span = OffsetSpan::from_range(start..start + matched.len()); let config = InlineIgnoreConfig::disabled(); assert!(!config.should_ignore(blob, &span)); } }