forked from mirrors/kingfisher
436 lines
14 KiB
Rust
436 lines
14 KiB
Rust
use crate::location::OffsetSpan;
|
|
|
|
/// Configuration for inline ignore directives.
|
|
#[derive(Clone, Debug, Default)]
|
|
pub struct InlineIgnoreConfig {
|
|
tokens: Vec<Vec<u8>>,
|
|
}
|
|
|
|
impl InlineIgnoreConfig {
|
|
/// Create a new configuration.
|
|
///
|
|
/// * `additional_tokens` - inline ignore directives supplied by the user.
|
|
pub fn new(additional_tokens: &[String]) -> Self {
|
|
let mut tokens = vec![b"kingfisher:ignore".to_vec()];
|
|
|
|
for token in additional_tokens {
|
|
let trimmed = token.trim();
|
|
if trimmed.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
let lowered = trimmed.to_ascii_lowercase().into_bytes();
|
|
if tokens.iter().any(|existing| existing == &lowered) {
|
|
continue;
|
|
}
|
|
|
|
tokens.push(lowered);
|
|
}
|
|
|
|
Self { tokens }
|
|
}
|
|
|
|
/// Return a configuration with inline ignores disabled.
|
|
pub fn disabled() -> Self {
|
|
Self { tokens: Vec::new() }
|
|
}
|
|
|
|
#[inline]
|
|
fn has_tokens(&self) -> bool {
|
|
!self.tokens.is_empty()
|
|
}
|
|
|
|
/// Returns `true` when the provided blob slice contains an inline ignore
|
|
/// directive that should suppress a finding for the given span.
|
|
pub fn should_ignore(&self, blob_bytes: &[u8], span: &OffsetSpan) -> bool {
|
|
if !self.has_tokens() {
|
|
return false;
|
|
}
|
|
|
|
let (start_line_start, start_line_end) = line_bounds(blob_bytes, span.start);
|
|
if start_line_end > start_line_start {
|
|
let start_line = &blob_bytes[start_line_start..start_line_end];
|
|
if line_has_directive(start_line, &self.tokens) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// Scan backwards to allow directives that appear before the start of a
|
|
// multi-line string or value. This mirrors tools like Gitleaks where
|
|
// the ignore directive is often placed immediately above the secret.
|
|
let mut cursor = start_line_start;
|
|
while cursor > 0 {
|
|
let previous_index = cursor.saturating_sub(1);
|
|
let (prev_start, prev_end) = line_bounds(blob_bytes, previous_index);
|
|
if prev_end <= prev_start {
|
|
break;
|
|
}
|
|
|
|
let prev_line = &blob_bytes[prev_start..prev_end];
|
|
if line_has_directive(prev_line, &self.tokens) {
|
|
return true;
|
|
}
|
|
|
|
if !should_skip_for_directive_search(prev_line) {
|
|
break;
|
|
}
|
|
|
|
if prev_start == 0 {
|
|
break;
|
|
}
|
|
|
|
cursor = prev_start;
|
|
}
|
|
|
|
let end_index = if span.end == 0 { 0 } else { span.end - 1 };
|
|
let (closing_line_start, closing_line_end) =
|
|
line_bounds(blob_bytes, end_index.min(blob_bytes.len()));
|
|
if closing_line_end > closing_line_start
|
|
&& (closing_line_start != start_line_start || closing_line_end != start_line_end)
|
|
{
|
|
let closing_line = &blob_bytes[closing_line_start..closing_line_end];
|
|
if line_has_directive(closing_line, &self.tokens) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// Also consider lines after the match so that multi-line strings can be
|
|
// ignored when the directive appears after the closing delimiter (a
|
|
// common pattern in languages like Python).
|
|
let mut cursor = closing_line_end;
|
|
while cursor < blob_bytes.len() {
|
|
if blob_bytes[cursor] == b'\n' {
|
|
cursor += 1;
|
|
continue;
|
|
}
|
|
|
|
let (_, next_end) = line_bounds(blob_bytes, cursor);
|
|
if next_end <= cursor {
|
|
break;
|
|
}
|
|
|
|
let next_line = &blob_bytes[cursor..next_end];
|
|
if line_has_directive(next_line, &self.tokens) {
|
|
return true;
|
|
}
|
|
|
|
if !should_skip_for_directive_search(next_line) {
|
|
break;
|
|
}
|
|
|
|
cursor = next_end;
|
|
}
|
|
|
|
false
|
|
}
|
|
}
|
|
|
|
fn should_skip_for_directive_search(line: &[u8]) -> bool {
|
|
let trimmed = trim_ascii_whitespace(line);
|
|
if trimmed.is_empty() {
|
|
return true;
|
|
}
|
|
|
|
if trimmed.iter().all(|&b| b == trimmed[0]) && matches!(trimmed[0], b'"' | b'\'' | b'`') {
|
|
return true;
|
|
}
|
|
|
|
if ends_with_multiline_delimiter(trimmed) {
|
|
return true;
|
|
}
|
|
|
|
if looks_like_pem_boundary(trimmed) {
|
|
return true;
|
|
}
|
|
|
|
if looks_like_encoded_secret_body(trimmed) {
|
|
return true;
|
|
}
|
|
|
|
false
|
|
}
|
|
|
|
fn ends_with_multiline_delimiter(trimmed: &[u8]) -> bool {
|
|
if trimmed.len() < 3 {
|
|
return false;
|
|
}
|
|
|
|
let last = *trimmed.last().unwrap();
|
|
if !matches!(last, b'"' | b'\'' | b'`') {
|
|
return false;
|
|
}
|
|
|
|
let count = trimmed.iter().rev().take_while(|&&ch| ch == last).count();
|
|
|
|
count >= 3
|
|
}
|
|
|
|
fn looks_like_pem_boundary(trimmed: &[u8]) -> bool {
|
|
trimmed.starts_with(b"-----BEGIN ") || trimmed.starts_with(b"-----END ")
|
|
}
|
|
|
|
fn looks_like_encoded_secret_body(trimmed: &[u8]) -> bool {
|
|
const MIN_LEN: usize = 16;
|
|
|
|
if trimmed.len() < MIN_LEN {
|
|
return false;
|
|
}
|
|
|
|
let is_base64ish = trimmed.iter().all(|&b| {
|
|
matches!(
|
|
b,
|
|
b'A'..=b'Z'
|
|
| b'a'..=b'z'
|
|
| b'0'..=b'9'
|
|
| b'+'
|
|
| b'/'
|
|
| b'='
|
|
| b'-'
|
|
| b'_'
|
|
)
|
|
});
|
|
if is_base64ish {
|
|
return true;
|
|
}
|
|
|
|
let is_hexish = trimmed.iter().all(|&b| matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'));
|
|
if is_hexish {
|
|
return true;
|
|
}
|
|
|
|
let is_base32ish = trimmed.iter().all(|&b| matches!(b, b'A'..=b'Z' | b'2'..=b'7' | b'='));
|
|
if is_base32ish {
|
|
return true;
|
|
}
|
|
|
|
// Allow directives to be placed after payloads that mix a high percentage of
|
|
// alpha-numeric characters commonly seen in encoded data (e.g. cryptographic
|
|
// material that includes punctuation like ':' or '.') without risking
|
|
// accidentally skipping regular source lines.
|
|
let allowed = |b: u8| {
|
|
matches!(
|
|
b,
|
|
b'A'..=b'Z'
|
|
| b'a'..=b'z'
|
|
| b'0'..=b'9'
|
|
| b'+'
|
|
| b'/'
|
|
| b'='
|
|
| b'-'
|
|
| b'_'
|
|
| b':'
|
|
| b'.'
|
|
)
|
|
};
|
|
|
|
let allowed_count = trimmed.iter().copied().filter(|&b| allowed(b)).count();
|
|
allowed_count * 10 >= trimmed.len() * 9
|
|
}
|
|
|
|
fn trim_ascii_whitespace(line: &[u8]) -> &[u8] {
|
|
let mut start = 0;
|
|
while start < line.len() && line[start].is_ascii_whitespace() {
|
|
start += 1;
|
|
}
|
|
|
|
let mut end = line.len();
|
|
while end > start && line[end - 1].is_ascii_whitespace() {
|
|
end -= 1;
|
|
}
|
|
|
|
&line[start..end]
|
|
}
|
|
|
|
fn line_bounds(bytes: &[u8], index: usize) -> (usize, usize) {
|
|
if bytes.is_empty() {
|
|
return (0, 0);
|
|
}
|
|
let mut start = index.min(bytes.len());
|
|
while start > 0 && bytes[start - 1] != b'\n' {
|
|
start -= 1;
|
|
}
|
|
let mut end = index.min(bytes.len());
|
|
while end < bytes.len() && bytes[end] != b'\n' {
|
|
end += 1;
|
|
}
|
|
(start, end)
|
|
}
|
|
|
|
fn line_has_directive(line: &[u8], tokens: &[Vec<u8>]) -> bool {
|
|
if line.is_empty() {
|
|
return false;
|
|
}
|
|
|
|
let mut lowercase = line.to_vec();
|
|
lowercase.iter_mut().for_each(|b| *b = b.to_ascii_lowercase());
|
|
|
|
tokens.iter().any(|token| memchr::memmem::find(&lowercase, token.as_slice()).is_some())
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::{
|
|
line_bounds, line_has_directive, should_skip_for_directive_search, trim_ascii_whitespace,
|
|
InlineIgnoreConfig,
|
|
};
|
|
use crate::location::OffsetSpan;
|
|
|
|
#[test]
|
|
fn bounds_cover_expected_ranges() {
|
|
let data = b"one\ntwo\nthree";
|
|
assert_eq!(line_bounds(data, 0), (0, 3));
|
|
assert_eq!(line_bounds(data, 4), (4, 7));
|
|
assert_eq!(line_bounds(data, data.len()), (8, 13));
|
|
}
|
|
|
|
#[test]
|
|
fn detects_directives_in_lines() {
|
|
let tokens = vec![b"kingfisher:ignore".to_vec()];
|
|
assert!(line_has_directive(b"secret # kingfisher:ignore", &tokens));
|
|
assert!(line_has_directive(b"kingfisher:ignore before value", &tokens));
|
|
assert!(line_has_directive(b"value // Gitleaks:Allow", &[b"gitleaks:allow".to_vec()]));
|
|
assert!(!line_has_directive(b"secret", &tokens));
|
|
}
|
|
|
|
#[test]
|
|
fn respects_multiline_block_comment_prefix() {
|
|
let tokens = vec![b"kingfisher:ignore".to_vec()];
|
|
assert!(line_has_directive(b" * kingfisher:ignore", &tokens));
|
|
}
|
|
|
|
#[test]
|
|
fn ignores_multi_line_string_with_trailing_comment() {
|
|
let blob = b"let secret = \"\"\"\nline1\nline2\n\"\"\"\n# kingfisher:ignore\n";
|
|
let matched = b"line1\nline2\n";
|
|
let start = blob
|
|
.windows(matched.len())
|
|
.position(|window| window == matched)
|
|
.expect("match bytes present");
|
|
let span = OffsetSpan::from_range(start..start + matched.len());
|
|
let config = InlineIgnoreConfig::new(&[]);
|
|
assert!(config.should_ignore(blob, &span));
|
|
}
|
|
|
|
#[test]
|
|
fn ignores_multiline_with_directive_on_closing_line() {
|
|
let blob = b"api_key = \"\"\"\nline1\nline2\n\"\"\" // kingfisher:ignore\n";
|
|
let matched = b"line1\nline2\n";
|
|
let start = blob
|
|
.windows(matched.len())
|
|
.position(|window| window == matched)
|
|
.expect("match bytes present");
|
|
let span = OffsetSpan::from_range(start..start + matched.len());
|
|
let config = InlineIgnoreConfig::new(&[]);
|
|
assert!(config.should_ignore(blob, &span));
|
|
}
|
|
|
|
#[test]
|
|
fn ignores_pem_with_directive_before_block() {
|
|
let blob = b"// kingfisher:ignore\napi_key = \"\"\"\n-----BEGIN RSA PRIVATE KEY-----\nMIICWwIBAAKBgQC7\n-----END RSA PRIVATE KEY-----\n\"\"\"\n";
|
|
let matched = b"MIICWwIBAAKBgQC7\n";
|
|
let start = blob
|
|
.windows(matched.len())
|
|
.position(|window| window == matched)
|
|
.expect("match bytes present");
|
|
let span = OffsetSpan::from_range(start..start + matched.len());
|
|
let config = InlineIgnoreConfig::new(&[]);
|
|
assert!(config.should_ignore(blob, &span));
|
|
}
|
|
|
|
#[test]
|
|
fn ignores_multiline_hex_payload_with_directive() {
|
|
let blob = b"# kingfisher:ignore\nsecret = \"\"\"\n00112233445566778899aabbccddeeff\nffeeddccbbaa99887766554433221100\n\"\"\"\n";
|
|
let matched = b"00112233445566778899aabbccddeeff\nffeeddccbbaa99887766554433221100\n";
|
|
let start = blob
|
|
.windows(matched.len())
|
|
.position(|window| window == matched)
|
|
.expect("match bytes present");
|
|
let span = OffsetSpan::from_range(start..start + matched.len());
|
|
let config = InlineIgnoreConfig::new(&[]);
|
|
assert!(config.should_ignore(blob, &span));
|
|
}
|
|
|
|
#[test]
|
|
fn ignores_multiline_base32_payload_with_directive_after_block() {
|
|
let blob =
|
|
b"secret = \"\"\"\nMFRGGZDFMZTWQ2LK\nONSWG4TFOQ======\n\"\"\"\n// kingfisher:ignore\n";
|
|
let matched = b"MFRGGZDFMZTWQ2LK\nONSWG4TFOQ======\n";
|
|
let start = blob
|
|
.windows(matched.len())
|
|
.position(|window| window == matched)
|
|
.expect("match bytes present");
|
|
let span = OffsetSpan::from_range(start..start + matched.len());
|
|
let config = InlineIgnoreConfig::new(&[]);
|
|
assert!(config.should_ignore(blob, &span));
|
|
}
|
|
|
|
#[test]
|
|
fn ignores_multiline_without_trailing_newline() {
|
|
let blob = b"let secret = \"\"\"\nline1\nline2\n\"\"\"\n# kingfisher:ignore\n";
|
|
let matched = b"line1\nline2";
|
|
let start = blob
|
|
.windows(matched.len())
|
|
.position(|window| window == matched)
|
|
.expect("match bytes present");
|
|
let span = OffsetSpan::from_range(start..start + matched.len());
|
|
let config = InlineIgnoreConfig::new(&[]);
|
|
assert!(config.should_ignore(blob, &span));
|
|
}
|
|
|
|
#[test]
|
|
fn ignores_multiline_with_directive_before_secret() {
|
|
let blob = b"// kingfisher:ignore\nlet secret = \"\"\"\nline1\nline2\n\"\"\"\n";
|
|
let matched = b"line1\nline2\n";
|
|
let start = blob
|
|
.windows(matched.len())
|
|
.position(|window| window == matched)
|
|
.expect("match bytes present");
|
|
let span = OffsetSpan::from_range(start..start + matched.len());
|
|
let config = InlineIgnoreConfig::new(&[]);
|
|
assert!(config.should_ignore(blob, &span));
|
|
}
|
|
|
|
#[test]
|
|
fn ignores_single_line_secret_with_directive_on_previous_line() {
|
|
let blob = b"# safe-secret\n123456\n";
|
|
let matched = b"123456";
|
|
let start = blob
|
|
.windows(matched.len())
|
|
.position(|window| window == matched)
|
|
.expect("match bytes present");
|
|
let span = OffsetSpan::from_range(start..start + matched.len());
|
|
let config = InlineIgnoreConfig::new(&["safe-secret".to_string()]);
|
|
assert!(config.should_ignore(blob, &span));
|
|
}
|
|
|
|
#[test]
|
|
fn trim_ascii_whitespace_returns_inner_slice() {
|
|
assert_eq!(trim_ascii_whitespace(b" abc "), b"abc");
|
|
assert!(trim_ascii_whitespace(b" ").is_empty());
|
|
}
|
|
|
|
#[test]
|
|
fn skips_lines_with_only_delimiters() {
|
|
assert!(should_skip_for_directive_search(b"\"\"\""));
|
|
assert!(should_skip_for_directive_search(b" \"\"\" "));
|
|
assert!(should_skip_for_directive_search(b"let secret = \"\"\""));
|
|
assert!(!should_skip_for_directive_search(b"value"));
|
|
assert!(should_skip_for_directive_search(b"-----BEGIN RSA PRIVATE KEY-----"));
|
|
assert!(should_skip_for_directive_search(b"MIICWwIBAAKBgQC7"));
|
|
assert!(should_skip_for_directive_search(b"0011223344556677"));
|
|
assert!(should_skip_for_directive_search(b"MFRGGZDFMZTWQ2LK"));
|
|
}
|
|
|
|
#[test]
|
|
fn disabled_config_never_ignores() {
|
|
let blob = b"let secret = 'value' # kingfisher:ignore";
|
|
let matched = b"value";
|
|
let start = blob
|
|
.windows(matched.len())
|
|
.position(|window| window == matched)
|
|
.expect("match bytes present");
|
|
let span = OffsetSpan::from_range(start..start + matched.len());
|
|
let config = InlineIgnoreConfig::disabled();
|
|
assert!(!config.should_ignore(blob, &span));
|
|
}
|
|
}
|