This commit is contained in:
Mick Grove 2025-11-10 18:47:51 -08:00
commit dca955a95c
20 changed files with 318 additions and 130 deletions

View file

@ -77,9 +77,10 @@ jobs:
C:\vcpkg\downloads
C:\vcpkg\archives
C:\Users\runneradmin\AppData\Local\vcpkg\archives
key: ${{ runner.os }}-vcpkg-hyperscan-542
key: vcpkg-${{ runner.os }}-hs-542
restore-keys: |
${{ runner.os }}-vcpkg-
vcpkg-${{ runner.os }}-
vcpkg-
# Ensure downloads dir exists and seed PCRE 8.45 zip from a working mirror
- name: Pre-seed PCRE 8.45 for vcpkg (bypass SourceForge redirect)

View file

@ -212,9 +212,10 @@ jobs:
C:\vcpkg\downloads
C:\vcpkg\archives
C:\Users\runneradmin\AppData\Local\vcpkg\archives
key: ${{ runner.os }}-vcpkg-hyperscan-542
key: vcpkg-${{ runner.os }}-hs-542
restore-keys: |
${{ runner.os }}-vcpkg-
vcpkg-${{ runner.os }}-
vcpkg-
# Ensure downloads dir exists and seed PCRE 8.45 zip from a working mirror
- name: Pre-seed PCRE 8.45 for vcpkg (bypass SourceForge redirect)

View file

@ -2,6 +2,11 @@
All notable changes to this project will be documented in this file.
## [v1.63.0]
- Fixed a bug when retrieving some finding values and injecting them as TOKENS in the rule templates
- Improved Datadog rule
- Improved AWS rule
## [v1.62.0]
- Added `pattern_requirements` checks to rules, providing lightweight post-regex character-class validation without lookarounds. See docs/RULES.md for details.
- Added an `ignore_if_contains` option to `pattern_requirements` to drop matches containing case-insensitive placeholder words, with tests covering the new behavior.

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.62.0"
version = "1.63.0"
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true

View file

@ -1,16 +1,16 @@
rules:
- name: Datadog API Key
id: kingfisher.datadog.1
id: kingfisher.datadog.3
pattern: |
(?xi)
(?xi)
\b
datadog
(?:datadog|dd)
(?:.|[\n\r]){0,64}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)?
(?:.|[\n\r]){0,32}?
\b
\b
(
[a-z0-9]{32}
[A-Za-z0-9]{32}
)
\b
pattern_requirements:
@ -18,48 +18,96 @@ rules:
min_entropy: 3.3
confidence: medium
examples:
- datadog-secrettoken-0024a29224affe29d173c0bf99e5a89d
- DD_API_KEY=0024a29224affe29d173c0bf99e5a89d
references:
- https://docs.datadoghq.com/account_management/api-app-keys/
validation:
type: Http
content:
request:
method: GET
url: https://api.datadoghq.com/api/v1/validate
headers:
Accept: application/json
DD-API-KEY: '{{ TOKEN }}'
DD-APPLICATION-KEY: '{{ APPKEY }}'
method: GET
DD-API-KEY: "{{ TOKEN }}"
response_matcher:
- report_response: true
- status:
- 200
type: StatusMatch
url: https://api.datadoghq.com/api/v2/current_user
depends_on_rule:
- rule_id: kingfisher.datadog.2
variable: APPKEY
- type: StatusMatch
status: [200]
- name: Datadog Application Secret
id: kingfisher.datadog.2
pattern: |
(?xi)
\b
datadog
(?:.|[\n\r]){0,64}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
(
[a-z0-9]{40}
)
pattern_requirements:
min_digits: 2
min_uppercase: 1
min_lowercase: 1
min_entropy: 3.3
confidence: medium
examples:
- datadog_secret_key-3c0c3965368a6b10f7640dbda46abfdca981c2d3
- datadog_token = BzHpkcs7LujMb3Q1vLRRjbpBNxxYV0ousumYoKJS
references:
- https://docs.datadoghq.com/account_management/api-app-keys/
# - name: Datadog API Key
# id: kingfisher.datadog.1
# pattern: |
# (?xi)
# \b
# datadog
# (?:.|[\n\r]){0,64}?
# (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
# (?:.|[\n\r]){0,32}?
# \b
# (
# [a-z0-9]{32}
# )
# \b
# pattern_requirements:
# min_digits: 2
# min_entropy: 3.3
# confidence: medium
# examples:
# - datadog-secrettoken-0024a29224affe29d173c0bf99e5a89d
# references:
# - https://docs.datadoghq.com/account_management/api-app-keys/
# validation:
# type: Http
# content:
# request:
# headers:
# Accept: application/json
# DD-API-KEY: '{{ TOKEN }}'
# DD-APPLICATION-KEY: '{{ APPKEY }}'
# method: GET
# response_matcher:
# - report_response: true
# - status:
# - 200
# type: StatusMatch
# url: https://api.datadoghq.com/api/v2/current_user
# depends_on_rule:
# - rule_id: kingfisher.datadog.2
# variable: APPKEY
# - name: Datadog API Key (API-only validation)
# id: kingfisher.datadog.3
# pattern: |
# (?xi)
# \b
# (?:datadog|dd)
# (?:.|[\n\r]){0,64}?
# (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)?
# (?:.|[\n\r]){0,32}?
# \b
# (
# [A-Za-z0-9]{32}
# )
# \b
# pattern_requirements:
# min_digits: 2
# min_entropy: 3.3
# confidence: medium
# examples:
# - DD_API_KEY=0024a29224affe29d173c0bf99e5a89d
# references:
# - https://docs.datadoghq.com/account_management/api-app-keys/
# validation:
# type: Http
# content:
# request:
# method: GET
# url: https://api.datadoghq.com/api/v1/validate
# headers:
# Accept: application/json
# DD-API-KEY: "{{ TOKEN }}"
# response_matcher:
# - report_response: true
# - type: StatusMatch
# status: [200]

View file

@ -340,7 +340,7 @@ mod tests {
fn smoke_decompress_tar_gz_archive() -> anyhow::Result<()> {
let dir = tempdir()?;
let tar_gz = dir.path().join("payload.tar.gz");
let github_pat = "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"; // this is not a real secret
let github_pat = "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"; // this is not a real secret
// build payload.tar.gz containing secret.txt
{
@ -393,7 +393,7 @@ mod tests {
fn smoke_decompress_without_extract_archives() -> anyhow::Result<()> {
let dir = tempdir()?;
let tar_gz = dir.path().join("payload.tar.gz");
let github_pat = "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";
let github_pat = "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";
// ── build payload.tar.gz containing secret.txt ──────────────────────────────
{

View file

@ -5,27 +5,27 @@
// * Fallback - system allocator (`system-alloc` feature)
// ────────────────────────────────────────────────────────────
// Global allocator selection.
// NOTE(review): this span shows several `GLOBAL` definitions side by side; presumably the
// feature-gated ones are the pre-change lines of the diff and the commented-out copies plus
// the unconditional definition at the bottom are the post-change lines — confirm.
// --- jemalloc (opt-in) ---
#[cfg(feature = "use-jemalloc")]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
// // --- jemalloc (opt-in) ---
// #[cfg(feature = "use-jemalloc")]
// #[global_allocator]
// static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
// --- mimalloc (default) ---
#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))]
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
// --- system allocator (explicit opt-out) ---
#[cfg(feature = "system-alloc")]
use std::alloc::System;
#[cfg(feature = "system-alloc")]
#[global_allocator]
static GLOBAL: System = System;
// // --- mimalloc (default) ---
// #[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))]
// #[global_allocator]
// static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
// // --- system allocator (explicit opt-out) ---
// #[cfg(feature = "system-alloc")]
// use std::alloc::System;
// #[cfg(feature = "system-alloc")]
// #[global_allocator]
// static GLOBAL: System = System;
// NOTE(review): the definition below carries no `#[cfg]` gate, so it installs the system
// allocator regardless of the `use-jemalloc` / `system-alloc` features. With it in place the
// "mimalloc (default)" label above is stale — confirm this is intentional and not a
// debugging leftover before release.
use std::alloc::System;
#[global_allocator]
static GLOBAL: System = System;
use std::{
io::{IsTerminal, Read},
sync::{Arc, Mutex},

View file

@ -374,7 +374,9 @@ impl<'a> Matcher<'a> {
} else {
None
};
/////////////////////////////
// Process matches
/////////////////////////////
let mut matches = Vec::new();
let owned_ts_results = tree_sitter_result.map(|ts_results| {
ts_results
@ -565,9 +567,11 @@ fn record_match(
) -> bool {
insert_span(map.entry(rule_id).or_default(), span)
}
// in src/matcher.rs
#[allow(clippy::too_many_arguments)]
fn filter_match<'b>(
blob: &'b Blob,
// rule: &'b Rule,
rule: Arc<Rule>,
re: &Regex,
start: usize,
@ -596,14 +600,44 @@ fn filter_match<'b>(
for captures in re.captures_iter(haystack) {
let full_capture = captures.get(0).unwrap();
let matching_input = captures.get(1).unwrap_or(full_capture);
// --- LOGIC TO FIND THE "SECRET" FOR ENTROPY/SAFE-LISTING ---
let matching_input_for_entropy = 'block: {
// 1. Prefer a named capture called TOKEN (case-insensitive).
if let Some(token_cap) = re.capture_names().enumerate().find_map(|(i, name_opt)| {
name_opt
.filter(|name| name.eq_ignore_ascii_case("TOKEN"))
.and_then(|_| captures.get(i))
}) {
break 'block token_cap;
}
// 2. Otherwise, prefer the first *matched* named capture.
if let Some(named_cap) = re.capture_names().enumerate().find_map(|(i, name_opt)| {
name_opt.and_then(|_| captures.get(i)) // find(i > 0 && name_opt.is_some())
}) {
break 'block named_cap;
}
// 3. Otherwise, fall back to the first positional capture (group 1).
if let Some(pos_cap) = captures.get(1) {
break 'block pos_cap;
}
// 4. Finally, fall back to the full match (group 0).
break 'block full_capture;
};
// --- END LOGIC ---
let min_entropy = rule.min_entropy();
let mi_bytes = matching_input.as_bytes();
let entropy_bytes = matching_input_for_entropy.as_bytes();
let full_bytes = full_capture.as_bytes();
let calculated_entropy = calculate_shannon_entropy(mi_bytes);
let calculated_entropy = calculate_shannon_entropy(entropy_bytes);
// Check entropy and safe-listing against the *selected* secret bytes
if calculated_entropy <= min_entropy
|| is_safe_match(mi_bytes)
|| is_user_match(mi_bytes, full_bytes)
|| is_safe_match(entropy_bytes)
|| is_user_match(entropy_bytes, full_bytes)
{
debug!(
"Skipping match with entropy {} <= {} or safe match",
@ -619,7 +653,15 @@ fn filter_match<'b>(
captures: &captures,
full_match: full_bytes,
};
match char_reqs.validate(mi_bytes, Some(context), respect_ignore_if_contains) {
// --- FIX IS HERE ---
//
// The `validate` function (and thus `{{ MATCH }}`) should *always*
// operate on the *full match* (group 0), not just the entropy bytes.
// This aligns the scan logic with the unit test's logic.
match char_reqs.validate(full_bytes, Some(context), respect_ignore_if_contains) {
//
// --- END FIX ---
PatternValidationResult::Passed => {}
PatternValidationResult::Failed => {
debug!(
@ -647,6 +689,9 @@ fn filter_match<'b>(
}
}
// Use the `matching_input_for_entropy` as the span/key for the finding.
let matching_input = matching_input_for_entropy;
let matching_input_offset_span = OffsetSpan::from_range(
(start + matching_input.start())..(start + matching_input.end()),
);
@ -668,7 +713,10 @@ fn filter_match<'b>(
}
let only_matching_input =
&blob.bytes()[matching_input_offset_span.start..matching_input_offset_span.end];
// Pass the *full* capture object to from_captures
let groups = SerializableCaptures::from_captures(&captures, haystack, re, redact);
matches.push(BlobMatch {
rule: Arc::clone(&rule),
blob_id: blob.id_ref(),
@ -687,6 +735,7 @@ fn filter_match<'b>(
t.end(new_count > 0, new_count, 0);
}
}
fn get_language_and_queries(lang: &str) -> Option<(Language, FxHashMap<String, String>)> {
match lang.to_lowercase().as_str() {
"bash" | "shell" => Some((Language::Bash, parser::queries::bash::get_bash_queries())),
@ -796,6 +845,7 @@ pub struct SerializableCaptures {
#[schemars(with = "Vec<SerializableCapture>")]
pub captures: SmallVec<[SerializableCapture; 2]>, // All captures (named and unnamed)
}
impl SerializableCaptures {
pub fn from_captures(
captures: &regex::bytes::Captures,
@ -808,26 +858,51 @@ impl SerializableCaptures {
let capture_names: SmallVec<[Option<String>; 4]> =
re.capture_names().map(|name| name.map(str::to_string)).collect();
for i in 0..captures.len() {
if let Some(cap) = captures.get(i) {
// If there are explicit capture groups (e.g., group 1, 2, ...),
// only serialize those.
if captures.len() > 1 {
for i in 1..captures.len() {
// Start from 1
if let Some(cap) = captures.get(i) {
let value = if redact {
redact_value(&String::from_utf8_lossy(cap.as_bytes()))
} else {
String::from_utf8_lossy(cap.as_bytes()).to_string()
};
let interned = intern(&value);
let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned();
serialized_captures.push(SerializableCapture {
name,
match_number: i32::try_from(i).unwrap_or(0),
start: cap.start(),
end: cap.end(),
value: interned,
});
}
}
} else if captures.len() == 1 {
// ELSE, if there is ONLY the full match (len == 1),
// serialize just that full match (group 0) as the fallback.
if let Some(cap) = captures.get(0) {
let value = if redact {
redact_value(&String::from_utf8_lossy(cap.as_bytes()))
} else {
String::from_utf8_lossy(cap.as_bytes()).to_string()
};
let interned = intern(&value);
let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned();
let name = capture_names.get(0).and_then(|opt| opt.as_ref()).cloned();
serialized_captures.push(SerializableCapture {
name,
match_number: i32::try_from(i).unwrap_or(0),
match_number: 0,
start: cap.start(),
end: cap.end(),
value: interned,
});
}
}
// If len == 0 (no match), loop is skipped, empty vec is returned.
SerializableCaptures { captures: serialized_captures }
}
@ -950,7 +1025,8 @@ pub struct DecodedData {
}
/// Returns `true` when `b` is an ASCII letter, an ASCII digit, or one of the base64
/// symbol bytes listed in the pattern below.
///
/// The padding byte `=` is not matched by either pattern.
///
/// NOTE(review): two `matches!` lines appear here. The first accepts only the standard
/// alphabet symbols (`+`, `/`); the second additionally accepts the URL-safe symbols
/// (`-`, `_`). Presumably the first is the superseded line from the diff and only the
/// second is meant to remain — confirm.
#[inline]
fn is_base64_byte(b: u8) -> bool {
// Standard base64 alphabet only.
matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/')
// Include URL-safe characters '-' and '_'
matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/' | b'-' | b'_')
}
pub fn get_base64_strings(input: &[u8]) -> Vec<DecodedData> {
@ -975,7 +1051,14 @@ pub fn get_base64_strings(input: &[u8]) -> Vec<DecodedData> {
let len = end - start;
if len >= 32 && len % 4 == 0 {
let base64_slice = &input[start..end];
if let Ok(decoded) = general_purpose::STANDARD.decode(base64_slice) {
// Try decoding with STANDARD, then URL_SAFE, then URL_SAFE_NO_PAD
let decode_result = general_purpose::STANDARD
.decode(base64_slice)
.or_else(|_| general_purpose::URL_SAFE.decode(base64_slice))
.or_else(|_| general_purpose::URL_SAFE_NO_PAD.decode(base64_slice));
if let Ok(decoded) = decode_result {
if let Ok(decoded_str) = std::str::from_utf8(&decoded) {
if decoded_str.is_ascii() {
results.push(DecodedData {
@ -1521,10 +1604,10 @@ line2
.map(|cap| (cap.name.as_deref(), cap.match_number, cap.value))
.collect();
assert_eq!(entries.len(), 4);
assert_eq!(entries[0], (None, 0, "ghp_ABC12"));
assert_eq!(entries[1], (None, 1, "ghp_ABC12"));
assert_eq!(entries[2], (Some("body"), 2, "ABC"));
assert_eq!(entries[3], (Some("checksum"), 3, "12"));
assert_eq!(entries.len(), 3);
assert_eq!(entries[0], (None, 1, "ghp_ABC12"));
assert_eq!(entries[1], (Some("body"), 2, "ABC"));
assert_eq!(entries[2], (Some("checksum"), 3, "12"));
}
}

View file

@ -417,15 +417,19 @@ impl DetailsReporter {
let source_span = &rm.m.location.source_span;
let line_num = source_span.start.line;
// --- FIX IS HERE ---
// We now correctly serialize *only* the explicit capture groups (or group 0
// as a fallback). The primary "secret" is therefore always at index 0
// of the captures SmallVec.
let snippet = Escaped(
rm.m.groups
.captures
.get(1)
.or_else(|| rm.m.groups.captures.get(0))
.get(0) // Get the first (and primary) serialized capture
.map(|capture| capture.value.as_bytes())
.unwrap_or_default(),
)
.to_string();
// --- END FIX ---
let validation_status = if rm.validation_success {
"Active Credential".to_string()

View file

@ -731,7 +731,7 @@ mod tests {
}),
};
let token = b"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";
let token = b"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";
let regex =
BytesRegex::new(r"(?x) ghp_(?P<body>[A-Za-z0-9]{30})(?P<checksum>[A-Za-z0-9]{6})")
.unwrap();

View file

@ -321,6 +321,10 @@ async fn timed_validate_single_match<'a>(
for dep in m.rule.syntax().depends_on_rule.iter().flatten() {
if let Some(vals) = dependent_variables.get(&dep.variable.to_uppercase()) {
for (val, span) in vals {
// Skip adding captured values for TOKEN dependencies
if dep.variable.eq_ignore_ascii_case("TOKEN") {
continue;
}
captured_values.push((
dep.variable.to_uppercase(),
val.clone(),

View file

@ -187,9 +187,7 @@ pub fn validate_aws_credentials_input(access_key_id: &str, secret_key: &str) ->
if !access_key_id.chars().all(|c| c.is_ascii_alphanumeric()) {
return Err("AWS access key ID contains invalid characters".to_string());
}
if !secret_key.chars().all(|c| c.is_ascii_alphanumeric() || c == '/' || c == '+') {
return Err("AWS secret key contains invalid characters".to_string());
}
Ok(())
}

View file

@ -3,22 +3,46 @@ use tokio::net::lookup_host;
use crate::validation::SerializableCaptures;
/// Return (NAME, value, start, end) for every capture we care about.
/// Return (NAME, value, start, end) for the captures we care about.
///
/// * If a capture has a name, use that (upper-cased)
/// * If it's unnamed, fall back to `"TOKEN"`
/// * Named captures keep their (upper-cased) name
/// * Among unnamed captures, keep **only the first one** and call it "TOKEN"
///
/// `start` and `end` are copied unchanged from each capture's `start` / `end` fields.
///
/// NOTE(review): both the earlier `.map` closure (labels *every* unnamed capture "TOKEN")
/// and its `.filter_map` replacement (keeps only the first unnamed capture) are shown
/// below, together with both generations of the bullet list above. Presumably only the
/// `.filter_map` variant and the last two bullets describe the current behavior — confirm.
pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> {
// Tracks whether an unnamed capture has already been emitted as "TOKEN".
let mut saw_unnamed = false;
captures
.captures
.iter()
// Earlier behavior: every capture is kept; unnamed ones all become "TOKEN".
.map(|cap| {
let name =
cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string());
(name, cap.value.to_string(), cap.start, cap.end)
// Replacement behavior: named captures pass through, the first unnamed one is
// labeled "TOKEN", and any later unnamed captures are dropped.
.filter_map(|cap| {
if let Some(name) = &cap.name {
Some((name.to_uppercase(), cap.value.to_string(), cap.start, cap.end))
} else if !saw_unnamed {
saw_unnamed = true;
Some(("TOKEN".to_string(), cap.value.to_string(), cap.start, cap.end))
} else {
// Ignore any additional unnamed captures (e.g., from unintended groups)
None
}
})
.collect()
}
// /// Return (NAME, value, start, end) for every capture we care about.
// ///
// /// * If a capture has a name, use that (upper-cased)
// /// * If it's unnamed, fall back to `"TOKEN"`
// pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> {
// captures
// .captures
// .iter()
// .map(|cap| {
// let name =
// cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string());
// (name, cap.value.to_string(), cap.start, cap.end)
// })
// .collect()
// }
pub fn find_closest_variable(
captures: &[(String, String, usize, usize)],
target_value: &String,
@ -108,7 +132,7 @@ pub async fn check_url_resolvable(url: &Url) -> Result<(), Box<dyn std::error::E
// -----------------------------------------------------------------------------
// tests
// -----------------------------------------------------------------------------
//
#[cfg(test)]
mod tests {
use super::*;
@ -121,7 +145,7 @@ mod tests {
let captures = SerializableCaptures {
captures: smallvec![SerializableCapture {
name: None,
match_number: 0,
match_number: 0, // This test is for a rule with *no* explicit captures
start: 1,
end: 4,
value: "abc",
@ -130,21 +154,26 @@ mod tests {
let result = process_captures(&captures);
assert_eq!(result, vec![("TOKEN".to_string(), "abc".to_string(), 1usize, 4usize)]);
}
#[test]
fn includes_whole_match_when_multiple() {
let captures = SerializableCaptures {
captures: smallvec![
// --- FIX ---
// This test simulated a regex like `(abc)de(?P<foo>bcd)`.
// With our fix, group 0 ("abcde") is NOT serialized.
// We only get the explicit captures (group 1 and "foo").
SerializableCapture {
// This is group 1 (unnamed)
name: None,
match_number: 0,
start: 0,
end: 5,
value: "abcde",
match_number: 1, // Corrected match_number
start: 1,
end: 4,
value: "bcd",
},
SerializableCapture {
// This is group 2 (named "foo")
name: Some("foo".to_string()),
match_number: -1,
match_number: 2, // Corrected match_number
start: 1,
end: 4,
value: "bcd",
@ -152,45 +181,60 @@ mod tests {
],
};
let result = process_captures(&captures);
// --- FIX ---
// The expected result now only contains the explicit captures.
// The first unnamed capture ("bcd") becomes "TOKEN".
assert_eq!(
result,
vec![
("TOKEN".to_string(), "abcde".to_string(), 0usize, 5usize),
("TOKEN".to_string(), "bcd".to_string(), 1usize, 4usize),
("FOO".to_string(), "bcd".to_string(), 1usize, 4usize),
]
);
// --- END FIX ---
}
#[test]
#[test]
fn includes_whole_match_and_unnamed_groups() {
// Checks how `process_captures` labels a named capture ("foo") alongside an unnamed one.
// NOTE(review): this span shows both the earlier fixture/expectation (with the whole
// match "aabbcc" as an unnamed capture) and the updated one (explicit groups only).
// Per the FIX comments below, the whole match is no longer part of the expected output,
// so the test name ("includes_whole_match...") no longer describes what is asserted —
// consider renaming it.
let captures = SerializableCaptures {
captures: smallvec![
// --- FIX ---
// This test simulated a regex like `(?P<foo>aa)bb(cc)`.
// With our fix, group 0 ("aabbcc") is NOT serialized.
// We only get the explicit captures ("foo" and group 2).
SerializableCapture {
name: None,
match_number: 0,
start: 0,
end: 6,
value: "aabbcc",
},
SerializableCapture {
// This is group 1 (named "foo")
name: Some("foo".to_string()),
match_number: -1,
match_number: 1, // Corrected match_number
start: 0,
end: 2,
value: "aa",
},
SerializableCapture { name: None, match_number: 1, start: 4, end: 6, value: "cc" },
SerializableCapture {
// This is group 2 (unnamed)
name: None,
match_number: 2, // Corrected match_number
start: 4,
end: 6,
value: "cc"
},
],
};
let result = process_captures(&captures);
// --- FIX ---
// The expected result no longer contains the full match ("aabbcc").
// The first (and only) unnamed capture ("cc") is now correctly labeled "TOKEN".
assert_eq!(
result,
vec![
("TOKEN".to_string(), "aabbcc".to_string(), 0usize, 6usize),
("FOO".to_string(), "aa".to_string(), 0usize, 2usize),
("TOKEN".to_string(), "cc".to_string(), 4usize, 6usize),
("FOO".to_string(), "aa".to_string(), 0usize, 2usize), // From named group 1
("TOKEN".to_string(), "cc".to_string(), 4usize, 6usize), // From unnamed group 2
]
);
// --- END FIX ---
}
#[test]

View file

@ -8,8 +8,8 @@ use tempfile::tempdir;
fn detects_base64_encoded_secret() -> anyhow::Result<()> {
let dir = tempdir()?;
let file_path = dir.path().join("secret.txt");
// Base64 for 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
let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDBNV0h4cw==";
// Base64 for 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
let encoded = "Z2hwX0Vab3BaRE1XZWlsZGZvRnp5SDBLbld5UTVZeTN2eTBZMlNVNg==";
fs::write(&file_path, encoded)?;
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
@ -26,7 +26,7 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> {
.assert()
.code(200)
.stdout(
predicate::str::contains("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")
predicate::str::contains("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")
.and(predicate::str::contains("\"encoding\": \"base64\"")),
);
@ -39,7 +39,7 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> {
fn skips_base64_when_disabled() -> anyhow::Result<()> {
let dir = tempdir()?;
let file_path = dir.path().join("secret.txt");
let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDBNV0h4cw==";
let encoded = "Z2hwX0Vab3BaRE1XZWlsZGZvRnp5SDBLbld5UTVZeTN2eTBZMlNVNg==";
fs::write(&file_path, encoded)?;
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
@ -92,8 +92,8 @@ fn no_base64_skips_empty_files() -> anyhow::Result<()> {
fn detects_base64_in_code_with_tree_sitter() -> anyhow::Result<()> {
let dir = tempdir()?;
let file_path = dir.path().join("secret.py");
// Base64 for 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
let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDBNV0h4cw==";
// Base64 for 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
let encoded = "Z2hwX0Vab3BaRE1XZWlsZGZvRnp5SDBLbld5UTVZeTN2eTBZMlNVNg==";
fs::write(&file_path, format!("token = \"{}\"\n", encoded))?;
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
@ -110,7 +110,7 @@ fn detects_base64_in_code_with_tree_sitter() -> anyhow::Result<()> {
.assert()
.code(200)
.stdout(
predicate::str::contains("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")
predicate::str::contains("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")
.and(predicate::str::contains("\"encoding\": \"base64\"")),
);

View file

@ -159,7 +159,7 @@ async fn test_scan_slack_messages() -> Result<()> {
"messages": {
"matches": [{
"permalink": "https://example.slack.com/archives/C123/p1234",
"text": "This contains a github token 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",
"text": "This contains a github token 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",
"ts": "1234.56",
"channel": {"id": "C123", "name": "general"}
}],

View file

@ -7,7 +7,7 @@ fn smoke_scan_tar_gz_archive() -> anyhow::Result<()> {
let dir = tempfile::tempdir()?;
let tar_gz = dir.path().join("payload.tar.gz");
let github_pat = "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";
let github_pat = "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";
// --- build a payload.tar.gz -------------------------------------------------
{

View file

@ -5,7 +5,7 @@ use clap::Parser;
use predicates::prelude::*;
use tempfile::tempdir;
const GH_PAT: &str = "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";
const GH_PAT: &str = "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";
#[test]
fn manage_baseline_enables_no_dedup() -> anyhow::Result<()> {

View file

@ -4,7 +4,7 @@ use assert_cmd::Command;
use predicates::prelude::*;
use tempfile::tempdir;
const SECRET: &str = "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";
const SECRET: &str = "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";
#[test]
fn exclude_pattern_hides_matches() -> anyhow::Result<()> {

View file

@ -5,7 +5,7 @@ use assert_cmd::prelude::*;
use predicates::prelude::*;
use tempfile::tempdir;
const GITHUB_PAT: &str = "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";
const GITHUB_PAT: &str = "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";
#[test]
fn smoke_scan_filesystem_text_and_binary() -> anyhow::Result<()> {

View file

@ -15,7 +15,7 @@ fn smoke_scan_git_history() -> anyhow::Result<()> {
// commit v1
let file_path = repo_dir.join("config.yml");
fs::write(&file_path, b"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")?;
fs::write(&file_path, b"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")?;
let mut idx = repo.index()?;
idx.add_path(std::path::Path::new("config.yml"))?;
let oid1 = idx.write_tree()?;
@ -23,7 +23,7 @@ fn smoke_scan_git_history() -> anyhow::Result<()> {
repo.commit(Some("HEAD"), &sig, &sig, "init", &tree1, &[])?;
// commit v2 (same leak, will test dedup)
fs::write(&file_path, b"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 # unchanged")?;
fs::write(&file_path, b"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 # unchanged")?;
idx.add_path(std::path::Path::new("config.yml"))?;
let oid2 = idx.write_tree()?;
let tree2 = repo.find_tree(oid2)?;
@ -44,7 +44,7 @@ fn smoke_scan_git_history() -> anyhow::Result<()> {
])
.assert()
.code(200) // ← kingfishers “findings present” status
.stdout(predicate::str::contains("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"));
.stdout(predicate::str::contains("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"));
dir.close()?;
Ok(())