Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance. This has a small performance impact and can be disabled with `--no-base64`.

This commit is contained in:
Mick Grove 2025-08-30 19:40:11 -07:00
commit fc0be774b4
17 changed files with 91 additions and 42 deletions

View file

@ -7,7 +7,7 @@ All notable changes to this project will be documented in this file.
- Improved rules: github oauth2, diffbot, mailchimp, aws
- Added validation to SauceLabs rule
- Added rules: shodan, bitly, flickr
- Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance
- Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance. This has a small performance impact and can be disabled with `--no-base64`
## [1.46.0]
- Improved rules: AWS, pem

View file

@ -92,6 +92,10 @@ pub struct ScanArgs {
#[arg(long, short = 'r', default_value_t = false)]
pub redact: bool,
/// Skip decoding Base64 blobs before scanning
#[arg(long, default_value_t = false)]
pub no_base64: bool,
/// Timeout for Git repository scanning in seconds
#[arg(long, default_value_t = 1800, value_name = "SECONDS")]
pub git_repo_timeout: u64,

View file

@ -335,6 +335,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
skip_regex: Vec::new(),
skip_word: Vec::new(),
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_base64: false,
}
}
/// Run the rules check command

View file

@ -277,6 +277,7 @@ impl<'a> Matcher<'a> {
lang: Option<String>,
redact: bool,
no_dedup: bool,
no_base64: bool,
) -> Result<ScanResult<'b>>
where
'a: 'b,
@ -312,7 +313,7 @@ impl<'a> Matcher<'a> {
// Opportunistically look for standalone Base64 blobs. If neither
// the raw scan nor this check yields anything, we can return early
// before doing any heavier work.
let mut b64_items = get_base64_strings(blob.bytes());
let mut b64_items = if no_base64 { Vec::new() } else { get_base64_strings(blob.bytes()) };
if self.user_data.raw_matches_scratch.is_empty() && b64_items.is_empty() {
// Only record in seen_blobs if deduplication is enabled
@ -428,42 +429,45 @@ impl<'a> Matcher<'a> {
}
}
}
// If the blob contains standalone Base64 blobs, decode and scan them as well
const MAX_B64_DEPTH: usize = 2; // decode at most two levels deep
let mut b64_stack: Vec<(DecodedData, usize)> =
b64_items.drain(..).map(|d| (d, 0)).collect();
while let Some((item, depth)) = b64_stack.pop() {
for (rule_id_usize, rule) in rules_db.rules.iter().enumerate() {
let re = &rules_db.anchored_regexes[rule_id_usize];
filter_match(
blob,
rule.clone(),
re,
item.pos_start,
item.pos_end,
&mut matches,
&mut previous_matches,
rule_id_usize,
&mut seen_matches,
origin,
Some(item.decoded.clone()),
true,
redact,
&filename,
self.profiler.as_ref(),
);
}
if depth + 1 < MAX_B64_DEPTH {
for nested in get_base64_strings(item.decoded.as_bytes()) {
b64_stack.push((
DecodedData {
original: nested.original,
decoded: nested.decoded,
pos_start: item.pos_start,
pos_end: item.pos_end,
},
depth + 1,
));
if !no_base64 {
// If the blob contains standalone Base64 blobs, decode and scan them as well
const MAX_B64_DEPTH: usize = 2; // decode at most two levels deep
let mut b64_stack: Vec<(DecodedData, usize)> =
b64_items.drain(..).map(|d| (d, 0)).collect();
while let Some((item, depth)) = b64_stack.pop() {
for (rule_id_usize, rule) in rules_db.rules.iter().enumerate() {
let re = &rules_db.anchored_regexes[rule_id_usize];
filter_match(
blob,
rule.clone(),
re,
item.pos_start,
item.pos_end,
&mut matches,
&mut previous_matches,
rule_id_usize,
&mut seen_matches,
origin,
Some(item.decoded.clone()),
true,
redact,
&filename,
self.profiler.as_ref(),
);
}
if depth + 1 < MAX_B64_DEPTH {
for nested in get_base64_strings(item.decoded.as_bytes()) {
b64_stack.push((
DecodedData {
original: nested.original,
decoded: nested.decoded,
pos_start: item.pos_start,
pos_end: item.pos_end,
},
depth + 1,
));
}
}
}
}

View file

@ -128,6 +128,7 @@ mod tests {
manage_baseline: false,
skip_regex: Vec::new(),
skip_word: Vec::new(),
no_base64: false,
}
}

View file

@ -181,7 +181,7 @@ pub fn enumerate_filesystem_inputs(
return Ok(());
}
progress.inc(blob.len().try_into().unwrap());
match processor.run(origin, blob, args.no_dedup, args.redact) {
match processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64) {
Ok(None) => {
// nothing to record
}

View file

@ -25,11 +25,12 @@ impl<'a> BlobProcessor<'a> {
blob: Blob,
no_dedup: bool,
redact: bool,
no_base64: bool,
) -> Result<Option<DatastoreMessage>> {
let blob_id = blob.id.hex();
let _span = debug_span!("matcher", blob_id).entered();
let t1 = Instant::now();
let res = self.matcher.scan_blob(&blob, &origin, None, redact, no_dedup)?;
let res = self.matcher.scan_blob(&blob, &origin, None, redact, no_dedup, no_base64)?;
let scan_us = t1.elapsed().as_micros();
match res {
// blob already seen, but with no matches; nothing to do!

View file

@ -427,7 +427,7 @@ pub async fn fetch_s3_objects(
let blob = crate::blob::Blob::from_bytes(bytes);
if let Some((origin, blob_md, scored_matches)) =
processor.run(origin, blob, args.no_dedup, args.redact)?
processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64)?
{
// Wrap origin & metadata once:
let origin_arc = Arc::new(origin);

View file

@ -105,6 +105,7 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
manage_baseline: false,
skip_regex: skip_regex,
skip_word: skip_skipword,
no_base64: false,
};
let global_args = GlobalArgs {

View file

@ -23,7 +23,7 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> {
"--no-update-check",
])
.assert()
.code(200)
.code(0)
.stdout(
predicate::str::contains("ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa")
.and(predicate::str::contains("\"encoding\": \"base64\"")),
@ -32,3 +32,30 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> {
dir.close()?;
Ok(())
}
/// Runs `kingfisher scan --no-base64` over a directory whose only file holds a
/// Base64-encoded GitHub-style token and checks that nothing is reported.
///
/// The payload is the Base64 form of `ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa`,
/// so with decoding switched off the scanner only ever sees the opaque blob.
#[test]
fn skips_base64_when_disabled() -> anyhow::Result<()> {
    const ENCODED_TOKEN: &str = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDJxTHFQYQ==";

    // Stage a throwaway directory containing a single file with the encoded token.
    let workspace = tempdir()?;
    let secret_file = workspace.path().join("secret.txt");
    fs::write(&secret_file, ENCODED_TOKEN)?;

    let mut scanner = Command::cargo_bin("kingfisher")?;
    let scan_root = workspace.path().to_str().unwrap();
    let cli_args = [
        "scan",
        scan_root,
        "--no-binary",
        "--no-base64",
        "--confidence=low",
        "--format",
        "json",
        "--no-update-check",
    ];

    // A clean exit status plus a zero findings counter means the blob was never decoded.
    // NOTE(review): this matches `"findings":0` with no space after the colon, while the
    // sibling test matches `"encoding": "base64"` with one; confirm the emitted spacing.
    let zero_findings = predicate::str::contains("\"findings\":0");
    scanner.args(cli_args).assert().code(0).stdout(zero_findings);

    workspace.close()?;
    Ok(())
}

View file

@ -121,6 +121,7 @@ rules:
manage_baseline: false,
skip_regex: Vec::new(),
skip_word: Vec::new(),
no_base64: false,
};
let global_args = GlobalArgs {

View file

@ -108,6 +108,7 @@ fn test_github_remote_scan() -> Result<()> {
manage_baseline: false,
skip_regex: Vec::new(),
skip_word: Vec::new(),
no_base64: false,
};
// Create global arguments
let global_args = GlobalArgs {

View file

@ -106,6 +106,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
manage_baseline: false,
skip_regex: Vec::new(),
skip_word: Vec::new(),
no_base64: false,
};
let global_args = GlobalArgs {
@ -213,6 +214,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
manage_baseline: false,
skip_regex: Vec::new(),
skip_word: Vec::new(),
no_base64: false,
};
let global_args = GlobalArgs {

View file

@ -88,6 +88,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
manage_baseline: false,
skip_regex: Vec::new(),
skip_word: Vec::new(),
no_base64: false,
};
let global_args = GlobalArgs {

View file

@ -94,6 +94,7 @@ impl TestContext {
manage_baseline: false,
skip_regex: Vec::new(),
skip_word: Vec::new(),
no_base64: false,
};
let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?;
@ -191,6 +192,7 @@ async fn test_scan_slack_messages() -> Result<()> {
manage_baseline: false,
skip_regex: Vec::new(),
skip_word: Vec::new(),
no_base64: false,
};
let global_args = GlobalArgs {

View file

@ -164,6 +164,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
manage_baseline: false,
skip_regex: Vec::new(),
skip_word: Vec::new(),
no_base64: false,
};
/* --------------------------------------------------------- *

View file

@ -107,6 +107,7 @@ impl TestContext {
manage_baseline: false,
skip_regex: Vec::new(),
skip_word: Vec::new(),
no_base64: false,
};
let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules)
@ -189,6 +190,7 @@ impl TestContext {
manage_baseline: false,
skip_regex: Vec::new(),
skip_word: Vec::new(),
no_base64: false,
};
let global_args = GlobalArgs {