forked from mirrors/kingfisher
Decode Base64 blobs and scan their contents for secrets, skipping short strings for performance. This has a small performance impact and can be disabled with `--no-base64`.
This commit is contained in:
parent
984231e25c
commit
fc0be774b4
17 changed files with 91 additions and 42 deletions
|
|
@ -7,7 +7,7 @@ All notable changes to this project will be documented in this file.
|
|||
- Improved rules: github oauth2, diffbot, mailchimp, aws
|
||||
- Added validation to SauceLabs rule
|
||||
- Added rules: shodan, bitly, flickr
|
||||
- Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance
|
||||
- Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance. This has a small performance impact and can be disabled with `--no-base64`
|
||||
|
||||
## [1.46.0]
|
||||
- Improved rules: AWS, pem
|
||||
|
|
|
|||
|
|
@ -92,6 +92,10 @@ pub struct ScanArgs {
|
|||
#[arg(long, short = 'r', default_value_t = false)]
|
||||
pub redact: bool,
|
||||
|
||||
/// Skip decoding Base64 blobs before scanning
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub no_base64: bool,
|
||||
|
||||
/// Timeout for Git repository scanning in seconds
|
||||
#[arg(long, default_value_t = 1800, value_name = "SECONDS")]
|
||||
pub git_repo_timeout: u64,
|
||||
|
|
|
|||
|
|
@ -335,6 +335,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
|
|||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_base64: false,
|
||||
}
|
||||
}
|
||||
/// Run the rules check command
|
||||
|
|
|
|||
|
|
@ -277,6 +277,7 @@ impl<'a> Matcher<'a> {
|
|||
lang: Option<String>,
|
||||
redact: bool,
|
||||
no_dedup: bool,
|
||||
no_base64: bool,
|
||||
) -> Result<ScanResult<'b>>
|
||||
where
|
||||
'a: 'b,
|
||||
|
|
@ -312,7 +313,7 @@ impl<'a> Matcher<'a> {
|
|||
// Opportunistically look for standalone Base64 blobs. If neither
|
||||
// the raw scan nor this check yields anything, we can return early
|
||||
// before doing any heavier work.
|
||||
let mut b64_items = get_base64_strings(blob.bytes());
|
||||
let mut b64_items = if no_base64 { Vec::new() } else { get_base64_strings(blob.bytes()) };
|
||||
|
||||
if self.user_data.raw_matches_scratch.is_empty() && b64_items.is_empty() {
|
||||
// Only record in seen_blobs if deduplication is enabled
|
||||
|
|
@ -428,42 +429,45 @@ impl<'a> Matcher<'a> {
|
|||
}
|
||||
}
|
||||
}
|
||||
// If the blob contains standalone Base64 blobs, decode and scan them as well
|
||||
const MAX_B64_DEPTH: usize = 2; // decode at most two levels deep
|
||||
let mut b64_stack: Vec<(DecodedData, usize)> =
|
||||
b64_items.drain(..).map(|d| (d, 0)).collect();
|
||||
while let Some((item, depth)) = b64_stack.pop() {
|
||||
for (rule_id_usize, rule) in rules_db.rules.iter().enumerate() {
|
||||
let re = &rules_db.anchored_regexes[rule_id_usize];
|
||||
filter_match(
|
||||
blob,
|
||||
rule.clone(),
|
||||
re,
|
||||
item.pos_start,
|
||||
item.pos_end,
|
||||
&mut matches,
|
||||
&mut previous_matches,
|
||||
rule_id_usize,
|
||||
&mut seen_matches,
|
||||
origin,
|
||||
Some(item.decoded.clone()),
|
||||
true,
|
||||
redact,
|
||||
&filename,
|
||||
self.profiler.as_ref(),
|
||||
);
|
||||
}
|
||||
if depth + 1 < MAX_B64_DEPTH {
|
||||
for nested in get_base64_strings(item.decoded.as_bytes()) {
|
||||
b64_stack.push((
|
||||
DecodedData {
|
||||
original: nested.original,
|
||||
decoded: nested.decoded,
|
||||
pos_start: item.pos_start,
|
||||
pos_end: item.pos_end,
|
||||
},
|
||||
depth + 1,
|
||||
));
|
||||
|
||||
if !no_base64 {
|
||||
// If the blob contains standalone Base64 blobs, decode and scan them as well
|
||||
const MAX_B64_DEPTH: usize = 2; // decode at most two levels deep
|
||||
let mut b64_stack: Vec<(DecodedData, usize)> =
|
||||
b64_items.drain(..).map(|d| (d, 0)).collect();
|
||||
while let Some((item, depth)) = b64_stack.pop() {
|
||||
for (rule_id_usize, rule) in rules_db.rules.iter().enumerate() {
|
||||
let re = &rules_db.anchored_regexes[rule_id_usize];
|
||||
filter_match(
|
||||
blob,
|
||||
rule.clone(),
|
||||
re,
|
||||
item.pos_start,
|
||||
item.pos_end,
|
||||
&mut matches,
|
||||
&mut previous_matches,
|
||||
rule_id_usize,
|
||||
&mut seen_matches,
|
||||
origin,
|
||||
Some(item.decoded.clone()),
|
||||
true,
|
||||
redact,
|
||||
&filename,
|
||||
self.profiler.as_ref(),
|
||||
);
|
||||
}
|
||||
if depth + 1 < MAX_B64_DEPTH {
|
||||
for nested in get_base64_strings(item.decoded.as_bytes()) {
|
||||
b64_stack.push((
|
||||
DecodedData {
|
||||
original: nested.original,
|
||||
decoded: nested.decoded,
|
||||
pos_start: item.pos_start,
|
||||
pos_end: item.pos_end,
|
||||
},
|
||||
depth + 1,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -128,6 +128,7 @@ mod tests {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -181,7 +181,7 @@ pub fn enumerate_filesystem_inputs(
|
|||
return Ok(());
|
||||
}
|
||||
progress.inc(blob.len().try_into().unwrap());
|
||||
match processor.run(origin, blob, args.no_dedup, args.redact) {
|
||||
match processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64) {
|
||||
Ok(None) => {
|
||||
// nothing to record
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,11 +25,12 @@ impl<'a> BlobProcessor<'a> {
|
|||
blob: Blob,
|
||||
no_dedup: bool,
|
||||
redact: bool,
|
||||
no_base64: bool,
|
||||
) -> Result<Option<DatastoreMessage>> {
|
||||
let blob_id = blob.id.hex();
|
||||
let _span = debug_span!("matcher", blob_id).entered();
|
||||
let t1 = Instant::now();
|
||||
let res = self.matcher.scan_blob(&blob, &origin, None, redact, no_dedup)?;
|
||||
let res = self.matcher.scan_blob(&blob, &origin, None, redact, no_dedup, no_base64)?;
|
||||
let scan_us = t1.elapsed().as_micros();
|
||||
match res {
|
||||
// blob already seen, but with no matches; nothing to do!
|
||||
|
|
|
|||
|
|
@ -427,7 +427,7 @@ pub async fn fetch_s3_objects(
|
|||
let blob = crate::blob::Blob::from_bytes(bytes);
|
||||
|
||||
if let Some((origin, blob_md, scored_matches)) =
|
||||
processor.run(origin, blob, args.no_dedup, args.redact)?
|
||||
processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64)?
|
||||
{
|
||||
// Wrap origin & metadata once:
|
||||
let origin_arc = Arc::new(origin);
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
|
|||
manage_baseline: false,
|
||||
skip_regex: skip_regex,
|
||||
skip_word: skip_skipword,
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> {
|
|||
"--no-update-check",
|
||||
])
|
||||
.assert()
|
||||
.code(200)
|
||||
.code(0)
|
||||
.stdout(
|
||||
predicate::str::contains("ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa")
|
||||
.and(predicate::str::contains("\"encoding\": \"base64\"")),
|
||||
|
|
@ -32,3 +32,30 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> {
|
|||
dir.close()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Ensure that disabling Base64 decoding (`--no-base64`) suppresses findings for Base64-encoded secrets
|
||||
#[test]
|
||||
fn skips_base64_when_disabled() -> anyhow::Result<()> {
|
||||
let dir = tempdir()?;
|
||||
let file_path = dir.path().join("secret.txt");
|
||||
let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDJxTHFQYQ==";
|
||||
fs::write(&file_path, encoded)?;
|
||||
|
||||
Command::cargo_bin("kingfisher")?
|
||||
.args([
|
||||
"scan",
|
||||
dir.path().to_str().unwrap(),
|
||||
"--no-binary",
|
||||
"--no-base64",
|
||||
"--confidence=low",
|
||||
"--format",
|
||||
"json",
|
||||
"--no-update-check",
|
||||
])
|
||||
.assert()
|
||||
.code(0)
|
||||
.stdout(predicate::str::contains("\"findings\":0"));
|
||||
|
||||
dir.close()?;
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -121,6 +121,7 @@ rules:
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -108,6 +108,7 @@ fn test_github_remote_scan() -> Result<()> {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
// Create global arguments
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -106,6 +106,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
@ -213,6 +214,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -88,6 +88,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -94,6 +94,7 @@ impl TestContext {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?;
|
||||
|
|
@ -191,6 +192,7 @@ async fn test_scan_slack_messages() -> Result<()> {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -164,6 +164,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
/* --------------------------------------------------------- *
|
||||
|
|
|
|||
|
|
@ -107,6 +107,7 @@ impl TestContext {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules)
|
||||
|
|
@ -189,6 +190,7 @@ impl TestContext {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue