forked from mirrors/kingfisher
Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance
This commit is contained in:
parent
e54dbe90d0
commit
9de355a5c8
14 changed files with 1266 additions and 52 deletions
|
|
@ -2,11 +2,12 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [Unreleased]
|
||||
## [1.47.0]
|
||||
- MongoDB validator now validates `mongodb+srv://` URIs with a fast timeout instead of skipping them
|
||||
- Improved rules: github oauth2, diffbot, mailchimp, aws
|
||||
- Added validation to SauceLabs rule
|
||||
- Added rules: shodan, bitly, flickr
|
||||
- Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance
|
||||
|
||||
## [1.46.0]
|
||||
- Improved rules: AWS, pem
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ publish = false
|
|||
|
||||
[package]
|
||||
name = "kingfisher"
|
||||
version = "1.46.0"
|
||||
version = "1.47.0"
|
||||
description = "MongoDB's blazingly fast secret scanning and validation tool"
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
|
|
|||
48
data/rules/docker.yml
Normal file
48
data/rules/docker.yml
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# Detects Docker registry credentials stored in a Docker-style "auths" JSON
# object. The pattern captures the registry key as REG and the Base64 `auth`
# value (at least 16 characters) as B64; both captures feed the HTTP
# validation request defined at the end of this rule.
rules:
|
||||
- name: Docker Registry Credentials (auths JSON)
|
||||
id: kingfisher.docker.auths.1
|
||||
pattern: |
|
||||
(?xis)
|
||||
"auths"\s*:\s*\{
|
||||
[^}]*?
|
||||
" (?P<REG> (?:https?:\/\/)? [a-z0-9.\-:+/]+ ) "\s*:\s*\{
|
||||
[^}]*?
|
||||
"auth"\s*:\s*"(?P<B64> [A-Za-z0-9+/=]{16,} )"
|
||||
[^}]*?
|
||||
\}
|
||||
[^}]*?
|
||||
\}
|
||||
min_entropy: 2.0
|
||||
confidence: medium
|
||||
examples:
|
||||
- |
|
||||
{
|
||||
"auths": {
|
||||
"quay.io": {
|
||||
"auth": "cmhkaCtyaHRhcDowM1BERk1RTTJQTDlaQUE5T1gzSU9IQjFYTUlXOVNGNU1XRzNSRVRHNThKVVpKMzEwV0ZZRVNOQTdGMExNNTYx"
|
||||
}
|
||||
}
|
||||
}
|
||||
- |
|
||||
{"auths":{"index.docker.io/v1/":{"auth":"dXNlcjp0b2tlbg=="}}}
|
||||
references:
|
||||
- https://distribution.github.io/distribution/spec/api/
|
||||
# Validation: issue a GET to <registry>/v2/auth with the captured B64 value as
# HTTP Basic credentials; only an HTTP 200 response satisfies the StatusMatch.
validation:
|
||||
type: Http
|
||||
content:
|
||||
request:
|
||||
method: GET
|
||||
# If REG already contains a scheme it is used as-is, otherwise https:// is
# prepended.
# NOTE(review): Liquid's `replace` filter substitutes literal text, not a
# regex, so "/$" likely does not strip a trailing slash - confirm. The
# else-branch does no stripping either, so a REG such as
# "index.docker.io/v1/" would yield "https://index.docker.io/v1//v2/auth".
url: >
|
||||
{%- assign r = REG -%}
|
||||
{%- if r contains "://" -%}
|
||||
{{ r | replace: "/$", "" }}/v2/auth
|
||||
{%- else -%}
|
||||
https://{{ r }}/v2/auth
|
||||
{%- endif -%}
|
||||
headers:
|
||||
Authorization: "Basic {{ B64 }}"
|
||||
Accept: application/json
|
||||
response_matcher:
|
||||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [200]
|
||||
|
|
@ -192,4 +192,22 @@ rules:
|
|||
password = 'abuser123456' # some other comment
|
||||
- |
|
||||
user = 'Aladdin'
|
||||
password = 'open sesame'
|
||||
password = 'open sesame'
|
||||
# Matches a plaintext `user:secret` pair where USER is two name segments
# joined by '+' (e.g. `org+builder`) and PASS is 32-80 alphanumeric characters.
# The `i` flag makes both captures case-insensitive. Low confidence; no
# `validation` section is defined for this rule.
- name: Docker Robot Credentials (plaintext pair)
|
||||
id: kingfisher.generic.9
|
||||
pattern: |
|
||||
(?xi)
|
||||
\b
|
||||
(
|
||||
(?P<USER> [a-z0-9._-]+ \+ [a-z0-9._-]+ )
|
||||
:
|
||||
(?P<PASS> [A-Z0-9]{32,80} )
|
||||
)
|
||||
\b
|
||||
min_entropy: 2.0
|
||||
confidence: low
|
||||
examples:
|
||||
- some+thing:02PDFMQN2PL2ZAB9OX3IOHC1XMIW1SE5NWG3RETG58JUZJ310WFYESRA7F0LM461
|
||||
- org+builder:1C2F9D0BB1E67E9F6B3B5B9A2A3D4E5F6A7B8C9D0E1F2A3B4C5D6E7F8A9B0C1
|
||||
references:
|
||||
- https://docs.quay.io/use_quay.html#robot-accounts
|
||||
30
src/main.rs
30
src/main.rs
|
|
@ -5,27 +5,27 @@
|
|||
// * Fallback - system allocator (`system-alloc` feature)
|
||||
// ────────────────────────────────────────────────────────────
|
||||
|
||||
// --- jemalloc (opt-in) ---
|
||||
#[cfg(feature = "use-jemalloc")]
|
||||
#[global_allocator]
|
||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
// // --- jemalloc (opt-in) ---
|
||||
// #[cfg(feature = "use-jemalloc")]
|
||||
// #[global_allocator]
|
||||
// static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
|
||||
// --- mimalloc (default) ---
|
||||
#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))]
|
||||
#[global_allocator]
|
||||
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
// --- system allocator (explicit opt-out) ---
|
||||
#[cfg(feature = "system-alloc")]
|
||||
use std::alloc::System;
|
||||
#[cfg(feature = "system-alloc")]
|
||||
#[global_allocator]
|
||||
static GLOBAL: System = System;
|
||||
// // --- mimalloc (default) ---
|
||||
// #[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))]
|
||||
// #[global_allocator]
|
||||
// static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
// // --- system allocator (explicit opt-out) ---
|
||||
// #[cfg(feature = "system-alloc")]
|
||||
// use std::alloc::System;
|
||||
// #[cfg(feature = "system-alloc")]
|
||||
// #[global_allocator]
|
||||
// static GLOBAL: System = System;
|
||||
|
||||
use std::alloc::System;
|
||||
#[global_allocator]
|
||||
static GLOBAL: System = System;
|
||||
|
||||
use std::{
|
||||
io::Read,
|
||||
sync::{Arc, Mutex},
|
||||
|
|
|
|||
114
src/matcher.rs
114
src/matcher.rs
|
|
@ -65,6 +65,7 @@ pub struct OwnedBlobMatch {
|
|||
pub validation_response_status: StatusCode,
|
||||
pub validation_success: bool,
|
||||
pub calculated_entropy: f32,
|
||||
pub is_base64: bool,
|
||||
}
|
||||
impl<'a> Matcher<'a> {
|
||||
pub fn get_profiling_report(&self) -> Option<Vec<RuleStats>> {
|
||||
|
|
@ -85,6 +86,7 @@ impl OwnedBlobMatch {
|
|||
.unwrap_or(StatusCode::CONTINUE),
|
||||
validation_success: m.validation_success,
|
||||
calculated_entropy: m.calculated_entropy,
|
||||
is_base64: m.is_base64,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -108,6 +110,7 @@ impl OwnedBlobMatch {
|
|||
validation_success: blob_match.validation_success,
|
||||
calculated_entropy: blob_match.calculated_entropy,
|
||||
finding_fingerprint: 0, //default
|
||||
is_base64: blob_match.is_base64,
|
||||
};
|
||||
|
||||
// Convert matching_finding to a &str (using lossy conversion if needed)
|
||||
|
|
@ -154,6 +157,7 @@ pub struct BlobMatch<'a> {
|
|||
|
||||
pub validation_success: bool,
|
||||
pub calculated_entropy: f32,
|
||||
pub is_base64: bool,
|
||||
}
|
||||
#[derive(Clone)]
|
||||
struct UserData {
|
||||
|
|
@ -305,8 +309,12 @@ impl<'a> Matcher<'a> {
|
|||
// Perform the scan
|
||||
self.scan_bytes_raw(&blob.bytes(), &filename)?;
|
||||
|
||||
// Early exit if no matches found
|
||||
if self.user_data.raw_matches_scratch.is_empty() {
|
||||
// Opportunistically look for standalone Base64 blobs. If neither
|
||||
// the raw scan nor this check yields anything, we can return early
|
||||
// before doing any heavier work.
|
||||
let mut b64_items = get_base64_strings(blob.bytes());
|
||||
|
||||
if self.user_data.raw_matches_scratch.is_empty() && b64_items.is_empty() {
|
||||
// Only record in seen_blobs if deduplication is enabled
|
||||
if !no_dedup {
|
||||
return Ok(match self.seen_blobs.insert(blob.id, false) {
|
||||
|
|
@ -322,18 +330,22 @@ impl<'a> Matcher<'a> {
|
|||
let rules_db = self.rules_db;
|
||||
let mut seen_matches = FxHashSet::default();
|
||||
let mut previous_matches = Vec::new();
|
||||
let tree_sitter_result = lang.and_then(|lang_str| {
|
||||
get_language_and_queries(&lang_str).and_then(|(language, queries)| {
|
||||
let checker = Checker { language, rules: queries };
|
||||
match checker.check(&blob.bytes()) {
|
||||
Ok(results) => Some(results),
|
||||
Err(e) => {
|
||||
println!("Error in checker.check: {}", e);
|
||||
None
|
||||
let tree_sitter_result = if self.user_data.raw_matches_scratch.is_empty() {
|
||||
None
|
||||
} else {
|
||||
lang.and_then(|lang_str| {
|
||||
get_language_and_queries(&lang_str).and_then(|(language, queries)| {
|
||||
let checker = Checker { language, rules: queries };
|
||||
match checker.check(&blob.bytes()) {
|
||||
Ok(results) => Some(results),
|
||||
Err(e) => {
|
||||
println!("Error in checker.check: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
});
|
||||
};
|
||||
// Process matches
|
||||
let mut matches = Vec::new();
|
||||
let owned_ts_results = tree_sitter_result.map(|ts_results| {
|
||||
|
|
@ -383,6 +395,7 @@ impl<'a> Matcher<'a> {
|
|||
&mut seen_matches,
|
||||
origin,
|
||||
None,
|
||||
false,
|
||||
redact,
|
||||
&filename,
|
||||
self.profiler.as_ref(),
|
||||
|
|
@ -406,6 +419,7 @@ impl<'a> Matcher<'a> {
|
|||
&mut seen_matches,
|
||||
origin,
|
||||
Some(ts_match.clone()),
|
||||
*is_base64_decoded,
|
||||
redact,
|
||||
&filename,
|
||||
self.profiler.as_ref(),
|
||||
|
|
@ -414,6 +428,45 @@ impl<'a> Matcher<'a> {
|
|||
}
|
||||
}
|
||||
}
|
||||
// If the blob contains standalone Base64 blobs, decode and scan them as well
|
||||
const MAX_B64_DEPTH: usize = 2; // decode at most two levels deep
|
||||
let mut b64_stack: Vec<(DecodedData, usize)> =
|
||||
b64_items.drain(..).map(|d| (d, 0)).collect();
|
||||
while let Some((item, depth)) = b64_stack.pop() {
|
||||
for (rule_id_usize, rule) in rules_db.rules.iter().enumerate() {
|
||||
let re = &rules_db.anchored_regexes[rule_id_usize];
|
||||
filter_match(
|
||||
blob,
|
||||
rule.clone(),
|
||||
re,
|
||||
item.pos_start,
|
||||
item.pos_end,
|
||||
&mut matches,
|
||||
&mut previous_matches,
|
||||
rule_id_usize,
|
||||
&mut seen_matches,
|
||||
origin,
|
||||
Some(item.decoded.clone()),
|
||||
true,
|
||||
redact,
|
||||
&filename,
|
||||
self.profiler.as_ref(),
|
||||
);
|
||||
}
|
||||
if depth + 1 < MAX_B64_DEPTH {
|
||||
for nested in get_base64_strings(item.decoded.as_bytes()) {
|
||||
b64_stack.push((
|
||||
DecodedData {
|
||||
original: nested.original,
|
||||
decoded: nested.decoded,
|
||||
pos_start: item.pos_start,
|
||||
pos_end: item.pos_end,
|
||||
},
|
||||
depth + 1,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Finalize
|
||||
// Only record in seen_blobs if deduplication is enabled
|
||||
if !no_dedup {
|
||||
|
|
@ -457,6 +510,7 @@ fn filter_match<'b>(
|
|||
seen_matches: &mut FxHashSet<u64>,
|
||||
_origin: &OriginSet,
|
||||
ts_match: Option<String>,
|
||||
is_base64: bool,
|
||||
redact: bool,
|
||||
filename: &str,
|
||||
profiler: Option<&Arc<ConcurrentRuleProfiler>>,
|
||||
|
|
@ -521,6 +575,7 @@ fn filter_match<'b>(
|
|||
validation_response_status: StatusCode::from_u16(0).unwrap_or(StatusCode::CONTINUE),
|
||||
validation_success: false,
|
||||
calculated_entropy,
|
||||
is_base64,
|
||||
});
|
||||
previous_matches.push((rule_id, matching_input_offset_span));
|
||||
}
|
||||
|
|
@ -729,6 +784,8 @@ pub struct Match {
|
|||
pub calculated_entropy: f32,
|
||||
|
||||
pub visible: bool,
|
||||
#[serde(default)]
|
||||
pub is_base64: bool,
|
||||
}
|
||||
impl Match {
|
||||
#[inline]
|
||||
|
|
@ -780,6 +837,7 @@ impl Match {
|
|||
validation_response_status: owned_blob_match.validation_response_status.as_u16(),
|
||||
validation_success: owned_blob_match.validation_success,
|
||||
calculated_entropy: owned_blob_match.calculated_entropy,
|
||||
is_base64: owned_blob_match.is_base64,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -832,33 +890,26 @@ pub struct DecodedData {
|
|||
}
|
||||
pub fn get_base64_strings(input: &[u8]) -> Vec<DecodedData> {
|
||||
lazy_static! {
|
||||
static ref RE_BASE64: Regex =
|
||||
Regex::new(r"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?").unwrap();
|
||||
// Require a reasonably long run of valid Base64 characters to reduce
|
||||
// noise. 32 Base64 characters correspond to 24 decoded bytes.
|
||||
static ref RE_BASE64: Regex = Regex::new(r"[A-Za-z0-9+/]{32,}={0,2}").unwrap();
|
||||
}
|
||||
let mut results = Vec::new();
|
||||
for capture in RE_BASE64.captures_iter(input) {
|
||||
let base64_match = capture.get(0).unwrap();
|
||||
|
||||
if base64_match.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let start = base64_match.start();
|
||||
let end = base64_match.end();
|
||||
let base64_string = &input[start..end];
|
||||
// Check if the length is a multiple of 4
|
||||
for m in RE_BASE64.find_iter(input) {
|
||||
let base64_string = m.as_bytes();
|
||||
// Skip candidates whose length isn't a multiple of four – they cannot
|
||||
// be valid Base64.
|
||||
if base64_string.len() % 4 != 0 {
|
||||
continue;
|
||||
}
|
||||
if let Ok(decoded) = general_purpose::STANDARD.decode(base64_string) {
|
||||
// Check if the decoded string is valid UTF-8
|
||||
if let Ok(decoded_str) = std::str::from_utf8(&decoded) {
|
||||
if decoded_str.is_ascii() {
|
||||
results.push(DecodedData {
|
||||
original: String::from_utf8_lossy(base64_string).into_owned(),
|
||||
decoded: decoded_str.to_string(),
|
||||
pos_start: start,
|
||||
pos_end: end,
|
||||
pos_start: m.start(),
|
||||
pos_end: m.end(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -1026,12 +1077,13 @@ mod test {
|
|||
/// and report correct byte-offsets.
|
||||
#[test]
|
||||
fn test_get_base64_strings_basic() {
|
||||
let raw = b"foo SGVsbG8gV29ybGQ= bar"; // "Hello World"
|
||||
let raw = b"foo MDEyMzQ1Njc4OWFiY2RlZjAxMjM0NTY3ODlhYmNkZWY= bar";
|
||||
// decodes to "0123456789abcdef0123456789abcdef"
|
||||
let hits = get_base64_strings(raw);
|
||||
assert_eq!(hits.len(), 1);
|
||||
let item = &hits[0];
|
||||
assert_eq!(item.decoded, "Hello World");
|
||||
assert_eq!(item.original, "SGVsbG8gV29ybGQ=");
|
||||
assert_eq!(item.decoded, "0123456789abcdef0123456789abcdef");
|
||||
assert_eq!(item.original, "MDEyMzQ1Njc4OWFiY2RlZjAxMjM0NTY3ODlhYmNkZWY=");
|
||||
// "foo␠" is 4 bytes, so the start offset is 4
|
||||
assert_eq!((item.pos_start, item.pos_end), (4, 4 + item.original.len()));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -384,6 +384,7 @@ impl DetailsReporter {
|
|||
column_start: source_span.start.column as u32,
|
||||
column_end: source_span.end.column as u32,
|
||||
path: file_path,
|
||||
encoding: if rm.m.is_base64 { Some("base64".to_string()) } else { None },
|
||||
git_metadata: git_metadata_val,
|
||||
},
|
||||
}
|
||||
|
|
@ -521,6 +522,8 @@ pub struct FindingRecordData {
|
|||
pub column_end: u32,
|
||||
pub path: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub encoding: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub git_metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -165,6 +165,7 @@ mod tests {
|
|||
validation_success,
|
||||
calculated_entropy: 4.5,
|
||||
visible: true,
|
||||
is_base64: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -100,6 +100,9 @@ impl<'a> Display for PrettyFindingRecord<'a> {
|
|||
};
|
||||
let finding = &record.finding;
|
||||
writeln!(f, " |Finding.......: {}", style_fn(&finding.snippet))?;
|
||||
if let Some(enc) = &finding.encoding {
|
||||
writeln!(f, " |Encoding.....: {}", enc)?;
|
||||
}
|
||||
writeln!(f, " |Fingerprint...: {}", finding.fingerprint)?;
|
||||
writeln!(f, " |Confidence....: {}", finding.confidence)?;
|
||||
writeln!(f, " |Entropy.......: {}", finding.entropy)?;
|
||||
|
|
|
|||
|
|
@ -1028,6 +1028,7 @@ rules:
|
|||
validation_response_status: StatusCode::OK,
|
||||
validation_success: false,
|
||||
calculated_entropy: 0.0, // or compute your own
|
||||
is_base64: false,
|
||||
};
|
||||
let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
|
||||
let client = reqwest::Client::new();
|
||||
|
|
|
|||
1052
src/validation.rs.orig
Normal file
1052
src/validation.rs.orig
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -93,9 +93,9 @@ fn is_local_host(h: &str) -> bool {
|
|||
|
||||
const FAST_CONNECT_MS: u64 = 700; // direct single-host URIs
|
||||
const FAST_SELECT_MS: u64 = 300;
|
||||
const SRV_PARSE_MS: u64 = 1_000; // limit DNS resolution time
|
||||
const SRV_CONNECT_MS: u64 = 1500;//700;
|
||||
const SRV_SELECT_MS: u64 = 1500;//300;
|
||||
const SRV_PARSE_MS: u64 = 2_000; // limit DNS resolution time
|
||||
const SRV_CONNECT_MS: u64 = 2500; //700;
|
||||
const SRV_SELECT_MS: u64 = 2500; //300;
|
||||
|
||||
/// Validates a MongoDB URI in ≤ 2 s. Returns `(bool, String)` where the
|
||||
/// boolean indicates success and the string provides a status message.
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ fn make_match(fp: u64) -> Match {
|
|||
validation_success: false,
|
||||
calculated_entropy: 0.0,
|
||||
visible: true,
|
||||
is_base64: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
34
tests/int_base64.rs
Normal file
34
tests/int_base64.rs
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
use assert_cmd::prelude::*;
|
||||
use predicates::prelude::*;
|
||||
use std::{fs, process::Command};
|
||||
use tempfile::tempdir;
|
||||
|
||||
// Ensure base64 encoded secrets are decoded and detected.
//
// Writes a Base64-encoded `ghp_...` token to `secret.txt` inside a temporary
// directory, runs the `kingfisher` binary's `scan` subcommand over that
// directory with JSON output, and checks that stdout contains both the decoded
// token and an `"encoding": "base64"` field.
|
||||
#[test]
|
||||
fn detects_base64_encoded_secret() -> anyhow::Result<()> {
|
||||
let dir = tempdir()?;
|
||||
let file_path = dir.path().join("secret.txt");
|
||||
// Base64 for ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa
|
||||
let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDJxTHFQYQ==";
|
||||
fs::write(&file_path, encoded)?;
|
||||
|
||||
Command::cargo_bin("kingfisher")?
|
||||
.args([
|
||||
"scan",
|
||||
dir.path().to_str().unwrap(),
|
||||
"--no-binary",
|
||||
"--confidence=low",
|
||||
"--format",
|
||||
"json",
|
||||
"--no-update-check",
|
||||
])
|
||||
.assert()
|
||||
// NOTE(review): 200 is presumably the exit code kingfisher returns when
// findings are reported - confirm against the CLI's exit-code handling.
|
||||
.code(200)
|
||||
.stdout(
|
||||
// The finding must show the decoded secret and be tagged as Base64-encoded.
|
||||
predicate::str::contains("ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa")
|
||||
.and(predicate::str::contains("\"encoding\": \"base64\"")),
|
||||
);
|
||||
|
||||
// Explicitly remove the temporary directory so cleanup errors surface via `?`.
dir.close()?;
|
||||
Ok(())
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue