forked from mirrors/kingfisher
commit
2bf9e54ad9
9 changed files with 180 additions and 45 deletions
|
|
@ -2,6 +2,11 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [v1.76.0]
|
||||
- Fixed validation deduplication for rules with nested unnamed captures (e.g. `(?<REGEX>...(ABC|DEF)...)`) to use the primary capture for grouping, ensuring each unique match triggers a separate validation request.
|
||||
- Added trace-level (`-vv`) logging for internal validation dedup keys and grouping to aid debugging.
|
||||
- Switched compression dependencies to pure-Rust bzip2/lzma implementations and pared zip features to avoid C-based codecs for bz2/xz handling.
|
||||
|
||||
## [v1.75.0]
|
||||
- Enhanced Access Map View: added fingerprint display, enabled searching by fingerprint, and implemented bidirectional navigation between Findings and Access Map nodes.
|
||||
- Added Slack Access Map support with granular permissions in the tree view.
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ publish = false
|
|||
|
||||
[package]
|
||||
name = "kingfisher"
|
||||
version = "1.75.0"
|
||||
version = "1.76.0"
|
||||
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
|
@ -150,10 +150,10 @@ tree_magic_mini = "3.2"
|
|||
content_inspector = "0.2.4"
|
||||
rustc-hash = "2.1.1"
|
||||
term_size = "0.3.2"
|
||||
bzip2 = "0.5.2"
|
||||
zip = "2.4.2"
|
||||
bzip2-rs = "0.1.2"
|
||||
zip = { version = "2.4.2", default-features = false, features = ["deflate", "deflate64", "time"] }
|
||||
tar = "0.4.44"
|
||||
xz2 = "0.1.7"
|
||||
lzma-rs = "0.3.0"
|
||||
asar = "0.3.0"
|
||||
blake3 = "1.8.2"
|
||||
memchr = "2.7"
|
||||
|
|
|
|||
|
|
@ -10,6 +10,10 @@ rules:
|
|||
password \s+ ([^\s]+)
|
||||
min_entropy: 3.3
|
||||
confidence: medium
|
||||
pattern_requirements:
|
||||
ignore_if_contains:
|
||||
- '${'
|
||||
- '$('
|
||||
examples:
|
||||
- 'machine api.github.com login ziggy^stardust password 012345abcdef'
|
||||
- |
|
||||
|
|
|
|||
|
|
@ -1,18 +1,18 @@
|
|||
use std::{
|
||||
fs,
|
||||
io::Read,
|
||||
io::{BufReader, Read},
|
||||
path::{Component, Path, PathBuf},
|
||||
};
|
||||
|
||||
use anyhow::Result;
|
||||
use asar::AsarReader;
|
||||
use bzip2::read::BzDecoder;
|
||||
use bzip2_rs::DecoderReader;
|
||||
use flate2::read::{GzDecoder, ZlibDecoder};
|
||||
use lzma_rs::xz_decompress;
|
||||
use memmap2::Mmap;
|
||||
use tar::Archive;
|
||||
use tempfile::{tempdir, TempDir};
|
||||
use uuid::Uuid;
|
||||
use xz2::read::XzDecoder;
|
||||
use zip::ZipArchive;
|
||||
|
||||
/// Formats that are basically a ZIP container.
|
||||
|
|
@ -175,22 +175,43 @@ fn handle_asar_archive_in_memory(buffer: &[u8], archive_path: &Path) -> Result<C
|
|||
}
|
||||
}
|
||||
|
||||
fn stream_to_file<R: Read>(mut decoder: R, out_path: &Path) -> Result<CompressedContent> {
|
||||
if !is_safe_extract_path(out_path) {
|
||||
anyhow::bail!("unsafe path during decompression: {}", out_path.display());
|
||||
/// Validate and open a file for reading, checking for path traversal attacks.
|
||||
fn safe_open_for_read(path: &Path) -> Result<fs::File> {
|
||||
if !is_safe_extract_path(path) {
|
||||
anyhow::bail!("unsafe input path during decompression: {}", path.display());
|
||||
}
|
||||
let mut out_file = fs::File::create(out_path)?;
|
||||
Ok(fs::File::open(path)?)
|
||||
}
|
||||
|
||||
/// Validate and create a file for writing, checking for path traversal attacks.
|
||||
fn safe_create_for_write(path: &Path) -> Result<fs::File> {
|
||||
if !is_safe_extract_path(path) {
|
||||
anyhow::bail!("unsafe output path during decompression: {}", path.display());
|
||||
}
|
||||
Ok(fs::File::create(path)?)
|
||||
}
|
||||
|
||||
fn stream_to_file<R: Read>(mut decoder: R, out_path: &Path) -> Result<CompressedContent> {
|
||||
let mut out_file = safe_create_for_write(out_path)?;
|
||||
std::io::copy(&mut decoder, &mut out_file)?;
|
||||
Ok(CompressedContent::RawFile(out_path.to_owned()))
|
||||
}
|
||||
|
||||
fn stream_xz_to_file(path: &Path, out_path: &Path) -> Result<CompressedContent> {
|
||||
let input = safe_open_for_read(path)?;
|
||||
let mut reader = BufReader::new(input);
|
||||
let mut out_file = safe_create_for_write(out_path)?;
|
||||
xz_decompress(&mut reader, &mut out_file)?;
|
||||
Ok(CompressedContent::RawFile(out_path.to_owned()))
|
||||
}
|
||||
|
||||
/* ───────────────────────────────────────────────────────────────
|
||||
one *step* of decompression
|
||||
───────────────────────────────────────────────────────────── */
|
||||
fn decompress_once(path: &Path, base_dir: Option<&Path>) -> Result<CompressedContent> {
|
||||
let extension = path.extension().and_then(|ext| ext.to_str()).map(|s| s.to_ascii_lowercase());
|
||||
|
||||
let mut file = fs::File::open(path)?;
|
||||
let mut file = safe_open_for_read(path)?;
|
||||
|
||||
if let Some(ext) = extension.as_deref() {
|
||||
match ext {
|
||||
|
|
@ -216,22 +237,21 @@ fn decompress_once(path: &Path, base_dir: Option<&Path>) -> Result<CompressedCon
|
|||
}
|
||||
"gz" | "gzip" => {
|
||||
let out_path = make_output_path(path, base_dir, "decomp.tar");
|
||||
let decoder = GzDecoder::new(fs::File::open(path)?);
|
||||
let decoder = GzDecoder::new(BufReader::new(safe_open_for_read(path)?));
|
||||
return stream_to_file(decoder, &out_path);
|
||||
}
|
||||
"bz2" | "bzip2" => {
|
||||
let out_path = make_output_path(path, base_dir, "decomp.tar");
|
||||
let decoder = BzDecoder::new(fs::File::open(path)?);
|
||||
let decoder = DecoderReader::new(BufReader::new(safe_open_for_read(path)?));
|
||||
return stream_to_file(decoder, &out_path);
|
||||
}
|
||||
"xz" => {
|
||||
let out_path = make_output_path(path, base_dir, "decomp.tar");
|
||||
let decoder = XzDecoder::new(fs::File::open(path)?);
|
||||
return stream_to_file(decoder, &out_path);
|
||||
return stream_xz_to_file(path, &out_path);
|
||||
}
|
||||
"zlib" => {
|
||||
let out_path = make_output_path(path, base_dir, "decomp.tar");
|
||||
let decoder = ZlibDecoder::new(fs::File::open(path)?);
|
||||
let decoder = ZlibDecoder::new(BufReader::new(safe_open_for_read(path)?));
|
||||
return stream_to_file(decoder, &out_path);
|
||||
}
|
||||
_ => {}
|
||||
|
|
|
|||
|
|
@ -96,7 +96,15 @@ impl OwnedBlobMatch {
|
|||
}
|
||||
|
||||
pub fn from_blob_match(blob_match: BlobMatch) -> Self {
|
||||
// Get the matching value from capture group 1 (or 0 if not available)
|
||||
// EXTERNAL FINGERPRINT: Use get(1).or_else(get(0)) for backward compatibility.
|
||||
//
|
||||
// This indexing is intentionally different from the internal `validation_dedup_key()`
|
||||
// (which uses get(0)) to maintain stable external fingerprints. Changing this would break:
|
||||
// - Historical baselines that rely on fingerprint matching
|
||||
// - Dedup entries stored in external systems
|
||||
//
|
||||
// For rules with nested captures like (?<REGEX>...(ABC)...), this may pick up
|
||||
// the inner group, but that behavior is now established and must be preserved.
|
||||
let matching_finding = blob_match
|
||||
.captures
|
||||
.captures
|
||||
|
|
@ -1021,8 +1029,8 @@ impl Match {
|
|||
origin_type: &'a str,
|
||||
) -> Self {
|
||||
let offset_span = owned_blob_match.matching_input_offset_span;
|
||||
// Extract the matched secret content. Use capture group 1 if it exists, otherwise fall back
|
||||
// to group 0.
|
||||
// EXTERNAL FINGERPRINT: Use get(1).or_else(get(0)) for backward compatibility.
|
||||
// See comment in from_blob_match() for why this differs from validation_dedup_key().
|
||||
let matching_finding_bytes = owned_blob_match
|
||||
.captures
|
||||
.captures
|
||||
|
|
|
|||
|
|
@ -228,6 +228,12 @@ impl DetailsReporter {
|
|||
}
|
||||
|
||||
fn normalized_finding_fingerprint(m: &Match, origin: &OriginSet) -> u64 {
|
||||
// EXTERNAL FINGERPRINT: Use get(1).or_else(get(0)) for backward compatibility.
|
||||
//
|
||||
// This indexing is intentionally different from the internal `validation_dedup_key()`
|
||||
// (which uses get(0)) to maintain stable external fingerprints and consistent
|
||||
// reporting output. Changing this would break historical baselines and alter
|
||||
// finding appearance.
|
||||
let finding_value = m
|
||||
.groups
|
||||
.captures
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ use liquid::Parser;
|
|||
use reqwest::{Client, StatusCode};
|
||||
use rustc_hash::FxHashMap;
|
||||
use tokio::{sync::Notify, time::timeout};
|
||||
use tracing::trace;
|
||||
|
||||
use crate::{
|
||||
access_map::AccessMapRequest,
|
||||
|
|
@ -156,16 +157,35 @@ pub async fn run_secret_validation(
|
|||
if !simple_matches.is_empty() {
|
||||
let mut groups: FxHashMap<String, Vec<Arc<FindingsStoreMessage>>> = FxHashMap::default();
|
||||
for arc_msg in simple_matches {
|
||||
let secret = arc_msg
|
||||
.2
|
||||
.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| arc_msg.2.groups.captures.get(0))
|
||||
.map_or("", |c| c.raw_value());
|
||||
groups.entry(format!("{}|{}", arc_msg.2.rule.id(), secret)).or_default().push(arc_msg);
|
||||
// VALIDATION DEDUP: Use get(0) to get the first/primary capture for grouping.
|
||||
//
|
||||
// This differs from fingerprint/reporting code (which uses get(1).or_else(get(0)))
|
||||
// for backward compatibility reasons - changing fingerprint calculation would break
|
||||
// historical baselines and dedup entries.
|
||||
//
|
||||
// For validation deduplication, we need the PRIMARY secret value to ensure each
|
||||
// unique secret triggers a separate validation request. Using get(1) first would
|
||||
// incorrectly pick up inner unnamed groups when patterns have nested captures
|
||||
// like (?<REGEX>...(ABC|DEF)...), causing all matches to share the same
|
||||
// validation result.
|
||||
let secret = arc_msg.2.groups.captures.get(0).map_or("", |c| c.raw_value());
|
||||
let group_key = format!("{}|{}", arc_msg.2.rule.id(), secret);
|
||||
trace!(
|
||||
rule_id = %arc_msg.2.rule.id(),
|
||||
secret_value = %secret,
|
||||
external_fingerprint = arc_msg.2.finding_fingerprint,
|
||||
validation_group_key = %group_key,
|
||||
"Grouping finding for validation"
|
||||
);
|
||||
groups.entry(group_key).or_default().push(arc_msg);
|
||||
}
|
||||
|
||||
trace!(
|
||||
total_findings = groups.values().map(|v| v.len()).sum::<usize>(),
|
||||
unique_validation_groups = groups.len(),
|
||||
"Validation grouping complete (internal dedup)"
|
||||
);
|
||||
|
||||
let validation_results = DashMap::<String, CachedResponse>::new();
|
||||
|
||||
let pb = ProgressBar::new(groups.len() as u64).with_message("Validating secrets…");
|
||||
|
|
@ -195,13 +215,9 @@ pub async fn run_secret_validation(
|
|||
let access_map = access_map.clone();
|
||||
|
||||
async move {
|
||||
let secret = rep_arc
|
||||
.2
|
||||
.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rep_arc.2.groups.captures.get(0))
|
||||
.map_or("", |c| c.raw_value());
|
||||
// VALIDATION DEDUP: Use get(0) for the primary secret value.
|
||||
// See comment above for why this differs from fingerprint/reporting code.
|
||||
let secret = rep_arc.2.groups.captures.get(0).map_or("", |c| c.raw_value());
|
||||
let key = format!("{}|{}", rep_arc.2.rule.id(), secret);
|
||||
|
||||
match val_res.entry(key.clone()) {
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ use once_cell::sync::{Lazy, OnceCell};
|
|||
use reqwest::{header, header::HeaderValue, multipart, Client, Url};
|
||||
use rustc_hash::FxHashMap;
|
||||
use tokio::{sync::Notify, time};
|
||||
use tracing::debug;
|
||||
use tracing::{debug, trace};
|
||||
|
||||
use crate::{
|
||||
location::OffsetSpan,
|
||||
|
|
@ -91,16 +91,38 @@ pub fn set_user_agent_suffix<S: Into<String>>(suffix: Option<S>) {
|
|||
// Use SkipMap-based cache instead of a mutex-wrapped FxHashMap.
|
||||
type Cache = Arc<SkipMap<String, CachedResponse>>;
|
||||
|
||||
/// Returns an opaque 64-bit fingerprint for “same secret under the same rule”.
|
||||
fn secret_fingerprint(m: &OwnedBlobMatch) -> u64 {
|
||||
/// Returns an opaque 64-bit key for internal validation deduplication.
|
||||
///
|
||||
/// This is an INTERNAL key used only for validation deduplication within a single scan.
|
||||
/// It uses `captures.get(0)` to get the primary secret value.
|
||||
///
|
||||
/// **Important**: This is distinct from the EXTERNAL `finding_fingerprint` used for:
|
||||
/// - Baseline comparisons across scans
|
||||
/// - Deduplication entries in external systems
|
||||
/// - Reporting output
|
||||
///
|
||||
/// The external fingerprint uses `get(1).or_else(get(0))` for backward compatibility
|
||||
/// and must remain stable. This internal key can evolve independently.
|
||||
fn validation_dedup_key(m: &OwnedBlobMatch) -> u64 {
|
||||
let mut hasher = xxhash_rust::xxh3::Xxh3::new();
|
||||
m.rule.syntax().id.hash(&mut hasher);
|
||||
|
||||
// first capture = the secret text itself
|
||||
if let Some(c0) = m.captures.captures.get(0) {
|
||||
c0.raw_value().hash(&mut hasher);
|
||||
// Use the first capture (primary secret) for deduplication.
|
||||
// Note: capture_value is stored in a variable because it's also used in trace! below.
|
||||
let capture_value = m.captures.captures.get(0).map(|c| c.raw_value());
|
||||
if let Some(val) = capture_value {
|
||||
val.hash(&mut hasher);
|
||||
}
|
||||
hasher.finish()
|
||||
let key = hasher.finish();
|
||||
|
||||
trace!(
|
||||
rule_id = %m.rule.syntax().id,
|
||||
capture_value = ?capture_value,
|
||||
validation_dedup_key = key,
|
||||
"Computed internal validation dedup key"
|
||||
);
|
||||
|
||||
key
|
||||
}
|
||||
|
||||
static VALIDATION_CACHE: OnceCell<DashMap<u64, CachedResponse>> = OnceCell::new();
|
||||
|
|
@ -171,11 +193,16 @@ pub fn collect_variables_and_dependencies(
|
|||
|
||||
if !matching_dependencies.is_empty() {
|
||||
for other_match in matching_dependencies {
|
||||
// VALIDATION: Use get(0) for the primary capture value when collecting
|
||||
// dependent variables. This ensures we get the main captured value rather
|
||||
// than inner unnamed groups from nested captures like (?<REGEX>...(ABC)...).
|
||||
//
|
||||
// Note: This differs from fingerprint/reporting code which uses
|
||||
// get(1).or_else(get(0)) for backward compatibility.
|
||||
let matching_input = other_match
|
||||
.captures
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| other_match.captures.captures.get(0))
|
||||
.get(0)
|
||||
.expect("Expected at least one capture");
|
||||
variable_map
|
||||
.entry(dependency.variable.to_uppercase())
|
||||
|
|
@ -297,7 +324,7 @@ async fn timed_validate_single_match<'a>(
|
|||
// ──────────────────────────────────────────────────────────
|
||||
// 1. process-wide fingerprint de-dup
|
||||
// ──────────────────────────────────────────────────────────
|
||||
let fp = secret_fingerprint(m);
|
||||
let fp = validation_dedup_key(m);
|
||||
|
||||
if let Some(entry) = VALIDATION_CACHE.get_or_init(DashMap::new).get(&fp) {
|
||||
if entry.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
|
||||
|
|
@ -453,10 +480,21 @@ async fn timed_validate_single_match<'a>(
|
|||
header_map.insert(name.as_str().to_string(), v.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// Render the body template to include in cache key
|
||||
let rendered_body =
|
||||
http_validation.request.body.as_ref().and_then(|body_template| {
|
||||
parser
|
||||
.parse(body_template)
|
||||
.ok()
|
||||
.and_then(|template| template.render(&globals).ok())
|
||||
});
|
||||
|
||||
cache_key = httpvalidation::generate_http_cache_key_parts(
|
||||
http_validation.request.method.as_str(),
|
||||
&url,
|
||||
&header_map,
|
||||
rendered_body.as_deref(),
|
||||
);
|
||||
if let Some(cached) = cache.get(&cache_key) {
|
||||
let c = cached.value();
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ pub fn generate_http_cache_key_parts(
|
|||
method: &str,
|
||||
url: &Url,
|
||||
headers: &BTreeMap<String, String>,
|
||||
body: Option<&str>,
|
||||
) -> String {
|
||||
let method = method.to_uppercase(); // ensure "get" == "GET"
|
||||
let url = url.as_str(); // canonical form from `reqwest::Url`
|
||||
|
|
@ -49,6 +50,13 @@ pub fn generate_http_cache_key_parts(
|
|||
hasher.update(b"\0");
|
||||
}
|
||||
|
||||
// Include the request body in the cache key if present
|
||||
if let Some(b) = body {
|
||||
hasher.update(b"BODY\0");
|
||||
hasher.update(b.as_bytes());
|
||||
hasher.update(b"\0");
|
||||
}
|
||||
|
||||
// Hex-encode and prefix so callers can tell this key came from HTTP logic
|
||||
format!("HTTP:{:x}", hasher.finalize())
|
||||
}
|
||||
|
|
@ -605,4 +613,34 @@ mod tests {
|
|||
"Should correctly identify HTML even with multi-byte characters at boundary"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cache_key_includes_body() {
|
||||
let url = Url::from_str("https://example.com/api").unwrap();
|
||||
let headers =
|
||||
BTreeMap::from([("Content-Type".to_string(), "application/json".to_string())]);
|
||||
|
||||
// Same method, url, headers but different bodies should produce different cache keys
|
||||
let key_no_body = generate_http_cache_key_parts("POST", &url, &headers, None);
|
||||
let key_body_a =
|
||||
generate_http_cache_key_parts("POST", &url, &headers, Some(r#"{"value": "abc"}"#));
|
||||
let key_body_b =
|
||||
generate_http_cache_key_parts("POST", &url, &headers, Some(r#"{"value": "xyz"}"#));
|
||||
|
||||
// All three should be different
|
||||
assert_ne!(
|
||||
key_no_body, key_body_a,
|
||||
"Cache key with body should differ from key without body"
|
||||
);
|
||||
assert_ne!(
|
||||
key_no_body, key_body_b,
|
||||
"Cache key with body should differ from key without body"
|
||||
);
|
||||
assert_ne!(key_body_a, key_body_b, "Cache keys with different bodies should be different");
|
||||
|
||||
// Same body should produce same key
|
||||
let key_body_a_dup =
|
||||
generate_http_cache_key_parts("POST", &url, &headers, Some(r#"{"value": "abc"}"#));
|
||||
assert_eq!(key_body_a, key_body_a_dup, "Same inputs should produce same cache key");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue