updated allocator

This commit is contained in:
Mick Grove 2025-11-11 13:24:06 -08:00
commit d6c1dfc9d0
11 changed files with 94 additions and 49 deletions

View file

@ -2,6 +2,9 @@
All notable changes to this project will be documented in this file.
## [v1.64.0]
- Fixed a bug where using --redact broke validation
## [v1.63.1]
- Updated allocator

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.63.1"
version = "1.64.0"
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true

View file

@ -154,15 +154,15 @@ impl FindingsStore {
.captures
.iter()
.find(|c| c.name.is_none() && c.match_number == 0)
.map(|c| c.value)
.map(|c| c.raw_value())
.or_else(|| {
m.groups
.captures
.iter()
.find(|c| matches!(c.name.as_deref(), Some("TOKEN")))
.map(|c| c.value)
.map(|c| c.raw_value())
})
.or_else(|| m.groups.captures.get(0).map(|c| c.value))
.or_else(|| m.groups.captures.get(0).map(|c| c.raw_value()))
.unwrap_or("");
let origin_kind = match origin.first() {

View file

@ -34,7 +34,7 @@ use crate::{
safe_list::{is_safe_match, is_user_match},
scanner_pool::ScannerPool,
snippet::Base64BString,
util::{intern, redact_value},
util::intern,
};
const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment
@ -100,7 +100,7 @@ impl OwnedBlobMatch {
.captures
.get(1)
.or_else(|| blob_match.captures.captures.get(0))
.map(|capture| capture.value.as_bytes().to_vec())
.map(|capture| capture.raw_value().as_bytes().to_vec())
.unwrap_or_else(Vec::new);
let mut owned_blob_match = OwnedBlobMatch {
@ -714,7 +714,7 @@ fn filter_match<'b>(
&blob.bytes()[matching_input_offset_span.start..matching_input_offset_span.end];
// Pass the *full* capture object to from_captures
let groups = SerializableCaptures::from_captures(&captures, haystack, re, redact);
let groups = SerializableCaptures::from_captures(&captures, haystack, re);
matches.push(BlobMatch {
rule: Arc::clone(&rule),
@ -829,16 +829,47 @@ impl JsonSchema for Groups {
// pub end: usize, // End position of the match
// pub value: String, // The actual captured value
// }
#[derive(Debug, Clone, Serialize, JsonSchema)]
#[derive(Debug, Clone, JsonSchema)]
pub struct SerializableCapture {
pub name: Option<String>,
pub match_number: i32,
pub start: usize,
pub end: usize,
/// Interned value of the capture.
/// Interned original (unredacted) value.
#[serde(skip_serializing, skip_deserializing)]
pub value: &'static str,
}
impl SerializableCapture {
    /// Gives back the capture text exactly as stored in `value`, with no
    /// redaction applied.
    pub fn raw_value(&self) -> &'static str {
        self.value
    }

    /// Produces the text intended for user-facing output.
    ///
    /// The stored value is handed to `crate::util::display_value`, which
    /// decides whether the caller sees the original text or a redacted
    /// placeholder.
    pub fn display_value(&self) -> std::borrow::Cow<'static, str> {
        let original = self.raw_value();
        crate::util::display_value(original)
    }
}
impl serde::Serialize for SerializableCapture {
    /// Writes the capture as a five-field struct: `name`, `match_number`,
    /// `start`, `end` and `value`.
    ///
    /// The `value` field carries the result of [`Self::display_value`], not
    /// the raw stored text.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeStruct;

        // Number of fields emitted below; must match the serialize_field calls.
        const FIELD_COUNT: usize = 5;

        let mut record = serializer.serialize_struct("SerializableCapture", FIELD_COUNT)?;
        record.serialize_field("name", &self.name)?;
        record.serialize_field("match_number", &self.match_number)?;
        record.serialize_field("start", &self.start)?;
        record.serialize_field("end", &self.end)?;

        // Emit the user-facing form of the value rather than the raw field.
        let shown = self.display_value();
        record.serialize_field("value", &shown)?;

        record.end()
    }
}
#[derive(Debug, Clone, Serialize, JsonSchema)]
pub struct SerializableCaptures {
#[schemars(with = "Vec<SerializableCapture>")]
@ -846,12 +877,7 @@ pub struct SerializableCaptures {
}
impl SerializableCaptures {
pub fn from_captures(
captures: &regex::bytes::Captures,
_input: &[u8],
re: &Regex,
redact: bool,
) -> Self {
pub fn from_captures(captures: &regex::bytes::Captures, _input: &[u8], re: &Regex) -> Self {
let mut serialized_captures: SmallVec<[SerializableCapture; 2]> = SmallVec::new();
let capture_names: SmallVec<[Option<String>; 4]> =
@ -863,12 +889,8 @@ impl SerializableCaptures {
for i in 1..captures.len() {
// Start from 1
if let Some(cap) = captures.get(i) {
let value = if redact {
redact_value(&String::from_utf8_lossy(cap.as_bytes()))
} else {
String::from_utf8_lossy(cap.as_bytes()).to_string()
};
let interned = intern(&value);
let raw_value = String::from_utf8_lossy(cap.as_bytes()).to_string();
let raw_interned = intern(&raw_value);
let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned();
serialized_captures.push(SerializableCapture {
@ -876,7 +898,7 @@ impl SerializableCaptures {
match_number: i32::try_from(i).unwrap_or(0),
start: cap.start(),
end: cap.end(),
value: interned,
value: raw_interned,
});
}
}
@ -884,12 +906,8 @@ impl SerializableCaptures {
// ELSE, if there is ONLY the full match (len == 1),
// serialize just that full match (group 0) as the fallback.
if let Some(cap) = captures.get(0) {
let value = if redact {
redact_value(&String::from_utf8_lossy(cap.as_bytes()))
} else {
String::from_utf8_lossy(cap.as_bytes()).to_string()
};
let interned = intern(&value);
let raw_value = String::from_utf8_lossy(cap.as_bytes()).to_string();
let raw_interned = intern(&raw_value);
let name = capture_names.get(0).and_then(|opt| opt.as_ref()).cloned();
serialized_captures.push(SerializableCapture {
@ -897,7 +915,7 @@ impl SerializableCaptures {
match_number: 0,
start: cap.start(),
end: cap.end(),
value: interned,
value: raw_interned,
});
}
}
@ -959,7 +977,7 @@ impl Match {
.captures
.get(1)
.or_else(|| owned_blob_match.captures.captures.get(0))
.map(|capture| capture.value.as_bytes())
.map(|capture| capture.raw_value().as_bytes())
.unwrap_or_default();
// The fingerprint will be based on the content of the secret.
@ -1596,7 +1614,7 @@ line2
Regex::new(r"(?xi)\b(ghp_(?P<body>[A-Z0-9]{3})(?P<checksum>[A-Z0-9]{2}))").unwrap();
let caps = re.captures(b"ghp_ABC12").expect("expected captures");
let serialized = SerializableCaptures::from_captures(&caps, b"", &re, false);
let serialized = SerializableCaptures::from_captures(&caps, b"", &re);
let entries: Vec<(Option<&str>, i32, &str)> = serialized
.captures
.iter()

View file

@ -421,14 +421,12 @@ impl DetailsReporter {
// We now correctly serialize *only* the explicit capture groups (or group 0
// as a fallback). The primary "secret" is therefore always at index 0
// of the captures SmallVec.
let snippet = Escaped(
rm.m.groups
.captures
.get(0) // Get the first (and primary) serialized capture
.map(|capture| capture.value.as_bytes())
.unwrap_or_default(),
)
.to_string();
let snippet = if let Some(capture) = rm.m.groups.captures.get(0) {
let displayed = capture.display_value();
Escaped(displayed.as_ref().as_bytes()).to_string()
} else {
String::new()
};
// --- END FIX ---
let validation_status = if rm.validation_success {

View file

@ -33,6 +33,7 @@ use crate::{
run_secret_validation, save_docker_images,
summary::print_scan_summary,
},
util::set_redaction_enabled,
};
pub async fn run_scan(
@ -75,6 +76,8 @@ pub async fn run_async_scan(
let progress_enabled = global_args.use_progress();
initialize_environment()?;
set_redaction_enabled(args.redact);
let mut repo_urls = enumerate_github_repos(args, global_args).await?;
let gitlab_repo_urls = enumerate_gitlab_repos(args, global_args).await?;
let gitea_repo_urls = enumerate_gitea_repos(args, global_args).await?;

View file

@ -73,7 +73,7 @@ pub async fn run_secret_validation(
.captures
.get(1)
.or_else(|| arc_msg.2.groups.captures.get(0))
.map_or("", |c| c.value);
.map_or("", |c| c.raw_value());
groups.entry(format!("{}|{}", arc_msg.2.rule.id(), secret)).or_default().push(arc_msg);
}
@ -111,7 +111,7 @@ pub async fn run_secret_validation(
.captures
.get(1)
.or_else(|| rep_arc.2.groups.captures.get(0))
.map_or("", |c| c.value);
.map_or("", |c| c.raw_value());
let key = format!("{}|{}", rep_arc.2.rule.id(), secret);
match val_res.entry(key.clone()) {
@ -352,7 +352,7 @@ async fn validate_single(
sorted.into_iter().map(|(k, v)| format!("{}={}", k, v)).collect::<Vec<_>>().join("|")
})
.unwrap_or_default();
let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.value.to_string());
let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string());
let cache_key = format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str);
// Check cache first
if let Some(cached) = cache.get(&cache_key) {
@ -443,6 +443,6 @@ fn build_cache_key(
.unwrap_or_default();
// For demonstration, we'll do a simplistic approach
// You can adapt from your existing logic
let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.value.to_string());
let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string());
format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str)
}

View file

@ -1,7 +1,9 @@
use std::{
borrow::Cow,
fs::File,
io::{stdin, stdout, BufReader, BufWriter},
path::Path,
sync::atomic::{AtomicBool, Ordering},
};
use blake3::Hasher;
@ -11,6 +13,7 @@ use path_dedot::ParseDot;
use ring::rand::{SecureRandom, SystemRandom};
// Generate a random salt once and use it for the entire application runtime
static APP_SALT: Lazy<String> = Lazy::new(|| generate_salt());
static REDACTION_ENABLED: AtomicBool = AtomicBool::new(false);
/// Interns a string once and returns a `'static` reference to it.
pub fn intern(s: &str) -> &'static str {
@ -41,6 +44,26 @@ pub fn redact_value(value: &str) -> String {
let hash = hasher.finalize();
format!("[REDACTED:{}]", hash_to_short_id(&hash))
}
/// Turns global output redaction on (`true`) or off (`false`).
///
/// The setting is written to the `REDACTION_ENABLED` flag with relaxed
/// ordering.
pub fn set_redaction_enabled(enabled: bool) {
    let flag = &REDACTION_ENABLED;
    flag.store(enabled, Ordering::Relaxed);
}
/// Returns true if redaction is enabled for user-facing output.
pub fn redaction_enabled() -> bool {
REDACTION_ENABLED.load(Ordering::Relaxed)
}
/// Chooses the text to show for `value` according to the current redaction
/// setting.
///
/// With redaction off the input is returned borrowed, without allocating.
/// With redaction on, the result of `redact_value(value)` is returned as an
/// owned string.
pub fn display_value(value: &'static str) -> Cow<'static, str> {
    // Common case first: nothing to hide, so hand back the original borrow.
    if !redaction_enabled() {
        return Cow::Borrowed(value);
    }
    Cow::Owned(redact_value(value))
}
// Generate a random salt (16-character alphanumeric string)
fn generate_salt() -> String {
let rng = SystemRandom::new();

View file

@ -80,7 +80,7 @@ fn secret_fingerprint(m: &OwnedBlobMatch) -> u64 {
// first capture = the secret text itself
if let Some(c0) = m.captures.captures.get(0) {
c0.value.hash(&mut hasher);
c0.raw_value().hash(&mut hasher);
}
hasher.finish()
}
@ -148,7 +148,7 @@ pub fn collect_variables_and_dependencies(
.entry(dependency.variable.to_uppercase())
.or_insert_with(Vec::new)
.push((
matching_input.value.to_string(),
matching_input.raw_value().to_string(),
other_match.matching_input_offset_span,
));
}

View file

@ -15,10 +15,10 @@ pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String,
.iter()
.filter_map(|cap| {
if let Some(name) = &cap.name {
Some((name.to_uppercase(), cap.value.to_string(), cap.start, cap.end))
Some((name.to_uppercase(), cap.raw_value().to_string(), cap.start, cap.end))
} else if !saw_unnamed {
saw_unnamed = true;
Some(("TOKEN".to_string(), cap.value.to_string(), cap.start, cap.end))
Some(("TOKEN".to_string(), cap.raw_value().to_string(), cap.start, cap.end))
} else {
// Ignore any additional unnamed captures (e.g., from unintended groups)
None
@ -201,7 +201,7 @@ mod tests {
match_number: 2, // Corrected match_number
start: 4,
end: 6,
value: "cc"
value: "cc",
},
],
};

View file

@ -157,7 +157,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
assert!(!matches.is_empty());
for m_arc in matches {
let m = &m_arc.2;
assert!(m.groups.captures.iter().any(|cap| cap.value.starts_with("[REDACTED:")));
assert!(m.groups.captures.iter().any(|cap| cap.display_value().starts_with("[REDACTED:")));
}
Ok(())