forked from mirrors/kingfisher
updated allocator
This commit is contained in:
parent
97fbcef1ba
commit
d6c1dfc9d0
11 changed files with 94 additions and 49 deletions
|
|
@ -2,6 +2,9 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [v1.64.0]
|
||||
- Fixed a bug that broke validation when using --redact
|
||||
|
||||
## [v1.63.1]
|
||||
- Updated allocator
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ publish = false
|
|||
|
||||
[package]
|
||||
name = "kingfisher"
|
||||
version = "1.63.1"
|
||||
version = "1.64.0"
|
||||
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
|
|
|||
|
|
@ -154,15 +154,15 @@ impl FindingsStore {
|
|||
.captures
|
||||
.iter()
|
||||
.find(|c| c.name.is_none() && c.match_number == 0)
|
||||
.map(|c| c.value)
|
||||
.map(|c| c.raw_value())
|
||||
.or_else(|| {
|
||||
m.groups
|
||||
.captures
|
||||
.iter()
|
||||
.find(|c| matches!(c.name.as_deref(), Some("TOKEN")))
|
||||
.map(|c| c.value)
|
||||
.map(|c| c.raw_value())
|
||||
})
|
||||
.or_else(|| m.groups.captures.get(0).map(|c| c.value))
|
||||
.or_else(|| m.groups.captures.get(0).map(|c| c.raw_value()))
|
||||
.unwrap_or("");
|
||||
|
||||
let origin_kind = match origin.first() {
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ use crate::{
|
|||
safe_list::{is_safe_match, is_user_match},
|
||||
scanner_pool::ScannerPool,
|
||||
snippet::Base64BString,
|
||||
util::{intern, redact_value},
|
||||
util::intern,
|
||||
};
|
||||
|
||||
const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment
|
||||
|
|
@ -100,7 +100,7 @@ impl OwnedBlobMatch {
|
|||
.captures
|
||||
.get(1)
|
||||
.or_else(|| blob_match.captures.captures.get(0))
|
||||
.map(|capture| capture.value.as_bytes().to_vec())
|
||||
.map(|capture| capture.raw_value().as_bytes().to_vec())
|
||||
.unwrap_or_else(Vec::new);
|
||||
|
||||
let mut owned_blob_match = OwnedBlobMatch {
|
||||
|
|
@ -714,7 +714,7 @@ fn filter_match<'b>(
|
|||
&blob.bytes()[matching_input_offset_span.start..matching_input_offset_span.end];
|
||||
|
||||
// Pass the *full* capture object to from_captures
|
||||
let groups = SerializableCaptures::from_captures(&captures, haystack, re, redact);
|
||||
let groups = SerializableCaptures::from_captures(&captures, haystack, re);
|
||||
|
||||
matches.push(BlobMatch {
|
||||
rule: Arc::clone(&rule),
|
||||
|
|
@ -829,16 +829,47 @@ impl JsonSchema for Groups {
|
|||
// pub end: usize, // End position of the match
|
||||
// pub value: String, // The actual captured value
|
||||
// }
|
||||
#[derive(Debug, Clone, Serialize, JsonSchema)]
|
||||
#[derive(Debug, Clone, JsonSchema)]
|
||||
pub struct SerializableCapture {
|
||||
pub name: Option<String>,
|
||||
pub match_number: i32,
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
/// Interned value of the capture.
|
||||
/// Interned original (unredacted) value.
|
||||
#[serde(skip_serializing, skip_deserializing)]
|
||||
pub value: &'static str,
|
||||
}
|
||||
|
||||
impl SerializableCapture {
|
||||
/// Returns the original captured value.
|
||||
pub fn raw_value(&self) -> &'static str {
|
||||
self.value
|
||||
}
|
||||
|
||||
/// Returns the value that should be shown in user-facing output.
|
||||
pub fn display_value(&self) -> std::borrow::Cow<'static, str> {
|
||||
crate::util::display_value(self.value)
|
||||
}
|
||||
}
|
||||
|
||||
impl serde::Serialize for SerializableCapture {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
use serde::ser::SerializeStruct;
|
||||
|
||||
let mut state = serializer.serialize_struct("SerializableCapture", 5)?;
|
||||
state.serialize_field("name", &self.name)?;
|
||||
state.serialize_field("match_number", &self.match_number)?;
|
||||
state.serialize_field("start", &self.start)?;
|
||||
state.serialize_field("end", &self.end)?;
|
||||
let value = self.display_value();
|
||||
state.serialize_field("value", &value)?;
|
||||
state.end()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, JsonSchema)]
|
||||
pub struct SerializableCaptures {
|
||||
#[schemars(with = "Vec<SerializableCapture>")]
|
||||
|
|
@ -846,12 +877,7 @@ pub struct SerializableCaptures {
|
|||
}
|
||||
|
||||
impl SerializableCaptures {
|
||||
pub fn from_captures(
|
||||
captures: &regex::bytes::Captures,
|
||||
_input: &[u8],
|
||||
re: &Regex,
|
||||
redact: bool,
|
||||
) -> Self {
|
||||
pub fn from_captures(captures: &regex::bytes::Captures, _input: &[u8], re: &Regex) -> Self {
|
||||
let mut serialized_captures: SmallVec<[SerializableCapture; 2]> = SmallVec::new();
|
||||
|
||||
let capture_names: SmallVec<[Option<String>; 4]> =
|
||||
|
|
@ -863,12 +889,8 @@ impl SerializableCaptures {
|
|||
for i in 1..captures.len() {
|
||||
// Start from 1
|
||||
if let Some(cap) = captures.get(i) {
|
||||
let value = if redact {
|
||||
redact_value(&String::from_utf8_lossy(cap.as_bytes()))
|
||||
} else {
|
||||
String::from_utf8_lossy(cap.as_bytes()).to_string()
|
||||
};
|
||||
let interned = intern(&value);
|
||||
let raw_value = String::from_utf8_lossy(cap.as_bytes()).to_string();
|
||||
let raw_interned = intern(&raw_value);
|
||||
let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned();
|
||||
|
||||
serialized_captures.push(SerializableCapture {
|
||||
|
|
@ -876,7 +898,7 @@ impl SerializableCaptures {
|
|||
match_number: i32::try_from(i).unwrap_or(0),
|
||||
start: cap.start(),
|
||||
end: cap.end(),
|
||||
value: interned,
|
||||
value: raw_interned,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -884,12 +906,8 @@ impl SerializableCaptures {
|
|||
// ELSE, if there is ONLY the full match (len == 1),
|
||||
// serialize just that full match (group 0) as the fallback.
|
||||
if let Some(cap) = captures.get(0) {
|
||||
let value = if redact {
|
||||
redact_value(&String::from_utf8_lossy(cap.as_bytes()))
|
||||
} else {
|
||||
String::from_utf8_lossy(cap.as_bytes()).to_string()
|
||||
};
|
||||
let interned = intern(&value);
|
||||
let raw_value = String::from_utf8_lossy(cap.as_bytes()).to_string();
|
||||
let raw_interned = intern(&raw_value);
|
||||
let name = capture_names.get(0).and_then(|opt| opt.as_ref()).cloned();
|
||||
|
||||
serialized_captures.push(SerializableCapture {
|
||||
|
|
@ -897,7 +915,7 @@ impl SerializableCaptures {
|
|||
match_number: 0,
|
||||
start: cap.start(),
|
||||
end: cap.end(),
|
||||
value: interned,
|
||||
value: raw_interned,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -959,7 +977,7 @@ impl Match {
|
|||
.captures
|
||||
.get(1)
|
||||
.or_else(|| owned_blob_match.captures.captures.get(0))
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.map(|capture| capture.raw_value().as_bytes())
|
||||
.unwrap_or_default();
|
||||
|
||||
// The fingerprint will be based on the content of the secret.
|
||||
|
|
@ -1596,7 +1614,7 @@ line2
|
|||
Regex::new(r"(?xi)\b(ghp_(?P<body>[A-Z0-9]{3})(?P<checksum>[A-Z0-9]{2}))").unwrap();
|
||||
let caps = re.captures(b"ghp_ABC12").expect("expected captures");
|
||||
|
||||
let serialized = SerializableCaptures::from_captures(&caps, b"", &re, false);
|
||||
let serialized = SerializableCaptures::from_captures(&caps, b"", &re);
|
||||
let entries: Vec<(Option<&str>, i32, &str)> = serialized
|
||||
.captures
|
||||
.iter()
|
||||
|
|
|
|||
|
|
@ -421,14 +421,12 @@ impl DetailsReporter {
|
|||
// We now correctly serialize *only* the explicit capture groups (or group 0
|
||||
// as a fallback). The primary "secret" is therefore always at index 0
|
||||
// of the captures SmallVec.
|
||||
let snippet = Escaped(
|
||||
rm.m.groups
|
||||
.captures
|
||||
.get(0) // Get the first (and primary) serialized capture
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
.to_string();
|
||||
let snippet = if let Some(capture) = rm.m.groups.captures.get(0) {
|
||||
let displayed = capture.display_value();
|
||||
Escaped(displayed.as_ref().as_bytes()).to_string()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
// --- END FIX ---
|
||||
|
||||
let validation_status = if rm.validation_success {
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ use crate::{
|
|||
run_secret_validation, save_docker_images,
|
||||
summary::print_scan_summary,
|
||||
},
|
||||
util::set_redaction_enabled,
|
||||
};
|
||||
|
||||
pub async fn run_scan(
|
||||
|
|
@ -75,6 +76,8 @@ pub async fn run_async_scan(
|
|||
let progress_enabled = global_args.use_progress();
|
||||
initialize_environment()?;
|
||||
|
||||
set_redaction_enabled(args.redact);
|
||||
|
||||
let mut repo_urls = enumerate_github_repos(args, global_args).await?;
|
||||
let gitlab_repo_urls = enumerate_gitlab_repos(args, global_args).await?;
|
||||
let gitea_repo_urls = enumerate_gitea_repos(args, global_args).await?;
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ pub async fn run_secret_validation(
|
|||
.captures
|
||||
.get(1)
|
||||
.or_else(|| arc_msg.2.groups.captures.get(0))
|
||||
.map_or("", |c| c.value);
|
||||
.map_or("", |c| c.raw_value());
|
||||
groups.entry(format!("{}|{}", arc_msg.2.rule.id(), secret)).or_default().push(arc_msg);
|
||||
}
|
||||
|
||||
|
|
@ -111,7 +111,7 @@ pub async fn run_secret_validation(
|
|||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rep_arc.2.groups.captures.get(0))
|
||||
.map_or("", |c| c.value);
|
||||
.map_or("", |c| c.raw_value());
|
||||
let key = format!("{}|{}", rep_arc.2.rule.id(), secret);
|
||||
|
||||
match val_res.entry(key.clone()) {
|
||||
|
|
@ -352,7 +352,7 @@ async fn validate_single(
|
|||
sorted.into_iter().map(|(k, v)| format!("{}={}", k, v)).collect::<Vec<_>>().join("|")
|
||||
})
|
||||
.unwrap_or_default();
|
||||
let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.value.to_string());
|
||||
let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string());
|
||||
let cache_key = format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str);
|
||||
// Check cache first
|
||||
if let Some(cached) = cache.get(&cache_key) {
|
||||
|
|
@ -443,6 +443,6 @@ fn build_cache_key(
|
|||
.unwrap_or_default();
|
||||
// For demonstration, we’ll do a simplistic approach
|
||||
// You can adapt from your existing logic
|
||||
let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.value.to_string());
|
||||
let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string());
|
||||
format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str)
|
||||
}
|
||||
|
|
|
|||
23
src/util.rs
23
src/util.rs
|
|
@ -1,7 +1,9 @@
|
|||
use std::{
|
||||
borrow::Cow,
|
||||
fs::File,
|
||||
io::{stdin, stdout, BufReader, BufWriter},
|
||||
path::Path,
|
||||
sync::atomic::{AtomicBool, Ordering},
|
||||
};
|
||||
|
||||
use blake3::Hasher;
|
||||
|
|
@ -11,6 +13,7 @@ use path_dedot::ParseDot;
|
|||
use ring::rand::{SecureRandom, SystemRandom};
|
||||
// Generate a random salt once and use it for the entire application runtime
|
||||
static APP_SALT: Lazy<String> = Lazy::new(|| generate_salt());
|
||||
static REDACTION_ENABLED: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
/// Interns a string once and returns a `'static` reference to it.
|
||||
pub fn intern(s: &str) -> &'static str {
|
||||
|
|
@ -41,6 +44,26 @@ pub fn redact_value(value: &str) -> String {
|
|||
let hash = hasher.finalize();
|
||||
format!("[REDACTED:{}]", hash_to_short_id(&hash))
|
||||
}
|
||||
|
||||
/// Enables or disables global output redaction.
|
||||
pub fn set_redaction_enabled(enabled: bool) {
|
||||
REDACTION_ENABLED.store(enabled, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
/// Returns true if redaction is enabled for user-facing output.
|
||||
pub fn redaction_enabled() -> bool {
|
||||
REDACTION_ENABLED.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
/// Returns either the original value or a redacted placeholder depending on
|
||||
/// the current redaction setting.
|
||||
pub fn display_value(value: &'static str) -> Cow<'static, str> {
|
||||
if redaction_enabled() {
|
||||
Cow::Owned(redact_value(value))
|
||||
} else {
|
||||
Cow::Borrowed(value)
|
||||
}
|
||||
}
|
||||
// Generate a random salt (16-character alphanumeric string)
|
||||
fn generate_salt() -> String {
|
||||
let rng = SystemRandom::new();
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ fn secret_fingerprint(m: &OwnedBlobMatch) -> u64 {
|
|||
|
||||
// first capture = the secret text itself
|
||||
if let Some(c0) = m.captures.captures.get(0) {
|
||||
c0.value.hash(&mut hasher);
|
||||
c0.raw_value().hash(&mut hasher);
|
||||
}
|
||||
hasher.finish()
|
||||
}
|
||||
|
|
@ -148,7 +148,7 @@ pub fn collect_variables_and_dependencies(
|
|||
.entry(dependency.variable.to_uppercase())
|
||||
.or_insert_with(Vec::new)
|
||||
.push((
|
||||
matching_input.value.to_string(),
|
||||
matching_input.raw_value().to_string(),
|
||||
other_match.matching_input_offset_span,
|
||||
));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,10 +15,10 @@ pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String,
|
|||
.iter()
|
||||
.filter_map(|cap| {
|
||||
if let Some(name) = &cap.name {
|
||||
Some((name.to_uppercase(), cap.value.to_string(), cap.start, cap.end))
|
||||
Some((name.to_uppercase(), cap.raw_value().to_string(), cap.start, cap.end))
|
||||
} else if !saw_unnamed {
|
||||
saw_unnamed = true;
|
||||
Some(("TOKEN".to_string(), cap.value.to_string(), cap.start, cap.end))
|
||||
Some(("TOKEN".to_string(), cap.raw_value().to_string(), cap.start, cap.end))
|
||||
} else {
|
||||
// Ignore any additional unnamed captures (e.g., from unintended groups)
|
||||
None
|
||||
|
|
@ -201,7 +201,7 @@ mod tests {
|
|||
match_number: 2, // Corrected match_number
|
||||
start: 4,
|
||||
end: 6,
|
||||
value: "cc"
|
||||
value: "cc",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
|
|
|||
|
|
@ -157,7 +157,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
|
|||
assert!(!matches.is_empty());
|
||||
for m_arc in matches {
|
||||
let m = &m_arc.2;
|
||||
assert!(m.groups.captures.iter().any(|cap| cap.value.starts_with("[REDACTED:")));
|
||||
assert!(m.groups.captures.iter().any(|cap| cap.display_value().starts_with("[REDACTED:")));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue