diff --git a/CHANGELOG.md b/CHANGELOG.md index d234a8a..90658f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [v1.64.0] +- Fixed a bug where using --redact broke validation + ## [v1.63.1] - Updated allocator diff --git a/Cargo.toml b/Cargo.toml index 4eab59a..be4e2e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.63.1" +version = "1.64.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/src/findings_store.rs b/src/findings_store.rs index 6148fc6..e51a881 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -154,15 +154,15 @@ impl FindingsStore { .captures .iter() .find(|c| c.name.is_none() && c.match_number == 0) - .map(|c| c.value) + .map(|c| c.raw_value()) .or_else(|| { m.groups .captures .iter() .find(|c| matches!(c.name.as_deref(), Some("TOKEN"))) - .map(|c| c.value) + .map(|c| c.raw_value()) }) - .or_else(|| m.groups.captures.get(0).map(|c| c.value)) + .or_else(|| m.groups.captures.get(0).map(|c| c.raw_value())) .unwrap_or(""); let origin_kind = match origin.first() { diff --git a/src/matcher.rs b/src/matcher.rs index 79007fb..96f7602 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -34,7 +34,7 @@ use crate::{ safe_list::{is_safe_match, is_user_match}, scanner_pool::ScannerPool, snippet::Base64BString, - util::{intern, redact_value}, + util::intern, }; const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment @@ -100,7 +100,7 @@ impl OwnedBlobMatch { .captures .get(1) .or_else(|| blob_match.captures.captures.get(0)) - .map(|capture| capture.value.as_bytes().to_vec()) + .map(|capture| capture.raw_value().as_bytes().to_vec()) .unwrap_or_else(Vec::new); let mut owned_blob_match = OwnedBlobMatch { @@ -714,7 +714,7 @@ fn filter_match<'b>( 
&blob.bytes()[matching_input_offset_span.start..matching_input_offset_span.end]; // Pass the *full* capture object to from_captures - let groups = SerializableCaptures::from_captures(&captures, haystack, re, redact); + let groups = SerializableCaptures::from_captures(&captures, haystack, re); matches.push(BlobMatch { rule: Arc::clone(&rule), @@ -829,16 +829,47 @@ impl JsonSchema for Groups { // pub end: usize, // End position of the match // pub value: String, // The actual captured value // } -#[derive(Debug, Clone, Serialize, JsonSchema)] +#[derive(Debug, Clone, JsonSchema)] pub struct SerializableCapture { pub name: Option, pub match_number: i32, pub start: usize, pub end: usize, - /// Interned value of the capture. + /// Interned original (unredacted) value. + #[serde(skip_serializing, skip_deserializing)] pub value: &'static str, } +impl SerializableCapture { + /// Returns the original captured value. + pub fn raw_value(&self) -> &'static str { + self.value + } + + /// Returns the value that should be shown in user-facing output. 
+ pub fn display_value(&self) -> std::borrow::Cow<'static, str> { + crate::util::display_value(self.value) + } +} + +impl serde::Serialize for SerializableCapture { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + + let mut state = serializer.serialize_struct("SerializableCapture", 5)?; + state.serialize_field("name", &self.name)?; + state.serialize_field("match_number", &self.match_number)?; + state.serialize_field("start", &self.start)?; + state.serialize_field("end", &self.end)?; + let value = self.display_value(); + state.serialize_field("value", &value)?; + state.end() + } +} + #[derive(Debug, Clone, Serialize, JsonSchema)] pub struct SerializableCaptures { #[schemars(with = "Vec")] @@ -846,12 +877,7 @@ pub struct SerializableCaptures { } impl SerializableCaptures { - pub fn from_captures( - captures: &regex::bytes::Captures, - _input: &[u8], - re: &Regex, - redact: bool, - ) -> Self { + pub fn from_captures(captures: &regex::bytes::Captures, _input: &[u8], re: &Regex) -> Self { let mut serialized_captures: SmallVec<[SerializableCapture; 2]> = SmallVec::new(); let capture_names: SmallVec<[Option; 4]> = @@ -863,12 +889,8 @@ impl SerializableCaptures { for i in 1..captures.len() { // Start from 1 if let Some(cap) = captures.get(i) { - let value = if redact { - redact_value(&String::from_utf8_lossy(cap.as_bytes())) - } else { - String::from_utf8_lossy(cap.as_bytes()).to_string() - }; - let interned = intern(&value); + let raw_value = String::from_utf8_lossy(cap.as_bytes()).to_string(); + let raw_interned = intern(&raw_value); let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned(); serialized_captures.push(SerializableCapture { @@ -876,7 +898,7 @@ impl SerializableCaptures { match_number: i32::try_from(i).unwrap_or(0), start: cap.start(), end: cap.end(), - value: interned, + value: raw_interned, }); } } @@ -884,12 +906,8 @@ impl SerializableCaptures { // ELSE, if there is ONLY the full 
match (len == 1), // serialize just that full match (group 0) as the fallback. if let Some(cap) = captures.get(0) { - let value = if redact { - redact_value(&String::from_utf8_lossy(cap.as_bytes())) - } else { - String::from_utf8_lossy(cap.as_bytes()).to_string() - }; - let interned = intern(&value); + let raw_value = String::from_utf8_lossy(cap.as_bytes()).to_string(); + let raw_interned = intern(&raw_value); let name = capture_names.get(0).and_then(|opt| opt.as_ref()).cloned(); serialized_captures.push(SerializableCapture { @@ -897,7 +915,7 @@ impl SerializableCaptures { match_number: 0, start: cap.start(), end: cap.end(), - value: interned, + value: raw_interned, }); } } @@ -959,7 +977,7 @@ impl Match { .captures .get(1) .or_else(|| owned_blob_match.captures.captures.get(0)) - .map(|capture| capture.value.as_bytes()) + .map(|capture| capture.raw_value().as_bytes()) .unwrap_or_default(); // The fingerprint will be based on the content of the secret. @@ -1596,7 +1614,7 @@ line2 Regex::new(r"(?xi)\b(ghp_(?P[A-Z0-9]{3})(?P[A-Z0-9]{2}))").unwrap(); let caps = re.captures(b"ghp_ABC12").expect("expected captures"); - let serialized = SerializableCaptures::from_captures(&caps, b"", &re, false); + let serialized = SerializableCaptures::from_captures(&caps, b"", &re); let entries: Vec<(Option<&str>, i32, &str)> = serialized .captures .iter() diff --git a/src/reporter.rs b/src/reporter.rs index f1a1679..48f53d3 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -421,14 +421,12 @@ impl DetailsReporter { // We now correctly serialize *only* the explicit capture groups (or group 0 // as a fallback). The primary "secret" is therefore always at index 0 // of the captures SmallVec. 
- let snippet = Escaped( - rm.m.groups - .captures - .get(0) // Get the first (and primary) serialized capture - .map(|capture| capture.value.as_bytes()) - .unwrap_or_default(), - ) - .to_string(); + let snippet = if let Some(capture) = rm.m.groups.captures.get(0) { + let displayed = capture.display_value(); + Escaped(displayed.as_ref().as_bytes()).to_string() + } else { + String::new() + }; // --- END FIX --- let validation_status = if rm.validation_success { diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index cc6cd52..39decc3 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -33,6 +33,7 @@ use crate::{ run_secret_validation, save_docker_images, summary::print_scan_summary, }, + util::set_redaction_enabled, }; pub async fn run_scan( @@ -75,6 +76,8 @@ pub async fn run_async_scan( let progress_enabled = global_args.use_progress(); initialize_environment()?; + set_redaction_enabled(args.redact); + let mut repo_urls = enumerate_github_repos(args, global_args).await?; let gitlab_repo_urls = enumerate_gitlab_repos(args, global_args).await?; let gitea_repo_urls = enumerate_gitea_repos(args, global_args).await?; diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index d69dbeb..6544d40 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -73,7 +73,7 @@ pub async fn run_secret_validation( .captures .get(1) .or_else(|| arc_msg.2.groups.captures.get(0)) - .map_or("", |c| c.value); + .map_or("", |c| c.raw_value()); groups.entry(format!("{}|{}", arc_msg.2.rule.id(), secret)).or_default().push(arc_msg); } @@ -111,7 +111,7 @@ pub async fn run_secret_validation( .captures .get(1) .or_else(|| rep_arc.2.groups.captures.get(0)) - .map_or("", |c| c.value); + .map_or("", |c| c.raw_value()); let key = format!("{}|{}", rep_arc.2.rule.id(), secret); match val_res.entry(key.clone()) { @@ -352,7 +352,7 @@ async fn validate_single( sorted.into_iter().map(|(k, v)| format!("{}={}", k, v)).collect::>().join("|") }) 
.unwrap_or_default(); - let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.value.to_string()); + let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string()); let cache_key = format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str); // Check cache first if let Some(cached) = cache.get(&cache_key) { @@ -443,6 +443,6 @@ fn build_cache_key( .unwrap_or_default(); // For demonstration, we’ll do a simplistic approach // You can adapt from your existing logic - let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.value.to_string()); + let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string()); format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str) } diff --git a/src/util.rs b/src/util.rs index 0113df4..67ceeaf 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,7 +1,9 @@ use std::{ + borrow::Cow, fs::File, io::{stdin, stdout, BufReader, BufWriter}, path::Path, + sync::atomic::{AtomicBool, Ordering}, }; use blake3::Hasher; @@ -11,6 +13,7 @@ use path_dedot::ParseDot; use ring::rand::{SecureRandom, SystemRandom}; // Generate a random salt once and use it for the entire application runtime static APP_SALT: Lazy = Lazy::new(|| generate_salt()); +static REDACTION_ENABLED: AtomicBool = AtomicBool::new(false); /// Interns a string once and returns a `'static` reference to it. pub fn intern(s: &str) -> &'static str { @@ -41,6 +44,26 @@ pub fn redact_value(value: &str) -> String { let hash = hasher.finalize(); format!("[REDACTED:{}]", hash_to_short_id(&hash)) } + +/// Enables or disables global output redaction. +pub fn set_redaction_enabled(enabled: bool) { + REDACTION_ENABLED.store(enabled, Ordering::Relaxed); +} + +/// Returns true if redaction is enabled for user-facing output. 
+pub fn redaction_enabled() -> bool { + REDACTION_ENABLED.load(Ordering::Relaxed) +} + +/// Returns either the original value or a redacted placeholder depending on +/// the current redaction setting. +pub fn display_value(value: &'static str) -> Cow<'static, str> { + if redaction_enabled() { + Cow::Owned(redact_value(value)) + } else { + Cow::Borrowed(value) + } +} // Generate a random salt (16-character alphanumeric string) fn generate_salt() -> String { let rng = SystemRandom::new(); diff --git a/src/validation.rs b/src/validation.rs index 825e2ef..e5fdace 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -80,7 +80,7 @@ fn secret_fingerprint(m: &OwnedBlobMatch) -> u64 { // first capture = the secret text itself if let Some(c0) = m.captures.captures.get(0) { - c0.value.hash(&mut hasher); + c0.raw_value().hash(&mut hasher); } hasher.finish() } @@ -148,7 +148,7 @@ pub fn collect_variables_and_dependencies( .entry(dependency.variable.to_uppercase()) .or_insert_with(Vec::new) .push(( - matching_input.value.to_string(), + matching_input.raw_value().to_string(), other_match.matching_input_offset_span, )); } diff --git a/src/validation/utils.rs b/src/validation/utils.rs index 0080fd5..e15c7e2 100644 --- a/src/validation/utils.rs +++ b/src/validation/utils.rs @@ -15,10 +15,10 @@ pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, .iter() .filter_map(|cap| { if let Some(name) = &cap.name { - Some((name.to_uppercase(), cap.value.to_string(), cap.start, cap.end)) + Some((name.to_uppercase(), cap.raw_value().to_string(), cap.start, cap.end)) } else if !saw_unnamed { saw_unnamed = true; - Some(("TOKEN".to_string(), cap.value.to_string(), cap.start, cap.end)) + Some(("TOKEN".to_string(), cap.raw_value().to_string(), cap.start, cap.end)) } else { // Ignore any additional unnamed captures (e.g., from unintended groups) None @@ -201,7 +201,7 @@ mod tests { match_number: 2, // Corrected match_number start: 4, end: 6, - value: "cc" + value: 
"cc", }, ], }; diff --git a/tests/int_redact.rs b/tests/int_redact.rs index b9fb1f8..c885f28 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -157,7 +157,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> { assert!(!matches.is_empty()); for m_arc in matches { let m = &m_arc.2; - assert!(m.groups.captures.iter().any(|cap| cap.value.starts_with("[REDACTED:"))); + assert!(m.groups.captures.iter().any(|cap| cap.display_value().starts_with("[REDACTED:"))); } Ok(())