From b8d8b71a199d80f3e585528b08bf6965148b2410 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 11 Nov 2025 13:24:06 -0800 Subject: [PATCH 1/6] updated allocator --- CHANGELOG.md | 3 ++ Cargo.toml | 2 +- src/findings_store.rs | 6 ++-- src/matcher.rs | 72 ++++++++++++++++++++++++--------------- src/reporter.rs | 14 ++++---- src/scanner/runner.rs | 3 ++ src/scanner/validation.rs | 8 ++--- src/util.rs | 23 +++++++++++++ src/validation.rs | 4 +-- src/validation/utils.rs | 6 ++-- tests/int_redact.rs | 2 +- 11 files changed, 94 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d234a8a..90658f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [v1.64.0] +- Fixed a bug when using --redact, that broke validation + ## [v1.63.1] - Updated allocator diff --git a/Cargo.toml b/Cargo.toml index 4eab59a..be4e2e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.63.1" +version = "1.64.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/src/findings_store.rs b/src/findings_store.rs index 6148fc6..e51a881 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -154,15 +154,15 @@ impl FindingsStore { .captures .iter() .find(|c| c.name.is_none() && c.match_number == 0) - .map(|c| c.value) + .map(|c| c.raw_value()) .or_else(|| { m.groups .captures .iter() .find(|c| matches!(c.name.as_deref(), Some("TOKEN"))) - .map(|c| c.value) + .map(|c| c.raw_value()) }) - .or_else(|| m.groups.captures.get(0).map(|c| c.value)) + .or_else(|| m.groups.captures.get(0).map(|c| c.raw_value())) .unwrap_or(""); let origin_kind = match origin.first() { diff --git a/src/matcher.rs b/src/matcher.rs index 79007fb..96f7602 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -34,7 +34,7 @@ use crate::{ safe_list::{is_safe_match, is_user_match}, scanner_pool::ScannerPool, snippet::Base64BString, - util::{intern, redact_value}, + util::intern, }; const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment @@ -100,7 +100,7 @@ impl OwnedBlobMatch { .captures .get(1) .or_else(|| blob_match.captures.captures.get(0)) - .map(|capture| capture.value.as_bytes().to_vec()) + .map(|capture| capture.raw_value().as_bytes().to_vec()) .unwrap_or_else(Vec::new); let mut owned_blob_match = OwnedBlobMatch { @@ -714,7 +714,7 @@ fn filter_match<'b>( &blob.bytes()[matching_input_offset_span.start..matching_input_offset_span.end]; // Pass the *full* capture object to from_captures - let groups = SerializableCaptures::from_captures(&captures, haystack, re, redact); + let groups = SerializableCaptures::from_captures(&captures, haystack, re); matches.push(BlobMatch { rule: Arc::clone(&rule), @@ -829,16 +829,47 @@ impl JsonSchema for Groups { // pub end: usize, // End position of the match // pub value: String, // The actual captured value // } -#[derive(Debug, Clone, Serialize, JsonSchema)] +#[derive(Debug, Clone, JsonSchema)] pub struct SerializableCapture { pub name: Option, pub match_number: i32, pub start: usize, pub end: usize, - /// Interned value of the capture. + /// Interned original (unredacted) value. + #[serde(skip_serializing, skip_deserializing)] pub value: &'static str, } +impl SerializableCapture { + /// Returns the original captured value. + pub fn raw_value(&self) -> &'static str { + self.value + } + + /// Returns the value that should be shown in user-facing output. + pub fn display_value(&self) -> std::borrow::Cow<'static, str> { + crate::util::display_value(self.value) + } +} + +impl serde::Serialize for SerializableCapture { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + + let mut state = serializer.serialize_struct("SerializableCapture", 5)?; + state.serialize_field("name", &self.name)?; + state.serialize_field("match_number", &self.match_number)?; + state.serialize_field("start", &self.start)?; + state.serialize_field("end", &self.end)?; + let value = self.display_value(); + state.serialize_field("value", &value)?; + state.end() + } +} + #[derive(Debug, Clone, Serialize, JsonSchema)] pub struct SerializableCaptures { #[schemars(with = "Vec")] @@ -846,12 +877,7 @@ pub struct SerializableCaptures { } impl SerializableCaptures { - pub fn from_captures( - captures: ®ex::bytes::Captures, - _input: &[u8], - re: &Regex, - redact: bool, - ) -> Self { + pub fn from_captures(captures: ®ex::bytes::Captures, _input: &[u8], re: &Regex) -> Self { let mut serialized_captures: SmallVec<[SerializableCapture; 2]> = SmallVec::new(); let capture_names: SmallVec<[Option; 4]> = @@ -863,12 +889,8 @@ impl SerializableCaptures { for i in 1..captures.len() { // Start from 1 if let Some(cap) = captures.get(i) { - let value = if redact { - redact_value(&String::from_utf8_lossy(cap.as_bytes())) - } else { - String::from_utf8_lossy(cap.as_bytes()).to_string() - }; - let interned = intern(&value); + let raw_value = String::from_utf8_lossy(cap.as_bytes()).to_string(); + let raw_interned = intern(&raw_value); let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned(); serialized_captures.push(SerializableCapture { @@ -876,7 +898,7 @@ impl SerializableCaptures { match_number: i32::try_from(i).unwrap_or(0), start: cap.start(), end: cap.end(), - value: interned, + value: raw_interned, }); } } @@ -884,12 +906,8 @@ impl SerializableCaptures { // ELSE, if there is ONLY the full match (len == 1), // serialize just that full match (group 0) as the fallback. if let Some(cap) = captures.get(0) { - let value = if redact { - redact_value(&String::from_utf8_lossy(cap.as_bytes())) - } else { - String::from_utf8_lossy(cap.as_bytes()).to_string() - }; - let interned = intern(&value); + let raw_value = String::from_utf8_lossy(cap.as_bytes()).to_string(); + let raw_interned = intern(&raw_value); let name = capture_names.get(0).and_then(|opt| opt.as_ref()).cloned(); serialized_captures.push(SerializableCapture { @@ -897,7 +915,7 @@ impl SerializableCaptures { match_number: 0, start: cap.start(), end: cap.end(), - value: interned, + value: raw_interned, }); } } @@ -959,7 +977,7 @@ impl Match { .captures .get(1) .or_else(|| owned_blob_match.captures.captures.get(0)) - .map(|capture| capture.value.as_bytes()) + .map(|capture| capture.raw_value().as_bytes()) .unwrap_or_default(); // The fingerprint will be based on the content of the secret. @@ -1596,7 +1614,7 @@ line2 Regex::new(r"(?xi)\b(ghp_(?P[A-Z0-9]{3})(?P[A-Z0-9]{2}))").unwrap(); let caps = re.captures(b"ghp_ABC12").expect("expected captures"); - let serialized = SerializableCaptures::from_captures(&caps, b"", &re, false); + let serialized = SerializableCaptures::from_captures(&caps, b"", &re); let entries: Vec<(Option<&str>, i32, &str)> = serialized .captures .iter() diff --git a/src/reporter.rs b/src/reporter.rs index f1a1679..48f53d3 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -421,14 +421,12 @@ impl DetailsReporter { // We now correctly serialize *only* the explicit capture groups (or group 0 // as a fallback). The primary "secret" is therefore always at index 0 // of the captures SmallVec. - let snippet = Escaped( - rm.m.groups - .captures - .get(0) // Get the first (and primary) serialized capture - .map(|capture| capture.value.as_bytes()) - .unwrap_or_default(), - ) - .to_string(); + let snippet = if let Some(capture) = rm.m.groups.captures.get(0) { + let displayed = capture.display_value(); + Escaped(displayed.as_ref().as_bytes()).to_string() + } else { + String::new() + }; // --- END FIX --- let validation_status = if rm.validation_success { diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index cc6cd52..39decc3 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -33,6 +33,7 @@ use crate::{ run_secret_validation, save_docker_images, summary::print_scan_summary, }, + util::set_redaction_enabled, }; pub async fn run_scan( @@ -75,6 +76,8 @@ pub async fn run_async_scan( let progress_enabled = global_args.use_progress(); initialize_environment()?; + set_redaction_enabled(args.redact); + let mut repo_urls = enumerate_github_repos(args, global_args).await?; let gitlab_repo_urls = enumerate_gitlab_repos(args, global_args).await?; let gitea_repo_urls = enumerate_gitea_repos(args, global_args).await?; diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index d69dbeb..6544d40 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -73,7 +73,7 @@ pub async fn run_secret_validation( .captures .get(1) .or_else(|| arc_msg.2.groups.captures.get(0)) - .map_or("", |c| c.value); + .map_or("", |c| c.raw_value()); groups.entry(format!("{}|{}", arc_msg.2.rule.id(), secret)).or_default().push(arc_msg); } @@ -111,7 +111,7 @@ pub async fn run_secret_validation( .captures .get(1) .or_else(|| rep_arc.2.groups.captures.get(0)) - .map_or("", |c| c.value); + .map_or("", |c| c.raw_value()); let key = format!("{}|{}", rep_arc.2.rule.id(), secret); match val_res.entry(key.clone()) { @@ -352,7 +352,7 @@ async fn validate_single( sorted.into_iter().map(|(k, v)| format!("{}={}", k, v)).collect::>().join("|") }) .unwrap_or_default(); - let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.value.to_string()); + let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string()); let cache_key = format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str); // Check cache first if let Some(cached) = cache.get(&cache_key) { @@ -443,6 +443,6 @@ fn build_cache_key( .unwrap_or_default(); // For demonstration, we’ll do a simplistic approach // You can adapt from your existing logic - let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.value.to_string()); + let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string()); format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str) } diff --git a/src/util.rs b/src/util.rs index 0113df4..67ceeaf 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,7 +1,9 @@ use std::{ + borrow::Cow, fs::File, io::{stdin, stdout, BufReader, BufWriter}, path::Path, + sync::atomic::{AtomicBool, Ordering}, }; use blake3::Hasher; @@ -11,6 +13,7 @@ use path_dedot::ParseDot; use ring::rand::{SecureRandom, SystemRandom}; // Generate a random salt once and use it for the entire application runtime static APP_SALT: Lazy = Lazy::new(|| generate_salt()); +static REDACTION_ENABLED: AtomicBool = AtomicBool::new(false); /// Interns a string once and returns a `'static` reference to it. pub fn intern(s: &str) -> &'static str { @@ -41,6 +44,26 @@ pub fn redact_value(value: &str) -> String { let hash = hasher.finalize(); format!("[REDACTED:{}]", hash_to_short_id(&hash)) } + +/// Enables or disables global output redaction. +pub fn set_redaction_enabled(enabled: bool) { + REDACTION_ENABLED.store(enabled, Ordering::Relaxed); +} + +/// Returns true if redaction is enabled for user-facing output. +pub fn redaction_enabled() -> bool { + REDACTION_ENABLED.load(Ordering::Relaxed) +} + +/// Returns either the original value or a redacted placeholder depending on +/// the current redaction setting. +pub fn display_value(value: &'static str) -> Cow<'static, str> { + if redaction_enabled() { + Cow::Owned(redact_value(value)) + } else { + Cow::Borrowed(value) + } +} // Generate a random salt (16-character alphanumeric string) fn generate_salt() -> String { let rng = SystemRandom::new(); diff --git a/src/validation.rs b/src/validation.rs index 825e2ef..e5fdace 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -80,7 +80,7 @@ fn secret_fingerprint(m: &OwnedBlobMatch) -> u64 { // first capture = the secret text itself if let Some(c0) = m.captures.captures.get(0) { - c0.value.hash(&mut hasher); + c0.raw_value().hash(&mut hasher); } hasher.finish() } @@ -148,7 +148,7 @@ pub fn collect_variables_and_dependencies( .entry(dependency.variable.to_uppercase()) .or_insert_with(Vec::new) .push(( - matching_input.value.to_string(), + matching_input.raw_value().to_string(), other_match.matching_input_offset_span, )); } diff --git a/src/validation/utils.rs b/src/validation/utils.rs index 0080fd5..e15c7e2 100644 --- a/src/validation/utils.rs +++ b/src/validation/utils.rs @@ -15,10 +15,10 @@ pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, .iter() .filter_map(|cap| { if let Some(name) = &cap.name { - Some((name.to_uppercase(), cap.value.to_string(), cap.start, cap.end)) + Some((name.to_uppercase(), cap.raw_value().to_string(), cap.start, cap.end)) } else if !saw_unnamed { saw_unnamed = true; - Some(("TOKEN".to_string(), cap.value.to_string(), cap.start, cap.end)) + Some(("TOKEN".to_string(), cap.raw_value().to_string(), cap.start, cap.end)) } else { // Ignore any additional unnamed captures (e.g., from unintended groups) None @@ -201,7 +201,7 @@ mod tests { match_number: 2, // Corrected match_number start: 4, end: 6, - value: "cc" + value: "cc", }, ], }; diff --git a/tests/int_redact.rs b/tests/int_redact.rs index b9fb1f8..c885f28 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -157,7 +157,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> { assert!(!matches.is_empty()); for m_arc in matches { let m = &m_arc.2; - assert!(m.groups.captures.iter().any(|cap| cap.value.starts_with("[REDACTED:"))); + assert!(m.groups.captures.iter().any(|cap| cap.display_value().starts_with("[REDACTED:"))); } Ok(()) From 57ab249960a647983ff3786374b81227d664aff2 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 12 Nov 2025 22:25:33 -0800 Subject: [PATCH 2/6] added jdbc rule and validator --- CHANGELOG.md | 1 + data/rules/jdbc.yml | 24 +++++++ src/rules/rule.rs | 1 + src/safe_list.rs | 7 ++ src/validation.rs | 53 ++++++++++++++ src/validation/jdbc.rs | 154 +++++++++++++++++++++++++++++++++++++++++ tests/jdbc_rule.rs | 79 +++++++++++++++++++++ 7 files changed, 319 insertions(+) create mode 100644 data/rules/jdbc.yml create mode 100644 src/validation/jdbc.rs create mode 100644 tests/jdbc_rule.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 90658f7..ba64619 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [v1.64.0] - Fixed a bug when using --redact, that broke validation +- Added JDBC rule with validator ## [v1.63.1] - Updated allocator diff --git a/data/rules/jdbc.yml b/data/rules/jdbc.yml new file mode 100644 index 0000000..81553be --- /dev/null +++ b/data/rules/jdbc.yml @@ -0,0 +1,24 @@ +rules: + - name: JDBC connection string with embedded credentials + id: kingfisher.jdbc.1 + pattern: | + (?xi) + ( + jdbc: + [a-z][a-z0-9+.-]{2,30} + (?:[:][a-z0-9+.-]{1,30})* + : + [^\s"'<>,(){}\[\]]{10,512} + ) + min_entropy: 3.3 + confidence: medium + validation: + type: Jdbc + examples: + - jdbc:postgresql://db.example.com:5432/app?user=admin&password=s3cr3t + - jdbc:mysql://admin:s3cr3t@prod.internal:3306/inventory + - jdbc:oracle:thin:@ora.example.net:1521/ORCLPDB1 + - jdbc:sqlserver://sql.example.org:1433;databaseName=inventory;user=sa;password=s3cr3t! + references: + - https://docs.oracle.com/javase/8/docs/api/java/sql/DriverManager.html + - https://www.postgresql.org/docs/current/jdbc-use.html diff --git a/src/rules/rule.rs b/src/rules/rule.rs index 4048e35..63c8208 100644 --- a/src/rules/rule.rs +++ b/src/rules/rule.rs @@ -47,6 +47,7 @@ pub enum Validation { GCP, MongoDB, Postgres, + Jdbc, JWT, Raw(String), Http(HttpValidation), diff --git a/src/safe_list.rs b/src/safe_list.rs index ba7f852..c146658 100644 --- a/src/safe_list.rs +++ b/src/safe_list.rs @@ -198,6 +198,13 @@ pub fn is_safe_match_reason(input: &[u8]) -> Option<&'static str> { .map(|rule| rule.description) } +/// Test helper: clear all user-provided allow-list configuration. +#[doc(hidden)] +pub fn clear_user_filters_for_tests() { + USER_SAFE_REGEXES.lock().unwrap().clear(); + USER_SAFE_SKIPWORDS.lock().unwrap().clear(); +} + /// Returns true if the input likely contains *benign* placeholder/test strings, /// and logs which rule triggered at `debug!` level. pub fn is_safe_match(input: &[u8]) -> bool { diff --git a/src/validation.rs b/src/validation.rs index e5fdace..ee37b21 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -29,6 +29,7 @@ mod azure; mod coinbase; mod gcp; mod httpvalidation; +mod jdbc; mod jwt; mod mongodb; mod postgres; @@ -676,6 +677,58 @@ async fn timed_validate_single_match<'a>( ); } + // ---------------------------------------------------- JDBC validator + Some(Validation::Jdbc) => { + let jdbc_conn = captured_values + .iter() + .find(|(n, ..)| n == "TOKEN") + .map(|(_, v, ..)| v.clone()) + .unwrap_or_default(); + + if jdbc_conn.is_empty() { + m.validation_success = false; + m.validation_response_body = "JDBC connection string not found.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; + } + + let cache_key = jdbc::generate_jdbc_cache_key(&jdbc_conn); + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + commit_and_return(m); + return; + } + } + + match jdbc::validate_jdbc(&jdbc_conn).await { + Ok(outcome) => { + m.validation_success = outcome.valid; + m.validation_response_body = outcome.message; + m.validation_response_status = outcome.status; + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("JDBC validation error: {}", e); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } + + cache.insert( + cache_key, + CachedResponse { + body: m.validation_response_body.clone(), + status: m.validation_response_status, + is_valid: m.validation_success, + timestamp: Instant::now(), + }, + ); + } + // ------------------------------------------------ Postgres validator Some(Validation::Postgres) => { let pg_url = globals diff --git a/src/validation/jdbc.rs b/src/validation/jdbc.rs new file mode 100644 index 0000000..5c6cb73 --- /dev/null +++ b/src/validation/jdbc.rs @@ -0,0 +1,154 @@ +use anyhow::{anyhow, Context, Result}; +use http::StatusCode; +use tracing::debug; +use url::Url; +use xxhash_rust::xxh3::xxh3_64; + +use super::postgres; + +/// Result of attempting to validate a JDBC connection string. +pub struct JdbcValidationOutcome { + pub valid: bool, + pub status: StatusCode, + pub message: String, +} + +/// Produce a short-lived cache key for JDBC validations. +pub fn generate_jdbc_cache_key(raw: &str) -> String { + format!("Jdbc:{:016x}", xxh3_64(raw.as_bytes())) +} + +/// Validate a JDBC connection string by dispatching to the supported backend validators. +pub async fn validate_jdbc(jdbc_conn: &str) -> Result { + let trimmed = jdbc_conn.trim(); + if !trimmed.to_ascii_lowercase().starts_with("jdbc:") { + return Err(anyhow!("JDBC connection string must start with `jdbc:`")); + } + + let without_prefix = &trimmed[5..]; + let (raw_subprotocol, subname) = without_prefix + .split_once(':') + .ok_or_else(|| anyhow!("JDBC connection string is missing a subprotocol"))?; + let subprotocol = raw_subprotocol.trim(); + let subprotocol_lower = subprotocol.to_ascii_lowercase(); + + match subprotocol_lower.as_str() { + "postgres" | "postgresql" | "postgis" => { + validate_postgres_jdbc(subname).await.context("Postgres JDBC validation failed") + } + other => { + debug!("Unsupported JDBC subprotocol encountered: {}", other); + Ok(JdbcValidationOutcome { + valid: false, + status: StatusCode::NOT_IMPLEMENTED, + message: format!( + "JDBC validation not implemented for subprotocol `{}`.", + subprotocol + ), + }) + } + } +} + +async fn validate_postgres_jdbc(subname: &str) -> Result { + let normalized = normalize_postgres_url(subname)?; + let (ok, meta) = postgres::validate_postgres(&normalized).await?; + + let mut message = if ok { + "JDBC Postgres connection is valid.".to_string() + } else { + "JDBC Postgres connection failed.".to_string() + }; + + if !meta.is_empty() { + let joined = meta.join("; "); + if ok { + message.push_str(&format!(" Details: {}", joined)); + } else { + message = format!("JDBC Postgres validation result: {}", joined); + } + } + + let status = if ok { + StatusCode::OK + } else if meta.iter().any(|m| m.to_ascii_lowercase().contains("skip")) { + StatusCode::CONTINUE + } else { + StatusCode::UNAUTHORIZED + }; + + Ok(JdbcValidationOutcome { valid: ok, status, message }) +} + +fn normalize_postgres_url(subname: &str) -> Result { + let trimmed = subname.trim(); + if trimmed.is_empty() { + return Err(anyhow!("Postgres JDBC connection string is empty")); + } + + // First try parsing using the standard JDBC layout, otherwise fall back to a canonical URL. + let candidate = format!("postgresql:{}", trimmed); + let mut url = Url::parse(&candidate).or_else(|_| { + let fallback = format!("postgresql://{}", trimmed.trim_start_matches('/')); + Url::parse(&fallback) + })?; + + // Extract credentials from the query string when they are present. + let mut user = None; + let mut password = None; + if url.query().is_some() { + let mut preserved = Vec::new(); + for (key, value) in url.query_pairs() { + match key.to_ascii_lowercase().as_str() { + "user" | "username" => user = Some(value.into_owned()), + "password" | "pass" | "pwd" => password = Some(value.into_owned()), + _ => preserved.push((key.into_owned(), value.into_owned())), + } + } + + { + let mut pairs = url.query_pairs_mut(); + pairs.clear(); + for (key, value) in preserved { + pairs.append_pair(&key, &value); + } + } + } + + if let Some(user) = user { + url.set_username(&user).map_err(|_| anyhow!("Failed to apply Postgres username"))?; + } + if let Some(password) = password { + url.set_password(Some(&password)) + .map_err(|_| anyhow!("Failed to apply Postgres password"))?; + } + + Ok(url.to_string()) +} + +#[cfg(test)] +mod tests { + use super::normalize_postgres_url; + use pretty_assertions::assert_eq; + + #[test] + fn normalizes_postgres_query_credentials() { + let normalized = normalize_postgres_url( + "//db.example.com:5432/app?user=admin&password=s3cr3t&sslmode=require", + ) + .unwrap(); + assert_eq!(normalized, "postgresql://admin:s3cr3t@db.example.com:5432/app?sslmode=require"); + } + + #[test] + fn preserves_existing_credentials() { + let normalized = + normalize_postgres_url("//db.example.com:5432/app?sslmode=prefer").unwrap(); + assert_eq!(normalized, "postgresql://db.example.com:5432/app?sslmode=prefer"); + } + + #[test] + fn rejects_empty_input() { + assert!(normalize_postgres_url("").is_err()); + } +} diff --git a/tests/jdbc_rule.rs b/tests/jdbc_rule.rs new file mode 100644 index 0000000..def97a9 --- /dev/null +++ b/tests/jdbc_rule.rs @@ -0,0 +1,79 @@ +use std::collections::BTreeSet; + +use anyhow::{anyhow, Result}; +use kingfisher::{rules::rule::RuleSyntax, safe_list}; + +fn load_jdbc_rule() -> Result { + let rules = RuleSyntax::from_yaml_file("data/rules/jdbc.yml")?; + rules + .into_iter() + .find(|rule| rule.id == "kingfisher.jdbc.1") + .ok_or_else(|| anyhow!("JDBC rule not found")) +} + +#[test] +fn jdbc_rule_matches_expected_patterns() -> Result<()> { + let rule = load_jdbc_rule()?; + let regex = rule.as_regex()?; + + let sample = r#" + datasource.url=jdbc:postgresql://db.acme.local:5432/app?user=svc_writer&password=P@s5w0rd + connection.read=jdbc:mysql://analyst:letmein@reports.internal:3306/analytics + cache="jdbc:sqlite:/var/lib/app/cache.db" + vendor.dsn=jdbc:oracle:thin:@ora.example.net:1521/ORCLPDB1 + backup=jdbc:mysql://host:3306/db,other_token + jdbc:xyz:short // this should be ignored + somejdbc:mysql://host/db // false prefix + jdbc:mysql://host/db>next // malformed with trailing bracket + "#; + + let matches: BTreeSet = regex + .captures_iter(sample.as_bytes()) + .filter_map(|caps| caps.name("TOKEN")) + .map(|m| String::from_utf8_lossy(m.as_bytes()).into_owned()) + .collect(); + + let expected = BTreeSet::from([ + "jdbc:postgresql://db.acme.local:5432/app?user=svc_writer&password=P@s5w0rd".to_string(), + "jdbc:mysql://analyst:letmein@reports.internal:3306/analytics".to_string(), + "jdbc:sqlite:/var/lib/app/cache.db".to_string(), + "jdbc:oracle:thin:@ora.example.net:1521/ORCLPDB1".to_string(), + "jdbc:mysql://host:3306/db".to_string(), + ]); + + assert_eq!(matches, expected); + Ok(()) +} + +#[test] +fn jdbc_rule_respects_user_skip_regex() -> Result<()> { + safe_list::clear_user_filters_for_tests(); + safe_list::add_user_regex(r"^jdbc:sqlite::temporary_ignore_secret$")?; + + let rule = load_jdbc_rule()?; + let regex = rule.as_regex()?; + + let sample = r#" + jdbc:sqlite::temporary_ignore_secret + jdbc:mysql://data_ingest:pa55word@analytics.internal:3306/raw + "#; + + let matches: Vec = regex + .captures_iter(sample.as_bytes()) + .filter_map(|caps| caps.name("TOKEN")) + .map(|m| String::from_utf8_lossy(m.as_bytes()).into_owned()) + .collect(); + + let retained: Vec = matches + .into_iter() + .filter(|m| !safe_list::is_user_match(m.as_bytes(), m.as_bytes())) + .collect(); + + safe_list::clear_user_filters_for_tests(); + + assert_eq!( + retained, + vec!["jdbc:mysql://data_ingest:pa55word@analytics.internal:3306/raw".to_string()] + ); + Ok(()) +} From 0ef163af81f3d6a20f945a89eca2255d4bd490dd Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 12 Nov 2025 22:26:29 -0800 Subject: [PATCH 3/6] added jdbc rule and validator --- data/rules/jdbc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/rules/jdbc.yml b/data/rules/jdbc.yml index 81553be..7456334 100644 --- a/data/rules/jdbc.yml +++ b/data/rules/jdbc.yml @@ -8,7 +8,7 @@ rules: [a-z][a-z0-9+.-]{2,30} (?:[:][a-z0-9+.-]{1,30})* : - [^\s"'<>,(){}\[\]]{10,512} + [^\s"'<>,(){}\[\]]{10,448} ) min_entropy: 3.3 confidence: medium From c14adbdadd2ad72b3cb5df13fb015111459e7e68 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 12 Nov 2025 22:58:31 -0800 Subject: [PATCH 4/6] added jdbc rule and validator --- src/rules/rule.rs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/rules/rule.rs b/src/rules/rule.rs index 63c8208..37fd05d 100644 --- a/src/rules/rule.rs +++ b/src/rules/rule.rs @@ -599,11 +599,28 @@ impl RuleSyntax { let path = path.as_ref(); let contents = std::fs::read_to_string(path) .with_context(|| format!("Failed to read file: {}", path.display()))?; - serde_yaml::from_str(&contents).map_err(|e| { + #[derive(Deserialize)] + #[serde(untagged)] + enum RuleSyntaxDocument { + Sequence(Vec), + Mapped { rules: Vec }, + } + + let parsed: RuleSyntaxDocument = serde_yaml::from_str(&contents).map_err(|e| { let context = e.location().map_or(String::new(), |loc| { format!(" at line {} column {}", loc.line(), loc.column()) }); - anyhow!("Failed to parse YAML from {}{}: {}", path.display(), context, e) + anyhow!( + "Failed to parse YAML from {}{}: {}", + path.display(), + context, + e + ) + })?; + + Ok(match parsed { + RuleSyntaxDocument::Sequence(rules) => rules, + RuleSyntaxDocument::Mapped { rules } => rules, }) } } From 62ea3fd6152c567cedc7f7410f8b8f9d0414b7fa Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 13 Nov 2025 14:30:27 -0800 Subject: [PATCH 5/6] Filter out empty 'KF_BITBUCKET_*' environment values when constructing the Bitbucket authentication configuration so blank variables no longer override valid credentials --- CHANGELOG.md | 1 + README.md | 27 ++++++++++----- src/bitbucket.rs | 85 +++++++++++++++++++++++++++++++++++++++++++---- src/git_binary.rs | 35 ++++++++++++++++--- src/rules/rule.rs | 7 +--- 5 files changed, 130 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba64619..0ab5993 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ All notable changes to this project will be documented in this file. ## [v1.64.0] - Fixed a bug when using --redact, that broke validation - Added JDBC rule with validator +- Filter out empty 'KF_BITBUCKET_*' environment values when constructing the Bitbucket authentication configuration so blank variables no longer override valid credentials ## [v1.63.1] - Updated allocator diff --git a/README.md b/README.md index 81b38b1..1f7649a 100644 --- a/README.md +++ b/README.md @@ -882,7 +882,7 @@ KF_GITEA_TOKEN="gtoken" kingfisher scan gitea --user johndoe --gitea-api-url htt ```bash kingfisher scan bitbucket --workspace my-team # include Bitbucket Cloud repositories from every accessible workspace -KF_BITBUCKET_USERNAME="$USER" KF_BITBUCKET_APP_PASSWORD="$APP_PASSWORD" \ +KF_BITBUCKET_TOKEN="$BITBUCKET_TOKEN" \ kingfisher scan bitbucket --all-workspaces ``` @@ -915,8 +915,7 @@ require credentials (see [Authenticate to Bitbucket](#authenticate-to-bitbucket) kingfisher scan --git-url https://bitbucket.org/hashashash/secretstest.git # Include repository issues -KF_BITBUCKET_USERNAME="user" \ -KF_BITBUCKET_APP_PASSWORD="app-password" \ +KF_BITBUCKET_TOKEN="$BITBUCKET_TOKEN" \ kingfisher scan --git-url https://bitbucket.org/workspace/project.git --repo-artifacts ``` @@ -925,7 +924,7 @@ KF_BITBUCKET_APP_PASSWORD="app-password" \ ```bash kingfisher scan bitbucket --workspace my-team --list-only # enumerate all accessible workspaces or projects -KF_BITBUCKET_USERNAME="$USER" KF_BITBUCKET_APP_PASSWORD="$APP_PASSWORD" \ +KF_BITBUCKET_TOKEN="$BITBUCKET_TOKEN" \ kingfisher scan bitbucket --all-workspaces --list-only # filter out repositories using glob patterns kingfisher scan bitbucket --workspace my-team --bitbucket-exclude my-team/**/experimental-* --list-only @@ -935,14 +934,23 @@ kingfisher scan bitbucket --workspace my-team --bitbucket-exclude my-team/**/exp Kingfisher supports Bitbucket Cloud and Bitbucket Server credentials: -- **App password or server token** – set `KF_BITBUCKET_USERNAME` and either - `KF_BITBUCKET_APP_PASSWORD`, `KF_BITBUCKET_TOKEN`, or - `KF_BITBUCKET_PASSWORD`. +- **Workspace API token (Cloud)** – set `KF_BITBUCKET_TOKEN`. `KF_BITBUCKET_USERNAME` + is optional; Kingfisher automatically uses the token for Bitbucket REST APIs + and authenticates git operations as `x-token-auth`. +- **Bitbucket Server token** – set `KF_BITBUCKET_USERNAME` and either + `KF_BITBUCKET_TOKEN` or `KF_BITBUCKET_PASSWORD`. +- **Legacy app password (Cloud)** – set `KF_BITBUCKET_USERNAME` and + `KF_BITBUCKET_APP_PASSWORD`. - **OAuth/PAT token** – set `KF_BITBUCKET_OAUTH_TOKEN`. These credentials match the options described in the [ghorg setup guide](https://github.com/gabrie30/ghorg/blob/master/README.md#bitbucket-setup). +Bitbucket no longer supports App Tokens as of September 9, 2025: +https://support.atlassian.com/bitbucket-cloud/docs/api-tokens/ + +> As of September 9, 2025, app passwords can no longer be created. Use API tokens with scopes instead. All existing app passwords will be disabled on June 9, 2026. Migrate any integrations before then to avoid disruptions. + ### Self-hosted Bitbucket Server Use `--bitbucket-api-url` to point Kingfisher at your server's REST endpoint, for example @@ -1050,8 +1058,9 @@ KF_SLACK_TOKEN="xoxp-1234..." kingfisher scan slack "akia" \ | `KF_GITEA_USERNAME` | Username for private Gitea clones (used with `KF_GITEA_TOKEN`) | | `KF_AZURE_TOKEN` / `KF_AZURE_PAT` | Azure Repos Personal Access Token | | `KF_AZURE_USERNAME` | Username to use with Azure Repos PATs (defaults to `pat` when unset) | -| `KF_BITBUCKET_USERNAME` | Bitbucket username for basic authentication | -| `KF_BITBUCKET_APP_PASSWORD` / `KF_BITBUCKET_TOKEN` | Bitbucket app password or server token | +| `KF_BITBUCKET_TOKEN` | Bitbucket Cloud workspace API token or Bitbucket Server PAT | +| `KF_BITBUCKET_USERNAME` | Optional Bitbucket username for legacy app passwords or server tokens | +| `KF_BITBUCKET_APP_PASSWORD` | Legacy Bitbucket app password (deprecated September 9, 2025; disabled June 9, 2026) | | `KF_BITBUCKET_OAUTH_TOKEN` | Bitbucket OAuth or PAT token | | `KF_HUGGINGFACE_TOKEN` | Hugging Face access token for API enumeration and git cloning | | `KF_HUGGINGFACE_USERNAME` | Optional username for Hugging Face git operations (defaults to `hf_user`) | diff --git a/src/bitbucket.rs b/src/bitbucket.rs index fef0f7f..6f5f374 100644 --- a/src/bitbucket.rs +++ b/src/bitbucket.rs @@ -40,18 +40,43 @@ pub struct AuthConfig { pub bearer_token: Option, } +pub(crate) fn is_bitbucket_access_token(token: &str) -> bool { + token.len() > 40 && token.starts_with("AT") && token.contains('_') +} + impl AuthConfig { pub fn from_options( username: Option, password: Option, bearer_token: Option, ) -> Self { - let username = username.or_else(|| env::var("KF_BITBUCKET_USERNAME").ok()); - let password = password - .or_else(|| env::var("KF_BITBUCKET_APP_PASSWORD").ok()) - .or_else(|| env::var("KF_BITBUCKET_TOKEN").ok()) - .or_else(|| env::var("KF_BITBUCKET_PASSWORD").ok()); - let bearer_token = bearer_token.or_else(|| env::var("KF_BITBUCKET_OAUTH_TOKEN").ok()); + fn normalized(value: Option) -> Option { + value.and_then(|v| if v.trim().is_empty() { None } else { Some(v) }) + } + + fn env_var(name: &str) -> Option { + match env::var(name) { + Ok(value) if value.trim().is_empty() => None, + Ok(value) => Some(value), + Err(_) => None, + } + } + + let username = normalized(username).or_else(|| env_var("KF_BITBUCKET_USERNAME")); + let password = normalized(password) + .or_else(|| env_var("KF_BITBUCKET_APP_PASSWORD")) + .or_else(|| env_var("KF_BITBUCKET_TOKEN")) + .or_else(|| env_var("KF_BITBUCKET_PASSWORD")); + let mut bearer_token = + normalized(bearer_token).or_else(|| env_var("KF_BITBUCKET_OAUTH_TOKEN")); + + if bearer_token.is_none() { + if let Some(password) = &password { + if is_bitbucket_access_token(password) { + bearer_token = Some(password.clone()); + } + } + } Self { username, password, bearer_token } } @@ -709,4 +734,52 @@ mod tests { Some("ws/repo") ); } + + #[test] + fn auth_config_ignores_empty_environment_values() { + temp_env::with_vars( + &[ + ("KF_BITBUCKET_USERNAME", Some("")), + ("KF_BITBUCKET_APP_PASSWORD", Some("")), + ("KF_BITBUCKET_OAUTH_TOKEN", Some(" ")), + ], + || { + let auth = AuthConfig::from_env(); + assert!(auth.username.is_none()); + assert!(auth.password.is_none()); + assert!(auth.bearer_token.is_none()); + }, + ); + } + + #[test] + fn auth_config_prefers_basic_auth_when_bearer_is_empty() { + temp_env::with_vars( + &[ + ("KF_BITBUCKET_USERNAME", Some("user")), + ("KF_BITBUCKET_APP_PASSWORD", Some("pass")), + ("KF_BITBUCKET_OAUTH_TOKEN", Some("")), + ], + || { + let auth = AuthConfig::from_env(); + assert_eq!(auth.username.as_deref(), Some("user")); + assert_eq!(auth.password.as_deref(), Some("pass")); + assert!(auth.bearer_token.is_none()); + }, + ); + } + + #[test] + fn auth_config_treats_access_token_as_bearer() { + let token = "AT1234567890_ACCESS_TOKEN_EXAMPLE_WITH_UNDERSCORE"; + temp_env::with_vars( + &[("KF_BITBUCKET_USERNAME", Some("user")), ("KF_BITBUCKET_TOKEN", Some(token))], + || { + let auth = AuthConfig::from_env(); + assert_eq!(auth.username.as_deref(), Some("user")); + assert_eq!(auth.password.as_deref(), Some(token)); + assert_eq!(auth.bearer_token.as_deref(), Some(token)); + }, + ); + } } diff --git a/src/git_binary.rs b/src/git_binary.rs index a629373..6472cfc 100644 --- a/src/git_binary.rs +++ b/src/git_binary.rs @@ -5,7 +5,7 @@ use std::{ use tracing::{debug, debug_span}; -use crate::git_url::GitUrl; +use crate::{bitbucket::is_bitbucket_access_token, git_url::GitUrl}; const BITBUCKET_CREDENTIAL_HELPER: &str = r#"credential.helper=!_bbcreds() { if [ -n "$KF_BITBUCKET_OAUTH_TOKEN" ]; then @@ -13,6 +13,11 @@ const BITBUCKET_CREDENTIAL_HELPER: &str = r#"credential.helper=!_bbcreds() { echo password="$KF_BITBUCKET_OAUTH_TOKEN"; return; fi + if [ -n "$KF_BITBUCKET_ACCESS_TOKEN" ]; then + echo username="x-token-auth"; + echo password="$KF_BITBUCKET_ACCESS_TOKEN"; + return; + fi if [ -n "$KF_BITBUCKET_USERNAME" ]; then bb_pass="${KF_BITBUCKET_APP_PASSWORD:-${KF_BITBUCKET_TOKEN:-${KF_BITBUCKET_PASSWORD:-}}}"; if [ -n "$bb_pass" ]; then @@ -95,6 +100,7 @@ fn summarize_output(output: &[u8]) -> Option { pub struct Git { credentials: Vec, ignore_certs: bool, + bitbucket_access_token: Option, } impl Git { @@ -112,14 +118,18 @@ impl Git { matches!(std::env::var("KF_GITEA_TOKEN"), Ok(token) if !token.is_empty()); let has_bitbucket_username = matches!(std::env::var("KF_BITBUCKET_USERNAME"), Ok(value) if !value.is_empty()); + let bitbucket_access_token = std::env::var("KF_BITBUCKET_TOKEN") + .ok() + .filter(|value| !value.is_empty() && is_bitbucket_access_token(value)); let has_bitbucket_password = ["KF_BITBUCKET_APP_PASSWORD", "KF_BITBUCKET_TOKEN", "KF_BITBUCKET_PASSWORD"] .iter() .any(|key| matches!(std::env::var(key), Ok(value) if !value.is_empty())); let has_bitbucket_oauth_token = matches!(std::env::var("KF_BITBUCKET_OAUTH_TOKEN"), Ok(value) if !value.is_empty()); - let has_bitbucket_credentials = - has_bitbucket_oauth_token || (has_bitbucket_username && has_bitbucket_password); + let has_bitbucket_credentials = has_bitbucket_oauth_token + || bitbucket_access_token.is_some() + || (has_bitbucket_username && has_bitbucket_password); let has_azure_token = ["KF_AZURE_TOKEN", "KF_AZURE_PAT"] .iter() .any(|key| matches!(std::env::var(key), Ok(value) if !value.is_empty())); @@ -176,7 +186,7 @@ impl Git { credentials.push(HUGGINGFACE_CREDENTIAL_HELPER.into()); } - Self { credentials, ignore_certs } + Self { credentials, ignore_certs, bitbucket_access_token } } /// Create a basic `git` `Command` with environment variables set to @@ -191,6 +201,9 @@ impl Git { if self.ignore_certs { cmd.env("GIT_SSL_NO_VERIFY", "1"); } + if let Some(token) = &self.bitbucket_access_token { + cmd.env("KF_BITBUCKET_ACCESS_TOKEN", token); + } cmd.args(&self.credentials); cmd.stdin(Stdio::null()); cmd @@ -302,6 +315,7 @@ mod tests { let git = Git::new(false); assert!(!git.ignore_certs); assert!(git.credentials.is_empty()); + assert!(git.bitbucket_access_token.is_none()); temp_env::with_var("KF_GITHUB_TOKEN", Some("test_token"), || { let git = Git::new(false); @@ -315,6 +329,7 @@ mod tests { let git = Git::new(false); assert_eq!(git.credentials.len(), 4); assert!(git.credentials.iter().any(|value| value == BITBUCKET_CREDENTIAL_HELPER)); + assert!(git.bitbucket_access_token.is_none()); }); } @@ -329,10 +344,22 @@ mod tests { let git = Git::new(false); assert_eq!(git.credentials.len(), 4); assert!(git.credentials.iter().any(|value| value == BITBUCKET_CREDENTIAL_HELPER)); + assert!(git.bitbucket_access_token.is_none()); }, ); } + #[test] + fn test_git_new_bitbucket_access_token() { + let token = "AT1234567890_ACCESS_TOKEN_EXAMPLE_WITH_UNDERSCORE"; + temp_env::with_var("KF_BITBUCKET_TOKEN", Some(token), || { + let git = Git::new(false); + assert_eq!(git.credentials.len(), 4); + assert!(git.credentials.iter().any(|value| value == BITBUCKET_CREDENTIAL_HELPER)); + assert_eq!(git.bitbucket_access_token.as_deref(), Some(token)); + }); + } + #[test] fn test_clone_mode_arg() { assert_eq!(CloneMode::Bare.arg(), Some("--bare")); diff --git a/src/rules/rule.rs b/src/rules/rule.rs index 37fd05d..74adab9 100644 --- a/src/rules/rule.rs +++ b/src/rules/rule.rs @@ -610,12 +610,7 @@ impl RuleSyntax { let context = e.location().map_or(String::new(), |loc| { format!(" at line {} column {}", loc.line(), loc.column()) }); - anyhow!( - "Failed to parse YAML from {}{}: {}", - path.display(), - context, - e - ) + anyhow!("Failed to parse YAML from {}{}: {}", path.display(), context, e) })?; Ok(match parsed { From 3417d0f254cae4feef518b739c147054b9dfc901 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 13 Nov 2025 15:22:48 -0800 Subject: [PATCH 6/6] Fixed broken tests --- tests/jdbc_rule.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/jdbc_rule.rs b/tests/jdbc_rule.rs index def97a9..cff5393 100644 --- a/tests/jdbc_rule.rs +++ b/tests/jdbc_rule.rs @@ -29,7 +29,7 @@ fn jdbc_rule_matches_expected_patterns() -> Result<()> { let matches: BTreeSet = regex .captures_iter(sample.as_bytes()) - .filter_map(|caps| caps.name("TOKEN")) + .filter_map(|caps| caps.get(1)) .map(|m| String::from_utf8_lossy(m.as_bytes()).into_owned()) .collect(); @@ -60,7 +60,7 @@ fn jdbc_rule_respects_user_skip_regex() -> Result<()> { let matches: Vec = regex .captures_iter(sample.as_bytes()) - .filter_map(|caps| caps.name("TOKEN")) + .filter_map(|caps| caps.get(1)) .map(|m| String::from_utf8_lossy(m.as_bytes()).into_owned()) .collect();