From e54dbe90d06d5b354c3a423e643c98e0088c61bf Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 29 Aug 2025 17:24:26 -0700 Subject: [PATCH 1/9] - Improved rules: github oauth2, diffbot, mailchimp, aws - Added validation to SauceLabs rule - Added rules: shodan, bitly, flickr --- CHANGELOG.md | 7 ++ data/rules/aws.yml | 4 +- data/rules/bitly.yml | 36 +++++++++ data/rules/diffbot.yml | 5 +- data/rules/flickr.yml | 72 ++++++++++++++++++ data/rules/github.yml | 21 ++++++ data/rules/mailchimp.yml | 3 +- data/rules/sauce.yml | 87 ++++++++++++++++++---- data/rules/shodan.yml | 34 +++++++++ src/main.rs | 2 +- src/validation.rs | 21 ++++-- src/validation/mongodb.rs | 145 ++++++++++++++++++++++++++++++------- src/validation/postgres.rs | 105 ++++++++++++++++++++++++++- 13 files changed, 487 insertions(+), 55 deletions(-) create mode 100644 data/rules/bitly.yml create mode 100644 data/rules/flickr.yml create mode 100644 data/rules/shodan.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 41373bd..69ee670 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,17 @@ All notable changes to this project will be documented in this file. +## [Unreleased] +- MongoDB validator now validates `mongodb+srv://` URIs with a fast timeout instead of skipping them +- Improved rules: github oauth2, diffbot, mailchimp, aws +- Added validation to SauceLabs rule +- Added rules: shodan, bitly, flickr + ## [1.46.0] - Improved rules: AWS, pem - Added rule for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, together.ai, zhipu - Added `self-update` command to update the binary independently. 
Now supports updating over homebrew managed binary +- MongoDB validator now checks `mongodb+srv://` URIs with fast-fail timeouts ## [1.45.0] - Added `--repo-artifacts` flag to scan repository issues, gists/snippets, and wikis when cloning via `--git-url` diff --git a/data/rules/aws.yml b/data/rules/aws.yml index 62041da..1855aa3 100644 --- a/data/rules/aws.yml +++ b/data/rules/aws.yml @@ -22,7 +22,7 @@ rules: (?: \b (?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) - (?:.|[\n\r]){0,32}? + (?:.|[\n\r]){0,64}? \b ( [A-Z0-9/+=]{40} @@ -34,7 +34,7 @@ rules: (?:SECRET|PRIVATE|ACCESS) (?:.|[\n\r]){0,16}? (?:KEY|TOKEN) - (?:.|[\n\r]){0,32}? + (?:.|[\n\r]){0,64}? \b ( [A-Z0-9/+=]{40} diff --git a/data/rules/bitly.yml b/data/rules/bitly.yml new file mode 100644 index 0000000..11ec1da --- /dev/null +++ b/data/rules/bitly.yml @@ -0,0 +1,36 @@ +rules: + - name: Bitly Access Token + id: kingfisher.bitly.1 + pattern: | + (?xi) + \b + bitly + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? 
+ \b + ( + [a-f0-9]{40} + ) + \b + confidence: medium + min_entropy: 3.0 + validation: + type: Http + content: + request: + method: GET + url: "https://api-ssl.bitly.com/v4/user" + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"login":' + references: + - https://dev.bitly.com/api-reference#Authentication + examples: + - "bitly_token = 20e9827b9c5ddee1b0cec7722bfc557dec833791" diff --git a/data/rules/diffbot.yml b/data/rules/diffbot.yml index 05e989b..231109b 100644 --- a/data/rules/diffbot.yml +++ b/data/rules/diffbot.yml @@ -27,9 +27,8 @@ rules: - report_response: true - type: StatusMatch status: [200] - - type: JsonValid - type: WordMatch - match_all_words: true words: - '"name"' - - '"email"' \ No newline at end of file + - '"email"' + - '"planCredits"' \ No newline at end of file diff --git a/data/rules/flickr.yml b/data/rules/flickr.yml new file mode 100644 index 0000000..c0dd982 --- /dev/null +++ b/data/rules/flickr.yml @@ -0,0 +1,72 @@ +rules: + - name: Flickr API Key + id: kingfisher.flickr.1 + pattern: | + (?xi) + \b + flickr + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)? + (?:.|[\n\r]){0,32}? + \b + ( + [a-f0-9]{32} + ) + \b + confidence: medium + min_entropy: 3.0 + validation: + type: Http + content: + request: + method: GET + url: "https://www.flickr.com/services/rest/?method=flickr.test.login&api_key={{TOKEN}}&format=json&nojsoncallback=1" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"Invalid API Key"' + negative: true + references: + - https://www.flickr.com/services/api/ + - https://www.flickr.com/services/api/flickr.test.login.html + examples: + - "flickr_api_key: d6953dc63a9498593bfdb4287ed2293c" + - name: Flickr OAuth Token + id: kingfisher.flickr.2 + pattern: | + (?xi) + \b + flickr + (?:.|[\n\r]){0,32}? + (?:OAUTH|ACCESS|TOKEN)? 
+ (?:.|[\n\r]){0,32}? + \b + ( + [a-f0-9]{32} + ) + \b + confidence: medium + min_entropy: 3.0 + validation: + type: Http + content: + request: + method: GET + url: "https://www.flickr.com/services/rest/?method=flickr.auth.oauth.checkToken&api_key={{TOKEN}}&oauth_token={{TOKEN}}&format=json&nojsoncallback=1" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"stat":"ok"' + - '"oauth":' + match_all_words: true + references: + - https://www.flickr.com/services/api/ + - https://www.flickr.com/services/api/flickr.auth.oauth.checkToken.html + examples: + - "flickr_oauth_token: a8c1e1f1d9d34aa5a1bdbd94234bcdef" \ No newline at end of file diff --git a/data/rules/github.yml b/data/rules/github.yml index 90c9c3b..971f10d 100644 --- a/data/rules/github.yml +++ b/data/rules/github.yml @@ -166,6 +166,7 @@ rules: (?: id | identifier | key ) .{0,2} \s{0,20} .{0,2} \s{0,20} .{0,2} \b ([a-z0-9]{20}) \b + visible: false examples: - | GITHUB_CLIENT_ID=ac58d6da7d7a84c039b7 @@ -181,6 +182,26 @@ rules: (?: key | oauth | sec | secret )? .{0,2} \s{0,20} .{0,2} \s{0,20} .{0,2} \b ([a-z0-9]{40}) \b + depends_on_rule: + - rule_id: "kingfisher.github.5" + variable: GITHUB_CLIENT_ID + validation: + type: Http + content: + request: + method: POST + url: "https://github.com/login/oauth/access_token" + headers: + Accept: "application/json" + Content-Type: "application/json" + body: '{"client_id":"{{GITHUB_CLIENT_ID}}","client_secret":"{{TOKEN}}","code":"invalid_code"}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"error":"bad_verification_code"' examples: - | GITHUB_CLIENT_ID=ac58d6da7d7a84c039b7 diff --git a/data/rules/mailchimp.yml b/data/rules/mailchimp.yml index a28eaae..ffa5d3a 100644 --- a/data/rules/mailchimp.yml +++ b/data/rules/mailchimp.yml @@ -3,8 +3,9 @@ rules: id: kingfisher.mailchimp.1 pattern: | (?xi) + \b mailchimp - (?:.|[\n\r]){0,32}? 
+ (?:.|[\n\r]){0,128}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? \b diff --git a/data/rules/sauce.yml b/data/rules/sauce.yml index 761bdd2..e116883 100644 --- a/data/rules/sauce.yml +++ b/data/rules/sauce.yml @@ -1,23 +1,82 @@ rules: - - name: Sauce Token - id: kingfisher.sauce.1 - + - name: Sauce Labs Username + id: kingfisher.saucelabs.1 pattern: | - (?x)(?i) - sauce .{0,50} + (?xi) \b - ([a-f0-9-]{36}) - (?: [^a-f0-9-] | $ ) + sauce + (?:.|[\n\r]){0,16}? + (?:USER|ID|NAME|CLIENT|OAUTH) + (?:.|[\n\r]){0,16}? + \b + ( + [A-Z0-9_.-]{2,70} + ) + \b + confidence: medium + visible: false + min_entropy: 1.0 + examples: + - "SAUCE_USERNAME=oauth-someusername-487ea" + - SAUCE_USERNAME="oauth-ci-bot-487ea" + - '"sauce_username":"build-user"' + - 'saucelabs user oauth-release-bot' + - name: Sauce Labs API Endpoint + id: kingfisher.saucelabs.2 + pattern: | + (?xi) + \b + ( + (:?api|ondemand)\.(?:us|eu)-(?:west|east|central)-[0-9]\.saucelabs\.com + ) + \b + confidence: medium + visible: false + min_entropy: 2.0 + examples: + - "api.us-west-1.saucelabs.com" + - "api.eu-central-1.saucelabs.com" + - "ondemand.eu-central-1.saucelabs.com" + - name: Sauce Labs Access Key + id: kingfisher.saucelabs.3 + pattern: | + (?xi) + \b + sauce + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? + \b + ( + [a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12} + ) + \b confidence: medium min_entropy: 3.0 - - examples: - - | - - SAUCE_USERNAME=vitess - - SAUCE_ACCESS_KEY=2397f603-c2c4-4897-a8ca-587ace5dc8dd - - SAUCE_ACCESS_KEY=2397f603-c2c4-4897-a8ca-587ace5dc8d- - + depends_on_rule: + - rule_id: "kingfisher.saucelabs.1" + variable: SAUCE_USERNAME + - rule_id: "kingfisher.saucelabs.2" + variable: SAUCE_URL + validation: + type: Http + content: + request: + method: GET + url: "https://{{ SAUCE_URL | default: 'api.us-west-1.saucelabs.com' | replace: 'ondemand.', 'api.' 
}}/rest/v1/users/{{SAUCE_USERNAME}}" + headers: + Authorization: "Basic {{ SAUCE_USERNAME | append: ':' | append: TOKEN | b64enc }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"username":' references: - https://docs.saucelabs.com/dev/api/ - https://docs.saucelabs.com/dev/api/#authentication + examples: + - "SAUCE_ACCESS_KEY=1736468d-b178-39cd-bfde-30fabdc371e4" + diff --git a/data/rules/shodan.yml b/data/rules/shodan.yml new file mode 100644 index 0000000..99ab150 --- /dev/null +++ b/data/rules/shodan.yml @@ -0,0 +1,34 @@ +rules: + - name: SHODAN API Key + id: kingfisher.shodan.1 + pattern: | + (?xi) + \b + shodan + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? + \b + ( + [A-Z0-9]{32} + ) + \b + confidence: medium + min_entropy: 4.0 + validation: + type: Http + content: + request: + method: GET + url: "https://api.shodan.io/api-info?key={{TOKEN}}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"scan_credits"' + references: + - https://developer.shodan.io/api + examples: + - "shodan_api_key = dqlblS2CmTOc5zYn4nZkJljYsXRnNuiq" diff --git a/src/main.rs b/src/main.rs index 15c1a8a..c1eabd1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -255,7 +255,7 @@ async fn async_main(args: CommandLineArgs) -> Result<()> { } }, }, - Command::SelfUpdate => unreachable!(), + Command::SelfUpdate => anyhow::bail!("SelfUpdate command should not reach this branch"), } if let Some(msg) = update_msg { info!("{msg}"); diff --git a/src/validation.rs b/src/validation.rs index 3e981ea..ca76943 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -553,17 +553,24 @@ async fn timed_validate_single_match<'a>( return; } + let cache_key = mongodb::generate_mongodb_cache_key(&uri); + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < 
Duration::from_secs(VALIDATION_CACHE_SECONDS) { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + commit_and_return(m); + return; + } + } + match mongodb::validate_mongodb(&uri).await { Ok((ok, msg)) => { m.validation_success = ok; m.validation_response_body = msg; - m.validation_response_status = if uri.starts_with("mongodb+srv://") { - StatusCode::CONTINUE - } else if ok { - StatusCode::OK - } else { - StatusCode::UNAUTHORIZED - }; + m.validation_response_status = + if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; } Err(e) => { m.validation_success = false; diff --git a/src/validation/mongodb.rs b/src/validation/mongodb.rs index 74a82d5..c6cf600 100644 --- a/src/validation/mongodb.rs +++ b/src/validation/mongodb.rs @@ -1,9 +1,10 @@ // src/validation/mongodb.rs -use std::time::Duration; +use std::{net::IpAddr, time::Duration}; use anyhow::Result; use bson::doc; -use mongodb::{options::ClientOptions, Client}; +use mongodb::{error::ErrorKind, options::ClientOptions, Client}; +use tokio::time::timeout; pub fn looks_like_mongodb_uri(uri: &str) -> bool { // quick scheme check first @@ -14,10 +15,87 @@ pub fn looks_like_mongodb_uri(uri: &str) -> bool { mongodb::options::ConnectionString::parse(uri).is_ok() } +/// Return true if the URI targets localhost/loopback or a unix domain socket. +/// This is a *string-only* check—no DNS or driver IO. 
+fn uri_targets_localhost(uri: &str) -> bool {
+    // strip scheme
+    let rest = uri
+        .strip_prefix("mongodb://")
+        .or_else(|| uri.strip_prefix("mongodb+srv://"))
+        .unwrap_or(uri);
+
+    // authority ends at first '/' (before db/path); if missing, take whole rest
+    let authority = rest.split_once('/').map(|(a, _)| a).unwrap_or(rest);
+
+    // drop userinfo if present
+    let hostlist = authority.rsplit_once('@').map(|(_, h)| h).unwrap_or(authority);
+
+    // iterate seed list (mongodb://hostA,hostB,...)
+    for part in hostlist.split(',') {
+        let part = part.trim();
+
+        // unix domain socket (percent-encoded "/path/to.sock") → treat as local;
+        // checked per host, after userinfo removal, so "user@%2Ftmp%2Fm.sock" is caught too
+        if part.starts_with('/') || part.to_ascii_lowercase().starts_with("%2f") {
+            return true;
+        }
+
+        let host = if let Some(bracketed) = part.strip_prefix('[') {
+            // IPv6 literal "[addr]" or "[addr]:port": keep only the text inside the
+            // brackets so the address's own ':' characters are never mistaken for a port
+            bracketed.split_once(']').map(|(addr, _)| addr).unwrap_or(bracketed)
+        } else {
+            // strip ":port" only when the text after the single ':' is all digits
+            match part.split_once(':') {
+                Some((name, port)) if port.chars().all(|c| c.is_ascii_digit()) => name,
+                _ => part,
+            }
+        };
+
+        if is_local_host(host) {
+            return true;
+        }
+    }
+
+    false
+}
+
+/// Returns true for localhost/loopback/unspecified IPs and common localhost aliases.
+fn is_local_host(h: &str) -> bool { + let s = h.trim().trim_end_matches('.'); + let s_lower = s.to_ascii_lowercase(); + + // common aliases seen in hosts files across distros + if matches!( + s_lower.as_str(), + "localhost" + | "localhost.localdomain" + | "localhost6" + | "localhost6.localdomain6" + | "ip6-localhost" + | "ip6-loopback" + ) { + return true; + } + + // explicit unspecified forms + if s_lower.as_str() == "0.0.0.0" || s_lower.as_str() == "::" { + return true; + } + + // literal IPs + if let Ok(ip) = s.parse::() { + return ip.is_loopback() || ip.is_unspecified(); + } + + false +} + const FAST_CONNECT_MS: u64 = 700; // direct single-host URIs const FAST_SELECT_MS: u64 = 300; -const SRV_CONNECT_MS: u64 = 15_000; // gives Atlas a fighting chance -const SRV_SELECT_MS: u64 = 15_000; +const SRV_PARSE_MS: u64 = 1_000; // limit DNS resolution time +const SRV_CONNECT_MS: u64 = 1500;//700; +const SRV_SELECT_MS: u64 = 1500;//300; /// Validates a MongoDB URI in ≤ 2 s. Returns `(bool, String)` where the /// boolean indicates success and the string provides a status message. @@ -27,25 +105,35 @@ pub async fn validate_mongodb(uri: &str) -> Result<(bool, String)> { return Ok((false, "Invalid MongoDB URI".to_string())); } - let is_srv = uri.starts_with("mongodb+srv://"); - - if is_srv { - // Skip SRV URIs to avoid slow DNS lookups and topology discovery. 
+ // ---- refuse localhost/loopback/UDS outright + if uri_targets_localhost(uri) { return Ok(( false, - "Validation skipped for mongodb+srv:// URI (performance reasons)".to_string(), + "Refusing to validate localhost/loopback MongoDB URIs.".to_string(), )); } - // ---- build client opts - let mut opts = ClientOptions::parse(uri).await?; + let is_srv = uri.starts_with("mongodb+srv://"); + + // ---- build client opts (guarded so we don't hit DNS/driver first) + let mut opts = if is_srv { + match timeout(Duration::from_millis(SRV_PARSE_MS), ClientOptions::parse(uri)).await { + Ok(res) => res?, + Err(_) => { + return Ok((false, "MongoDB connection failed: timeout exceeded".to_string())); + } + } + } else { + ClientOptions::parse(uri).await? + }; + if !is_srv { // one socket, skip cluster discovery for plain 'mongodb://' opts.direct_connection = Some(true); opts.connect_timeout = Some(Duration::from_millis(FAST_CONNECT_MS)); opts.server_selection_timeout = Some(Duration::from_millis(FAST_SELECT_MS)); } else { - // SRV needs DNS and replica-set discovery; give it a couple seconds + // SRV needs DNS and replica-set discovery; fail fast opts.connect_timeout = Some(Duration::from_millis(SRV_CONNECT_MS)); opts.server_selection_timeout = Some(Duration::from_millis(SRV_SELECT_MS)); // leave direct_connection = None (driver decides) @@ -55,18 +143,25 @@ pub async fn validate_mongodb(uri: &str) -> Result<(bool, String)> { // ---- dial and ping let client = Client::with_options(opts)?; - let ok = client.database("admin").run_command(doc! { "ping": 1 }).await.is_ok(); - let msg = if ok { - "MongoDB connection is valid.".to_string() - } else { - "MongoDB connection failed.".to_string() - }; - Ok((ok, msg)) + let res = client.database("admin").run_command(doc! { "ping": 1 }).await; + match res { + Ok(_) => Ok((true, "MongoDB connection is valid.".to_string())), + Err(e) => { + let msg = match *e.kind { + ErrorKind::ServerSelection { .. 
} => { + "MongoDB connection failed: timeout exceeded".to_string() + } + _ => "MongoDB connection failed.".to_string(), + }; + Ok((false, msg)) + } + } } -// pub fn generate_mongodb_cache_key(mongodb_uri: &str) -> String { -// use sha1::{Digest, Sha1}; -// let mut hasher = Sha1::new(); -// hasher.update(mongodb_uri.as_bytes()); -// format!("MongoDB:{:x}", hasher.finalize()) -// } +/// Return a stable cache key for the given MongoDB URI. +pub fn generate_mongodb_cache_key(mongodb_uri: &str) -> String { + use sha1::{Digest, Sha1}; + let mut hasher = Sha1::new(); + hasher.update(mongodb_uri.as_bytes()); + format!("MongoDB:{:x}", hasher.finalize()) +} \ No newline at end of file diff --git a/src/validation/postgres.rs b/src/validation/postgres.rs index 5d7259e..71ca608 100644 --- a/src/validation/postgres.rs +++ b/src/validation/postgres.rs @@ -1,16 +1,26 @@ -use std::{str::FromStr, time::Duration}; +use std::{str::FromStr, sync::Once, time::Duration}; use anyhow::{anyhow, Result}; +use rustls::crypto::{ring, CryptoProvider}; use rustls::{client::ClientConfig, RootCertStore}; use rustls_native_certs::{load_native_certs, CertificateResult}; use sha1::{Digest, Sha1}; use tokio::time::{error::Elapsed, timeout}; -use tokio_postgres::{config::SslMode, tls::NoTls, Config, Error}; +use tokio_postgres::{config::{Host, SslMode}, tls::NoTls, Config, Error}; use tokio_postgres_rustls::MakeRustlsConnect; use tracing::debug; const CONNECT_TIMEOUT: Duration = Duration::from_secs(5); +static INIT_PROVIDER: Once = Once::new(); +fn ensure_crypto_provider() { + INIT_PROVIDER.call_once(|| { + // If another part of the program already installed a provider, + // ignore the error — we just need one global provider. 
+ let _ = CryptoProvider::install_default(ring::default_provider()); + }); +} + pub fn generate_postgres_cache_key(postgres_url: &str) -> String { let mut hasher = Sha1::new(); hasher.update(postgres_url.as_bytes()); @@ -21,6 +31,12 @@ pub async fn validate_postgres(postgres_url: &str) -> Result<(bool, Vec) let mut cfg = Config::from_str(postgres_url).map_err(|e| anyhow!("Failed to parse Postgres URL: {e}"))?; + // --- skip localhost/loopback/unix-socket targets entirely ------------- + if has_any_local_host(&cfg) { + debug!("Skipping Postgres validation: host is localhost/loopback or unix socket"); + return Ok((false, vec!["skipped localhost/loopback host".into()])); + } + let original_mode = cfg.get_ssl_mode(); if original_mode == SslMode::Prefer { cfg.ssl_mode(SslMode::Disable); @@ -29,6 +45,36 @@ pub async fn validate_postgres(postgres_url: &str) -> Result<(bool, Vec) check_postgres_db_connection(cfg, original_mode).await } +fn has_any_local_host(cfg: &Config) -> bool { + cfg.get_hosts().iter().any(|h| match h { + Host::Unix(_) => true, // local unix socket + Host::Tcp(s) => is_local_tcp_host(s), + }) +} + +fn is_local_tcp_host(s: &str) -> bool { + // strip URI-style IPv6 brackets if present + let host = s.trim_matches(|c| c == '[' || c == ']'); + + // Direct IPs + if let Ok(ip) = host.parse::() { + return match ip { + std::net::IpAddr::V4(v4) => + v4.is_loopback() || v4.is_unspecified() || v4.is_link_local(), + std::net::IpAddr::V6(v6) => + v6.is_loopback() || v6.is_unspecified() || v6.is_unicast_link_local(), + }; + } + + // Common localhost hostnames + let lower = host.to_ascii_lowercase(); + lower == "localhost" + || lower.starts_with("localhost.") + || lower == "localhost6" + || lower.starts_with("localhost6.") +} + + async fn check_postgres_db_connection( mut cfg: Config, original_mode: SslMode, @@ -52,6 +98,9 @@ async fn check_postgres_db_connection( .await } else { timeout(CONNECT_TIMEOUT, async { + // Ensure Rustls crypto provider is installed *before* 
using the builder + ensure_crypto_provider(); + let CertificateResult { certs, errors, .. } = load_native_certs(); for err in errors { debug!("native-cert error: {err}"); @@ -89,6 +138,21 @@ async fn check_postgres_db_connection( continue; } + Ok(Err(e)) + if attempt == 0 + && server_requires_encryption(&e.to_string()) + && cfg.get_ssl_mode() == SslMode::Disable => + { + debug!("Encryption required: {e}; retrying with SSL"); + cfg.ssl_mode(SslMode::Require); + continue; + } + + Ok(Err(e)) if missing_cluster_identifier(&e.to_string()) => { + debug!("Missing cluster identifier: {e}; treating as valid"); + return Ok((true, Vec::new())); + } + Ok(Err(e)) if database_not_exists(&e, cfg.get_dbname().unwrap_or("postgres")) => { return Ok((true, Vec::new())); } @@ -108,3 +172,40 @@ fn database_not_exists(err: &Error, db_name: &str) -> bool { let db = if db_name.is_empty() { "postgres" } else { db_name }; err.to_string().contains(&format!("database \"{db}\" does not exist")) } + +fn server_requires_encryption(err_msg: &str) -> bool { + err_msg.contains("server requires encryption") +} + +fn missing_cluster_identifier(err_msg: &str) -> bool { + err_msg.contains("missing cluster identifier") +} + +#[cfg(test)] +mod tests { + use super::{is_local_tcp_host, missing_cluster_identifier, server_requires_encryption}; + + #[test] + fn detects_encryption_requirement() { + assert!(server_requires_encryption("db error: FATAL: server requires encryption")); + assert!(!server_requires_encryption("some other error")); + } + + #[test] + fn detects_missing_cluster() { + assert!(missing_cluster_identifier( + "db error: FATAL: codeParamsRoutingFailed: missing cluster identifier", + )); + assert!(!missing_cluster_identifier("another error")); + } + + #[test] + fn detects_local_hosts() { + for h in ["localhost", "LOCALHOST", "localhost.localdomain", "localhost6", "127.0.0.1", "[::1]", "::"] { + assert!(is_local_tcp_host(h), "should treat {h} as local"); + } + for h in ["db.example.com", 
"10.0.0.1"] { + assert!(!is_local_tcp_host(h), "should not treat {h} as local"); + } + } +} From 9de355a5c8a5a4b61540f95f6423bcb3430aa94d Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 30 Aug 2025 16:44:55 -0700 Subject: [PATCH 2/9] Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance --- CHANGELOG.md | 3 +- Cargo.toml | 2 +- data/rules/docker.yml | 48 ++ data/rules/generic.yml | 20 +- src/main.rs | 30 +- src/matcher.rs | 114 +++- src/reporter.rs | 3 + src/reporter/json_format.rs | 1 + src/reporter/pretty_format.rs | 3 + src/validation.rs | 1 + src/validation.rs.orig | 1052 +++++++++++++++++++++++++++++++++ src/validation/mongodb.rs | 6 +- tests/fingerprint_dedup.rs | 1 + tests/int_base64.rs | 34 ++ 14 files changed, 1266 insertions(+), 52 deletions(-) create mode 100644 data/rules/docker.yml create mode 100644 src/validation.rs.orig create mode 100644 tests/int_base64.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 69ee670..8bfaed2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,12 @@ All notable changes to this project will be documented in this file. 
-## [Unreleased] +## [1.47.0] - MongoDB validator now validates `mongodb+srv://` URIs with a fast timeout instead of skipping them - Improved rules: github oauth2, diffbot, mailchimp, aws - Added validation to SauceLabs rule - Added rules: shodan, bitly, flickr +- Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance ## [1.46.0] - Improved rules: AWS, pem diff --git a/Cargo.toml b/Cargo.toml index b906c05..4e774bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.46.0" +version = "1.47.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/data/rules/docker.yml b/data/rules/docker.yml new file mode 100644 index 0000000..7701377 --- /dev/null +++ b/data/rules/docker.yml @@ -0,0 +1,48 @@ +rules: + - name: Docker Registry Credentials (auths JSON) + id: kingfisher.docker.auths.1 + pattern: | + (?xis) + "auths"\s*:\s*\{ + [^}]*? + " (?P (?:https?:\/\/)? [a-z0-9.\-:+/]+ ) "\s*:\s*\{ + [^}]*? + "auth"\s*:\s*"(?P [A-Za-z0-9+/=]{16,} )" + [^}]*? + \} + [^}]*? 
+ \} + min_entropy: 2.0 + confidence: medium + examples: + - | + { + "auths": { + "quay.io": { + "auth": "cmhkaCtyaHRhcDowM1BERk1RTTJQTDlaQUE5T1gzSU9IQjFYTUlXOVNGNU1XRzNSRVRHNThKVVpKMzEwV0ZZRVNOQTdGMExNNTYx" + } + } + } + - | + {"auths":{"index.docker.io/v1/":{"auth":"dXNlcjp0b2tlbg=="}}} + references: + - https://distribution.github.io/distribution/spec/api/ + validation: + type: Http + content: + request: + method: GET + url: > + {%- assign r = REG -%} + {%- if r contains "://" -%} + {{ r | replace: "/$", "" }}/v2/auth + {%- else -%} + https://{{ r }}/v2/auth + {%- endif -%} + headers: + Authorization: "Basic {{ B64 }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] \ No newline at end of file diff --git a/data/rules/generic.yml b/data/rules/generic.yml index 73714b9..be42e5c 100644 --- a/data/rules/generic.yml +++ b/data/rules/generic.yml @@ -192,4 +192,22 @@ rules: password = 'abuser123456' # some other comment - | user = 'Aladdin' - password = 'open sesame' \ No newline at end of file + password = 'open sesame' + - name: Docker Robot Credentials (plaintext pair) + id: kingfisher.generic.9 + pattern: | + (?xi) + \b + ( + (?P [a-z0-9._-]+ \+ [a-z0-9._-]+ ) + : + (?P [A-Z0-9]{32,80} ) + ) + \b + min_entropy: 2.0 + confidence: low + examples: + - some+thing:02PDFMQN2PL2ZAB9OX3IOHC1XMIW1SE5NWG3RETG58JUZJ310WFYESRA7F0LM461 + - org+builder:1C2F9D0BB1E67E9F6B3B5B9A2A3D4E5F6A7B8C9D0E1F2A3B4C5D6E7F8A9B0C1 + references: + - https://docs.quay.io/use_quay.html#robot-accounts \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index c1eabd1..639c2de 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,27 +5,27 @@ // * Fallback - system allocator (`system-alloc` feature) // ──────────────────────────────────────────────────────────── -// --- jemalloc (opt-in) --- -#[cfg(feature = "use-jemalloc")] -#[global_allocator] -static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +// // --- 
jemalloc (opt-in) --- +// #[cfg(feature = "use-jemalloc")] +// #[global_allocator] +// static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -// --- mimalloc (default) --- -#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] -#[global_allocator] -static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; - -// --- system allocator (explicit opt-out) --- -#[cfg(feature = "system-alloc")] -use std::alloc::System; -#[cfg(feature = "system-alloc")] -#[global_allocator] -static GLOBAL: System = System; +// // --- mimalloc (default) --- +// #[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] +// #[global_allocator] +// static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; +// // --- system allocator (explicit opt-out) --- +// #[cfg(feature = "system-alloc")] // use std::alloc::System; +// #[cfg(feature = "system-alloc")] // #[global_allocator] // static GLOBAL: System = System; +use std::alloc::System; +#[global_allocator] +static GLOBAL: System = System; + use std::{ io::Read, sync::{Arc, Mutex}, diff --git a/src/matcher.rs b/src/matcher.rs index b09429f..d817bcc 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -65,6 +65,7 @@ pub struct OwnedBlobMatch { pub validation_response_status: StatusCode, pub validation_success: bool, pub calculated_entropy: f32, + pub is_base64: bool, } impl<'a> Matcher<'a> { pub fn get_profiling_report(&self) -> Option> { @@ -85,6 +86,7 @@ impl OwnedBlobMatch { .unwrap_or(StatusCode::CONTINUE), validation_success: m.validation_success, calculated_entropy: m.calculated_entropy, + is_base64: m.is_base64, } } @@ -108,6 +110,7 @@ impl OwnedBlobMatch { validation_success: blob_match.validation_success, calculated_entropy: blob_match.calculated_entropy, finding_fingerprint: 0, //default + is_base64: blob_match.is_base64, }; // Convert matching_finding to a &str (using lossy conversion if needed) @@ -154,6 +157,7 @@ pub struct BlobMatch<'a> { pub validation_success: bool, pub 
calculated_entropy: f32, + pub is_base64: bool, } #[derive(Clone)] struct UserData { @@ -305,8 +309,12 @@ impl<'a> Matcher<'a> { // Perform the scan self.scan_bytes_raw(&blob.bytes(), &filename)?; - // Early exit if no matches found - if self.user_data.raw_matches_scratch.is_empty() { + // Opportunistically look for standalone Base64 blobs. If neither + // the raw scan nor this check yields anything, we can return early + // before doing any heavier work. + let mut b64_items = get_base64_strings(blob.bytes()); + + if self.user_data.raw_matches_scratch.is_empty() && b64_items.is_empty() { // Only record in seen_blobs if deduplication is enabled if !no_dedup { return Ok(match self.seen_blobs.insert(blob.id, false) { @@ -322,18 +330,22 @@ impl<'a> Matcher<'a> { let rules_db = self.rules_db; let mut seen_matches = FxHashSet::default(); let mut previous_matches = Vec::new(); - let tree_sitter_result = lang.and_then(|lang_str| { - get_language_and_queries(&lang_str).and_then(|(language, queries)| { - let checker = Checker { language, rules: queries }; - match checker.check(&blob.bytes()) { - Ok(results) => Some(results), - Err(e) => { - println!("Error in checker.check: {}", e); - None + let tree_sitter_result = if self.user_data.raw_matches_scratch.is_empty() { + None + } else { + lang.and_then(|lang_str| { + get_language_and_queries(&lang_str).and_then(|(language, queries)| { + let checker = Checker { language, rules: queries }; + match checker.check(&blob.bytes()) { + Ok(results) => Some(results), + Err(e) => { + println!("Error in checker.check: {}", e); + None + } } - } + }) }) - }); + }; // Process matches let mut matches = Vec::new(); let owned_ts_results = tree_sitter_result.map(|ts_results| { @@ -383,6 +395,7 @@ impl<'a> Matcher<'a> { &mut seen_matches, origin, None, + false, redact, &filename, self.profiler.as_ref(), @@ -406,6 +419,7 @@ impl<'a> Matcher<'a> { &mut seen_matches, origin, Some(ts_match.clone()), + *is_base64_decoded, redact, &filename, 
self.profiler.as_ref(), @@ -414,6 +428,45 @@ impl<'a> Matcher<'a> { } } } + // If the blob contains standalone Base64 blobs, decode and scan them as well + const MAX_B64_DEPTH: usize = 2; // decode at most two levels deep + let mut b64_stack: Vec<(DecodedData, usize)> = + b64_items.drain(..).map(|d| (d, 0)).collect(); + while let Some((item, depth)) = b64_stack.pop() { + for (rule_id_usize, rule) in rules_db.rules.iter().enumerate() { + let re = &rules_db.anchored_regexes[rule_id_usize]; + filter_match( + blob, + rule.clone(), + re, + item.pos_start, + item.pos_end, + &mut matches, + &mut previous_matches, + rule_id_usize, + &mut seen_matches, + origin, + Some(item.decoded.clone()), + true, + redact, + &filename, + self.profiler.as_ref(), + ); + } + if depth + 1 < MAX_B64_DEPTH { + for nested in get_base64_strings(item.decoded.as_bytes()) { + b64_stack.push(( + DecodedData { + original: nested.original, + decoded: nested.decoded, + pos_start: item.pos_start, + pos_end: item.pos_end, + }, + depth + 1, + )); + } + } + } // Finalize // Only record in seen_blobs if deduplication is enabled if !no_dedup { @@ -457,6 +510,7 @@ fn filter_match<'b>( seen_matches: &mut FxHashSet, _origin: &OriginSet, ts_match: Option, + is_base64: bool, redact: bool, filename: &str, profiler: Option<&Arc>, @@ -521,6 +575,7 @@ fn filter_match<'b>( validation_response_status: StatusCode::from_u16(0).unwrap_or(StatusCode::CONTINUE), validation_success: false, calculated_entropy, + is_base64, }); previous_matches.push((rule_id, matching_input_offset_span)); } @@ -729,6 +784,8 @@ pub struct Match { pub calculated_entropy: f32, pub visible: bool, + #[serde(default)] + pub is_base64: bool, } impl Match { #[inline] @@ -780,6 +837,7 @@ impl Match { validation_response_status: owned_blob_match.validation_response_status.as_u16(), validation_success: owned_blob_match.validation_success, calculated_entropy: owned_blob_match.calculated_entropy, + is_base64: owned_blob_match.is_base64, } } @@ -832,33 
+890,26 @@ pub struct DecodedData { } pub fn get_base64_strings(input: &[u8]) -> Vec { lazy_static! { - static ref RE_BASE64: Regex = - Regex::new(r"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?").unwrap(); + // Require a reasonably long run of valid Base64 characters to reduce + // noise. 32 bytes corresponds to 24 decoded bytes. + static ref RE_BASE64: Regex = Regex::new(r"[A-Za-z0-9+/]{32,}={0,2}").unwrap(); } let mut results = Vec::new(); - for capture in RE_BASE64.captures_iter(input) { - let base64_match = capture.get(0).unwrap(); - - if base64_match.is_empty() { - continue; - } - - let start = base64_match.start(); - let end = base64_match.end(); - let base64_string = &input[start..end]; - // Check if the length is a multiple of 4 + for m in RE_BASE64.find_iter(input) { + let base64_string = m.as_bytes(); + // Skip candidates whose length isn't a multiple of four – they cannot + // be valid Base64. if base64_string.len() % 4 != 0 { continue; } if let Ok(decoded) = general_purpose::STANDARD.decode(base64_string) { - // Check if the decoded string is valid UTF-8 if let Ok(decoded_str) = std::str::from_utf8(&decoded) { if decoded_str.is_ascii() { results.push(DecodedData { original: String::from_utf8_lossy(base64_string).into_owned(), decoded: decoded_str.to_string(), - pos_start: start, - pos_end: end, + pos_start: m.start(), + pos_end: m.end(), }); } } @@ -1026,12 +1077,13 @@ mod test { /// and report correct byte-offsets. 
#[test] fn test_get_base64_strings_basic() { - let raw = b"foo SGVsbG8gV29ybGQ= bar"; // "Hello World" + let raw = b"foo MDEyMzQ1Njc4OWFiY2RlZjAxMjM0NTY3ODlhYmNkZWY= bar"; + // decodes to "0123456789abcdef0123456789abcdef" let hits = get_base64_strings(raw); assert_eq!(hits.len(), 1); let item = &hits[0]; - assert_eq!(item.decoded, "Hello World"); - assert_eq!(item.original, "SGVsbG8gV29ybGQ="); + assert_eq!(item.decoded, "0123456789abcdef0123456789abcdef"); + assert_eq!(item.original, "MDEyMzQ1Njc4OWFiY2RlZjAxMjM0NTY3ODlhYmNkZWY="); // "foo␠" is 4 bytes, so the start offset is 4 assert_eq!((item.pos_start, item.pos_end), (4, 4 + item.original.len())); } diff --git a/src/reporter.rs b/src/reporter.rs index 9a08cc2..c9c2a62 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -384,6 +384,7 @@ impl DetailsReporter { column_start: source_span.start.column as u32, column_end: source_span.end.column as u32, path: file_path, + encoding: if rm.m.is_base64 { Some("base64".to_string()) } else { None }, git_metadata: git_metadata_val, }, } @@ -521,6 +522,8 @@ pub struct FindingRecordData { pub column_end: u32, pub path: String, #[serde(skip_serializing_if = "Option::is_none")] + pub encoding: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub git_metadata: Option, } diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 7bf68af..da5f782 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -165,6 +165,7 @@ mod tests { validation_success, calculated_entropy: 4.5, visible: true, + is_base64: false, } } diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index 6790a44..086648c 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -100,6 +100,9 @@ impl<'a> Display for PrettyFindingRecord<'a> { }; let finding = &record.finding; writeln!(f, " |Finding.......: {}", style_fn(&finding.snippet))?; + if let Some(enc) = &finding.encoding { + writeln!(f, " |Encoding.....: {}", 
enc)?; + } writeln!(f, " |Fingerprint...: {}", finding.fingerprint)?; writeln!(f, " |Confidence....: {}", finding.confidence)?; writeln!(f, " |Entropy.......: {}", finding.entropy)?; diff --git a/src/validation.rs b/src/validation.rs index ca76943..3548d33 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -1028,6 +1028,7 @@ rules: validation_response_status: StatusCode::OK, validation_success: false, calculated_entropy: 0.0, // or compute your own + is_base64: false, }; let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; let client = reqwest::Client::new(); diff --git a/src/validation.rs.orig b/src/validation.rs.orig new file mode 100644 index 0000000..ca76943 --- /dev/null +++ b/src/validation.rs.orig @@ -0,0 +1,1052 @@ +use std::{ + collections::BTreeMap, + fs, + hash::{Hash, Hasher}, + sync::Arc, + time::{Duration, Instant}, +}; + +use anyhow::Result; +use crossbeam_skiplist::SkipMap; +use dashmap::DashMap; +use http::StatusCode; +use liquid::Object; +use liquid_core::{Value, ValueView}; +use once_cell::sync::OnceCell; +use reqwest::{header, header::HeaderValue, multipart, Client, Url}; +use rustc_hash::FxHashMap; +use tokio::{sync::Notify, time}; +use tracing::debug; + +use crate::{ + location::OffsetSpan, + matcher::{OwnedBlobMatch, SerializableCaptures}, + rules::rule::Validation, +}; + +mod aws; +mod azure; +mod coinbase; +mod gcp; +mod httpvalidation; +mod jwt; +mod mongodb; +mod postgres; +mod utils; + +const VALIDATION_CACHE_SECONDS: u64 = 1200; // 20 minutes +const MAX_VALIDATION_BODY_LEN: usize = 2048; + +// Use SkipMap-based cache instead of a mutex-wrapped FxHashMap. +type Cache = Arc>; + +/// Returns an opaque 64-bit fingerprint for “same secret under the same rule”. 
+fn secret_fingerprint(m: &OwnedBlobMatch) -> u64 { + let mut hasher = xxhash_rust::xxh3::Xxh3::new(); + m.rule.syntax().id.hash(&mut hasher); + + // first capture = the secret text itself + if let Some(c0) = m.captures.captures.get(0) { + c0.value.hash(&mut hasher); + } + hasher.finish() +} + +static VALIDATION_CACHE: OnceCell> = OnceCell::new(); +static IN_FLIGHT: OnceCell>> = OnceCell::new(); + +/// Call this once near program start (e.g. in `main()`) +pub fn init_validation_caches() { + VALIDATION_CACHE.set(DashMap::new()).ok(); + IN_FLIGHT.set(DashMap::new()).ok(); +} + +#[derive(Clone)] +pub struct CachedResponse { + pub body: String, + pub status: StatusCode, + pub is_valid: bool, + pub timestamp: Instant, +} + +impl CachedResponse { + pub fn new(body: String, status: StatusCode, is_valid: bool) -> Self { + Self { body, status, is_valid, timestamp: Instant::now() } + } + + pub fn is_still_valid(&self, cache_duration: Duration) -> bool { + self.timestamp.elapsed() < cache_duration + } +} + +/// Collect dependent variables and missing dependencies from the provided matches. +pub fn collect_variables_and_dependencies( + matches: &[OwnedBlobMatch], +) -> (FxHashMap>, FxHashMap>) { + let mut variable_map: FxHashMap> = FxHashMap::default(); + let mut missing_deps: FxHashMap> = FxHashMap::default(); + + for m in matches { + let rule_id = m.rule.syntax().id.clone(); + for dependency in m.rule.syntax().depends_on_rule.iter().flatten() { + let dependency_rule_id = &dependency.rule_id; + // Use iterator adapter to get all matching dependencies. 
+ let matching_dependencies: Vec<_> = + matches.iter().filter(|x| x.rule.syntax().id == *dependency_rule_id).collect(); + + if !matching_dependencies.is_empty() { + for other_match in matching_dependencies { + let matching_input = other_match + .captures + .captures + .get(1) + .or_else(|| other_match.captures.captures.get(0)) + .expect("Expected at least one capture"); + variable_map + .entry(dependency.variable.to_uppercase()) + .or_insert_with(Vec::new) + .push(( + matching_input.value.to_string(), + other_match.matching_input_offset_span, + )); + } + } else { + missing_deps.entry(rule_id.clone()).or_default().push(dependency.rule_id.clone()); + } + } + } + (variable_map, missing_deps) +} + +/// Render a template and parse the resulting string as a URL. +async fn render_and_parse_url( + parser: &liquid::Parser, + globals: &liquid::Object, + rule_name: &str, + template_url: &str, +) -> Result { + let rendered_url_str = + render_template(parser, globals, rule_name, template_url).await.map_err(|e| { + let error_msg = format!("Error rendering URL template: <{}> {}", rule_name, e); + debug!("{}", error_msg); + error_msg + })?; + + let url = Url::parse(&rendered_url_str).map_err(|e| { + let error_msg = format!("Error parsing rendered URL: {}", e); + debug!("{}", error_msg); + error_msg + })?; + + // Check if the URL is resolvable. + utils::check_url_resolvable(&url).await.map_err(|e| { + let error_msg = format!("URL resolution failed: {}", e); + error_msg + })?; + + Ok(url) +} + +/// Render a template string using Liquid. 
+async fn render_template( + parser: &liquid::Parser, + globals: &liquid::Object, + rule_name: &str, + template_str: &str, +) -> Result { + parser + .parse(template_str) + .map_err(|e| { + let msg = format!("Error parsing template for rule <{}>: {}", rule_name, e); + debug!("{}", msg); + msg + }) + .and_then(|template| { + template.render(globals).map_err(|e| { + let msg = format!("Error rendering template for rule <{}>: {}", rule_name, e); + debug!("{}", msg); + msg + }) + }) +} + +/// Validate a single match with a timeout of 60 seconds. +pub async fn validate_single_match( + m: &mut OwnedBlobMatch, + parser: &liquid::Parser, + client: &Client, + dependent_variables: &FxHashMap>, + missing_dependencies: &FxHashMap>, + cache: &Cache, +) { + let timeout_result = time::timeout(Duration::from_secs(60), async { + timed_validate_single_match( + m, + parser, + client, + dependent_variables, + missing_dependencies, + cache, + ) + .await + }) + .await; + + if timeout_result.is_err() { + m.validation_success = false; + m.validation_response_body = "Validation timed out after 60 seconds".to_string(); + m.validation_response_status = StatusCode::REQUEST_TIMEOUT; + } +} + +/// Perform the actual validation of a match. +/// Guarantees that each | is validated only once per process, +/// even when `--no-dedup` is used. +async fn timed_validate_single_match<'a>( + m: &mut OwnedBlobMatch, + parser: &liquid::Parser, + client: &Client, + dependent_variables: &FxHashMap>, + missing_dependencies: &FxHashMap>, + cache: &Cache, +) { + // ────────────────────────────────────────────────────────── + // 1. 
process-wide fingerprint de-dup + // ────────────────────────────────────────────────────────── + let fp = secret_fingerprint(m); + + if let Some(entry) = VALIDATION_CACHE.get_or_init(DashMap::new).get(&fp) { + if entry.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + m.validation_success = entry.is_valid; + m.validation_response_body = entry.body.clone(); + m.validation_response_status = entry.status; + return; + } + } + if let Some(wait) = IN_FLIGHT.get_or_init(DashMap::new).get(&fp) { + wait.notified().await; + if let Some(entry) = VALIDATION_CACHE.get().unwrap().get(&fp) { + m.validation_success = entry.is_valid; + m.validation_response_body = entry.body.clone(); + m.validation_response_status = entry.status; + } + return; + } + let notify = Arc::new(Notify::new()); + IN_FLIGHT.get().unwrap().insert(fp, notify.clone()); + + // helper to persist result + notify waiters + let commit_and_return = |m: &OwnedBlobMatch| { + VALIDATION_CACHE.get().unwrap().insert( + fp, + CachedResponse { + body: m.validation_response_body.clone(), + status: m.validation_response_status, + is_valid: m.validation_success, + timestamp: Instant::now(), + }, + ); + IN_FLIGHT.get().unwrap().remove(&fp); + notify.notify_waiters(); + }; + // ────────────────────────────────────────────────────────── + + // 2. dependency check + if let Some(missing) = missing_dependencies.get(&m.rule.syntax().id) { + if !missing.is_empty() { + m.validation_success = false; + m.validation_response_body = + format!("Validation skipped - missing dependent rules: {}", missing.join(", ")); + m.validation_response_status = StatusCode::PRECONDITION_REQUIRED; + commit_and_return(m); + return; + } + } + + // 3. 
capture processing + let match_re_result = m.rule.syntax().as_anchored_regex(); + let mut captured_values: Vec<(String, String, usize, usize)> = match match_re_result { + Ok(_) => utils::process_captures(&m.captures), + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("Regex error: {}", e); + m.validation_response_status = StatusCode::INTERNAL_SERVER_ERROR; + commit_and_return(m); + return; + } + }; + + for dep in m.rule.syntax().depends_on_rule.iter().flatten() { + if let Some(vals) = dependent_variables.get(&dep.variable.to_uppercase()) { + for (val, span) in vals { + captured_values.push(( + dep.variable.to_uppercase(), + val.clone(), + span.start, + span.end, + )); + } + } + } + + let mut globals = Object::new(); + for (k, v, ..) in &captured_values { + globals.insert(k.to_uppercase().into(), Value::scalar(v.clone())); + } + + let rule_syntax = m.rule.syntax(); + + // ────────────────────────────────────────────────────────── + // 4. validator switch + // ────────────────────────────────────────────────────────── + match &rule_syntax.validation { + // ---------------------------------------------------- HTTP validator + Some(Validation::Http(http_validation)) => { + // render URL + let url = match render_and_parse_url( + parser, + &globals, + &rule_syntax.name, + &http_validation.request.url, + ) + .await + { + Ok(u) => u, + Err(e) => { + m.validation_success = false; + m.validation_response_body = e; + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; + } + }; + + // build request builder + let request_builder = match httpvalidation::build_request_builder( + client, + &http_validation.request.method, + &url, + &http_validation.request.headers, + &http_validation.request.body, + parser, + &globals, + ) { + Ok(rb) => rb, + Err(e) => { + m.validation_success = false; + m.validation_response_body = e; + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; 
+ } + }; + + let is_multipart = http_validation.request.multipart.is_some(); + let mut cache_key = String::new(); + + // old per-request cache (optional) + if !is_multipart { + let rendered_headers = httpvalidation::process_headers( + &http_validation.request.headers, + parser, + &globals, + &url, + ) + .unwrap_or_default(); + + let mut header_map = BTreeMap::new(); + for (name, value) in rendered_headers.iter() { + if let Ok(v) = value.to_str() { + header_map.insert(name.as_str().to_string(), v.to_string()); + } + } + cache_key = httpvalidation::generate_http_cache_key_parts( + http_validation.request.method.as_str(), + &url, + &header_map, + ); + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + commit_and_return(m); + return; + } + } + } + + // helper to execute single non-multipart request with retry + let exec_single = |builder: reqwest::RequestBuilder| async { + httpvalidation::retry_request( + builder, + 1, + Duration::from_millis(500), + Duration::from_secs(2), + ) + .await + }; + + // run request (multipart vs non-multipart) + let resp_res = if is_multipart { + // build multipart request each retry + let build_request = || async { + let method = httpvalidation::parse_http_method(&http_validation.request.method) + .unwrap_or(reqwest::Method::GET); + + let mut fresh_builder = + client.request(method, url.clone()).timeout(Duration::from_secs(5)); + + if let Ok(mut headers) = httpvalidation::process_headers( + &http_validation.request.headers, + parser, + &globals, + &url, + ) { + // add realistic UA & accept headers + let ua = format!( + "{} {}/{}", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \ + AppleWebKit/537.36 (KHTML, like Gecko) \ + Chrome/132.0.0.0 Safari/537.36", + env!("CARGO_PKG_NAME"), + env!("CARGO_PKG_VERSION") + ); + let 
std_headers = [ + (header::USER_AGENT, ua.as_str()), + (header::ACCEPT , "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"), + (header::ACCEPT_LANGUAGE, "en-US,en;q=0.5"), + (header::ACCEPT_ENCODING, "gzip, deflate, br"), + (header::CONNECTION, "keep-alive"), + ]; + for (hn, hv) in &std_headers { + if let Ok(v) = HeaderValue::from_str(hv) { + headers.insert(hn.clone(), v); + } + } + fresh_builder = fresh_builder.headers(headers); + } + + // build multipart form + let mut form = multipart::Form::new(); + for part in http_validation.request.multipart.as_ref().unwrap().parts.iter() { + match part.part_type.as_str() { + "file" => { + let path = render_template( + parser, + &globals, + &rule_syntax.name, + &part.content, + ) + .await + .unwrap_or_default(); + let bytes = fs::read(path).unwrap_or_default(); + let p = multipart::Part::bytes(bytes) + .mime_str( + part.content_type + .as_deref() + .unwrap_or("application/octet-stream"), + ) + .unwrap_or_else(|_| multipart::Part::text("invalid")); + form = form.part(part.name.clone(), p); + } + "text" => { + let txt = render_template( + parser, + &globals, + &rule_syntax.name, + &part.content, + ) + .await + .unwrap_or_default(); + let p = multipart::Part::text(txt) + .mime_str(part.content_type.as_deref().unwrap_or("text/plain")) + .unwrap_or_else(|_| multipart::Part::text("invalid")); + form = form.part(part.name.clone(), p); + } + _ => { /* ignore */ } + } + } + fresh_builder.multipart(form) + }; + + httpvalidation::retry_multipart_request( + build_request, + 1, + Duration::from_millis(500), + Duration::from_secs(2), + ) + .await + } else { + exec_single(request_builder).await + }; + + // handle result + match resp_res { + Ok(resp) => { + let status = resp.status(); + let headers = resp.headers().clone(); + let mut body = match resp.text().await { + Ok(b) => b, + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("Error reading response: {}", e); + 
m.validation_response_status = StatusCode::BAD_GATEWAY; + commit_and_return(m); + return; + } + }; + if body.len() > MAX_VALIDATION_BODY_LEN { + body.truncate(MAX_VALIDATION_BODY_LEN); + } + + m.validation_response_status = status; + m.validation_response_body = body.clone(); + let matchers = http_validation + .request + .response_matcher + .as_ref() + .expect("missing response_matcher"); + + m.validation_success = httpvalidation::validate_response( + matchers, + &body, + &status, + &headers, + http_validation.request.response_is_html, + ); + + if !is_multipart && !cache_key.is_empty() { + cache.insert( + cache_key, + CachedResponse { + body, + status, + is_valid: m.validation_success, + timestamp: Instant::now(), + }, + ); + } + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("HTTP error: {:?}", e); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } + } + + // ---------------------------------------------------- MongoDB validator + Some(Validation::MongoDB) => { + let uri = globals + .get("TOKEN") + .and_then(|v| v.as_scalar()) + .map(|s| s.into_owned().to_kstr().to_string()) + .unwrap_or_default(); + + if uri.is_empty() { + m.validation_success = false; + m.validation_response_body = "MongoDB URI not found.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; + } + + let cache_key = mongodb::generate_mongodb_cache_key(&uri); + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + commit_and_return(m); + return; + } + } + + match mongodb::validate_mongodb(&uri).await { + Ok((ok, msg)) => { + m.validation_success = ok; + m.validation_response_body = msg; + m.validation_response_status = + if ok { StatusCode::OK } else { 
StatusCode::UNAUTHORIZED }; + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("MongoDB validation error: {}", e); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } + } + + // ------------------------------------------------ Azure Storage validator + Some(Validation::AzureStorage) => { + let storage_key = captured_values + .iter() + .find(|(n, ..)| n == "TOKEN") + .map(|(_, v, ..)| v.clone()) + .unwrap_or_default(); + let storage_account = + utils::find_closest_variable(&captured_values, &storage_key, "TOKEN", "AZURENAME") + .unwrap_or_default(); + + if storage_account.is_empty() || storage_key.is_empty() { + m.validation_success = false; + m.validation_response_body = "Missing Azure Storage account or key.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; + } + + let creds_json = format!( + r#"{{"storage_account":"{}","storage_key":"{}"}}"#, + storage_account, storage_key + ); + let cache_key = azure::generate_azure_cache_key(&creds_json); + + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + commit_and_return(m); + return; + } + } + + match azure::validate_azure_storage_credentials(&creds_json, cache).await { + Ok((ok, msg)) => { + m.validation_success = ok; + m.validation_response_body = msg; + m.validation_response_status = + if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("Azure Storage error: {}", e); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } + cache.insert( + cache_key, + CachedResponse { + body: m.validation_response_body.clone(), + status: m.validation_response_status, + is_valid: m.validation_success, 
+ timestamp: Instant::now(), + }, + ); + } + + // ------------------------------------------------ Postgres validator + Some(Validation::Postgres) => { + let pg_url = globals + .get("TOKEN") + .and_then(|v| v.as_scalar()) + .map(|s| s.into_owned().to_kstr().to_string()) + .unwrap_or_default(); + + if pg_url.is_empty() { + m.validation_success = false; + m.validation_response_body = "Postgres URL not found.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; + } + + let cache_key = postgres::generate_postgres_cache_key(&pg_url); + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + commit_and_return(m); + return; + } + } + + match postgres::validate_postgres(&pg_url).await { + Ok((ok, meta)) => { + m.validation_success = ok; + m.validation_response_body = if ok { + format!("Postgres connection is valid. 
Metadata: {:?}", meta) + } else { + "Postgres connection failed.".to_string() + }; + m.validation_response_status = + if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("Postgres error: {}", e); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } + cache.insert( + cache_key, + CachedResponse { + body: m.validation_response_body.clone(), + status: m.validation_response_status, + is_valid: m.validation_success, + timestamp: Instant::now(), + }, + ); + } + // ---------------------------------------------------- JWT validator + Some(Validation::JWT) => { + let token = captured_values + .iter() + .find(|(n, ..)| n == "TOKEN") + .map(|(_, v, ..)| v.clone()) + .unwrap_or_default(); + + if token.is_empty() { + m.validation_success = false; + m.validation_response_body = "JWT token not found.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; + } + + match jwt::validate_jwt(&token).await { + Ok((ok, msg)) => { + m.validation_success = ok; + m.validation_response_body = msg; + m.validation_response_status = + if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("JWT validation error: {}", e); + m.validation_response_status = StatusCode::BAD_REQUEST; + } + } + } + // ---------------------------------------------------- AWS validator + Some(Validation::AWS) => { + let secret = captured_values + .iter() + .find(|(n, ..)| n == "TOKEN") + .map(|(_, v, ..)| v.clone()) + .unwrap_or_default(); + let akid = utils::find_closest_variable(&captured_values, &secret, "TOKEN", "AKID") + .unwrap_or_default(); + + if akid.is_empty() || secret.is_empty() { + m.validation_success = false; + m.validation_response_body = "Missing AWS access-key ID or secret.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + 
commit_and_return(m); + return; + } + + let cache_key = aws::generate_aws_cache_key(&akid, &secret); + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + commit_and_return(m); + return; + } + } + + if let Err(e) = aws::validate_aws_credentials_input(&akid, &secret) { + m.validation_success = false; + m.validation_response_body = format!("Invalid AWS credentials ({}): {}", akid, e); + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; + } + + match aws::validate_aws_credentials(&akid, &secret, cache).await { + Ok((ok, arn)) => { + m.validation_success = ok; + m.validation_response_body = format!("{} --- ARN: {}", akid, arn); + m.validation_response_status = + if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; + if let Ok(acct) = aws::aws_key_to_account_number(&akid) { + m.validation_response_body + .push_str(&format!(" --- AWS Account Number: {:012}", acct)); + } + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("AWS validation error ({}): {}", akid, e); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } + cache.insert( + cache_key, + CachedResponse { + body: m.validation_response_body.clone(), + status: m.validation_response_status, + is_valid: m.validation_success, + timestamp: Instant::now(), + }, + ); + } + + // ----------------------------------------------------- GCP validator + Some(Validation::GCP) => { + let gcp_json = globals + .get("TOKEN") + .and_then(|v| v.as_scalar()) + .map(|s| s.into_owned().to_kstr().to_string()) + .unwrap_or_default(); + + if gcp_json.is_empty() { + m.validation_success = false; + m.validation_response_body = "GCP JSON not found.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + 
commit_and_return(m); + return; + } + + let cache_key = gcp::generate_gcp_cache_key(&gcp_json); + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + commit_and_return(m); + return; + } + } + + match gcp::GcpValidator::global() { + Ok(validator) => { + match validator.validate_gcp_credentials(&gcp_json.as_bytes()).await { + Ok((ok, meta)) => { + m.validation_success = ok; + m.validation_response_body = meta.join("\n"); + m.validation_response_status = + if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("GCP validation error: {}", e); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("Failed to create GCP validator: {}", e); + m.validation_response_status = StatusCode::INTERNAL_SERVER_ERROR; + } + } + cache.insert( + cache_key, + CachedResponse { + body: m.validation_response_body.clone(), + status: m.validation_response_status, + is_valid: m.validation_success, + timestamp: Instant::now(), + }, + ); + } + // ----------------------------------------------------- Coinbase validator + Some(Validation::Coinbase) => { + let cred_name = globals + .get("CRED_NAME") + .and_then(|v| v.as_scalar()) + .map(|s| s.into_owned().to_kstr().to_string()) + .unwrap_or_default(); + let private_key = globals + .get("PRIVATE_KEY") + .and_then(|v| v.as_scalar()) + .map(|s| s.into_owned().to_kstr().to_string()) + .unwrap_or_default(); + + if cred_name.is_empty() || private_key.is_empty() { + m.validation_success = false; + m.validation_response_body = "Missing key name or private key.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + 
commit_and_return(m); + return; + } + + match coinbase::validate_cdp_api_key(&cred_name, &private_key, client, parser, cache) + .await + { + Ok((ok, msg)) => { + m.validation_success = ok; + m.validation_response_body = msg; + m.validation_response_status = + if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("Coinbase validation error: {}", e); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } + } + // --------------------------------------------------------- Raw / none + Some(Validation::Raw(raw)) => { + debug!("Raw validation not implemented: {}", raw); + m.validation_success = false; + m.validation_response_body = "Validator not implemented".to_string(); + m.validation_response_status = StatusCode::NOT_IMPLEMENTED; + } + None => { /* no validation specified */ } + } + + // 5. persist result for success path + commit_and_return(m); +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use anyhow::Result; + use crossbeam_skiplist::SkipMap; + use http::StatusCode; + use rustc_hash::FxHashMap; + + use crate::{ + blob::BlobId, + liquid_filters::register_all, + location::OffsetSpan, + matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures}, + rules::{ + rule::{Confidence, Rule}, + Rules, + }, + validation::{validate_single_match, Cache}, + }; + #[tokio::test] + async fn test_actual_pypi_token_validation() -> Result<()> { + // Minimal PyPI YAML snippet for testing + let pypi_yaml = r#" +rules: + - name: PyPI Upload Token + id: kingfisher.pypi.1 + pattern: | + (?x) + \b + ( + pypi-AgEIcHlwaS5vcmc[a-zA-Z0-9_-]{50,} + ) + (?:[^a-zA-Z0-9_-]|$) + min_entropy: 4.0 + confidence: medium + examples: + - '# password = pypi-AgEIcHlwaS5vcmcCJDkwNzYwNzU1LWMwOTUtNGNkOC1iYjQzLTU3OWNhZjI1NDQ1MwACJXsicGVybWCf99lvbnMiOiAidXNlciIsICJ2ZXJzaW9uIjogMX0AAAYgSpW5PAywXvchMUQnkF5H6-SolJysfUvIWopMsxE4hCM' + - 'password: 
pypi-AgEIcHlwaS5vcmcCJGExMDIxZjRhLTFhZDMtNDc4YS1iOWNmLWQwCf99OTIwZjFjNwACSHsicGVybWlzc2lvbnMiOiB7InByb2plY3RzIjogWyJkamFuZ28tY2hhbm5lbHMtanNvbnJwYyJdfSwgInZlcnNpb24iOiAxfQAABiBZg48cIBQt7HckwM4G3q-462xphsLbm7IZvjqMS4jvQw' + validation: + type: Http + content: + request: + method: POST + url: https://upload.pypi.org/legacy/ + response_is_html: true + response_matcher: + - report_response: true + - type: WordMatch + words: + - "isn't allowed to upload to project" + headers: + Authorization: 'Basic {{ "__token__:" | append: TOKEN | b64enc }}' + multipart: + parts: + - name: name + type: text + content: "my-package" + - name: version + type: text + content: "0.0.1" + - name: filetype + type: text + content: "sdist" + - name: metadata_version + type: text + content: "2.1" + - name: summary + type: text + content: "A simple example package" + - name: home_page + type: text + content: "https://github.com/yourusername/my_package" + - name: sha256_digest + type: text + content: "0447379dd46c4ca8b8992bda56d07b358d015efb9300e6e16f224f4536e71d64" + - name: md5_digest + type: text + content: "9b4036ab91a71124ab9f1d32a518e2bb" + - name: :action + type: text + content: "file_upload" + - name: protocol_version + type: text + content: "1" + - name: content + type: file + content: "path/to/my_package-0.0.1.tar.gz" + content_type: "application/octet-stream" + "#; + // Use from_paths_and_contents to parse the YAML snippet into a Rules object + let data = vec![(std::path::Path::new("pypi_test.yaml"), pypi_yaml.as_bytes())]; + let rules = Rules::from_paths_and_contents(data, Confidence::Low)?; + // Find the PyPI rule we just loaded + let pypi_rule_syntax = rules + .iter_rules() + .find(|r| r.id == "kingfisher.pypi.1") + .expect("Failed to find PyPI rule in test YAML") + .clone(); // Clone so we can create a `Rule` from it + // Wrap that into a `Rule` object + let pypi_rule = Rule::new(pypi_rule_syntax); + ////////////////////////////////////////// + // + // Your actual PyPI token to test 
+ let token = ""; + let id = BlobId::new(&pypi_yaml.as_bytes()); + // Construct an `OwnedBlobMatch` (all fields needed): + let mut owned_blob_match = OwnedBlobMatch { + rule: pypi_rule.into(), + blob_id: id, + finding_fingerprint: 0, // dummy value + // matching_input: token.as_bytes().to_vec(), + matching_input_offset_span: OffsetSpan { start: 0, end: token.len() }, + captures: SerializableCaptures { + captures: vec![SerializableCapture { + name: Some("TOKEN".to_string()), + match_number: -1, + start: 0, + end: token.len(), + value: token.into(), + }], + }, + validation_response_body: String::new(), + validation_response_status: StatusCode::OK, + validation_success: false, + calculated_entropy: 0.0, // or compute your own + }; + let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; + let client = reqwest::Client::new(); + let cache: Cache = Arc::new(SkipMap::new()); + let dependent_vars = FxHashMap::default(); + let missing_deps = FxHashMap::default(); + // Run the validation + validate_single_match( + &mut owned_blob_match, + &parser, + &client, + &dependent_vars, + &missing_deps, + &cache, + ) + .await; + println!("Success? 
{:?}", owned_blob_match.validation_success); + println!("Status: {:?}", owned_blob_match.validation_response_status); + println!("Body: {:?}", owned_blob_match.validation_response_body); + Ok(()) + } +} diff --git a/src/validation/mongodb.rs b/src/validation/mongodb.rs index c6cf600..a899a32 100644 --- a/src/validation/mongodb.rs +++ b/src/validation/mongodb.rs @@ -93,9 +93,9 @@ fn is_local_host(h: &str) -> bool { const FAST_CONNECT_MS: u64 = 700; // direct single-host URIs const FAST_SELECT_MS: u64 = 300; -const SRV_PARSE_MS: u64 = 1_000; // limit DNS resolution time -const SRV_CONNECT_MS: u64 = 1500;//700; -const SRV_SELECT_MS: u64 = 1500;//300; +const SRV_PARSE_MS: u64 = 2_000; // limit DNS resolution time +const SRV_CONNECT_MS: u64 = 2500; //700; +const SRV_SELECT_MS: u64 = 2500; //300; /// Validates a MongoDB URI in ≤ 2 s. Returns `(bool, String)` where the /// boolean indicates success and the string provides a status message. diff --git a/tests/fingerprint_dedup.rs b/tests/fingerprint_dedup.rs index bfcbee0..e4380ac 100644 --- a/tests/fingerprint_dedup.rs +++ b/tests/fingerprint_dedup.rs @@ -47,6 +47,7 @@ fn make_match(fp: u64) -> Match { validation_success: false, calculated_entropy: 0.0, visible: true, + is_base64: false, } } diff --git a/tests/int_base64.rs b/tests/int_base64.rs new file mode 100644 index 0000000..1769f72 --- /dev/null +++ b/tests/int_base64.rs @@ -0,0 +1,34 @@ +use assert_cmd::prelude::*; +use predicates::prelude::*; +use std::{fs, process::Command}; +use tempfile::tempdir; + +// Ensure base64 encoded secrets are decoded and detected +#[test] +fn detects_base64_encoded_secret() -> anyhow::Result<()> { + let dir = tempdir()?; + let file_path = dir.path().join("secret.txt"); + // Base64 for ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa + let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDJxTHFQYQ=="; + fs::write(&file_path, encoded)?; + + Command::cargo_bin("kingfisher")? 
+ .args([ + "scan", + dir.path().to_str().unwrap(), + "--no-binary", + "--confidence=low", + "--format", + "json", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout( + predicate::str::contains("ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa") + .and(predicate::str::contains("\"encoding\": \"base64\"")), + ); + + dir.close()?; + Ok(()) +} From 5638a6cb450b7679b3451258e5c83901d7ea4600 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 30 Aug 2025 19:40:11 -0700 Subject: [PATCH 3/9] Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance. This has a small performance impact and can be disabled with --no-base64 --- CHANGELOG.md | 2 +- src/cli/commands/scan.rs | 4 ++ src/main.rs | 1 + src/matcher.rs | 78 ++++++++++++++++++----------------- src/reporter/json_format.rs | 1 + src/scanner/enumerate.rs | 2 +- src/scanner/processing.rs | 3 +- src/scanner/repos.rs | 2 +- tests/int_allowlist.rs | 1 + tests/int_base64.rs | 29 ++++++++++++- tests/int_dedup.rs | 1 + tests/int_github.rs | 1 + tests/int_gitlab.rs | 2 + tests/int_redact.rs | 1 + tests/int_slack.rs | 2 + tests/int_validation_cache.rs | 1 + tests/int_vulnerable_files.rs | 2 + 17 files changed, 91 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bfaed2..6e57853 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ All notable changes to this project will be documented in this file. - Improved rules: github oauth2, diffbot, mailchimp, aws - Added validation to SauceLabs rule - Added rules: shodan, bitly, flickr -- Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance +- Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance. 
This has a small performance impact and can be disabled with `--no-base64` ## [1.46.0] - Improved rules: AWS, pem diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs index 5a4d22b..255417f 100644 --- a/src/cli/commands/scan.rs +++ b/src/cli/commands/scan.rs @@ -92,6 +92,10 @@ pub struct ScanArgs { #[arg(long, short = 'r', default_value_t = false)] pub redact: bool, + /// Skip decoding Base64 blobs before scanning + #[arg(long, default_value_t = false)] + pub no_base64: bool, + /// Timeout for Git repository scanning in seconds #[arg(long, default_value_t = 1800, value_name = "SECONDS")] pub git_repo_timeout: u64, diff --git a/src/main.rs b/src/main.rs index 639c2de..edb0972 100644 --- a/src/main.rs +++ b/src/main.rs @@ -335,6 +335,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { skip_regex: Vec::new(), skip_word: Vec::new(), output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_base64: false, } } /// Run the rules check command diff --git a/src/matcher.rs b/src/matcher.rs index d817bcc..6f1531e 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -277,6 +277,7 @@ impl<'a> Matcher<'a> { lang: Option, redact: bool, no_dedup: bool, + no_base64: bool, ) -> Result> where 'a: 'b, @@ -312,7 +313,7 @@ impl<'a> Matcher<'a> { // Opportunistically look for standalone Base64 blobs. If neither // the raw scan nor this check yields anything, we can return early // before doing any heavier work. 
- let mut b64_items = get_base64_strings(blob.bytes()); + let mut b64_items = if no_base64 { Vec::new() } else { get_base64_strings(blob.bytes()) }; if self.user_data.raw_matches_scratch.is_empty() && b64_items.is_empty() { // Only record in seen_blobs if deduplication is enabled @@ -428,42 +429,45 @@ impl<'a> Matcher<'a> { } } } - // If the blob contains standalone Base64 blobs, decode and scan them as well - const MAX_B64_DEPTH: usize = 2; // decode at most two levels deep - let mut b64_stack: Vec<(DecodedData, usize)> = - b64_items.drain(..).map(|d| (d, 0)).collect(); - while let Some((item, depth)) = b64_stack.pop() { - for (rule_id_usize, rule) in rules_db.rules.iter().enumerate() { - let re = &rules_db.anchored_regexes[rule_id_usize]; - filter_match( - blob, - rule.clone(), - re, - item.pos_start, - item.pos_end, - &mut matches, - &mut previous_matches, - rule_id_usize, - &mut seen_matches, - origin, - Some(item.decoded.clone()), - true, - redact, - &filename, - self.profiler.as_ref(), - ); - } - if depth + 1 < MAX_B64_DEPTH { - for nested in get_base64_strings(item.decoded.as_bytes()) { - b64_stack.push(( - DecodedData { - original: nested.original, - decoded: nested.decoded, - pos_start: item.pos_start, - pos_end: item.pos_end, - }, - depth + 1, - )); + + if !no_base64 { + // If the blob contains standalone Base64 blobs, decode and scan them as well + const MAX_B64_DEPTH: usize = 2; // decode at most two levels deep + let mut b64_stack: Vec<(DecodedData, usize)> = + b64_items.drain(..).map(|d| (d, 0)).collect(); + while let Some((item, depth)) = b64_stack.pop() { + for (rule_id_usize, rule) in rules_db.rules.iter().enumerate() { + let re = &rules_db.anchored_regexes[rule_id_usize]; + filter_match( + blob, + rule.clone(), + re, + item.pos_start, + item.pos_end, + &mut matches, + &mut previous_matches, + rule_id_usize, + &mut seen_matches, + origin, + Some(item.decoded.clone()), + true, + redact, + &filename, + self.profiler.as_ref(), + ); + } + if depth + 1 
< MAX_B64_DEPTH { + for nested in get_base64_strings(item.decoded.as_bytes()) { + b64_stack.push(( + DecodedData { + original: nested.original, + decoded: nested.decoded, + pos_start: item.pos_start, + pos_end: item.pos_end, + }, + depth + 1, + )); + } } } } diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index da5f782..0b562d2 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -128,6 +128,7 @@ mod tests { manage_baseline: false, skip_regex: Vec::new(), skip_word: Vec::new(), + no_base64: false, } } diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index a999f31..14861ff 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -181,7 +181,7 @@ pub fn enumerate_filesystem_inputs( return Ok(()); } progress.inc(blob.len().try_into().unwrap()); - match processor.run(origin, blob, args.no_dedup, args.redact) { + match processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64) { Ok(None) => { // nothing to record } diff --git a/src/scanner/processing.rs b/src/scanner/processing.rs index e88393f..9447dbf 100644 --- a/src/scanner/processing.rs +++ b/src/scanner/processing.rs @@ -25,11 +25,12 @@ impl<'a> BlobProcessor<'a> { blob: Blob, no_dedup: bool, redact: bool, + no_base64: bool, ) -> Result> { let blob_id = blob.id.hex(); let _span = debug_span!("matcher", blob_id).entered(); let t1 = Instant::now(); - let res = self.matcher.scan_blob(&blob, &origin, None, redact, no_dedup)?; + let res = self.matcher.scan_blob(&blob, &origin, None, redact, no_dedup, no_base64)?; let scan_us = t1.elapsed().as_micros(); match res { // blob already seen, but with no matches; nothing to do! 
diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 68f6f67..6770434 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -427,7 +427,7 @@ pub async fn fetch_s3_objects( let blob = crate::blob::Blob::from_bytes(bytes); if let Some((origin, blob_md, scored_matches)) = - processor.run(origin, blob, args.no_dedup, args.redact)? + processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64)? { // Wrap origin & metadata once: let origin_arc = Arc::new(origin); diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs index d287616..0370755 100644 --- a/tests/int_allowlist.rs +++ b/tests/int_allowlist.rs @@ -105,6 +105,7 @@ fn run_skiplist(skip_regex: Vec, skip_skipword: Vec) -> Result anyhow::Result<()> { "--no-update-check", ]) .assert() - .code(200) + .code(0) .stdout( predicate::str::contains("ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa") .and(predicate::str::contains("\"encoding\": \"base64\"")), @@ -32,3 +32,30 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> { dir.close()?; Ok(()) } + +// Ensure disabling Base64 decoding suppresses encoded secrets +#[test] +fn skips_base64_when_disabled() -> anyhow::Result<()> { + let dir = tempdir()?; + let file_path = dir.path().join("secret.txt"); + let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDJxTHFQYQ=="; + fs::write(&file_path, encoded)?; + + Command::cargo_bin("kingfisher")? 
+ .args([ + "scan", + dir.path().to_str().unwrap(), + "--no-binary", + "--no-base64", + "--confidence=low", + "--format", + "json", + "--no-update-check", + ]) + .assert() + .code(0) + .stdout(predicate::str::contains("\"findings\":0")); + + dir.close()?; + Ok(()) +} \ No newline at end of file diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index fe354a7..8ed1341 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -121,6 +121,7 @@ rules: manage_baseline: false, skip_regex: Vec::new(), skip_word: Vec::new(), + no_base64: false, }; let global_args = GlobalArgs { diff --git a/tests/int_github.rs b/tests/int_github.rs index f632bf2..82c64ae 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -108,6 +108,7 @@ fn test_github_remote_scan() -> Result<()> { manage_baseline: false, skip_regex: Vec::new(), skip_word: Vec::new(), + no_base64: false, }; // Create global arguments let global_args = GlobalArgs { diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index a7e75d3..9cfde7c 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -106,6 +106,7 @@ fn test_gitlab_remote_scan() -> Result<()> { manage_baseline: false, skip_regex: Vec::new(), skip_word: Vec::new(), + no_base64: false, }; let global_args = GlobalArgs { @@ -213,6 +214,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { manage_baseline: false, skip_regex: Vec::new(), skip_word: Vec::new(), + no_base64: false, }; let global_args = GlobalArgs { diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 03e8afb..9be8c4a 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -88,6 +88,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> { manage_baseline: false, skip_regex: Vec::new(), skip_word: Vec::new(), + no_base64: false, }; let global_args = GlobalArgs { diff --git a/tests/int_slack.rs b/tests/int_slack.rs index abbf3ba..0bcae59 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -94,6 +94,7 @@ impl TestContext { manage_baseline: false, 
skip_regex: Vec::new(), skip_word: Vec::new(), + no_base64: false, }; let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; @@ -191,6 +192,7 @@ async fn test_scan_slack_messages() -> Result<()> { manage_baseline: false, skip_regex: Vec::new(), skip_word: Vec::new(), + no_base64: false, }; let global_args = GlobalArgs { diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 24148ec..0cf2a08 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -164,6 +164,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> { manage_baseline: false, skip_regex: Vec::new(), skip_word: Vec::new(), + no_base64: false, }; /* --------------------------------------------------------- * diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 34fe709..c53b8fc 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -107,6 +107,7 @@ impl TestContext { manage_baseline: false, skip_regex: Vec::new(), skip_word: Vec::new(), + no_base64: false, }; let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules) @@ -189,6 +190,7 @@ impl TestContext { manage_baseline: false, skip_regex: Vec::new(), skip_word: Vec::new(), + no_base64: false, }; let global_args = GlobalArgs { From 5c33aa0b718b5c0bd7a2416d11b5f458ce7f0323 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 30 Aug 2025 19:40:22 -0700 Subject: [PATCH 4/9] Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance. 
This has a small performance impact and can be disabled with --no-base64 --- src/main.rs | 4 +++- src/matcher.rs | 2 +- src/validation/mongodb.rs | 7 ++----- src/validation/postgres.rs | 27 ++++++++++++++++++++------- tests/int_base64.rs | 2 +- 5 files changed, 27 insertions(+), 15 deletions(-) diff --git a/src/main.rs b/src/main.rs index edb0972..6be95c4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -255,7 +255,9 @@ async fn async_main(args: CommandLineArgs) -> Result<()> { } }, }, - Command::SelfUpdate => anyhow::bail!("SelfUpdate command should not reach this branch"), + Command::SelfUpdate => { + anyhow::bail!("SelfUpdate command should not reach this branch") + } } if let Some(msg) = update_msg { info!("{msg}"); diff --git a/src/matcher.rs b/src/matcher.rs index 6f1531e..1cbb16f 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -430,7 +430,7 @@ impl<'a> Matcher<'a> { } } - if !no_base64 { + if !no_base64 { // If the blob contains standalone Base64 blobs, decode and scan them as well const MAX_B64_DEPTH: usize = 2; // decode at most two levels deep let mut b64_stack: Vec<(DecodedData, usize)> = diff --git a/src/validation/mongodb.rs b/src/validation/mongodb.rs index a899a32..19185b6 100644 --- a/src/validation/mongodb.rs +++ b/src/validation/mongodb.rs @@ -107,10 +107,7 @@ pub async fn validate_mongodb(uri: &str) -> Result<(bool, String)> { // ---- refuse localhost/loopback/UDS outright if uri_targets_localhost(uri) { - return Ok(( - false, - "Refusing to validate localhost/loopback MongoDB URIs.".to_string(), - )); + return Ok((false, "Refusing to validate localhost/loopback MongoDB URIs.".to_string())); } let is_srv = uri.starts_with("mongodb+srv://"); @@ -164,4 +161,4 @@ pub fn generate_mongodb_cache_key(mongodb_uri: &str) -> String { let mut hasher = Sha1::new(); hasher.update(mongodb_uri.as_bytes()); format!("MongoDB:{:x}", hasher.finalize()) -} \ No newline at end of file +} diff --git a/src/validation/postgres.rs b/src/validation/postgres.rs index 
71ca608..c262b30 100644 --- a/src/validation/postgres.rs +++ b/src/validation/postgres.rs @@ -6,7 +6,11 @@ use rustls::{client::ClientConfig, RootCertStore}; use rustls_native_certs::{load_native_certs, CertificateResult}; use sha1::{Digest, Sha1}; use tokio::time::{error::Elapsed, timeout}; -use tokio_postgres::{config::{Host, SslMode}, tls::NoTls, Config, Error}; +use tokio_postgres::{ + config::{Host, SslMode}, + tls::NoTls, + Config, Error, +}; use tokio_postgres_rustls::MakeRustlsConnect; use tracing::debug; @@ -59,10 +63,12 @@ fn is_local_tcp_host(s: &str) -> bool { // Direct IPs if let Ok(ip) = host.parse::() { return match ip { - std::net::IpAddr::V4(v4) => - v4.is_loopback() || v4.is_unspecified() || v4.is_link_local(), - std::net::IpAddr::V6(v6) => - v6.is_loopback() || v6.is_unspecified() || v6.is_unicast_link_local(), + std::net::IpAddr::V4(v4) => { + v4.is_loopback() || v4.is_unspecified() || v4.is_link_local() + } + std::net::IpAddr::V6(v6) => { + v6.is_loopback() || v6.is_unspecified() || v6.is_unicast_link_local() + } }; } @@ -74,7 +80,6 @@ fn is_local_tcp_host(s: &str) -> bool { || lower.starts_with("localhost6.") } - async fn check_postgres_db_connection( mut cfg: Config, original_mode: SslMode, @@ -201,7 +206,15 @@ mod tests { #[test] fn detects_local_hosts() { - for h in ["localhost", "LOCALHOST", "localhost.localdomain", "localhost6", "127.0.0.1", "[::1]", "::"] { + for h in [ + "localhost", + "LOCALHOST", + "localhost.localdomain", + "localhost6", + "127.0.0.1", + "[::1]", + "::", + ] { assert!(is_local_tcp_host(h), "should treat {h} as local"); } for h in ["db.example.com", "10.0.0.1"] { diff --git a/tests/int_base64.rs b/tests/int_base64.rs index 614ff90..f5d8858 100644 --- a/tests/int_base64.rs +++ b/tests/int_base64.rs @@ -58,4 +58,4 @@ fn skips_base64_when_disabled() -> anyhow::Result<()> { dir.close()?; Ok(()) -} \ No newline at end of file +} From f24f00d6bd2f3eca64660d091f21d425c249862c Mon Sep 17 00:00:00 2001 From: Mick Grove Date: 
Sat, 30 Aug 2025 20:02:53 -0700 Subject: [PATCH 5/9] Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance. This has a small performance impact and can be disabled with --no-base64 --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 10fdbf7..cccbf34 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,8 @@ Originally forked from Praetorian’s Nosey Parker, Kingfisher adds live cloud-A - **Slack messages**: query‑based scans with `--slack-query` - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous - **Compressed Files**: Supports extracting and scanning compressed files for secrets -- **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) +- Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance. 
This has a small performance impact and can be disabled with `--no-base64` +- **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) From 43fce5159ae7db94e2e8e23d3ca78bf83b0c9cc6 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 30 Aug 2025 20:07:31 -0700 Subject: [PATCH 6/9] Fix changes in response to code review --- data/rules/sauce.yml | 2 +- src/main.rs | 30 +++++++++++++++--------------- src/validation/mongodb.rs | 4 ++-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/data/rules/sauce.yml b/data/rules/sauce.yml index e116883..6c8be24 100644 --- a/data/rules/sauce.yml +++ b/data/rules/sauce.yml @@ -27,7 +27,7 @@ rules: (?xi) \b ( - (:?api|ondemand)\.(?:us|eu)-(?:west|east|central)-[0-9]\.saucelabs\.com + (?:api|ondemand)\.(?:us|eu)-(?:west|east|central)-[0-9]\.saucelabs\.com ) \b confidence: medium diff --git a/src/main.rs b/src/main.rs index 6be95c4..bfd16f8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,27 +5,27 @@ // * Fallback - system allocator (`system-alloc` feature) // ──────────────────────────────────────────────────────────── -// // --- jemalloc (opt-in) --- -// #[cfg(feature = "use-jemalloc")] -// #[global_allocator] -// static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +// --- jemalloc (opt-in) --- +#[cfg(feature = "use-jemalloc")] +#[global_allocator] +static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -// // --- mimalloc (default) --- -// #[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] -// #[global_allocator] -// static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; - -// // --- system allocator (explicit opt-out) --- -// #[cfg(feature = "system-alloc")] -// use std::alloc::System; 
-// #[cfg(feature = "system-alloc")] -// #[global_allocator] -// static GLOBAL: System = System; +// --- mimalloc (default) --- +#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; +// --- system allocator (explicit opt-out) --- +#[cfg(feature = "system-alloc")] use std::alloc::System; +#[cfg(feature = "system-alloc")] #[global_allocator] static GLOBAL: System = System; +// use std::alloc::System; +// #[global_allocator] +// static GLOBAL: System = System; + use std::{ io::Read, sync::{Arc, Mutex}, diff --git a/src/validation/mongodb.rs b/src/validation/mongodb.rs index 19185b6..bb3da23 100644 --- a/src/validation/mongodb.rs +++ b/src/validation/mongodb.rs @@ -94,8 +94,8 @@ fn is_local_host(h: &str) -> bool { const FAST_CONNECT_MS: u64 = 700; // direct single-host URIs const FAST_SELECT_MS: u64 = 300; const SRV_PARSE_MS: u64 = 2_000; // limit DNS resolution time -const SRV_CONNECT_MS: u64 = 2500; //700; -const SRV_SELECT_MS: u64 = 2500; //300; +const SRV_CONNECT_MS: u64 = 2500; +const SRV_SELECT_MS: u64 = 2500; /// Validates a MongoDB URI in ≤ 2 s. Returns `(bool, String)` where the /// boolean indicates success and the string provides a status message. 
From 8b43f982c6c688c50165a97f11da8e139446e4fe Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 30 Aug 2025 21:25:12 -0700 Subject: [PATCH 7/9] Fix tests --- tests/int_base64.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/int_base64.rs b/tests/int_base64.rs index f5d8858..2c535da 100644 --- a/tests/int_base64.rs +++ b/tests/int_base64.rs @@ -23,7 +23,7 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> { "--no-update-check", ]) .assert() - .code(0) + .code(200) .stdout( predicate::str::contains("ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa") .and(predicate::str::contains("\"encoding\": \"base64\"")), From 2a3a4956d2cd5cc1a4181b649222b948e797b221 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 30 Aug 2025 22:24:13 -0700 Subject: [PATCH 8/9] fix ci build error --- Makefile | 9 ++++++--- tests/int_base64.rs | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index b4d63a6..14a4c6a 100644 --- a/Makefile +++ b/Makefile @@ -183,14 +183,15 @@ ubuntu-arm64: setup-zig # ensures Zig & cargo-zigbuild exist $(MAKE) list-archives - darwin-arm64: @echo "Checking Rust for darwin-arm64..." @$(MAKE) check-rust || ( \ echo "Rust not found or out-of-date. Installing via Homebrew..." && \ brew install rust \ ) - @brew install boost cmake gcc libpcap pkg-config ragel sqlite coreutils gnu-tar || true + @brew list cmake >/dev/null 2>&1 || brew install cmake + @brew list boost >/dev/null 2>&1 || brew install boost + @brew install gcc libpcap pkg-config ragel sqlite coreutils gnu-tar @rustup target add aarch64-apple-darwin cargo build --release --target aarch64-apple-darwin --features system-alloc @cd target/aarch64-apple-darwin/release && \ @@ -212,7 +213,9 @@ darwin-x64: echo "Rust not found or out-of-date. Installing via Homebrew..." 
&& \ brew install rust \ ) - @brew install boost cmake gcc libpcap pkg-config ragel sqlite coreutils gnu-tar || true + @brew list cmake >/dev/null 2>&1 || brew install cmake + @brew list boost >/dev/null 2>&1 || brew install boost + @brew install gcc libpcap pkg-config ragel sqlite coreutils gnu-tar @rustup target add x86_64-apple-darwin source $$HOME/.cargo/env && cargo build --release --target x86_64-apple-darwin --features system-alloc @cd target/x86_64-apple-darwin/release && \ diff --git a/tests/int_base64.rs b/tests/int_base64.rs index 2c535da..8ed9a2c 100644 --- a/tests/int_base64.rs +++ b/tests/int_base64.rs @@ -20,6 +20,7 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> { "--confidence=low", "--format", "json", + "--no-validate", "--no-update-check", ]) .assert() From dcd0460e8ad7293d34226d20136b1650fb65e149 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 31 Aug 2025 10:27:16 -0700 Subject: [PATCH 9/9] fix ci build error --- data/rules/bitly.yml | 2 +- data/rules/docker.yml | 4 ++-- data/rules/flickr.yml | 6 +++--- tests/int_rules_no_validated_findings.rs | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/data/rules/bitly.yml b/data/rules/bitly.yml index 11ec1da..3f4ca02 100644 --- a/data/rules/bitly.yml +++ b/data/rules/bitly.yml @@ -33,4 +33,4 @@ rules: references: - https://dev.bitly.com/api-reference#Authentication examples: - - "bitly_token = 20e9827b9c5ddee1b0cec7722bfc557dec833791" + - "bitly_token = 20e9817b9c5ddde1b0cec7622bfc557dbc823791" diff --git a/data/rules/docker.yml b/data/rules/docker.yml index 7701377..1d37ab8 100644 --- a/data/rules/docker.yml +++ b/data/rules/docker.yml @@ -1,6 +1,6 @@ rules: - name: Docker Registry Credentials (auths JSON) - id: kingfisher.docker.auths.1 + id: kingfisher.docker.1 pattern: | (?xis) "auths"\s*:\s*\{ @@ -19,7 +19,7 @@ rules: { "auths": { "quay.io": { - "auth": "cmhkaCtyaHRhcDowM1BERk1RTTJQTDlaQUE5T1gzSU9IQjFYTUlXOVNGNU1XRzNSRVRHNThKVVpKMzEwV0ZZRVNOQTdGMExNNTYx" + "auth": 
"cmhkaCtyaHRhcDowM1BERl1RQTJQTDlaQUE5T1gzSU9IQjFYTUlXOVNGNU1XRzNSRVRHNThKVXpKMzEwV0ZZRVMOQTdGMExMNOYx" } } } diff --git a/data/rules/flickr.yml b/data/rules/flickr.yml index c0dd982..d789348 100644 --- a/data/rules/flickr.yml +++ b/data/rules/flickr.yml @@ -27,13 +27,13 @@ rules: status: [200] - type: WordMatch words: - - '"Invalid API Key"' + - '"Invalid API Key' negative: true references: - https://www.flickr.com/services/api/ - https://www.flickr.com/services/api/flickr.test.login.html examples: - - "flickr_api_key: d6953dc63a9498593bfdb4287ed2293c" + - "flickr_api_key: d1953fb62a9798593bfdb4287ed2423e" - name: Flickr OAuth Token id: kingfisher.flickr.2 pattern: | @@ -69,4 +69,4 @@ rules: - https://www.flickr.com/services/api/ - https://www.flickr.com/services/api/flickr.auth.oauth.checkToken.html examples: - - "flickr_oauth_token: a8c1e1f1d9d34aa5a1bdbd94234bcdef" \ No newline at end of file + - "flickr_oauth_token: a8c1b9f1d9d34aa5a1edbd43234bcdef" \ No newline at end of file diff --git a/tests/int_rules_no_validated_findings.rs b/tests/int_rules_no_validated_findings.rs index bd0f4f7..4c4cf19 100644 --- a/tests/int_rules_no_validated_findings.rs +++ b/tests/int_rules_no_validated_findings.rs @@ -15,7 +15,7 @@ fn scan_rules_has_no_validated_findings() -> Result<()> { Some(i) => i, None => return Ok(()), // no array found }; - + let mut depth = 0usize; let mut end = None; for (i, ch) in stdout.char_indices().skip(start) {