diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ab5993..6286da2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this project will be documented in this file. +## [v1.65.0] +- Skip reporting MongoDB and Postgres findings when their connection strings cannot be parsed, even when validation is disabled. +- Improve MySQL detection by broadening URI coverage and adding live validation that skips clearly invalid connection strings. + ## [v1.64.0] - Fixed a bug when using --redact, that broke validation - Added JDBC rule with validator diff --git a/Cargo.toml b/Cargo.toml index be4e2e0..b52f29a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.64.0" +version = "1.65.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -108,6 +108,7 @@ base32 = "0.5.1" crossbeam-skiplist = "0.1.3" tokio-postgres = { version = "0.7", default-features = false, features = ["runtime"] } mongodb = { version = "3.2", default-features = false, features = ["rustls-tls", "aws-auth", "compat-3-0-0", "dns-resolver"] } +mysql_async = { version = "0.34.2", default-features = false, features = ["default-rustls"] } bson = "2.15.0" ring = "0.17.14" pem = "3.0.5" diff --git a/data/rules/mongodb.yml b/data/rules/mongodb.yml index 28b4031..523f4c9 100644 --- a/data/rules/mongodb.yml +++ b/data/rules/mongodb.yml @@ -81,6 +81,10 @@ rules: mongodb(?:\+srv)?://[\S]{3,50}:(?:[\S]{3,88})@[-.%\w/:]+ ) \b + pattern_requirements: + ignore_if_contains: + - "*****" + - "xxxxx" min_entropy: 3 examples: - client = mongoc_client_new ("mongodb+srv://someuser:hunter2@my-atlas-rd941.mongodb.net/test?retryWrites=true&w=majority"); diff --git a/data/rules/mysql.yaml b/data/rules/mysql.yaml deleted file mode 100644 index 3a3a0ab..0000000 --- a/data/rules/mysql.yaml +++ /dev/null @@ -1,7 +0,0 @@ -rules: - - name: MySQL URI with Credentials - id: kingfisher.mysql.1 - pattern: (?xi)\bmysql:\/\/[a-z0-9]+:([a-z0-9!@\#$%^&*()_+{}|:<>?=\\-]+)@[a-z0-9.]+:[0-9]+\/[a-z0-9]+\b - min_entropy: 3.5 - examples: - - CONNECTION_URI="mysql://nimda:m42p!o@2wd@google.com:5434/elephant" diff --git a/data/rules/mysql.yml b/data/rules/mysql.yml new file mode 100644 index 0000000..4c7cbf3 --- /dev/null +++ b/data/rules/mysql.yml @@ -0,0 +1,45 @@ +rules: + - name: MySQL URI with Credentials + id: kingfisher.mysql.1 + pattern: | + (?xi) + ( + mysql:\/\/ + (?: + [a-z0-9._%+\-]+ + ) + : + (?: + [^\s:@]+ + ) + @ + (?: + \[ + [0-9a-f:.]+ + \] + | + [a-z0-9.-]+ + ) + (?:: + \d{2,5} + )? + (?: + \/ + [^\s"'?:]+ + )? + (?: + \? + [^\s"']* + )? + ) + pattern_requirements: + ignore_if_contains: + - "*****" + - "xxxxx" + min_entropy: 3.3 + confidence: medium + examples: + - CONNECTION_URI="mysql://nimda:m42p!o@2wd@google.com:3306/elephant" + - mysql://user:pass@example.com:4406/app_db?ssl-mode=REQUIRED + validation: + type: MySQL diff --git a/data/rules/postgres.yml b/data/rules/postgres.yml index 5cbb4ba..47089b6 100644 --- a/data/rules/postgres.yml +++ b/data/rules/postgres.yml @@ -24,6 +24,10 @@ rules: (?: \d+ ) + pattern_requirements: + ignore_if_contains: + - "*****" + - "xxxxx" min_entropy: 3.3 confidence: medium examples: diff --git a/src/main.rs b/src/main.rs index b52fcd3..99f0718 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,27 +5,27 @@ // * Fallback - system allocator (`system-alloc` feature) // ──────────────────────────────────────────────────────────── -// --- jemalloc (opt-in) --- -#[cfg(feature = "use-jemalloc")] -#[global_allocator] -static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +// // --- jemalloc (opt-in) --- +// #[cfg(feature = "use-jemalloc")] +// #[global_allocator] +// static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -// --- mimalloc (default) --- -#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] -#[global_allocator] -static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; - -// --- system allocator (explicit opt-out) --- -#[cfg(feature = "system-alloc")] -use std::alloc::System; -#[cfg(feature = "system-alloc")] -#[global_allocator] -static GLOBAL: System = System; +// // --- mimalloc (default) --- +// #[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] +// #[global_allocator] +// static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; +// // --- system allocator (explicit opt-out) --- +// #[cfg(feature = "system-alloc")] // use std::alloc::System; +// #[cfg(feature = "system-alloc")] // #[global_allocator] // static GLOBAL: System = System; +use std::alloc::System; +#[global_allocator] +static GLOBAL: System = System; + use std::{ io::{IsTerminal, Read}, sync::{Arc, Mutex}, diff --git a/src/matcher.rs b/src/matcher.rs index 96f7602..7eb8548 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -29,12 +29,13 @@ use crate::{ parser, parser::{Checker, Language}, rule_profiling::{ConcurrentRuleProfiler, RuleStats, RuleTimer}, - rules::rule::{PatternRequirementContext, PatternValidationResult, Rule}, + rules::rule::{PatternRequirementContext, PatternValidationResult, Rule, Validation}, rules_database::RulesDatabase, safe_list::{is_safe_match, is_user_match}, scanner_pool::ScannerPool, snippet::Base64BString, util::intern, + validation::{is_parseable_mongodb_uri, is_parseable_mysql_uri, is_parseable_postgres_uri}, }; const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment @@ -698,6 +699,44 @@ fn filter_match<'b>( debug!("Skipping match due to inline ignore directive"); continue; } + if let Some(validation) = rule.syntax.validation.as_ref() { + match validation { + Validation::MongoDB => { + let Ok(uri) = std::str::from_utf8(matching_input.as_bytes()) else { + debug!("Skipping match for rule {} due to non-UTF8 MongoDB URI", rule.id()); + continue; + }; + if !is_parseable_mongodb_uri(uri) { + debug!("Skipping match for rule {} due to invalid MongoDB URI", rule.id()); + continue; + } + } + Validation::Postgres => { + let Ok(uri) = std::str::from_utf8(matching_input.as_bytes()) else { + debug!( + "Skipping match for rule {} due to non-UTF8 Postgres URI", + rule.id() + ); + continue; + }; + if !is_parseable_postgres_uri(uri) { + debug!("Skipping match for rule {} due to invalid Postgres URI", rule.id()); + continue; + } + } + Validation::MySQL => { + let Ok(uri) = std::str::from_utf8(matching_input.as_bytes()) else { + debug!("Skipping match for rule {} due to non-UTF8 MySQL URI", rule.id()); + continue; + }; + if !is_parseable_mysql_uri(uri) { + debug!("Skipping match for rule {} due to invalid MySQL URI", rule.id()); + continue; + } + } + _ => {} + } + } let match_key = compute_match_key( matching_input.as_bytes(), rule.id().as_bytes(), diff --git a/src/rules/rule.rs b/src/rules/rule.rs index 74adab9..6f4daf2 100644 --- a/src/rules/rule.rs +++ b/src/rules/rule.rs @@ -46,6 +46,7 @@ pub enum Validation { Coinbase, GCP, MongoDB, + MySQL, Postgres, Jdbc, JWT, diff --git a/src/validation.rs b/src/validation.rs index ee37b21..4270545 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -32,6 +32,7 @@ mod httpvalidation; mod jdbc; mod jwt; mod mongodb; +mod mysql; mod postgres; mod utils; @@ -104,6 +105,21 @@ where aws::set_aws_skip_account_ids(ids); } +/// Returns `true` if the provided string can be parsed as a MongoDB connection URI. +pub fn is_parseable_mongodb_uri(uri: &str) -> bool { + mongodb::looks_like_mongodb_uri(uri) +} + +/// Returns `true` if the provided string can be parsed as a Postgres connection URI. +pub fn is_parseable_postgres_uri(uri: &str) -> bool { + postgres::parse_postgres_url(uri).is_ok() +} + +/// Returns `true` if the provided string can be parsed as a MySQL connection URI. +pub fn is_parseable_mysql_uri(uri: &str) -> bool { + mysql::parse_mysql_url(uri).is_ok() +} + #[derive(Clone)] pub struct CachedResponse { pub body: String, @@ -617,6 +633,63 @@ async fn timed_validate_single_match<'a>( } } + // ---------------------------------------------------- MySQL validator + Some(Validation::MySQL) => { + let mysql_url = globals + .get("TOKEN") + .and_then(|v| v.as_scalar()) + .map(|s| s.into_owned().to_kstr().to_string()) + .unwrap_or_default(); + + if mysql_url.is_empty() { + m.validation_success = false; + m.validation_response_body = "MySQL URL not found.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; + } + + let cache_key = mysql::generate_mysql_cache_key(&mysql_url); + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + commit_and_return(m); + return; + } + } + + match mysql::validate_mysql(&mysql_url).await { + Ok((ok, meta)) => { + m.validation_success = ok; + m.validation_response_body = if ok { + format!("MySQL connection is valid. Metadata: {:?}", meta) + } else { + "MySQL connection failed.".to_string() + }; + m.validation_response_status = + if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("MySQL error: {}", e); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } + + cache.insert( + cache_key, + CachedResponse { + body: m.validation_response_body.clone(), + status: m.validation_response_status, + is_valid: m.validation_success, + timestamp: Instant::now(), + }, + ); + } + // ------------------------------------------------ Azure Storage validator Some(Validation::AzureStorage) => { let storage_key = captured_values diff --git a/src/validation/coinbase.rs b/src/validation/coinbase.rs index d879601..926033d 100644 --- a/src/validation/coinbase.rs +++ b/src/validation/coinbase.rs @@ -68,50 +68,6 @@ pub async fn validate_cdp_api_key( Ok((ok, msg)) } -// fn build_jwt( -// method: &str, -// host: &str, -// endpoint: &str, -// cred_name: &str, -// pem: &str, -// ) -> Result { -// let pem = -// pem.replace("\r\n", "\n").replace("\\r\\n", "\n").replace("\\n", "\n").replace("\r", "\n"); -// let secret_key = SecretKey::from_sec1_pem(&pem) -// .or_else(|_| SecretKey::from_pkcs8_pem(&pem)) -// .map_err(|e| anyhow!("invalid EC key: {e}"))?; -// let signing_key = SigningKey::from(secret_key); - -// let mut rng = OsRng; -// let mut nonce = [0u8; 16]; - -// let _ = rng.try_fill_bytes(&mut nonce); - -// let header = serde_json::json!({ -// "typ": "JWT", -// "alg": "ES256", -// "kid": cred_name, -// "nonce": hex::encode(nonce), -// }); -// let header_b64 = URL_SAFE_NO_PAD.encode(header.to_string()); - -// let now = Utc::now().timestamp(); -// let claims = serde_json::json!({ -// "sub": cred_name, -// "iss": "cdp", -// "nbf": now, -// "exp": now + 60, -// "uri": format!("{} {}{}", method, host, endpoint), -// }); -// let claims_b64 = URL_SAFE_NO_PAD.encode(claims.to_string()); - -// let signing_input = format!("{header_b64}.{claims_b64}"); -// let sig: p256::ecdsa::Signature = signing_key.sign(signing_input.as_bytes()); -// let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes()); - -// Ok(format!("{signing_input}.{sig_b64}")) -// } - fn build_jwt( method: &str, host: &str, diff --git a/src/validation/mysql.rs b/src/validation/mysql.rs new file mode 100644 index 0000000..4648844 --- /dev/null +++ b/src/validation/mysql.rs @@ -0,0 +1,167 @@ +use std::{net::IpAddr, time::Duration}; + +use anyhow::{anyhow, Result}; +use mysql_async::{prelude::Queryable, Conn, Opts, OptsBuilder}; +use tokio::time::{error::Elapsed, timeout}; +use tracing::debug; +use url::Url; + +const CONNECT_TIMEOUT: Duration = Duration::from_secs(5); + +pub fn parse_mysql_url(mysql_url: &str) -> Result { + let trimmed = mysql_url.trim(); + if trimmed.is_empty() { + return Err(anyhow!("MySQL URL is empty")); + } + + if !trimmed.to_ascii_lowercase().starts_with("mysql://") { + return Err(anyhow!("MySQL URL must start with mysql://")); + } + + let parsed = Url::parse(trimmed).map_err(|e| anyhow!("Failed to parse MySQL URL: {e}"))?; + + if parsed.username().is_empty() { + return Err(anyhow!("MySQL URL is missing a username")); + } + + if parsed.password().map(str::is_empty).unwrap_or(true) { + return Err(anyhow!("MySQL URL is missing a password")); + } + + if parsed.host_str().map(str::is_empty).unwrap_or(true) + && !parsed.query_pairs().any(|(k, _)| k == "socket") + { + return Err(anyhow!("MySQL URL is missing a host")); + } + + let opts = Opts::from_url(trimmed).map_err(|e| anyhow!("Failed to parse MySQL URL: {e}"))?; + + if opts.user().map(str::is_empty).unwrap_or(true) { + return Err(anyhow!("MySQL URL is missing a username")); + } + + if opts.pass().map(str::is_empty).unwrap_or(true) { + return Err(anyhow!("MySQL URL is missing a password")); + } + + if opts.ip_or_hostname().is_empty() && opts.socket().is_none() { + return Err(anyhow!("MySQL URL is missing a host")); + } + + Ok(opts) +} + +pub fn generate_mysql_cache_key(mysql_url: &str) -> String { + use sha1::{Digest, Sha1}; + + let mut hasher = Sha1::new(); + hasher.update(mysql_url.as_bytes()); + format!("MySQL:{:x}", hasher.finalize()) +} + +fn is_local_host(host: &str) -> bool { + let host = host.trim_matches(|c| c == '[' || c == ']').trim(); + let lower = host.to_ascii_lowercase(); + + if matches!( + lower.as_str(), + "localhost" + | "localhost.localdomain" + | "localhost6" + | "localhost6.localdomain6" + | "ip6-localhost" + | "ip6-loopback" + ) { + return true; + } + + if matches!(lower.as_str(), "0.0.0.0" | "::") { + return true; + } + + if let Ok(ip) = host.parse::() { + return ip.is_loopback() || ip.is_unspecified(); + } + + false +} + +fn targets_localhost(opts: &Opts) -> bool { + if opts.socket().is_some() { + return true; + } + + is_local_host(opts.ip_or_hostname()) +} + +pub async fn validate_mysql(mysql_url: &str) -> Result<(bool, Vec)> { + let opts = parse_mysql_url(mysql_url)?; + + if targets_localhost(&opts) { + debug!("Skipping MySQL validation: host is localhost/loopback or unix socket"); + return Ok((false, vec!["skipped localhost/loopback host".into()])); + } + + let builder = OptsBuilder::from_opts(opts).stmt_cache_size(Some(0)); + let opts: Opts = builder.into(); + + let host = opts.ip_or_hostname().to_string(); + let db_name = opts.db_name().map(|s| s.to_string()).unwrap_or_else(|| "mysql".to_string()); + let user = opts.user().map(|s| s.to_string()).unwrap_or_else(|| "".to_string()); + + let res: Result, Elapsed> = timeout(CONNECT_TIMEOUT, async { + let mut conn = Conn::new(opts).await?; + conn.query_drop("SELECT 1").await?; + conn.disconnect().await?; + Ok(()) + }) + .await; + + match res { + Ok(Ok(())) => Ok(( + true, + vec![format!("user={user}"), format!("host={host}"), format!("database={db_name}")], + )), + Ok(Err(e)) => Err(anyhow!("MySQL connection failed: {e}")), + Err(_) => Err(anyhow!("MySQL connection timed out after {CONNECT_TIMEOUT:?}")), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_mysql_url_accepts_valid_urls() { + let url = "mysql://user:secret@example.com:3306/app"; + let opts = parse_mysql_url(url).expect("expected valid MySQL URL"); + assert_eq!(opts.user(), Some("user")); + assert_eq!(opts.pass(), Some("secret")); + assert_eq!(opts.ip_or_hostname(), "example.com"); + } + + #[test] + fn parse_mysql_url_rejects_invalid_urls() { + for candidate in [ + "", // empty + "mysql://user@example.com/app", // missing password + "mysql://:secret@example.com/app", // missing username + "mysql://user:secret@:3306/app", // missing host + "postgres://user:secret@example.com", // wrong scheme + "mysql://user:secret@example.com:70000/app", // invalid port + ] { + assert!( + parse_mysql_url(candidate).is_err(), + "expected parsing to fail for {candidate}" + ); + } + } + + #[test] + fn parse_mysql_url_allows_trimming_whitespace() { + let opts = + parse_mysql_url(" mysql://user:secret@example.com:3306/app ").expect("trimmed URL"); + assert_eq!(opts.user(), Some("user")); + assert_eq!(opts.pass(), Some("secret")); + } +} diff --git a/src/validation/postgres.rs b/src/validation/postgres.rs index 3ce42b0..ba4a013 100644 --- a/src/validation/postgres.rs +++ b/src/validation/postgres.rs @@ -31,9 +31,23 @@ pub fn generate_postgres_cache_key(postgres_url: &str) -> String { format!("Postgres:{:x}", hasher.finalize()) } +pub fn parse_postgres_url(postgres_url: &str) -> Result { + match Config::from_str(postgres_url) { + Ok(cfg) => Ok(cfg), + Err(e) => { + if let Some(rest) = postgres_url.strip_prefix("postgis://") { + let fallback = format!("postgres://{rest}"); + Config::from_str(&fallback) + .map_err(|_| anyhow!("Failed to parse Postgres URL: {e}")) + } else { + Err(anyhow!("Failed to parse Postgres URL: {e}")) + } + } + } +} + pub async fn validate_postgres(postgres_url: &str) -> Result<(bool, Vec)> { - let mut cfg = - Config::from_str(postgres_url).map_err(|e| anyhow!("Failed to parse Postgres URL: {e}"))?; + let mut cfg = parse_postgres_url(postgres_url)?; // --- skip localhost/loopback/unix-socket targets entirely ------------- if has_any_local_host(&cfg) { @@ -189,7 +203,10 @@ fn missing_cluster_identifier(err_msg: &str) -> bool { #[cfg(test)] mod tests { - use super::{is_local_tcp_host, missing_cluster_identifier, server_requires_encryption}; + use super::{ + is_local_tcp_host, missing_cluster_identifier, parse_postgres_url, + server_requires_encryption, + }; #[test] fn detects_encryption_requirement() { @@ -222,4 +239,16 @@ mod tests { assert!(!is_local_tcp_host(h), "should not treat {h} as local"); } } + + #[test] + fn parse_accepts_postgis_scheme() { + let url = "postgis://postgres:secret@example.com:5432"; + assert!(parse_postgres_url(url).is_ok(), "postgis scheme should be accepted"); + } + + #[test] + fn parse_rejects_invalid_port() { + let url = "postgres://postgres:secret@example.com:70000"; + assert!(parse_postgres_url(url).is_err(), "invalid port should be rejected"); + } } diff --git a/tests/int_uri_parsing.rs b/tests/int_uri_parsing.rs new file mode 100644 index 0000000..e4190de --- /dev/null +++ b/tests/int_uri_parsing.rs @@ -0,0 +1,88 @@ +use assert_cmd::prelude::*; +use predicates::prelude::*; +use std::{fs, process::Command}; +use tempfile::tempdir; + +#[test] +fn filters_invalid_mongodb_uri_even_without_validation() -> anyhow::Result<()> { + let dir = tempdir()?; + let file_path = dir.path().join("mongo.txt"); + let valid = "mongodb://usr:pass@example.com:27017/db"; + let invalid = "mongodb://usr:pass@example.com:abc/db"; + fs::write(&file_path, format!("{valid}\n{invalid}\n"))?; + + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args([ + "scan", + dir.path().to_str().unwrap(), + "--no-binary", + "--confidence=low", + "--format", + "json", + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout(predicate::str::contains(valid)) + .stdout(predicate::str::contains(invalid).not()); + + dir.close()?; + Ok(()) +} + +#[test] +fn filters_invalid_postgres_uri_even_without_validation() -> anyhow::Result<()> { + let dir = tempdir()?; + let file_path = dir.path().join("postgres.txt"); + let valid = "postgres://postgres:secret@example.com:5432"; + let invalid = "postgres://postgres:secret@example.com:70000"; + fs::write(&file_path, format!("{valid}\n{invalid}\n"))?; + + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args([ + "scan", + dir.path().to_str().unwrap(), + "--no-binary", + "--confidence=low", + "--format", + "json", + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout(predicate::str::contains(valid)) + .stdout(predicate::str::contains(invalid).not()); + + dir.close()?; + Ok(()) +} + +#[test] +fn filters_invalid_mysql_uri_even_without_validation() -> anyhow::Result<()> { + let dir = tempdir()?; + let file_path = dir.path().join("mysql.txt"); + let valid = "mysql://user:secret@example.com:3306/app"; + let invalid = "mysql://user:secret@example.com:70000/app"; + fs::write(&file_path, format!("{valid}\n{invalid}\n"))?; + + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args([ + "scan", + dir.path().to_str().unwrap(), + "--no-binary", + "--confidence=low", + "--format", + "json", + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout(predicate::str::contains(valid)) + .stdout(predicate::str::contains(invalid).not()); + + dir.close()?; + Ok(()) +}