diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ab5993..fd69ef5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes to this project will be documented in this file. +## [v1.65.0] +- Skip reporting MongoDB and Postgres findings when their connection strings cannot be parsed, even when validation is disabled. +- Improve MySQL detection by broadening URI coverage and adding live validation that skips clearly invalid connection strings. +- Added a helper to truncate validation response bodies only at UTF-8 character boundaries to prevent panics during validation. + ## [v1.64.0] - Fixed a bug when using --redact, that broke validation - Added JDBC rule with validator diff --git a/Cargo.toml b/Cargo.toml index be4e2e0..d6b0a0e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.64.0" +version = "1.65.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -108,6 +108,7 @@ base32 = "0.5.1" crossbeam-skiplist = "0.1.3" tokio-postgres = { version = "0.7", default-features = false, features = ["runtime"] } mongodb = { version = "3.2", default-features = false, features = ["rustls-tls", "aws-auth", "compat-3-0-0", "dns-resolver"] } +mysql_async = { version = "0.34.2", default-features = false, features = ["default-rustls"] } bson = "2.15.0" ring = "0.17.14" pem = "3.0.5" @@ -220,6 +221,7 @@ temp-env = "0.3.6" wiremock = "0.6.4" git2 = "0.20.2" rand_chacha = "0.9.0" +testcontainers = "0.15.0" [profile.release] debug = false diff --git a/data/rules/adobe.yml b/data/rules/adobe.yml index 3eb79a2..ec5584c 100644 --- a/data/rules/adobe.yml +++ b/data/rules/adobe.yml @@ -46,6 +46,7 @@ rules: \b adobe (?:.|[\n\r]){0,64}? + \b ( [a-z0-9]{12} ) diff --git a/data/rules/azureopenai.yml b/data/rules/azureopenai.yml index 51cba8c..050e7a2 100644 --- a/data/rules/azureopenai.yml +++ b/data/rules/azureopenai.yml @@ -49,7 +49,7 @@ rules: (?xi) \b ( - [a-z0-9-]+ + [a-z0-9-]{3,32} \.openai\.azure\.com ) \b diff --git a/data/rules/grafana.yml b/data/rules/grafana.yml index a7c9726..d339ac3 100644 --- a/data/rules/grafana.yml +++ b/data/rules/grafana.yml @@ -100,12 +100,12 @@ rules: - name: Grafana Domain id: kingfisher.grafana.4 pattern: | - (?xi) - (?:https?://)? - (?:[A-Z0-9-]+\.)* - grafana\.[A-Z0-9.-]+ - (?::\d{2,5})? - (?:[/?\#]\S*)? + (?xi) + (?:https?://)? + (?:[A-Z0-9-]+\.){0,32} + grafana\.[A-Z0-9.-]{3,32} + (?::\d{2,5})? + (?:[/?\#]\S*)? min_entropy: 3.0 visible: false confidence: medium diff --git a/data/rules/jdbc.yml b/data/rules/jdbc.yml index 7456334..3a2ab14 100644 --- a/data/rules/jdbc.yml +++ b/data/rules/jdbc.yml @@ -5,11 +5,15 @@ rules: (?xi) ( jdbc: - [a-z][a-z0-9+.-]{2,30} - (?:[:][a-z0-9+.-]{1,30})* + [a-z][a-z0-9+.-]{2,32} + (?:[:][a-z0-9+.-]{1,32})* : [^\s"'<>,(){}\[\]]{10,448} ) + pattern_requirements: + ignore_if_contains: + - "*****" + - "xxxxx" min_entropy: 3.3 confidence: medium validation: diff --git a/data/rules/mongodb.yml b/data/rules/mongodb.yml index 28b4031..523f4c9 100644 --- a/data/rules/mongodb.yml +++ b/data/rules/mongodb.yml @@ -81,6 +81,10 @@ rules: mongodb(?:\+srv)?://[\S]{3,50}:(?:[\S]{3,88})@[-.%\w/:]+ ) \b + pattern_requirements: + ignore_if_contains: + - "*****" + - "xxxxx" min_entropy: 3 examples: - client = mongoc_client_new ("mongodb+srv://someuser:hunter2@my-atlas-rd941.mongodb.net/test?retryWrites=true&w=majority"); diff --git a/data/rules/mysql.yaml b/data/rules/mysql.yaml deleted file mode 100644 index 3a3a0ab..0000000 --- a/data/rules/mysql.yaml +++ /dev/null @@ -1,7 +0,0 @@ -rules: - - name: MySQL URI with Credentials - id: kingfisher.mysql.1 - pattern: (?xi)\bmysql:\/\/[a-z0-9]+:([a-z0-9!@\#$%^&*()_+{}|:<>?=\\-]+)@[a-z0-9.]+:[0-9]+\/[a-z0-9]+\b - min_entropy: 3.5 - examples: - - CONNECTION_URI="mysql://nimda:m42p!o@2wd@google.com:5434/elephant" diff --git a/data/rules/mysql.yml b/data/rules/mysql.yml new file mode 100644 index 0000000..4c7cbf3 --- /dev/null +++ b/data/rules/mysql.yml @@ -0,0 +1,45 @@ +rules: + - name: MySQL URI with Credentials + id: kingfisher.mysql.1 + pattern: | + (?xi) + ( + mysql:\/\/ + (?: + [a-z0-9._%+\-]+ + ) + : + (?: + [^\s:@]+ + ) + @ + (?: + \[ + [0-9a-f:.]+ + \] + | + [a-z0-9.-]+ + ) + (?:: + \d{2,5} + )? + (?: + \/ + [^\s"'?:]+ + )? + (?: + \? + [^\s"']* + )? + ) + pattern_requirements: + ignore_if_contains: + - "*****" + - "xxxxx" + min_entropy: 3.3 + confidence: medium + examples: + - CONNECTION_URI="mysql://nimda:m42p!o@2wd@google.com:3306/elephant" + - mysql://user:pass@example.com:4406/app_db?ssl-mode=REQUIRED + validation: + type: MySQL diff --git a/data/rules/postgres.yml b/data/rules/postgres.yml index 5cbb4ba..47089b6 100644 --- a/data/rules/postgres.yml +++ b/data/rules/postgres.yml @@ -24,6 +24,10 @@ rules: (?: \d+ ) + pattern_requirements: + ignore_if_contains: + - "*****" + - "xxxxx" min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/uri.yml b/data/rules/uri.yml index 04c8057..45fde52 100644 --- a/data/rules/uri.yml +++ b/data/rules/uri.yml @@ -16,6 +16,10 @@ rules: (?:\?[A-Za-z0-9\-._~%!$&'()*,;=:@/?%]*)? (?:\#[A-Za-z0-9\-._~%!$&'()*,;=:@/?%]*)? ) + pattern_requirements: + ignore_if_contains: + - "*****" + - "xxxxx" min_entropy: 4.0 confidence: medium examples: diff --git a/src/bitbucket.rs b/src/bitbucket.rs index 6f5f374..12edf6c 100644 --- a/src/bitbucket.rs +++ b/src/bitbucket.rs @@ -51,15 +51,20 @@ impl AuthConfig { bearer_token: Option, ) -> Self { fn normalized(value: Option) -> Option { - value.and_then(|v| if v.trim().is_empty() { None } else { Some(v) }) + value.and_then(|v| { + let trimmed = v.trim(); + if trimmed.is_empty() { + None + } else if trimmed.len() == v.len() { + Some(v) + } else { + Some(trimmed.to_owned()) + } + }) } fn env_var(name: &str) -> Option { - match env::var(name) { - Ok(value) if value.trim().is_empty() => None, - Ok(value) => Some(value), - Err(_) => None, - } + normalized(env::var(name).ok()) } let username = normalized(username).or_else(|| env_var("KF_BITBUCKET_USERNAME")); @@ -769,6 +774,21 @@ mod tests { ); } + #[test] + fn auth_config_trims_environment_whitespace() { + temp_env::with_vars( + &[ + ("KF_BITBUCKET_USERNAME", Some(" user ")), + ("KF_BITBUCKET_APP_PASSWORD", Some(" pass\n")), + ], + || { + let auth = AuthConfig::from_env(); + assert_eq!(auth.username.as_deref(), Some("user")); + assert_eq!(auth.password.as_deref(), Some("pass")); + }, + ); + } + #[test] fn auth_config_treats_access_token_as_bearer() { let token = "AT1234567890_ACCESS_TOKEN_EXAMPLE_WITH_UNDERSCORE"; diff --git a/src/matcher.rs b/src/matcher.rs index 96f7602..7eb8548 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -29,12 +29,13 @@ use crate::{ parser, parser::{Checker, Language}, rule_profiling::{ConcurrentRuleProfiler, RuleStats, RuleTimer}, - rules::rule::{PatternRequirementContext, PatternValidationResult, Rule}, + rules::rule::{PatternRequirementContext, PatternValidationResult, Rule, Validation}, rules_database::RulesDatabase, safe_list::{is_safe_match, is_user_match}, scanner_pool::ScannerPool, snippet::Base64BString, util::intern, + validation::{is_parseable_mongodb_uri, is_parseable_mysql_uri, is_parseable_postgres_uri}, }; const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment @@ -698,6 +699,44 @@ fn filter_match<'b>( debug!("Skipping match due to inline ignore directive"); continue; } + if let Some(validation) = rule.syntax.validation.as_ref() { + match validation { + Validation::MongoDB => { + let Ok(uri) = std::str::from_utf8(matching_input.as_bytes()) else { + debug!("Skipping match for rule {} due to non-UTF8 MongoDB URI", rule.id()); + continue; + }; + if !is_parseable_mongodb_uri(uri) { + debug!("Skipping match for rule {} due to invalid MongoDB URI", rule.id()); + continue; + } + } + Validation::Postgres => { + let Ok(uri) = std::str::from_utf8(matching_input.as_bytes()) else { + debug!( + "Skipping match for rule {} due to non-UTF8 Postgres URI", + rule.id() + ); + continue; + }; + if !is_parseable_postgres_uri(uri) { + debug!("Skipping match for rule {} due to invalid Postgres URI", rule.id()); + continue; + } + } + Validation::MySQL => { + let Ok(uri) = std::str::from_utf8(matching_input.as_bytes()) else { + debug!("Skipping match for rule {} due to non-UTF8 MySQL URI", rule.id()); + continue; + }; + if !is_parseable_mysql_uri(uri) { + debug!("Skipping match for rule {} due to invalid MySQL URI", rule.id()); + continue; + } + } + _ => {} + } + } let match_key = compute_match_key( matching_input.as_bytes(), rule.id().as_bytes(), diff --git a/src/rules/rule.rs b/src/rules/rule.rs index 74adab9..6f4daf2 100644 --- a/src/rules/rule.rs +++ b/src/rules/rule.rs @@ -46,6 +46,7 @@ pub enum Validation { Coinbase, GCP, MongoDB, + MySQL, Postgres, Jdbc, JWT, diff --git a/src/validation.rs b/src/validation.rs index ee37b21..8ffcd66 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -32,12 +32,28 @@ mod httpvalidation; mod jdbc; mod jwt; mod mongodb; +mod mysql; mod postgres; +pub use mysql::validate_mysql; +pub use postgres::validate_postgres; mod utils; const VALIDATION_CACHE_SECONDS: u64 = 1200; // 20 minutes const MAX_VALIDATION_BODY_LEN: usize = 2048; +fn truncate_to_char_boundary(s: &mut String, max_len: usize) { + if s.len() <= max_len { + return; + } + + let mut new_len = max_len; + while new_len > 0 && !s.is_char_boundary(new_len) { + new_len -= 1; + } + + s.truncate(new_len); +} + static USER_AGENT_SUFFIX: OnceCell = OnceCell::new(); const BROWSER_USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \ @@ -104,6 +120,21 @@ where aws::set_aws_skip_account_ids(ids); } +/// Returns `true` if the provided string can be parsed as a MongoDB connection URI. +pub fn is_parseable_mongodb_uri(uri: &str) -> bool { + mongodb::looks_like_mongodb_uri(uri) +} + +/// Returns `true` if the provided string can be parsed as a Postgres connection URI. +pub fn is_parseable_postgres_uri(uri: &str) -> bool { + postgres::parse_postgres_url(uri).is_ok() +} + +/// Returns `true` if the provided string can be parsed as a MySQL connection URI. +pub fn is_parseable_mysql_uri(uri: &str) -> bool { + mysql::parse_mysql_url(uri).is_ok() +} + #[derive(Clone)] pub struct CachedResponse { pub body: String, @@ -534,9 +565,7 @@ async fn timed_validate_single_match<'a>( return; } }; - if body.len() > MAX_VALIDATION_BODY_LEN { - body.truncate(MAX_VALIDATION_BODY_LEN); - } + truncate_to_char_boundary(&mut body, MAX_VALIDATION_BODY_LEN); m.validation_response_status = status; m.validation_response_body = body.clone(); @@ -617,6 +646,63 @@ async fn timed_validate_single_match<'a>( } } + // ---------------------------------------------------- MySQL validator + Some(Validation::MySQL) => { + let mysql_url = globals + .get("TOKEN") + .and_then(|v| v.as_scalar()) + .map(|s| s.into_owned().to_kstr().to_string()) + .unwrap_or_default(); + + if mysql_url.is_empty() { + m.validation_success = false; + m.validation_response_body = "MySQL URL not found.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; + } + + let cache_key = mysql::generate_mysql_cache_key(&mysql_url); + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + commit_and_return(m); + return; + } + } + + match mysql::validate_mysql(&mysql_url).await { + Ok((ok, meta)) => { + m.validation_success = ok; + m.validation_response_body = if ok { + format!("MySQL connection is valid. Metadata: {:?}", meta) + } else { + "MySQL connection failed.".to_string() + }; + m.validation_response_status = + if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("MySQL error: {}", e); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } + + cache.insert( + cache_key, + CachedResponse { + body: m.validation_response_body.clone(), + status: m.validation_response_status, + is_valid: m.validation_success, + timestamp: Instant::now(), + }, + ); + } + // ------------------------------------------------ Azure Storage validator Some(Validation::AzureStorage) => { let storage_key = captured_values @@ -1066,6 +1152,18 @@ mod tests { assert!(globals.get("TOKEN").is_none()); assert_eq!(globals.get("CHECKSUM"), Some(Value::scalar("123456")).as_ref()); } + + #[test] + fn truncate_to_char_boundary_handles_multibyte_characters() { + let mut body = "a".repeat(MAX_VALIDATION_BODY_LEN); + body.push('é'); + + truncate_to_char_boundary(&mut body, MAX_VALIDATION_BODY_LEN); + + assert_eq!(body.len(), MAX_VALIDATION_BODY_LEN); + assert!(body.is_char_boundary(body.len())); + assert!(body.ends_with('a')); + } } // #[cfg(test)] diff --git a/src/validation/coinbase.rs b/src/validation/coinbase.rs index d879601..926033d 100644 --- a/src/validation/coinbase.rs +++ b/src/validation/coinbase.rs @@ -68,50 +68,6 @@ pub async fn validate_cdp_api_key( Ok((ok, msg)) } -// fn build_jwt( -// method: &str, -// host: &str, -// endpoint: &str, -// cred_name: &str, -// pem: &str, -// ) -> Result { -// let pem = -// pem.replace("\r\n", "\n").replace("\\r\\n", "\n").replace("\\n", "\n").replace("\r", "\n"); -// let secret_key = SecretKey::from_sec1_pem(&pem) -// .or_else(|_| SecretKey::from_pkcs8_pem(&pem)) -// .map_err(|e| anyhow!("invalid EC key: {e}"))?; -// let signing_key = SigningKey::from(secret_key); - -// let mut rng = OsRng; -// let mut nonce = [0u8; 16]; - -// let _ = rng.try_fill_bytes(&mut nonce); - -// let header = serde_json::json!({ -// "typ": "JWT", -// "alg": "ES256", -// "kid": cred_name, -// "nonce": hex::encode(nonce), -// }); -// let header_b64 = URL_SAFE_NO_PAD.encode(header.to_string()); - -// let now = Utc::now().timestamp(); -// let claims = serde_json::json!({ -// "sub": cred_name, -// "iss": "cdp", -// "nbf": now, -// "exp": now + 60, -// "uri": format!("{} {}{}", method, host, endpoint), -// }); -// let claims_b64 = URL_SAFE_NO_PAD.encode(claims.to_string()); - -// let signing_input = format!("{header_b64}.{claims_b64}"); -// let sig: p256::ecdsa::Signature = signing_key.sign(signing_input.as_bytes()); -// let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes()); - -// Ok(format!("{signing_input}.{sig_b64}")) -// } - fn build_jwt( method: &str, host: &str, diff --git a/src/validation/mysql.rs b/src/validation/mysql.rs new file mode 100644 index 0000000..4648844 --- /dev/null +++ b/src/validation/mysql.rs @@ -0,0 +1,167 @@ +use std::{net::IpAddr, time::Duration}; + +use anyhow::{anyhow, Result}; +use mysql_async::{prelude::Queryable, Conn, Opts, OptsBuilder}; +use tokio::time::{error::Elapsed, timeout}; +use tracing::debug; +use url::Url; + +const CONNECT_TIMEOUT: Duration = Duration::from_secs(5); + +pub fn parse_mysql_url(mysql_url: &str) -> Result { + let trimmed = mysql_url.trim(); + if trimmed.is_empty() { + return Err(anyhow!("MySQL URL is empty")); + } + + if !trimmed.to_ascii_lowercase().starts_with("mysql://") { + return Err(anyhow!("MySQL URL must start with mysql://")); + } + + let parsed = Url::parse(trimmed).map_err(|e| anyhow!("Failed to parse MySQL URL: {e}"))?; + + if parsed.username().is_empty() { + return Err(anyhow!("MySQL URL is missing a username")); + } + + if parsed.password().map(str::is_empty).unwrap_or(true) { + return Err(anyhow!("MySQL URL is missing a password")); + } + + if parsed.host_str().map(str::is_empty).unwrap_or(true) + && !parsed.query_pairs().any(|(k, _)| k == "socket") + { + return Err(anyhow!("MySQL URL is missing a host")); + } + + let opts = Opts::from_url(trimmed).map_err(|e| anyhow!("Failed to parse MySQL URL: {e}"))?; + + if opts.user().map(str::is_empty).unwrap_or(true) { + return Err(anyhow!("MySQL URL is missing a username")); + } + + if opts.pass().map(str::is_empty).unwrap_or(true) { + return Err(anyhow!("MySQL URL is missing a password")); + } + + if opts.ip_or_hostname().is_empty() && opts.socket().is_none() { + return Err(anyhow!("MySQL URL is missing a host")); + } + + Ok(opts) +} + +pub fn generate_mysql_cache_key(mysql_url: &str) -> String { + use sha1::{Digest, Sha1}; + + let mut hasher = Sha1::new(); + hasher.update(mysql_url.as_bytes()); + format!("MySQL:{:x}", hasher.finalize()) +} + +fn is_local_host(host: &str) -> bool { + let host = host.trim_matches(|c| c == '[' || c == ']').trim(); + let lower = host.to_ascii_lowercase(); + + if matches!( + lower.as_str(), + "localhost" + | "localhost.localdomain" + | "localhost6" + | "localhost6.localdomain6" + | "ip6-localhost" + | "ip6-loopback" + ) { + return true; + } + + if matches!(lower.as_str(), "0.0.0.0" | "::") { + return true; + } + + if let Ok(ip) = host.parse::() { + return ip.is_loopback() || ip.is_unspecified(); + } + + false +} + +fn targets_localhost(opts: &Opts) -> bool { + if opts.socket().is_some() { + return true; + } + + is_local_host(opts.ip_or_hostname()) +} + +pub async fn validate_mysql(mysql_url: &str) -> Result<(bool, Vec)> { + let opts = parse_mysql_url(mysql_url)?; + + if targets_localhost(&opts) { + debug!("Skipping MySQL validation: host is localhost/loopback or unix socket"); + return Ok((false, vec!["skipped localhost/loopback host".into()])); + } + + let builder = OptsBuilder::from_opts(opts).stmt_cache_size(Some(0)); + let opts: Opts = builder.into(); + + let host = opts.ip_or_hostname().to_string(); + let db_name = opts.db_name().map(|s| s.to_string()).unwrap_or_else(|| "mysql".to_string()); + let user = opts.user().map(|s| s.to_string()).unwrap_or_else(|| "".to_string()); + + let res: Result, Elapsed> = timeout(CONNECT_TIMEOUT, async { + let mut conn = Conn::new(opts).await?; + conn.query_drop("SELECT 1").await?; + conn.disconnect().await?; + Ok(()) + }) + .await; + + match res { + Ok(Ok(())) => Ok(( + true, + vec![format!("user={user}"), format!("host={host}"), format!("database={db_name}")], + )), + Ok(Err(e)) => Err(anyhow!("MySQL connection failed: {e}")), + Err(_) => Err(anyhow!("MySQL connection timed out after {CONNECT_TIMEOUT:?}")), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_mysql_url_accepts_valid_urls() { + let url = "mysql://user:secret@example.com:3306/app"; + let opts = parse_mysql_url(url).expect("expected valid MySQL URL"); + assert_eq!(opts.user(), Some("user")); + assert_eq!(opts.pass(), Some("secret")); + assert_eq!(opts.ip_or_hostname(), "example.com"); + } + + #[test] + fn parse_mysql_url_rejects_invalid_urls() { + for candidate in [ + "", // empty + "mysql://user@example.com/app", // missing password + "mysql://:secret@example.com/app", // missing username + "mysql://user:secret@:3306/app", // missing host + "postgres://user:secret@example.com", // wrong scheme + "mysql://user:secret@example.com:70000/app", // invalid port + ] { + assert!( + parse_mysql_url(candidate).is_err(), + "expected parsing to fail for {candidate}" + ); + } + } + + #[test] + fn parse_mysql_url_allows_trimming_whitespace() { + let opts = + parse_mysql_url(" mysql://user:secret@example.com:3306/app ").expect("trimmed URL"); + assert_eq!(opts.user(), Some("user")); + assert_eq!(opts.pass(), Some("secret")); + } +} diff --git a/src/validation/postgres.rs b/src/validation/postgres.rs index 3ce42b0..ba4a013 100644 --- a/src/validation/postgres.rs +++ b/src/validation/postgres.rs @@ -31,9 +31,23 @@ pub fn generate_postgres_cache_key(postgres_url: &str) -> String { format!("Postgres:{:x}", hasher.finalize()) } +pub fn parse_postgres_url(postgres_url: &str) -> Result { + match Config::from_str(postgres_url) { + Ok(cfg) => Ok(cfg), + Err(e) => { + if let Some(rest) = postgres_url.strip_prefix("postgis://") { + let fallback = format!("postgres://{rest}"); + Config::from_str(&fallback) + .map_err(|_| anyhow!("Failed to parse Postgres URL: {e}")) + } else { + Err(anyhow!("Failed to parse Postgres URL: {e}")) + } + } + } +} + pub async fn validate_postgres(postgres_url: &str) -> Result<(bool, Vec)> { - let mut cfg = - Config::from_str(postgres_url).map_err(|e| anyhow!("Failed to parse Postgres URL: {e}"))?; + let mut cfg = parse_postgres_url(postgres_url)?; // --- skip localhost/loopback/unix-socket targets entirely ------------- if has_any_local_host(&cfg) { @@ -189,7 +203,10 @@ fn missing_cluster_identifier(err_msg: &str) -> bool { #[cfg(test)] mod tests { - use super::{is_local_tcp_host, missing_cluster_identifier, server_requires_encryption}; + use super::{ + is_local_tcp_host, missing_cluster_identifier, parse_postgres_url, + server_requires_encryption, + }; #[test] fn detects_encryption_requirement() { @@ -222,4 +239,16 @@ mod tests { assert!(!is_local_tcp_host(h), "should not treat {h} as local"); } } + + #[test] + fn parse_accepts_postgis_scheme() { + let url = "postgis://postgres:secret@example.com:5432"; + assert!(parse_postgres_url(url).is_ok(), "postgis scheme should be accepted"); + } + + #[test] + fn parse_rejects_invalid_port() { + let url = "postgres://postgres:secret@example.com:70000"; + assert!(parse_postgres_url(url).is_err(), "invalid port should be rejected"); + } } diff --git a/tests/int_uri_parsing.rs b/tests/int_uri_parsing.rs new file mode 100644 index 0000000..a8e7c8c --- /dev/null +++ b/tests/int_uri_parsing.rs @@ -0,0 +1,88 @@ +use assert_cmd::prelude::*; +use predicates::prelude::*; +use std::{fs, process::Command}; +use tempfile::tempdir; + +#[test] +fn filters_invalid_mongodb_uri_even_without_validation() -> anyhow::Result<()> { + let dir = tempdir()?; + let file_path = dir.path().join("mongo.txt"); + let valid = "mongodb://usr:pass@example.com:27017/db"; + let invalid = "mongodb://usr:pass@example.com:abc/db"; + fs::write(&file_path, format!("{valid}\n{invalid}\n"))?; + + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args([ + "scan", + dir.path().to_str().unwrap(), + "--no-binary", + "--confidence=low", + "--format", + "json", + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout(predicate::str::contains(valid)) + .stdout(predicate::str::contains(invalid).not()); + + dir.close()?; + Ok(()) +} + +#[test] +fn filters_invalid_postgres_uri_even_without_validation() -> anyhow::Result<()> { + let dir = tempdir()?; + let file_path = dir.path().join("postgres.txt"); + let valid = "postgres://postgres:secret@example.com:5432"; + let invalid = "postgres://postgres:secret@example.com:70000"; + fs::write(&file_path, format!("{valid}\n{invalid}\n"))?; + + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args([ + "scan", + dir.path().to_str().unwrap(), + "--no-binary", + "--confidence=low", + "--format", + "json", + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout(predicate::str::contains(valid)) + .stdout(predicate::str::contains(invalid).not()); + + dir.close()?; + Ok(()) +} + +#[test] +fn filters_invalid_mysql_uri_even_without_validation() -> anyhow::Result<()> { + let dir = tempdir()?; + let file_path = dir.path().join("mysql.txt"); + let valid = "mysql://user:secret@example.com:3306/app"; + let invalid = "mysql://user:secret@example.com:70000/app"; + fs::write(&file_path, format!("{valid}\n{invalid}\n"))?; + + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args([ + "scan", + dir.path().to_str().unwrap(), + "--no-binary", + "--confidence=low", + "--format", + "json", + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout(predicate::str::contains(valid)) + .stdout(predicate::str::contains(invalid).not()); + + dir.close()?; + Ok(()) +} diff --git a/tests/live_db_validation.rs b/tests/live_db_validation.rs new file mode 100644 index 0000000..b41c43c --- /dev/null +++ b/tests/live_db_validation.rs @@ -0,0 +1,98 @@ +//! Live validation smoke tests that exercise the database validators against +//! real MySQL and Postgres instances provisioned with `testcontainers`. +//! +//! These are ignored by default because they require Docker. Run them with: +//! `cargo test --test live_db_validation -- --ignored`. + +use std::time::{Duration, Instant}; + +use anyhow::{anyhow, Result}; +use kingfisher::validation::{validate_mysql, validate_postgres}; +use testcontainers::{clients::Cli, core::WaitFor, GenericImage}; +use tokio::{net::TcpStream, time::sleep}; + +const HOST_ALIAS: &str = "kingfisherlocal"; +const STARTUP_TIMEOUT: Duration = Duration::from_secs(60); +const STARTUP_POLL_INTERVAL: Duration = Duration::from_millis(250); + +async fn wait_for_port(host: &str, port: u16) -> Result<()> { + let deadline = Instant::now() + STARTUP_TIMEOUT; + let mut last_err = None; + + loop { + match TcpStream::connect((host, port)).await { + Ok(stream) => { + drop(stream); + return Ok(()); + } + Err(err) => { + last_err = Some(err); + if Instant::now() >= deadline { + return Err(anyhow!( + "timed out after {:?} waiting for {host}:{port}: {last_err:?}", + STARTUP_TIMEOUT, + )); + } + sleep(STARTUP_POLL_INTERVAL).await; + } + } + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[ignore] +async fn validates_mysql_secret_against_testcontainer() -> Result<()> { + let docker = Cli::default(); + let image = GenericImage::new("mysql", "8.4") + .with_env_var("MYSQL_ROOT_PASSWORD", "secret") + .with_env_var("MYSQL_DATABASE", "app") + .with_env_var("MYSQL_ROOT_HOST", "%") + .with_wait_for(WaitFor::message_on_stdout("MySQL init process done. Ready for start up.")); + + let container = docker.run(image); + let port = container.get_host_port_ipv4(3306); + + wait_for_port(HOST_ALIAS, port).await?; + + let uri = format!("mysql://root:secret@{HOST_ALIAS}:{port}/app"); + let (is_valid, metadata) = validate_mysql(&uri).await?; + + assert!(is_valid, "expected MySQL validation to succeed, got {metadata:?}"); + assert!( + metadata.iter().any(|entry| entry.contains("user=root")), + "expected user metadata in {metadata:?}" + ); + assert!( + metadata.iter().any(|entry| entry.contains("database=app")), + "expected database metadata in {metadata:?}" + ); + + drop(container); + drop(docker); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[ignore] +async fn validates_postgres_secret_against_testcontainer() -> Result<()> { + let docker = Cli::default(); + let image = GenericImage::new("postgres", "15") + .with_env_var("POSTGRES_PASSWORD", "secret") + .with_wait_for(WaitFor::message_on_stdout( + "database system is ready to accept connections", + )); + let container = docker.run(image); + let port = container.get_host_port_ipv4(5432); + + wait_for_port(HOST_ALIAS, port).await?; + + let uri = format!("postgres://postgres:secret@{HOST_ALIAS}:{port}/postgres"); + let (is_valid, metadata) = validate_postgres(&uri).await?; + + assert!(is_valid, "expected Postgres validation to succeed"); + assert!(metadata.is_empty(), "expected no metadata but found {metadata:?}"); + + drop(container); + drop(docker); + Ok(()) +}