From 2ed94f75d7ef0dd6378b136ab71ce907cb13c5b4 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 12 Nov 2025 22:25:33 -0800 Subject: [PATCH] added jdbc rule and validator --- CHANGELOG.md | 1 + data/rules/jdbc.yml | 24 +++++++ src/rules/rule.rs | 1 + src/safe_list.rs | 7 ++ src/validation.rs | 53 ++++++++++++++ src/validation/jdbc.rs | 154 +++++++++++++++++++++++++++++++++++++++++ tests/jdbc_rule.rs | 79 +++++++++++++++++++++ 7 files changed, 319 insertions(+) create mode 100644 data/rules/jdbc.yml create mode 100644 src/validation/jdbc.rs create mode 100644 tests/jdbc_rule.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 90658f7..ba64619 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [v1.64.0] - Fixed a bug when using --redact, that broke validation +- Added JDBC rule with validator ## [v1.63.1] - Updated allocator diff --git a/data/rules/jdbc.yml b/data/rules/jdbc.yml new file mode 100644 index 0000000..81553be --- /dev/null +++ b/data/rules/jdbc.yml @@ -0,0 +1,24 @@ +rules: + - name: JDBC connection string with embedded credentials + id: kingfisher.jdbc.1 + pattern: | + (?xi) + ( + jdbc: + [a-z][a-z0-9+.-]{2,30} + (?:[:][a-z0-9+.-]{1,30})* + : + [^\s"'<>,(){}\[\]]{10,512} + ) + min_entropy: 3.3 + confidence: medium + validation: + type: Jdbc + examples: + - jdbc:postgresql://db.example.com:5432/app?user=admin&password=s3cr3t + - jdbc:mysql://admin:s3cr3t@prod.internal:3306/inventory + - jdbc:oracle:thin:@ora.example.net:1521/ORCLPDB1 + - jdbc:sqlserver://sql.example.org:1433;databaseName=inventory;user=sa;password=s3cr3t! 
+ references: + - https://docs.oracle.com/javase/8/docs/api/java/sql/DriverManager.html + - https://www.postgresql.org/docs/current/jdbc-use.html diff --git a/src/rules/rule.rs b/src/rules/rule.rs index 4048e35..63c8208 100644 --- a/src/rules/rule.rs +++ b/src/rules/rule.rs @@ -47,6 +47,7 @@ pub enum Validation { GCP, MongoDB, Postgres, + Jdbc, JWT, Raw(String), Http(HttpValidation), diff --git a/src/safe_list.rs b/src/safe_list.rs index ba7f852..c146658 100644 --- a/src/safe_list.rs +++ b/src/safe_list.rs @@ -198,6 +198,13 @@ pub fn is_safe_match_reason(input: &[u8]) -> Option<&'static str> { .map(|rule| rule.description) } +/// Test helper: clear all user-provided allow-list configuration. +#[doc(hidden)] +pub fn clear_user_filters_for_tests() { + USER_SAFE_REGEXES.lock().unwrap().clear(); + USER_SAFE_SKIPWORDS.lock().unwrap().clear(); +} + /// Returns true if the input likely contains *benign* placeholder/test strings, /// and logs which rule triggered at `debug!` level. pub fn is_safe_match(input: &[u8]) -> bool { diff --git a/src/validation.rs b/src/validation.rs index e5fdace..ee37b21 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -29,6 +29,7 @@ mod azure; mod coinbase; mod gcp; mod httpvalidation; +mod jdbc; mod jwt; mod mongodb; mod postgres; @@ -676,6 +677,58 @@ async fn timed_validate_single_match<'a>( ); } + // ---------------------------------------------------- JDBC validator + Some(Validation::Jdbc) => { + let jdbc_conn = captured_values + .iter() + .find(|(n, ..)| n == "TOKEN") + .map(|(_, v, ..)| v.clone()) + .unwrap_or_default(); + + if jdbc_conn.is_empty() { + m.validation_success = false; + m.validation_response_body = "JDBC connection string not found.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; + } + + let cache_key = jdbc::generate_jdbc_cache_key(&jdbc_conn); + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < 
Duration::from_secs(VALIDATION_CACHE_SECONDS) { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + commit_and_return(m); + return; + } + } + + match jdbc::validate_jdbc(&jdbc_conn).await { + Ok(outcome) => { + m.validation_success = outcome.valid; + m.validation_response_body = outcome.message; + m.validation_response_status = outcome.status; + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("JDBC validation error: {}", e); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } + + cache.insert( + cache_key, + CachedResponse { + body: m.validation_response_body.clone(), + status: m.validation_response_status, + is_valid: m.validation_success, + timestamp: Instant::now(), + }, + ); + } + // ------------------------------------------------ Postgres validator Some(Validation::Postgres) => { let pg_url = globals diff --git a/src/validation/jdbc.rs b/src/validation/jdbc.rs new file mode 100644 index 0000000..5c6cb73 --- /dev/null +++ b/src/validation/jdbc.rs @@ -0,0 +1,154 @@ +use anyhow::{anyhow, Context, Result}; +use http::StatusCode; +use tracing::debug; +use url::Url; +use xxhash_rust::xxh3::xxh3_64; + +use super::postgres; + +/// Result of attempting to validate a JDBC connection string. +pub struct JdbcValidationOutcome { + pub valid: bool, + pub status: StatusCode, + pub message: String, +} + +/// Produce a short-lived cache key for JDBC validations. +pub fn generate_jdbc_cache_key(raw: &str) -> String { + format!("Jdbc:{:016x}", xxh3_64(raw.as_bytes())) +} + +/// Validate a JDBC connection string by dispatching to the supported backend validators. 
+pub async fn validate_jdbc(jdbc_conn: &str) -> Result<JdbcValidationOutcome> { + let trimmed = jdbc_conn.trim(); + if !trimmed.to_ascii_lowercase().starts_with("jdbc:") { + return Err(anyhow!("JDBC connection string must start with `jdbc:`")); + } + + let without_prefix = &trimmed[5..]; + let (raw_subprotocol, subname) = without_prefix + .split_once(':') + .ok_or_else(|| anyhow!("JDBC connection string is missing a subprotocol"))?; + let subprotocol = raw_subprotocol.trim(); + let subprotocol_lower = subprotocol.to_ascii_lowercase(); + + match subprotocol_lower.as_str() { + "postgres" | "postgresql" | "postgis" => { + validate_postgres_jdbc(subname).await.context("Postgres JDBC validation failed") + } + other => { + debug!("Unsupported JDBC subprotocol encountered: {}", other); + Ok(JdbcValidationOutcome { + valid: false, + status: StatusCode::NOT_IMPLEMENTED, + message: format!( + "JDBC validation not implemented for subprotocol `{}`.", + subprotocol + ), + }) + } + } +} + +async fn validate_postgres_jdbc(subname: &str) -> Result<JdbcValidationOutcome> { + let normalized = normalize_postgres_url(subname)?; + let (ok, meta) = postgres::validate_postgres(&normalized).await?; + + let mut message = if ok { + "JDBC Postgres connection is valid.".to_string() + } else { + "JDBC Postgres connection failed.".to_string() + }; + + if !meta.is_empty() { + let joined = meta.join("; "); + if ok { + message.push_str(&format!(" Details: {}", joined)); + } else { + message = format!("JDBC Postgres validation result: {}", joined); + } + } + + let status = if ok { + StatusCode::OK + } else if meta.iter().any(|m| m.to_ascii_lowercase().contains("skip")) { + StatusCode::CONTINUE + } else { + StatusCode::UNAUTHORIZED + }; + + Ok(JdbcValidationOutcome { valid: ok, status, message }) +} + +fn normalize_postgres_url(subname: &str) -> Result<String> { + let trimmed = subname.trim(); + if trimmed.is_empty() { + return Err(anyhow!("Postgres JDBC connection string is empty")); + } + + // First try parsing using the standard JDBC layout, 
otherwise fall back to a canonical URL. + let candidate = format!("postgresql:{}", trimmed); + let mut url = Url::parse(&candidate).or_else(|_| { + let fallback = format!("postgresql://{}", trimmed.trim_start_matches('/')); + Url::parse(&fallback) + })?; + + // Extract credentials from the query string when they are present. + let mut user = None; + let mut password = None; + if url.query().is_some() { + let mut preserved = Vec::new(); + for (key, value) in url.query_pairs() { + match key.to_ascii_lowercase().as_str() { + "user" | "username" => user = Some(value.into_owned()), + "password" | "pass" | "pwd" => password = Some(value.into_owned()), + _ => preserved.push((key.into_owned(), value.into_owned())), + } + } + + { + let mut pairs = url.query_pairs_mut(); + pairs.clear(); + for (key, value) in preserved { + pairs.append_pair(&key, &value); + } + } + } + + if let Some(user) = user { + url.set_username(&user).map_err(|_| anyhow!("Failed to apply Postgres username"))?; + } + if let Some(password) = password { + url.set_password(Some(&password)) + .map_err(|_| anyhow!("Failed to apply Postgres password"))?; + } + + Ok(url.to_string()) +} + +#[cfg(test)] +mod tests { + use super::normalize_postgres_url; + use pretty_assertions::assert_eq; + + #[test] + fn normalizes_postgres_query_credentials() { + let normalized = normalize_postgres_url( + "//db.example.com:5432/app?user=admin&password=s3cr3t&sslmode=require", + ) + .unwrap(); + assert_eq!(normalized, "postgresql://admin:s3cr3t@db.example.com:5432/app?sslmode=require"); + } + + #[test] + fn preserves_existing_credentials() { + let normalized = + normalize_postgres_url("//db.example.com:5432/app?sslmode=prefer").unwrap(); + assert_eq!(normalized, "postgresql://db.example.com:5432/app?sslmode=prefer"); + } + + #[test] + fn rejects_empty_input() { + assert!(normalize_postgres_url("").is_err()); + } +} diff --git a/tests/jdbc_rule.rs b/tests/jdbc_rule.rs new file mode 100644 index 0000000..def97a9 --- /dev/null +++ 
b/tests/jdbc_rule.rs @@ -0,0 +1,79 @@ +use std::collections::BTreeSet; + +use anyhow::{anyhow, Result}; +use kingfisher::{rules::rule::RuleSyntax, safe_list}; + +fn load_jdbc_rule() -> Result<RuleSyntax> { + let rules = RuleSyntax::from_yaml_file("data/rules/jdbc.yml")?; + rules + .into_iter() + .find(|rule| rule.id == "kingfisher.jdbc.1") + .ok_or_else(|| anyhow!("JDBC rule not found")) +} + +#[test] +fn jdbc_rule_matches_expected_patterns() -> Result<()> { + let rule = load_jdbc_rule()?; + let regex = rule.as_regex()?; + + let sample = r#" + datasource.url=jdbc:postgresql://db.acme.local:5432/app?user=svc_writer&password=P@s5w0rd + connection.read=jdbc:mysql://analyst:letmein@reports.internal:3306/analytics + cache="jdbc:sqlite:/var/lib/app/cache.db" + vendor.dsn=jdbc:oracle:thin:@ora.example.net:1521/ORCLPDB1 + backup=jdbc:mysql://host:3306/db,other_token + jdbc:xyz:short // this should be ignored + somejdbc:mysql://host/db // false prefix + jdbc:mysql://host/db>next // malformed with trailing bracket + "#; + + let matches: BTreeSet<String> = regex + .captures_iter(sample.as_bytes()) + .filter_map(|caps| caps.name("TOKEN")) + .map(|m| String::from_utf8_lossy(m.as_bytes()).into_owned()) + .collect(); + + let expected = BTreeSet::from([ + "jdbc:postgresql://db.acme.local:5432/app?user=svc_writer&password=P@s5w0rd".to_string(), + "jdbc:mysql://analyst:letmein@reports.internal:3306/analytics".to_string(), + "jdbc:sqlite:/var/lib/app/cache.db".to_string(), + "jdbc:oracle:thin:@ora.example.net:1521/ORCLPDB1".to_string(), + "jdbc:mysql://host:3306/db".to_string(), + ]); + + assert_eq!(matches, expected); + Ok(()) +} + +#[test] +fn jdbc_rule_respects_user_skip_regex() -> Result<()> { + safe_list::clear_user_filters_for_tests(); + safe_list::add_user_regex(r"^jdbc:sqlite::temporary_ignore_secret$")?; + + let rule = load_jdbc_rule()?; + let regex = rule.as_regex()?; + + let sample = r#" + jdbc:sqlite::temporary_ignore_secret + jdbc:mysql://data_ingest:pa55word@analytics.internal:3306/raw + 
"#; + + let matches: Vec<String> = regex + .captures_iter(sample.as_bytes()) + .filter_map(|caps| caps.name("TOKEN")) + .map(|m| String::from_utf8_lossy(m.as_bytes()).into_owned()) + .collect(); + + let retained: Vec<String> = matches + .into_iter() + .filter(|m| !safe_list::is_user_match(m.as_bytes(), m.as_bytes())) + .collect(); + + safe_list::clear_user_filters_for_tests(); + + assert_eq!( + retained, + vec!["jdbc:mysql://data_ingest:pa55word@analytics.internal:3306/raw".to_string()] + ); + Ok(()) +}