- Skip reporting MongoDB and Postgres findings when their connection strings cannot be parsed, even when validation is disabled.

- Improve MySQL detection by broadening URI coverage and adding live validation that skips clearly invalid connection strings.
This commit is contained in:
Mick Grove 2025-11-15 08:11:25 -08:00
commit f9d75eaadd
14 changed files with 475 additions and 71 deletions

View file

@ -2,6 +2,10 @@
All notable changes to this project will be documented in this file.
## [v1.65.0]
- Skip reporting MongoDB and Postgres findings when their connection strings cannot be parsed, even when validation is disabled.
- Improve MySQL detection by broadening URI coverage and adding live validation that skips clearly invalid connection strings.
## [v1.64.0]
- Fixed a bug where using --redact broke validation
- Added JDBC rule with validator

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.64.0"
version = "1.65.0"
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true
@ -108,6 +108,7 @@ base32 = "0.5.1"
crossbeam-skiplist = "0.1.3"
tokio-postgres = { version = "0.7", default-features = false, features = ["runtime"] }
mongodb = { version = "3.2", default-features = false, features = ["rustls-tls", "aws-auth", "compat-3-0-0", "dns-resolver"] }
mysql_async = { version = "0.34.2", default-features = false, features = ["default-rustls"] }
bson = "2.15.0"
ring = "0.17.14"
pem = "3.0.5"

View file

@ -81,6 +81,10 @@ rules:
mongodb(?:\+srv)?://[\S]{3,50}:(?:[\S]{3,88})@[-.%\w/:]+
)
\b
pattern_requirements:
ignore_if_contains:
- "*****"
- "xxxxx"
min_entropy: 3
examples:
- client = mongoc_client_new ("mongodb+srv://someuser:hunter2@my-atlas-rd941.mongodb.net/test?retryWrites=true&w=majority");

View file

@ -1,7 +0,0 @@
rules:
- name: MySQL URI with Credentials
id: kingfisher.mysql.1
pattern: (?xi)\bmysql:\/\/[a-z0-9]+:([a-z0-9!@\#$%^&*()_+{}|:<>?=\\-]+)@[a-z0-9.]+:[0-9]+\/[a-z0-9]+\b
min_entropy: 3.5
examples:
- CONNECTION_URI="mysql://nimda:m42p!o@2wd@google.com:5434/elephant"

45
data/rules/mysql.yml Normal file
View file

@ -0,0 +1,45 @@
rules:
- name: MySQL URI with Credentials
id: kingfisher.mysql.1
pattern: |
(?xi)
(
mysql:\/\/
(?:
[a-z0-9._%+\-]+
)
:
(?:
[^\s:@]+
)
@
(?:
\[
[0-9a-f:.]+
\]
|
[a-z0-9.-]+
)
(?::
\d{2,5}
)?
(?:
\/
[^\s"'?:]+
)?
(?:
\?
[^\s"']*
)?
)
pattern_requirements:
ignore_if_contains:
- "*****"
- "xxxxx"
min_entropy: 3.3
confidence: medium
examples:
- CONNECTION_URI="mysql://nimda:m42p!o@2wd@google.com:3306/elephant"
- mysql://user:pass@example.com:4406/app_db?ssl-mode=REQUIRED
validation:
type: MySQL

View file

@ -24,6 +24,10 @@ rules:
(?:
\d+
)
pattern_requirements:
ignore_if_contains:
- "*****"
- "xxxxx"
min_entropy: 3.3
confidence: medium
examples:

View file

@ -5,27 +5,27 @@
// * Fallback - system allocator (`system-alloc` feature)
// ────────────────────────────────────────────────────────────
// --- jemalloc (opt-in) ---
#[cfg(feature = "use-jemalloc")]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
// // --- jemalloc (opt-in) ---
// #[cfg(feature = "use-jemalloc")]
// #[global_allocator]
// static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
// --- mimalloc (default) ---
#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))]
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
// --- system allocator (explicit opt-out) ---
#[cfg(feature = "system-alloc")]
use std::alloc::System;
#[cfg(feature = "system-alloc")]
#[global_allocator]
static GLOBAL: System = System;
// // --- mimalloc (default) ---
// #[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))]
// #[global_allocator]
// static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
// // --- system allocator (explicit opt-out) ---
// #[cfg(feature = "system-alloc")]
// use std::alloc::System;
// #[cfg(feature = "system-alloc")]
// #[global_allocator]
// static GLOBAL: System = System;
use std::alloc::System;
#[global_allocator]
static GLOBAL: System = System;
use std::{
io::{IsTerminal, Read},
sync::{Arc, Mutex},

View file

@ -29,12 +29,13 @@ use crate::{
parser,
parser::{Checker, Language},
rule_profiling::{ConcurrentRuleProfiler, RuleStats, RuleTimer},
rules::rule::{PatternRequirementContext, PatternValidationResult, Rule},
rules::rule::{PatternRequirementContext, PatternValidationResult, Rule, Validation},
rules_database::RulesDatabase,
safe_list::{is_safe_match, is_user_match},
scanner_pool::ScannerPool,
snippet::Base64BString,
util::intern,
validation::{is_parseable_mongodb_uri, is_parseable_mysql_uri, is_parseable_postgres_uri},
};
const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment
@ -698,6 +699,44 @@ fn filter_match<'b>(
debug!("Skipping match due to inline ignore directive");
continue;
}
if let Some(validation) = rule.syntax.validation.as_ref() {
match validation {
Validation::MongoDB => {
let Ok(uri) = std::str::from_utf8(matching_input.as_bytes()) else {
debug!("Skipping match for rule {} due to non-UTF8 MongoDB URI", rule.id());
continue;
};
if !is_parseable_mongodb_uri(uri) {
debug!("Skipping match for rule {} due to invalid MongoDB URI", rule.id());
continue;
}
}
Validation::Postgres => {
let Ok(uri) = std::str::from_utf8(matching_input.as_bytes()) else {
debug!(
"Skipping match for rule {} due to non-UTF8 Postgres URI",
rule.id()
);
continue;
};
if !is_parseable_postgres_uri(uri) {
debug!("Skipping match for rule {} due to invalid Postgres URI", rule.id());
continue;
}
}
Validation::MySQL => {
let Ok(uri) = std::str::from_utf8(matching_input.as_bytes()) else {
debug!("Skipping match for rule {} due to non-UTF8 MySQL URI", rule.id());
continue;
};
if !is_parseable_mysql_uri(uri) {
debug!("Skipping match for rule {} due to invalid MySQL URI", rule.id());
continue;
}
}
_ => {}
}
}
let match_key = compute_match_key(
matching_input.as_bytes(),
rule.id().as_bytes(),

View file

@ -46,6 +46,7 @@ pub enum Validation {
Coinbase,
GCP,
MongoDB,
MySQL,
Postgres,
Jdbc,
JWT,

View file

@ -32,6 +32,7 @@ mod httpvalidation;
mod jdbc;
mod jwt;
mod mongodb;
mod mysql;
mod postgres;
mod utils;
@ -104,6 +105,21 @@ where
aws::set_aws_skip_account_ids(ids);
}
/// Returns `true` if the provided string can be parsed as a MongoDB connection URI.
///
/// Public wrapper that forwards `uri` unchanged to `looks_like_mongodb_uri` in the private
/// `mongodb` submodule and returns its verdict as-is.
// NOTE(review): the exact acceptance criteria live in `looks_like_mongodb_uri`, which is not
// visible here — confirm it performs a real parse rather than a lighter heuristic.
pub fn is_parseable_mongodb_uri(uri: &str) -> bool {
mongodb::looks_like_mongodb_uri(uri)
}
/// Returns `true` if the provided string can be parsed as a Postgres connection URI.
///
/// Public wrapper around `parse_postgres_url` in the private `postgres` submodule: the result is
/// `true` exactly when that parser returns `Ok`. The parsed configuration itself is discarded.
pub fn is_parseable_postgres_uri(uri: &str) -> bool {
postgres::parse_postgres_url(uri).is_ok()
}
/// Returns `true` if the provided string can be parsed as a MySQL connection URI.
///
/// Public wrapper around `parse_mysql_url` in the private `mysql` submodule: the result is
/// `true` exactly when that parser returns `Ok`. The parsed options themselves are discarded.
pub fn is_parseable_mysql_uri(uri: &str) -> bool {
mysql::parse_mysql_url(uri).is_ok()
}
#[derive(Clone)]
pub struct CachedResponse {
pub body: String,
@ -617,6 +633,63 @@ async fn timed_validate_single_match<'a>(
}
}
// ---------------------------------------------------- MySQL validator
Some(Validation::MySQL) => {
let mysql_url = globals
.get("TOKEN")
.and_then(|v| v.as_scalar())
.map(|s| s.into_owned().to_kstr().to_string())
.unwrap_or_default();
if mysql_url.is_empty() {
m.validation_success = false;
m.validation_response_body = "MySQL URL not found.".to_string();
m.validation_response_status = StatusCode::BAD_REQUEST;
commit_and_return(m);
return;
}
let cache_key = mysql::generate_mysql_cache_key(&mysql_url);
if let Some(cached) = cache.get(&cache_key) {
let c = cached.value();
if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
m.validation_success = c.is_valid;
m.validation_response_body = c.body.clone();
m.validation_response_status = c.status;
commit_and_return(m);
return;
}
}
match mysql::validate_mysql(&mysql_url).await {
Ok((ok, meta)) => {
m.validation_success = ok;
m.validation_response_body = if ok {
format!("MySQL connection is valid. Metadata: {:?}", meta)
} else {
"MySQL connection failed.".to_string()
};
m.validation_response_status =
if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
}
Err(e) => {
m.validation_success = false;
m.validation_response_body = format!("MySQL error: {}", e);
m.validation_response_status = StatusCode::BAD_GATEWAY;
}
}
cache.insert(
cache_key,
CachedResponse {
body: m.validation_response_body.clone(),
status: m.validation_response_status,
is_valid: m.validation_success,
timestamp: Instant::now(),
},
);
}
// ------------------------------------------------ Azure Storage validator
Some(Validation::AzureStorage) => {
let storage_key = captured_values

View file

@ -68,50 +68,6 @@ pub async fn validate_cdp_api_key(
Ok((ok, msg))
}
// fn build_jwt(
// method: &str,
// host: &str,
// endpoint: &str,
// cred_name: &str,
// pem: &str,
// ) -> Result<String> {
// let pem =
// pem.replace("\r\n", "\n").replace("\\r\\n", "\n").replace("\\n", "\n").replace("\r", "\n");
// let secret_key = SecretKey::from_sec1_pem(&pem)
// .or_else(|_| SecretKey::from_pkcs8_pem(&pem))
// .map_err(|e| anyhow!("invalid EC key: {e}"))?;
// let signing_key = SigningKey::from(secret_key);
// let mut rng = OsRng;
// let mut nonce = [0u8; 16];
// let _ = rng.try_fill_bytes(&mut nonce);
// let header = serde_json::json!({
// "typ": "JWT",
// "alg": "ES256",
// "kid": cred_name,
// "nonce": hex::encode(nonce),
// });
// let header_b64 = URL_SAFE_NO_PAD.encode(header.to_string());
// let now = Utc::now().timestamp();
// let claims = serde_json::json!({
// "sub": cred_name,
// "iss": "cdp",
// "nbf": now,
// "exp": now + 60,
// "uri": format!("{} {}{}", method, host, endpoint),
// });
// let claims_b64 = URL_SAFE_NO_PAD.encode(claims.to_string());
// let signing_input = format!("{header_b64}.{claims_b64}");
// let sig: p256::ecdsa::Signature = signing_key.sign(signing_input.as_bytes());
// let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes());
// Ok(format!("{signing_input}.{sig_b64}"))
// }
fn build_jwt(
method: &str,
host: &str,

167
src/validation/mysql.rs Normal file
View file

@ -0,0 +1,167 @@
use std::{net::IpAddr, time::Duration};
use anyhow::{anyhow, Result};
use mysql_async::{prelude::Queryable, Conn, Opts, OptsBuilder};
use tokio::time::{error::Elapsed, timeout};
use tracing::debug;
use url::Url;
const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
/// Parses and sanity-checks a `mysql://` connection URL.
///
/// Surrounding whitespace is ignored. The URL must use the `mysql://` scheme (matched
/// case-insensitively) and carry a non-empty username, a non-empty password, and either a host
/// or a `socket` query parameter. The same requirements are then re-checked on the
/// [`Opts`] value produced by [`Opts::from_url`].
///
/// # Errors
///
/// Returns an error describing the first requirement that is not met, or the underlying
/// parser error when the URL cannot be parsed at all.
pub fn parse_mysql_url(mysql_url: &str) -> Result<Opts> {
    const SCHEME: &[u8] = b"mysql://";

    let candidate = mysql_url.trim();
    if candidate.is_empty() {
        return Err(anyhow!("MySQL URL is empty"));
    }

    // Compare only the scheme-sized prefix, ignoring ASCII case.
    let has_scheme = matches!(
        candidate.as_bytes().get(..SCHEME.len()),
        Some(prefix) if prefix.eq_ignore_ascii_case(SCHEME)
    );
    if !has_scheme {
        return Err(anyhow!("MySQL URL must start with mysql://"));
    }

    // First pass: generic URL structure.
    let url = Url::parse(candidate).map_err(|e| anyhow!("Failed to parse MySQL URL: {e}"))?;
    if url.username().is_empty() {
        return Err(anyhow!("MySQL URL is missing a username"));
    }
    if url.password().filter(|p| !p.is_empty()).is_none() {
        return Err(anyhow!("MySQL URL is missing a password"));
    }
    let host_absent = url.host_str().filter(|h| !h.is_empty()).is_none();
    if host_absent && !url.query_pairs().any(|(key, _)| key == "socket") {
        return Err(anyhow!("MySQL URL is missing a host"));
    }

    // Second pass: the driver's own interpretation of the same URL.
    let opts =
        Opts::from_url(candidate).map_err(|e| anyhow!("Failed to parse MySQL URL: {e}"))?;
    if opts.user().filter(|u| !u.is_empty()).is_none() {
        return Err(anyhow!("MySQL URL is missing a username"));
    }
    if opts.pass().filter(|p| !p.is_empty()).is_none() {
        return Err(anyhow!("MySQL URL is missing a password"));
    }
    if opts.socket().is_none() && opts.ip_or_hostname().is_empty() {
        return Err(anyhow!("MySQL URL is missing a host"));
    }

    Ok(opts)
}
/// Builds the validation-cache key for a MySQL URL.
///
/// The key is the literal prefix `MySQL:` followed by the lowercase hexadecimal SHA-1 digest of
/// the URL's bytes, so the connection string itself is not used as the key.
pub fn generate_mysql_cache_key(mysql_url: &str) -> String {
    use sha1::{Digest, Sha1};

    let digest = Sha1::digest(mysql_url.as_bytes());
    format!("MySQL:{digest:x}")
}
/// Returns `true` when `host` names the local machine.
///
/// A host is considered local when, after stripping surrounding whitespace, IPv6 brackets, and
/// a single trailing root dot, it is either:
///
/// * one of the well-known loopback host names (compared case-insensitively), or
/// * an IP literal that is a loopback or unspecified address. IPv4-mapped IPv6 literals such
///   as `::ffff:127.0.0.1` are classified by the IPv4 address they embed.
///
/// Anything else, including the empty string, is treated as non-local.
fn is_local_host(host: &str) -> bool {
    // Trim whitespace both outside and inside the brackets so `" [::1] "` is recognised.
    let host = host.trim().trim_matches(|c| c == '[' || c == ']').trim();
    // `localhost.` is the fully-qualified spelling of `localhost`.
    let host = host.strip_suffix('.').unwrap_or(host);

    let lower = host.to_ascii_lowercase();
    if matches!(
        lower.as_str(),
        "localhost"
            | "localhost.localdomain"
            | "localhost6"
            | "localhost6.localdomain6"
            | "ip6-localhost"
            | "ip6-loopback"
    ) {
        return true;
    }

    match host.parse::<IpAddr>() {
        Ok(ip) => {
            // `Ipv6Addr::is_loopback` only matches `::1`, so unwrap IPv4-mapped addresses first;
            // otherwise `::ffff:127.0.0.1` would slip past the loopback check.
            let ip = match ip {
                IpAddr::V6(v6) => v6.to_ipv4_mapped().map_or(ip, IpAddr::V4),
                IpAddr::V4(_) => ip,
            };
            // Covers `0.0.0.0` and `::` as well as the loopback ranges.
            ip.is_loopback() || ip.is_unspecified()
        }
        Err(_) => false,
    }
}
/// Returns `true` when the connection options point at this machine: either a socket path is
/// configured, or the configured host is classified as local by `is_local_host`.
fn targets_localhost(opts: &Opts) -> bool {
    opts.socket().is_some() || is_local_host(opts.ip_or_hostname())
}
pub async fn validate_mysql(mysql_url: &str) -> Result<(bool, Vec<String>)> {
let opts = parse_mysql_url(mysql_url)?;
if targets_localhost(&opts) {
debug!("Skipping MySQL validation: host is localhost/loopback or unix socket");
return Ok((false, vec!["skipped localhost/loopback host".into()]));
}
let builder = OptsBuilder::from_opts(opts).stmt_cache_size(Some(0));
let opts: Opts = builder.into();
let host = opts.ip_or_hostname().to_string();
let db_name = opts.db_name().map(|s| s.to_string()).unwrap_or_else(|| "mysql".to_string());
let user = opts.user().map(|s| s.to_string()).unwrap_or_else(|| "<unknown>".to_string());
let res: Result<Result<(), mysql_async::Error>, Elapsed> = timeout(CONNECT_TIMEOUT, async {
let mut conn = Conn::new(opts).await?;
conn.query_drop("SELECT 1").await?;
conn.disconnect().await?;
Ok(())
})
.await;
match res {
Ok(Ok(())) => Ok((
true,
vec![format!("user={user}"), format!("host={host}"), format!("database={db_name}")],
)),
Ok(Err(e)) => Err(anyhow!("MySQL connection failed: {e}")),
Err(_) => Err(anyhow!("MySQL connection timed out after {CONNECT_TIMEOUT:?}")),
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A complete URL yields options exposing the user, password, and host it was built from.
    #[test]
    fn parse_mysql_url_accepts_valid_urls() {
        let parsed = parse_mysql_url("mysql://user:secret@example.com:3306/app")
            .expect("expected valid MySQL URL");

        assert_eq!(parsed.user(), Some("user"));
        assert_eq!(parsed.pass(), Some("secret"));
        assert_eq!(parsed.ip_or_hostname(), "example.com");
    }

    /// Each malformed candidate must be rejected by the parser.
    #[test]
    fn parse_mysql_url_rejects_invalid_urls() {
        const REJECTED: [&str; 6] = [
            "",                                          // empty
            "mysql://user@example.com/app",              // missing password
            "mysql://:secret@example.com/app",           // missing username
            "mysql://user:secret@:3306/app",             // missing host
            "postgres://user:secret@example.com",        // wrong scheme
            "mysql://user:secret@example.com:70000/app", // invalid port
        ];

        for candidate in REJECTED {
            let outcome = parse_mysql_url(candidate);
            assert!(outcome.is_err(), "expected parsing to fail for {candidate}");
        }
    }

    /// Leading and trailing whitespace around the URL is tolerated.
    #[test]
    fn parse_mysql_url_allows_trimming_whitespace() {
        let padded = "  mysql://user:secret@example.com:3306/app  ";
        let parsed = parse_mysql_url(padded).expect("trimmed URL");

        assert_eq!(parsed.user(), Some("user"));
        assert_eq!(parsed.pass(), Some("secret"));
    }
}

View file

@ -31,9 +31,23 @@ pub fn generate_postgres_cache_key(postgres_url: &str) -> String {
format!("Postgres:{:x}", hasher.finalize())
}
/// Parses a Postgres connection URL into a [`Config`].
///
/// The string is first parsed as given. If that fails and the string starts with
/// `postgis://`, the scheme is swapped for `postgres://` and parsing is retried.
///
/// # Errors
///
/// Returns "Failed to parse Postgres URL: ..." carrying the error from the *first* parse
/// attempt, whether or not a `postgis://` retry was made.
pub fn parse_postgres_url(postgres_url: &str) -> Result<Config> {
    Config::from_str(postgres_url)
        .or_else(|first_err| match postgres_url.strip_prefix("postgis://") {
            // Retry with the canonical scheme, but keep reporting the original failure.
            Some(rest) => {
                Config::from_str(&format!("postgres://{rest}")).map_err(|_| first_err)
            }
            None => Err(first_err),
        })
        .map_err(|e| anyhow!("Failed to parse Postgres URL: {e}"))
}
pub async fn validate_postgres(postgres_url: &str) -> Result<(bool, Vec<String>)> {
let mut cfg =
Config::from_str(postgres_url).map_err(|e| anyhow!("Failed to parse Postgres URL: {e}"))?;
let mut cfg = parse_postgres_url(postgres_url)?;
// --- skip localhost/loopback/unix-socket targets entirely -------------
if has_any_local_host(&cfg) {
@ -189,7 +203,10 @@ fn missing_cluster_identifier(err_msg: &str) -> bool {
#[cfg(test)]
mod tests {
use super::{is_local_tcp_host, missing_cluster_identifier, server_requires_encryption};
use super::{
is_local_tcp_host, missing_cluster_identifier, parse_postgres_url,
server_requires_encryption,
};
#[test]
fn detects_encryption_requirement() {
@ -222,4 +239,16 @@ mod tests {
assert!(!is_local_tcp_host(h), "should not treat {h} as local");
}
}
#[test]
fn parse_accepts_postgis_scheme() {
let url = "postgis://postgres:secret@example.com:5432";
assert!(parse_postgres_url(url).is_ok(), "postgis scheme should be accepted");
}
#[test]
fn parse_rejects_invalid_port() {
let url = "postgres://postgres:secret@example.com:70000";
assert!(parse_postgres_url(url).is_err(), "invalid port should be rejected");
}
}

88
tests/int_uri_parsing.rs Normal file
View file

@ -0,0 +1,88 @@
use assert_cmd::prelude::*;
use predicates::prelude::*;
use std::{fs, process::Command};
use tempfile::tempdir;
#[test]
fn filters_invalid_mongodb_uri_even_without_validation() -> anyhow::Result<()> {
let dir = tempdir()?;
let file_path = dir.path().join("mongo.txt");
let valid = "mongodb://usr:pass@example.com:27017/db";
let invalid = "mongodb://usr:pass@example.com:abc/db";
fs::write(&file_path, format!("{valid}\n{invalid}\n"))?;
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
.args([
"scan",
dir.path().to_str().unwrap(),
"--no-binary",
"--confidence=low",
"--format",
"json",
"--no-validate",
"--no-update-check",
])
.assert()
.code(200)
.stdout(predicate::str::contains(valid))
.stdout(predicate::str::contains(invalid).not());
dir.close()?;
Ok(())
}
#[test]
fn filters_invalid_postgres_uri_even_without_validation() -> anyhow::Result<()> {
let dir = tempdir()?;
let file_path = dir.path().join("postgres.txt");
let valid = "postgres://postgres:secret@example.com:5432";
let invalid = "postgres://postgres:secret@example.com:70000";
fs::write(&file_path, format!("{valid}\n{invalid}\n"))?;
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
.args([
"scan",
dir.path().to_str().unwrap(),
"--no-binary",
"--confidence=low",
"--format",
"json",
"--no-validate",
"--no-update-check",
])
.assert()
.code(200)
.stdout(predicate::str::contains(valid))
.stdout(predicate::str::contains(invalid).not());
dir.close()?;
Ok(())
}
#[test]
fn filters_invalid_mysql_uri_even_without_validation() -> anyhow::Result<()> {
let dir = tempdir()?;
let file_path = dir.path().join("mysql.txt");
let valid = "mysql://user:secret@example.com:3306/app";
let invalid = "mysql://user:secret@example.com:70000/app";
fs::write(&file_path, format!("{valid}\n{invalid}\n"))?;
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
.args([
"scan",
dir.path().to_str().unwrap(),
"--no-binary",
"--confidence=low",
"--format",
"json",
"--no-validate",
"--no-update-check",
])
.assert()
.code(200)
.stdout(predicate::str::contains(valid))
.stdout(predicate::str::contains(invalid).not());
dir.close()?;
Ok(())
}