- Improved rules: github oauth2, diffbot, mailchimp, aws

- Added validation to SauceLabs rule
- Added rules: shodan, bitly, flickr
This commit is contained in:
Mick Grove 2025-08-29 17:24:26 -07:00
commit e54dbe90d0
13 changed files with 487 additions and 55 deletions

View file

@ -2,10 +2,17 @@
All notable changes to this project will be documented in this file.
## [Unreleased]
- MongoDB validator now validates `mongodb+srv://` URIs with a fast timeout instead of skipping them
- Improved rules: github oauth2, diffbot, mailchimp, aws
- Added validation to SauceLabs rule
- Added rules: shodan, bitly, flickr
## [1.46.0]
- Improved rules: AWS, pem
- Added rules for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, together.ai, zhipu
- Added `self-update` command to update the binary independently. It also supports updating a Homebrew-managed binary
- MongoDB validator now checks `mongodb+srv://` URIs with fast-fail timeouts
## [1.45.0]
- Added `--repo-artifacts` flag to scan repository issues, gists/snippets, and wikis when cloning via `--git-url`

View file

@ -22,7 +22,7 @@ rules:
(?:
\b
(?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)
(?:.|[\n\r]){0,32}?
(?:.|[\n\r]){0,64}?
\b
(
[A-Z0-9/+=]{40}
@ -34,7 +34,7 @@ rules:
(?:SECRET|PRIVATE|ACCESS)
(?:.|[\n\r]){0,16}?
(?:KEY|TOKEN)
(?:.|[\n\r]){0,32}?
(?:.|[\n\r]){0,64}?
\b
(
[A-Z0-9/+=]{40}

36
data/rules/bitly.yml Normal file
View file

@ -0,0 +1,36 @@
rules:
# Detects a Bitly access token: a 40-character hex string that follows the
# keyword "bitly" and one of SECRET/PRIVATE/ACCESS/KEY/TOKEN, each separated by
# at most 32 arbitrary characters (newlines included). Matching is
# case-insensitive via the (?xi) flags.
- name: Bitly Access Token
id: kingfisher.bitly.1
pattern: |
(?xi)
\b
bitly
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,32}?
\b
(
[a-f0-9]{40}
)
\b
confidence: medium
min_entropy: 3.0
# Live check: send the captured token as a Bearer credential to the Bitly
# /v4/user endpoint. The token is reported valid only when the response is
# HTTP 200 and the body contains a "login" field.
validation:
type: Http
content:
request:
method: GET
url: "https://api-ssl.bitly.com/v4/user"
headers:
Authorization: "Bearer {{ TOKEN }}"
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words:
- '"login":'
references:
- https://dev.bitly.com/api-reference#Authentication
examples:
- "bitly_token = 20e9827b9c5ddee1b0cec7722bfc557dec833791"

View file

@ -27,9 +27,8 @@ rules:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid
- type: WordMatch
match_all_words: true
words:
- '"name"'
- '"email"'
- '"email"'
- '"planCredits"'

72
data/rules/flickr.yml Normal file
View file

@ -0,0 +1,72 @@
rules:
# Detects a Flickr API key: a 32-character hex string within 32 characters of
# the keyword "flickr", optionally with SECRET/PRIVATE/ACCESS/KEY/TOKEN in
# between. Matching is case-insensitive via the (?xi) flags.
- name: Flickr API Key
id: kingfisher.flickr.1
pattern: |
(?xi)
\b
flickr
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)?
(?:.|[\n\r]){0,32}?
\b
(
[a-f0-9]{32}
)
\b
confidence: medium
min_entropy: 3.0
# Live check: call flickr.test.login with the candidate key. The endpoint
# answers HTTP 200 even for rejected keys, so validity is decided by the
# ABSENCE of the "Invalid API Key" error text in the body (negative match).
validation:
type: Http
content:
request:
method: GET
url: "https://www.flickr.com/services/rest/?method=flickr.test.login&api_key={{TOKEN}}&format=json&nojsoncallback=1"
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words:
# Matched without surrounding JSON quotes: Flickr's message continues after
# the phrase (e.g. "Invalid API Key (Key has invalid format)"), so a word
# that ends in a closing quote never matches and every key would pass.
- 'Invalid API Key'
negative: true
references:
- https://www.flickr.com/services/api/
- https://www.flickr.com/services/api/flickr.test.login.html
examples:
- "flickr_api_key: d6953dc63a9498593bfdb4287ed2293c"
# Detects a Flickr OAuth token: a 32-character hex string within 32 characters
# of the keyword "flickr", optionally with OAUTH/ACCESS/TOKEN in between.
# NOTE(review): because the keyword group is optional, this pattern accepts the
# same inputs as kingfisher.flickr.1 (both reduce to "flickr ... 32 hex chars"),
# so one string can be reported by both rules. Confirm this overlap is intended.
- name: Flickr OAuth Token
id: kingfisher.flickr.2
pattern: |
(?xi)
\b
flickr
(?:.|[\n\r]){0,32}?
(?:OAUTH|ACCESS|TOKEN)?
(?:.|[\n\r]){0,32}?
\b
(
[a-f0-9]{32}
)
\b
confidence: medium
min_entropy: 3.0
# Live check: call flickr.auth.oauth.checkToken and require HTTP 200 plus both
# '"stat":"ok"' and an '"oauth":' object in the body (match_all_words).
# NOTE(review): the same captured value is sent as both api_key and oauth_token;
# verify against the Flickr API docs that a single value can satisfy both.
validation:
type: Http
content:
request:
method: GET
url: "https://www.flickr.com/services/rest/?method=flickr.auth.oauth.checkToken&api_key={{TOKEN}}&oauth_token={{TOKEN}}&format=json&nojsoncallback=1"
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words:
- '"stat":"ok"'
- '"oauth":'
match_all_words: true
references:
- https://www.flickr.com/services/api/
- https://www.flickr.com/services/api/flickr.auth.oauth.checkToken.html
examples:
- "flickr_oauth_token: a8c1e1f1d9d34aa5a1bdbd94234bcdef"

View file

@ -166,6 +166,7 @@ rules:
(?: id | identifier | key )
.{0,2} \s{0,20} .{0,2} \s{0,20} .{0,2}
\b ([a-z0-9]{20}) \b
visible: false
examples:
- |
GITHUB_CLIENT_ID=ac58d6da7d7a84c039b7
@ -181,6 +182,26 @@ rules:
(?: key | oauth | sec | secret )?
.{0,2} \s{0,20} .{0,2} \s{0,20} .{0,2}
\b ([a-z0-9]{40}) \b
depends_on_rule:
- rule_id: "kingfisher.github.5"
variable: GITHUB_CLIENT_ID
validation:
type: Http
content:
request:
method: POST
url: "https://github.com/login/oauth/access_token"
headers:
Accept: "application/json"
Content-Type: "application/json"
body: '{"client_id":"{{GITHUB_CLIENT_ID}}","client_secret":"{{TOKEN}}","code":"invalid_code"}'
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words:
- '"error":"bad_verification_code"'
examples:
- |
GITHUB_CLIENT_ID=ac58d6da7d7a84c039b7

View file

@ -3,8 +3,9 @@ rules:
id: kingfisher.mailchimp.1
pattern: |
(?xi)
\b
mailchimp
(?:.|[\n\r]){0,32}?
(?:.|[\n\r]){0,128}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,32}?
\b

View file

@ -1,23 +1,82 @@
rules:
- name: Sauce Token
id: kingfisher.sauce.1
- name: Sauce Labs Username
id: kingfisher.saucelabs.1
pattern: |
(?x)(?i)
sauce .{0,50}
(?xi)
\b
([a-f0-9-]{36})
(?: [^a-f0-9-] | $ )
sauce
(?:.|[\n\r]){0,16}?
(?:USER|ID|NAME|CLIENT|OAUTH)
(?:.|[\n\r]){0,16}?
\b
(
[A-Z0-9_.-]{2,70}
)
\b
confidence: medium
visible: false
min_entropy: 1.0
examples:
- "SAUCE_USERNAME=oauth-someusername-487ea"
- SAUCE_USERNAME="oauth-ci-bot-487ea"
- '"sauce_username":"build-user"'
- 'saucelabs user oauth-release-bot'
# Hidden helper rule (visible: false): captures a regional Sauce Labs host such
# as api.us-west-1.saucelabs.com or ondemand.eu-central-1.saucelabs.com.
- name: Sauce Labs API Endpoint
id: kingfisher.saucelabs.2
pattern: |
(?xi)
\b
(
(?:api|ondemand)\.(?:us|eu)-(?:west|east|central)-[0-9]\.saucelabs\.com
)
\b
confidence: medium
visible: false
min_entropy: 2.0
examples:
- "api.us-west-1.saucelabs.com"
- "api.eu-central-1.saucelabs.com"
- "ondemand.eu-central-1.saucelabs.com"
- name: Sauce Labs Access Key
id: kingfisher.saucelabs.3
pattern: |
(?xi)
\b
sauce
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,32}?
\b
(
[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}
)
\b
confidence: medium
min_entropy: 3.0
examples:
- |
- SAUCE_USERNAME=vitess
- SAUCE_ACCESS_KEY=2397f603-c2c4-4897-a8ca-587ace5dc8dd
- SAUCE_ACCESS_KEY=2397f603-c2c4-4897-a8ca-587ace5dc8d-
depends_on_rule:
- rule_id: "kingfisher.saucelabs.1"
variable: SAUCE_USERNAME
- rule_id: "kingfisher.saucelabs.2"
variable: SAUCE_URL
validation:
type: Http
content:
request:
method: GET
url: "https://{{ SAUCE_URL | default: 'api.us-west-1.saucelabs.com' | replace: 'ondemand.', 'api.' }}/rest/v1/users/{{SAUCE_USERNAME}}"
headers:
Authorization: "Basic {{ SAUCE_USERNAME | append: ':' | append: TOKEN | b64enc }}"
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words:
- '"username":'
references:
- https://docs.saucelabs.com/dev/api/
- https://docs.saucelabs.com/dev/api/#authentication
examples:
- "SAUCE_ACCESS_KEY=1736468d-b178-39cd-bfde-30fabdc371e4"

34
data/rules/shodan.yml Normal file
View file

@ -0,0 +1,34 @@
rules:
# Detects a Shodan API key: a 32-character alphanumeric string that follows the
# keyword "shodan" and one of SECRET/PRIVATE/ACCESS/KEY/TOKEN, each separated by
# at most 32 arbitrary characters (newlines included). The (?xi) flags make the
# [A-Z0-9] class case-insensitive, so mixed-case keys match.
- name: SHODAN API Key
id: kingfisher.shodan.1
pattern: |
(?xi)
\b
shodan
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,32}?
\b
(
[A-Z0-9]{32}
)
\b
confidence: medium
min_entropy: 4.0
# Live check: pass the captured key as the `key` query parameter to the Shodan
# api-info endpoint. The key is reported valid only when the response is
# HTTP 200 and the body mentions "scan_credits".
validation:
type: Http
content:
request:
method: GET
url: "https://api.shodan.io/api-info?key={{TOKEN}}"
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words:
- '"scan_credits"'
references:
- https://developer.shodan.io/api
examples:
- "shodan_api_key = dqlblS2CmTOc5zYn4nZkJljYsXRnNuiq"

View file

@ -255,7 +255,7 @@ async fn async_main(args: CommandLineArgs) -> Result<()> {
}
},
},
Command::SelfUpdate => unreachable!(),
Command::SelfUpdate => anyhow::bail!("SelfUpdate command should not reach this branch"),
}
if let Some(msg) = update_msg {
info!("{msg}");

View file

@ -553,17 +553,24 @@ async fn timed_validate_single_match<'a>(
return;
}
let cache_key = mongodb::generate_mongodb_cache_key(&uri);
if let Some(cached) = cache.get(&cache_key) {
let c = cached.value();
if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
m.validation_success = c.is_valid;
m.validation_response_body = c.body.clone();
m.validation_response_status = c.status;
commit_and_return(m);
return;
}
}
match mongodb::validate_mongodb(&uri).await {
Ok((ok, msg)) => {
m.validation_success = ok;
m.validation_response_body = msg;
m.validation_response_status = if uri.starts_with("mongodb+srv://") {
StatusCode::CONTINUE
} else if ok {
StatusCode::OK
} else {
StatusCode::UNAUTHORIZED
};
m.validation_response_status =
if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
}
Err(e) => {
m.validation_success = false;

View file

@ -1,9 +1,10 @@
// src/validation/mongodb.rs
use std::time::Duration;
use std::{net::IpAddr, time::Duration};
use anyhow::Result;
use bson::doc;
use mongodb::{options::ClientOptions, Client};
use mongodb::{error::ErrorKind, options::ClientOptions, Client};
use tokio::time::timeout;
pub fn looks_like_mongodb_uri(uri: &str) -> bool {
// quick scheme check first
@ -14,10 +15,87 @@ pub fn looks_like_mongodb_uri(uri: &str) -> bool {
mongodb::options::ConnectionString::parse(uri).is_ok()
}
/// Returns `true` if the URI targets localhost/loopback/unspecified addresses or a
/// unix domain socket.
///
/// This is a *string-only* check: no DNS resolution and no driver I/O is performed.
/// Every entry of a comma-separated seed list (`mongodb://hostA,hostB,...`) is
/// inspected, and a single local entry is enough to return `true`.
fn uri_targets_localhost(uri: &str) -> bool {
    // Strip the scheme; tolerate input that has none.
    let rest = uri
        .strip_prefix("mongodb://")
        .or_else(|| uri.strip_prefix("mongodb+srv://"))
        .unwrap_or(uri);

    // A raw (un-encoded) socket path directly after the scheme is a unix domain socket.
    // This must be tested on `rest`: the authority below is cut at the first '/',
    // so it can never start with one.
    if rest.starts_with('/') {
        return true;
    }

    // The authority ends at the first '/' (database/path) or '?' (options); if neither
    // is present it is the whole remainder.
    let authority = rest
        .find(|c: char| c == '/' || c == '?')
        .map_or(rest, |end| &rest[..end]);

    // Drop userinfo if present. The last '@' is used because a password may contain one.
    let hostlist = authority.rsplit_once('@').map_or(authority, |(_, hosts)| hosts);

    hostlist.split(',').any(|part| {
        let entry = part.trim();
        // Percent-encoded socket path ("%2Ftmp%2Fmongodb.sock") → unix domain socket.
        // Checked per entry so that a userinfo prefix cannot hide it.
        if entry.to_ascii_lowercase().starts_with("%2f") {
            return true;
        }
        is_local_host(host_without_port(entry))
    })
}

/// Returns the host portion of one seed-list entry, without brackets or `:port`.
///
/// Bracketed IPv6 literals (`[::1]` or `[::1]:27017`) yield the text inside the
/// brackets. For other entries a trailing `:digits` is removed, but only when the
/// remaining name contains no further ':' so that an unbracketed IPv6 literal such as
/// `::1` is left intact.
fn host_without_port(part: &str) -> &str {
    let host = part.trim();
    if let Some(bracketed) = host.strip_prefix('[') {
        // Without a closing bracket the entry is malformed; return it untouched.
        return bracketed.split_once(']').map_or(host, |(inner, _)| inner);
    }
    match host.rsplit_once(':') {
        Some((name, port))
            if !name.contains(':') && port.bytes().all(|b| b.is_ascii_digit()) =>
        {
            name
        }
        _ => host,
    }
}

/// Returns `true` for localhost/loopback/unspecified IPs and common localhost aliases.
///
/// A single trailing-dot run (fully-qualified form such as `localhost.`) is ignored and
/// alias comparison is ASCII case-insensitive.
fn is_local_host(h: &str) -> bool {
    let s = h.trim().trim_end_matches('.');

    // Common aliases seen in hosts files across distros.
    if matches!(
        s.to_ascii_lowercase().as_str(),
        "localhost"
            | "localhost.localdomain"
            | "localhost6"
            | "localhost6.localdomain6"
            | "ip6-localhost"
            | "ip6-loopback"
    ) {
        return true;
    }

    // Literal IPs: loopback (127.0.0.0/8, ::1) and unspecified (0.0.0.0, ::).
    s.parse::<IpAddr>()
        .map_or(false, |ip| ip.is_loopback() || ip.is_unspecified())
}
const FAST_CONNECT_MS: u64 = 700; // direct single-host URIs
const FAST_SELECT_MS: u64 = 300;
const SRV_CONNECT_MS: u64 = 15_000; // gives Atlas a fighting chance
const SRV_SELECT_MS: u64 = 15_000;
const SRV_PARSE_MS: u64 = 1_000; // limit DNS resolution time
const SRV_CONNECT_MS: u64 = 1500;//700;
const SRV_SELECT_MS: u64 = 1500;//300;
/// Validates a MongoDB URI in ≤ 2 s. Returns `(bool, String)` where the
/// boolean indicates success and the string provides a status message.
@ -27,25 +105,35 @@ pub async fn validate_mongodb(uri: &str) -> Result<(bool, String)> {
return Ok((false, "Invalid MongoDB URI".to_string()));
}
let is_srv = uri.starts_with("mongodb+srv://");
if is_srv {
// Skip SRV URIs to avoid slow DNS lookups and topology discovery.
// ---- refuse localhost/loopback/UDS outright
if uri_targets_localhost(uri) {
return Ok((
false,
"Validation skipped for mongodb+srv:// URI (performance reasons)".to_string(),
"Refusing to validate localhost/loopback MongoDB URIs.".to_string(),
));
}
// ---- build client opts
let mut opts = ClientOptions::parse(uri).await?;
let is_srv = uri.starts_with("mongodb+srv://");
// ---- build client opts (guarded so we don't hit DNS/driver first)
let mut opts = if is_srv {
match timeout(Duration::from_millis(SRV_PARSE_MS), ClientOptions::parse(uri)).await {
Ok(res) => res?,
Err(_) => {
return Ok((false, "MongoDB connection failed: timeout exceeded".to_string()));
}
}
} else {
ClientOptions::parse(uri).await?
};
if !is_srv {
// one socket, skip cluster discovery for plain 'mongodb://'
opts.direct_connection = Some(true);
opts.connect_timeout = Some(Duration::from_millis(FAST_CONNECT_MS));
opts.server_selection_timeout = Some(Duration::from_millis(FAST_SELECT_MS));
} else {
// SRV needs DNS and replica-set discovery; give it a couple seconds
// SRV needs DNS and replica-set discovery; fail fast
opts.connect_timeout = Some(Duration::from_millis(SRV_CONNECT_MS));
opts.server_selection_timeout = Some(Duration::from_millis(SRV_SELECT_MS));
// leave direct_connection = None (driver decides)
@ -55,18 +143,25 @@ pub async fn validate_mongodb(uri: &str) -> Result<(bool, String)> {
// ---- dial and ping
let client = Client::with_options(opts)?;
let ok = client.database("admin").run_command(doc! { "ping": 1 }).await.is_ok();
let msg = if ok {
"MongoDB connection is valid.".to_string()
} else {
"MongoDB connection failed.".to_string()
};
Ok((ok, msg))
let res = client.database("admin").run_command(doc! { "ping": 1 }).await;
match res {
Ok(_) => Ok((true, "MongoDB connection is valid.".to_string())),
Err(e) => {
let msg = match *e.kind {
ErrorKind::ServerSelection { .. } => {
"MongoDB connection failed: timeout exceeded".to_string()
}
_ => "MongoDB connection failed.".to_string(),
};
Ok((false, msg))
}
}
}
// pub fn generate_mongodb_cache_key(mongodb_uri: &str) -> String {
// use sha1::{Digest, Sha1};
// let mut hasher = Sha1::new();
// hasher.update(mongodb_uri.as_bytes());
// format!("MongoDB:{:x}", hasher.finalize())
// }
/// Builds the validation-cache key for a MongoDB URI.
///
/// The key is the literal prefix `MongoDB:` followed by the lowercase hex SHA-1 digest
/// of the URI bytes, so identical URIs always map to the same key.
pub fn generate_mongodb_cache_key(mongodb_uri: &str) -> String {
    use sha1::{Digest, Sha1};

    // One-shot hashing: equivalent to new() + update() + finalize().
    let digest = Sha1::digest(mongodb_uri.as_bytes());
    format!("MongoDB:{digest:x}")
}

View file

@ -1,16 +1,26 @@
use std::{str::FromStr, time::Duration};
use std::{str::FromStr, sync::Once, time::Duration};
use anyhow::{anyhow, Result};
use rustls::crypto::{ring, CryptoProvider};
use rustls::{client::ClientConfig, RootCertStore};
use rustls_native_certs::{load_native_certs, CertificateResult};
use sha1::{Digest, Sha1};
use tokio::time::{error::Elapsed, timeout};
use tokio_postgres::{config::SslMode, tls::NoTls, Config, Error};
use tokio_postgres::{config::{Host, SslMode}, tls::NoTls, Config, Error};
use tokio_postgres_rustls::MakeRustlsConnect;
use tracing::debug;
const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
static INIT_PROVIDER: Once = Once::new();
/// Installs the `ring` provider as the process-wide default rustls crypto provider,
/// attempting the installation at most once through `INIT_PROVIDER`.
fn ensure_crypto_provider() {
    /// Attempts the installation and discards the outcome: if some other part of the
    /// program already installed a provider, that one is kept and the error is ignored.
    fn install_ring_provider() {
        CryptoProvider::install_default(ring::default_provider()).ok();
    }

    INIT_PROVIDER.call_once(install_ring_provider);
}
pub fn generate_postgres_cache_key(postgres_url: &str) -> String {
let mut hasher = Sha1::new();
hasher.update(postgres_url.as_bytes());
@ -21,6 +31,12 @@ pub async fn validate_postgres(postgres_url: &str) -> Result<(bool, Vec<String>)
let mut cfg =
Config::from_str(postgres_url).map_err(|e| anyhow!("Failed to parse Postgres URL: {e}"))?;
// --- skip localhost/loopback/unix-socket targets entirely -------------
if has_any_local_host(&cfg) {
debug!("Skipping Postgres validation: host is localhost/loopback or unix socket");
return Ok((false, vec!["skipped localhost/loopback host".into()]));
}
let original_mode = cfg.get_ssl_mode();
if original_mode == SslMode::Prefer {
cfg.ssl_mode(SslMode::Disable);
@ -29,6 +45,36 @@ pub async fn validate_postgres(postgres_url: &str) -> Result<(bool, Vec<String>)
check_postgres_db_connection(cfg, original_mode).await
}
/// Returns `true` when at least one host configured in `cfg` is a local target:
/// a unix-domain socket, or a TCP host that `is_local_tcp_host` classifies as local.
fn has_any_local_host(cfg: &Config) -> bool {
    cfg.get_hosts().iter().any(|h| match h {
        // `Host::Unix` is only defined on Unix targets in tokio-postgres; gating the
        // arm keeps this match compiling (and exhaustive) on Windows as well.
        #[cfg(unix)]
        Host::Unix(_) => true, // local unix socket
        Host::Tcp(s) => is_local_tcp_host(s),
    })
}
/// Decides whether a TCP host string refers to the local machine.
///
/// IP literals (optionally wrapped in URI-style `[...]` brackets) count as local when
/// they are loopback, unspecified, or link-local. Any other input is compared
/// case-insensitively against `localhost` / `localhost6`, either exactly or as the
/// leading DNS label (for example `localhost.localdomain`).
fn is_local_tcp_host(s: &str) -> bool {
    let bare = s.trim_matches(|c: char| matches!(c, '[' | ']'));

    match bare.parse::<std::net::IpAddr>() {
        Ok(std::net::IpAddr::V4(addr)) => {
            addr.is_loopback() || addr.is_unspecified() || addr.is_link_local()
        }
        Ok(std::net::IpAddr::V6(addr)) => {
            addr.is_loopback() || addr.is_unspecified() || addr.is_unicast_link_local()
        }
        // Not an IP literal: fall back to the well-known localhost names.
        Err(_) => {
            let name = bare.to_ascii_lowercase();
            ["localhost", "localhost6"].iter().any(|label| {
                name.strip_prefix(*label)
                    .is_some_and(|rest| rest.is_empty() || rest.starts_with('.'))
            })
        }
    }
}
async fn check_postgres_db_connection(
mut cfg: Config,
original_mode: SslMode,
@ -52,6 +98,9 @@ async fn check_postgres_db_connection(
.await
} else {
timeout(CONNECT_TIMEOUT, async {
// Ensure Rustls crypto provider is installed *before* using the builder
ensure_crypto_provider();
let CertificateResult { certs, errors, .. } = load_native_certs();
for err in errors {
debug!("native-cert error: {err}");
@ -89,6 +138,21 @@ async fn check_postgres_db_connection(
continue;
}
Ok(Err(e))
if attempt == 0
&& server_requires_encryption(&e.to_string())
&& cfg.get_ssl_mode() == SslMode::Disable =>
{
debug!("Encryption required: {e}; retrying with SSL");
cfg.ssl_mode(SslMode::Require);
continue;
}
Ok(Err(e)) if missing_cluster_identifier(&e.to_string()) => {
debug!("Missing cluster identifier: {e}; treating as valid");
return Ok((true, Vec::new()));
}
Ok(Err(e)) if database_not_exists(&e, cfg.get_dbname().unwrap_or("postgres")) => {
return Ok((true, Vec::new()));
}
@ -108,3 +172,40 @@ fn database_not_exists(err: &Error, db_name: &str) -> bool {
let db = if db_name.is_empty() { "postgres" } else { db_name };
err.to_string().contains(&format!("database \"{db}\" does not exist"))
}
/// Reports whether a connection error message says the server refuses unencrypted
/// connections, i.e. it contains the text `server requires encryption`.
fn server_requires_encryption(err_msg: &str) -> bool {
    const MARKER: &str = "server requires encryption";
    err_msg.contains(MARKER)
}
/// Reports whether a connection error message contains the text
/// `missing cluster identifier`.
fn missing_cluster_identifier(err_msg: &str) -> bool {
    const MARKER: &str = "missing cluster identifier";
    err_msg.contains(MARKER)
}
#[cfg(test)]
mod tests {
    use super::{is_local_tcp_host, missing_cluster_identifier, server_requires_encryption};

    /// The encryption-required marker is found inside a full driver error string
    /// and not in unrelated text.
    #[test]
    fn detects_encryption_requirement() {
        let fatal = "db error: FATAL: server requires encryption";
        assert!(server_requires_encryption(fatal));

        let unrelated = "some other error";
        assert!(!server_requires_encryption(unrelated));
    }

    /// The missing-cluster marker is found inside a full driver error string
    /// and not in unrelated text.
    #[test]
    fn detects_missing_cluster() {
        let fatal = "db error: FATAL: codeParamsRoutingFailed: missing cluster identifier";
        assert!(missing_cluster_identifier(fatal));

        let unrelated = "another error";
        assert!(!missing_cluster_identifier(unrelated));
    }

    /// Localhost names and loopback/unspecified literals are local; ordinary
    /// hostnames and private-range addresses are not.
    #[test]
    fn detects_local_hosts() {
        const LOCAL: [&str; 7] = [
            "localhost",
            "LOCALHOST",
            "localhost.localdomain",
            "localhost6",
            "127.0.0.1",
            "[::1]",
            "::",
        ];
        const REMOTE: [&str; 2] = ["db.example.com", "10.0.0.1"];

        for h in LOCAL {
            assert!(is_local_tcp_host(h), "should treat {h} as local");
        }
        for h in REMOTE {
            assert!(!is_local_tcp_host(h), "should not treat {h} as local");
        }
    }
}