Merge pull request #108 from mongodb/development

v1.49.0
This commit is contained in:
Mick Grove 2025-09-10 17:26:49 -07:00 committed by GitHub
commit cf306c854c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 239 additions and 86 deletions

View file

@ -2,6 +2,12 @@
All notable changes to this project will be documented in this file.
## [1.49.0]
- Enabled MongoDB URI validation
- AWS + GCP validators now respect HTTPS_PROXY and share a consistent user agent across AWS, GCP, and HTTP validation
- Increased the `--max-file-size` default to 256 MB (up from 64 MB)
- Improved AWS rule
## [1.48.0]
- Improved error message when self-update cannot find the current binary
- Optimized memory usage via string interning and extensive data sharing

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.48.0"
version = "1.49.0"
description = "MongoDB's blazingly fast secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true
@ -122,6 +122,10 @@ reqwest-middleware = "0.4.2"
tracing-subscriber = {version = "0.3.19", features = ["env-filter"] }
tracing-core = "0.1.34"
tree-sitter = "0.25.8"
aws-smithy-runtime = "1.9.1"
aws-smithy-http-client = "1.1.1"
aws-smithy-runtime-api = "1.9.0"
aws-smithy-types = "1.3.2"
tree-sitter-bash = "0.25.0"
tree-sitter-c = "0.24.1"
tree-sitter-c-sharp = "0.23.1"

View file

@ -285,11 +285,11 @@ cat /path/to/file.py | kingfisher scan -
### Limit maximum file size scanned (`--max-file-size`)
By default, Kingfisher skips files larger than **64 MB**. You can raise or lower this cap per run with `--max-file-size`, which takes a value in **megabytes**.
By default, Kingfisher skips files larger than **256 MB**. You can raise or lower this cap per run with `--max-file-size`, which takes a value in **megabytes**.
```bash
# Scan files up to 250 mb in size
kingfisher scan /some/file --max-file-size 250
# Scan files up to 500 MB in size
kingfisher scan /some/file --max-file-size 500
```
### Scan using a rule _family_ with one flag

View file

@ -24,9 +24,7 @@ rules:
(?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)
(?:.|[\n\r]){0,64}?
\b
(
[A-Z0-9/+=]{40}
)
([A-Za-z0-9/+]{40})
\b
|
\b(?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)
@ -36,9 +34,7 @@ rules:
(?:KEY|TOKEN)
(?:.|[\n\r]){0,64}?
\b
(
[A-Z0-9/+=]{40}
)
([A-Za-z0-9/+]{40})
\b
)
min_entropy: 4.5

View file

@ -80,6 +80,8 @@ rules:
- client = mongoc_client_new ("mongodb+srv://someuser:hunter2@my-atlas-rd941.mongodb.net/test?retryWrites=true&w=majority");
- "mongodb+srv://user:passw0rd@cluster0.something.mongodb.net/"
- "mongodb://mongoadmin:contoso@something.foo.mongodb.net/myFirstDatabase"
validation:
type: MongoDB
- name: MongoDB Atlas Service Account Token
id: kingfisher.mongodb.4
pattern: |

View file

@ -169,27 +169,30 @@ pub struct InputSpecifierArgs {
#[derive(Args, Debug, Clone)]
pub struct ContentFilteringArgs {
/// Ignore files larger than the given size in MB
#[arg(long("max-file-size"), default_value_t = 64.0)]
#[arg(
long = "max-file-size",
visible_alias = "max-filesize", // also show in --help
// alias = "max-filesize", // use this instead if you DON'T want it shown in --help
default_value_t = 256.0,
value_name = "MB"
)]
pub max_file_size_mb: f64,
// /// Use custom path-based ignore rules from the given file(s)
// #[arg(long, short, value_hint = ValueHint::FilePath)]
// pub ignore: Vec<PathBuf>,
/// Skip any file or directory whose path matches this glob pattern. Multiple
/// patterns may be provided by repeating the flag.
#[arg(long, value_name = "PATTERN")]
pub exclude: Vec<String>,
/// If true, do NOT extract archive files
#[arg(long("no-extract-archives"), default_value_t = false)]
#[arg(long = "no-extract-archives", default_value_t = false)]
pub no_extract_archives: bool,
/// Maximum allowed depth for extracting nested archives
#[arg(long("extraction-depth"), default_value_t = 2, value_parser = clap::value_parser!(u8).range(1..=25))]
#[arg(long = "extraction-depth", default_value_t = 2, value_parser = clap::value_parser!(u8).range(1..=25))]
pub extraction_depth: u8,
/// If true, do NOT scan binary files
#[arg(long("no-binary"), default_value_t = false)]
#[arg(long = "no-binary", default_value_t = false)]
pub no_binary: bool,
}
@ -197,7 +200,7 @@ impl ContentFilteringArgs {
/// Convert the maximum file size in MB to bytes
pub fn max_file_size_bytes(&self) -> Option<u64> {
if self.max_file_size_mb < 0.0 {
Some(25 * 1024 * 1024) // default 25 MB if negative
Some(256 * 1024 * 1024) // default 256 MB if negative
} else {
Some((self.max_file_size_mb * 1024.0 * 1024.0) as u64)
}

View file

@ -290,6 +290,32 @@ impl DetailsReporter {
}
matches = expanded;
}
// Order the matches by the first origin's full path (converted lossily to a
// string; an empty string is used when no path is available), then by the
// start line, then by the start column of each match's source span.
// NOTE(review): both path strings are rebuilt on every comparison; presumably
// `sort_by_cached_key` would avoid the repeated allocations — confirm the key
// field types before changing.
matches.sort_by(|a, b| {
let path_a = a
.origin
.first()
.full_path()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_default();
let path_b = b
.origin
.first()
.full_path()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_default();
// Primary key: path string; ties fall through to line, then column.
path_a
.cmp(&path_b)
.then_with(|| {
a.m.location.source_span.start.line.cmp(&b.m.location.source_span.start.line)
})
.then_with(|| {
a.m.location
.source_span
.start
.column
.cmp(&b.m.location.source_span.start.column)
})
});
Ok(matches)
}

View file

@ -14,13 +14,12 @@ pub struct Styles {
is_term: bool,
}
impl Styles {
pub fn new(_use_color: bool) -> Self {
pub fn new(use_color: bool) -> Self {
let stdout_is_tty = std::io::stdout().is_terminal();
let is_term = Term::stdout().is_term();
let use_color = stdout_is_tty && is_term; // Enable color only if both checks agree it's a terminal
let styles_enabled = use_color;
// Enable color only when explicitly requested and stdout is a terminal.
let styles_enabled = use_color && stdout_is_tty && is_term;
let style_finding_heading = Style::new().bright().white().force_styling(styles_enabled);
let style_finding_active_heading =
Style::new().bold().bright().cyan().force_styling(styles_enabled);

View file

@ -12,7 +12,7 @@ use dashmap::DashMap;
use http::StatusCode;
use liquid::Object;
use liquid_core::{Value, ValueView};
use once_cell::sync::OnceCell;
use once_cell::sync::{Lazy, OnceCell};
use reqwest::{header, header::HeaderValue, multipart, Client, Url};
use rustc_hash::FxHashMap;
use tokio::{sync::Notify, time};
@ -37,6 +37,17 @@ mod utils;
const VALIDATION_CACHE_SECONDS: u64 = 1200; // 20 minutes
const MAX_VALIDATION_BODY_LEN: usize = 2048;
/// User-Agent string built lazily on first access:
/// `<crate name>/<crate version>` followed by a desktop Chrome browser token.
pub static GLOBAL_USER_AGENT: Lazy<String> = Lazy::new(|| {
    // Browser-style suffix appended after the crate identifier.
    let browser_token = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
                         AppleWebKit/537.36 (KHTML, like Gecko) \
                         Chrome/140.0.0.0 Safari/537.36";
    let crate_name = env!("CARGO_PKG_NAME");
    let crate_version = env!("CARGO_PKG_VERSION");
    format!("{crate_name}/{crate_version} {browser_token}")
});
// Use SkipMap-based cache instead of a mutex-wrapped FxHashMap.
type Cache = Arc<SkipMap<String, CachedResponse>>;
@ -59,6 +70,7 @@ static IN_FLIGHT: OnceCell<DashMap<u64, Arc<Notify>>> = OnceCell::new();
/// Prepare the global validation state: install empty maps into
/// `VALIDATION_CACHE` and `IN_FLIGHT`, then set the AWS validation
/// concurrency limit to 15.
pub fn init_validation_caches() {
    // The result of each `set` is discarded on purpose, so a failure to
    // install (presumably because the cell is already populated) is ignored.
    let _ = VALIDATION_CACHE.set(DashMap::new());
    let _ = IN_FLIGHT.set(DashMap::new());
    aws::set_aws_validation_concurrency(15);
}
#[derive(Clone)]
@ -405,16 +417,8 @@ async fn timed_validate_single_match<'a>(
&url,
) {
// add realistic UA & accept headers
let ua = format!(
"{} {}/{}",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
AppleWebKit/537.36 (KHTML, like Gecko) \
Chrome/132.0.0.0 Safari/537.36",
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION")
);
let std_headers = [
(header::USER_AGENT, ua.as_str()),
(header::USER_AGENT, GLOBAL_USER_AGENT.as_str()),
(header::ACCEPT , "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"),
(header::ACCEPT_LANGUAGE, "en-US,en;q=0.5"),
(header::ACCEPT_ENCODING, "gzip, deflate, br"),
@ -763,16 +767,30 @@ async fn timed_validate_single_match<'a>(
return;
}
match aws::validate_aws_credentials(&akid, &secret, cache).await {
Ok((ok, arn)) => {
match aws::validate_aws_credentials(&akid, &secret).await {
Ok((ok, msg)) => {
m.validation_success = ok;
m.validation_response_body = format!("{} --- ARN: {}", akid, arn);
m.validation_response_status =
if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
if let Ok(acct) = aws::aws_key_to_account_number(&akid) {
m.validation_response_body
.push_str(&format!(" --- AWS Account Number: {:012}", acct));
if ok {
m.validation_response_body = format!("{} --- ARN: {}", akid, msg);
m.validation_response_status = StatusCode::OK;
if let Ok(acct) = aws::aws_key_to_account_number(&akid) {
m.validation_response_body
.push_str(&format!(" --- AWS Account Number: {:012}", acct));
}
} else {
m.validation_response_body =
format!("AWS validation error ({}): {}", akid, msg);
m.validation_response_status = StatusCode::UNAUTHORIZED;
}
cache.insert(
cache_key,
CachedResponse {
body: m.validation_response_body.clone(),
status: m.validation_response_status,
is_valid: m.validation_success,
timestamp: Instant::now(),
},
);
}
Err(e) => {
m.validation_success = false;
@ -780,15 +798,6 @@ async fn timed_validate_single_match<'a>(
m.validation_response_status = StatusCode::BAD_GATEWAY;
}
}
cache.insert(
cache_key,
CachedResponse {
body: m.validation_response_body.clone(),
status: m.validation_response_status,
is_valid: m.validation_success,
timestamp: Instant::now(),
},
);
}
// ----------------------------------------------------- GCP validator

View file

@ -1,15 +1,74 @@
use std::time::Duration;
use anyhow::{anyhow, Result};
use aws_config::BehaviorVersion;
use aws_config::{retry::RetryConfig, BehaviorVersion};
use aws_credential_types::Credentials;
use aws_sdk_sts::Client as StsClient;
use aws_sdk_sts::{
config::Builder as StsConfigBuilder, error::SdkError,
operation::get_caller_identity::GetCallerIdentityError, Client as StsClient,
};
use aws_smithy_http_client::{
proxy::ProxyConfig, tls, Builder as HttpClientBuilder, ConnectorBuilder,
};
use aws_smithy_runtime_api::{
box_error::BoxError,
client::{
http::SharedHttpClient,
interceptors::{context::BeforeTransmitInterceptorContextMut, Intercept},
runtime_components::RuntimeComponents,
},
};
use aws_smithy_types::config_bag::ConfigBag;
use aws_types::region::Region;
use base32::Alphabet;
use byteorder::{BigEndian, ByteOrder};
use http::StatusCode;
use http::{
header::{HeaderValue, USER_AGENT},
StatusCode,
};
use once_cell::sync::OnceCell;
use rand::{rng, Rng};
use tokio::{
sync::Semaphore,
time::{sleep, timeout},
};
use crate::validation::{Cache, CachedResponse, VALIDATION_CACHE_SECONDS};
use crate::validation::GLOBAL_USER_AGENT;
static AWS_VALIDATION_SEMAPHORE: OnceCell<Semaphore> = OnceCell::new();
/// Set the maximum number of concurrent AWS validations. Call before first use.
///
/// `max` is clamped to `1..=Semaphore::MAX_PERMITS`: a zero-permit semaphore
/// would make every validation wait forever on `acquire`, and a value above
/// `Semaphore::MAX_PERMITS` would make `Semaphore::new` panic.
///
/// If the semaphore has already been initialized, the call has no effect.
pub fn set_aws_validation_concurrency(max: usize) {
    let permits = max.clamp(1, Semaphore::MAX_PERMITS);
    // `set` fails once the cell is populated; ignoring that is deliberate
    // (the first configured limit wins).
    AWS_VALIDATION_SEMAPHORE.set(Semaphore::new(permits)).ok();
}
/// Return the process-wide AWS validation semaphore, creating it with 15
/// permits if it has not been initialized yet.
fn aws_validation_semaphore() -> &'static Semaphore {
    const DEFAULT_PERMITS: usize = 15;
    AWS_VALIDATION_SEMAPHORE.get_or_init(|| Semaphore::new(DEFAULT_PERMITS))
}
/// Interceptor that inserts a `User-Agent` header, taken from
/// `GLOBAL_USER_AGENT`, into each request just before it is transmitted.
#[derive(Debug)]
struct UaInterceptor;

impl Intercept for UaInterceptor {
    /// Short identifier reported for this interceptor.
    fn name(&self) -> &'static str {
        "ua"
    }

    /// Build the header value from `GLOBAL_USER_AGENT` and insert it under
    /// `USER_AGENT` on the outgoing request.
    ///
    /// # Errors
    /// Returns an error if the user-agent string is not a valid header value.
    fn modify_before_transmit(
        &self,
        context: &mut BeforeTransmitInterceptorContextMut<'_>,
        _rc: &RuntimeComponents,
        _cfg: &mut ConfigBag,
    ) -> std::result::Result<(), BoxError> {
        // Validate the header value first; bail out without touching the
        // request if it cannot be encoded.
        let ua_value = match HeaderValue::from_str(GLOBAL_USER_AGENT.as_str()) {
            Ok(value) => value,
            Err(e) => return Err(format!("invalid USER_AGENT header: {e}").into()),
        };
        context.request_mut().headers_mut().insert(USER_AGENT, ua_value);
        Ok(())
    }
}
/// Generate a standardized cache key for AWS validation attempts
pub fn generate_aws_cache_key(aws_access_key_id: &str, aws_secret_access_key: &str) -> String {
@ -41,19 +100,30 @@ pub fn validate_aws_credentials_input(access_key_id: &str, secret_key: &str) ->
Ok(())
}
/// Classify a failed `GetCallerIdentity` call as throttling/transient.
///
/// Returns `true` for:
/// - service errors whose error code contains `"Throttl"` or whose HTTP
///   status is 429 (Too Many Requests) or 503 (Service Unavailable),
/// - dispatch failures that report a timeout or an I/O error,
/// - response errors whose HTTP status is 429 or 503.
///
/// Every other error yields `false`.
fn is_throttling_or_transient(e: &SdkError<GetCallerIdentityError>) -> bool {
    // Statuses that count as rate limiting or temporary unavailability.
    let is_retryable_status = |status: StatusCode| {
        status == StatusCode::TOO_MANY_REQUESTS || status == StatusCode::SERVICE_UNAVAILABLE
    };

    match e {
        SdkError::ServiceError(ctx) => {
            let error_code = ctx.err().meta().code().unwrap_or_default();
            error_code.contains("Throttl") || is_retryable_status(ctx.raw().status().into())
        }
        SdkError::DispatchFailure(failure) => failure.is_timeout() || failure.is_io(),
        SdkError::ResponseError(ctx) => is_retryable_status(ctx.raw().status().into()),
        _ => false,
    }
}
pub async fn validate_aws_credentials(
aws_access_key_id: &str,
aws_secret_access_key: &str,
cache: &Cache,
) -> Result<(bool, String)> {
let cache_key = generate_aws_cache_key(aws_access_key_id, aws_secret_access_key);
// Check cache first
if let Some(cached) = cache.get(&cache_key) {
let cached_response = cached.value();
if cached_response.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
return Ok((cached_response.is_valid, cached_response.body.clone()));
}
}
let _permit = aws_validation_semaphore().acquire().await.expect("semaphore closed");
// Create static credentials
let credentials = Credentials::new(
aws_access_key_id,
@ -62,29 +132,64 @@ pub async fn validate_aws_credentials(
None, // expiry
"static", // provider name
);
// Create AWS config
// Create HTTP client that respects proxy settings from the environment
let http_client: SharedHttpClient =
HttpClientBuilder::new().build_with_connector_fn(|settings, runtime_components| {
let mut conn_builder = ConnectorBuilder::default()
.tls_provider(tls::Provider::Rustls(tls::rustls_provider::CryptoMode::AwsLc));
conn_builder.set_connector_settings(settings.cloned());
if let Some(components) = runtime_components {
conn_builder.set_sleep_impl(components.sleep_impl());
}
conn_builder.set_proxy_config(Some(ProxyConfig::from_env()));
conn_builder.build()
});
// Create AWS config with adaptive retries
let retry_config = RetryConfig::adaptive().with_max_attempts(3);
let config = aws_config::defaults(BehaviorVersion::latest())
.region(Region::new("us-east-1"))
.credentials_provider(credentials)
.http_client(http_client)
.retry_config(retry_config)
.load()
.await;
// Create STS client
let sts_client = StsClient::new(&config);
// Call get-caller-identity
match sts_client.get_caller_identity().send().await {
Ok(identity) => {
let arn = identity.arn.unwrap_or_else(|| "Unknown".to_string());
// let acct = identity.account.unwrap_or_else(|| "Unknown".to_string());
let response = CachedResponse::new(arn.clone(), StatusCode::OK, true);
cache.insert(cache_key, response);
Ok((true, arn))
}
Err(e) => {
let response = CachedResponse::new(e.to_string(), StatusCode::UNAUTHORIZED, false);
cache.insert(cache_key, response);
Err(anyhow!("AWS validation failed: {}", e))
let sts_config = StsConfigBuilder::from(&config).interceptor(UaInterceptor).build();
let sts_client = StsClient::from_conf(sts_config);
const MAX_ATTEMPTS: usize = 3;
const ATTEMPT_TIMEOUT: Duration = Duration::from_secs(5);
for attempt in 1..=MAX_ATTEMPTS {
let result = timeout(ATTEMPT_TIMEOUT, sts_client.get_caller_identity().send()).await;
match result {
Ok(Ok(identity)) => {
let arn = identity.arn.unwrap_or_else(|| "Unknown".to_string());
return Ok((true, arn));
}
Ok(Err(e)) => {
if is_throttling_or_transient(&e) {
if attempt == MAX_ATTEMPTS {
return Err(anyhow!("AWS validation failed: {}", e));
}
} else {
return Ok((false, e.to_string()));
}
}
Err(_) => {
if attempt == MAX_ATTEMPTS {
return Err(anyhow!("AWS validation timed out"));
}
}
}
let max_delay = 100u64 * 2u64.pow((attempt - 1) as u32);
let sleep_ms = rng().random_range(0..=max_delay);
sleep(Duration::from_millis(sleep_ms)).await;
}
Err(anyhow!("AWS validation failed"))
}
/// Converts an AWS Key ID to an AWS Account Number.

View file

@ -1,11 +1,12 @@
use std::sync::Arc;
use crate::validation::GLOBAL_USER_AGENT;
use anyhow::{anyhow, Result};
use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
use chrono::{Duration as ChronoDuration, Utc};
use once_cell::sync::OnceCell;
use pem::parse;
use reqwest::Client;
use reqwest::{Client, Proxy};
use ring::{rand, signature};
use serde_json::Value as JsonValue;
use tokio::sync::Semaphore;
@ -36,7 +37,13 @@ impl GcpValidator {
pub fn new() -> Result<Self> {
const MAX_CONCURRENT_VALIDATIONS: usize = 500;
let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_VALIDATIONS));
let client = Client::builder().build()?;
let mut builder = Client::builder();
if let Ok(proxy) = std::env::var("HTTPS_PROXY").or_else(|_| std::env::var("https_proxy")) {
builder = builder.proxy(Proxy::all(&proxy)?);
}
let client = builder.user_agent(GLOBAL_USER_AGENT.as_str()).build()?;
Ok(Self { semaphore, client })
}

View file

@ -1,5 +1,6 @@
use std::{collections::BTreeMap, future::Future, str::FromStr, time::Duration};
use crate::validation::GLOBAL_USER_AGENT;
use anyhow::{anyhow, Error, Result};
use http::StatusCode;
use liquid::Object;
@ -76,14 +77,9 @@ pub fn build_request_builder(
.map_err(|e| format!("Error processing headers: {}", e))?;
// Prepare a standard set of headers.
let user_agent = format!(
"{}/{}",
//"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION")
);
let user_agent = GLOBAL_USER_AGENT.as_str();
let standard_headers = [
(header::USER_AGENT, user_agent.as_str()),
(header::USER_AGENT, user_agent),
(
header::ACCEPT,
"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",