diff --git a/CHANGELOG.md b/CHANGELOG.md index fb350a4..5355560 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to this project will be documented in this file. +## [1.49.0] +- Enabled MongoDB URI validation +- AWS + GCP validators now respect HTTPS_PROXY and share a consistent user agent across AWS, GCP, and HTTP validation +- Increased max-file-size default to 256 MB (up from 64 MB) +- Improved AWS rule + ## [1.48.0] - Improved error message when self-update cannot find the current binary - Optimized memory usage via string interning and extensive data sharing diff --git a/Cargo.toml b/Cargo.toml index 14cb21a..4d810df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.48.0" +version = "1.49.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -122,6 +122,10 @@ reqwest-middleware = "0.4.2" tracing-subscriber = {version = "0.3.19", features = ["env-filter"] } tracing-core = "0.1.34" tree-sitter = "0.25.8" +aws-smithy-runtime = "1.9.1" +aws-smithy-http-client = "1.1.1" +aws-smithy-runtime-api = "1.9.0" +aws-smithy-types = "1.3.2" tree-sitter-bash = "0.25.0" tree-sitter-c = "0.24.1" tree-sitter-c-sharp = "0.23.1" diff --git a/README.md b/README.md index 1da58dd..bea6c11 100644 --- a/README.md +++ b/README.md @@ -285,11 +285,11 @@ cat /path/to/file.py | kingfisher scan - ### Limit maximum file size scanned (`--max-file-size`) -By default, Kingfisher skips files larger than **64 MB**. You can raise or lower this cap per run with `--max-file-size`, which takes a value in **megabytes**. +By default, Kingfisher skips files larger than **256 MB**. You can raise or lower this cap per run with `--max-file-size`, which takes a value in **megabytes**. 
```bash -# Scan files up to 250 mb in size -kingfisher scan /some/file --max-file-size 250 +# Scan files up to 500 mb in size +kingfisher scan /some/file --max-file-size 500 ``` ### Scan using a rule _family_ with one flag diff --git a/data/rules/aws.yml b/data/rules/aws.yml index 1855aa3..f5fcf4a 100644 --- a/data/rules/aws.yml +++ b/data/rules/aws.yml @@ -24,9 +24,7 @@ rules: (?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) (?:.|[\n\r]){0,64}? \b - ( - [A-Z0-9/+=]{40} - ) + ([A-Za-z0-9/+]{40}) \b | \b(?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) @@ -36,9 +34,7 @@ rules: (?:KEY|TOKEN) (?:.|[\n\r]){0,64}? \b - ( - [A-Z0-9/+=]{40} - ) + ([A-Za-z0-9/+]{40}) \b ) min_entropy: 4.5 diff --git a/data/rules/mongodb.yml b/data/rules/mongodb.yml index 46fbcf8..dc02e94 100644 --- a/data/rules/mongodb.yml +++ b/data/rules/mongodb.yml @@ -80,6 +80,8 @@ rules: - client = mongoc_client_new ("mongodb+srv://someuser:hunter2@my-atlas-rd941.mongodb.net/test?retryWrites=true&w=majority"); - "mongodb+srv://user:passw0rd@cluster0.something.mongodb.net/" - "mongodb://mongoadmin:contoso@something.foo.mongodb.net/myFirstDatabase" + validation: + type: MongoDB - name: MongoDB Atlas Service Account Token id: kingfisher.mongodb.4 pattern: | diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 4f2dc5b..60bfba8 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -169,27 +169,30 @@ pub struct InputSpecifierArgs { #[derive(Args, Debug, Clone)] pub struct ContentFilteringArgs { /// Ignore files larger than the given size in MB - #[arg(long("max-file-size"), default_value_t = 64.0)] + #[arg( + long = "max-file-size", + visible_alias = "max-filesize", // also show in --help + // alias = "max-filesize", // use this instead if you DON’T want it shown in --help + default_value_t = 256.0, + value_name = "MB" + )] pub max_file_size_mb: f64, - // /// Use custom path-based ignore rules from the given file(s) - 
// #[arg(long, short, value_hint = ValueHint::FilePath)] - // pub ignore: Vec, /// Skip any file or directory whose path matches this glob pattern. Multiple /// patterns may be provided by repeating the flag. #[arg(long, value_name = "PATTERN")] pub exclude: Vec, /// If true, do NOT extract archive files - #[arg(long("no-extract-archives"), default_value_t = false)] + #[arg(long = "no-extract-archives", default_value_t = false)] pub no_extract_archives: bool, /// Maximum allowed depth for extracting nested archives - #[arg(long("extraction-depth"), default_value_t = 2, value_parser = clap::value_parser!(u8).range(1..=25))] + #[arg(long = "extraction-depth", default_value_t = 2, value_parser = clap::value_parser!(u8).range(1..=25))] pub extraction_depth: u8, /// If true, do NOT scan binary files - #[arg(long("no-binary"), default_value_t = false)] + #[arg(long = "no-binary", default_value_t = false)] pub no_binary: bool, } @@ -197,7 +200,7 @@ impl ContentFilteringArgs { /// Convert the maximum file size in MB to bytes pub fn max_file_size_bytes(&self) -> Option { if self.max_file_size_mb < 0.0 { - Some(25 * 1024 * 1024) // default 25 MB if negative + Some(256 * 1024 * 1024) // default 256 MB if negative } else { Some((self.max_file_size_mb * 1024.0 * 1024.0) as u64) } diff --git a/src/reporter.rs b/src/reporter.rs index dacf75b..a4bf2fd 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -290,6 +290,32 @@ impl DetailsReporter { } matches = expanded; } + matches.sort_by(|a, b| { + let path_a = a + .origin + .first() + .full_path() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_default(); + let path_b = b + .origin + .first() + .full_path() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_default(); + path_a + .cmp(&path_b) + .then_with(|| { + a.m.location.source_span.start.line.cmp(&b.m.location.source_span.start.line) + }) + .then_with(|| { + a.m.location + .source_span + .start + .column + .cmp(&b.m.location.source_span.start.column) + }) + }); 
Ok(matches) } diff --git a/src/reporter/styles.rs b/src/reporter/styles.rs index 263948d..9f96c9c 100644 --- a/src/reporter/styles.rs +++ b/src/reporter/styles.rs @@ -14,13 +14,12 @@ pub struct Styles { is_term: bool, } impl Styles { - pub fn new(_use_color: bool) -> Self { + pub fn new(use_color: bool) -> Self { let stdout_is_tty = std::io::stdout().is_terminal(); let is_term = Term::stdout().is_term(); - let use_color = stdout_is_tty && is_term; // Enable color only if both checks agree it's a terminal - - let styles_enabled = use_color; + // Enable color only when explicitly requested and stdout is a terminal. + let styles_enabled = use_color && stdout_is_tty && is_term; let style_finding_heading = Style::new().bright().white().force_styling(styles_enabled); let style_finding_active_heading = Style::new().bold().bright().cyan().force_styling(styles_enabled); diff --git a/src/validation.rs b/src/validation.rs index a645ce7..a713f73 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -12,7 +12,7 @@ use dashmap::DashMap; use http::StatusCode; use liquid::Object; use liquid_core::{Value, ValueView}; -use once_cell::sync::OnceCell; +use once_cell::sync::{Lazy, OnceCell}; use reqwest::{header, header::HeaderValue, multipart, Client, Url}; use rustc_hash::FxHashMap; use tokio::{sync::Notify, time}; @@ -37,6 +37,17 @@ mod utils; const VALIDATION_CACHE_SECONDS: u64 = 1200; // 20 minutes const MAX_VALIDATION_BODY_LEN: usize = 2048; +pub static GLOBAL_USER_AGENT: Lazy = Lazy::new(|| { + format!( + "{}/{} {}", + env!("CARGO_PKG_NAME"), + env!("CARGO_PKG_VERSION"), + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \ + AppleWebKit/537.36 (KHTML, like Gecko) \ + Chrome/140.0.0.0 Safari/537.36" + ) +}); + // Use SkipMap-based cache instead of a mutex-wrapped FxHashMap. 
type Cache = Arc>; @@ -59,6 +70,7 @@ static IN_FLIGHT: OnceCell>> = OnceCell::new(); pub fn init_validation_caches() { VALIDATION_CACHE.set(DashMap::new()).ok(); IN_FLIGHT.set(DashMap::new()).ok(); + aws::set_aws_validation_concurrency(15); } #[derive(Clone)] @@ -405,16 +417,8 @@ async fn timed_validate_single_match<'a>( &url, ) { // add realistic UA & accept headers - let ua = format!( - "{} {}/{}", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \ - AppleWebKit/537.36 (KHTML, like Gecko) \ - Chrome/132.0.0.0 Safari/537.36", - env!("CARGO_PKG_NAME"), - env!("CARGO_PKG_VERSION") - ); let std_headers = [ - (header::USER_AGENT, ua.as_str()), + (header::USER_AGENT, GLOBAL_USER_AGENT.as_str()), (header::ACCEPT , "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"), (header::ACCEPT_LANGUAGE, "en-US,en;q=0.5"), (header::ACCEPT_ENCODING, "gzip, deflate, br"), @@ -763,16 +767,30 @@ async fn timed_validate_single_match<'a>( return; } - match aws::validate_aws_credentials(&akid, &secret, cache).await { - Ok((ok, arn)) => { + match aws::validate_aws_credentials(&akid, &secret).await { + Ok((ok, msg)) => { m.validation_success = ok; - m.validation_response_body = format!("{} --- ARN: {}", akid, arn); - m.validation_response_status = - if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; - if let Ok(acct) = aws::aws_key_to_account_number(&akid) { - m.validation_response_body - .push_str(&format!(" --- AWS Account Number: {:012}", acct)); + if ok { + m.validation_response_body = format!("{} --- ARN: {}", akid, msg); + m.validation_response_status = StatusCode::OK; + if let Ok(acct) = aws::aws_key_to_account_number(&akid) { + m.validation_response_body + .push_str(&format!(" --- AWS Account Number: {:012}", acct)); + } + } else { + m.validation_response_body = + format!("AWS validation error ({}): {}", akid, msg); + m.validation_response_status = StatusCode::UNAUTHORIZED; } + cache.insert( + cache_key, + CachedResponse { + body: 
m.validation_response_body.clone(), + status: m.validation_response_status, + is_valid: m.validation_success, + timestamp: Instant::now(), + }, + ); } Err(e) => { m.validation_success = false; @@ -780,15 +798,6 @@ async fn timed_validate_single_match<'a>( m.validation_response_status = StatusCode::BAD_GATEWAY; } } - cache.insert( - cache_key, - CachedResponse { - body: m.validation_response_body.clone(), - status: m.validation_response_status, - is_valid: m.validation_success, - timestamp: Instant::now(), - }, - ); } // ----------------------------------------------------- GCP validator diff --git a/src/validation/aws.rs b/src/validation/aws.rs index 3f077c7..7e1073d 100644 --- a/src/validation/aws.rs +++ b/src/validation/aws.rs @@ -1,15 +1,74 @@ use std::time::Duration; use anyhow::{anyhow, Result}; -use aws_config::BehaviorVersion; +use aws_config::{retry::RetryConfig, BehaviorVersion}; use aws_credential_types::Credentials; -use aws_sdk_sts::Client as StsClient; +use aws_sdk_sts::{ + config::Builder as StsConfigBuilder, error::SdkError, + operation::get_caller_identity::GetCallerIdentityError, Client as StsClient, +}; +use aws_smithy_http_client::{ + proxy::ProxyConfig, tls, Builder as HttpClientBuilder, ConnectorBuilder, +}; +use aws_smithy_runtime_api::{ + box_error::BoxError, + client::{ + http::SharedHttpClient, + interceptors::{context::BeforeTransmitInterceptorContextMut, Intercept}, + runtime_components::RuntimeComponents, + }, +}; +use aws_smithy_types::config_bag::ConfigBag; use aws_types::region::Region; use base32::Alphabet; use byteorder::{BigEndian, ByteOrder}; -use http::StatusCode; +use http::{ + header::{HeaderValue, USER_AGENT}, + StatusCode, +}; +use once_cell::sync::OnceCell; +use rand::{rng, Rng}; +use tokio::{ + sync::Semaphore, + time::{sleep, timeout}, +}; -use crate::validation::{Cache, CachedResponse, VALIDATION_CACHE_SECONDS}; +use crate::validation::GLOBAL_USER_AGENT; + +static AWS_VALIDATION_SEMAPHORE: OnceCell = OnceCell::new(); + 
+/// Set the maximum number of concurrent AWS validations. Call before first use. +pub fn set_aws_validation_concurrency(max: usize) { + AWS_VALIDATION_SEMAPHORE.set(Semaphore::new(max)).ok(); +} + +fn aws_validation_semaphore() -> &'static Semaphore { + AWS_VALIDATION_SEMAPHORE.get_or_init(|| Semaphore::new(15)) +} + +#[derive(Debug)] +struct UaInterceptor; + +impl Intercept for UaInterceptor { + fn name(&self) -> &'static str { + "ua" + } + + fn modify_before_transmit( + &self, + context: &mut BeforeTransmitInterceptorContextMut<'_>, + _rc: &RuntimeComponents, + _cfg: &mut ConfigBag, + ) -> std::result::Result<(), BoxError> { + let req = context.request_mut(); + req.headers_mut().insert( + USER_AGENT, + HeaderValue::from_str(GLOBAL_USER_AGENT.as_str()) + .map_err(|e| format!("invalid USER_AGENT header: {e}"))?, + ); + Ok(()) + } +} /// Generate a standardized cache key for AWS validation attempts pub fn generate_aws_cache_key(aws_access_key_id: &str, aws_secret_access_key: &str) -> String { @@ -41,19 +100,30 @@ pub fn validate_aws_credentials_input(access_key_id: &str, secret_key: &str) -> Ok(()) } +fn is_throttling_or_transient(e: &SdkError) -> bool { + match e { + SdkError::ServiceError(ctx) => { + let code = ctx.err().meta().code().unwrap_or_default(); + let status: StatusCode = ctx.raw().status().into(); + code.contains("Throttl") + || status == StatusCode::TOO_MANY_REQUESTS + || status == StatusCode::SERVICE_UNAVAILABLE + } + SdkError::DispatchFailure(df) => df.is_timeout() || df.is_io(), + SdkError::ResponseError(ctx) => { + let status: StatusCode = ctx.raw().status().into(); + status == StatusCode::TOO_MANY_REQUESTS || status == StatusCode::SERVICE_UNAVAILABLE + } + _ => false, + } +} + pub async fn validate_aws_credentials( aws_access_key_id: &str, aws_secret_access_key: &str, - cache: &Cache, ) -> Result<(bool, String)> { - let cache_key = generate_aws_cache_key(aws_access_key_id, aws_secret_access_key); - // Check cache first - if let Some(cached) = 
cache.get(&cache_key) { - let cached_response = cached.value(); - if cached_response.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { - return Ok((cached_response.is_valid, cached_response.body.clone())); - } - } + let _permit = aws_validation_semaphore().acquire().await.expect("semaphore closed"); + // Create static credentials let credentials = Credentials::new( aws_access_key_id, @@ -62,29 +132,64 @@ pub async fn validate_aws_credentials( None, // expiry "static", // provider name ); - // Create AWS config + // Create HTTP client that respects proxy settings from the environment + let http_client: SharedHttpClient = + HttpClientBuilder::new().build_with_connector_fn(|settings, runtime_components| { + let mut conn_builder = ConnectorBuilder::default() + .tls_provider(tls::Provider::Rustls(tls::rustls_provider::CryptoMode::AwsLc)); + + conn_builder.set_connector_settings(settings.cloned()); + if let Some(components) = runtime_components { + conn_builder.set_sleep_impl(components.sleep_impl()); + } + conn_builder.set_proxy_config(Some(ProxyConfig::from_env())); + conn_builder.build() + }); + + // Create AWS config with adaptive retries + let retry_config = RetryConfig::adaptive().with_max_attempts(3); let config = aws_config::defaults(BehaviorVersion::latest()) .region(Region::new("us-east-1")) .credentials_provider(credentials) + .http_client(http_client) + .retry_config(retry_config) .load() .await; + // Create STS client - let sts_client = StsClient::new(&config); - // Call get-caller-identity - match sts_client.get_caller_identity().send().await { - Ok(identity) => { - let arn = identity.arn.unwrap_or_else(|| "Unknown".to_string()); - // let acct = identity.account.unwrap_or_else(|| "Unknown".to_string()); - let response = CachedResponse::new(arn.clone(), StatusCode::OK, true); - cache.insert(cache_key, response); - Ok((true, arn)) - } - Err(e) => { - let response = CachedResponse::new(e.to_string(), StatusCode::UNAUTHORIZED, false); - 
cache.insert(cache_key, response); - Err(anyhow!("AWS validation failed: {}", e)) + let sts_config = StsConfigBuilder::from(&config).interceptor(UaInterceptor).build(); + let sts_client = StsClient::from_conf(sts_config); + + const MAX_ATTEMPTS: usize = 3; + const ATTEMPT_TIMEOUT: Duration = Duration::from_secs(5); + + for attempt in 1..=MAX_ATTEMPTS { + let result = timeout(ATTEMPT_TIMEOUT, sts_client.get_caller_identity().send()).await; + match result { + Ok(Ok(identity)) => { + let arn = identity.arn.unwrap_or_else(|| "Unknown".to_string()); + return Ok((true, arn)); + } + Ok(Err(e)) => { + if is_throttling_or_transient(&e) { + if attempt == MAX_ATTEMPTS { + return Err(anyhow!("AWS validation failed: {}", e)); + } + } else { + return Ok((false, e.to_string())); + } + } + Err(_) => { + if attempt == MAX_ATTEMPTS { + return Err(anyhow!("AWS validation timed out")); + } + } } + let max_delay = 100u64 * 2u64.pow((attempt - 1) as u32); + let sleep_ms = rng().random_range(0..=max_delay); + sleep(Duration::from_millis(sleep_ms)).await; } + Err(anyhow!("AWS validation failed")) } /// Converts an AWS Key ID to an AWS Account Number. 
diff --git a/src/validation/gcp.rs b/src/validation/gcp.rs index c03d77a..87499dc 100644 --- a/src/validation/gcp.rs +++ b/src/validation/gcp.rs @@ -1,11 +1,12 @@ use std::sync::Arc; +use crate::validation::GLOBAL_USER_AGENT; use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; use chrono::{Duration as ChronoDuration, Utc}; use once_cell::sync::OnceCell; use pem::parse; -use reqwest::Client; +use reqwest::{Client, Proxy}; use ring::{rand, signature}; use serde_json::Value as JsonValue; use tokio::sync::Semaphore; @@ -36,7 +37,13 @@ impl GcpValidator { pub fn new() -> Result { const MAX_CONCURRENT_VALIDATIONS: usize = 500; let semaphore = Arc::new(Semaphore::new(MAX_CONCURRENT_VALIDATIONS)); - let client = Client::builder().build()?; + let mut builder = Client::builder(); + + if let Ok(proxy) = std::env::var("HTTPS_PROXY").or_else(|_| std::env::var("https_proxy")) { + builder = builder.proxy(Proxy::all(&proxy)?); + } + + let client = builder.user_agent(GLOBAL_USER_AGENT.as_str()).build()?; Ok(Self { semaphore, client }) } diff --git a/src/validation/httpvalidation.rs b/src/validation/httpvalidation.rs index cc866b3..ba2941b 100644 --- a/src/validation/httpvalidation.rs +++ b/src/validation/httpvalidation.rs @@ -1,5 +1,6 @@ use std::{collections::BTreeMap, future::Future, str::FromStr, time::Duration}; +use crate::validation::GLOBAL_USER_AGENT; use anyhow::{anyhow, Error, Result}; use http::StatusCode; use liquid::Object; @@ -76,14 +77,9 @@ pub fn build_request_builder( .map_err(|e| format!("Error processing headers: {}", e))?; // Prepare a standard set of headers. 
- let user_agent = format!( - "{}/{}", - //"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36", - env!("CARGO_PKG_NAME"), - env!("CARGO_PKG_VERSION") - ); + let user_agent = GLOBAL_USER_AGENT.as_str(); let standard_headers = [ - (header::USER_AGENT, user_agent.as_str()), + (header::USER_AGENT, user_agent), ( header::ACCEPT, "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",