- Added first-class Hugging Face scanning support, including CLI enumeration, token authentication, and integration with remote scans.

- Condensed GitError formatting to report the exit status and the first informative lines from stdout/stderr, producing concise git clone failure logs.
- Added support for scanning Google Cloud Storage buckets via --gcs-bucket, including optional prefixes and service-account authentication.
- Added --skip-aws-account (now accepting comma-separated values) and --skip-aws-account-file to bypass live AWS validation for known canary/honey-token account IDs without triggering alerts. Kingfisher now ships with several canary AWS account IDs pre-seeded in the skip list. Matching findings are reported as "Not Attempted" with the "Response" containing "(skip list entry)", so it's clear that validation was intentionally skipped and why.
This commit is contained in:
Mick Grove 2025-10-15 22:47:40 -07:00
commit 8aced005b8
33 changed files with 1939 additions and 182 deletions

View file

@ -1,7 +1,7 @@
use std::time::Duration;
use std::{collections::HashSet, sync::RwLock, time::Duration};
use anyhow::{anyhow, Result};
use aws_config::{retry::RetryConfig, BehaviorVersion};
use aws_config::{retry::RetryConfig, BehaviorVersion, SdkConfig};
use aws_credential_types::Credentials;
use aws_sdk_sts::{
config::Builder as StsConfigBuilder, error::SdkError,
@ -26,8 +26,9 @@ use http::{
header::{HeaderValue, USER_AGENT},
StatusCode,
};
use once_cell::sync::OnceCell;
use once_cell::sync::{Lazy, OnceCell};
use rand::{rng, Rng};
use regex::Regex;
use tokio::{
sync::Semaphore,
time::{sleep, timeout},
@ -36,6 +37,57 @@ use tokio::{
use crate::validation::GLOBAL_USER_AGENT;
static AWS_VALIDATION_SEMAPHORE: OnceCell<Semaphore> = OnceCell::new();
/// AWS account IDs that are always present in the validation skip list.
///
/// These are the canary / honey-token account IDs that ship pre-seeded with the
/// tool; credentials resolving to one of them are never validated live.
/// Each entry is a 12-digit account ID string.
const BUILTIN_SKIP_ACCOUNT_IDS: &[&str] = &[
"052310077262",
"171436882533",
"528757803018",
"534261010715",
"595918472158",
"729780141977",
"893192397702",
"992382622183",
];
/// Process-wide set of AWS account IDs whose credentials must not be validated live.
///
/// Lazily initialized with every entry of `BUILTIN_SKIP_ACCOUNT_IDS`; additional
/// entries are installed through `set_aws_skip_account_ids`.
static AWS_SKIP_ACCOUNT_IDS: Lazy<RwLock<HashSet<String>>> = Lazy::new(|| {
    let builtin = BUILTIN_SKIP_ACCOUNT_IDS.iter().copied().map(String::from);
    RwLock::new(builtin.collect::<HashSet<String>>())
});
/// Builds the HTTP client handed to the AWS SDK.
///
/// The connector uses rustls (aws-lc crypto mode), inherits the connector settings
/// and sleep implementation supplied by the SDK runtime, and takes its proxy
/// configuration from the environment.
fn build_http_client() -> SharedHttpClient {
    HttpClientBuilder::new().build_with_connector_fn(|settings, runtime_components| {
        let tls_provider = tls::Provider::Rustls(tls::rustls_provider::CryptoMode::AwsLc);
        let mut connector = ConnectorBuilder::default().tls_provider(tls_provider);
        connector.set_connector_settings(settings.cloned());
        // Respect proxy settings from the environment.
        connector.set_proxy_config(Some(ProxyConfig::from_env()));
        // The sleep implementation is only available when runtime components are provided.
        if let Some(components) = runtime_components {
            connector.set_sleep_impl(components.sleep_impl());
        }
        connector.build()
    })
}
/// Loads the base AWS SDK configuration for the given static credentials.
///
/// The configuration is pinned to `us-east-1`, uses the client from
/// `build_http_client`, and retries adaptively with at most three attempts.
async fn build_base_config(credentials: Credentials) -> SdkConfig {
    let loader = aws_config::defaults(BehaviorVersion::latest())
        .region(Region::new("us-east-1"))
        .credentials_provider(credentials)
        .http_client(build_http_client())
        .retry_config(RetryConfig::adaptive().with_max_attempts(3));
    loader.load().await
}
/// Extracts a 12-digit AWS account ID from `input`.
///
/// Accepts either a bare account ID (surrounding whitespace is ignored) or a longer
/// string that embeds one, such as an IAM ARN. Returns `None` when no account ID is
/// found.
///
/// Only a run of *exactly* twelve ASCII digits counts: the run must be delimited by a
/// non-digit or by the start/end of the string. A longer digit run (for example a
/// 13-digit number) is therefore rejected instead of being silently truncated to its
/// first twelve digits, which would put an unrelated account ID into the skip list.
fn extract_account_id(input: &str) -> Option<String> {
    let trimmed = input.trim();
    // Fast path: the value already is a bare account ID.
    if trimmed.len() == 12 && trimmed.bytes().all(|b| b.is_ascii_digit()) {
        return Some(trimmed.to_owned());
    }

    // `[0-9]` rather than `\d`: the regex crate's `\d` is Unicode-aware and would also
    // accept non-ASCII digits. The crate has no look-around, so the delimiters are
    // matched explicitly and the ID is taken from capture group 1.
    static ACCOUNT_ID_RE: Lazy<Regex> = Lazy::new(|| {
        Regex::new(r"(?:^|[^0-9])([0-9]{12})(?:[^0-9]|$)").expect("valid regex")
    });
    ACCOUNT_ID_RE
        .captures(trimmed)
        .and_then(|caps| caps.get(1))
        .map(|m| m.as_str().to_owned())
}
/// Set the maximum number of concurrent AWS validations. Call before first use.
pub fn set_aws_validation_concurrency(max: usize) {
@ -46,6 +98,46 @@ fn aws_validation_semaphore() -> &'static Semaphore {
AWS_VALIDATION_SEMAPHORE.get_or_init(|| Semaphore::new(15))
}
/// Replaces the AWS validation skip list.
///
/// The list is reset to the built-in account IDs and then extended with every entry
/// of `ids` from which an account ID can be extracted. Blank entries are skipped
/// silently; entries without a recognizable account ID are skipped with a warning.
/// A poisoned lock is recovered rather than treated as an error.
pub fn set_aws_skip_account_ids<I, S>(ids: I)
where
    I: IntoIterator<Item = S>,
    S: Into<String>,
{
    let mut skip_set =
        AWS_SKIP_ACCOUNT_IDS.write().unwrap_or_else(|poisoned| poisoned.into_inner());

    // Start over from the built-in entries so repeated calls do not accumulate.
    skip_set.clear();
    skip_set.extend(BUILTIN_SKIP_ACCOUNT_IDS.iter().map(|id| id.to_string()));

    for raw in ids {
        let candidate: String = raw.into();
        if candidate.trim().is_empty() {
            continue;
        }
        match extract_account_id(&candidate) {
            Some(account_id) => {
                skip_set.insert(account_id);
            }
            None => {
                tracing::warn!("Ignoring invalid AWS account ID in skip list: {candidate}");
            }
        }
    }
}
/// Reports whether live validation must be skipped for `access_key_id`.
///
/// Returns `Some(account_id)` when the account number derived from the access key ID
/// is in the skip list, and `None` when it is not or when no account number can be
/// derived from the key.
///
/// A poisoned lock is recovered, matching `set_aws_skip_account_ids`. Treating
/// poisoning as "nothing to skip" would fail open and let canary credentials be
/// validated live.
pub fn should_skip_aws_validation(access_key_id: &str) -> Option<String> {
    let skip_set =
        AWS_SKIP_ACCOUNT_IDS.read().unwrap_or_else(|poisoned| poisoned.into_inner());
    if skip_set.is_empty() {
        return None;
    }
    aws_key_to_account_number(access_key_id)
        .ok()
        .filter(|account| skip_set.contains(account))
}
/// Interceptor registered on the STS client configuration built in this file.
// NOTE(review): presumably injects the global User-Agent header into outgoing
// requests; the trait impl is not visible here — confirm.
#[derive(Debug)]
struct UaInterceptor;
@ -132,29 +224,7 @@ pub async fn validate_aws_credentials(
None, // expiry
"static", // provider name
);
// Create HTTP client that respects proxy settings from the environment
let http_client: SharedHttpClient =
HttpClientBuilder::new().build_with_connector_fn(|settings, runtime_components| {
let mut conn_builder = ConnectorBuilder::default()
.tls_provider(tls::Provider::Rustls(tls::rustls_provider::CryptoMode::AwsLc));
conn_builder.set_connector_settings(settings.cloned());
if let Some(components) = runtime_components {
conn_builder.set_sleep_impl(components.sleep_impl());
}
conn_builder.set_proxy_config(Some(ProxyConfig::from_env()));
conn_builder.build()
});
// Create AWS config with adaptive retries
let retry_config = RetryConfig::adaptive().with_max_attempts(3);
let config = aws_config::defaults(BehaviorVersion::latest())
.region(Region::new("us-east-1"))
.credentials_provider(credentials)
.http_client(http_client)
.retry_config(retry_config)
.load()
.await;
let config = build_base_config(credentials).await;
// Create STS client
let sts_config = StsConfigBuilder::from(&config).interceptor(UaInterceptor).build();
@ -230,3 +300,74 @@ pub fn aws_key_to_account_number(aws_key_id: &str) -> Result<String, Box<dyn std
// Return the account number formatted as a 12-digit string
Ok(format!("{:012}", account_num))
}
#[cfg(test)]
mod tests {
    use super::*;
    use once_cell::sync::Lazy;
    use std::sync::{Mutex, MutexGuard};

    /// Serializes the tests below: they all mutate the process-wide skip list.
    static TEST_GUARD: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));

    /// Acquires the test lock, recovering from poisoning.
    ///
    /// A test that fails while holding the lock poisons the mutex; without the
    /// recovery every later test would panic on `PoisonError` and hide the real
    /// failure.
    fn serialize_test() -> MutexGuard<'static, ()> {
        TEST_GUARD.lock().unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Whitespace-padded IDs and ARNs are normalized; unparseable entries are dropped.
    #[test]
    fn skip_account_list_normalizes_inputs() {
        let _lock = serialize_test();
        set_aws_skip_account_ids([
            " 052310077262 ",
            "arn:aws:iam::171436882533:role/demo",
            "invalid",
        ]);

        let guard = AWS_SKIP_ACCOUNT_IDS.read().unwrap();
        assert!(guard.contains("052310077262"));
        assert!(guard.contains("171436882533"));
        // Both valid inputs are also built-in entries and "invalid" is ignored, so the
        // set must not have grown.
        assert_eq!(guard.len(), BUILTIN_SKIP_ACCOUNT_IDS.len());
        drop(guard);

        set_aws_skip_account_ids(Vec::<String>::new());
    }

    /// A key whose account number is in the skip list is reported as skippable.
    // NOTE(review): 534261010715 is also a built-in entry, so this passes regardless
    // of the explicitly supplied list.
    #[test]
    fn should_skip_when_account_matches() {
        let _lock = serialize_test();
        set_aws_skip_account_ids(["534261010715"]);

        assert_eq!(
            should_skip_aws_validation("AKIAXYZDQCEN4B6JSJQI"),
            Some("534261010715".to_string())
        );

        set_aws_skip_account_ids(Vec::<String>::new());
    }

    /// Built-in account IDs are skipped even when no IDs are supplied.
    #[test]
    fn builtin_canary_accounts_are_preseeded() {
        let _lock = serialize_test();
        set_aws_skip_account_ids(Vec::<String>::new());

        assert_eq!(
            should_skip_aws_validation("AKIAXYZDQCEN4B6JSJQI"),
            Some("534261010715".to_string())
        );

        set_aws_skip_account_ids(Vec::<String>::new());
    }

    /// Different spellings of the same account collapse into a single entry.
    #[test]
    fn duplicate_accounts_are_deduplicated() {
        let _lock = serialize_test();
        set_aws_skip_account_ids([
            "534261010715",
            "arn:aws:iam::534261010715:user/canarytokens",
            " 534261010715 ",
        ]);

        let guard = AWS_SKIP_ACCOUNT_IDS.read().unwrap();
        assert_eq!(guard.iter().filter(|id| id.as_str() == "534261010715").count(), 1);
        drop(guard);

        set_aws_skip_account_ids(Vec::<String>::new());
    }
}

View file

@ -33,31 +33,77 @@ pub fn find_closest_variable(
target_variable_name: &str,
search_variable_name: &str,
) -> Option<String> {
// Find positions of the target variable with the target value
// Collect the positions of the target variable for the provided value so we can
// compare relative offsets with candidate variables.
let mut target_positions = Vec::new();
for (name, value, start, end) in captures {
if name == target_variable_name && value == target_value {
target_positions.push((*start, *end));
}
}
if target_positions.is_empty() {
return None;
}
// For each target position, find the closest search variable
let mut closest_distance = usize::MAX;
let mut closest_value: Option<String> = None;
for (_target_start, target_end) in target_positions {
for (name, value, start, _) in captures {
if name == search_variable_name {
let distance = (*start as isize - target_end as isize).abs() as usize;
if distance < closest_distance {
closest_distance = distance;
closest_value = Some(value.clone());
// Prefer candidates that appear before the target value (same logical block), but
// fall back to overlapping values and then to those that appear after the target
// value when no better match exists. This avoids pairing with the next block when
// multiple credentials are close together in the same file.
let mut best_before: Option<(usize, String)> = None;
let mut best_overlap: Option<(usize, String)> = None;
let mut best_after: Option<(usize, String)> = None;
for (target_start, target_end) in target_positions.iter().copied() {
for (name, value, start, end) in captures {
if name != search_variable_name {
continue;
}
if *end <= target_start {
// Candidate is before the target; choose the one closest to the target start.
let distance = target_start - *end;
match &mut best_before {
Some((best_distance, best_value)) if distance < *best_distance => {
*best_distance = distance;
*best_value = value.clone();
}
None => {
best_before = Some((distance, value.clone()));
}
_ => {}
}
} else if *start >= target_end {
// Candidate is after the target; choose the one closest to the target end.
let distance = *start - target_end;
match &mut best_after {
Some((best_distance, best_value)) if distance < *best_distance => {
*best_distance = distance;
*best_value = value.clone();
}
None => {
best_after = Some((distance, value.clone()));
}
_ => {}
}
} else {
// Candidate overlaps the target; treat it as an exact match.
let distance = 0usize;
match &mut best_overlap {
Some((best_distance, best_value)) if distance < *best_distance => {
*best_distance = distance;
*best_value = value.clone();
}
None => {
best_overlap = Some((distance, value.clone()));
}
_ => {}
}
}
}
}
closest_value
best_before.or(best_overlap).or(best_after).map(|(_, value)| value)
}
pub async fn check_url_resolvable(url: &Url) -> Result<(), Box<dyn std::error::Error>> {
@ -147,4 +193,31 @@ mod tests {
]
);
}
/// With one candidate before the target and one after it, the preceding one is returned.
#[test]
fn prefers_closest_preceding_variable() {
    let token = ("TOKEN".to_string(), "secret".to_string(), 75usize, 115usize);
    let akid_before = ("AKID".to_string(), "preceding".to_string(), 30usize, 50usize);
    let akid_after = ("AKID".to_string(), "following".to_string(), 180usize, 200usize);
    let captures = vec![token, akid_before, akid_after];

    let target_value = "secret".to_string();
    let matched = find_closest_variable(&captures, &target_value, "TOKEN", "AKID").unwrap();

    assert_eq!(matched, "preceding");
}
/// When the only candidate is located after the target, that candidate is returned.
#[test]
fn falls_back_to_following_when_no_preceding() {
    let token = ("TOKEN".to_string(), "secret".to_string(), 10usize, 50usize);
    let akid_after = ("AKID".to_string(), "after".to_string(), 60usize, 80usize);
    let captures = vec![token, akid_after];

    let target_value = "secret".to_string();
    let matched = find_closest_variable(&captures, &target_value, "TOKEN", "AKID").unwrap();

    assert_eq!(matched, "after");
}
}