From 97d6b53e025de4b264245af1c09a26a2c13742d4 Mon Sep 17 00:00:00 2001 From: Luke Young Date: Sun, 13 Jul 2025 17:09:15 -0700 Subject: [PATCH 001/357] feat(adafruitio): improve pattern matching --- data/rules/adafruitio.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/data/rules/adafruitio.yml b/data/rules/adafruitio.yml index 4982d86..a3e4408 100644 --- a/data/rules/adafruitio.yml +++ b/data/rules/adafruitio.yml @@ -2,11 +2,13 @@ rules: - name: Adafruit IO Key id: kingfisher.adafruitio.1 pattern: | - (?xi) + (?x) \b ( aio_ - [A-Z0-9]{28} + [a-zA-Z]{4} + [0-9]{2} + [a-zA-Z0-9]{22} ) \b min_entropy: 3.5 @@ -28,4 +30,4 @@ rules: type: StatusMatch - type: WordMatch words: - - '"username"' \ No newline at end of file + - '"username"' From d29add75564a2c74aed4e4a63af6a1e129bef8ca Mon Sep 17 00:00:00 2001 From: Luke Young Date: Sun, 13 Jul 2025 17:27:44 -0700 Subject: [PATCH 002/357] fix(digitalocean): regex is case-sensitive --- data/rules/digitalocean.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/rules/digitalocean.yml b/data/rules/digitalocean.yml index 1ae7471..b02bb52 100644 --- a/data/rules/digitalocean.yml +++ b/data/rules/digitalocean.yml @@ -2,7 +2,7 @@ rules: - name: DigitalOcean API Key id: kingfisher.digitalocean.1 pattern: | - (?xi) + (?x) \b ( (?:dop|doo)_v1_ @@ -32,7 +32,7 @@ rules: - name: DigitalOcean Refresh Token id: kingfisher.digitalocean.2 pattern: | - (?xi) + (?x) \b ( dor_v1_ From 601ca05fc803f9a6fdf107d687fecd97d021bbd0 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 14 Jul 2025 15:31:44 -0700 Subject: [PATCH 003/357] JWT validation performs OpenID Connect discovery using the iss claim and verifies signatures via JWKS --- CHANGELOG.md | 2 + Cargo.toml | 2 + data/rules/jwt.yml | 4 +- src/rules/rule.rs | 1 + src/validation.rs | 59 ++++++------ src/validation/jwt.rs | 207 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 245 insertions(+), 30 deletions(-) create mode 100644 
src/validation/jwt.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e1179b..032379c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ All notable changes to this project will be documented in this file. - Added baseline feature with `--baseline-file` and `--manage-baseline` flags - Introduced `--exclude` option for skipping paths - Added tests covering baseline and exclude workflow +- Added validation for JWT tokens that checks `exp` and `nbf` claims +- JWT validation performs OpenID Connect discovery using the `iss` claim and verifies signatures via JWKS ## [1.20.0] diff --git a/Cargo.toml b/Cargo.toml index dc2db13..e220d39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -162,6 +162,8 @@ atty = "0.2.14" self_update = { version = "0.42.0", default-features = false, features = ["rustls", "archive-tar", "archive-zip", "compression-flate2"] } semver = "1.0.26" globset = "0.4.16" +jsonwebtoken = "9.3.1" +ipnet = "2.11.0" [dependencies.tikv-jemallocator] version = "0.6" diff --git a/data/rules/jwt.yml b/data/rules/jwt.yml index cd3f78d..e596027 100644 --- a/data/rules/jwt.yml +++ b/data/rules/jwt.yml @@ -22,4 +22,6 @@ rules: - https://datatracker.ietf.org/doc/html/rfc7519 - https://en.wikipedia.org/wiki/Base64#URL_applications - https://datatracker.ietf.org/doc/html/rfc4648 - - https://developer.okta.com/blog/2018/06/20/what-happens-if-your-jwt-is-stolen \ No newline at end of file + - https://developer.okta.com/blog/2018/06/20/what-happens-if-your-jwt-is-stolen + validation: + type: JWT \ No newline at end of file diff --git a/src/rules/rule.rs b/src/rules/rule.rs index bf923f3..a301a09 100644 --- a/src/rules/rule.rs +++ b/src/rules/rule.rs @@ -38,6 +38,7 @@ pub enum Validation { GCP, MongoDB, Postgres, + JWT, Raw(String), Http(HttpValidation), } diff --git a/src/validation.rs b/src/validation.rs index 44a7a6f..172888d 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -27,6 +27,7 @@ mod aws; mod azure; mod gcp; mod httpvalidation; +mod jwt; mod 
mongodb; mod postgres; mod utils; @@ -58,35 +59,6 @@ pub fn init_validation_caches() { IN_FLIGHT.set(DashMap::new()).ok(); } -// #[derive(Clone, FilterReflection, ParseFilter)] -// #[filter( -// name = "b64enc", -// description = "Encodes the input string using Base64 encoding", -// parsed(B64EncFilter) -// )] -// pub struct B64EncFilterParser; - -// #[derive(Debug, Default, Clone)] -// pub struct B64EncFilter; - -// impl std::fmt::Display for B64EncFilter { -// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { -// write!(f, "b64enc") -// } -// } - -// impl Filter for B64EncFilter { -// fn evaluate( -// &self, -// input: &dyn ValueView, -// _runtime: &dyn Runtime, -// ) -> Result { -// let input_str = input.to_kstr().into_owned(); -// let encoded = general_purpose::STANDARD.encode(input_str.as_bytes()); -// Ok(Value::scalar(encoded)) -// } -// } - #[derive(Clone)] pub struct CachedResponse { pub body: String, @@ -700,7 +672,36 @@ async fn timed_validate_single_match<'a>( }, ); } + // ---------------------------------------------------- JWT validator + Some(Validation::JWT) => { + let token = captured_values + .iter() + .find(|(n, ..)| n == "TOKEN") + .map(|(_, v, ..)| v.clone()) + .unwrap_or_default(); + if token.is_empty() { + m.validation_success = false; + m.validation_response_body = "JWT token not found.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; + } + + match jwt::validate_jwt(&token, client).await { + Ok((ok, msg)) => { + m.validation_success = ok; + m.validation_response_body = msg; + m.validation_response_status = + if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("JWT validation error: {}", e); + m.validation_response_status = StatusCode::BAD_REQUEST; + } + } + } // ---------------------------------------------------- AWS validator Some(Validation::AWS) => { let secret = 
captured_values diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs new file mode 100644 index 0000000..ed2bd3c --- /dev/null +++ b/src/validation/jwt.rs @@ -0,0 +1,207 @@ +use anyhow::{anyhow, Result}; +use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; +use chrono::Utc; +use ipnet::IpNet; +use jsonwebtoken::{decode, decode_header, jwk::JwkSet, DecodingKey, Validation as JwtValidation}; +use reqwest::{redirect::Policy, Client, Url}; +use serde::Deserialize; +use tokio::net::lookup_host; + +use super::utils::check_url_resolvable; + +/// RFC 1918 + loopback + link-local nets we refuse to contact +const BLOCKED_NETS: &[&str] = &[ + "10.0.0.0/8", + "172.16.0.0/12", + "192.168.0.0/16", // private + "127.0.0.0/8", + "169.254.0.0/16", // loopback / link-local +]; + +// aud is allowed to be either a string or an array, so let Serde flatten it. +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum Aud { + Str(String), + Arr(Vec), +} + +#[derive(Debug, Deserialize)] +struct Claims { + exp: Option, + nbf: Option, + iss: Option, + aud: Option, +} + +pub async fn validate_jwt(token: &str, client: &Client) -> Result<(bool, String)> { + // --- insecure payload decode ------------------------------------------------- + let claims: Claims = { + let payload_b64 = token.split('.').nth(1).ok_or_else(|| anyhow!("invalid JWT format"))?; + let payload_json = URL_SAFE_NO_PAD + .decode(payload_b64) + .map_err(|e| anyhow!("invalid base64 in payload: {e}"))?; + serde_json::from_slice(&payload_json).map_err(|e| anyhow!("invalid JSON claims: {e}"))? 
+ }; + + // temporal checks + let now = Utc::now().timestamp(); + if let Some(nbf) = claims.nbf { + if now < nbf { + return Ok((false, format!("Token not valid before {nbf}"))); + } + } + if let Some(exp) = claims.exp { + if now > exp { + return Ok((false, format!("Token expired at {exp}"))); + } + } + + // --------------------------------------------------------------------------- + let issuer = claims.iss.clone().unwrap_or_default(); + + if let Some(iss) = claims.iss.clone() { + // parse header now (kid, alg) + let header = decode_header(token).map_err(|e| anyhow!("decode header: {e}"))?; + + // build discovery URL and fetch it (redirects disabled) + let config_url = format!("{}/.well-known/openid-configuration", iss.trim_end_matches('/')); + let no_redirect_client = Client::builder() + .redirect(Policy::none()) + .build() + .map_err(|e| anyhow!("client build: {e}"))?; + + let cfg_resp = no_redirect_client + .get(&config_url) + .send() + .await + .map_err(|e| anyhow!("issuer discovery failed: {e}"))?; + + if !cfg_resp.status().is_success() { + return Ok((false, format!("issuer discovery failed: {}", cfg_resp.status()))); + } + + let cfg_json: serde_json::Value = + cfg_resp.json().await.map_err(|e| anyhow!("invalid discovery JSON: {e}"))?; + + // extract jwks_uri + let jwks_uri = cfg_json + .get("jwks_uri") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow!("jwks_uri missing"))?; + + // must be HTTPS + let url = Url::parse(jwks_uri).map_err(|e| anyhow!("invalid jwks_uri: {e}"))?; + if url.scheme() != "https" { + return Ok((false, "jwks_uri must use https".to_string())); + } + + // host must match issuer host  —  prevents open redirects / SSRF-on-other-host + let iss_host = Url::parse(&iss) + .map_err(|e| anyhow!("invalid iss: {e}"))? 
+ .host_str() + .unwrap_or_default() + .to_ascii_lowercase(); + let jwks_host = url.host_str().unwrap_or_default().to_ascii_lowercase(); + if jwks_host != iss_host { + return Ok(( + false, + format!("jwks_uri host ({jwks_host}) must match issuer host ({iss_host})"), + )); + } + + // ----------------------------------------------------------------------- + // DNS resolution + private-range block + for addr in lookup_host((jwks_host.as_str(), 443)).await? { + if is_blocked_ip(addr.ip()) { + return Ok((false, "jwks_uri resolves to private or link-local IP".to_string())); + } + } + + // reachability check (existing helper) + check_url_resolvable(&url).await.map_err(|e| anyhow!("jwks uri unresolvable: {e}"))?; + + // fetch JWKS with redirect-free client + let jwks_resp = no_redirect_client + .get(url) + .send() + .await + .map_err(|e| anyhow!("jwks fetch failed: {e}"))?; + if !jwks_resp.status().is_success() { + return Ok((false, format!("jwks fetch failed: {}", jwks_resp.status()))); + } + + let jwk_set: JwkSet = + jwks_resp.json().await.map_err(|e| anyhow!("invalid jwks json: {e}"))?; + + // select key by kid + let kid = header.kid.ok_or_else(|| anyhow!("no kid in header"))?; + let jwk = jwk_set + .keys + .iter() + .find(|k| k.common.key_id.as_deref() == Some(&kid)) + .ok_or_else(|| anyhow!("kid not found in jwks"))?; + + // verify signature + let decoding_key = DecodingKey::from_jwk(jwk).map_err(|e| anyhow!("invalid jwk: {e}"))?; + let mut validation = JwtValidation::new(header.alg); + validation.set_audience(&extract_aud_strings(&claims)); + validation.validate_exp = false; + validation.validate_nbf = false; + + decode::(token, &decoding_key, &validation) + .map_err(|e| anyhow!("signature verification failed: {e}"))?; + + return Ok(( + true, + format!("JWT valid (iss: {issuer}, aud: {:?})", extract_aud_strings(&claims)), + )); + } + + Ok((true, format!("JWT not expired (iss: {issuer}, aud: {:?})", extract_aud_strings(&claims)))) +} + +/// Helper: normalize aud into 
a flat Vec +fn extract_aud_strings(claims: &Claims) -> Vec { + match &claims.aud { + Some(Aud::Str(s)) => vec![s.clone()], + Some(Aud::Arr(v)) => v.clone(), + None => vec![], + } +} +/// returns true if IP is in a blocked network +fn is_blocked_ip(ip: std::net::IpAddr) -> bool { + BLOCKED_NETS.iter().filter_map(|cidr| cidr.parse::().ok()).any(|net| net.contains(&ip)) +} + +#[cfg(test)] +mod tests { + use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; + use chrono::{Duration as ChronoDuration, Utc}; + use reqwest::Client; + + use super::validate_jwt; + + fn build_token(exp_offset: i64) -> String { + let header = URL_SAFE_NO_PAD.encode(r#"{"alg":"none"}"#); + let exp = (Utc::now() + ChronoDuration::seconds(exp_offset)).timestamp(); + let payload = URL_SAFE_NO_PAD.encode(format!("{{\"exp\":{exp}}}")); + format!("{header}.{payload}.") + } + + #[tokio::test] + async fn valid_token() { + let token = build_token(60); + let client = Client::new(); + let res = validate_jwt(&token, &client).await.unwrap(); + assert!(res.0); + } + + #[tokio::test] + async fn expired_token() { + let token = build_token(-60); + let client = Client::new(); + let res = validate_jwt(&token, &client).await.unwrap(); + assert!(!res.0); + } +} From 533fc49c5463d3dede4cc248e151544c967cc45f Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 14 Jul 2025 16:55:19 -0700 Subject: [PATCH 004/357] Removed --ignore-tests argument, because the --exclude flag provides more granular functionality --- CHANGELOG.md | 1 + README.md | 8 ++++++-- src/cli/commands/scan.rs | 4 ---- src/git_repo_enumerator.rs | 26 ++++++++++++++++++------- src/lib.rs | 6 +++++- src/main.rs | 1 - src/reporter/pretty_format.rs | 1 - src/scanner/enumerate.rs | 36 ++++++++++++++++------------------- tests/int_dedup.rs | 1 - tests/int_github.rs | 1 - tests/int_gitlab.rs | 1 - tests/int_validation_cache.rs | 1 - tests/int_vulnerable_files.rs | 2 -- 13 files changed, 47 insertions(+), 42 deletions(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index 032379c..fb1ede4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file. - Added tests covering baseline and exclude workflow - Added validation for JWT tokens that checks `exp` and `nbf` claims - JWT validation performs OpenID Connect discovery using the `iss` claim and verifies signatures via JWKS +- Removed `--ignore-tests` argument, because the `--exclude` flag provides more granular functionality ## [1.20.0] diff --git a/README.md b/README.md index d7ae8b6..0797344 100644 --- a/README.md +++ b/README.md @@ -142,7 +142,12 @@ kingfisher scan /path/to/repo --rule-stats ```bash # Scan source but skip likely unit / integration tests -kingfisher scan ./my-project --ignore-tests +kingfisher scan ./my-project \ + --exclude='test' \ + --exclude='spec' \ + --exclude='fixture' \ + --exclude='example' \ + --exclude='sample' ``` ### Exclude specific paths @@ -304,7 +309,6 @@ kingfisher github repos list --organization my-org - `--no-extract-archives`: Do not scan inside archives - `--extraction-depth `: Specifies how deep nested archives should be extracted and scanned (default: 2) - `--redact`: Replaces discovered secrets with a one-way hash for secure output -- `--ignore-tests`: Skip files or directories whose path component contains _test_, _spec_, _fixture_, _example_, or _sample_ (case-insensitive) - `--exclude `: Skip any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax) - `--baseline-file `: Ignore matches listed in a baseline YAML file - `--manage-baseline`: Create or update the baseline file with current findings diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs index 406c896..fe4d5d6 100644 --- a/src/cli/commands/scan.rs +++ b/src/cli/commands/scan.rs @@ -88,10 +88,6 @@ pub struct ScanArgs { #[arg(long, default_value_t = false)] pub no_dedup: bool, - /// Ignore matches that appear to come 
from test files - #[arg(long, default_value_t = false)] - pub ignore_tests: bool, - /// Redact findings values using a secure hash #[arg(long, short = 'r', default_value_t = false)] pub redact: bool, diff --git a/src/git_repo_enumerator.rs b/src/git_repo_enumerator.rs index 77e506e..9b3151e 100644 --- a/src/git_repo_enumerator.rs +++ b/src/git_repo_enumerator.rs @@ -73,11 +73,16 @@ pub struct GitBlobMetadata { pub struct GitRepoWithMetadataEnumerator<'a> { path: &'a Path, repo: Repository, + exclude_globset: Option>, } impl<'a> GitRepoWithMetadataEnumerator<'a> { - pub fn new(path: &'a Path, repo: Repository) -> Self { - Self { path, repo } + pub fn new( + path: &'a Path, + repo: Repository, + exclude_globset: Option>, + ) -> Self { + Self { path, repo, exclude_globset } } pub fn run(self) -> Result { @@ -173,12 +178,19 @@ impl<'a> GitRepoWithMetadataEnumerator<'a> { } let filtered = appearances .into_iter() - .filter(|entry| { - // Apply your actual ignore-logic here: - match entry.path.to_path() { - Ok(_path) => true, - Err(_) => true, + .filter(|entry| match entry.path.to_path() { + Ok(p) => { + if let Some(gs) = &self.exclude_globset { + let m = gs.is_match(p); + if m { + debug!("Skipping {} due to --exclude", p.display()); + } + !m + } else { + true + } } + Err(_) => true, }) .collect::>(); if filtered.is_empty() { diff --git a/src/lib.rs b/src/lib.rs index 9f04d46..26703fc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,7 +82,7 @@ struct EnumeratorConfig { enumerate_git_history: bool, collect_git_metadata: bool, repo_scan_timeout: Duration, - // gitignore: Gitignore, + exclude_globset: Option>, } pub enum FoundInput { @@ -312,6 +312,10 @@ impl FilesystemEnumerator { Ok(self) } + pub fn exclude_globset(&self) -> Option> { + self.exclude_globset.clone() + } + pub fn gitignore(&self) -> Result { Ok(self.gitignore_builder.build()?) 
} diff --git a/src/main.rs b/src/main.rs index 2567187..44a3ec3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -300,7 +300,6 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { redact: false, git_repo_timeout: 1800, no_dedup: false, - ignore_tests: false, snippet_length: 256, baseline_file: None, manage_baseline: false, diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index 7a6212c..b9c868c 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -314,7 +314,6 @@ fn test_pretty_format_with_nan_entropy_panics() { // core execution / performance num_jobs: 1, no_dedup: false, - ignore_tests: false, // rule selection rules: RuleSpecifierArgs { diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index 62dbd56..8cfa9ff 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -82,9 +82,10 @@ pub fn enumerate_filesystem_inputs( }() .context("Failed to initialize filesystem enumerator")?; - let (enum_thread, input_recv) = { + let (enum_thread, input_recv, exclude_globset) = { let fs_enumerator = make_fs_enumerator(args, input_roots.into()) .context("Failed to initialize filesystem enumerator")?; + let exclude_globset = fs_enumerator.as_ref().and_then(|ie| ie.exclude_globset()); let channel_size = std::cmp::max(args.num_jobs * 128, 1024); let (input_send, input_recv) = crossbeam_channel::bounded(channel_size); @@ -97,7 +98,7 @@ pub fn enumerate_filesystem_inputs( Ok(()) }) .context("Failed to enumerate filesystem inputs")?; - (input_enumerator_thread, input_recv) + (input_enumerator_thread, input_recv, exclude_globset) }; let enum_cfg = EnumeratorConfig { @@ -107,6 +108,7 @@ pub fn enumerate_filesystem_inputs( }, collect_git_metadata: args.input_specifier_args.commit_metadata, repo_scan_timeout, + exclude_globset, }; let (send_ds, recv_ds) = create_datastore_channel(args.num_jobs); let datastore_writer_thread = @@ -189,23 +191,11 @@ pub fn enumerate_filesystem_inputs( 
Ok(Some((origin_set, blob_metadata, vec_of_matches))) => { for (_, single_match) in vec_of_matches { // Send each match - let is_test = if args.ignore_tests { - origin_set - .iter() - .filter_map(|o| o.full_path()) - .any(|p| is_test_like_path(&p)) - } else { - false - }; - - if !is_test { - // Send each match - send_ds.send(( - Arc::new(origin_set.clone()), - Arc::new(blob_metadata.clone()), - single_match, - ))?; - } + send_ds.send(( + Arc::new(origin_set.clone()), + Arc::new(blob_metadata.clone()), + single_match, + ))?; } } Err(e) => { @@ -604,9 +594,15 @@ impl<'cfg> ParallelBlobIterator for (&'cfg EnumeratorConfig, FoundInput) { // Spawn an enumerator thread so we can time-out cleanly let path_clone = path.to_path_buf(); let (tx, rx) = std::sync::mpsc::channel(); + let exclude_globset = cfg.exclude_globset.clone(); let handle = std::thread::spawn(move || { let res = if collect_git_metadata { - GitRepoWithMetadataEnumerator::new(&path_clone, repository).run() + GitRepoWithMetadataEnumerator::new( + &path_clone, + repository, + exclude_globset.clone(), + ) + .run() } else { GitRepoEnumerator::new(&path_clone, repository).run() }; diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 3c41afd..64c2c92 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -101,7 +101,6 @@ rules: git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup, - ignore_tests: false, snippet_length: 64, baseline_file: None, manage_baseline: false, diff --git a/tests/int_github.rs b/tests/int_github.rs index c66fdb8..330299b 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -88,7 +88,6 @@ fn test_github_remote_scan() -> Result<()> { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - ignore_tests: false, snippet_length: 256, baseline_file: None, manage_baseline: false, diff --git a/tests/int_gitlab.rs 
b/tests/int_gitlab.rs index 308f6a0..66a7f37 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -85,7 +85,6 @@ fn test_gitlab_remote_scan() -> Result<()> { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - ignore_tests: false, snippet_length: 256, baseline_file: None, manage_baseline: false, diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 06bc55a..7e422e1 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -144,7 +144,6 @@ async fn test_validation_cache_and_depvars() -> Result<()> { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, // keep duplicates so the cache is stressed - ignore_tests: false, snippet_length: 128, baseline_file: None, manage_baseline: false, diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 032a51b..bb1d064 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -87,7 +87,6 @@ impl TestContext { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - ignore_tests: false, snippet_length: 256, baseline_file: None, manage_baseline: false, @@ -153,7 +152,6 @@ impl TestContext { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - ignore_tests: false, snippet_length: 256, baseline_file: None, manage_baseline: false, From 5d5521af5fffcaacb902999e5d96a59fd355db14 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 14 Jul 2025 17:09:04 -0700 Subject: [PATCH 005/357] Removed --ignore-tests argument, because the --exclude flag provides more granular functionality --- CHANGELOG.md | 3 ++- README.md | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md 
index fb1ede4..d9bce79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,8 @@ All notable changes to this project will be documented in this file. - Added validation for JWT tokens that checks `exp` and `nbf` claims - JWT validation performs OpenID Connect discovery using the `iss` claim and verifies signatures via JWKS - Removed `--ignore-tests` argument, because the `--exclude` flag provides more granular functionality - +- DigitalOcean rule update +- Adafruit rule update ## [1.20.0] - Removed confirmation prompt when user provides --self-update flag diff --git a/README.md b/README.md index 0797344..f94fc36 100644 --- a/README.md +++ b/README.md @@ -140,12 +140,14 @@ kingfisher scan /path/to/repo --rule-stats ### Scan while ignoring likely test files +`--exclude` skips any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax, case sensitive) + ```bash # Scan source but skip likely unit / integration tests kingfisher scan ./my-project \ - --exclude='test' \ + --exclude='[Tt]est' \ --exclude='spec' \ - --exclude='fixture' \ + --exclude='[Ff]ixture' \ --exclude='example' \ --exclude='sample' ``` @@ -155,7 +157,7 @@ kingfisher scan ./my-project \ # Skip all Python files and any directory named tests kingfisher scan ./my-project \ --exclude '*.py' \ - --exclude tests + --exclude '[Tt]ests' ``` If you want to know which files are being skipped, enable verbose debugging (-v) when scanning, which will report any files being skipped by the baseline file (or via --exclude): @@ -309,7 +311,7 @@ kingfisher github repos list --organization my-org - `--no-extract-archives`: Do not scan inside archives - `--extraction-depth `: Specifies how deep nested archives should be extracted and scanned (default: 2) - `--redact`: Replaces discovered secrets with a one-way hash for secure output -- `--exclude `: Skip any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax) +- `--exclude `: Skip 
any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax, case sensitive) - `--baseline-file `: Ignore matches listed in a baseline YAML file - `--manage-baseline`: Create or update the baseline file with current findings From 959a6b3043f46354d38413d83737e9ffe490cbde Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 14 Jul 2025 17:10:53 -0700 Subject: [PATCH 006/357] Removed --ignore-tests argument, because the --exclude flag provides more granular functionality --- src/git_metadata_graph.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/git_metadata_graph.rs b/src/git_metadata_graph.rs index 90c28fd..9144068 100644 --- a/src/git_metadata_graph.rs +++ b/src/git_metadata_graph.rs @@ -147,9 +147,9 @@ impl RepositoryIndex { .with_ordering(Ordering::PackLexicographicalThenLooseLexicographical) { let oid = unwrap_ok_or_continue!(oid, |e| debug!("Failed to read object id: {e}")); - if count % 100000 == 0 { - debug!("Indexed {} objects in RepositoryIndex::new", count); - } + // if count % 100000 == 0 { + // debug!("Indexed {} objects in RepositoryIndex::new", count); + // } let hdr = unwrap_ok_or_continue!(odb.header(oid), |e| { debug!("Failed to read object header for {oid}: {e}") }); From 8fb78c8c65e690571614f75497452d7c31ba0048 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 14 Jul 2025 17:22:37 -0700 Subject: [PATCH 007/357] change that hoists the redirect-free reqwest::Client into a single, lazily-initialized static so every call to validate_jwt re-uses the same handle (and therefore the same connection-pool, DNS cache, TLS session cache, etc) --- src/reporter/json_format.rs | 1 - src/scanner/enumerate.rs | 1 - src/validation/jwt.rs | 21 ++++++++++++++------- tests/smoke_update.rs | 5 ----- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index c1185b2..a4e8730 100644 --- a/src/reporter/json_format.rs +++ 
b/src/reporter/json_format.rs @@ -390,7 +390,6 @@ mod tests { cli::commands::scan::ScanArgs { num_jobs: 1, no_dedup: false, - ignore_tests: false, rules: RuleSpecifierArgs { rules_path: Vec::new(), rule: vec!["all".into()], diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index 8cfa9ff..0e0c795 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -37,7 +37,6 @@ use crate::{ util::is_compressed_file, }, scanner_pool::ScannerPool, - util::is_test_like_path, EnumeratorConfig, EnumeratorFileResult, FileResult, FilesystemEnumerator, FoundInput, GitRepoEnumerator, GitRepoResult, GitRepoWithMetadataEnumerator, PathBuf, }; diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs index ed2bd3c..ca0ac43 100644 --- a/src/validation/jwt.rs +++ b/src/validation/jwt.rs @@ -3,12 +3,24 @@ use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; use chrono::Utc; use ipnet::IpNet; use jsonwebtoken::{decode, decode_header, jwk::JwkSet, DecodingKey, Validation as JwtValidation}; +use once_cell::sync::Lazy; use reqwest::{redirect::Policy, Client, Url}; use serde::Deserialize; use tokio::net::lookup_host; use super::utils::check_url_resolvable; +/// One global, redirect-free client. Building a `Client` is comparatively +/// expensive; re-using it lets reqwest share its internal connection pool +/// and TLS sessions across JWT validations. `Lazy` ensures thread-safe, +/// one-time initialisation. 
+static NO_REDIRECT_CLIENT: Lazy = Lazy::new(|| { + Client::builder() + .redirect(Policy::none()) // disable all redirects + .build() + .expect("failed to build no-redirect Client") +}); + /// RFC 1918 + loopback + link-local nets we refuse to contact const BLOCKED_NETS: &[&str] = &[ "10.0.0.0/8", @@ -66,12 +78,7 @@ pub async fn validate_jwt(token: &str, client: &Client) -> Result<(bool, String) // build discovery URL and fetch it (redirects disabled) let config_url = format!("{}/.well-known/openid-configuration", iss.trim_end_matches('/')); - let no_redirect_client = Client::builder() - .redirect(Policy::none()) - .build() - .map_err(|e| anyhow!("client build: {e}"))?; - - let cfg_resp = no_redirect_client + let cfg_resp = NO_REDIRECT_CLIENT .get(&config_url) .send() .await @@ -122,7 +129,7 @@ pub async fn validate_jwt(token: &str, client: &Client) -> Result<(bool, String) check_url_resolvable(&url).await.map_err(|e| anyhow!("jwks uri unresolvable: {e}"))?; // fetch JWKS with redirect-free client - let jwks_resp = no_redirect_client + let jwks_resp = NO_REDIRECT_CLIENT .get(url) .send() .await diff --git a/tests/smoke_update.rs b/tests/smoke_update.rs index 45573fa..8224d42 100644 --- a/tests/smoke_update.rs +++ b/tests/smoke_update.rs @@ -1,9 +1,4 @@ -use std::fs::{self, File}; - -use flate2::{write::GzEncoder, Compression}; use kingfisher::{cli::global::GlobalArgs, update::check_for_update}; -use tar::Builder; -use tempfile::tempdir; use tokio; use wiremock::{ matchers::{method, path}, From 08306062601220a06d4ae70e290bb3aea021b3f8 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 14 Jul 2025 17:22:51 -0700 Subject: [PATCH 008/357] change that hoists the redirect-free reqwest::Client into a single, lazily-initialized static so every call to validate_jwt re-uses the same handle (and therefore the same connection-pool, DNS cache, TLS session cache, etc) --- src/validation.rs | 2 +- src/validation/jwt.rs | 8 +++----- 2 files changed, 4 insertions(+), 6 
deletions(-) diff --git a/src/validation.rs b/src/validation.rs index 172888d..6cb3711 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -688,7 +688,7 @@ async fn timed_validate_single_match<'a>( return; } - match jwt::validate_jwt(&token, client).await { + match jwt::validate_jwt(&token).await { Ok((ok, msg)) => { m.validation_success = ok; m.validation_response_body = msg; diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs index ca0ac43..6449fd2 100644 --- a/src/validation/jwt.rs +++ b/src/validation/jwt.rs @@ -46,7 +46,7 @@ struct Claims { aud: Option, } -pub async fn validate_jwt(token: &str, client: &Client) -> Result<(bool, String)> { +pub async fn validate_jwt(token: &str) -> Result<(bool, String)> { // --- insecure payload decode ------------------------------------------------- let claims: Claims = { let payload_b64 = token.split('.').nth(1).ok_or_else(|| anyhow!("invalid JWT format"))?; @@ -199,16 +199,14 @@ mod tests { #[tokio::test] async fn valid_token() { let token = build_token(60); - let client = Client::new(); - let res = validate_jwt(&token, &client).await.unwrap(); + let res = validate_jwt(&token).await.unwrap(); assert!(res.0); } #[tokio::test] async fn expired_token() { let token = build_token(-60); - let client = Client::new(); - let res = validate_jwt(&token, &client).await.unwrap(); + let res = validate_jwt(&token).await.unwrap(); assert!(!res.0); } } From 5f8ffcd940a8f3912e9490cb10b1c19240c3ee0b Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 14 Jul 2025 19:44:28 -0700 Subject: [PATCH 009/357] updated github actions to upgrade build from 1.85.0 to 1.88.0 --- .github/workflows/ci.yml | 4 ++-- .github/workflows/release.yml | 10 +++++----- Makefile | 14 +++++++------- src/git_metadata_graph.rs | 7 +------ 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c46ebf6..389b9c3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 
+16,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions-rs/toolchain@v1 with: - toolchain: 1.85.0 + toolchain: 1.88.0 profile: minimal override: true - uses: swatinem/rust-cache@v2 @@ -32,7 +32,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions-rs/toolchain@v1 with: - toolchain: 1.85.0 + toolchain: 1.88.0 profile: minimal override: true - uses: swatinem/rust-cache@v2 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index cc46915..b681f1b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,7 +15,7 @@ jobs: - uses: actions-rs/toolchain@v1 with: - toolchain: 1.85.0 + toolchain: 1.88.0 profile: minimal override: true @@ -43,7 +43,7 @@ jobs: - uses: actions-rs/toolchain@v1 with: - toolchain: 1.85.0 + toolchain: 1.88.0 profile: minimal override: true @@ -72,7 +72,7 @@ jobs: - uses: actions-rs/toolchain@v1 with: - toolchain: 1.85.0 + toolchain: 1.88.0 profile: minimal override: true @@ -103,7 +103,7 @@ jobs: - uses: actions-rs/toolchain@v1 with: - toolchain: 1.85.0 + toolchain: 1.88.0 profile: minimal override: true @@ -134,7 +134,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions-rs/toolchain@v1 with: - toolchain: 1.85.0 + toolchain: 1.88.0 profile: minimal override: true diff --git a/Makefile b/Makefile index da375d4..540959e 100644 --- a/Makefile +++ b/Makefile @@ -110,11 +110,11 @@ setup-zig: ubuntu-x64: setup-zig # ensures Zig & cargo-zigbuild exist @echo "Checking Rust toolchain…" @$(MAKE) check-rust || { \ - echo "🦀 Installing Rust 1.85.0 …"; \ + echo "🦀 Installing Rust 1.88.0 …"; \ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \ . 
$$HOME/.cargo/env; \ - rustup toolchain install 1.85.0; \ - rustup default 1.85.0; \ + rustup toolchain install 1.88.0; \ + rustup default 1.88.0; \ } @echo "📦 Installing build dependencies (musl, cmake, etc.)…" @@ -150,11 +150,11 @@ ubuntu-x64: setup-zig # ensures Zig & cargo-zigbuild exist ubuntu-arm64: setup-zig # ensures Zig & cargo-zigbuild exist @echo "Checking Rust toolchain…" @$(MAKE) check-rust || { \ - echo "🦀 Installing Rust 1.85.0 …"; \ + echo "🦀 Installing Rust 1.88.0 …"; \ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y; \ . $$HOME/.cargo/env; \ - rustup toolchain install 1.85.0; \ - rustup default 1.85.0; \ + rustup toolchain install 1.88.0; \ + rustup default 1.88.0; \ } @echo "📦 Installing build dependencies (musl, cmake, etc.)…" @@ -391,7 +391,7 @@ check-rust: echo "Rust not found."; \ exit 1; \ fi; \ - required=1.85.0; \ + required=1.88.0; \ if [ $$(printf '%s\n' "$$required" "$$version" | sort -V | head -n1) != "$$required" ]; then \ echo "Rust version $$version is older than required $$required."; \ exit 1; \ diff --git a/src/git_metadata_graph.rs b/src/git_metadata_graph.rs index 9144068..04d6729 100644 --- a/src/git_metadata_graph.rs +++ b/src/git_metadata_graph.rs @@ -139,17 +139,13 @@ impl RepositoryIndex { let mut num_trees = 0; let mut num_blobs = 0; let mut num_commits = 0; - let count = 0; - + for oid in odb .iter() .context("Failed to iterate object database")? 
.with_ordering(Ordering::PackLexicographicalThenLooseLexicographical) { let oid = unwrap_ok_or_continue!(oid, |e| debug!("Failed to read object id: {e}")); - // if count % 100000 == 0 { - // debug!("Indexed {} objects in RepositoryIndex::new", count); - // } let hdr = unwrap_ok_or_continue!(odb.header(oid), |e| { debug!("Failed to read object header for {oid}: {e}") }); @@ -160,7 +156,6 @@ impl RepositoryIndex { Kind::Tag => num_tags += 1, } } - debug!("Total objects to map in RepositoryIndex::new: {}", count); let mut trees = ObjectIdBimap::with_capacity(num_trees); let mut commits = ObjectIdBimap::with_capacity(num_commits); From 610a27015c547efd15eab5d24a6a521bec65ccaa Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 15 Jul 2025 16:41:35 -0700 Subject: [PATCH 010/357] Added dockerfile and github action to build and publish it --- docker/Dockerfile | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 docker/Dockerfile diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..62da0a8 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,33 @@ +# syntax=docker/dockerfile:1 +FROM alpine:latest + +RUN apk add --no-cache curl tar + +ARG TARGETARCH # set automatically by BuildKit +ENV TARGETARCH=${TARGETARCH} + +WORKDIR /app + +RUN set -eux; \ + # choose the right asset for this build platform + case "${TARGETARCH}" in \ + amd64) SUFFIX="linux-x64.tgz" ;; \ + arm64) SUFFIX="linux-arm64.tgz" ;; \ + *) echo "unsupported arch ${TARGETARCH}" >&2; exit 1 ;; \ + esac; \ + # download & unpack + LATEST_URL=$(curl -s https://api.github.com/repos/mongodb/kingfisher/releases/latest \ + | grep -Eo "https://[^\"]*${SUFFIX}"); \ + curl -L "$LATEST_URL" -o kingfisher.tgz; \ + tar -xzf kingfisher.tgz; \ + rm kingfisher.tgz; \ + # locate the binary (pattern covers kingfisher-linux-x64 / kingfisher-linux-arm64) + KF_PATH=$(find . 
-type f -name 'kingfisher*' | head -n1); \ + install -m 0755 "$KF_PATH" /usr/local/bin/kingfisher; \ + # optional cleanup to keep the image small + rm -rf /app/* + +# quick smoke-test so the build fails early if something’s wrong +RUN kingfisher --version + +ENTRYPOINT ["kingfisher"] From 507b8e2b5eb3a9890f72dbe70e2861ad2e040543 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 15 Jul 2025 16:42:22 -0700 Subject: [PATCH 011/357] Added dockerfile and github action to build and publish it --- .github/workflows/release-docker.yml | 30 ++++++++++++++++++++++++++++ Makefile | 7 +++++++ README.md | 20 +++++++++++++++++++ data/rules/datadog.yml | 5 ++--- 4 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/release-docker.yml diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml new file mode 100644 index 0000000..dcd1ea9 --- /dev/null +++ b/.github/workflows/release-docker.yml @@ -0,0 +1,30 @@ +# .github/workflows/release-docker.yml +name: Publish Docker image +on: + push: + tags: ["v*.*.*"] # every semantic-version tag +permissions: + contents: read + packages: write +jobs: + build-and-push: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: docker/setup-buildx-action@v3 + - uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - uses: docker/build-push-action@v5 + with: + context: . + file: docker/Dockerfile + platforms: linux/amd64,linux/arm64 + push: true + tags: | + ghcr.io/mongodb/kingfisher:latest + ghcr.io/mongodb/kingfisher:${{ github.ref_name }} diff --git a/Makefile b/Makefile index 540959e..41f74ba 100644 --- a/Makefile +++ b/Makefile @@ -366,6 +366,13 @@ all: linux darwin @echo -e "\nCombined Checksums:" @cat target/release/CHECKSUMS.txt +dockerfile: +# Build for the host architecture (default) + docker build -t kingfisher:latest . 
+ +# Cross‑build for arm64 from an x64 machine + docker buildx build --platform linux/arm64 -t kingfisher:arm64 . + list-archives: @echo -e "\n=== Built archives ===" @found=0; \ diff --git a/README.md b/README.md index f94fc36..7e3e5cf 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,26 @@ make darwin-all # builds both x64 and arm64 make all # builds for every OS and architecture supported ``` +### Run Kingfisher in Docker + + +Run the dockerized Kingfisher container: +```bash +# GitHub Container Registry +docker run --rm ghcr.io/mongodb/kingfisher:latest --version + +# Scan the current working directory +# (mounts your code at /src and scans it) +docker run --rm -v "$PWD":/src ghcr.io/mongodb/kingfisher:latest scan /src + +# Scan while providing a GitHub token +docker run --rm -e KF_GITHUB_TOKEN=ghp_… -v "$PWD":/proj ghcr.io/mongodb/kingfisher:latest scan --git-url https://github.com/org/private_repo.git + +# Scan and output as json +docker run --rm -v "$PWD":/proj ghcr.io/mongodb/kingfisher:latest scan /proj --format json --output findings.json +``` + + # Write Custom Rules! Kingfisher ships with hundreds of rules with HTTP and service‑specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential. diff --git a/data/rules/datadog.yml b/data/rules/datadog.yml index 1fc5420..c851ed8 100644 --- a/data/rules/datadog.yml +++ b/data/rules/datadog.yml @@ -5,7 +5,7 @@ rules: (?xi) \b (?:datadog|dd-|dd_) - (?:.|[\n\r]){0,32}? + (?:.|[\n\r]){0,16}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? \b @@ -46,8 +46,7 @@ rules: \b (?: dd[_-]?\w{0,8}[_-]?(?:key|secret) | - datadog | - dog + datadog ) (?:.|[\n\r]){0,64}? 
\b From bf24b0c563f9409f2cebcfe62d8be795b9c49c6a Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 17 Jul 2025 11:07:44 -0700 Subject: [PATCH 012/357] Added rule for Google Gemini AI --- Cargo.toml | 2 +- data/rules/google.yml | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e220d39..c03c5ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.21.0" +version = "1.22.0" edition.workspace = true rust-version.workspace = true license.workspace = true diff --git a/data/rules/google.yml b/data/rules/google.yml index 812a903..d7d4aed 100644 --- a/data/rules/google.yml +++ b/data/rules/google.yml @@ -93,4 +93,38 @@ rules: client_id: '132261435625-69ubohrvppjr9hcc5t9uighsb7j2cqhv.apps.googleusercontent.com', client_secret: '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', redirect_uri: `http://localhost:${Config.OAUTH_HTTP_PORT}/oauth2callback` - }; \ No newline at end of file + }; + - name: Google Gemini API Key + id: kingfisher.google.7 + pattern: | + (?xi) + ( + AIza + [A-Za-z0-9_-]{35} + ) + \b + min_entropy: 3.5 + confidence: medium + examples: + - AIzaSyByz6BGQf8QtcQLml8spbyy8x5_327PTow + - AIzaSyDhISgbccTi6mfp2GOSmTtqdU__IdevJes + - AIzaSyA_uW1h2CF4ak3vHr7si_RFD_yWxM4tMAM + references: + - https://ai.google.dev/docs/gemini_api_overview + validation: + type: Http + content: + request: + method: GET + url: https://generativelanguage.googleapis.com/v1/models + headers: + X-goog-api-key: '{{ TOKEN }}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + match_all_words: true + words: + - '"models"' + - '"name"' \ No newline at end of file From 96943c72daf7e99abd3c5947b755bb4e1c5e0761 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 17 Jul 2025 11:30:36 -0700 Subject: [PATCH 013/357] Added rules for Cohere and Stability.ai --- CHANGELOG.md | 3 +++ data/rules/cohere.yml | 39 
++++++++++++++++++++++++++++++++++++++ data/rules/stabilityai.yml | 38 +++++++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+) create mode 100644 data/rules/cohere.yml create mode 100644 data/rules/stabilityai.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index d9bce79..1fdb88a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.22.0] +- Added rules for Google Gemini AI, Cohere, Stability.ai + ## [1.21.0] - Improved Azure Storage rule - Added rule to detect TravisCI encrypted values diff --git a/data/rules/cohere.yml b/data/rules/cohere.yml new file mode 100644 index 0000000..400d044 --- /dev/null +++ b/data/rules/cohere.yml @@ -0,0 +1,39 @@ +rules: + - name: Cohere API Key + id: kingfisher.cohere.1 + pattern: | + (?xi) + \b + cohere + (?:.|[\n\r]){0,16}? + ( + [A-Z0-9]{40} + ) + \b + min_entropy: 3.5 + confidence: medium + examples: + - 'cohere_key = 5fNtU1ytdUcOX3jfvgjlr61EPxBqxOojOklDD6BG' + - "cohere secret key = QfsfCM0HdHH9x5ZlhsGzeignSk4pCeBwBrzYqgGV" + - 'cohere_token: x7PX0fac8a2GW2fgnNqdtqIwMQvFbrL6E7lKrKOv' + references: + - https://docs.cohere.com/reference/list-connectors + validation: + type: Http + content: + request: + method: GET + url: https://api.cohere.com/v1/connectors + headers: + Authorization: Bearer {{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"connectors"' + - '"id"' + - '"organization_id"' \ No newline at end of file diff --git a/data/rules/stabilityai.yml b/data/rules/stabilityai.yml new file mode 100644 index 0000000..1ac8fd1 --- /dev/null +++ b/data/rules/stabilityai.yml @@ -0,0 +1,38 @@ +rules: + - name: Stability AI API Key + id: kingfisher.stabilityai.1 + pattern: | + (?x) + \b + ( + sk- + [A-Za-z0-9]{48} + ) + \b + min_entropy: 4.0 + confidence: medium + examples: + - 
sk-AnmgropvAII5XEoxVPjbnSMG3XhacEwhJlLh8ossXh7K1iLP + - sk-gQHyuK4k6Vw2viJRaAnLh6zAULaWtUg40ZHWcYjw7JGutlW6 + - sk-nwvJypEMFNASJLiPBgNnzJj1xsDwlHChbFRMNwVkzy3e4UJg + references: + - https://platform.stability.ai/docs/api-reference#v1-user-account + validation: + type: Http + content: + request: + method: GET + url: https://api.stability.ai/v1/user/account + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"id"' + - '"email"' From 9dc46103ed617eef22480d48134de04bde3f4330 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 17 Jul 2025 13:18:54 -0700 Subject: [PATCH 014/357] Added rules for Cohere and Stability.ai --- data/rules/cohere.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/data/rules/cohere.yml b/data/rules/cohere.yml index 400d044..a362570 100644 --- a/data/rules/cohere.yml +++ b/data/rules/cohere.yml @@ -35,5 +35,4 @@ rules: match_all_words: true words: - '"connectors"' - - '"id"' - - '"organization_id"' \ No newline at end of file + - '"total_count"' \ No newline at end of file From 8dee433ef6630d58fb9536326708aaa35c624c27 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 17 Jul 2025 13:28:59 -0700 Subject: [PATCH 015/357] Added rule for Replicate --- CHANGELOG.md | 2 +- data/rules/replicate.yml | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 data/rules/replicate.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fdb88a..e0964dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ All notable changes to this project will be documented in this file. 
## [1.22.0] -- Added rules for Google Gemini AI, Cohere, Stability.ai +- Added rules for Google Gemini AI, Cohere, Stability.ai, Replicate ## [1.21.0] - Improved Azure Storage rule diff --git a/data/rules/replicate.yml b/data/rules/replicate.yml new file mode 100644 index 0000000..5a20417 --- /dev/null +++ b/data/rules/replicate.yml @@ -0,0 +1,39 @@ +rules: + - name: Replicate API Token + id: kingfisher.replicate.1 + pattern: | # + (?x) + \b + ( + r8_ + [A-Za-z0-9]{37} + ) + \b + min_entropy: 3.0 + confidence: medium + examples: + - r8_WesXNvqsCpq7r1gpQABpB3NJvdR21nb2s7HVy + - r8_Lvn3Tsrs8H2wCYSEPDiUfyePqWpBOWi0vQTtN + - r8_XOpqpi4q9UADwsgrbEjCpT9p1cDldUu3t1D8R + - r8_ap8Mo5iTbW01FHJtElPrBUqf7fjz1r40EVrJu + references: + - https://replicate.com/docs/reference/http + validation: + type: Http + content: + request: + method: GET + url: https://api.replicate.com/v1/account + headers: + Authorization: Bearer {{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"type"' + - '"username"' + - '"name"' From 3b95aae12a983b4a305f5174fd69dc5bcda12116 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 17 Jul 2025 13:52:26 -0700 Subject: [PATCH 016/357] Added rule for Runway --- data/rules/runway.yml | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 data/rules/runway.yml diff --git a/data/rules/runway.yml b/data/rules/runway.yml new file mode 100644 index 0000000..9c816b2 --- /dev/null +++ b/data/rules/runway.yml @@ -0,0 +1,40 @@ +rules: + - name: Runway API Key + id: kingfisher.runway.1 + pattern: | + (?x) + \b + ( + key_ + [A-Fa-f0-9]{128} + ) + \b + min_entropy: 4.0 + confidence: medium + examples: + - key_efef51067da4873eeefa5e3daeeff61537885e52e20e053a824bb5e564d3d707367d005e7d48dbe473de287383356a5abd77421703e1a3e52a27d17f703abe20 + - 
key_7ab2eab6623761354e72f7de76ea041ee3fd73db8e20b5e1173f1e7537ae5ad3e503267e4da374e650715e457e558724bf7bcb170e723bb3fff6445aa2830784 + - key_5a37de4ea80f355afa4aa653d67e8b6db08aaefd0704a773f1584e40236e4d54bf7f78974e8ed07f72e6a6787e66872127a577046743e3e4117ed7b14adeeeb8 + - key_10eb4a74b40672ddee1716ed008637a7aed5176b70eee76017b4e8e5713b8ab12720a8e4768dfe3e47073f86f718286eee814ffea20e271dd5d87ee8d367f8aa + references: + - https://docs.dev.runwayml.com/guides/setup + - https://api.useapi.net/v1/runwayml/accounts + validation: + type: Http + content: + request: + method: GET + url: https://api.dev.runwayml.com/v1/organization + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + X-Runway-Version: "2024-11-06" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"usage"' From 6bcfd6bc48ab2352b90d9448246772c4550cf763 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 17 Jul 2025 14:31:09 -0700 Subject: [PATCH 017/357] upgraded cargo dependencies --- CHANGELOG.md | 2 +- Cargo.toml | 98 ++++++++++++++++++++--------------------- data/rules/clarifai.yml | 38 ++++++++++++++++ src/validation/jwt.rs | 1 - 4 files changed, 88 insertions(+), 51 deletions(-) create mode 100644 data/rules/clarifai.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index e0964dd..f15b21d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ All notable changes to this project will be documented in this file. 
## [1.22.0] -- Added rules for Google Gemini AI, Cohere, Stability.ai, Replicate +- Added rules for Google Gemini AI, Cohere, Stability.ai, Replicate, Runway, Clarifai ## [1.21.0] - Improved Azure Storage rule diff --git a/Cargo.toml b/Cargo.toml index c03c5ef..e7650ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ repository.workspace = true publish.workspace = true [dependencies] -clap = { version = "4.3", features = [ +clap = { version = "4.5", features = [ "cargo", "derive", "env", @@ -28,7 +28,7 @@ clap = { version = "4.3", features = [ "wrap_help", ] } anyhow = "1.0" -bstr = { version = "1.0", features = ["serde"] } +bstr = { version = "1.12", features = ["serde"] } fixedbitset = "0.5" gix = { version = "0.72", features = ["max-performance", "serde", "blocking-network-client"] } ignore = "0.4" @@ -47,13 +47,13 @@ rayon = "1.10" sha1 = "0.10.6" hex = "0.4.3" vectorscan-rs = "0.0.5" -regex = "1.10.6" -serde_json = "1.0.128" +regex = "1.11.1" +serde_json = "1.0.140" lazy_static = "1.5.0" -url = "2.5.2" +url = "2.5.4" include_dir = { version = "0.7", features = ["glob"] } strum = { version = "0.26", features = ["derive"] } -sysinfo = "0.31.2" +sysinfo = "0.31.4" reqwest = { version = "0.12", default-features = false, features = [ "json", "gzip", @@ -67,91 +67,91 @@ reqwest = { version = "0.12", default-features = false, features = [ ] } -chrono = "0.4.38" -thiserror = "1.0.63" -tokio = { version = "1.39.2", features = ["full"] } +chrono = "0.4.41" +thiserror = "1.0.69" +tokio = { version = "1.46.1", features = ["full"] } base64 = "0.22.1" -crossbeam-channel = "0.5.13" +crossbeam-channel = "0.5.15" indenter = "0.3.3" serde-sarif = "0.4" -console = "0.15.8" -time = "0.3.36" -tempfile = "3.12.0" -num_cpus = "1.16.0" -once_cell = "1.19.0" -http = "1.1.0" -liquid = "0.26.4" -liquid-core = "0.26.4" -flate2 = "1.0.33" +console = "0.15.11" +time = "0.3.41" +tempfile = "3.20.0" +num_cpus = "1.17.0" +once_cell = "1.21.3" +http = "1.3.1" +liquid = "0.26.11" 
+liquid-core = "0.26.11" +flate2 = "1.1.2" brotli = "6.0.0" thousands = "0.2.0" base32 = "0.5.1" crossbeam-skiplist = "0.1.3" tokio-postgres = { version = "0.7", default-features = false, features = ["runtime"] } mongodb = { version = "3.2", default-features = false, features = ["rustls-tls", "aws-auth", "compat-3-0-0", "dns-resolver"] } -bson = "2.13.0" -ring = "0.17.8" -pem = "3.0.4" -aws-config = "1.5.10" -aws-credential-types = "1.2.1" -aws-sdk-sts = "1.21.0" -aws-types = "1.3.3" +bson = "2.15.0" +ring = "0.17.14" +pem = "3.0.5" +aws-config = "1.8.2" +aws-credential-types = "1.2.4" +aws-sdk-sts = "1.73.0" +aws-types = "1.3.7" byteorder = "1.5.0" -parking_lot = "0.12.3" +parking_lot = "0.12.4" octorust = "0.9.0" -reqwest-middleware = "0.4.1" +reqwest-middleware = "0.4.2" tracing-subscriber = {version = "0.3.19", features = ["env-filter"] } -tracing-core = "0.1.33" -tree-sitter = "0.24.4" +tracing-core = "0.1.34" +tree-sitter = "0.24.7" tree-sitter-bash = "0.23.3" -tree-sitter-c = "0.23.2" +tree-sitter-c = "0.23.4" tree-sitter-c-sharp = "0.23.1" tree-sitter-cpp = "0.23.4" -tree-sitter-css = "0.23.1" +tree-sitter-css = "0.23.2" tree-sitter-go = "0.23.4" tree-sitter-html = "0.23.2" -tree-sitter-java = "0.23.4" +tree-sitter-java = "0.23.5" tree-sitter-javascript = "0.23.1" tree-sitter-php = "0.23.11" -tree-sitter-python = "0.23.4" +tree-sitter-python = "0.23.6" tree-sitter-ruby = "0.23.1" -tree-sitter-rust = "0.23.2" +tree-sitter-rust = "0.23.3" tree-sitter-toml-ng = "0.7.0" tree-sitter-typescript = "0.23.2" tree-sitter-yaml = "0.6.1" streaming-iterator = "0.1.9" tree-sitter-regex = "0.24.3" content_inspector = "0.2.4" -rustc-hash = "2.1.0" +rustc-hash = "2.1.1" term_size = "0.3.2" -bzip2 = "0.5.0" -zip = "2.2.2" -tar = "0.4.43" +bzip2 = "0.5.2" +zip = "2.4.2" +tar = "0.4.44" xz2 = "0.1.7" asar = "0.3.0" -blake3 = "1.5.5" -memmap2 = "0.9.5" +blake3 = "1.8.2" +memmap2 = "0.9.7" futures = "0.3.31" dashmap = "6.1.0" xxhash-rust = { version = "0.8.15", features = 
["xxh3", "const_xxh3"] } serde_yaml = "0.9.34" hmac = "0.12.1" -sha2 = "0.10.8" +sha2 = "0.10.9" strum_macros = "0.27.1" humantime = "2.2.0" path-dedot = "3.1.1" quick-xml = {version = "0.37.5", features = ["serde","serialize"] } -rustls = "0.23.26" +rustls = "0.23.29" tokio-postgres-rustls = "0.13.0" rustls-native-certs = "0.8.1" predicates = "3.1.3" assert_cmd = "2.0.17" -proptest = "1.6.0" +proptest = "1.7.0" color-backtrace = "0.7.0" gitlab = "0.1711.0" -mimalloc = {version = "0.1.46", features = ["override"]} -thread_local = "1.1.8" -crc32fast = "1.4.2" +mimalloc = {version = "0.1.47", features = ["override"]} +thread_local = "1.1.9" +crc32fast = "1.5.0" bloomfilter = "3.0.1" uuid = "1.17.0" urlencoding = "2.1.3" @@ -177,9 +177,9 @@ use-jemalloc = ["tikv-jemallocator"] system-alloc = [] # forces System allocator [dev-dependencies] -pretty_assertions = "1.3" +pretty_assertions = "1.4" temp-env = "0.3.6" -wiremock = "0.6.2" +wiremock = "0.6.4" git2 = "0.20.2" rand_chacha = "0.9.0" diff --git a/data/rules/clarifai.yml b/data/rules/clarifai.yml new file mode 100644 index 0000000..e3f7167 --- /dev/null +++ b/data/rules/clarifai.yml @@ -0,0 +1,38 @@ +rules: + - name: Clarifai API Key + id: kingfisher.clarifai.1 + pattern: | + (?xi) + \b + clarifai + (?:.|[\n\r]){0,32}? 
+ \b + ( + [0-9a-f]{32,36} + ) + \b + min_entropy: 3.0 + confidence: medium + examples: + - clarifai_key = 29ee853d47364107b9edf5e5ad4374c2 + - "clarifai_token: eb3cf5440b7b45f8954bb4a1fcea0ea5" + - clarifai-secret = 8e43e018f61b493c8104024ee124a57f + - clarifai_api = cf3cacafabe747988298298bffcbb459 + references: + - https://docs.gitguardian.com/secrets-detection/secrets-detection-engine/detectors/specifics/clarifai_key + - https://docs.clarifai.com/control/authentication/key/ + validation: + type: Http + content: + request: + method: GET + url: https://api.clarifai.com/v2/models + headers: + Authorization: Key {{ TOKEN }} + response_matcher: + - report_response: true + - type: WordMatch + match_all_words: true + words: + - '"models"' + - '"status"' diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs index 6449fd2..59cac9d 100644 --- a/src/validation/jwt.rs +++ b/src/validation/jwt.rs @@ -185,7 +185,6 @@ fn is_blocked_ip(ip: std::net::IpAddr) -> bool { mod tests { use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; use chrono::{Duration as ChronoDuration, Utc}; - use reqwest::Client; use super::validate_jwt; From 7d28ab531de8eab6bee034e3c733ec9570b67096 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 17 Jul 2025 15:11:35 -0700 Subject: [PATCH 018/357] updated README --- CHANGELOG.md | 1 + Cargo.toml | 16 ++++++------ Makefile | 4 +-- README.md | 64 ++++++++++++++++------------------------------- docker/Dockerfile | 18 ++++++++++--- src/parser.rs | 2 +- 6 files changed, 48 insertions(+), 57 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f15b21d..d385891 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. 
## [1.22.0] - Added rules for Google Gemini AI, Cohere, Stability.ai, Replicate, Runway, Clarifai +- Upgraded dependencies ## [1.21.0] - Improved Azure Storage rule diff --git a/Cargo.toml b/Cargo.toml index e7650ac..4bf7e3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ clap = { version = "4.5", features = [ anyhow = "1.0" bstr = { version = "1.12", features = ["serde"] } fixedbitset = "0.5" -gix = { version = "0.72", features = ["max-performance", "serde", "blocking-network-client"] } +gix = { version = "0.73", features = ["max-performance", "serde", "blocking-network-client"] } ignore = "0.4" petgraph = "0.6" roaring = "0.10" @@ -102,9 +102,9 @@ octorust = "0.9.0" reqwest-middleware = "0.4.2" tracing-subscriber = {version = "0.3.19", features = ["env-filter"] } tracing-core = "0.1.34" -tree-sitter = "0.24.7" -tree-sitter-bash = "0.23.3" -tree-sitter-c = "0.23.4" +tree-sitter = "0.25.8" +tree-sitter-bash = "0.25.0" +tree-sitter-c = "0.24.1" tree-sitter-c-sharp = "0.23.1" tree-sitter-cpp = "0.23.4" tree-sitter-css = "0.23.2" @@ -115,10 +115,10 @@ tree-sitter-javascript = "0.23.1" tree-sitter-php = "0.23.11" tree-sitter-python = "0.23.6" tree-sitter-ruby = "0.23.1" -tree-sitter-rust = "0.23.3" +tree-sitter-rust = "0.24.0" tree-sitter-toml-ng = "0.7.0" tree-sitter-typescript = "0.23.2" -tree-sitter-yaml = "0.6.1" +tree-sitter-yaml = "0.7.1" streaming-iterator = "0.1.9" tree-sitter-regex = "0.24.3" content_inspector = "0.2.4" @@ -140,7 +140,7 @@ sha2 = "0.10.9" strum_macros = "0.27.1" humantime = "2.2.0" path-dedot = "3.1.1" -quick-xml = {version = "0.37.5", features = ["serde","serialize"] } +quick-xml = {version = "0.38.0", features = ["serde","serialize"] } rustls = "0.23.29" tokio-postgres-rustls = "0.13.0" rustls-native-certs = "0.8.1" @@ -148,7 +148,7 @@ predicates = "3.1.3" assert_cmd = "2.0.17" proptest = "1.7.0" color-backtrace = "0.7.0" -gitlab = "0.1711.0" +gitlab = "0.1801.0" mimalloc = {version = "0.1.47", features = ["override"]} 
thread_local = "1.1.9" crc32fast = "1.5.0" diff --git a/Makefile b/Makefile index 41f74ba..fa97341 100644 --- a/Makefile +++ b/Makefile @@ -368,10 +368,10 @@ all: linux darwin dockerfile: # Build for the host architecture (default) - docker build -t kingfisher:latest . + docker build -f docker/Dockerfile -t kingfisher:latest . # Cross‑build for arm64 from an x64 machine - docker buildx build --platform linux/arm64 -t kingfisher:arm64 . + docker buildx build -f docker/Dockerfile --platform linux/arm64 -t kingfisher:arm64 . list-archives: @echo -e "\n=== Built archives ===" diff --git a/README.md b/README.md index 7e3e5cf..cfd4b24 100644 --- a/README.md +++ b/README.md @@ -58,27 +58,23 @@ make darwin-all # builds both x64 and arm64 make all # builds for every OS and architecture supported ``` -### Run Kingfisher in Docker +# 🔐 Detection Rules at a Glance +Kingfisher ships with hundreds of rules that cover everything from classic cloud keys to the latest LLM-API secrets. +Below is an overview; click any category to see the exact rule IDs. 
-Run the dockerized Kingfisher container: -```bash -# GitHub Container Registry -docker run --rm ghcr.io/mongodb/kingfisher:latest --version +| Category | What we catch | +|----------|---------------| +| **AI / LLM APIs** | OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), and more +| **Cloud Providers** | AWS, Azure, GCP, Alibaba Cloud, DigitalOcean, IBM Cloud, Cloudflare, and more +| **Dev & CI/CD** | GitHub/GitLab tokens, CircleCI, TravisCI, TeamCity, Docker Hub, npm & PyPI publish tokens, and more +| **Messaging & Comms** | Slack, Discord, Microsoft Teams, Twilio, Mailgun/SendGrid/Mailchimp, and more +| **Databases & Data Ops** | MongoDB Atlas, PlanetScale, Postgres DSNs, Grafana Cloud, Datadog, Dynatrace, and more +| **Payments & Billing** | Stripe, PayPal, Square, GoCardless, and more +| **Security & DevSecOps** | Snyk, Dependency-Track, CodeClimate, Codacy, OpsGenie, PagerDuty, and more +| **Misc. SaaS & Tools** | 1Password, Adobe, Atlassian/Jira, Asana, Netlify, Baremetrics, and more -# Scan the current working directory -# (mounts your code at /src and scans it) -docker run --rm -v "$PWD":/src ghcr.io/mongodb/kingfisher:latest scan /src - -# Scan while providing a GitHub token -docker run --rm -e KF_GITHUB_TOKEN=ghp_… -v "$PWD":/proj ghcr.io/mongodb/kingfisher:latest scan --git-url https://github.com/org/private_repo.git - -# Scan and output as json -docker run --rm -v "$PWD":/proj ghcr.io/mongodb/kingfisher:latest scan /proj --format json --output findings.json -``` - - -# Write Custom Rules! +## Write Custom Rules! Kingfisher ships with hundreds of rules with HTTP and service‑specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential. 
@@ -388,33 +384,15 @@ By integrating Kingfisher into your development lifecycle, you can: ## The Risk of Leaked Secrets -Embedding credentials in code repositories is a pervasive, ever‑present risk that leads directly to data breaches: +Real breaches show how one exposed key can snowball into a full-scale incident: -1. **Uber (2016)** +- **Uber (2016):** GitHub-hosted AWS key let attackers access data on 57 M riders and 600 k drivers. [[BBC](https://www.bbc.com/news/technology-42075306)] [[Ars](https://arstechnica.com/tech-policy/2017/11/report-uber-paid-hackers-100000-to-keep-2016-data-breach-quiet/)] +- **AWS engineer (2020):** Pushed log files with root credentials to GitHub. [[Register](https://www.theregister.com/2020/01/23/aws_engineer_credentials_github/)] [[UpGuard](https://www.upguard.com/breaches/identity-and-access-misstep-how-an-amazon-engineer-exposed-credentials-and-more)] +- **Infosys (2023):** Full-admin AWS key left in a public PyPI package for a year. [[Stack](https://www.thestack.technology/infosys-leak-aws-key-exposed-on-pypi/)] [[Blog](https://tomforb.es/blog/infosys-leak/)] +- **Microsoft (2023):** Azure SAS token in an AI repo exposed 38 TB of internal data. [[Wiz](https://www.wiz.io/blog/38-terabytes-of-private-data-accidentally-exposed-by-microsoft-ai-researchers)] [[TechCrunch](https://techcrunch.com/2023/09/18/microsoft-ai-researchers-accidentally-exposed-terabytes-of-internal-sensitive-data/)] +- **GitHub (2023):** RSA SSH host key briefly went public; company rotated it. [[GitHub](https://github.blog/news-insights/company-news/we-updated-our-rsa-ssh-host-key/)] - - _Incident_: Attackers stole GitHub credentials, retrieved an AWS key from a developer’s private repo, and accessed data on 57 million riders and 600 000 drivers. - - _Sources_: [BBC News](https://www.bbc.com/news/technology-42075306), [Ars Technica](https://arstechnica.com/tech-policy/2017/11/report-uber-paid-hackers-100000-to-keep-2016-data-breach-quiet/) - -2. 
**AWS** - - - _Incident_: An AWS engineer accidentally published log files and CloudFormation templates containing AWS key pairs (including “rootkey.csv”) to a public GitHub repo. - - _Sources_: [The Register](https://www.theregister.com/2020/01/23/aws_engineer_credentials_github/), [UpGuard](https://www.upguard.com/breaches/identity-and-access-misstep-how-an-amazon-engineer-exposed-credentials-and-more) - -3. **Infosys** - - - _Incident_: Infosys published an internal PyPI package embedding a FullAdminAccess AWS key for a Johns Hopkins data bucket; the key remained active for over a year. - - _Sources_: [The Stack](https://www.thestack.technology/infosys-leak-aws-key-exposed-on-pypi/), [Tom Forbes Blog](https://tomforb.es/blog/infosys-leak/) - -4. **Microsoft** - - - _Incident_: Microsoft’s AI research GitHub repo included an overly permissive Azure SAS token, exposing 38 TB of private data (workstation backups, 30,000+ Teams messages). - - _Sources_: [Wiz Blog](https://www.wiz.io/blog/38-terabytes-of-private-data-accidentally-exposed-by-microsoft-ai-researchers), [TechCrunch](https://techcrunch.com/2023/09/18/microsoft-ai-researchers-accidentally-exposed-terabytes-of-internal-sensitive-data/) - -5. **GitHub** - - _Incident_: GitHub discovered its RSA SSH host private key was briefly exposed in a public repository and rotated it out of caution. - - _Sources_: [GitHub Blog](https://github.blog/news-insights/company-news/we-updated-our-rsa-ssh-host-key/) - -Left unchecked, leaked secrets can lead to unauthorized access, pivoting within your environment, regulatory fines, and brand‑damaging incident response costs. +Leaked secrets fuel unauthorized access, lateral movement, regulatory fines, and brand-damaging incident-response costs. 
# Benchmark Results diff --git a/docker/Dockerfile b/docker/Dockerfile index 62da0a8..ea87c51 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -16,13 +16,25 @@ RUN set -eux; \ *) echo "unsupported arch ${TARGETARCH}" >&2; exit 1 ;; \ esac; \ # download & unpack - LATEST_URL=$(curl -s https://api.github.com/repos/mongodb/kingfisher/releases/latest \ + LATEST_URL=$(curl -fsSL https://api.github.com/repos/mongodb/kingfisher/releases/latest \ | grep -Eo "https://[^\"]*${SUFFIX}"); \ - curl -L "$LATEST_URL" -o kingfisher.tgz; \ + if [ -z "$LATEST_URL" ]; then \ + echo "Failed to fetch the latest release URL for ${SUFFIX}" >&2; \ + exit 1; \ + fi; \ + curl -fsSL "$LATEST_URL" -o kingfisher.tgz; \ + CHECKSUM_URL=$(curl -fsSL https://api.github.com/repos/mongodb/kingfisher/releases/latest \ + | grep -Eo "https://[^\"]*checksums.txt"); \ + curl -fsSL "$CHECKSUM_URL" -o checksums.txt; \ + EXPECTED_CHECKSUM=$(grep "${SUFFIX}" checksums.txt | awk '{print $1}'); \ + echo "$EXPECTED_CHECKSUM kingfisher.tgz" | sha256sum -c -; \ + tar -xzf kingfisher.tgz; \ + rm kingfisher.tgz checksums.txt; \ tar -xzf kingfisher.tgz; \ rm kingfisher.tgz; \ # locate the binary (pattern covers kingfisher-linux-x64 / kingfisher-linux-arm64) - KF_PATH=$(find . -type f -name 'kingfisher*' | head -n1); \ + KF_PATH=$(find . 
-type f -name 'kingfisher*' -executable -print -quit); \ + if [ -z "$KF_PATH" ]; then echo "No executable kingfisher binary found" >&2; exit 1; fi; \ install -m 0755 "$KF_PATH" /usr/local/bin/kingfisher; \ # optional cleanup to keep the image small rm -rf /app/* diff --git a/src/parser.rs b/src/parser.rs index 4625b6c..255335a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -98,7 +98,7 @@ impl Language { Language::Rust => Ok(tree_sitter_rust::LANGUAGE.into()), Language::Toml => Ok(tree_sitter_toml_ng::LANGUAGE.into()), Language::TypeScript => Ok(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()), - Language::Yaml => Ok(tree_sitter_yaml::language()), + Language::Yaml => Ok(tree_sitter_yaml::LANGUAGE.into()), } } } From e51b3f0424e4a3bce11ec9ca65cf7e08c37db363 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 17 Jul 2025 15:11:41 -0700 Subject: [PATCH 019/357] updated README --- README.md | 7 ++++++- docs/COMPARISON.md | 17 ++++------------- docs/runtime-comparison.png | Bin 0 -> 56693 bytes 3 files changed, 10 insertions(+), 14 deletions(-) create mode 100644 docs/runtime-comparison.png diff --git a/README.md b/README.md index cfd4b24..19bef7c 100644 --- a/README.md +++ b/README.md @@ -398,10 +398,15 @@ Leaked secrets fuel unauthorized access, lateral movement, regulatory fines, and See ([docs/COMPARISON.md](docs/COMPARISON.md)) + +

+ Kingfisher Runtime Comparison +

+ + # Roadmap - More rules -- Auto-updater - Packages for Linux (deb, rpm) - Please file a [feature request](https://github.com/mongodb/kingfisher/issues) if you have specific features you'd like added diff --git a/docs/COMPARISON.md b/docs/COMPARISON.md index ecb8ebf..a0eb4c2 100644 --- a/docs/COMPARISON.md +++ b/docs/COMPARISON.md @@ -12,6 +12,10 @@ | linux | 205.19 | 597.51 | 548.96 | 5.49 | | typescript | 64.99 | 183.04 | 232.34 | 4.23 | +

+ Kingfisher Runtime Comparison +

+ ### Validated/Verified Findings Comparison Note: For GitLeaks and detect-secrets, validated/verified counts are not available. @@ -42,19 +46,6 @@ Note: For GitLeaks and detect-secrets, validated/verified counts are not availab | mongodb | 1 | 191 | 0 | 0 | | linux | 0 | 287 | 0 | 0 | | typescript | 0 | 10 | 0 | 0 | -### QuickChart.io Visualizations - -#### Runtime Chart -*Lower runtimes are better* - -![Runtime Comparison](https://quickchart.io/chart?c=%7B%22type%22%3A%22bar%22%2C%22data%22%3A%7B%22labels%22%3A%5B%22croc%22%2C%22rails%22%2C%22ruby%22%2C%22gitlab%22%2C%22django%22%2C%22lucene%22%2C%22mongodb%22%2C%22linux%22%2C%22typescript%22%5D%2C%22datasets%22%3A%5B%7B%22label%22%3A%22Kingfisher%22%2C%22data%22%3A%5B3.087692041%2C9.816560542%2C22.222204459%2C129.921919875%2C6.748027708%2C18.650581459%2C27.47587625%2C204.192040875%2C62.877494792%5D%7D%2C%7B%22label%22%3A%22TruffleHog%22%2C%22data%22%3A%5B17.667027792%2C24.4969155%2C133.286264708%2C335.819256375%2C248.135664708%2C91.367231833%2C180.311266375%2C585.00584475%2C182.478392708%5D%7D%2C%7B%22label%22%3A%22GitLeaks%22%2C%22data%22%3A%5B2.845539417%2C19.704876208%2C46.658975%2C285.6701695%2C22.446593958%2C53.793195375%2C174.406220375%2C517.420016958%2C164.260176625%5D%7D%2C%7B%22label%22%3A%22detect-secrets%22%2C%22data%22%3A%5B0.703465916%2C0.783118209%2C1.231432834%2C8.751082041%2C1.120182458%2C1.019824708%2C4.737797875%2C8.402164%2C7.170617042%5D%7D%5D%7D%2C%22options%22%3A%7B%22scales%22%3A%7B%22yAxes%22%3A%5B%7B%22ticks%22%3A%7B%22beginAtZero%22%3Atrue%7D%7D%5D%7D%2C%22title%22%3A%7B%22display%22%3A%22true%22%2C%22text%22%3A%22Runtime+Comparison+%28seconds%29%22%7D%7D%7D) - - -#### Validated/Verified Findings Chart -*Validated/Verified counts are reported where available* - -![Findings 
Comparison](https://quickchart.io/chart?c=%7B%22type%22%3A%22bar%22%2C%22data%22%3A%7B%22labels%22%3A%5B%22croc%22%2C%22rails%22%2C%22ruby%22%2C%22gitlab%22%2C%22django%22%2C%22lucene%22%2C%22mongodb%22%2C%22linux%22%2C%22typescript%22%5D%2C%22datasets%22%3A%5B%7B%22label%22%3A%22detect-secrets%22%2C%22data%22%3A%5B0%2C0%2C0%2C0%2C0%2C0%2C0%2C0%2C0%5D%7D%2C%7B%22label%22%3A%22Kingfisher%22%2C%22data%22%3A%5B0%2C0%2C0%2C6%2C0%2C0%2C0%2C0%2C0%5D%7D%2C%7B%22label%22%3A%22TruffleHog%22%2C%22data%22%3A%5B0%2C0%2C0%2C6%2C0%2C0%2C0%2C0%2C0%5D%7D%2C%7B%22label%22%3A%22GitLeaks%22%2C%22data%22%3A%5B0%2C0%2C0%2C0%2C0%2C0%2C0%2C0%2C0%5D%7D%5D%7D%2C%22options%22%3A%7B%22scales%22%3A%7B%22yAxes%22%3A%5B%7B%22ticks%22%3A%7B%22beginAtZero%22%3Atrue%7D%7D%5D%7D%2C%22title%22%3A%7B%22display%22%3A%22true%22%2C%22text%22%3A%22Validated%2FVerified+Findings%22%7D%7D%7D) - *Lower runtimes are better. Validated/Verified counts are reported where available. 'Network Requests' indicates the number of HTTP requests made during scanning.* diff --git a/docs/runtime-comparison.png b/docs/runtime-comparison.png new file mode 100644 index 0000000000000000000000000000000000000000..b8e8c2447bde5e5478b41643a1ad2206ad9f7417 GIT binary patch literal 56693 zcmce-bx@qo(=WQXySqbhcY*{9zIgE9?(Po36Wm=Eceen+Ex_XL?(UcGPu^Rn>Q>#V zQ+3Y%G1bqfWx9K(duMlMBY=w1D2RlJ0000*=BtDX000970H6lop*|1}B{9;E2LMoB zP4fNyT{iUKsS^g`#N_Sm4T6;6>FN3P_3iHN?)LUpA^dP_YwPmz^0^afX=!P9cUL~_ zU}tCN@$u>9<@Npj?e+EjrJv)u3-@)=Pxt51{r$u9^ULAI%lrLKY~AJSPRZ)}=FRP$ zm3JUJJNxT|E(94PEH&HO*7oVy`AY{j)SmI-@yYJNp<~XeYj9*tdhYVt`pm}DhhbTL zvysW^zq9jot({pV<-CfTxA*re`_FcMA&aYP>4l){o7;_@-G_(A=B}RS9^$O5EEXx? 
z#LR44TbqHSCo(-34x1o8r!WF7$MpQ-_Wr?VRo&dOs`s}ipU7WCJYrB3%+I~-j)CFt zkEgG1@8HGdx2x&j1H*4OYZsSSgX4cUj~=+>)F}id&o3^|&M%-3O}vZG&=`3dI=ZPp zOV2N_3MgwwBxf*6D1OoWUQk(AT-~sLbR3qD);~ItoLAiSXONUn+&wJ%`1Gu2XjIMI zJ~}n4vZ>9~&G+`jS605Lx_(ex5(YD0$gh+?BV)=DhY2}_e78(#xhkz|dR+}3o0@rgd7s^SdYLwZ zgok36QPr??+&z1Fow3)mcYk@lt8VGA@bF*j@sQ9miYb``2obdP^eIK`y>4d#8WbKn zFhL+t!{{yau`x`Z#Q5UL<@NQ={f$NXiH?nnd%?L*m)|#QCmlQ2x6>YIG#CM)&k$6o zG4+=arC%?ut_K$%Cf6S!1qmQX5COSjp@~&*NA+x9RX&2mCBlaqQ3EXNF6=!4ehI#G z5VcI)K|~1I+uK99v%L-os;a6|Gtz3=x=>J(o0yp=G+hLjpF?!3Bqk<8aAPGF_d}>s zmh@eFd3yttNFdDUD=I2pS5sf-t==E^!y=>WhE5?21OYNU?+=FnHGU~6sgBCXiPn(K zc5RomxvlNp-gc0FvHSbe(ZR;#-Q~VsvZG$3Hv}o;{rSTFN{>;f?e>&P8UR2JkdY8m z^H@5`T$Q6dB^vzf&`=L|RG}!F%FZVL7u;U~O=Zp{E=d~EYuR%@53Y-Dly9<;$DkB) zG^B@&3x&BK6ZiM;d83-#Jl}9%=QOs`Ifd%JGtAk2%G~K#Wu2dW%{ul=)1id;sJ{Q@ zwUHwxTuTSX_(G!L_WPqkJ9@!pb;_(&!748cK%FWtS|NmF0APa#HMS(9}a5<4Ba%+w$n>XxEjbBbB*y_{2y?(?(iNf1R*^>MA|{>KWtA z3fx7{u{<@O38(>)2?Fqr+Qw6u8>w|^VcQ-ClRVnBE1Q*t74+c4M_ZfWE|Y~F{Vu*U z5>*JLp#br_JG9z3h~Zb4`)A5J4ZI>1* z38(CN2V5~MV8dOy2kFS5591>K|4!9vBlBJc}X5{auXv3^lf> zPJDd&06WRWh(bu1+`*z09^zm}#pzP0lxUe;rp!n5FimryJ}Zqo+BTNsu;m-BAN?)a zWMt@Uk9p0sz8hm!m!7|yygPS0jzR|Wh=@N>=CH2cA$y_$uPetYfpIy^W$AmG@7{D! 
z7xT-~B2Qs~j@(RKk%1CRwrvtxj$e7Z<;Cz{vb6I%K3hA+Zq}i~x_Dh1lbi9RlEqK>up#PJ{9Rzp-^}nsVRbgMrt%Z|=?#6_HDS@hZB*BUn<54~R7xt0Tt0owv>1 z)r!_LQx%i$k)_QuR@ms>r4D3E8lFmDdWbKAjaIq(1fu*e#@FW^^mp;Yb`^;^m1rys zZ2!!!P!#ZrTsxtNj2%|%Q1JR2uk!lY4(79kJ#f9yJzbJ}*}gLMTGgI?Dk$O0$(#ja z7f$MKAA1&|#CFk|v_bh-<;BZu^E@#rPA_`IFMCJjhpObSGL_6KMVrsD?Nv?DAHGSA zjn~XVv!+}lIDp}a0g?$N+6{*PtB&}s0mZtlPRWW54woQb36N;?-n5Yv- z%U&D8P59EOg&OJ~w(2VArBje{y`5fvj`tBCN~>zAe3K&$a;nGPMEJwg+sAl~n?Pnt zvYaqfk7k#LOzqpr@~fgD?pymerEZ2~&>FnB1dq=- z4>!vF<#;r3Dp31CDhubjh=n%psZ*w!eY=gm@=aQbFKpFj@E=+9tW z87Up5FG}&14{D&@6}BI`_6Q=(;_u^-N!aPDEwUN*nV|5sy2-9ym)OJ}Hh5u5TV3bL z#N^oqdhLd4M^I%C@;^}R& zTU6%N+_NCA-TzA81dG#(pWNc4Mrky--2Qx8NtDRr%*e2$pJ&~#NS;O#AxzojAN{2G zyfj3V^9hwwx%KRm=yY+^iJdS$_Y#7R8M@g>y1~z7zFQ5`_8JfhI)u4dO@+}P$tPtz zSKo_CG=PXCFnpOn#(TcEjNUK0y1v8TI|#;A@8oJSd^Cd4?mQoo7oP&(_*l|7z>|+( zN9f1){-I7+sXOsv1x=*AgH|2B^!|kqIbxwUI@n45FFHrvYfKYSHyBSZr2{{02{CL$ z9p@0#7CM6r6Rte3_RU(F{p{`~QDV(GGZY>z#nbJ&2SF~18gy)9g{clID25-gB1I84+%x@Z7eH>!cahRHvsq-xyETYKSe;5xq+PbO?dwP4 zFy4HG87#B4ojY1B1dlFnjGM8kq%II%sKLWFNaI!JU+zPfG&YuUZmBjNS4YR%lFkr?bWW3h9}lo33IomtU3MN zhD_%Nw^q;iqZ^yaf*h&(D?bz27JgZQvh&9RJLJ=Q{ie0FqGdt4LDbTcp}ulvy?6gs zNB&4HWL~-n%!jn>ZKg@%GyCg+rRH;(wltjXAtir`ZqHQ!pE`{=RWG3`9-*)ChwoDF z5gsnpP(OLyJeE`&8CSkUaBuoJ(Ywr494S0$;@jbL=53Kch5&2wt6Ubk8LR%lBioGl zx{jI;A>*lB1&A)6rbO@HF5m(4*E&Tl@(-RxF0%e-pMw?~#6P!U_G5~VC|G68YOuV* zwz$FLrI6A;gS!F?jcA6VFYJEWQ7X_Hhw}Hxfy@mkKN(+`;`ug;XfeV8e(NBWsWqW@ zpiwBd&?7%j2IWo&Hv*G49J!j%wR%h1^uUVW~wmcY7+5UGOBw<)6})Dj7ZnolK${ zP21pvTM^`u>lbdxZ#fdBHu9`ljgF$hk5%y0d%=jzeQO6IxSo^b3dux%UchmW&Wg$X zau=#R&0WOeeTfquq(7Le<}4<1HmC_*&((NP#!aF7G)a0kCJjSXRe4m`>|-X$<23)v z8nNHVL5yjLy{muveUf)LUM+R z37A}ZRk&E8?-w>OVCtt3`z*jLtoHc;@uR2@Bt?D{$CW}px6^l}KD4dS^JE_bkq{4W zt#i}MbMkxz8%s2Mht5fzFdSIY0!D4rQao`T6MrFo`pn4a&=Wf*Lt;a=Zyc$IQ{swX@M-3=E{)|3fH-D$jaZ{_Af1=1 zbK148GK8k1UE0{flTz72z<3W*A6DJ$M;nNOA>Y2Dld-$s{1MRi?7X<`s{>CI;O5V#%1x67ntA%^&79T6RKv06I5F&U?z(4g_7J zPf@3 zzR45zfqgCv2u9PWJ)@^x?n&I5oi*(V#F5(U=ZMaG*j9ZBgW{Cju}= 
zH0L7jx-~s>xm!Pi~yB)aI`M)8de;9}~V zbFN$RMiABHNmje2Y7Yr~4a$Y6!o>mi1cwrr1MH=u+4^(v^&479nDgJK1HKsPWej2l z&T}C`&^?9MzkIwCmHY}pGSs1f^}RU`&I%6>+-U$fLPP>TcEf-t_!M=tm1#bQQZdgE zblHBEWHaY-G0O-8fHYrkY8mwNa+tUwtQubsfl8ja3<~dJi*saDCY3YN)iVe< zwd0|4sM};XtIKl$PKN9F8j>RE5d$g%(t6lt@>57?xnb^1OdzugOW9Oa+B$66CT=Wt z!>WON``Jdqe2O3cv3CudsI5IDSREGB_BbQT%>3ou6a8L-@Z&E-}lXSeMRH(N{W42M-d-H>XTF$U#sUMR2Y~}Ub63*OKSjC z7XPmH=kituK5vhkc{LD@;qGeiL)GZEoutCApGHXt^`P%F{CcFeC5|hS+M7knQ#IzI z6pAt}b-+hr$k0uZuu(v#9%j{*b-;X8rRRE-zT$($$UrMA7%&$Sn1&RD#1F8dLsGLx z0Zo(s`T68#fKp(?x>*+xqe!5!NoZ|D%n&eKF^k{t2yzuI>biu1YFr`)$r4*3C%mK6 z?;Z*pTjyS#!_%O0e?wCn-k31VfWh}k;sXah@u?|vDsn>~{giom35EC6+2lD)@N%e} zPUM+S1gqYc;EIMWYFF`OR3b7G0&Y{`7?Qlw|A-dB*C;}$^CH3Xl67>=+>(MdV`zyM z5mEXKUHY*lDS~kl=f+WQ-!b99k`@oNC}0$YcN=7|<3m_KVl%IaaVEfHu_Z zqAitR3O=6x-%{xz&fimps_e9?h_dFAD@?$0NYIy%7sJiwUsZbmB;WL$Zj|IoRU8f- zxCy@8t;O_LUVaC}`q{jv=)?+HR1n1lgJWq4QR4jp5`bF{gB(Is9ZJF50i*aYQ>O{H z+Usr1arn@*LtyKDvNeS24K(*kW{on^jV-61kDAQq&&4+i$Z8B_IIN8_hQ|WAO~HaB0z3)x|&)6*5DFXm;MkxeZF^ebl;@jbjA)F7`f=gszdK*bBO{mUvqnEEZ%ai=6MMyoO{ZBMF6t0TbTgn9 z>PsU%Qlku6Rjrpy59G3GH3++aGSPf4{`R%S$3j@J+^K7Z^x|b!Yo5>YhBZ^u#YQz_ zTcjlwcxEhgBU=0Ev{gXq*MPaO5AC=p%J?L(j2YC<&w!l=NuOiTu9Yj?I%~$vpPLj) z<72H$#I{lm0fITC{(*nAlZRFBeIk6>apFcz)=aj)@#b&eMjhIK5`&D=Y|v zpLJpu{z*;}Z?NMA9RY)KCkC1^Q_qBk-nJ)@4PlmGbufc2E0*Ok!FL{G@=X{{O_$&( zQd>~G)%Xz@8r*;=-~cg*5bo_h!s)dwcIi;)UT9eysX+-(cqwdUd8G!?P(qM!wRoEKSBZ*Tb z>5no*U^XY7uD5v#jFm9n;70tDxyD%CqF{+ok{_ntIERo)@Ym^TtS-5mYV$=9k*a{o z@YDL90m2h?CyXo&xpe&!|8tm@zVqf_Qk#;`fkuQU4Q`)W;Hx1bcx@ecq&;yZ?lMj5 z_x2^EBMILoxOC7nsPqbPj86RNFq25pS5e?v(0cl@pmauav3AZlM5p|sW;v@ z<~T!rOX+-mmSGay0QkAXhHz!cLkYHz|1}B3c90G-6YIxOro|+H)*@Gvr8#P1AQyoZ z6I=NaO=7q+xTM|3Gxev1eY12qHJ#75?lek)XZ+4mf6r9^&a~+nERuV#@8_XxW+rVr zz`m{&y*aq+(1O=>zvAql32?nBSFT-=Yv%Lp9G#;#OAA+A6(G32JNr=+53w?A=~dV| zo7dsdBzV>+69Yvny?BBA8LnGAEE0Mw6_pO?V5eT-q(mF#!2A?U}&uO7p;{hZmuoFZBCu3FlO%WSvm0zqe40mA7?#T7HZJ1kvEvY{VO#)N`p6>3TbyQgkx=xP(&-T_;)BV|6W3%i^1sLux|g{L9Mveur|a-zw^B7LNIaAfN?(T<1h! 
zWJHOdx+;^oiphS>$+&goA*0EoL|znn)c7+IHn_>;=Ha%~u8!Np5F4M_{h?O7!gv?* zoVleTWY^=9aZ{mlNa}Jtu?U+OPa21D^fN!l15z{xZ@FNx2m5hxgIC^9WywRyg0XrD zfk7)*a*JSh=CKv58SBOD_`0I|Xq6^ko9^oezCbv7DH?Z=KP@TB!-xDq3G8ePrQe~g zYAv!@=0{0M;5qJ~_{r3ctcPIt;FDuz)#_k`hIe)FP3t>nVO=}{fmS3;Cf@a0ycyKX zVEX0MDe_;@wr~WRRi-f~OgGEJI0248%GzBWg~sHw!b(33N5QnH0}4Ml0N&P}iFH<% z>^kLU+Tq${$$fwF$mW}Z;`*1hb|QBd+)_QhvfiKt_I4g6l3nT(e!o}b$AI5|KGl>s zC87@!qnCuckujQOnxvHkL={BjusylF=AFzb$c= zOQ=i9yy-jA^Zco!6i@bjvrAFmU%qSzo5`k{E_|-p9Lh^7CI6XX`}6XH#5(MDS5Xv` z^Dku?JSyOAMMebNmJf&bMosEd>Sv}s2}2RV#MQeg@sTS^>t!7y1Dm)FwtX$@8n5{D z-@`r&G9NV-$0PtdEz{`#TFd>t?0kE}!fnp!ap&jN@2PFG6aOc%D0sk|J#?L0bD3T- zNmb)jh@D>HC)Q@KyAa#=BwlOJ>Z#WOj4~Gt)q{MrnO0^Ob6QS@@rUv<%IOffgkMI> zLx%|b&JzwY%_lbe?)FABz|ut;=M>%3y@bJWB<&?Vft30-X5oGCXiN2Tl~BF3`q5BG z78S~b#zXCZ-gN)cE?*ZmV9hB>!Efy2guoe&fg3uKBoGl0APNI$;R+-yM1>X4g#_rz z=hk6>0D%2DX)?Z}fmZ1c5*4h-0PMpDApka30++;LtdO@O0f0<6IR_muhZ2m{U}jTP zxCR5@x7KN^4^%<$4fnU=?YS8JF$_Qe=T+fp{79obHD)_=06!$aLFy{o4z?W885*z= z&6kf6Ca#+W{lA>A{U3SAjiy1rncZ|`aSyde=mNPPpQcWc`8q*%c>^?2%uV`J+zbD{N86B{BB0HFe-7EdqnO+spULTP9Sa@xVk>GKhQX;z1ht*q{^ZE?l!y(>h^!vraIpwf_Dh@a9aP|srYnzdu%ZK zw*FZEdC|kEE7vKwe;*C2?2FQjoLtH{HYA0j15d9HaOU(#z}Z*i?IkO$>#;&ik8voS z2OMA1q{8f|{Jlwi0PR8kR>}FHV#(Yzt37auO{seIbr#7W-?pry)N{bf416^Ex}2G$ z%h|OEu>b9J^(;8A^mKISeiWv94&$bCkkbm{UZ0VNE^d<6*J$AvNmNWmZ7+x#6ID@w zYcQ`mCo@0?O)A=7qj}7lj)?Z+gKIu&FM7N+&y=yN1^qtlKaRtzKaAO3}?-nqnh!jnleg5dywD2msHc`nRN~I=)qW z;$~tX_OF=7Lr7lXIgTCnh;%7@^XxaP^(oNY6tPdaih{hgD-bnpWQr-9* z56w?_>6V+DGt ze!?C|Ao`ML(_yIGwh9L=C+ZLx%&1$JY)=g&Y67D58@I}I)>K+(iTwDbyd-h^6FKAb zv+jq{BqX&;do{!S{06OcsbcG?zSdGC3GD;$&RDA^ChgjalLcQPAqVs-Aq&M!_>g%+ zS&XPz6)|W#J2u2SmZeSxRDY5Rnjcxs>)evsxyXl1iq|oqeZOx!Y^2cpJFM9_!+i8} zM4l@nIoh(>PZ#@6rm`x(2@|ByStR;})dC;%vi6mYwz^@saUyo($}s0ST=Mj|0oW>5 z5S-l^;-)n(hpv}yq?%A;QoHZYvCU+bTi5nBxc2x`SV!XU!nDWUBy){ftameAEV|VX z4OvVj!bCYOijMOMpY#Tn@``fgoQyK4gX0H`#P7er5pRS--Erz-x%g!Z-bOvg8JF46 zsH$|X)rQquw1cq?Tb$bYV^w2=33CY4G5Lw;!AYOVI08M%>$io`&1sjmt%bit>SHjN52#Q$QeGcf 
zgVU>RVi5J?Eo9T>@{HE35~JY9OU%=v>?}|J&( zP6Q0KDH`03GduRV0~n)oW<`saLvWpxD%Mb}1VcOS#e1fS5aRb%C!B-B{^d0EQf*0h z$YSQ81~~P#YHTuWx}^9EG`UJqoWaj|rAwII6wIR^AtOr}ar#6u8PqZY9&p00pnD#0SJS3?Uq*Hv-hS>I>vyu4x5W{n%D# zn)49nzE_r}YB5BE8_NF~8aRkvMp57$Xrvh$nJY~zVpZa_wzf5e|AZw4s^!hXM{*#Z zg<`OaT5V+$2TLO)X);B@;BS1r4N2LJPIrPtcJzvn+!dz-Yh5qVaxj6FeL;$kUOh7R z=)@19GVH%_@<3nJyRaDsb&k)j0d$q{s?RHKSU-dw zhmgmY)6%)S17D|d;hgi|>D}s}wva&j3Q23HPz4y;rR(%);Jn(OUh>4YG6~x9&HS2T zaq>wZXta?Uv-KPOT^0Oo>LW`xCX}oX+pcp3ACnr}-cm14i|O#2-BO}zv=V|%yNCI2 zVb%(i&hCzTYzBT$ygWo(SR-J16sP6>1}?%4*1=V4-2 z->P%B&Hzp}e7#y(WaIJ2${1=2iRDU8~1ieLRwS1!d zB?dOFnES%S9zH3Xe0Haub)X7S9sg9R7Vi7dHj-<41)HjhIAcMmKo*@FOH>~LY2sFV zX`nzTwf|noiqRF>oVNExNF+Su$71oXaG~)j2a-sbpVc*3Oi>%|>D_$PNA`$7@x=xU z&%~f~lOGM_zw2XUh2C`TisS%3j}!w}0MBrG#xRf4Ia(rV6k{h6xY#e6*;<`5F7^Av zg(~ghsL6@SI9vvJm^9(<0aL57^PvnYPWFGpCzdSbggDLVNn|Gr-EIw9Dfy$_<--e-RxcR|!9ua{Q=vvOAJkY!J)?jR@)Ig`F2JIhjbVEm)h$X0~ zLPo|PwvHkSx!Ty6z=x$ZZTFP%Ji1s6KY4rz3$Aq=&I!FC^Xk;D6)+~T`rPd?$`qAC zVzQlo$%P>!xoSKATtd&5HfpiN+AUh<3I9v=smYy8C=c?_j25sB`b(Z8t9YswZ-`YI zwz5QKeSh##bj^MmQ`yvYpPw%+e=UWU2d^z+CVG}F5084Lqat0#9Dcfxf<5}=$|%reWJ zrTt4$6Y`(mWY!^TB3BVXo_f81~33<*9}>nlNo+uM6P8BnubApGad_DtK`llGF% zI-&A{k)R7on{Sg8Kj|pa^q*%+`d&7xcQ0Pu!vEfrLYNd3cvSH0<@gJXlo-7CVCC=% zJ|%nIennPw*LfdYe-O?4LjLnzWNiR@ly(2zFUFrK4(^=a-f>#~;~;)ITJYr?%$t#k z#Ove9Omo01Y%Qs1+1jSGE!P1WBs zQfyLN^c|h<_tD8P*Qeuk-@CigVBf{Ie70|K=S%#L<61#XOv|;<#RjEL%dvIv3{7BxjT zWZI$@_Q0q6V_1MyMq$}TT{~5^pkO{#h=hfw_p#}!8fEb!Q?*t_)|H2)$w7n=u6JvY7tt@XWa{%?Le`|W00rSK=)KF30D(nok`Qt-PM zpvevdvVP4N@2Z;JoL$~sjX154Lo*MX)U_Sw5(nta=;&!>{#s&cWf$^o&#Nn==^|ld ztQ_mA=a}aS;=D25M`HTt5RtK#wA5E4Gyf+zS6p$xM*=XjxG7^IRQNy z{Xad19Ss=t%VYiC(~e3v(;#(Mr$$vE!o7}vW?{zdm#vdi#nXs$$t-7A0Ayse@h&&j zIqcnhwMR#0#iqSa63L!0@awo5rwP2oXf6Wqq_|dic2zZQ+)u(P!hY-@r*MhIG*-!gN8Z})dajrxv>)!SPIva*|J;q-#bbO zj@RKlDmy#H<0e_xJ(4@@wE48ipeJcnCzZ0gr;^xj9O%=D8(cabR@BVKMY`cGv>AV# z$pc5vtEuB;rHPKHda~ZGOy;X6(R|ZZaezw=<=>=Hv{*T{?l%}->N~?UxirIuT84<{rz|~%{Nd|9uHHA*l0x?^?Hu(-G`={O>s=PiB}-pR$Ii>Ikgy7Gu92Kj8u&lW 
zcfZ_7WPJgeayGgdd^aVx~8Q!gmA zb-%m_55VuU{WMwann4lWZd;syg`CD9Awoc!q#n$bSaJ{}PZNXb|GR zhX1f2;miIl^C9?O*8h(x!~fVI2mQy`|F=esC6Lqqb+${y=KDIU$Xj43)K@AHng6BB z`I^DY`utjn$m@}NF)DdX?%UxZoW1Q%R*wHAS~v(`3GKSR=FWqBq$Bb&r38&53WWIg zp#ERt1A_T4{67Q)^#98Th{2$%{cHFi0w9L-Z{!31->~?9XGZsMi{eH|_@gEg1JX!p zeJZV#vO%c{(k0_oKu{aAmTFwvNWo>yTIAzE=7j3ZXe8o?D3g*Kk6vX&`0b8p>QBy9 ztj6yEF@^Q)+E|D0CZB&0Ya{lpw7xB77El%7B9ns-mqpTMgtG9U;mBOVvdT|g7wg}T zMC^M2LFruyNrtbB57yS2sWUExf_!Vqf)9D#uK~Hm)kZ8G>zL5LcHh|Cg=-6}cshW_ z{T~`lTbB-PJn*On=g4sW84Jo>L~D*1IOyOT^>su|*)8*z=Dy6>><2!TCE{3|Z|onI zPQ^?Y+&FOCJp{&>6{5ZOdx=Y40&?xdA5|7wy_$@;k%?HT2XGWNZS?*`LO^rdPSqCI z)VH>Bo^;F;k|m?;ghcKB6U8#y4K?p0LTTVmp9)%Bn1ZU-zfkn!QY+-Xm8Zzy;f_0A zJUqT_IA~XINs?1Rcf+zGUhWpH%>7zFQD8HpwzJT&2+rqJvU=VBtQB8pNmghZCj1Xcmbp}{t}!SiSadZp80gz&kzN0~mPC#jjIgYj+wz2>SCY#i(k!i` z`j4!ywtZES^Hf=HY`r+RsIAW-(at8+5=y_ezdJjn`*jzv{h&Jr)gjaVjLVYFBokb` zK}*=MNz70C4H_FNk%?X9FE)P^v6^oE76nVBkpg@kUOHiA#9>jy@L5*M8{Bi;O_PH8 zUmQ2)YE-+rx>sKoT#9WQ4q?A6f-v*mvvEdbe1IrWkWJ;HN)z0XF(MUrsxps>1_cX1 zS37gLCQUOsJezrjYErHfiG_grXb>`kV_g-9G^k0O&@j?Y;tuqnmF?NZ#`wcNb^mMC=$~0Pt3H5@nz1DCAY=`}Ez) zN>>3@Up`DXa&{a@J=*PcDI2UtZ$f^El{|x>_Tkz|2`&X1u9|&4!pQ*W;F6Oe~lGXWpH5Ae|vKL8O-V=zZ;Be6YU@4x-<=`>_bMjI_uXgQ|; z;Aj0{*>NPY{BXTHFdGFe0eJ2uJnizIgi;}X$rQvf5LyO@|2xC}B>Za_>?gKzPO#P+Ac>T-q5{@SZ&07i>2b3R=>*33hSEDnQG9zBN(fjVt?z$@T4Ak$>nqOgYv1N^me#6Ob(yCJ z1ON3$R%Ur?b87^}5!c03vBj}+k%9&GVz|AIKAD7I+k2J0p7K7qjre*|`8F;%!STyX z2j&W^=bE5-FY4SZW?WQ>h|a?%9UhwgmRGKZ9T*2QCdil&E9~W7Fu!u~u|41m9-+Y0-Rpv%@z*eH#O0O8HjP|NcB|Bxfj`;~YGZ7dj>$v#n{AR{F5?(=~qp>y4{9HNJmKhL@H8q!%Z#JlGus6(c=b$`S zzX;njMS*hyHi@34(PmzL(PU1xNX1pJnmzob=lLBg=xJl}i;v;J59oul&)|-1((Z=m z1>ccLF>an9FG{{E(Bih6>0x=U3TMKox7abm`(0hB7%ixCGmcP$p^xeC3y_~f4y;4; z3>WOoKvH!9cd(S}wb(f<%HA;R&kAZonSA4uB;Xb_fOCEN!prro3O-$|b7_gdEM5No zMirjH{G|(Q$;L#ZV!ktzn-lJc@V8RTl=Wm^DKVICz*}rQlcdU4iLh*?FxA94!e zw5W;(bX%~|MN`w6)F}#V#f)W=IAalWJ=^tSV}oep1F6L=E00`sp_cKvCV$HAk3`Vi zh{}!4vXWy=TE(8lRI6?K?y8<3woVx|GDS2in~GjKR{QbihlI9vlYt@VgMr+1;T9r; 
zn^Q6!7x7CEG8)Ixv~}~RSLvdknA$63v3uM_JNS|)J_6tI#z@OQbvbRT?lzbePFphy z>@_YXWd>lvZ8eQt=Dyp>K;Qm?#XXU*iX$Px-)hR2mi;llK5XN6v!77i0Q}u9h#`mM z{Re*^FuC;&JoYX_?o-aZl2JEl^Fr2L+-u;GK`3V19z3#EG|5S9YR@RiIw9R5k({;H zf%_)^>SmNdFl#f;;dUGQs(sT|l`R<8)k~xYSYKagRpH=j#nkeC`n-b8wr%&Np4%?|p=MQN~MZ&Sl2$L9#YcGTJagV}k z`d*VO;~%U}0XiMilKJ!ZYK4sA&-a}s)T#L1O$%u5%2&2O#2e;igI)=YCEL?#+%8Hx z;5HInJjS(@E(_Ywi2|3kCla_({3vZ3vQ7?flQS+g|JQd9JJQyLTn z%*OG9CUf{xbbJo%_ouz7c(BPqww5D1ZF5&0-SR_6*eTrdgTduOy9z+L6jq{Ydbv)4 zcl}MXaPv?04@RL}LRc^Tq79e*Nlu*1`m(fEws<&_-oc^l1%0iyUEa(!I^0H7Zv5Gx zg!3nz60y^K=8qqaX0I(ZYfd8zhf)#*=S!)?HMN`{x+i4?*W3 zrF(b#<6}JzrVqjVPRML7{+f7T`8Dw&@V-BnhO3Y`6d60_vz+${N4m4CJlYd)_T7bJx zo~9kcHXF)DD1__W>3=n)qE;*A+b*gn?OUaHwQ)CFG!SDenuV)z>pzU<5zjU0oa8>s z8YB_sSAl*|QZ@=dKQq31N>%J_9kj1hmT$fO*)*bZdn`=Z>I8t>cWPX)k27v*?DS2^ zK*>l-Goy}&!c3T6P`Dx4S#+b~XFNtvGfaahF#Edhp&T6|S!yuvR$cIkP`64euAseF z<;`@oTq8TStb$y{)Rb9Hw41${T_rX-kall{jF+B8_2MvHBi9W4XjrO!#i2Yi(>!JXn& ze_P|;w>9i_DcE3-EGzd(LZ_WbL2zFDGkKq_#&)Z7MtqCfpP?O~=e23i56CRmcw-Y0 z&7s`RJ7(diR$=_4*$m2{tD>k~P{pn4+%S;L;j6bahW+qM`O|;8&2@+xPJfzfm-x!W z%4J)yICPP`IN;93%Ci$7`WiD=Ug#VIZtOXnG6=;@uvE?GsC0Ird~6GyL{B~OW#l#| z)$i?pQAGl$&BhEMoW=`7mHqO6WnX0`>dnS}xPX=EKO_eHsubKLgU&5xWksTwx{FYH zwB!d|KW=ncZ01fhE=O!epgMvo$Z{>$^{=YMH4UtXf!|B5(nn{VqoST$RT}%(aBsB+ z3o$0xMg0#j2hJuC@<+84?{x+em0IS-Srz{qX7ETMF?r~RfCD_^o$g* z+?!JYI2Nd{gHrFc`XB{Ep7>ba{Hajkn=?xohPH}jx?ls}1uP&ZVi!?THfoqRWv`Eb z>pcvxFS8r>ypdZ7Sl~0y!X4Zkzjvk8Ib|Wy>`EC<2gNK1lRq))qsCv-5RI2Pa*?dG zprmNP1p}W@6#f~aaS;=dEDRT7Pqng`W8k}Su2eKZvT5sQPSpZ7c(A6h*0Ep~7mm@J zR|IG-KDz@Ci&?MSmTdj2Nj=x_+%2uHhd`q!iqx!%&Z_Fd%N4X5J3zW+FoeV|CMx2p znl%8CD|YKUZ~7mP+^Y*cN{?Jz&9IdRwdMveyB)&qoD=<`(#xd2k#mPPvGPVB!#H<2 zEDrz_wuJy%EdKj@mg?bcHW7DgK9U_M_ZXC(@_7s_j}EGya>s%ee%)V=n9nhDpx9y) zDzHiqY#WhIsI#&y=zO38E{}fZhn0|dq5;P3;pFY{wB8_t_UfHk-@!4x@ByM(s3aJ& zGsWqh8Os!Mm|6#vt*8MhHPI(FS~vVuevnqO!;i@)UW|e4u^8ywxO0P|w>XaT9b?TR zXn{`x!(U>vq=PH>b+7;8t)l@yi@&8L>fEIy`vJ<9S`_%EBSz+satZ4qCKT_kF8G0` z!GxB@Dl0)~kgnQWh1e127!N@UiZ?A+c}~)ZJoW5QK;2`=T6_;ktJLGhn|}SQpM)+? 
z0sZ6cK>#%^S!t(Tqo#jmZJ*rye`vVsxT>CDPlI%KN+aFUmy+fJmyibOZlt?QTKW>g z0VU;1mvjqUIt1ws0cjq;_df6ZxqJ5P%P{T0P4UcYRx96)3}!|G_v{u6_`Az(GGM^%Q~Mg{#RdN%me1}^ zCJ0j?EOoy?kpk8~-3yI@x^FSIep1t@*t4kY-DSEC*8|w;1vx!BcHX(FM$GOvbJlljTnwki0q zoDosO?%pkRfpj};+YamB?BybJQ$_yShVH9ud-OVomxsPsMS*jQ=bY_DSVwdT7Umuw zIMDLt$5HD_fOgL)wUxNf^CK_WICbKw5^_F9MWQ ztI+(!HXaULUI*Xhfi&#IPL_A+ifXqjXXy@I(Ks^5vM+>4sbopY=(er&W;pfr{3Uk2 zh4!|y#x3DoO-*T;VP1|HIRiolko3xNgkpsEf84XtuS^l|l_zSMFVlI+mI#nYkTSE! zt7Ubme77fk{0`jmcK7+1BXIu~G7Srp;4O3g5kngy^vzr_T6WNkt5WSKTIr7 z$P1#vG|&8rA7Zy(DV&t)sW>bASu!ju*S8Z^u7fMldhf_`+NMT_(>#R`q9AR)q`jf=LzHI3*uXp+L^d9S<`db)Mxv>oQwiE%)opJvQ%g$5?O(lj=m7-#HMU-vRXp1<$D1Jb-ezl+$7#V`&Y zj#uoIzcQxgSiKMxj>+se;u960<=hoCb#!73=aD{FNt^I^u5od-t?6;a>A-O4U3H8j zb&hcsb)N`0&u?`U|3L>Cz8Df(TG*OB#5h>A&P{;fLvjS~-kNo9uZE1j(^e@xCmU$6 z>CKmeIr`X^o+Oo&VYc+*q&awZ%y%4qB!Fbvdb@G9JH&1Y_0(S{)t?2RgS^Mc2L2r8 ze9nRvn$-8$;ej$0Dtsw?S}R<>WrQBmFaa|vqjeN)&DT*bthq?{798e_Shm(TY86{3u!URs=Rrrb z^Syqvg$#6&ayfro-S;b;tZ6S7e)6)^t}kyco2Gd_teTa2Ms@DzSEwpS({P?>L=ETo zdn4%G`5vS6oF15>IJZs||MKEaoS&+fd>uWVjgA|ho6jHb_GS}#9Uul}_HU%brdCIG z{-Xz4j~UL{25%4m>;r9dP^NomD|=#7VA`%9_)Cu90m=U~rd>2lsf|ZKHCyl@nQN;>ewb&>u zG3y%3k&M#rpm$F}*97n(6xwert<=29;XbShmx!VR@4(jki^Kylr99!3hW4c2(^oE$ z)&cHtj)VaT#^`b9$%xkGw^qc$M4QN=&`vSklg=s8_AMhWRNcczn*p}hQt1D2_Rvv^ zZisRvpWLluVXXSD@8i~k%>GTI-}75J1UEMK2D=GV2iR5g;7oC$RGSXV3!z#z7u+nJGDpbBm~Q z!k)XRMA&4Mwzt?E4}-(aU-s`;&TZ%(H1`-qm-I^0_O|Me-B6DOhQt11{`7Y;Vrn@1 z%cb3@%JW%I$Jv@j`L1oNP_>*=JIF4(9S zk1kkME99jVPR>6JhoIbgAwEavPw*@ki~QS*xlH%XDY=04*@(Mybm_oL7j?SFlZceX zY?P~`)<(wYm3ih$9(wt|(D^Mo;fVoqx^`vo^W}b#V^?YL`AJL&10bzhQ#r;4s^)Q5 z_wvnr65)EaS*OhWb|NK_spXa!g(MS1!wB$SmiK?pODnrrBVshm3^Dh)=nKY&yeK3= zO02B?JE6>pn|b4HI`TRBA4#`}AO!{RcNASTM?(p2n&Mn1S!4E7-q~Xtg54}~d}I5$rz31Rr7{pwVeL(N&y6r#owANfGt135YEvXV6*#F z`UQQ5Ma5{5;#{!5KpP-|)4k#Vz4+vCwT?%dY!W91ACoV#9)p?c1}{UQe+vASCb>~N+2yS_(u?G#RC9W1)CLDwvrLdUb*KHV{lZ3Q@St%-S((g5-*=aA)yCE zqo8-MIAp*eFk*Oo(~txRRf7)@*ZFS~0VVMeLPDY8P$bu7_Gg^%sy*nBCj9?vK_PKX 
zNqRFn|KCt|%s(h3qO;HuQ;Hbqf5s9Yl1~m#vVT#tQoR{{3E9CL8AWi{b4k{3eE9bv zO|W(DFJtc;H}`YQ;fI^uTf?pvSF$r}<}DcSk|t{yUK`vfzqJ^^hB! zFULLRmjcM(anixpUy0VVLvG%HBqKn_tBkkDFE|et{(c72jv(R zd$CHnq~ntwL4kF6;qlpMFeUJzV@ho0Hrfs<3JFiWm=iN_kqmCT#m7V;LAfI4{1$t{ zJ@C@y9lrDqeE40OO4Epf3?I_2=Chyxw*F;k8?(Z&gL)OQqyi?PfwR0UF|hd7yknw! zeY@sGTq@wh=>=q?i4(UGkH6s5b3{h8&?mk6Td`pR|^rp-DpnjuGdDfDw`x`9#yn}fM<<$D{OVg2uH-kbzX z?R>E&D78^ht$CwQ5s$Wo3AWVP)!xgGYd0UVB~KG#dl8TGQ+RgA-m+t(kk}yI5;rB# z1H8QHNHA>BW1dbV^k=skF$6dK?%$4k2i|{O_>j-9%8tYS>)$3zxj2Or|Y zyF|sBw_?I_*Z^dVgWuQ=&aKcALXAsT&7ZI0b{a^#3|Aza{d`1EQ2xq>tLccKI z`UF?k&67W8q%H9@W5i#Wa^kLa*S@($8p!l`Jn>11fRmOSgJX;wI3JFnyob`QDK5=X zFGiG@C%4|+_I{1azIZHbk!`=&QeWeK&XQwph>9bQf@q)^Z4(+u{5Rd{%vWknYDh(C z>(Bd$`%OtngOR(IqS_+q+Ac4y;?;y15t3o4r^2F@Z`V2;B>YIT{%PVdB|qD-sz_eg zW)Tts3yiBQ51V}~>1y6pq6)A`CwJ;Hk$$d$Mq|;=o+W6afU(?l_3_9ad8NZ!`JBP~ zokaK9tYM3_Xl@)kX%~$${hBLL8+{40j{N3JrPeubY?bJh1ox<5dn@_18i zxF(z4{A{CfLUEA#Hi;E9RZth?%;sVDo>4bsp?60Z+&mpYU+C|Fau7zfIK5+W+dYN^ zQJZ0iG85Ez#1$P2N_)QC=wCBZjPs?An`kQU>6k*;wYGt_Rv>d|I0d`oie%gAYa_Q0 zhjty7Qwo_X`%5dgEE17@Q8cu0E=Od!(xonZ0SrQxH;(HQeav@#IcuZm-#d{iB^;r;z2#C>Z-iMe^@KtNHGSUDfl$rhoe7=nAjUl_OS3%1DYw)#@;xu2B4PAEb&|c@Z8$m zlFV5uH4x#H@6|{4mh01RMO7GKUgpQek1d&x4Y&PRSq)k{{D+1i3yA1(k((2N5>4*f zA#1XU|K>hzAC|)h)yo>%a1POJ(4f)SnOWyRJ5dZK7XlpJ*H7! 
zSL}`ieR&bo;A(JO);Cv~p!IBeam`d+dv;$-Bm~{>5&-=I*V&;BBHUX4!P8&8IcDkl zLWtNr=tpA=m-YAgsA_=|`tyGA$(Dc21ak%Z^MEuk!?KL~xfDlgwxqNRC-@vUXrO{}|Q`42!G4Qd>- z#iIQ>o5BNsIQ+{VrNlU}`jC$a#9pv1TTe{sgQYhRlxWjMZdn`&9JXArA;Xn#MzZ3b zOG-_GiL{#f8kN5E#8lEn;}Qn@7jBk~GYrg%KphNFTE@EX!g6qcr)_ji9d4?Qro5Ji z4gWp`w$eNyK7+QT@{E2+#|3fv8+I$L`3!ER&-8$hfA#3kUJ_e_KtGmP|C0esIO4lr zW5FyudC`q2g{2zu{CedGe=$@rjSt_N3h`wvl*13KE&hRm`g;s(O zF(6c_bxPRh;%%QK&p{iQBcNMl)P)tErWy!;^ zpt3Ir*>AH=Itc ztu_qkU0R2e?JMN`my`=Mnd6$Ycn4#@r}NS*zB#wo8Zzn?2nRb`pkoa5d+SHjKVxFJ z9A~6N{g+ig+NIDq{7CD>k56QeNFc#sghAI=lbU0yC4LcRG8uFvHo7 zGClWj!>I24xaCF*bt%BRahwa_lD@(+ z9aUZzGrm4^ZOYNkJ&l{i)kIXJ%vE`=!m*k-5eodh`pEmNbL`0e3b_mJe)6xS1s5U> z`(nv+q|W3rxA*8y(%@$vwT&!51>RIg_~qoX`Edgs+@I7}M=11kLk^JJ#z$qwC5S-d z>E2^K^Z)#Yi~uUopl~!CX6U8_lhw!brvgw^nlK*gIF&cY=`RkS)te89QQTF zwG)l}U1no-G-X@Ro0Kfp&}W71!6esJnm7GVfyi61!6-~2M))hJlU zwkU&qi}G=L{81{Dsr7Ek@at=0_nGiUVq>f*pIwi&uJuRXwS^S-9kBw;f9jhnZ@bhJ z)a{2nFAn_*xAIxZs6bJ#&OQ_iKb^`3IS{CayXK-{1{e~|5< z**ClNC=MY<(>oN#A$T>G1fEin`M$R#^~z`CAR!q-U5wP-r4QWv#nfF}iCrU?4m?U+ zKS;o(t)KdXNm4$6fN@2qe%t$HPq8edCOm;hN;Q>cBa&mVOA2G~tX_26<=;&|8|2)3 zB=N2HVb|CLMgLa3YnTtvO}tYOeVG%5X+M0*0Lie?qwAbE_`ZpUiHj_Kxzsa{t4}%k zRnOs0pYN7hG1ge;5U9mtMWGK4y~XF?WqlyJDEE0WD9UDIxQLkdKpl}9-s7e5UEkwhGnj<_d()zpHBH=+!^o5$`v z%iaxi>dC|2;#@~qH6CJ(Ekjs78Tql_7JA0gnV&?)gL>j(u^APLBM(?r{(UK%4o_vj zkmcfIP-ocTs>4$_GrT_DE$K}-H8~=mA&z51ib*Xo-DMT|ktNXhv+T#>61RMD?fVoS z!PjPNKGsLSvTL7nWk3%I()d-fY{d@q6Z^~WG!Mi6c8e9c3^~j$Bc+?JTjcJ3J6^b! zsVw0mR0B@0=!84-4YKi3D#FVj*?8dOyT4VnSKDuA9{E(ShVqbTs;m0gI)aPbS^A&bkwkqTezd9-`@a2onVb5^!MWf(!uoOjHDNS)Ef-`CJ)_!h_LY4wN*Krf0 zS^??9L3HN<1)P)1*bz8Q*RqC}G5>BSU+iMj19=UFa`rc?$Ef0!Ywk+5d6S1f)9p{P zTOVUXQkA$XIdM~T5bGlkh9M7Gy`nYK0cgSTPK;tk2JZ^R&ovgG@UwTKO~N#bW_PS< zrssC)nLdLnU*`mERMQ3=+xpLjxJ1Rbs0mh|Lei`!eCfO>CZe!ok)^1;@^QvZ=?--o zG<_X`%-+|7?wuugT|?LkUEt(st5Ni}e560GW;-lW8LGZp&Gp;3SqYeAv?YThJ)H~} zIs#&F4{tLMH84yXO}DBYg{1#@2>y*a^MCj=i>s^(OI*?^{}Rit$Nn8W%!87b3dqPN z2~FM-dyN|`E^`zXkeE@ZZt8+16w83}u4zHFNdHfjDIdb>87_3+?ZyiZxkedEOC
%{|l2E^C7~>P8IxnrH3@G3LIWNBq)g*uv->2sf?a&p`ZD# zBeOoyw_e@bGS5WlfO(EZcGdPnV05bi<^>Y%Z6%S-Up+u@Uv=+ev?l{xg*Xy(j0Msn zqw{oJpw|TmmoAt-Ukof%(KiSlet0ZM%fV%G`Krci`&g>XYKx2BI=-fxp-t$Vx%?>& zw>Qb)=jxTyp=_hSZ~EfKksWuZzNsC zGK2og0|_+VBRNUv5eDJICDVCc*LuKWRfQC!nRB8Aze=Oyq6%A53*kJHgR6;u7;Cay zGSMVe`FbSYqP2O}YPgoqf5M?lJa+Bu^$>+fFlx8E!hW;kJM;V@y9DI7(@RsnZL9fW zV_Vw1U?jg1h77QK{CN7o*kdq4lOqr-xT9Je`*w!?KANbQgX5S**{a6b;I&+a`0Yr7 zQSr$5utc6k`rp*zlHDJ^Zv{}W>2tyQ5v!avOsXom$IUBijqdHy(f)aS0;BLaQ89w5 zYGKpk3OKQ%CyJKFD%KTz=M6EYb43!dqN}$pi@=qdTMxe_U1le7Qli=sN{`8lQAj$z2^X6G`@T2*>?{{!yx!Bf{i# zXu=lwnv?8dlHN&u4@(nJR(DQs(|J?`_Ko}Zad%THQ_a|YeM{HGiwExE<$-(uUeiN* z42ZRXXKq=DWBb41{Yc3d+o=Ndps^%^U&`1u9XPu1y!~*i4a?^-$Eb4T*N~sD90*z- zk_T$2gpb*p5+UL2-j3FKKCEz*QBup~RZL7-5^UFA4B3J>i}4!@hg-z+$av2F;b3uQ zsxB9m%Q$y;Vp5idy_;1EZ~Xx2l#!JAxu4rB8#Vn?I-dg{ZDP853!DW)^wt`8e|_*~ z2#8W3XTz@o*nmIP^1nL@5mw7X+4p$DUya7&F)j3iCwK>pHl{Ulq5_IMY@8byg% z{M5Ms0t8BUO==gY--9_`2GTMHW*`?&comPdY)3_<{7BDxRTqcCtAG%RDT`Vxr99Hy zaHjp*uceOPg6a?YXTk#`{HnUdy)~U}q0e0vB)eNL94(UTt0+U?Vj{?rAnGmzEdTZd z&JRDqHmQ0yV{B#)(#(_<@mPFdW&)GfqKhG3XtD)Sg%x}XG_Pq8iZSt8m!@NGYL^^p ztlp{7PT$`Y%Wn!A{JX1R5vh)00`AY@ceT9bOBvRfdtqpy;37n)kgqxG zr#(L}tVJRd$9CR{Ox%<$WAO8l(U!x0V|a%J2@igjHeOaBaS$_{mF+|LPs~KJ+d=-u zo8z&zF!BQp?9@*|=QR`z2XM9+%<7Ynqt#{2*Y}9%XJ+Xe7jtMHX*1h`J<24S^fJAY zqQ)BZLkcFm-)NCG(%iE1;!C`=m;idmx89OvNri-oad<(m>!-4#J6rJ7?|w`^(%qqZ zq0LUyg3Aw2| zYG6x;&5|JxeEFM|^VoVH3Uz-uft!{6OaM$aG@{D1Nknr*f_l>Zv2&k)c^aW_+jxm| zuFO-@2=bwsorMIF$}<;MME;opX;y{<5ZgaX+F_JQ%J`ZF_O0C-8@P;Dk!qV7JHDxk zajpwj_!@vtHRTpHNo=TSuRDWs7!*k378Px*PB^;>Rh;w`pB=G8Xig768x9l6l_%dG z{)@V8zZ2&O44f4lThjx0Klgc|guIhzaJYgqIVk8u?O1t0o8`3(4U9PPhdR~>YVm%l zGA1zj|5yM6<#?_{y_$>VKjuF@8)#f}%S>T4>K_;)LOn4}8b_KRG0Aq_~AB@36j8qkE^^U;d^2 zvO(Dbt>LQGqJWcK4^LGQcO@%AZt1ySX1S87#%2T>XL4v%v+PhG5vsh4aAcOLGLuDW zf3%4#uLVFo4t3>d)2m{IA=2C@J*mMf%3lTZ#uN7VqazH-WlnKOWM;uP+CG0u1u1u; zk?$>Nv8uM?jlrxjgph;;SRl7as zktZ7iqdOrd1-w>?IH38GL6(L~cSGozBVQ96Ml9YLN1KTm>0QuM{2!~h zj6h#^u=-qDdc$c1rp=$UPxQL{<7>!5>+IFd`fpl{py62_{sHl|af^ 
zJi9h{Mx@ph*JbBTZ!>qOc)<%j!{{V#JP8M8ot=0GAA?%b1pK)EIRds+`tP-*+CNf3 zd|UpER(f_y&UMR-r{i+`tuG9I-gLd+89X)nDQ!hw73z-y3i>nVmo>Y3uC7XmJdj1J zG~n(3juJhyrEVS}v{n=*%56(p6&yMgsPUkrCq;?G(a~oi9J}XK*}sM;<6U2mC1X^G zMHU2L^YXs4L`X_vZUCwCkkMite68&|Gz#qs{&)4Y#|(mkw}lja0X6(8-K3LD@5xBR zM3gM`F}!8k7QZFAmdg0O9y>zS!Alfkd-FmH^UR}Brbg36WgbY zcPDx~11lExh~oYIY(wYt4oRZBp*hfxwgoCSlJXb=;s6RodZ0uu)97+R;L{rBGmXC~ zO(Cb*A&?ID_m(wVFKQ<%gCWD>B?5Fay9Vhf;Mz3;b?>>>GVPh>;EwqOQo7nn;lU zxXFY!u(7-OG4R8zs^yMMtUUPri+lOSKB)2oz7?HMm)B%PNLRjQJ)>7%Ibz4-Mbub- zT2wxv9+ZDbJ%F|r3=R%Xy-wQsMEyICI5^(?#hJd^Nai}OVz$6c01@j4ZBu>IJLGiF zwb_8{e4Y$*h=!(7MygMoDMsJyp+PJV!tdPb&Hpsk zFM@dS;XK*wsbXJE^AMEe8q8|v$-EOAlp0BUe*gWmF=$9}KwN#je<+&4k8aXVHk<<8 zDZ__r#%yCo$=+K~I`dGprW0==?=S!=X&^~TJ#c*>L*(q7f!FPGswA4?n-ohkzz@_nZ@3y#zs zX+NDeZ-nRm3O?2QzC@5fQGRn!jYQ>W5(w-COk}GStkTGad`A z0jKRshTNSs%E4CeV|cv;>taxVVMS9O&aFxCn?p$Hy{gzF$&M3dVc-E*gza%yeEcx- ztc#X}PdIWVl+Xy;_xTqV`ACF6@3*Fh;>|!xNlpHng~lbXG}pSEpW(923A3t(^<4od z(>tW;si}$V&P3k|KVl)OHT=wB`}(j1tnRr0BD*?;oE&yJ9~4gzc69lwR(oV@z9EAi zjrI6FT9S}ELW_lg(`Yit^1X?iUm!8Z zUiZvX;)ghq;@cY7ZahLPO@<_`rkb`xlpiwtMt+Bpf=Tmxzd~X#-E!8;-^9y1i3qX! z2vW~!5N4TAR`hT8=X~%*K-9TU_;R_EXcCMCZ eNxH>Ke67Ep;_79H&jEVO0HkNR&|!P8gHXf;1P@Zs6Tly~#y;RN zQ$XZPg-&W%ZCpyC0=p$pgfKxgYFDS^MpG@e;CU7Hl9IopgqCfRno{!FwZmd@M_gdr zUji)4Nyj9jVZ;%9T8%IN{Ez%YXEfGseH>hf#V5tl1Z7mA8#NhgthC+HQnzwqY?RD= z-s$XZ0fiYU?upz><|8Ey^zPb$8yOarZ}DkOGP85w7S?)C2u7o@HIc3UgDH+sW;G3^ zKs<`48+1b4PLKibB{F%IuONCwyh9P%Do_SR(&hJM(mF%@`TfnCL532VM%|*pFE|CN zmh&^}I5@ZnL_V&f`lReHAUBRy5Ot;0B{DsmYhTTu+m9Cr z_4ehMR*+TrG^+2~EurVN_~gc@a*uC)!3%26(J(t<2x?Y)&-=ux#mo7o>olR^a! 
zPgJ!VQCYAj3P}WXN18l7o9)Mi$xF!s6*uDh_iLaaH3IM#k6f*SyozbQ!zAT|d8ODT z^GobR!*7m#?QwQrX9a2+FQubr5M^IB1~}H*%g$x`(To0Hp~O9{3KAft68chfZ25BA z3+10u-{OqcO!Xm(xwm8q>7qgsK|Mw&sdKx@g3aEKB%kgWqVQY+piYaU%c80)@GQIc zV*M)*)l-%$c-|Y1kSS1uH?h$|4eEd>q9d^X&OoSJsbxBAOc+Emhq(&~4PGC#4&WSW zF#vUuD%JEi-_d=ao^E>rug@m*NAhmbu1FPRP;m~g+(Wq0@pbVT*=jN;Cy;qVv71Y0eEEbkFSIe_$%tX0DLJ^YluN6NS6QrzNt54s=lk8t0H8K;B zaa!?8=nU9*S3i{yc^nj8#+juD5A3^3gcxie7&huqwcL4!w!4lbGbp3Vw! zvY&DY;P;~KCM~;aKcJ@mr~n;rLr1=S!70%8bh=9cdc59awX**ArNBUkV|Q5VQ?C1| zjQvH0M5~D`S5h^Pvu?w+FYZahzp)lyb0E7??6z>;w(ALm2A+KWaiL%XHnZ}*`|j}c zP>ADOWP@~dZ&no5^yXh(k>m*WLFrr~RI20tl+GIIDt-68*~^vP7j9ByHimNGUMOYZ zv%32x%LdPCrx|^p0+w7x32mI8IR+EWSn3Jsr@16l3ElcsBgOuj>;dzJiQTT@ybrtE z?Qoow_bU9;(FP-*{|IrM=-M&wt~^ z;H5>M+G`4(vCa>P_RaL2tRv>rnCGgRiQEF``19AVTidDi&TSpWR2OFGgkM>p6LBUK zKMvNr$E_vh{!4uI<%(dhL>-60VZuqVqQ90dQ!+<+1&At2w($tae~fVOQHg|u?!V?$j&>Qu*R=*pHM zd_E!Q@Dtq3XQnR(K?g-a)6lEr>DUOti^heHk$Gym%xG1+l1;lMdM?OXcMd6|T-h-0 z6Z$|gfiD(vGnfcAc9<%bY`~{}e%!FS6tuQJhQuK2dG4RTzHx>?eNr(U61((Ppp>2a zLx-GlQK%fR_oY3I-c~{_vHQcQPdjHLxAw_dc@dUeM}=tJovV<-rOHg;LwVSBM(bZe z{n(z~sib-rj6ftRKC!K3eBUSq$vFzmhd+BlW%-A?0u%^3(7u3#rs@)VaB7-x;Gtd? 
z6QmCuZ$muDt3ibXlGLTBHhj-z3g(MbRDT6l4fukdHgg14%2R?pb|u0J-h?@HqQT25 zuH1#sD|I@xCphiTva?mK1uCCxsjuTX*Ijb#%`?$Pg{5_vvb~f3GFv~(C)M%V^Q`d&NsN$zoqgM0B zb{fRQ(03JB(c%i-GzB;mG^RWD$gm+*;E%bCRy&mc<%s?c85WRCxKHQ zkWzYoME3%;%H{#4>Br_!ZBQlvZ|)SFeeA;oY%m;XK{z9R?dCWQSP&B;G*PT%7KB9a zzNYpI`$ExFPG1L>v`Jk|cokGFs+f}=`)xD1%;OG+*r(5BUMw^A)GqGt4r9)Vy0kGk zhU-PMRO87YkOn0ju!6>-&t0E~fVwz4k<8u&I>#hI$_4WcH#qK9*L$L?CCBS^gDTBg z>qjYY(D&9am9$K+s#KNL%DVIPUiG#cS=N6Xp569+*Xz2@nCb+6bEDdiTSd%c3Z_Ww zr%(xvFTT+by_Pa5Xh8olvP%G2Dv6cUlTJPjZ57SDEbR!!sq~iDIgV&JlLDVtD*Y)CU*yw|Nf$!gublT)*N(6LXehbyteJEl8$ z;Ss!5whjo*ie@L)h@owdohO+dcXR*A3ME{HAMi6dY^aklNg85*#lbQ&W0$=_2u3B0 zK>A>Q1dH%gVMy%Nvoq&iGe%F+O9LH8L2HNw0C$z}+)u-|g5ML7+26#PBWFpO70n50 zBaBDlLtYM6P?qb6__e_2U&gI>`r05GYQx|hnGXPmQ~8u&@B#r$M&|hF!l0V=1V7@z zF$|iPznq-Yer?Oi30}SqTY7k!!NVJG`&XNW>A*GLTCgZ)$rKV&fuXHOPw8$uA_@EiSL z7Vq~&qx{zR&m3xI(fg49ieh9wgd*S64tM`z85bUNnHF_MW)fop(__Udnwv9jaFg`X zT3WO)0nqT!`0OEQMqW4rEX%os^_rZh0q@@2gL08|Is2;J)r0m9{qpx7HZZt3mG9}H z=OXYM9o;Qabn~?LRJiLE9g{qMDSEmKa%RDo0Gv#e%z-!8Sf>$pwcMz>G<<92oKt(d z&51N7lt3vLIvQLf!XdYu5BQ2j)Pcy6euD9?x|H&VB?ZM1A-ycD+3zdS{dk)5CtmRn zd*+*Ah_GJv5EP&&K@B?Okt@!eY)3&G&j93 zb(Gt@ArK}R-QoD(Q7ktyiyyXP77 znl+~xT-YUAqgm5!4*S;HviHi!aAW!RV~_b|qlwT9!L7=!;3U_GyMyK5lFvJ<5+M`M zfBx2n+#EmCj-}8sI5dT+Tw(zDfwfD$FvlE=N?~+&{Dtpx`}nG;n4uM97EZJZp5b}@ zowvjF%5CT}Amb#@zFl7GKC2t%lImkf9r-K%ngJw?U$2l~nMkp$UHTcT)#MeyHPbue9~%pp+DY!{Z?7F*WV67`IufY+Yv!omoiQ! 
zr9+d@5C`{jENV$4h6)J?X>YI2e!OOO`7EX`JNqhhmPH7&S75u$vieSbW=#zDKyVf0 z97qpz?Wux~`(*z`97yK2c1k$71*ZlkEnvOa9v+daC)4 z2om(vq(DvHgPurCt0UJ#88nLmEKc!rBy?#OI&q8se#aOa@t2H4(zjCK?A#?EzbFhj z<2=t`j(;_zlO84C=xN#{B&I%%`!j1rblW@`XKt>BW5!`AQi$1N+lR;8l4qF234>5u z=gEXt_uRCGLSH`I1RZFjhFb_Eco#cOe7aCFiqMf`^%nQU+sB1y`Mn9)_$h|>b~1aC zG+F31LLWCfRjA>ux1XUe7RO6Yr)5dWd8LHua8J*Y|JPwW`ZX16ixRE*Yasy1KSB)T z2x5oA1UQOc@y2F5gR{v7*Brtp0tkSO6Z^rdjYt;HMSmclN^~G z1opvu#up045TeqD7AwhQ#!#*wS}PW77C~GXW+Rr5m=0+tLh!n{7e|R;6w^J>oYf$-aCX?DtqVL->BI4OJF1r1mzE6AU^ z!nWWAlnM#S5gINt~& z!iGYZEWl0s*i|n^DMm(NHvG6D&nvA<3^UbCR0*F5=Fi5!pxL+c?Serx>s)owI5ZXs ze^!pZ7f@(X0$bpb%GI>tSzT*61W;Ic9=CTITB6y=9J?jk;Z%u@g)Rv|OFTbhH6&it zHg8%-+)YFWYu6Q3%#G~6y%u4Px&885KTBMIY}%WxuEY|#AlO5vj&?o`ikU_c)(5&m zTHosAPRhgSoAO%OSb9eR4v_+TH=CR+~9Z?D*@bZCJ2c z#)P!_+r(k|ERwf_ag~zt4^2c$KHU;nYxnSoojVRe+Ph6Iu>OxPpT#PAWGo$g1`o## zc+5W;xlqqtCan&&G+11z(_YVOS;-uG?NET*=-@FkEwknFCI4XQ3>x3_E^JX|w|pmZ zXIjOfh9-sxeq2~a+21uBz8E}wWGh82aZpxxdSU_6j;me|yhBhj;F&;-e7|F{nz~r* z)#ka$<2({=U9>9A><1ZR{7&w#?EFzgpG5%rg9?IG0P>D`gALFTLPlM)zy^mKt~h0k zH=B<22WPpUv7(x__e`J)VGL;!NY>)#Gal9r5vbwoExGN$S35YQ;qXz4QAzM2*60OI z2My}0?~xCkHyn}cq;sGaF%MC)Gkv6SugRN*^$J|B6eqx7ePf`(4zJ!uknal9zoD(;6fE?~m@ zqWIW>v=S$)GU_rE9)bt4GvsZMFzT%~@>2v1OO@w4+j3_QFzhizY?DKC%ul5id z@NbPUED-{JoEbJnjO<8c`EZM5N{I7<|H|mA1&E@s5~i3XaC_wRD((Zm%6{l@kB>V# zpsNaH0m?ab1}{1YTXT@Y+flN}ld!u)tPjbl=e^?4v2H);(ABK0MUOQ@3t-_=e_HTNutzIXAyF7~ik1Ewq zj6mG*>%5fXE@;T^nBDQp_44k=2_DbV8uTgm%0%R{pZ*`u|dCvp0r`6(G|mh0XXHnXqyd?*|;9^s6Ea^wMj@WMDI2qg;p_Q(-P zs55G1sZ1ooLJEYz%k!>K?o~}9fOk4&@!x^bz^@0qzeaV(&G<89xyVL-rGLg)vt-@M zL7D8k&}3|mMAe7vj`GUNFUTCVTpu*iWXBd}^sn_wS{kBi`!-yPKh6*DO*N;~OalOs zQOXB=90&1W&ZetB{st0Ms`2*PZKkKfE_>#s5ts|go^KPdBQQ4<9kqh8UXRN`TZdg^ z{H!-W;zLRf^tr2g^H>)e!#I4NFX30|pX?DxlBqstb|5C!vK~Wq$}qde<3}dKag0>$ zDag&~$Ha>kOjp&$K?M8E8?bVh3 z?A2NfI}%uizcLQ|5GDuDI1rcmT~hv6_L#mP+u^TFiMxImsnWY#bPm(DUoT+IzN#(R z0QQ#OxHiT^C8RZ+hu9uxZ>Bt{a?6F zB46L==qkn37G8W=wMWwZ?Ngca>CB_|XaKQz{5iPSz#_^FJGb>#;@>g33s>-KpfU$- 
zRuJ9W3aq)xZB0#ykV$9q<{N75Bw~d<433o=A6mOj%$Bu&2h5^pzRp`)J=2Z`&flcT zZdvN!{_+gD1i+Y-IN*{EJknZ4BL|9~rbuWDNJwF8$ZC^~&XTd6WD4txr+iJG&$b$N zh!!Vbk0>O$gqKFK7_c6j2nYShQ6!wKs#vZfd|sf{k`r4p-YHm+8hD@rZ#~OVla5*) z$^5t;*<%=v%`F664Bw%KzXcdWTPqtIGI)BvN&yo@G_vprLmBXP8LpOfsUV=VTjM|0 z;}2~ZB{L=_*f%e+VP9oDm*dA1o!2PB-8eEcUbF|#zFvztsyNyP(XPMIKHVk!_;j-M zS8W$!AZLrg75B0=1I8_6U9CLKeT=KKigmQzY%*= zo7yXi+N(yYwh|<@YgM%|Ywr=Oc4^fnC|M{4UpSh;vDrSS%#eN$9)=Y*x-Ir=PLe;95TYtRXnSd{_P)7ox1oRIoWczJ~BTq}8+Opq?e6;!8-D@X) z-5{td#Zp3raisS!SnDlTyC(8lSq;2-wa;r*sxsncmBj{xi-#jBvUrpX!gmV(;wzi9 zWKHK?xb3QD=toU+%hGp>X9^LRRl4yNgru1h6|i{UlS|zjn|AknV{ql%LV~+!=w8}z zyd3mfb>w1fc{cH^C<1rJIigj9$QuStbMe!K7**LqMP7UL222flJw!hys9h{=U(j0( zh8nWKT78Uv$ndYQvA|}DZu}Lh`yI{qZb`Vt&JA_M%SQXIR@$sNeKe zK`x=`(TvdC?M#ZNapPfld}%)<3yfwndfcnF<#{y%Q76Vs_`almW z^Z3-er|fEDbLv5DES7g;>fiLM5F4lw$jv@V#{!3cd`fpr5gNLzA)#%(RsX?>82yEM z{!qxXL>3ypm*rf6vwXdMAN!OM3dP=&+c_oJE%SHJ306>DzY=MbNOR~6M=aVBvd1$p zn|f-rdaV!aF}=sx`oc?gOco%5-e2?7i}1%0eS53EIh?{?tNB}Gcc0_8>9`l!iM~`c zR6$}MbqQcs6>`99RD9M8Aq`c}j^6p_?4fRDhaQs?}r%H9%A#(q{_l6rN-O`qG)-3nI9n@Z5dIF+}cQ4>U6= zxQZ<|Ds9z+vr9JOK^s!?og(x}@TscoP)Np4krx?@23dnC{ z>N=4bDbnlt4|Y(5llnEyOpd%{3C@&(9}%Hi*ZFNw#y_4!fDIE1Krl}z*2}!x!Buk` z_@H0?^xLx3M_em4RdSrlH|Kxn^)zYT_&L3xBX|=Lu8ajU`c4-15S92U_)In3d7=~< z1AVyN^0)5u_4sS8-nHxeP&(q8Kx-mP8&S{n2~zh=&B$HvuMAUbi_CVxO?PX!%gsgFWeZJd=cSwlJl=uhugz&8*gtcL>Rjh z&^8s2Ti6~18+Ypr6>2_~ug~l`(MGnqD>Q7)Y(`|=X9PI+^p%<)eMc4M?rVU*1~7*V z+>#=KPecXM`o7@+uk2C^<7|tc$caMt9a+(<@&HFl6J>S0I-j=<%Tctiv$fpSpN*zx z?P?JSb2P9PPaR_HXZ1YRG*$U$JGI#MFP|M;ni+EwL2}UOy)tE)_ORfc8I|7IFsC}^ zzcI!{;D18wbOen`gi!ICPm2|1b=N5*4EuS@VP7!8Pu1u2r{mIsV*@)cwEA(%Q+{sc z%=FVT`bm-Y|H--*s}+&AU{QIVZKlL{*4#!N_f%+EmsQj~7j?hA6b`I(7 zOSGX3SGIFPhQt$Os$h=e!in#-d~+XDp{tZ98F$AkJiB+78v&eFS~H7z)8loEAB_a3 zi;5eL|K1SY;42hejLpB;YH3?__3ogS(8C)TlFYvtIm_Jix9MCAAMozG4L$s9speKu zm16@n-Y&%hYB*}gdQg;#{l1RW`b&vh14=TQ(&*TC5d?5GVE8n$t800)9!DlC{)$R* z@$x`C_wC3tl|W%O5nWovhnFoQy3c%Y9zMbfqhYUt*GV+&J=k!e_%XOg4QZZ*zxLNL;5^+?A3dlZQ}mcQFwm#ey^^9XV>`P9Hw-Q6;E} 
zZN@?P9HW|G0q}MAkI)XX#3jX9R38B}CLVq?3;Wy8E6YK;!~I57EUZ61;KNMDxR-w& zA|!LLYihgtN&knq39zuqT}(hGqL~HA|H#LP8u`g{GwdjY(#vP4V?q!vnZr;~I&KPe z2rfcyWx2z^UZz*(z@zZbHzJfob{mDv^#|TNW;lB|Mq^ii7c`GS#kuU0SpenaOb6BS_w+AtihtJ4mpH4@?9;ePfjEccXmr zVmZYGoWkCxzEL}@F_3(H-_UndV>ol*IUd@zmiFW0m%YzO^Y$J?-|3$~=wHjG z(NgEhgsl>km)@pts?HS?FWH5+78b89F*Hhl zeYd8eEoY_rwm1l1Epy=x9D1Xs*yslhAp-00nKUeD#!dvO{`6mIBjdN`egx8T7*ONU z+QCGpHJt&+-Nh6m9U)jS3>4JXl3sBaaNV*m6yFv#fiyDi4Rq`xC6Y-Kfuv)rZegJRp0R(--^aN7g^WG zl^uLY#-B1fXbkXS2zuXI%BgHihU!(|ld($h@O*Kgtl$c#7&O=0hdwJ1$ zdwO+N)3<+ne7voaL;-M-l-_P3QHmwX=^Z68$-o zG9;fI*Oh05pPu4(u6cxuh*2(Sj!gZqk-fb#fR1->)BAd^p^!^~hv!uLOXoW}3+FpW z`5_m{Dag2c7x>cmu-EOo!>ScIsj_C3ut_a#ZT!KQdgE3F<3IzBjt%NWXV;ia5+d&= z45KPL3mQ$>-nIsvgz(h=*1On?O80Pz;Ktlcf>J=zPgF@CPt^07eBF(n0pWqR0m2V7 z%uJTK3h1~AfG)ry7Brf#%P-T>zQ-webACw;Ia?Z{>pav<4h`Pr^Iko&R=OB|3u({Y zB!jg@*k-KH%{AENdN?<~=WJr*CFRB0VB4Hv7ApEP|PDhF;ihz@&pn${Ty&g6f|e$a)knd?Xcc z;Ozz%Gk_xB>-WBPsyFL{b7+Ck8^3GSswL5X1YpKDg{~txytv=8XFqCq*=Z{(JhfcN zbu3p@6c7iY@4J;)h+Bl1$Xq`;+k3dHQ#qCfH;dJU|Uu zj!JXJ(-C+6F2iWxAh(Y1>@v>>VC<_CUdfBJ;MkDnI}tt#h9-L9tULxKNdkwAsyBZk z(%+t9ZwWzT$jf*Nq+X0aK=ofgSSW1Al=E+9D2^Vtaos#K6d|3&n^I^snDr@J$dTTc zbWC2F=xv+z_wj}6+g-SD&#Y`6+Y9qKBOxcl;~KbVE|!J|cbv{RH)n?-<6g8#++R;$ z*d}x1e5g3*18mhDC`f)VP+IE0CqI2AxuFb*Ue(o*U6|?pU`4m$fDKS1V?HFXnyn|P zjYLm>_2vXk))gvudi&OfaJ;b35{+AojDw-(s&HPIZHjv$0)uB$)lpq!aPQ-N8u33iob@0nM`wz`5hUen-A9_G z0{3M5Rmi}48~5Rq)#-Gn_MK9YFw??+A^8cP6B!dCN5uq1}+lkwWa8JQ+%1zB4B$| z!X~_h^$Viq_f&(^{A_i~gp2Z2X^y-5IBfJ3az8Pn0_vKZ>a%+pG;lFRwlHl+QcnZ|F>0bB4^QV$$lfb(;Y3 zr`^u4A%^T9aLC9jZ9J)&h$w_-df$9|^N05d8^68%2oW8AMVBAQnNr=Z$B>8)m#bhA z_ITh!M4_RO(N>eF)wJl~r$y(<2CL)N<_IYPBc7Cha-H2rR~y|OZ;yR@z6YKDzG!$$_l zYCgK8wNL<~ws-(KDOo@<(7`mIXCPp{1dC22=rFJve_8%X#N_OK>PYX!kNt*)cg$#E zP#OtgV+N|j722sOFGBT+Q2Gxip!X1nr;V5c%L8d2EhHioScOJ7fdnj$j`)@cnMN7J zn;OQ?0P|J;53eT;wfCNuh2ki^!iz-#0wJ;%P?yA* z4t#GROG1Li1e6XLhHo|*N>YA4sHGMhuokqd3OXBWE!l)%?N$c&7zk(C5t`VvaUOC)emFHH5BNfajzn;Jqsks0sRG0)z2|IIVfY4K7{(?O^UVB)@KYGjuXhCm$O5;$4 
zs?yYGxyrJZF(W}h(vxN3I5qaC12PWZ`yLshNM7?Lv<;&`T>ekGanB$UnI)SJfr_K1 zyuu+*SsDZ1p*D1>5MmH|3Pj5t$RornXhaLTPS6MqL3`y zfE-i+TiZ(qdS)Cy93p)Wy*}rI1PcR_KD+YEf3{^e8+6IB3AEYTQ1u9wOcUakPfLgdIa#ndS+j0wPf0QjGh^8zMA#;Fl=*!nMyk}6>% zonGa&_en8)u@H7Hz$jvFLKE5>(HchRoroGB#e7+O3-X?_cd5I zvY~kwyr>}90s7;j4Peto_TEoVo(HMseq=+lAi-a%ZCJcQfF_|dtgOAmArz%UPgnUHOs#yNa69~MkwFwBM!O{SaDRj{S0uQ}xI}2s5Xb!#1_FKTQ zA*0l$#IQi<7v2uy<}U5J8vVBA9LbH<9O_$-|1I}@?;(ZI ze*{C7$b)emJ~AM|VnDU?L!ey(Sw0?}eBaMJF9}7en`^O%8;qOy@y!E(zYMb5jWlNm zMa*&W0dbK9$EA6#Z0#VpX<4w+^9R7jBP3V^V0Zu!nn9FG%lt?&0oh0tsc`@NL<}|% zj{u!O>hn;4RHd`KPyy>8cmeTzewe~OFMyXW#e%+G=5=qK5R(KbK#gL?M$ljbO2gq% zA9r4$t>&jiI$1Doyr>GKHyrT|$z?jfm z3bj3&wV-wSzg0D?C91>r!Hl7_W&jdrkl;>$@Z4`5~%Y^1Rz-iQd4#h$V;}=;p5pn}~swcJ( zCEI!!1R^4a&G3Wj?uxW)K|QUkqz{qiEg?2t+R)yqya{Ku5ixMB9v$o+z-|LI`A=A~ z>B#6ONN^KYgJMmgC-2wIk#Tq@?b25gOt6R^6u2+QLKdGNXdA@{x~Yw*I0N^&u?Dof zvN`!bYgtjx2ZGB1+x0&$imiURk^dU~;Pi=b66+^$$r1U3wfr1_PTJ)g<`aXwG8hQ1 zrSapEe(ida=C3Ta5Rf2{=?*3Pgbz>4Vx9~x1)ddSmkTb6VH5(fB7+5zfJxD|4*|2& zuX@N!EPxn)*6qbbx_H2O=esP_UPr>8mXNjUEF7Ch$r8C|F^6~<_n z+apBH+CUV7Eh`No#Cn8|m(cn<@BIoDc153^O|ffvpPe(`+h>oVzPUa)WQdYxqL!{k z<^5vSr@u?u$q>S{XwPR?y!X(QCKWGG2$jyOO)C50yMM`NwKmlQCSi_G%7T9$FTBr+N0-w%YsV}8d52i=bJP6RTd z&9FW|{aMQ|5P@_c3mN8oFf1T6+qz8lcPb8g+hlYhdC`LLy)`pTg$4cnKW6koi(V6u zBH%OFI0p}KOm;qEz3F3Ywx*Fv>r*!PFYXxds((!`zoY}@BZND3k`Qm`~3krww zG0{i`de>fLmWUW&Hc@RR2=JY)pYYf@nmA~8Rf~_+>dd%UiMr#%i$PpX%~|M)a^3* z%~*2&te#!$Ffc}50NonUGNLpN+Y3!jD85OBl8Aek6F^-4TUF!&RCg<5Ek0z4H}&|l zT!pBRsA7$NW{)|D9tRB@oK!`{on9D(2TU+S)S)LIYP)FzvGGukEoT%#0jre$W$N@8 zE&Aq78rJaPpE_J-0-G2bc z4i;?CYAwhSAgI%;QLSM}mX}+E1Yn*fyP$uvdx;Qizrnpu#(4q{07IxJ2~6W@-a$+) zvlM|eD+4xw?j{t3woI-T=RR>vc=bZwprI@M{ea02%t1}YL0 z6X{{*t^QyY`$Rc(!~kVI3+oNk3Cr-L0N@k`A;{MKHsZmC{$j+B)DZ_xuh`ky$%-DR zi3+K(Pm=;fX)7tEETO`QrKkV(QxNMrtcOc*@>O`o5-;t~8)&`sZPIfTY5i)r>QnDY zzgLOg49g$#yHuLg&E|&b{u4AK9L=a3=|IzfDV}&!~ zn=O^~(Cs=hyA$tsksIEKeVITWQ2@wg*jQ@3? 
zS>FbJ@-S^TLia=e;hBmx4CeJSo~n+|jb7}rh=?FTUGX_?= zolhbpEhyb>M=p{{nHxZjQ|6iDYHuLr22L~}K&JFZ^lfUNo78_zu2F|!IvlajA`tYr z_4M4^F2^A0C6mlnt7qx3{8A+@)pgATiklRsIEaJCIrrE=Ce1jd%v zO5STW`GQ4GEWTj9G0L)@ay$KgObrNXw1$2!Xv7PE_>1$yD{PAVeY_)PIDhi2ccD!4}tu z|C*09-IPym4OA6U5fBlDl72o0rc3&T0lSkTW2)a=BR{B^2HDtT=z6WH5gmrs5jDkGTm`&88I_LdW#xT}vHj>3M1>-b;8$DFDsIhIq7 zYQSZO>RPie$kbbWvAWSYs^9enTEw0Sy|3Ee_81vG-E+Qn zeaG28ej0?O_+GLl!6)l7fUFmF7<7$H5F~_;%}zfdN9dX<{V&NWe`67ZIp0r)PYGg= z|I}<;FQn(Eu#Yb??x*aI?~63ReY?4}rvCS; zwocbmGV7FNd_`snt0#BUw!wPux&%t*1Gv;YgV~immi2nyYmh@kpQkP?LP0W`DD6L` zh>kNmI0t)wgi?Tc)EkMly(EfJ65R?kFnCa1pT=F7{5nd)-+6DS)?aT2N`sy*R2PvW zYa$3tTbE{mAr!EN>;aweBd6alONn8p;|aBy@=YHi*s(hPMe^WKfD<^dn~Gh;9!9fc zUeuRlsf;41vhASqpQpErpOl?7iT|L)E~6f^e0f5|icYNv@x3~IUi%nQ$zbcS3R;+A zIYgS?M~5}Ak>|Y+DQm5oI+XFvj8FO1H37uk;+w^w&fzyodP4LJ8-tS?`FnGy{PUuA z7nAIUi3$FSvrAe?YfLj+glW| z&zBU86d8Q}-wrB+(@-y2Ag&OoXfMhxrd+voU^NLVACob0w4RsNv{ZAyMZR5397^<0 zWj&cK{g;U6-qfFLVn_)PmAU!SIDkteY7dH>i~3ItfN@s;faMXX3zZK5Lk`sE{YD{? z=4vFsOaWH-2^alzk}PPV7#i&M>9Hd@ zk$xMvahwTViruk3F(~v>xs1wt;t*G^**ZruuHX#rrc5J+X0la1%>0iuG3GH{%A*_b zT#Ke`Qm6Wd)un~W359@bj8BUmzf%s4Sy97t+hq3gE{Mb77d*+T2htly8UHmr#y_$5 zWWA|sS)i7*?-_kZfr-2I#9qNC*mZ~yS#m0M06yQ^sQ3VcglYC*!v>$%j^D0K19*J^ zV(_r4LpI+&iSjbND+<0l!psJXA#T|z7qN{W7ce=tk3pD1Gb!Q>{do)(=3?%mvEsox ze)-dGAQJoLq{8=iafkZTZgH;AoRqj^JFk)Z0MP_#)>dNMPCPnr`*f@biNWov^lUWy zqrTNc@dMN%Ycg%iKZ@tXpKeMB;do<%s}<4qqAJ6gMD}u9h?N8<2j+9*VnNzQXOJvr zJn_FC1)#H&*$W8xlI}&-w~N5JcLhwGZ=o~{fqu`R=_%@Bs5v7%@USBK__U6p`-Bt` zaQR@T^*_bf0+n+b208D<6ou<-@?l_5&aOfwR0|c9N{Xpv+zT!@Jo^MksdDa9|T- z5`TCwPx$~%6I-mJjFZ5&e|_?{SNU{lGIB;A^zssDK)n46Hm^aCIWj;mI$MI*n?e2_ z4p-_sP)0>S`Wsg7;!s8$j!Q>$btUUPh#mf*B6&w`y%00_1SRq!1d&A~Lg|ywJzsof zt$gvbXHBUp^{x~xw=1&jkr;wT}auLev0 za(%yF^+q-3l6ym#5P|5&9B_0O86dt!jNTeboRRycHA>IZ7GErRtQ;Aq$9iJo8C5KK z`oB9WpgG>1O4EEaK~9}a)AO#2siZ#&)OvPD#6F;VFSlLk#ffHwV?}6U!)<&^y6^t0 z>$or1x#0N8(f)y&pTP%Ej`f=9^k4(a=OpD~{ue*iiaO!xW4^CzF8AVvNB)+>G2h;G=c>9q3c|6D{I5B(xOSH0dq1kCCiD z_N3FnG3Db60oghg_g_+4&A)glGwt+ 
zc}ddgB*8Kg^~N_m41Shz zdo%nulmT_;$CY!l?|Z+U@zdQ3|Dls8sC=LC*==t4xf^%q#^EXv*Jb?ZSTb-f7KeAt zAu5)-6k2k=l>F&@>3W7<>rN~^-HlN9j^gi~#90(G_^HNKr{L|&=NF^fDBe)Ee``G| zkq^a5$R2b13*m&Uazw+UNykUK^H)Mi$O!%!dGOql*oJI#?sn3s42_e49&)u2Txw_9 zY&*P<7)yQ|x_9^Qd(qF^wMh3;|1au})4E1#w&Uzkl2MKPPK-)(WXq zYo9D#wjHBY&as;S^nFzgf0DMk^4l(at8UqeI2rEPOYh>59*FuoFTr6tMbMMC)Lf5y zzP)oCXSy4tz-r-5UC)4nA=@hS<7 zD=W&Rch}0;w+Y85tzkEJ%f7dyyFSD5HtQ@?qWDY4>`|e7^t%Lax*0x}<6Wl*WxhdY zEss98;yrBKy|F#B=g<1q?D>JcW~Sq}mS}~^7kw3NmbxXnVT>}$KQ#rn(-uQX?k!%2 zX!BNE9Cd-=zEeoNUwt|qKj&o8nHaR0&#ok7_7dkkxtyD|{dvYr=jghbST>H8oFPS9 z{--XgUyJV-KNC9jZ~QXc(QM4UUGwG|+5PhB*%__}hhpyy9Wyw?>SZo$@0_2r(@HHW z!1qm-u@riVf}V5W;CK3KF((ZP_ICry?1>nPz(O-C6K!oxHm<4mkMugidt*g7Sq*<5 zeXQ7Yfhbj#V%WBe&EIK@7;bxkar)hJHN(F((1zWgKvt8(ODG)wPG$UrC(t`ce1Rt( z<$i&$tWm)GbWo~XoZk;_>u6pStc*Ev)H$*#^&Ea9{RAce8_o+;CHlmi3_xwYrJYl| zoV9Wa3Qf2YBmJV1V8QV@1?F!=FK`UEB$MRJ@Y3r8gj1lC-;a~?dhgeNBI}2nKj)D9 zboMInJ5d)axO{Olfa;ZXmjff>wd5($Ltl&8FDH~Z+z-)9lPVg6%?CV z`KU~BZ8=Zik|iV|p}*LwD-5qFfX`@`3VQZABmjZ!+r$LvZ^u5A$oKlM%UfNPS|6iom553Zv;?>{4zv-LH zR6x0O%zs$6AJ7>oaNo~aN&viR+FF+i>G_LH>Pf#blzK1ik!U|yc>IAN#(^~Vd9Ulc zeMRu-AZ07xbN^%37cuj(3eVl-?~V3zWxjbw3zC-!kTzh-vCm2647$ z-c0>Q#Ell2S z*x!kXCfB^+>^Yk~%s9j+>Z9I7$I%m$3wIV~Je$g1ds`eQK(PeeG#qln zr@f3~FSzkHWGrp1YoDE06ij{o7hOJ3G8GT|AUGl*r{?0$#h!08^iScZ&rwY)QDRkf zx(&|LSC4I4jQ({@c{L?yUeH5xlNsm&`@Bt{q?LIih9Nb6_xWVfXS({EoP;6j(}T7b zg5;Bu`D^!c__bNGaHbk%i(KQ};UCvycF|7Yl{r!^&Iz?T(+9TCP2RJcV`@*bXcW)* zi3Iy{R+xCoE`z{8EFxih;k9PZg!J0%P0NB@Si_@~vF$put{q#K`j>jtPkblwAW5wS zdeX!Z(cZ9G+uMc_u*n&1P630$CNEr)n&#qRH0gZa#RYl&>12igW%0Mrj^X&VkU~7Y z4x9J5lMsaCHwQqn^53y~2MzhxxeeVPbz@ZjjESP-S1fm z%+eJm;%oOg{QZ)+*&|gBwAFe%Uv+1&+PTxTN?Zj#yUOJs6x`~sv=!-FiLqd6BGj&0 z;?wSpr=+bfY*h$F(Z9Lxe80ASKW)adnRNLD0z$G@;xc?FM{y!llKtVhjxZrbN91$q zG`Vy``T)+ay2P!ZQ0GQ!6SMVy-fC8h6Ws44UF4tvhyHv1zuiyYc=iO7{xRe6dKT7a zOjSdEGRK0kFmjM4Dgb6TVu&dAP*lNuD;&6S^hm6AF5BFr)K(YJ}oV zoGiMLo&KFc&9nuM0o(8puFz4y$t>+Vhv`{={DvG)nvaI8;Jg{~pI>6id(nPv)S9?# 
z{iRGlk57&WeCpU$z-8g)s}g58kKa4tTRMC=%Dc^OZ*$Tvp2x@O5as{T_I<4F>*w$r zCMK4IjYu<+xYxz6w78-e*5=PsOV-Jf?X|PAxZnB}QAaN~a-yx$a}C@(ejeeDR5mVL zeJb^=pWG~OnUU6)8fN!R2N4)0?r*7snUsI=A`lWiV;y}ZV!I? z{WH%8ixa|`q#{>@3x89Azyz$_)^k}f33n;zG1u-tlxwlZtEn%G9y1%{z!W7t>2_>=#S8lO*ckATR2Wn{mpwUpxi%OiP)B zI0}+%;&f*=^VJu!qT>ne8e@nWZ43*!cA&aTq;VZ=;Pp)W@VTw7ZkHJ06#uZeXq`~) zKsgd{*|s3}qN8*M4cgdB9e=3-^YP~OiwxWcmu(~L3=3H`&z1|yUdgO3OtQp9%En)@ zzS=zMm3XHtsRScnLhG{X_y?-ZExJKVlh_?MO%~ugpo=9w5jDRYtA-bmryHw($hgWM z^`CP@gs`s{%EeX4I;hXxjS4TPy%Pi8eC0g)l@K=n>lG`pUFe$6X3%Cff@8Y< z^Bp}PAWiChv)Y6o9;c}Cc48&dEk|aVI++|M4oJArIlPNKQUrmy*S^vyC?;R^Syw%T zTdIyOuF|>%Lp=%8I>)+3ojznDx{7SIXe|N-SMiLjc(g2x{+?J~T==oQSQ7{2{-RDz=)L#UlDp^W^}w&5?9;<4_;TVc5mKBdYpKpZxvPH-&)>^ zeE6PtVuJ^B&njT=nErWzRBczApDR^mcgqHk>(qX{^8=N}Db9X9dg8<&@_guRNeA9Z zR{iR5gvHcOsf%&uOZO?(!GYsQb6$mZ}zVrf0a#f5|q)p zE=1-HAE5eDvP(XIFVVd#%YjKmX_uxy_TO!77Q<{TJ>Bw~pSs+%Nl`(^=sF%A)Q4$9 zA#I%>9_6y5S&I#*mW)L>pVfmxc8T2-b(=$3fp+RVtGH?b$hsQwD*W35Cj&|OH|-*W zfNn0R%NJj#R}`GpiyxjmlNr9r&iC*o{Sg>)DGZ&L$|}NpY^8xh{Oc2Et;#k#)F9G) z_T$*k5Fu>QZT8L94tru+`{?W1>ig7n*n#*?tu_fh5Uf)9)&B3K>FPLHxA8$QQU zje!6iec6y;Ur`j|HJbu4ciNOAw7F4AzoK%=AG*(u_}Z&8k<1~pVBz;`zert(bfU1N z>;hT7zo`-q)~OGCn(p@!8tb$9)MJIU%LSx&$nkgFEH+?zF*k7={w>t#^U##{u><2s z-P&E)!15HG_Y}4WYp%jG&-;&Vb$*Hi%VP-5(Ij#O*rz_0E`ikE2!x))Sm7cL7UG$m9`#9 zTA27`{>okr1Q99d;~V+1np&@Woj`&&C4V;xUG+Epyqo{dp1DMIG4LL$I(5J`G(y3f z*zzOo(1O+nL>CjRv+PflihWWRIUp}(O6DfL)(ufI@f*Lc|IKcxAEu)-DH!Pb99t4h zm62FyIelA{2GAAhsG_!2^C3EAv_2hvc<1nWaVVLQ$y;Zus795*B#&36lNU}-=q*GZ zfA7LCbvS4$y# z#AEK`O3N9za<&S8Bh;&7T&nkx?Kj;jB;|L*Ry}kdFT%>3qL+gsZLLkDaLeVL%jI?d}ODbKD9=s=Zg-bl-RWw$y+T~^f~AW3j? 
zN5V81UmZUcxn9DM5q4vL(yO_vU_N5!tCXIwn^b) z6nZzvZr`@G{Ss1vrp;V+t9E%&1357R_j1?AnTL3^^OJ@K2o}u!8m;34C-j;R`2fSH z(vofI$9hEJ0~dgup`lQ3l4IwaQAWSF>aB#Qo66`TOYRRxC0DpUr0B&T64@Lx8fkMe z@l>;dOWDJL9D>{Z*oYySc~NhtYc2W;>Sp#mZSd+vOLFS-iniaZpiImh2zXKO>`hbk zYQf=%r7GV^cv3rK|1%?Z4vYYlU?u4aT*S<$=gxs>v)6KM01kymf9mu(CjQ_S>e%1} zDhtYv2wQaGbHo5f9bKK8aptU`$bh*<<+$)i{Tn~AX5R@tcHi7YO2V?VWN5-9^SZu8o!H@J{E4as$qX$B$^`kL6>VNWqPrER1_i zYGlul4@jKaEm8(@#cHD1EeqwQlRBupyfwi@Ma8S$calgQPpj`VE3U3)Z*+D^1KY`{ zo(4Gd4rfj_KrgZ`3l)oNT@*a-(Toevy5$&wUYp%YSj577H9h$j9+^ygb4hgG5AfEi z<_E?{!l_iQ9rxi>e2NDZ~H%yYuLyXK?>b)T)W5`vZP;J^fe#y|(RT4?6Yp^(CCye^ec7i6N%ZJscc({RKaITjG@Usa zki?x(xXAg#y~D(Lo%ev@(QBbAEx*QKmRAmX#2OyO-GyVWeN94)&~YK6yFW2cis}Kg z=1-kP<-ZXt9m$0}TlxcYB@LAk20}OO;R61TzTFqOH_6*}Xf~!w`=QeHOtrSIu%g1T znmqmRAH{Ig5F_MC=Z%N)U8?S7O4z%h8VgemiP3=0R~VC>4%PZ(C)DQ)v7-e<<4Pw4;;5isoX0vobF;--j6I z+S+})rRl*v`);0@`^Uf8sy0IBU6l@p%&Ipv8x=o3IuFliQ!uT+mgmXaIe>!b+jL@n zPkZ-w_+Le!I;i1L+~wlhE8pGNa0f?^Y3c2Qi&=-&W~-c*qtoxnGH!)d6)mIo`OPDn zIibH8T-q9|EVk2EI$HYLS|9W&EooyD1v=9;Mgsyr#wW%H7IkEKyFR`QW@H4Wi#de& zW=->0E2{D>74AOe$9V0e2XC8{rXOClKKOnc)$uMvZ0W0?i0@r0{u5k3c@5@4Y&j<6rjH-VVNn9IRha$mjmk4aWKZ@CxPL8*X7{ zZpFpHS)Tm4OYFZGDBv=UU@dGBUh>t4csMv;IUwy@s>uMGb(66n#r)#5z4wh=V@nHf%0m$LS7V4e zd$7`PnP=F_TavBO14YE@H&W!-uJ?7IWGw_y3#vWQ3MG#_`w$>(mc_CXDJCP7_WheE z1u9y={tGdZ6bI+uV~EuojH8hoOTo|IH>I%eU7jNeXTM*%Xc4->Z9x7Y`Q&KQ zvcF6D-Wt7HDWm2zW9ijl`>#~O=|83?mp%?DZm)!*Psm>4;4uHyegc6>X0OvoLO?p} z9*(#LJ(HF#mZSj1Yktk3$te46m03MqBZ1a|_WP|{dn^mB?nz#0gL_2DW;TTh^N+iT z@o@eFwl`Edl4VGzW%x}$@N_E>7f$-o5P8%*(c?VB;I1w;- zl1Ph)cgmNsytj_|wMY(;Gq``zrIA)_TC8hE-$hB?)T!M6@fK?MUhU|}D5(i$5q z`3WRK%>H|Q3AQ};y$O%gihZqbL{Ub%P`bE7`toM{xI$8G_&7k%XRzkC{>j`hVTkW! 
z>``MF^cwu(vOzi0Ipx6(K7=BFWt=v~q_iU^)eA)*PjG^2L-9V*)V8D7{Yz3K3;W## z4kQmC#rc*^4;v4Td{_I4Evww{MMW_`1;6f4-0&2^dr=^+F;k3%JV~kYORUJr`rn_A zM&7K5gCfFbI7h^-jP&%JQIxrQb>ysCx$37|n#tFC(W^$%mg9+;s}kgfmOrrD+w1=b zpJp;U$IiL>TR}gM1wenF?7t+jBV_IJr3=tiviR~*`kyxLe^c1_=BNdKmU6QG_6x=2 zBg~*)-(;+F+V77j{z|itu;p?n}eoP{s^#4=ZS$;(Ub!&X+lI{|a zl&*0A>28Kb8tITmItG;v5eEbY7CVj^Z9eI#OoZl;h&1d#LiK94uCfgSMbjjp_eKh9_8+nJ`rcd(wv^+jQ3HO>!y0ojeY%;FM%FknAm;+PH`S0*cadBj7#Z zAOXe^8`+`06*vNuZ2hJN_oC-%TS+;=?S{^t7vN;o+2yDcEWm0Nfmx~JW@*klk#aMm$OsaG=^Y0=V*ZiH3KDyVTF_L2T#YLm! z?v;H~x|mHSaNM_^n}Xj$7}=JMV+eG~4YJxECvanOGs+|_WVy|c-cQZ?*e=U}l`_p= zyR&rFzcV!VHFC$TIP|aVs9T|7Rng7<+$g6v5b|UR4K}rrN`2qSBFQd>z6P}iNyuVm3$Roro|l$`5OxfQGX|C>fRhL zFvdz;ZvaPD2Q)OIIp^Pqr~((ov+m+F5P-2zh!phd=3;M8Bl{geHE@iobRdwcFO7>B zmF^$9A(EF-=9mzjG2G?_qAx5di-<`JjYca^%ZvCJ`26DmfBo}*Hn};uPLHzcYN;|P zBmc;ya_f%8*Rphk?w>LqBKZ6%;M^J>fsFmRcEga|IJYzn%6O@wm_n4vyv%40VYrqB zI|}`~HiiE(hYa$>eos|Hqy9x*Djg6&7+_q*)QEmdHG3;8mBIyKQ4bjf{|`Fb6*`^dNQrbyRcITN}fUt`P$(RiN4STJC$9x_NbouHiKvY8txE#J8bw8DhKV$AJ-b|yY! zJ96G^2fx;_aO%`?&xrdyNkwV_BkYJqA}&%A%6e`G|M}_w^t+? 
z-e9k5sFL5pkUzrZ&56((SjXG3&21DSUkVL$M@}I6dEN1LLsVAyhx5n^sV$!r$q|rw z33G8)7$r6HlQ zK)~2f$fI@VTs|@*JzC_=WLkIJ>PhM#js0&viza5oI>La_-xc=A>?N3miBoJS5j}d` zHdRV|MMT9k;+p5x#DaqkI-WmUCNxejgJc^lu^_(1b5g_+?f%Oo>3qcv@nIZit}HwPz%#L+C*ddxD+PuqUa@`*3W-iBz7zMdeg`SgG>MUD&ce_>_8W{s zD>S?fA6Hlq9{wt7{sf__10J)z za~7UEXh<7mFi5qSrm{<621DKp&n*&40!$1wP`5tH)e4lDN+6*5a_{1Tvx~a^G13Tt z6luEqb~xLbSZebj(f#V{*gp0RdHgkbG>aQeIm#^qysFn%ydd?poCMSn{9dXdLW>_ zu=VxXi^(zfuMJO01Xi-mzWh_;~Z@l(IV;Vtrxl>FprQI#7JzT=tey-R@@wo?e3 z4tXXHVT)v5KnnpgRMXPP?f^=Y^oxi*IWw-Ei{h5VsZT8%uXr*Mr(s z_PhwL@VCKv>SGfF2UeOFS&Mf;=(}G3S>9Qzc)fj)4aw=rRYH9S&UAjZi^;D>p?^Xh zagj|CGpnWxXl6v!wj|&RYXGY0c$@4dY;yKyxib^fm>(2#9==jF;C9}a{jV*{gs`Zj z)KsS0$^2EXbI^9_xPlb=KFQDkaNOTUfLc*=kY!tV>8gTU0Qc3VO`H*JQ zwc%RG%``Rf-yX+x!nsk9jR;pH`qQat*KO7W!v=duBjjNY(3RvfbnwvPA}}-J>!b?>?!?YxQwlANlE8V=ed)b~eKKXEMWqIKTBDgZ>{qoFrXms=|Cs-e~?m90Zvq)zPOwlRVco!8ewDS{<29tXN} zeIQp26S*m1^o3-E;oV$5146Fp)CC#QMBdJhA7qF$9?Uk1*a=(^m zvJReQD#epxbPEv~Q4+HTzxhD2>~O2dRZ+7?|3T=LW(ZZ1n*^{|#ZvrF*pxP~Zk*7l zDUJGSJJK&+{p0-m^ATfd9B$}yH{<-%cI;CF2$tuJxFvY zMMaCgxqv=Nk|+)%#Bu)WmFMIa?DyXw@Rs%_Y1(i^T{) z5_cIlm!P9lV-}_v3*}PAAgv~>DW5Oob?<~^b|h~)?NY?CfOa_r@3u!kADDQh%=5*YrnUj;mNP;bLPJ8pBKS5KcQDql88ai~E zzZ{Z8C@I)P1>+YQ93N5hn+3ZHcNQ{ylc)lV;{TP-fm=SSkDMq+7@A_rloF`7^OY8VbZr{H}jYTVK>U9OQtx$?uT|8?UOt{oXj zPvHs9jAGBB|Le*WAWeUw3pg=a?L1X;$&- znZ#C>Xxp0HYjCF=%1Md37khkQy9(x5&yYTOanLkZoS9v_O34z}gbp!zAT?)7QMvW} zewhPlBYf}I=0??k>vt#88LHjh{S;oNGNgz}^yCjOOFW3VmQ`1Y;wJqaDRhxxSJ^cs zBlNwo^gp7}*0C351Fy3@HD5y)Dh0|rzw2`T<{%1n%whF4N$W~^M2W@btR8tRu&Vv} z+`5{tkgT=kbtJvh^8OFup+*!dG3nIr;h5M|kBRXu$X)}w;B&0`Ge~Q(vC4~>o~86P z$~R36+^DxR&Y7c_}kq35l*d>in)( zx5+WPR7P_6F|8z2C^i6`%78{8Jj_(Z+f^;+7kkUz971@b?g$q1_gMTRgl@7+GqtF+ zesK6ydyt-qGHIn@d@UGQ1EOa!KP4s%Ea~~NHWj)gXKX50rl*30|Vvq;|g!ko|aX>!F@F>l=5l2CjQ0k+tTT5OL0Q+Dj`mtdiN%I zvIcWXOx^j z3d+@XYBKG9A;UIq`JM4MF|!bKgN!wm}M{G}ribm*Eks3yl?%HfO)&=u_uugPWsW z#I(T}Cwd=ZF>V2ibj>8ZT#5@ViTk!8yTmcQESGD0-D&z3$E&_nvR@=RU&<)(vakAJ zeotOz;q3bG5CsG7`YF)AOqE8s75M**xxQ3-@)O2%^DdFgGC`CP3-w7Q 
zA4h+M5q&JeMJfY0^Gfk{n@?Ayt7e%0^arTdNO87m(s!&qklk1-$=IpEhu_#zjI5z5 zefVO}@Bu;#A3n2zYcjkEtg@syNUFN;FL4o@_qKG@RFpDEq6gdtp7P(qed}BYozT_j z?`HIocvHc0tahL02z@>9Jr+_UBy<@r4ml?P7d4#wlhLM~#>e7um(&ZBpYqwbWQ=od z2HMl*l(;)0x*FpANvdx*#+nv*3mAsBhE_=GylK1!4Mv>7Bf0lL_;UX3udS+&*6>Du zlAPmNs829TN(6FY+B)(nA>-{lCpUFJ~$+p@v9qU?2+lif>1utQKi)@gZ+ z=2H&+`$-Ym@9>P{<ZZRtDgipUnypBoFEa}NO%Ipmux z@*R0U*Nco^9k3Un!k3;tIwY7= zB^?SsA9Vnf0ZVMo7N=ZTH(+%qQ%K_XqP9%>(k@=*#)HU|)#LnS8NZ%C*b>GvNX9>nK z5{wf}8?_iuM|q<0SPXeAcso2c@)UG>l12tU@0Wq8bJpL&59g~pI%5d8)Th2y4HoE0 zPC9#&T!osqc61b{js=TVAK8 zU?n9d4I#Y5HJjd|v!sW__i;ReA}yK*<)YnY%<4oHj6Is0{aH;<^+!{^b=7Np*rkMU#NQ_?X#;Xf4VJf`C3 zipbRdPGz#l_lfFK(($-V5XI$b&c9JSr@HNe3w|%5toPDd&4;VjrH5cH;6D5#KqcYP z?w7t_@bs+_`8XvQfw9ZU+%U`kJEx3cHxm+N$Jwk>m_slus)=JGFCeJgJo1=S`OM;< zAa3nqP8xI%dN4cPF{RMxk9?Rx&Yg3ls`%Uqdd?%84);s_-^Jr`;_!VBaPokwM=n}Qk=%&?-;%as^ZN5o_6m)!Qm~(} z_^xEn>Zt1J#Q&~-MQ((rx@#uOw$@$ywO{xdddWDp%Sib}8=LH(AN|p-S)HfpVADI` zg*F&euU|qh)Z$&%A##sJiMQGaZ@ppmWZ-6a#OC7nzK@=tbET>oY815;Gp<)WJ8l-Y z7V-Rc;A!q}&Nm_LMNXhp_UuDNmwU9-hTI4Ye}_S+lMT^yBPnzh9N}W!;e?~~iSU)- z*(Q7|9=^M;ldY5Dv>4UevSM}m(p~4+P+@5}lr=EJ@{>#y5jY zi=3p-yF?T#Ln+wQHj))I7epPD9uBWESZ2UQ?X_g*FIZQ-a+%`yYqkv@2l$tg~{3sulr+31kzi00xhT= z7wMqx8yIxOFEEGt*+67N)3YIxUSpQtYtm^iJz{zk^_Hf$^IaeD_9;^3Q<9+9-)PiE zj`#5NRo{L17Qajssu&`5tV_d3@8ve%Q=W)6wq z%NdGXi_+jb@O!xj2|uFws9o{7+H7j=!1@zzv;s1VKSsPU=;?HOjaWjhZCZ{pvR)}# zUE^;<9S2-Y7qF?;Upkz?Q}X$`Q<=c_7hNs$_jb8W%zED!^@v5# z_hBj$bV(7tR_0QCK)7{wOJ0l8snUjIsbw{2;rH=EyrP+cp>T8%HzmH}gepPDy8TbG zcSnnisN{N^lPsSFp-H_zXdDXJW)yxK@N3KEk|<`-)zk-lyOtfXuQNIyAFR)CX!02E z37(U0hgrx-SVRd&LJntre^-(*f=%=}!hx}(lcaHh6-H2ki3V$T%+Ai3_FE3AwaT*^cZ1Uyu z&aSJKPuJJpEV85bOJTT?APz)6j4;=`M~JXDvp0LIm~;WXR>C#_p`xo?t!NwlUvkvXk^lez literal 0 HcmV?d00001 From ccd9e7a79405f6d8aba90a5c6df49ed5441674a2 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 17 Jul 2025 15:17:45 -0700 Subject: [PATCH 020/357] updated README --- README.md | 61 +++++++++++++++++++++++++++---------------------------- 1 file changed, 30 
insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 19bef7c..de71918 100644 --- a/README.md +++ b/README.md @@ -22,9 +22,9 @@ Kingfisher extends Nosey Parker with live secret validation via cloud-provider A - **Built-In Validation**: Hundreds of built-in detection rules, many with live-credential validators that call the relevant service APIs (AWS, Azure, GCP, Stripe, etc.) to confirm a secret is active. You can extend or override the library by adding YAML-defined rules on the command line—see [docs/RULES.md](/docs/RULES.md) for details - **Git History Scanning**: Scan local repos, remote GitHub/GitLab orgs/users, or arbitrary GitHub/GitLab repos -## Getting Started +# Getting Started -### Installation +## Installation On macOS, you can simply @@ -60,8 +60,7 @@ make all # builds for every OS and architecture supported # 🔐 Detection Rules at a Glance -Kingfisher ships with hundreds of rules that cover everything from classic cloud keys to the latest LLM-API secrets. -Below is an overview; click any category to see the exact rule IDs. +Kingfisher ships with hundreds of rules that cover everything from classic cloud keys to the latest LLM-API secrets. Below is an overview: | Category | What we catch | |----------|---------------| @@ -269,7 +268,7 @@ _If no token is provided Kingfisher still works for public repositories._ --- -### Update Checks +## Update Checks Kingfisher automatically queries GitHub for a newer release when it starts and tells you whether an update is available. @@ -280,15 +279,37 @@ Kingfisher automatically queries GitHub for a newer release when it starts and t - **Disable version checks** – Pass `--no-update-check` to skip both the startup and shutdown checks entirely ---- +# Advanced Options -### List Builtin Rules +## Build a Baseline / Detect New Secrets + +There are situations where a repository already contains checked‑in secrets, but you want to ensure no **new** secrets are introduced. 
A baseline file lets you document the known findings so future scans only report anything that is not already in that list. + +The easiest way to create a baseline is to run a normal scan with the `--manage-baseline` flag (typically at a low confidence level to capture all potential matches): + +```bash +kingfisher scan /path/to/code \ + --confidence low \ + --manage-baseline \ + --baseline-file ./baseline-file.yml +``` + +Use the same YAML file with the `--baseline-file` option on future scans to hide all recorded findings: + +```bash +kingfisher scan /path/to/code \ + --baseline-file /path/to/baseline-file.yaml +``` + +See ([docs/BASELINE.md](docs/BASELINE.md)) for full detail. + +## List Builtin Rules ```bash kingfisher rules list ``` -### To scan using **only** your own `my_rules.yaml` you could run: +## To scan using **only** your own `my_rules.yaml` you could run: ```bash kingfisher scan \ @@ -297,7 +318,7 @@ kingfisher scan \ ./src/ ``` -### To add your rules alongside the built‑ins: +## To add your rules alongside the built‑ins: ```bash kingfisher scan \ @@ -331,28 +352,6 @@ kingfisher github repos list --organization my-org - `--baseline-file `: Ignore matches listed in a baseline YAML file - `--manage-baseline`: Create or update the baseline file with current findings -## Build a Baseline / Detect New Secrets - -There are situations where a repository already contains checked‑in secrets, but you want to ensure no **new** secrets are introduced. A baseline file lets you document the known findings so future scans only report anything that is not already in that list. 
- -The easiest way to create a baseline is to run a normal scan with the `--manage-baseline` flag (typically at a low confidence level to capture all potential matches): - -```bash -kingfisher scan /path/to/code \ - --confidence low \ - --manage-baseline \ - --baseline-file ./baseline-file.yml -``` - -Use the same YAML file with the `--baseline-file` option on future scans to hide all recorded findings: - -```bash -kingfisher scan /path/to/code \ - --baseline-file /path/to/baseline-file.yaml -``` - -See ([docs/BASELINE.md](docs/BASELINE.md)) for full detail. - ## Finding Fingerprint From 8f58e2fe3d6fb44fa73ab7195162e06687e85bf7 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 17 Jul 2025 15:19:03 -0700 Subject: [PATCH 021/357] fixed issues found by pr review --- README.md | 2 +- data/rules/replicate.yml | 2 +- docker/Dockerfile | 2 -- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index de71918..854aa97 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ Kingfisher ships with hundreds of rules that cover everything from classic cloud | **Databases & Data Ops** | MongoDB Atlas, PlanetScale, Postgres DSNs, Grafana Cloud, Datadog, Dynatrace, and more | **Payments & Billing** | Stripe, PayPal, Square, GoCardless, and more | **Security & DevSecOps** | Snyk, Dependency-Track, CodeClimate, Codacy, OpsGenie, PagerDuty, and more -| **Misc. SaaS & Tools** | 1Password, Adobe, Atlassian/Jira, Asana, Netlify, Baremetrics, and mmore +| **Misc. SaaS & Tools** | 1Password, Adobe, Atlassian/Jira, Asana, Netlify, Baremetrics, and more ## Write Custom Rules! 
diff --git a/data/rules/replicate.yml b/data/rules/replicate.yml index 5a20417..b5f1a47 100644 --- a/data/rules/replicate.yml +++ b/data/rules/replicate.yml @@ -1,7 +1,7 @@ rules: - name: Replicate API Token id: kingfisher.replicate.1 - pattern: | # + pattern: | (?x) \b ( diff --git a/docker/Dockerfile b/docker/Dockerfile index ea87c51..6118b45 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -30,8 +30,6 @@ RUN set -eux; \ echo "$EXPECTED_CHECKSUM kingfisher.tgz" | sha256sum -c -; \ tar -xzf kingfisher.tgz; \ rm kingfisher.tgz checksums.txt; \ - tar -xzf kingfisher.tgz; \ - rm kingfisher.tgz; \ # locate the binary (pattern covers kingfisher-linux-x64 / kingfisher-linux-arm64) KF_PATH=$(find . -type f -name 'kingfisher*' -executable -print -quit); \ if [ -z "$KF_PATH" ]; then echo "No executable kingfisher binary found" >&2; exit 1; fi; \ From 6ac51c52383a63fbe753336cf869c98cecfab8bc Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 17 Jul 2025 15:25:10 -0700 Subject: [PATCH 022/357] fixed 1password rule id --- data/rules/onepassword.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/rules/onepassword.yml b/data/rules/onepassword.yml index 57f0980..e521e01 100644 --- a/data/rules/onepassword.yml +++ b/data/rules/onepassword.yml @@ -1,6 +1,6 @@ rules: - name: 1Password Service-Account Token - id: kingfisher.1password.2 + id: kingfisher.1password.1 pattern: | (?xi) \b From a2a88765671c434311583914c9b406338c9d2248 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 17 Jul 2025 16:42:39 -0700 Subject: [PATCH 023/357] fixed Makefile that included incorrect rust image for Linux builds --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index fa97341..4072974 100644 --- a/Makefile +++ b/Makefile @@ -242,7 +242,7 @@ endif linux-x64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/amd64 --rm \ - -v "$$(pwd):/src" -w /src rust:1.85-alpine sh -eu -c '\ 
+ -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ apk add --no-cache \ musl-dev \ gcc g++ make cmake pkgconfig \ @@ -279,7 +279,7 @@ linux-x64: check-docker create-dockerignore linux-arm64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/arm64 --rm \ - -v "$$(pwd):/src" -w /src rust:1.85-alpine sh -eu -c '\ + -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ apk add --no-cache \ musl-dev \ gcc g++ make cmake pkgconfig \ From 8b2c79e70fbc04bc2ab5c58a80e6679aed717299 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 18 Jul 2025 15:26:18 -0700 Subject: [PATCH 024/357] Updating GitHub Action to generate Docker image. Added rules for Diffbot, ai21, baseten. Fixed supabase rule. Added 'alg' to JWT validation output --- .github/workflows/release-docker.yml | 14 ++++-- CHANGELOG.md | 5 +++ Cargo.toml | 2 +- data/rules/ai21.yml | 44 +++++++++++++++++++ data/rules/baseten.yml | 42 ++++++++++++++++++ data/rules/diffbot.yml | 35 +++++++++++++++ .../rules/{supabasetoken.yml => supabase.yml} | 3 +- src/validation/jwt.rs | 3 +- 8 files changed, 140 insertions(+), 8 deletions(-) create mode 100644 data/rules/ai21.yml create mode 100644 data/rules/baseten.yml create mode 100644 data/rules/diffbot.yml rename data/rules/{supabasetoken.yml => supabase.yml} (95%) diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index dcd1ea9..baee4c1 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -1,8 +1,8 @@ # .github/workflows/release-docker.yml name: Publish Docker image on: - push: - tags: ["v*.*.*"] # every semantic-version tag + release: + types: [published] permissions: contents: read packages: write @@ -18,7 +18,13 @@ jobs: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - + - name: Prepare release tag + id: tag + run: | + TAG=${RELEASE_TAG#v} + echo "tag=$TAG" >>"$GITHUB_OUTPUT" + env: + RELEASE_TAG: ${{ 
github.event.release.tag_name }} - uses: docker/build-push-action@v5 with: context: . @@ -27,4 +33,4 @@ jobs: push: true tags: | ghcr.io/mongodb/kingfisher:latest - ghcr.io/mongodb/kingfisher:${{ github.ref_name }} + ghcr.io/mongodb/kingfisher:${{ steps.tag.outputs.tag }} diff --git a/CHANGELOG.md b/CHANGELOG.md index d385891..81b32a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes to this project will be documented in this file. +## [1.23.0] +- Updating GitHub Action to generate Docker image +- Added rules for Diffbot, ai21, baseten +- Fixed supabase rule + ## [1.22.0] - Added rules for Google Gemini AI, Cohere, Stability.ai, Replicate, Runway, Clarifai - Upgraded dependencies diff --git a/Cargo.toml b/Cargo.toml index 4bf7e3d..3d6bed2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.22.0" +version = "1.23.0" edition.workspace = true rust-version.workspace = true license.workspace = true diff --git a/data/rules/ai21.yml b/data/rules/ai21.yml new file mode 100644 index 0000000..7a400eb --- /dev/null +++ b/data/rules/ai21.yml @@ -0,0 +1,44 @@ +rules: + - name: AI21 Studio API Key + id: kingfisher.ai21studio.1 + pattern: | + (?xi) + \b + ai21 + (?:.|[\n\r]){0,32}? 
+ \b + ( + [0-9a-f]{8} + - + [0-9a-f]{4} + - + [0-9a-f]{4} + - + [0-9a-f]{4} + - + [0-9a-f]{12} + ) + \b + min_entropy: 3.2 + confidence: medium + examples: + - ai21 = 90cd6930-a9ae-4f15-8da0-dc1bbcd814b9 + - 'ai21_key: befa7ec1-1129-4713-8e92-bb53d1a4f632' + - ai21_token = ec2e14e9-0309-459b-ba76-1e59e1f42b87 + references: + - https://docs.ai21.com/reference/authentication + - https://docs.ai21.com/reference/manage-library-ref/list-library-files + + validation: + type: Http + content: + request: + method: GET + url: https://api.ai21.com/studio/v1/library/files + headers: + Authorization: Bearer {{ TOKEN }} + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] \ No newline at end of file diff --git a/data/rules/baseten.yml b/data/rules/baseten.yml new file mode 100644 index 0000000..17d2285 --- /dev/null +++ b/data/rules/baseten.yml @@ -0,0 +1,42 @@ +rules: + - name: Baseten API Key + id: kingfisher.baseten.1 + pattern: | + (?x) + \b + ( + [A-Za-z0-9]{8} + \. 
+ [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.4 + confidence: medium + examples: + - WSsDXzCD.uOcxAp7k82IvCKyY36TnpVbP4ZszP1qw + - crXCQC3W.CgCGGY1b9IfJan5TppW0Z07C9oMN2DmR + - h2wFkhFC.3WFVwVcxGFr4Qup0gyhvIuONwQxEpL0A + - XqbIpj04.x73j1zLUOEgGIKROqVbxsmggPdL8JvAY + references: + - https://docs.baseten.co/examples/vllm + - https://docs.baseten.co/reference/management-api/api-keys/lists-the-users-api-keys + - https://docs.baseten.co/reference/training-api/overview#authentication + - https://docs.baseten.co/reference/management-api/api-keys/creates-an-api-key + validation: + type: Http + content: + request: + method: GET + url: https://api.baseten.co/v1/api_keys + headers: + Authorization: Api-Key {{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"name"' + - '"type"' diff --git a/data/rules/diffbot.yml b/data/rules/diffbot.yml new file mode 100644 index 0000000..05e989b --- /dev/null +++ b/data/rules/diffbot.yml @@ -0,0 +1,35 @@ +rules: + - name: Diffbot API Key + id: kingfisher.diffbot.1 + pattern: | + (?xi) + \b + diffbot + (?:.|[\n\r]){0,32}? 
+ \b + ( + [0-9a-z]{32} + ) + \b + min_entropy: 3.0 + examples: + - diffbot_key = a7424adbafc4624e61482d0f60e43016 + references: + - https://docs.diffbot.com/reference/account + validation: + type: Http + content: + request: + method: GET + url: >- + https://api.diffbot.com/v4/account?token={{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"name"' + - '"email"' \ No newline at end of file diff --git a/data/rules/supabasetoken.yml b/data/rules/supabase.yml similarity index 95% rename from data/rules/supabasetoken.yml rename to data/rules/supabase.yml index d244d3b..6cb4abb 100644 --- a/data/rules/supabasetoken.yml +++ b/data/rules/supabase.yml @@ -4,9 +4,8 @@ rules: pattern: | (?xi) \b - sbp_ ( - [a-z0-9_-]{40} + sbp_[a-z0-9_-]{40} ) \b min_entropy: 3.5 diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs index 59cac9d..327f0d2 100644 --- a/src/validation/jwt.rs +++ b/src/validation/jwt.rs @@ -75,6 +75,7 @@ pub async fn validate_jwt(token: &str) -> Result<(bool, String)> { if let Some(iss) = claims.iss.clone() { // parse header now (kid, alg) let header = decode_header(token).map_err(|e| anyhow!("decode header: {e}"))?; + let alg = header.alg; // build discovery URL and fetch it (redirects disabled) let config_url = format!("{}/.well-known/openid-configuration", iss.trim_end_matches('/')); @@ -161,7 +162,7 @@ pub async fn validate_jwt(token: &str) -> Result<(bool, String)> { return Ok(( true, - format!("JWT valid (iss: {issuer}, aud: {:?})", extract_aud_strings(&claims)), + format!("JWT valid (alg: {:?}, iss: {issuer}, aud: {:?})", alg, extract_aud_strings(&claims)), )); } From 127ad17622d9b29dcd8f1eb7af14ba193cd3a8e4 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 18 Jul 2025 16:55:38 -0700 Subject: [PATCH 025/357] Updating GitHub Action to create docker image --- .github/workflows/release-docker.yml | 111 
++++++++++++++++++++------- CHANGELOG.md | 1 + 2 files changed, 86 insertions(+), 26 deletions(-) diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index baee4c1..ff131b2 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -1,36 +1,95 @@ # .github/workflows/release-docker.yml name: Publish Docker image + +############################################################################### +# Triggers +############################################################################### on: + # 1️⃣ Traditional: run automatically when a GitHub Release is published release: types: [published] + + # 2️⃣ Option 2: run every time the build-and-release workflow + # completes successfully on the main branch + workflow_run: + workflows: ["build-and-release"] + types: [completed] + branches: [main] + + # 3️⃣ Manual: “Run workflow” button or `gh workflow run` + workflow_dispatch: + inputs: + tag: + description: "Tag to push (leave blank → latest release)" + required: false + type: string + +############################################################################### permissions: - contents: read - packages: write + contents: read # needed for checkout + GH API + packages: write # push to ghcr.io + +############################################################################### jobs: build-and-push: + # Only run on workflow_run if the upstream workflow succeeded + if: github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: docker/setup-buildx-action@v3 - - uses: docker/login-action@v3 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Prepare release tag - id: tag - run: | - TAG=${RELEASE_TAG#v} - echo "tag=$TAG" >>"$GITHUB_OUTPUT" - env: - RELEASE_TAG: ${{ github.event.release.tag_name }} - - uses: docker/build-push-action@v5 - with: - 
context: . - file: docker/Dockerfile - platforms: linux/amd64,linux/arm64 - push: true - tags: | - ghcr.io/mongodb/kingfisher:latest - ghcr.io/mongodb/kingfisher:${{ steps.tag.outputs.tag }} + steps: + # ----------------------------------------------------------------------- + # Check out the exact commit that produced the artifacts (workflow_run), + # otherwise just use the SHA tied to the release / manual dispatch. + # ----------------------------------------------------------------------- + - uses: actions/checkout@v4 + with: + ref: ${{ github.event_name == 'workflow_run' && github.event.workflow_run.head_sha || github.sha }} + + # ----------------------------------------------------------------------- + # Decide which tag we’re going to publish + # ----------------------------------------------------------------------- + - name: Determine tag + id: tag + shell: bash + env: + # populated only for workflow_dispatch + MANUAL_TAG: ${{ github.event.inputs.tag }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -euo pipefail + if [[ "${GITHUB_EVENT_NAME}" == "release" ]]; then + RAW_TAG="${{ github.event.release.tag_name }}" + elif [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" && -n "${MANUAL_TAG}" ]]; then + RAW_TAG="${MANUAL_TAG}" + else + # workflow_run (or manual w/o tag) → ask GitHub API for latest release tag + RAW_TAG=$(curl -sSL -H "Authorization: Bearer ${GH_TOKEN}" \ + "https://api.github.com/repos/${{ github.repository }}/releases/latest" \ + | jq -r .tag_name) + fi + + # Strip a leading "v" so v1.2.3 → 1.2.3 + TAG=${RAW_TAG#v} + echo "Selected tag: ${TAG}" + echo "tag=${TAG}" >> "${GITHUB_OUTPUT}" + + # ----------------------------------------------------------------------- + # Build & push + # ----------------------------------------------------------------------- + - uses: docker/setup-buildx-action@v3 + + - uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - 
uses: docker/build-push-action@v5 + with: + context: . + file: docker/Dockerfile + platforms: linux/amd64,linux/arm64 + push: true + tags: | + ghcr.io/mongodb/kingfisher:latest + ghcr.io/mongodb/kingfisher:${{ steps.tag.outputs.tag }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 81b32a1..dffce78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file. - Updating GitHub Action to generate Docker image - Added rules for Diffbot, ai21, baseten - Fixed supabase rule +- Added 'alg' to JWT validation output ## [1.22.0] - Added rules for Google Gemini AI, Cohere, Stability.ai, Replicate, Runway, Clarifai From 15613285737885e6af4ab1dcaad1fa73d6093d68 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 18 Jul 2025 17:23:41 -0700 Subject: [PATCH 026/357] Updating GitHub Action to create docker image --- docker/Dockerfile | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 6118b45..317f5d1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -23,13 +23,8 @@ RUN set -eux; \ exit 1; \ fi; \ curl -fsSL "$LATEST_URL" -o kingfisher.tgz; \ - CHECKSUM_URL=$(curl -fsSL https://api.github.com/repos/mongodb/kingfisher/releases/latest \ - | grep -Eo "https://[^\"]*checksums.txt"); \ - curl -fsSL "$CHECKSUM_URL" -o checksums.txt; \ - EXPECTED_CHECKSUM=$(grep "${SUFFIX}" checksums.txt | awk '{print $1}'); \ - echo "$EXPECTED_CHECKSUM kingfisher.tgz" | sha256sum -c -; \ tar -xzf kingfisher.tgz; \ - rm kingfisher.tgz checksums.txt; \ + rm kingfisher.tgz CHECKSUM-*.txt; \ # locate the binary (pattern covers kingfisher-linux-x64 / kingfisher-linux-arm64) KF_PATH=$(find . 
-type f -name 'kingfisher*' -executable -print -quit); \ if [ -z "$KF_PATH" ]; then echo "No executable kingfisher binary found" >&2; exit 1; fi; \ From 1f1dbf312f53cfa8b526807849177cc3847862a0 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 21 Jul 2025 15:21:10 -0700 Subject: [PATCH 027/357] - Now generating DEB and RPM packages - Now releasing Docker images, and updated README - Added rule for Scale, Deepgram, AssemblyAI --- .github/workflows/release.yml | 40 +++++++++++++++++++++-- CHANGELOG.md | 6 ++++ Cargo.toml | 22 ++++++++++++- README.md | 52 +++++++++++++++++++++++++++++ data/rules/assemblyai.yml | 38 +++++++++++++++++++++ data/rules/deepgram.yml | 41 +++++++++++++++++++++++ data/rules/pagerdutyapikey.yml | 6 ++-- data/rules/scale.yml | 60 ++++++++++++++++++++++++++++++++++ docker/Dockerfile | 2 +- 9 files changed, 259 insertions(+), 8 deletions(-) create mode 100644 data/rules/assemblyai.yml create mode 100644 data/rules/deepgram.yml create mode 100644 data/rules/scale.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b681f1b..14bb7a6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -21,19 +21,37 @@ jobs: - uses: swatinem/rust-cache@v2 + - name: Install packaging tools + run: cargo install cargo-deb cargo-generate-rpm + - name: Build (Makefile linux-x64) run: make linux-x64 + - name: Build Debian package + run: | + cargo deb --no-build --target x86_64-unknown-linux-musl \ + --output target/release/kingfisher-amd64.deb + + - name: Build RPM package + run: | + cargo generate-rpm --target x86_64-unknown-linux-musl \ + --output target/release/kingfisher-amd64.rpm + - name: Move artifact to dist shell: bash run: | mkdir -p dist cp target/release/kingfisher-linux-x64.tgz dist/ + cp target/release/kingfisher-amd64.deb dist/ + cp target/release/kingfisher-amd64.rpm dist/ - uses: actions/upload-artifact@v4 with: name: kingfisher-linux-x64 - path: dist/kingfisher-*linux-x64*.* + path: | + 
dist/kingfisher-linux-x64.tgz + dist/kingfisher-amd64.deb + dist/kingfisher-amd64.rpm linux-arm64: name: Linux arm64 @@ -49,19 +67,37 @@ jobs: - uses: swatinem/rust-cache@v2 + - name: Install packaging tools + run: cargo install cargo-deb cargo-generate-rpm + - name: Build (Makefile linux-arm64) run: make linux-arm64 + - name: Build Debian package + run: | + cargo deb --no-build --target aarch64-unknown-linux-musl \ + --output target/release/kingfisher-arm64.deb + + - name: Build RPM package + run: | + cargo generate-rpm --target aarch64-unknown-linux-musl \ + --output target/release/kingfisher-arm64.rpm + - name: Move artifact to dist shell: bash run: | mkdir -p dist cp target/release/kingfisher-linux-arm64.tgz dist/ + cp target/release/kingfisher-arm64.deb dist/ + cp target/release/kingfisher-arm64.rpm dist/ - uses: actions/upload-artifact@v4 with: name: kingfisher-linux-arm64 - path: dist/kingfisher-*linux-arm64*.* + path: | + dist/kingfisher-linux-arm64.tgz + dist/kingfisher-arm64.deb + dist/kingfisher-arm64.rpm macos-x64: diff --git a/CHANGELOG.md b/CHANGELOG.md index dffce78..9f609a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to this project will be documented in this file. 
+## [1.24.0] +- Now generating DEB and RPM packages +- Now releasing Docker images, and updated README +- Added rule for Scale, Deepgram, AssemblyAI + + ## [1.23.0] - Updating GitHub Action to generate Docker image - Added rules for Diffbot, ai21, baseten diff --git a/Cargo.toml b/Cargo.toml index 3d6bed2..5ab6f6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace.package] edition = "2021" -rust-version = "1.83" +rust-version = "1.88" license = "Apache-2.0" authors = ["Mick Grove "] homepage = "https://github.com/mongodb/kingfisher" @@ -19,6 +19,25 @@ homepage.workspace = true repository.workspace = true publish.workspace = true +[package.metadata.deb] +name = "kingfisher" +maintainer = "Mick Grove " +depends = "$auto" +section = "utils" +priority = "optional" +assets = [ + ["target/release/kingfisher", "/usr/bin/kingfisher", "755"] +] + +[package.metadata.generate-rpm] +package = "kingfisher" +summary = "MongoDB's blazingly fast secret scanning and validation tool" +license = "Apache-2.0" +url = "https://github.com/mongodb/kingfisher" +assets = [ + { source = "target/release/kingfisher", dest = "/usr/bin/kingfisher", mode = "755" } +] + [dependencies] clap = { version = "4.5", features = [ "cargo", @@ -27,6 +46,7 @@ clap = { version = "4.5", features = [ "unicode", "wrap_help", ] } + anyhow = "1.0" bstr = { version = "1.12", features = ["serde"] } fixedbitset = "0.5" diff --git a/README.md b/README.md index 854aa97..fba254f 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,58 @@ make darwin-all # builds both x64 and arm64 make all # builds for every OS and architecture supported ``` + +### Run Kingfisher in Docker + + +Run the dockerized Kingfisher container: +```bash +# GitHub Container Registry +docker run --rm ghcr.io/mongodb/kingfisher:latest --version + +# Scan the current working directory +# (mounts your code at /src and scans it) +docker run --rm \ + -v "$PWD":/src \ + ghcr.io/mongodb/kingfisher:latest scan /src + + +# Scan while 
providing a GitHub token +# Mounts your working dir at /proj and passes in the token: +docker run --rm \ + -e KF_GITHUB_TOKEN=ghp_… \ + -v "$PWD":/proj \ + ghcr.io/mongodb/kingfisher:latest \ + scan --git-url https://github.com/org/private_repo.git + +# Scan and write a JSON report locally +# Here we: +# 1. Mount $PWD → /proj +# 2. Tell Kingfisher to write findings.json inside /proj/reports +# 3. Ensure ./reports exists on your host so Docker can mount it +mkdir -p reports + +# run and output into host’s ./reports directory +docker run --rm \ + -v "$PWD":/proj \ + ghcr.io/mongodb/kingfisher:latest \ + scan /proj \ + --format json \ + --output /proj/reports/findings.json + + +# Tip: you can combine multiple mounts if you prefer separating source vs. output: +# Here /src is read‑only, and /out holds your generated reports +docker run --rm \ + -v "$PWD":/src:ro \ + -v "$PWD/reports":/out \ + ghcr.io/mongodb/kingfisher:latest \ + scan /src \ + --format json \ + --output /out/findings.json + +``` + # 🔐 Detection Rules at a Glance Kingfisher ships with hundreds of rules that cover everything from classic cloud keys to the latest LLM-API secrets. Below is an overview: diff --git a/data/rules/assemblyai.yml b/data/rules/assemblyai.yml new file mode 100644 index 0000000..3c81111 --- /dev/null +++ b/data/rules/assemblyai.yml @@ -0,0 +1,38 @@ +rules: + - name: AssemblyAI API Key + id: kingfisher.assemblyai.1 + pattern: | + (?xi) + \b + assemblyai + (?:.|[\n\r]){0,32}? 
+ \b + ( + [0-9a-z]{32} + ) + \b + min_entropy: 3.0 + confidence: medium + examples: + - assemblyai = fa0ed91518b345468f9df7570f31f18a + - assemblyai_token = a741b921ae1f4446826a784726b6a71a + references: + - https://www.assemblyai.com/docs/api-reference/overview + - https://www.assemblyai.com/docs/api-reference/transcripts/list + + validation: + type: Http + content: + request: + method: GET + url: https://api.assemblyai.com/v2/transcript?limit=1 + headers: + Authorization: '{{ TOKEN }}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: ['"page_details"', '"transcripts"'] diff --git a/data/rules/deepgram.yml b/data/rules/deepgram.yml new file mode 100644 index 0000000..850afa9 --- /dev/null +++ b/data/rules/deepgram.yml @@ -0,0 +1,41 @@ +rules: + - name: Deepgram API Key + id: kingfisher.deepgram.1 + pattern: | + (?xi) + \b + deepgram + (?:.|[\n\r]){0,32}? + \b + ( + [0-9a-f]{40} + ) + \b + min_entropy: 3.5 + confidence: medium + examples: + - deepgram_key = 948c19ecde2818a1a357fffb14d2fc2a03d3c56e + - 'deepgram-api: 6c8ba06cb14a32d508948606d8b5d9c8f70e493b' + - deepgram = 12e217f37eb173f0c8f1b7309f4207c7dca20186 + - deepgram token 1 == 1f8946087e64b14dffd069b78554e217b3ed34d4 + references: + - https://developers.deepgram.com/docs/authenticating + - https://developers.deepgram.com/reference/management-api/models/list + - https://developers.deepgram.com/reference/list-keys + + validation: + type: Http + content: + request: + method: GET + url: https://api.deepgram.com/v1/projects + headers: + Authorization: Token {{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: ['"name"'] diff --git a/data/rules/pagerdutyapikey.yml b/data/rules/pagerdutyapikey.yml index d65bced..d51814e 100644 --- a/data/rules/pagerdutyapikey.yml +++ 
b/data/rules/pagerdutyapikey.yml @@ -4,9 +4,7 @@ rules: pattern: | (?xi) \b - (?: - Token | - Authorization | + (?: pd[_-]? | pd[_-]? | pagerduty[_-]? | @@ -19,7 +17,7 @@ rules: ( u\+[A-Z0-9_+-]{18} | # personal user token (20 chars) [A-Z0-9_-]{20} | # legacy PAT (20 chars, mixed case) - [a-f0-9]{32} # integration / routing key (32 hex, lower case) + [a-f0-9]{32} # integration / routing key (32 hex, lower case) ) \b min_entropy: 3.5 diff --git a/data/rules/scale.yml b/data/rules/scale.yml new file mode 100644 index 0000000..b0d9b90 --- /dev/null +++ b/data/rules/scale.yml @@ -0,0 +1,60 @@ +rules: + - name: Scale API Key + id: kingfisher.scale.1 + pattern: | + (?x) + \b + ( # capture => TOKEN + live_ # live-mode prefix per docs + [0-9a-f]{32} # 32 lowercase hex chars + ) + \b + min_entropy: 3.1 + confidence: medium + examples: + - live_8df31399ec4a4755a7cf9e0fb59f967a + - live_54d1bd2d1e62430bb2d521d298ec4231 + - live_1b9fc721a4624a478211ce613c674a03 + references: + - https://scale.com/docs/api-reference/authentication + - https://scale.com/docs/api-reference/studio#list-all-teammates + - https://scale.com/docs/api-reference/authentication#test-and-live-modes + + validation: + type: Http + content: + request: + method: GET + url: https://api.scale.com/v1/teams + headers: + Authorization: 'Basic {{ TOKEN | append: ":" | b64enc }}' + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"email"' + - '"role"' + + - name: Scale Callback Auth Key + id: kingfisher.scale.2 + pattern: | + (?x) + \b + ( + live_auth_ + [0-9a-f]{32} + ) + \b + min_entropy: 3.1 + confidence: medium + examples: + - live_auth_250ae896ada542c08a95734f935c871a + references: + - https://scale.com/docs/api-reference/authentication#callback-authentication + # Callback keys are *only* echoed by Scale in webhook headers and + # can’t be validated via an API call, so no 
`validation:` block. diff --git a/docker/Dockerfile b/docker/Dockerfile index 317f5d1..f44e792 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,7 +1,7 @@ # syntax=docker/dockerfile:1 FROM alpine:latest -RUN apk add --no-cache curl tar +RUN apk add --no-cache curl tar git sh ARG TARGETARCH # set automatically by BuildKit ENV TARGETARCH=${TARGETARCH} From 39a4f1d25b73b4eb38942a10420489b19e52d2a6 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 21 Jul 2025 15:21:40 -0700 Subject: [PATCH 028/357] - Now generating DEB and RPM packages - Now releasing Docker images, and updated README - Added rule for Scale, Deepgram, AssemblyAI --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 5ab6f6b..a3d5770 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.23.0" +version = "1.24.0" edition.workspace = true rust-version.workspace = true license.workspace = true From 7a77ef3dbfad154ae09779198d80bd62290bd9fc Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 21 Jul 2025 22:43:29 -0700 Subject: [PATCH 029/357] Fixed bug in Makefile hen setting PROJECT_NAME --- Makefile | 3 +-- data/rules/pagerdutyapikey.yml | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 4072974..8975568 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,7 @@ SHELL := /usr/bin/env bash .SHELLFLAGS := -eu -o pipefail -c -# Detect project name from Cargo.toml -PROJECT_NAME := $(shell grep '^name' Cargo.toml | cut -d '"' -f 2) +PROJECT_NAME := kingfisher # Determine OS and whether to use gtar on darwin OS := $(shell uname) diff --git a/data/rules/pagerdutyapikey.yml b/data/rules/pagerdutyapikey.yml index d51814e..8bd80b1 100644 --- a/data/rules/pagerdutyapikey.yml +++ b/data/rules/pagerdutyapikey.yml @@ -5,7 +5,6 @@ rules: (?xi) \b (?: - pd[_-]? | pd[_-]? | pagerduty[_-]? 
| pagerduty From 60a4d1a8dd192f44e8adb779744a69f7689a840e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 21 Jul 2025 22:52:05 -0700 Subject: [PATCH 030/357] Fixed bug in Makefile hen setting PROJECT_NAME --- data/rules/pagerdutyapikey.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/data/rules/pagerdutyapikey.yml b/data/rules/pagerdutyapikey.yml index 8bd80b1..b0a3c9b 100644 --- a/data/rules/pagerdutyapikey.yml +++ b/data/rules/pagerdutyapikey.yml @@ -18,27 +18,27 @@ rules: [A-Z0-9_-]{20} | # legacy PAT (20 chars, mixed case) [a-f0-9]{32} # integration / routing key (32 hex, lower case) ) - \b + \b min_entropy: 3.5 confidence: medium examples: - - "Authorization: Token token=u+Lyhd2_N2MCy+ZoH-S5" + - "pagerduty: Token token=u+Lyhd2_N2MCy+ZoH-S5" - pd_key = u+3xVszZ-b4m+T6d23KA - - Token token=ABCDEF1234567890ABCDEF1234567890 + - pagerduty token=ABCDEF1234567890ABCDEF1234567890 references: - - https://developer.pagerduty.com/api-reference/4555ca1c983d0-get-the-current-user + - https://developer.pagerduty.com/api-reference/4555ca1c983d0-get-the-current-user validation: type: Http content: request: method: GET - url: https://api.pagerduty.com/users + url: https://api.pagerduty.com/users headers: Authorization: Token token={{ TOKEN }} Accept: application/json response_matcher: - report_response: true - - type: JsonValid - - type: WordMatch - words: + - type: JsonValid + - type: WordMatch + words: - '"users":' From f02f12b424ae35427af9387271674300801c39b0 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 22 Jul 2025 08:24:45 -0700 Subject: [PATCH 031/357] Fixed permission issue with cargo-deb running after docker based linux build --- Makefile | 21 ++- README.md | 3 +- evergreen.yml | 447 -------------------------------------------------- 3 files changed, 13 insertions(+), 458 deletions(-) delete mode 100644 evergreen.yml diff --git a/Makefile b/Makefile index 8975568..7a9da74 100644 --- a/Makefile +++ b/Makefile @@ -42,6 
+42,8 @@ endif ARCHIVE_CMD = $(TAR_CMD) $(TAR_OPTS) SUDO_CMD := $(shell command -v sudo 2>/dev/null) +HOST_UID := $(shell id -u) +HOST_GID := $(shell id -g) .PHONY: default help create-dockerignore ubuntu-x64 ubuntu-arm64 linux-x64 linux-arm64 darwin-arm64 darwin-x64 windows-x64 windows \ linux darwin all list-archives check-docker check-rust clean tests @@ -241,7 +243,8 @@ endif linux-x64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/amd64 --rm \ - -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ + -e HOST_UID=$(HOST_UID) -e HOST_GID=$(HOST_GID) \ + -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ apk add --no-cache \ musl-dev \ gcc g++ make cmake pkgconfig \ @@ -262,8 +265,9 @@ linux-x64: check-docker create-dockerignore cargo build --release --target x86_64-unknown-linux-musl && \ cd target/x86_64-unknown-linux-musl/release && \ find "./$(PROJECT_NAME)" -type f -executable \ - -not -name "*.d" -not -name "*.rlib" \ - -exec sha256sum {} \; > CHECKSUM.txt \ + -not -name "*.d" -not -name "*.rlib" \ + -exec sha256sum {} \; > CHECKSUM.txt && \ + chown -R $$HOST_UID:$$HOST_GID /src/target \ ' @cd target/release && \ rm -rf $(PROJECT_NAME)-linux-x64.tgz && \ @@ -278,7 +282,8 @@ linux-x64: check-docker create-dockerignore linux-arm64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/arm64 --rm \ - -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ + -e HOST_UID=$(HOST_UID) -e HOST_GID=$(HOST_GID) \ + -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ apk add --no-cache \ musl-dev \ gcc g++ make cmake pkgconfig \ @@ -300,8 +305,9 @@ linux-arm64: check-docker create-dockerignore \ cd target/aarch64-unknown-linux-musl/release && \ find "./$(PROJECT_NAME)" -type f -executable \ - -not -name "*.d" -not -name "*.rlib" \ - -exec sha256sum {} \; > CHECKSUM.txt \ + -not -name "*.d" -not -name "*.rlib" \ + -exec sha256sum {} \; > CHECKSUM.txt && \ + chown -R $$HOST_UID:$$HOST_GID 
/src/target \ ' @cd target/release && \ rm -rf $(PROJECT_NAME)-linux-arm64.tgz && \ @@ -430,6 +436,3 @@ notices: @echo "Generating third-party notices..." @cargo install cargo-bundle-licenses @cargo bundle-licenses --format yaml --output THIRD_PARTY_NOTICES - -evergreen-patch: - @evergreen patch --project kingfisher --variants all --tasks build \ No newline at end of file diff --git a/README.md b/README.md index fba254f..a6142ae 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,7 @@ Kingfisher extends Nosey Parker with live secret validation via cloud-provider A - **Language‑Aware Accuracy**: AST parsing in 20+ languages via Tree‑Sitter reduces contextless regex matches. see [docs/PARSING.md](/docs/PARSING.md) - **Built-In Validation**: Hundreds of built-in detection rules, many with live-credential validators that call the relevant service APIs (AWS, Azure, GCP, Stripe, etc.) to confirm a secret is active. You can extend or override the library by adding YAML-defined rules on the command line—see [docs/RULES.md](/docs/RULES.md) for details - **Git History Scanning**: Scan local repos, remote GitHub/GitLab orgs/users, or arbitrary GitHub/GitLab repos - -# Getting Started +- **Baseline Support:** Generate and manage baseline files to ignore known secrets and report only newly introduced ones. See ([docs/BASELINE.md](docs/BASELINE.md)) for details. 
## Installation diff --git a/evergreen.yml b/evergreen.yml deleted file mode 100644 index a705ac6..0000000 --- a/evergreen.yml +++ /dev/null @@ -1,447 +0,0 @@ -project: kingfisher -exec_timeout_secs: 10800 - -aliases: - - name: patch - variants: [all] - tasks: [all] - -github_pr_aliases: - - variant: ".*" - task: ".*" - -expansions: - &cdn - aws_cdn_bucket: cdn-origin-kingfisher - aws_cdn_role: arn:aws:iam::119629040606:role/s3-access.cdn-origin-kingfisher - cdn_prefix: kingfisher - -######################## -# SHARED FUNCTIONS # -######################## -functions: - - # -- fetch repo -------------------------------------------------- - fetch-source: - - command: git.get_project - params: - directory: kingfisher - shallow_clone: true - - setup-base-linux: - - command: shell.exec - params: - working_dir: kingfisher - shell: bash - script: | - set -euo pipefail - sudo apt-get update -qq - # Only a tiny set of tools the Makefile assumes exist; - # the Makefile will install everything else it needs. - sudo apt-get install -y --no-install-recommends \ - build-essential curl ca-certificates xz-utils pkg-config - - extract-version: - # Parse the value - - command: shell.exec - params: - working_dir: kingfisher - shell: bash - script: | - set -euo pipefail - # Grab the first `version = "…"` line, strip everything except the value - V=$(grep -m1 '^version *= *"' Cargo.toml | cut -d'"' -f2) - # Write it as a one-line YAML file understood by Evergreen - echo "version: \"$V\"" > version.yml - - # Load it into the task’s expansions - - command: expansions.update # ← writes the new variable - params: - file: kingfisher/version.yml # one-key YAML file we just created - ignore_missing_file: false - - # -- install rustup + Rust 1.85 on macOS ------------------------ - setup-rust-macos: - - command: shell.exec - params: - working_dir: kingfisher - shell: bash - script: | - set -euo pipefail - - REQUIRED=1.85.0 - - # install rustup if missing - if ! 
command -v rustup >/dev/null 2>&1; then - echo "⬇️ installing rustup…" - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \ - | sh -s -- -y --profile minimal --default-toolchain none - export PATH="$HOME/.cargo/bin:$PATH" - hash -r - fi - - # ensure the requested tool-chain is present & default - if ! rustup toolchain list | grep -q "$REQUIRED"; then - rustup toolchain install "$REQUIRED" - fi - rustup default "$REQUIRED" - - # expose rustc version - rustc --version - - - # -- run the Makefile target passed via $BUILD ------------------ - make-build: - - command: shell.exec - params: - working_dir: kingfisher - shell: bash - add_expansions_to_env: true - script: | - set -euo pipefail - [ -f "$HOME/.cargo/env" ] && source "$HOME/.cargo/env" - export PATH="$HOME/.cargo/bin:$PATH" - echo "▶ make $BUILD" - make "$BUILD" - - package-unix: - - command: shell.exec - params: - working_dir: kingfisher - shell: bash - script: | - set -euo pipefail - mkdir -p dist - cp target/release/kingfisher-* dist/ || true - - # build Evergreen artifact manifest - cd dist - { - echo '[' - first=1 - for f in kingfisher-*; do - [ -f "$f" ] || continue - [ $first -eq 0 ] && echo ',' - first=0 - printf ' { "name": "%s", "link": "%s" }' "$f" "$f" - done - echo - echo ']' - } > artifacts.json - ls -lh - - - command: attach.artifacts - params: - working_dir: kingfisher/dist - files: ["artifacts.json"] - - macos-sign-notify: - - command: shell.exec - params: - working_dir: kingfisher - shell: bash - add_expansions_to_env: true - script: | - set -euo pipefail - shopt -s nullglob - - # One-liner: “give me the first macOS tarball if it exists” - archive=$(printf '%s\n' dist/kingfisher-darwin-*.tgz | head -n 1 || true) - - if [[ -z "$archive" || ! 
-f "$archive" ]]; then - echo "‼️ No macOS archive to notarize – skipping" - exit 0 - fi - - echo "✍️ Notarizing $archive …" - python evergreen/macos_notary.py "$archive" - - - - command: attach.artifacts - params: - working_dir: kingfisher/dist # where artifacts.json sits - files: ["artifacts.json"] - - package-windows: - - command: shell.exec - params: - working_dir: kingfisher - shell: powershell.exe - script: | - $ErrorActionPreference = 'Stop' - New-Item -ItemType Directory -Force dist | Out-Null - Copy-Item target\release\kingfisher-windows-x64.zip dist\ -Force - Write-Host "✓ Copied ZIP into dist\" - - # rebuild artifacts.json with the single ZIP we just copied - Get-ChildItem dist\*.zip -File | - ForEach-Object { [pscustomobject]@{name=$_.Name; link=$_.Name} } | - ConvertTo-Json -Depth 2 | - Set-Content dist\artifacts.json -Encoding ascii - Write-Host "✓ Re-generated artifacts.json" - - - command: attach.artifacts - params: - working_dir: kingfisher/dist - files: ["artifacts.json"] - - - # -- run `make tests` on Bash platforms ------------------------ - run-tests-unix: - - command: shell.exec - params: - working_dir: kingfisher - shell: bash - add_expansions_to_env: true - script: | - set -euo pipefail - [ -f "$HOME/.cargo/env" ] && source "$HOME/.cargo/env" - export PATH="$HOME/.cargo/bin:$PATH" - echo "▶ make tests" - make tests - - # -- run release tests on Windows ------------------------------ - run-tests-windows: - - command: shell.exec - params: - working_dir: kingfisher - shell: powershell.exe - script: | - $ErrorActionPreference = 'Stop' - - # Always pull the latest PATH from the registry - if (Get-Command Update-SessionEnvironment -ErrorAction SilentlyContinue) { - Update-SessionEnvironment - } else { - Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" - Update-SessionEnvironment - } - - # tell Git to use Windows’ native certificate store - git config --global http.sslBackend schannel - - # Ensure tools are present - if (-not 
(Get-Command cmake.exe -ErrorAction SilentlyContinue)) { - choco install cmake -y --installargs 'ADD_CMAKE_TO_PATH=System' - Update-SessionEnvironment - } - - if (-not (Get-Command git.exe -ErrorAction SilentlyContinue)) { - choco install git -y --params "/GitOnlyOnPath" - Update-SessionEnvironment - } - - if (-not (Get-Command cargo-nextest -ErrorAction SilentlyContinue)) { - cargo install --locked cargo-nextest - } - - # Run the release test-suite - Write-Host "▶ cargo nextest run --release" - cargo nextest run --release --workspace --all-targets - - # Install GitHub CLI ------------------------------------------------- - install-gh-cli: - # Linux (RPM/DEB not shown – easiest is the official static tarball) - - command: shell.exec - params: - shell: bash - script: | - set -euo pipefail - if [[ "$OS_FAMILY" == "windows" ]]; then exit 0; fi - if command -v gh >/dev/null 2>&1; then exit 0; fi - curl -sSL https://github.com/cli/cli/releases/latest/download/gh_$(uname -m | sed 's/aarch64/arm64/;s/x86_64/amd64/')_linux.tar.gz \ - | tar -xz --strip-components=1 -C /usr/local/bin gh_*/bin/gh - gh --version - - # macOS via brew - - command: shell.exec - params: - run_on_distro: macos-14 # only runs on macOS tasks - shell: bash - script: | - set -euo pipefail - if command -v gh >/dev/null 2>&1; then exit 0; fi - brew install gh - gh --version - - # Windows via choco - - command: shell.exec - params: - run_on_distro: windows-2022-large - shell: powershell.exe - script: | - if (Get-Command gh.exe -ErrorAction SilentlyContinue) { exit 0 } - choco install gh -y - gh --version - - - # Upload (or create) GitHub draft release --------------------------- - upload-github-release: - - command: shell.exec - params: - working_dir: kingfisher - add_expansions_to_env: true # so $version is available - shell: bash - script: | - set -euo pipefail - export GH_TOKEN="${GITHUB_TOKEN:?GITHUB_TOKEN not set}" - - TAG="v${version}" - TITLE="Kingfisher ${version}" - ASSET_DIR="dist" - - # Does 
the draft already exist? - if gh release view "$TAG" --json isDraft >/dev/null 2>&1; then - echo "📝 Draft release $TAG exists." - else - echo "➕ Creating draft release $TAG" - gh release create "$TAG" --draft --title "$TITLE" --notes "Automated draft for $TAG" - fi - - # Get list of already-uploaded asset names - EXISTING=$(gh release view "$TAG" --json assets -q '.assets[].name' | sort) - - for FILE in $ASSET_DIR/*.{tgz,tar.gz,zip} ; do - [[ -f "$FILE" ]] || continue - NAME=$(basename "$FILE") - if grep -qxF "$NAME" <<< "$EXISTING"; then - echo "✔ $NAME already attached – skipping" - else - echo "⬆ Uploading $NAME" - gh release upload "$TAG" "$FILE" --clobber - fi - done - - publish-cdn: - # ZIP archives - - command: s3.put - params: - working_dir: kingfisher - role_arn: ${aws_cdn_role} - bucket: ${aws_cdn_bucket} - region: us-east-1 - local_files_include_filter_prefix: kingfisher - local_files_include_filter: - - dist/*.zip - - release/*.zip - remote_file: ${cdn_prefix}/${version}/ - content_type: application/zip # ← non-blank value - permissions: public-read - visibility: public - skip_existing: true - - # .tgz archives - - command: s3.put - params: - working_dir: kingfisher - role_arn: ${aws_cdn_role} - bucket: ${aws_cdn_bucket} - region: us-east-1 - local_files_include_filter_prefix: kingfisher - local_files_include_filter: - - dist/*.tgz - - dist/*.tar.gz - - release/*.tgz - - release/*.tar.gz - remote_file: ${cdn_prefix}/${version}/ - content_type: application/x-gzip - permissions: public-read - visibility: public - skip_existing: true - - - -######################## -# TASKS # -######################## -tasks: - - - name: build-linux-docker - commands: - - func: fetch-source - - func: extract-version - - func: make-build - - func: package-unix - # - func: install-gh-cli - # - func: upload-github-release - - # ---------- macOS (x64 / arm64) ------------------------------- - - name: build-macos - commands: - - func: fetch-source - - func: setup-rust-macos - - 
func: extract-version - - func: make-build - - func: run-tests-unix - - func: package-unix - - func: macos-sign-notify # notarize the macOS app - # - func: install-gh-cli - # - func: upload-github-release - - # ---------- Windows ------------------------------------------- - - name: build-windows - commands: - - func: fetch-source - - command: shell.exec - params: - working_dir: kingfisher - shell: cmd.exe - add_expansions_to_env: true - script: | - .\buildwin.bat -force - - func: extract-version - - func: run-tests-windows - - func: package-windows - # - func: install-gh-cli - # - func: upload-github-release - -######################## -# BUILD VARIANTS # -######################## -buildvariants: - - # ---- Linux ---------------------------------------------------- - - name: linux-x64-docker - display_name: "🐧 Linux x64" - run_on: rhel80-docker-medium - expansions: - <<: *cdn - BUILD: linux-x64 - tasks: [build-linux-docker] - - - name: linux-arm64-docker - display_name: "🐧 Linux arm64" - run_on: ubuntu2404-arm64-latest-small - expansions: - <<: *cdn - BUILD: linux-arm64 - tasks: [build-linux-docker] - - # ---- macOS ---------------------------------------------------- - - name: darwin-x64 - display_name: "🍎 macOS x64" - run_on: macos-14 - expansions: - <<: *cdn - BUILD: darwin-x64 - tasks: [build-macos] - - - name: darwin-arm64 - display_name: "🍎 macOS arm64" - run_on: macos-14-arm64-gui - expansions: - <<: *cdn - BUILD: darwin-arm64 - tasks: [build-macos] - - # ---- Windows -------------------------------------------------- - - name: windows-x64 - display_name: "🪟 Windows x64" - run_on: windows-2022-large - expansions: - <<: *cdn - BUILD: "windows-x64" - garasign_jsign_image: 901841024863.dkr.ecr.us-east-1.amazonaws.com/release-infrastructure/garasign-jsign:latest - garasign_jsign_username: ${GARASIGN_USER1_USERNAME|} - garasign_jsign_password: ${GARASIGN_USER1_PASSWORD|} - tasks: [build-windows] From 83bde3247eb88dd345218be32dae5573e1fd95f0 Mon Sep 17 00:00:00 
2001 From: Mick Grove Date: Tue, 22 Jul 2025 08:25:42 -0700 Subject: [PATCH 032/357] Fixed permission issue with cargo-deb running after docker based linux build --- Cargo.toml | 1 + README.md | 1 + 2 files changed, 2 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index a3d5770..1361d7a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ publish = false [package] name = "kingfisher" version = "1.24.0" +description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true license.workspace = true diff --git a/README.md b/README.md index a6142ae..7734648 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ Kingfisher extends Nosey Parker with live secret validation via cloud-provider A - **Git History Scanning**: Scan local repos, remote GitHub/GitLab orgs/users, or arbitrary GitHub/GitLab repos - **Baseline Support:** Generate and manage baseline files to ignore known secrets and report only newly introduced ones. See ([docs/BASELINE.md](docs/BASELINE.md)) for details. +# Getting Started ## Installation On macOS, you can simply From 9ed3cb46b0706245fc90083e9aad5fbdc31e03e4 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 22 Jul 2025 08:27:20 -0700 Subject: [PATCH 033/357] Fixed permission issue with cargo-deb running after docker based linux build --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7734648..9a7895e 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Kingfisher extends Nosey Parker with live secret validation via cloud-provider A - **Language‑Aware Accuracy**: AST parsing in 20+ languages via Tree‑Sitter reduces contextless regex matches. see [docs/PARSING.md](/docs/PARSING.md) - **Built-In Validation**: Hundreds of built-in detection rules, many with live-credential validators that call the relevant service APIs (AWS, Azure, GCP, Stripe, etc.) to confirm a secret is active. 
You can extend or override the library by adding YAML-defined rules on the command line—see [docs/RULES.md](/docs/RULES.md) for details - **Git History Scanning**: Scan local repos, remote GitHub/GitLab orgs/users, or arbitrary GitHub/GitLab repos -- **Baseline Support:** Generate and manage baseline files to ignore known secrets and report only newly introduced ones. See ([docs/BASELINE.md](docs/BASELINE.md)) for details. +- **Baseline Support:** Generate and manage baseline files to ignore known secrets and report only newly introduced ones. See ([docs/BASELINE.md](docs/BASELINE.md)) for details. # Getting Started ## Installation From 77b26e12de9836a342b2e9b17668467a57790ced Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 22 Jul 2025 09:48:01 -0700 Subject: [PATCH 034/357] Fixed permission issue with cargo-deb running after docker based linux build --- Makefile | 38 ++++++++------------------------------ 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/Makefile b/Makefile index 7a9da74..e662d7e 100644 --- a/Makefile +++ b/Makefile @@ -42,8 +42,6 @@ endif ARCHIVE_CMD = $(TAR_CMD) $(TAR_OPTS) SUDO_CMD := $(shell command -v sudo 2>/dev/null) -HOST_UID := $(shell id -u) -HOST_GID := $(shell id -g) .PHONY: default help create-dockerignore ubuntu-x64 ubuntu-arm64 linux-x64 linux-arm64 darwin-arm64 darwin-x64 windows-x64 windows \ linux darwin all list-archives check-docker check-rust clean tests @@ -243,8 +241,7 @@ endif linux-x64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/amd64 --rm \ - -e HOST_UID=$(HOST_UID) -e HOST_GID=$(HOST_GID) \ - -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ + -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ apk add --no-cache \ musl-dev \ gcc g++ make cmake pkgconfig \ @@ -264,26 +261,16 @@ linux-x64: check-docker create-dockerignore \ cargo build --release --target x86_64-unknown-linux-musl && \ cd target/x86_64-unknown-linux-musl/release && \ - find 
"./$(PROJECT_NAME)" -type f -executable \ - -not -name "*.d" -not -name "*.rlib" \ - -exec sha256sum {} \; > CHECKSUM.txt && \ - chown -R $$HOST_UID:$$HOST_GID /src/target \ + sha256sum kingfisher > CHECKSUM.txt && \ + tar -czf /src/target/release/kingfisher-linux-x64.tgz \ + kingfisher CHECKSUM.txt \ ' - @cd target/release && \ - rm -rf $(PROJECT_NAME)-linux-x64.tgz && \ - cp ../x86_64-unknown-linux-musl/release/$(PROJECT_NAME) . && \ - cp ../x86_64-unknown-linux-musl/release/CHECKSUM.txt CHECKSUM-linux-x64.txt && \ - tar --no-xattrs -czf $(PROJECT_NAME)-linux-x64.tgz \ - $(PROJECT_NAME) CHECKSUM-linux-x64.txt && \ - rm $(PROJECT_NAME) && \ - sha256sum $(PROJECT_NAME)-linux-x64.tgz >> CHECKSUM-linux-x64.txt $(MAKE) list-archives linux-arm64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/arm64 --rm \ - -e HOST_UID=$(HOST_UID) -e HOST_GID=$(HOST_GID) \ - -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ + -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ apk add --no-cache \ musl-dev \ gcc g++ make cmake pkgconfig \ @@ -304,19 +291,10 @@ linux-arm64: check-docker create-dockerignore cargo build --release --target aarch64-unknown-linux-musl && \ \ cd target/aarch64-unknown-linux-musl/release && \ - find "./$(PROJECT_NAME)" -type f -executable \ - -not -name "*.d" -not -name "*.rlib" \ - -exec sha256sum {} \; > CHECKSUM.txt && \ - chown -R $$HOST_UID:$$HOST_GID /src/target \ + sha256sum kingfisher > CHECKSUM.txt && \ + tar -czf /src/target/release/kingfisher-linux-arm64.tgz \ + kingfisher CHECKSUM.txt \ ' - @cd target/release && \ - rm -rf $(PROJECT_NAME)-linux-arm64.tgz && \ - cp ../aarch64-unknown-linux-musl/release/$(PROJECT_NAME) . 
&& \ - cp ../aarch64-unknown-linux-musl/release/CHECKSUM.txt CHECKSUM-linux-arm64.txt && \ - tar --no-xattrs -czf $(PROJECT_NAME)-linux-arm64.tgz \ - $(PROJECT_NAME) CHECKSUM-linux-arm64.txt && \ - rm $(PROJECT_NAME) && \ - sha256sum $(PROJECT_NAME)-linux-arm64.tgz >> CHECKSUM-linux-arm64.txt $(MAKE) list-archives From ef35c4471de877370490f6223fb23c014bb187eb Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 22 Jul 2025 10:33:48 -0700 Subject: [PATCH 035/357] Fixing issue uploading tgz linux builds in github action --- .github/workflows/release.yml | 47 +++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 14bb7a6..2d80d05 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -30,28 +30,33 @@ jobs: - name: Build Debian package run: | cargo deb --no-build --target x86_64-unknown-linux-musl \ - --output target/release/kingfisher-amd64.deb + --output target/release/kingfisher-linux-x64.deb - name: Build RPM package run: | cargo generate-rpm --target x86_64-unknown-linux-musl \ - --output target/release/kingfisher-amd64.rpm + --output target/release/kingfisher-linux-x64.rpm - name: Move artifact to dist shell: bash run: | mkdir -p dist cp target/release/kingfisher-linux-x64.tgz dist/ - cp target/release/kingfisher-amd64.deb dist/ - cp target/release/kingfisher-amd64.rpm dist/ + cp target/release/kingfisher-linux-x64.deb dist/ + cp target/release/kingfisher--linuxx64.rpm dist/ - uses: actions/upload-artifact@v4 with: - name: kingfisher-linux-x64 - path: | - dist/kingfisher-linux-x64.tgz - dist/kingfisher-amd64.deb - dist/kingfisher-amd64.rpm + name: kingfisher-linux-x64.tgz + path: dist/kingfisher-linux-x64.tgz + - uses: actions/upload-artifact@v4 + with: + name: kingfisher-linux-x64.deb + path: dist/kingfisher-linux-x64.deb + - uses: actions/upload-artifact@v4 + with: + name: kingfisher-linux-x64.rpm + path: 
dist/kingfisher-linux-x64.rpm linux-arm64: name: Linux arm64 @@ -76,29 +81,33 @@ jobs: - name: Build Debian package run: | cargo deb --no-build --target aarch64-unknown-linux-musl \ - --output target/release/kingfisher-arm64.deb + --output target/release/kingfisher-linux-arm64.deb - name: Build RPM package run: | cargo generate-rpm --target aarch64-unknown-linux-musl \ - --output target/release/kingfisher-arm64.rpm + --output target/release/kingfisher-linux-arm64.rpm - name: Move artifact to dist shell: bash run: | mkdir -p dist cp target/release/kingfisher-linux-arm64.tgz dist/ - cp target/release/kingfisher-arm64.deb dist/ - cp target/release/kingfisher-arm64.rpm dist/ + cp target/release/kingfisher-linux-arm64.deb dist/ + cp target/release/kingfisher-linux-arm64.rpm dist/ - uses: actions/upload-artifact@v4 with: - name: kingfisher-linux-arm64 - path: | - dist/kingfisher-linux-arm64.tgz - dist/kingfisher-arm64.deb - dist/kingfisher-arm64.rpm - + name: kingfisher-linux-arm64.tgz + path: dist/kingfisher-linux-arm64.tgz + - uses: actions/upload-artifact@v4 + with: + name: kingfisher-linux-arm64.deb + path: dist/kingfisher-linux-arm64.deb + - uses: actions/upload-artifact@v4 + with: + name: kingfisher-linux-arm64.rpm + path: dist/kingfisher-linux-arm64.rpm macos-x64: name: macOS x64 From f3fb0a064aac0fee934b6d89feb426839ce7e4c0 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 22 Jul 2025 10:34:51 -0700 Subject: [PATCH 036/357] Fixing issue uploading tgz linux builds in github action --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2d80d05..5eb042c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -43,7 +43,7 @@ jobs: mkdir -p dist cp target/release/kingfisher-linux-x64.tgz dist/ cp target/release/kingfisher-linux-x64.deb dist/ - cp target/release/kingfisher--linuxx64.rpm dist/ + cp 
target/release/kingfisher-linux-x64.rpm dist/ - uses: actions/upload-artifact@v4 with: From 916a67bc565af477959a02249a9f6455850f3f80 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 22 Jul 2025 11:38:02 -0700 Subject: [PATCH 037/357] Fixing issue with dockerfile. Removed non-existent sh package from apk command --- docker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index f44e792..7c6ce44 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,9 +1,9 @@ # syntax=docker/dockerfile:1 FROM alpine:latest -RUN apk add --no-cache curl tar git sh +RUN apk add --no-cache curl tar git -ARG TARGETARCH # set automatically by BuildKit +ARG TARGETARCH ENV TARGETARCH=${TARGETARCH} WORKDIR /app From 630ec70990d66046b21be23243f7a258fe8592a8 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 22 Jul 2025 11:49:07 -0700 Subject: [PATCH 038/357] Fixing issue with dockerfile. Removed non-existent sh package from apk command --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 7c6ce44..54f0959 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -24,7 +24,7 @@ RUN set -eux; \ fi; \ curl -fsSL "$LATEST_URL" -o kingfisher.tgz; \ tar -xzf kingfisher.tgz; \ - rm kingfisher.tgz CHECKSUM-*.txt; \ + rm -f kingfisher.tgz CHECKSUM-*.txt; \ # locate the binary (pattern covers kingfisher-linux-x64 / kingfisher-linux-arm64) KF_PATH=$(find . -type f -name 'kingfisher*' -executable -print -quit); \ if [ -z "$KF_PATH" ]; then echo "No executable kingfisher binary found" >&2; exit 1; fi; \ From 955118e48390d8c4a443c8447c960b3b9da0d14d Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 23 Jul 2025 18:14:43 -0700 Subject: [PATCH 039/357] Added precommit and prereceive hook installations. 
Fixing Gitlab support --- README.md | 22 +++++++++++++++++++++- install-precommit-hook.sh | 32 ++++++++++++++++++++++++++++++++ install-prereceive-hook.sh | 34 ++++++++++++++++++++++++++++++++++ src/git_binary.rs | 36 ++++++++++++++++++++++++++---------- 4 files changed, 113 insertions(+), 11 deletions(-) create mode 100644 install-precommit-hook.sh create mode 100644 install-prereceive-hook.sh diff --git a/README.md b/README.md index 9a7895e..f7e7c0b 100644 --- a/README.md +++ b/README.md @@ -318,7 +318,27 @@ _If no token is provided Kingfisher still works for public repositories._ | 200 | Findings discovered | | 205 | Validated findings discovered | ---- + +## Install a Pre-Commit Hook + +Run the provided helper script to add a hook that scans staged files before each commit: + +```bash +./install-precommit-hook.sh +``` + +This creates `.git/hooks/pre-commit` that scans the files staged for commit with `kingfisher scan --no-update-check` and blocks the commit if any secrets are found. + +### Install a Pre-Receive Hook + +To check incoming pushes on a server-side repository, install the pre-receive hook: + +```bash +./install-prereceive-hook.sh +``` + +The resulting `.git/hooks/pre-receive` script scans the files in each pushed commit and rejects the push if any secrets are detected. + ## Update Checks diff --git a/install-precommit-hook.sh b/install-precommit-hook.sh new file mode 100644 index 0000000..6a6283f --- /dev/null +++ b/install-precommit-hook.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +set -euo pipefail + +HOOK_DIR="$(git rev-parse --git-dir)/hooks" +HOOK_PATH="$HOOK_DIR/pre-commit" + +if [ -e "$HOOK_PATH" ]; then + echo "Error: $HOOK_PATH already exists. Move or remove the existing hook to continue." >&2 + exit 1 +fi + +cat > "$HOOK_PATH" <<'HOOK' +#!/usr/bin/env bash +# Pre-commit hook to run Kingfisher scan on staged changes +set -euo pipefail + +if ! 
command -v kingfisher >/dev/null 2>&1; then + echo "kingfisher not found in PATH" >&2 + exit 1 +fi + +git diff --cached --name-only -z | \ + xargs -0 --no-run-if-empty kingfisher scan --no-update-check +status=$? +if [ "$status" -ne 0 ]; then + echo "Kingfisher detected secrets in staged files. Commit aborted." >&2 + exit "$status" +fi +HOOK + +chmod +x "$HOOK_PATH" +echo "Pre-commit hook installed to $HOOK_PATH" diff --git a/install-prereceive-hook.sh b/install-prereceive-hook.sh new file mode 100644 index 0000000..3ca4081 --- /dev/null +++ b/install-prereceive-hook.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +set -euo pipefail + +HOOK_DIR="$(git rev-parse --git-dir)/hooks" +HOOK_PATH="$HOOK_DIR/pre-receive" + +if [ -e "$HOOK_PATH" ]; then + echo "Error: $HOOK_PATH already exists. Move or remove the existing hook to continue." >&2 + exit 1 +fi + +cat > "$HOOK_PATH" <<'HOOK' +#!/usr/bin/env bash +# Pre-receive hook to scan pushed commits with Kingfisher +set -euo pipefail + +if ! command -v kingfisher >/dev/null 2>&1; then + echo "kingfisher not found in PATH" >&2 + exit 1 +fi + +while read -r oldrev newrev refname; do + git diff-tree --no-commit-id --name-only -r "$oldrev" "$newrev" -z | + xargs -0 --no-run-if-empty kingfisher scan --no-update-check + status=$? + if [ "$status" -ne 0 ]; then + echo "Kingfisher detected secrets in push. Push rejected." >&2 + exit "$status" + fi +done +HOOK + +chmod +x "$HOOK_PATH" +echo "Pre-receive hook installed to $HOOK_PATH" diff --git a/src/git_binary.rs b/src/git_binary.rs index a0e7c2e..b2b6918 100644 --- a/src/git_binary.rs +++ b/src/git_binary.rs @@ -36,17 +36,33 @@ impl Git { /// Create a new `Git` instance. /// /// * `ignore_certs`: If `true`, disables SSL certificate verification for `git` operations. 
- pub fn new(ignore_certs: bool) -> Self { - let credentials = if std::env::var("KF_GITHUB_TOKEN").is_ok() { - vec![ - "-c".into(), - r#"credential.helper="#.into(), - "-c".into(), +pub fn new(ignore_certs: bool) -> Self { + let mut credentials = Vec::new(); + + // If either GitHub or GitLab token is set, first clear existing credential.helpers + if std::env::var("KF_GITHUB_TOKEN").is_ok() + || std::env::var("KF_GITLAB_TOKEN").is_ok() + { + credentials.push("-c".into()); + credentials.push(r#"credential.helper="#.into()); + } + + // Inject GitHub token helper + if std::env::var("KF_GITHUB_TOKEN").is_ok() { + credentials.push("-c".into()); + credentials.push( r#"credential.helper=!_ghcreds() { echo username="kingfisher"; echo password="$KF_GITHUB_TOKEN"; }; _ghcreds"#.into(), - ] - } else { - Vec::new() - }; + ); + } + + // Inject GitLab token helper + if std::env::var("KF_GITLAB_TOKEN").is_ok() { + credentials.push("-c".into()); + credentials.push( + r#"credential.helper=!_glcreds() { echo username="oauth2"; echo password="$KF_GITLAB_TOKEN"; }; _glcreds"#.into(), + ); + } + Self { credentials, ignore_certs } } From 9b4856d7d532b5122a696939c900a82c1b6dd2ce Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 23 Jul 2025 19:57:33 -0700 Subject: [PATCH 040/357] Fixed Gitlab support. Added pre-commit and pre-receive installation scripts. --- CHANGELOG.md | 9 +++++++++ Cargo.toml | 2 +- data/rules/baseten.yml | 11 +++++++---- data/rules/mongodb.yml | 4 +++- src/validation.rs | 16 ++++++++-------- src/validation/mongodb.rs | 23 +++++++++++++++++++---- 6 files changed, 47 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f609a2..3f3092e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,15 @@ All notable changes to this project will be documented in this file. 
+## [1.26.0] +- Fixed GitLab authentication bug +- Improved Baseten rule to be less noisy +- Added pre-commit and pre-receive installation hooks + +## [1.25.0] +- MongoDB validator now skips `mongodb+srv://` URIs and returns a message that validation was skipped +- Fixed noisy Baseten rule + ## [1.24.0] - Now generating DEB and RPM packages - Now releasing Docker images, and updated README diff --git a/Cargo.toml b/Cargo.toml index 1361d7a..0146316 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.24.0" +version = "1.26.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/data/rules/baseten.yml b/data/rules/baseten.yml index 17d2285..8773d6c 100644 --- a/data/rules/baseten.yml +++ b/data/rules/baseten.yml @@ -4,6 +4,9 @@ rules: pattern: | (?x) \b + baseten + (?:.|[\n\r]){0,32}? + \b ( [A-Za-z0-9]{8} \. @@ -13,10 +16,10 @@ rules: min_entropy: 3.4 confidence: medium examples: - - WSsDXzCD.uOcxAp7k82IvCKyY36TnpVbP4ZszP1qw - - crXCQC3W.CgCGGY1b9IfJan5TppW0Z07C9oMN2DmR - - h2wFkhFC.3WFVwVcxGFr4Qup0gyhvIuONwQxEpL0A - - XqbIpj04.x73j1zLUOEgGIKROqVbxsmggPdL8JvAY + - baseten_key = WSsDXzCD.uOcxAp7k82IvCKyY36TnpVbP4ZszP1qw + - baseten_key = crXCQC3W.CgCGGY1b9IfJan5TppW0Z07C9oMN2DmR + - baseten_key = h2wFkhFC.3WFVwVcxGFr4Qup0gyhvIuONwQxEpL0A + - baseten_key = XqbIpj04.x73j1zLUOEgGIKROqVbxsmggPdL8JvAY references: - https://docs.baseten.co/examples/vllm - https://docs.baseten.co/reference/management-api/api-keys/lists-the-users-api-keys diff --git a/data/rules/mongodb.yml b/data/rules/mongodb.yml index 63e4775..46fbcf8 100644 --- a/data/rules/mongodb.yml +++ b/data/rules/mongodb.yml @@ -91,4 +91,6 @@ rules: \b min_entropy: 3.5 examples: - - mdb_sa_sk_BdIX_jLzut2WTgglKzKvSgWMDDj5hEoTqdwOyLOL \ No newline at end of file + - mdb_sa_sk_BdIX_jLzut2WTgglKzKvSgWMDDj5hEoTqdwOyLOL + validation: + type: MongoDB \ No newline at 
end of file diff --git a/src/validation.rs b/src/validation.rs index 6cb3711..59f5362 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -538,16 +538,16 @@ async fn timed_validate_single_match<'a>( } match mongodb::validate_mongodb(&uri).await { - Ok(ok) => { + Ok((ok, msg)) => { m.validation_success = ok; - m.validation_response_body = if ok { - "MongoDB connection is valid." + m.validation_response_body = msg; + m.validation_response_status = if uri.starts_with("mongodb+srv://") { + StatusCode::CONTINUE + } else if ok { + StatusCode::OK } else { - "MongoDB connection failed." - } - .to_string(); - m.validation_response_status = - if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; + StatusCode::UNAUTHORIZED + }; } Err(e) => { m.validation_success = false; diff --git a/src/validation/mongodb.rs b/src/validation/mongodb.rs index efac6fa..74a82d5 100644 --- a/src/validation/mongodb.rs +++ b/src/validation/mongodb.rs @@ -19,15 +19,24 @@ const FAST_SELECT_MS: u64 = 300; const SRV_CONNECT_MS: u64 = 15_000; // gives Atlas a fighting chance const SRV_SELECT_MS: u64 = 15_000; -/// Validates a MongoDB URI in ≤ 2 s. Returns `Ok(true)` on successful ping. -pub async fn validate_mongodb(uri: &str) -> Result { +/// Validates a MongoDB URI in ≤ 2 s. Returns `(bool, String)` where the +/// boolean indicates success and the string provides a status message. +pub async fn validate_mongodb(uri: &str) -> Result<(bool, String)> { // ---- quick reject without touching the network if !looks_like_mongodb_uri(uri) { - return Ok(false); + return Ok((false, "Invalid MongoDB URI".to_string())); } let is_srv = uri.starts_with("mongodb+srv://"); + if is_srv { + // Skip SRV URIs to avoid slow DNS lookups and topology discovery. 
+ return Ok(( + false, + "Validation skipped for mongodb+srv:// URI (performance reasons)".to_string(), + )); + } + // ---- build client opts let mut opts = ClientOptions::parse(uri).await?; if !is_srv { @@ -46,7 +55,13 @@ pub async fn validate_mongodb(uri: &str) -> Result { // ---- dial and ping let client = Client::with_options(opts)?; - Ok(client.database("admin").run_command(doc! { "ping": 1 }).await.is_ok()) + let ok = client.database("admin").run_command(doc! { "ping": 1 }).await.is_ok(); + let msg = if ok { + "MongoDB connection is valid.".to_string() + } else { + "MongoDB connection failed.".to_string() + }; + Ok((ok, msg)) } // pub fn generate_mongodb_cache_key(mongodb_uri: &str) -> String { From 9a87e30171637e6ddae9875d26f56532284a25d4 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 23 Jul 2025 19:58:24 -0700 Subject: [PATCH 041/357] Fixed version number --- CHANGELOG.md | 7 ++----- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f3092e..d89083c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,12 +2,9 @@ All notable changes to this project will be documented in this file. 
-## [1.26.0] -- Fixed GitLab authentication bug -- Improved Baseten rule to be less noisy -- Added pre-commit and pre-receive installation hooks - ## [1.25.0] +- Fixed GitLab authentication bug +- Added pre-commit and pre-receive installation hooks - MongoDB validator now skips `mongodb+srv://` URIs and returns a message that validation was skipped - Fixed noisy Baseten rule diff --git a/Cargo.toml b/Cargo.toml index 0146316..930056a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.26.0" +version = "1.25.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true From 425dcbf0e9b58802de9b498d829191937a6a7aac Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 23 Jul 2025 20:47:16 -0700 Subject: [PATCH 042/357] Update install-prereceive-hook.sh Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- install-prereceive-hook.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/install-prereceive-hook.sh b/install-prereceive-hook.sh index 3ca4081..f7a4d5e 100644 --- a/install-prereceive-hook.sh +++ b/install-prereceive-hook.sh @@ -20,8 +20,13 @@ if ! command -v kingfisher >/dev/null 2>&1; then fi while read -r oldrev newrev refname; do - git diff-tree --no-commit-id --name-only -r "$oldrev" "$newrev" -z | - xargs -0 --no-run-if-empty kingfisher scan --no-update-check + if [ "$oldrev" = "0000000000000000000000000000000000000000" ]; then + git diff-tree --name-only -r "$newrev" -z | + xargs -0 --no-run-if-empty kingfisher scan --no-update-check + else + git diff-tree --no-commit-id --name-only -r "$oldrev" "$newrev" -z | + xargs -0 --no-run-if-empty kingfisher scan --no-update-check + fi status=$? if [ "$status" -ne 0 ]; then echo "Kingfisher detected secrets in push. Push rejected." 
>&2 From 1ac413bbeedbe4be04d31e4811b3ccbc5018e806 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 25 Jul 2025 10:31:17 -0700 Subject: [PATCH 043/357] Added ElevenLabs rule --- CHANGELOG.md | 3 +++ Cargo.toml | 2 +- data/rules/elevenlabs.yml | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 data/rules/elevenlabs.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index d89083c..7198eca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.26.0] +- Added rule for ElevenLabs + ## [1.25.0] - Fixed GitLab authentication bug - Added pre-commit and pre-receive installation hooks diff --git a/Cargo.toml b/Cargo.toml index 930056a..0146316 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.25.0" +version = "1.26.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/data/rules/elevenlabs.yml b/data/rules/elevenlabs.yml new file mode 100644 index 0000000..ade342c --- /dev/null +++ b/data/rules/elevenlabs.yml @@ -0,0 +1,36 @@ +rules: + - name: ElevenLabs API Key + id: kingfisher.elevenlabs.1 + pattern: | + (?xi) + \b + ( + sk_ + [0-9a-f]{48} + ) + \b + min_entropy: 3.5 + confidence: medium + examples: + - sk_2a30e5a0d39d5f2c5f6a9d2f95cd016049a6323985479bfd + - sk_da9c0613fdeecfab10b302d6f39a3e371f774feb9eafed56 + - sk_82a331629e2128ef70396600809b6a2ff4e433154fa27e1b + references: + - https://elevenlabs.io/docs/api-reference/authentication + - https://elevenlabs.io/docs/api-reference/user/subscription/get + + validation: + type: Http + content: + request: + method: GET + url: https://api.elevenlabs.io/v1/user/subscription + headers: + xi-api-key: '{{ TOKEN }}' + response_matcher: + - report_response: true + - type: WordMatch + match_all_words: false + words: + - '"tier"' + - 
'"missing_permissions"' From 63a757fba8d6fbbd580c09e12304a56164147023 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 25 Jul 2025 17:21:28 -0700 Subject: [PATCH 044/357] Added support for scanning issues returned from a JQL search using --jira-url and --jql --- CHANGELOG.md | 1 + Cargo.toml | 2 + NOTICE | 17 +- README.md | 26 ++ data/rules/jira.yml | 2 +- src/cli/commands/inputs.rs | 15 +- src/findings_store.rs | 7 + src/jira.rs | 52 +++ src/lib.rs | 1 + src/main.rs | 4 + src/reporter.rs | 30 +- src/reporter/bson_format.rs | 4 +- src/reporter/json_format.rs | 31 +- src/reporter/pretty_format.rs | 17 +- src/reporter/sarif_format.rs | 39 ++- src/scanner/repos.rs | 31 +- src/scanner/runner.rs | 10 +- tests/int_dedup.rs | 3 + tests/int_github.rs | 3 + tests/int_gitlab.rs | 4 + tests/int_validation_cache.rs | 3 + tests/int_vulnerable_files.rs | 6 + vendor/jira_query/.gitignore | 2 + vendor/jira_query/CONTRIBUTING.md | 4 + vendor/jira_query/Cargo.toml | 29 ++ vendor/jira_query/DCO.md | 14 + vendor/jira_query/LICENSE | 201 +++++++++++ vendor/jira_query/README.md | 72 ++++ vendor/jira_query/src/access.rs | 297 +++++++++++++++++ vendor/jira_query/src/errors.rs | 28 ++ vendor/jira_query/src/issue_model.rs | 440 +++++++++++++++++++++++++ vendor/jira_query/src/lib.rs | 41 +++ vendor/jira_query/tests/integration.rs | 165 ++++++++++ 33 files changed, 1564 insertions(+), 37 deletions(-) create mode 100644 src/jira.rs create mode 100644 vendor/jira_query/.gitignore create mode 100644 vendor/jira_query/CONTRIBUTING.md create mode 100644 vendor/jira_query/Cargo.toml create mode 100644 vendor/jira_query/DCO.md create mode 100644 vendor/jira_query/LICENSE create mode 100644 vendor/jira_query/README.md create mode 100644 vendor/jira_query/src/access.rs create mode 100644 vendor/jira_query/src/errors.rs create mode 100644 vendor/jira_query/src/issue_model.rs create mode 100644 vendor/jira_query/src/lib.rs create mode 100644 vendor/jira_query/tests/integration.rs diff --git 
a/CHANGELOG.md b/CHANGELOG.md index 7198eca..f21fc28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.26.0] - Added rule for ElevenLabs +- Added support for scanning Jira issues via a given JQL (Jira Query Language) ## [1.25.0] - Fixed GitLab authentication bug diff --git a/Cargo.toml b/Cargo.toml index 0146316..eff7a26 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -185,6 +185,7 @@ semver = "1.0.26" globset = "0.4.16" jsonwebtoken = "9.3.1" ipnet = "2.11.0" +jira_query = "1.6.0" [dependencies.tikv-jemallocator] version = "0.6" @@ -223,6 +224,7 @@ codegen-units = 256 [patch.crates-io] vectorscan-rs = { path = "vendor/vectorscan-rs/vectorscan-rs" } vectorscan-rs-sys = { path = "vendor/vectorscan-rs/vectorscan-rs-sys" } +jira_query = { path = "vendor/jira_query" } [profile.profiling] inherits = "release" diff --git a/NOTICE b/NOTICE index 62c3bf2..6dd6a63 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,14 @@ NOTICE file corresponding to Section 4 (d) of the Apache License, Version 2.0 +-------------------------------------------------------------------- +Notices for Kingfisher +-------------------------------------------------------------------- +Copyright 2025 MongoDB, Inc. +https://www.mongodb.com + +Source repository: https://github.com/mongodb/kingfisher + + -------------------------------------------------------------------- Upstream notices -------------------------------------------------------------------- @@ -21,11 +30,3 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. - --------------------------------------------------------------------- -Additional notices for Kingfisher --------------------------------------------------------------------- -Copyright 2025 MongoDB, Inc. 
-https://www.mongodb.com - -Source repository: https://github.com/mongodb/kingfisher diff --git a/README.md b/README.md index f7e7c0b..7ce5dad 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ Kingfisher extends Nosey Parker with live secret validation via cloud-provider A - **Language‑Aware Accuracy**: AST parsing in 20+ languages via Tree‑Sitter reduces contextless regex matches. see [docs/PARSING.md](/docs/PARSING.md) - **Built-In Validation**: Hundreds of built-in detection rules, many with live-credential validators that call the relevant service APIs (AWS, Azure, GCP, Stripe, etc.) to confirm a secret is active. You can extend or override the library by adding YAML-defined rules on the command line—see [docs/RULES.md](/docs/RULES.md) for details - **Git History Scanning**: Scan local repos, remote GitHub/GitLab orgs/users, or arbitrary GitHub/GitLab repos +- **Jira Scanning**: Scan issues returned from a JQL search using `--jira-url` and `--jql` - **Baseline Support:** Generate and manage baseline files to ignore known secrets and report only newly introduced ones. See ([docs/BASELINE.md](docs/BASELINE.md)) for details. 
# Getting Started @@ -285,14 +286,34 @@ kingfisher scan --git-url https://gitlab.com/group/project.git kingfisher gitlab repos list --group my-group ``` +## Scanning Jira + +### Scan Jira issues matching a JQL query + +```bash +KF_JIRA_TOKEN="token" kingfisher scan \ + --jira-url https://jira.company.com \ + --jql "project = TEST AND status = Open" \ + --max-results 500 +``` + +### Scan the last 1,000 Jira issues: +```bash +KF_JIRA_TOKEN="token" kingfisher scan \ + --jira-url https://jira.mongodb.org \ + --jql 'ORDER BY created DESC' \ + --max-results 1000 +``` --- + ## Environment Variables for Tokens | Variable | Purpose | | ----------------- | ---------------------------- | | `KF_GITHUB_TOKEN` | GitHub Personal Access Token | | `KF_GITLAB_TOKEN` | GitLab Personal Access Token | +| `KF_JIRA_TOKEN` | Jira API token | Set them temporarily per command: @@ -306,6 +327,11 @@ Or export for the session: export KF_GITLAB_TOKEN="glpat-…" ``` +To authenticate Jira requests: +```bash +export KF_JIRA_TOKEN="token" +``` + _If no token is provided Kingfisher still works for public repositories._ --- diff --git a/data/rules/jira.yml b/data/rules/jira.yml index c0b14a2..82ac195 100644 --- a/data/rules/jira.yml +++ b/data/rules/jira.yml @@ -12,7 +12,7 @@ rules: confidence: medium examples: - example-jira.atlassian.net - - jira.sprintUri= https://leakyday.atlassian.net/rest + - jira.sprintUri= https://example.atlassian.net/rest - name: Jira Token id: kingfisher.jira.2 diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 20d3dde..c7a59bc 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -25,7 +25,8 @@ pub struct InputSpecifierArgs { "gitlab_group", "git_url", "all_github_organizations", - "all_gitlab_groups" + "all_gitlab_groups", + "jira_url" ]), value_hint = ValueHint::AnyPath )] @@ -84,6 +85,18 @@ pub struct InputSpecifierArgs { #[arg(long, default_value_t = GitLabRepoType::Owner)] pub gitlab_repo_type: GitLabRepoType, + /// Jira 
base URL (e.g. https://jira.example.com) + #[arg(long, value_hint = ValueHint::Url, requires = "jql")] + pub jira_url: Option, + + /// JQL query to select Jira issues + #[arg(long, requires = "jira_url")] + pub jql: Option, + + /// Maximum number of Jira results to fetch + #[arg(long, default_value_t = 100)] + pub max_results: usize, + /// Select how to clone Git repositories #[arg(long, default_value_t=GitCloneMode::Bare, alias="git-clone-mode")] pub git_clone: GitCloneMode, diff --git a/src/findings_store.rs b/src/findings_store.rs index 07d20bf..7d3cd76 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -280,6 +280,13 @@ impl FindingsStore { self.clone_dir.join(repo_identifier) } + /// Return the directory used to store cloned repositories and other + /// temporary artifacts. + pub fn clone_root(&self) -> PathBuf { + self.clone_dir.clone() + } + + pub fn get_finding_data_iter( &self, ) -> impl Iterator + '_ { diff --git a/src/jira.rs b/src/jira.rs new file mode 100644 index 0000000..1b27462 --- /dev/null +++ b/src/jira.rs @@ -0,0 +1,52 @@ +use anyhow::{Context, Result}; +use jira_query::{Auth, JiraInstance, Pagination}; +use reqwest::Client; +use url::Url; + +// Re-export the Issue type from jira_query so callers don't depend on the crate. +pub use jira_query::Issue as JiraIssue; +pub async fn fetch_issues( + jira_url: Url, + jql: &str, + max_results: usize, + ignore_certs: bool, +) -> Result> { + // build a &str without any trailing `/` + let base = jira_url.as_str().trim_end_matches('/'); + + let client = Client::builder() + .danger_accept_invalid_certs(ignore_certs) + .build() + .context("Failed to build HTTP client")?; + + let mut jira = JiraInstance::at(base.to_string())? 
// no trailing slash here + .with_client(client) + .paginate(Pagination::MaxResults(max_results as u32)); + + if let Ok(token) = std::env::var("KF_JIRA_TOKEN") { + jira = jira.authenticate(Auth::ApiKey(token)); + } + + let issues = jira.search(jql).await?; + Ok(issues) +} + +use std::path::PathBuf; + +pub async fn download_issues_to_dir( + jira_url: Url, + jql: &str, + max_results: usize, + ignore_certs: bool, + output_dir: &PathBuf, +) -> Result> { + std::fs::create_dir_all(output_dir)?; + let issues = fetch_issues(jira_url, jql, max_results, ignore_certs).await?; + let mut paths = Vec::new(); + for issue in issues { + let file = output_dir.join(format!("{}.json", issue.key)); + std::fs::write(&file, serde_json::to_vec(&issue)?)?; + paths.push(file); + } + Ok(paths) +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 26703fc..af74e7b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 +18,7 @@ pub mod git_url; pub mod github; pub mod gitlab; pub mod guesser; +pub mod jira; pub mod liquid_filters; pub mod location; pub mod matcher; diff --git a/src/main.rs b/src/main.rs index 44a3ec3..3b0d444 100644 --- a/src/main.rs +++ b/src/main.rs @@ -279,6 +279,10 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + max_results: 50, + // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/src/reporter.rs b/src/reporter.rs index 93583fa..f6a3331 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -112,6 +112,34 @@ impl DetailsReporter { None } } + + + /// If the given file path corresponds to a Jira issue downloaded to disk, + /// return the online Jira URL for that issue. 
+ fn jira_issue_url( + &self, + path: &std::path::Path, + args: &cli::commands::scan::ScanArgs, + ) -> Option { + // drop any trailing slash so we don’t end up with “//browse/…” + let jira_url = args + .input_specifier_args + .jira_url + .as_ref()? + .as_str() + .trim_end_matches('/'); + + let ds = self.datastore.lock().ok()?; + let root = ds.clone_root(); + let jira_dir = root.join("jira_issues"); + if path.starts_with(&jira_dir) { + let key = path.file_stem()?.to_string_lossy(); + Some(format!("{}/browse/{}", jira_url, key)) + } else { + None + } + } + fn gather_findings(&self) -> Result> { let metadata_list = self.get_finding_data()?; let all_matches = self.get_filtered_matches()?; @@ -288,7 +316,7 @@ impl Reportable for DetailsReporter { ReportOutputFormat::Json => self.json_format(writer, args), ReportOutputFormat::Jsonl => self.jsonl_format(writer, args), ReportOutputFormat::Bson => self.bson_format(writer, args), - ReportOutputFormat::Sarif => self.sarif_format(writer, args.no_dedup), + ReportOutputFormat::Sarif => self.sarif_format(writer, args.no_dedup, args), } } } diff --git a/src/reporter/bson_format.rs b/src/reporter/bson_format.rs index fc1b48c..c1470d0 100644 --- a/src/reporter/bson_format.rs +++ b/src/reporter/bson_format.rs @@ -39,14 +39,14 @@ impl DetailsReporter { }; // Process to JSON first, then convert to BSON - let json_finding = self.process_match_to_json(&single_origin_rm)?; + let json_finding = self.process_match_to_json(&single_origin_rm, args)?; if let Ok(bson_doc) = json_to_bson_document(&json_finding) { bson_findings.push(bson_doc); } } } else { // Process normally for deduped matches or matches with only one origin - let json_finding = self.process_match_to_json(&rm)?; + let json_finding = self.process_match_to_json(&rm, args)?; if let Ok(bson_doc) = json_to_bson_document(&json_finding) { bson_findings.push(bson_doc); } diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index a4e8730..31123b6 100644 --- 
a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -101,7 +101,11 @@ impl DetailsReporter { .iter() .find_map(|origin| { if let Origin::File(e) = origin { - Some(e.path.display().to_string()) + if let Some(url) = self.jira_issue_url(&e.path, args) { + Some(url) + } else { + Some(e.path.display().to_string()) + } } else { None } @@ -173,12 +177,12 @@ impl DetailsReporter { }; // Process this single-origin match into a JSON finding - let json_finding = self.process_match_to_json(&single_origin_rm)?; + let json_finding = self.process_match_to_json(&single_origin_rm, args)?; findings.push(json_finding); } } else { // Process normally for deduped matches or matches with only one origin - let json_finding = self.process_match_to_json(&rm)?; + let json_finding = self.process_match_to_json(&rm, args)?; findings.push(json_finding); } } @@ -192,7 +196,11 @@ impl DetailsReporter { } // Add a helper method to convert a ReportMatch to a JSON finding - pub fn process_match_to_json(&self, rm: &ReportMatch) -> Result { + pub fn process_match_to_json( + &self, + rm: &ReportMatch, + args: &cli::commands::scan::ScanArgs, + ) -> Result { // Extract the relevant data from the match as you already do in your current implementation let source_span = &rm.m.location.source_span; let line_num = source_span.start.line; @@ -242,7 +250,11 @@ impl DetailsReporter { .iter() .find_map(|origin| { if let Origin::File(e) = origin { - Some(e.path.display().to_string()) + if let Some(url) = self.jira_issue_url(&e.path, args) { + Some(url) + } else { + Some(e.path.display().to_string()) + } } else { None } @@ -325,13 +337,13 @@ impl DetailsReporter { }; // Process this single-origin match into a JSON finding and write it - let json_finding = self.process_match_to_json(&single_origin_rm)?; + let json_finding = self.process_match_to_json(&single_origin_rm, args)?; serde_json::to_writer(&mut writer, &json_finding)?; writeln!(writer)?; } } else { // Process normally for deduped matches or 
matches with only one origin - let json_finding = self.process_match_to_json(&rm)?; + let json_finding = self.process_match_to_json(&rm, args)?; serde_json::to_writer(&mut writer, &json_finding)?; writeln!(writer)?; } @@ -413,7 +425,10 @@ mod tests { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, - + // Jira options + jira_url: None, + jql: None, + max_results: 50, // clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index b9c868c..02999e5 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -214,14 +214,20 @@ impl<'a> Display for PrettyFinding<'a> { for p in rm.origin.iter() { match p { Origin::File(e) => { + let display_path = if let Some(url) = reporter.jira_issue_url(&e.path, args) { + url + } else { + e.path.display().to_string() + }; writeln!( f, " |Path..........: {}", if rm.validation_success { - reporter.style_active_creds(e.path.display()).to_string().to_string() - // Convert StyledObject to String + // reporter.style_active_creds(e.path.display()).to_string().to_string() + reporter.style_active_creds(&display_path).to_string() } else { - e.path.display().to_string() + // e.path.display().to_string() + display_path } )?; } @@ -337,7 +343,10 @@ fn test_pretty_format_with_nan_entropy_panics() { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, - + // Jira options + jira_url: None, + jql: None, + max_results: 50, // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/src/reporter/sarif_format.rs b/src/reporter/sarif_format.rs index dc0106f..de39fd0 100644 --- a/src/reporter/sarif_format.rs +++ b/src/reporter/sarif_format.rs @@ -14,7 +14,12 @@ struct LocationKey { text: String, } impl DetailsReporter { - fn 
make_sarif_result(&self, finding: &Finding, no_dedup: bool) -> Result { + fn make_sarif_result( + &self, + finding: &Finding, + no_dedup: bool, + args: &cli::commands::scan::ScanArgs, + ) -> Result { // Deduplicate exactly as in the JSON reporter // let matches = self.deduplicate_matches(finding.matches.clone(), no_dedup); // Deduplicate exactly as in the JSON reporter - but only if no_dedup is false @@ -66,11 +71,13 @@ impl DetailsReporter { for p in prov.iter() { match p { Origin::File(e) => { + let uri = if let Some(url) = self.jira_issue_url(&e.path, args) { + url + } else { + e.path.display().to_string() + }; artifact_locations.push( - sarif::ArtifactLocationBuilder::default() - .uri(e.path.display().to_string()) - .build() - .ok()?, + sarif::ArtifactLocationBuilder::default().uri(uri).build().ok()?, ); } Origin::GitRepo(e) => { @@ -199,7 +206,13 @@ impl DetailsReporter { let p = first_match.origin.first(); match p { Origin::File(e) => { - msg.push_str(&format!("Location: {}\n", e.path.display())); + // msg.push_str(&format!("Location: {}\n", e.path.display())); + let uri = if let Some(url) = self.jira_issue_url(&e.path, args) { + url + } else { + e.path.display().to_string() + }; + msg.push_str(&format!("Location: {}\n", uri)); } Origin::GitRepo(e) => { if let Some(cs) = &e.first_commit { @@ -242,7 +255,12 @@ impl DetailsReporter { Ok(result) } - pub fn sarif_format(&self, mut writer: W, no_dedup: bool) -> Result<()> { + pub fn sarif_format( + &self, + mut writer: W, + no_dedup: bool, + args: &cli::commands::scan::ScanArgs, + ) -> Result<()> { // Gather findings first let mut findings = self.gather_findings()?; @@ -329,8 +347,11 @@ impl DetailsReporter { .build()?, ) .build()?; - let sarif_results: Vec = - findings.par_iter().filter_map(|f| self.make_sarif_result(f, no_dedup).ok()).collect(); + + let sarif_results: Vec = findings + .par_iter() + .filter_map(|f| self.make_sarif_result(f, no_dedup, args).ok()) + .collect(); let run = 
sarif::RunBuilder::default().tool(tool).results(sarif_results).build()?; let sarif = sarif::SarifBuilder::default() .version(sarif::Version::V2_1_0.to_string()) diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 4fb5c58..f84b758 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -20,7 +20,7 @@ use crate::{ findings_store, git_binary::{CloneMode, Git}, git_url::GitUrl, - github, gitlab, + github, gitlab, jira, matcher::Match, origin::OriginSet, PathBuf, @@ -224,3 +224,32 @@ pub async fn enumerate_gitlab_repos( repo_urls.dedup(); Ok(repo_urls) } + + +pub async fn fetch_jira_issues( + args: &scan::ScanArgs, + global_args: &global::GlobalArgs, + datastore: &Arc>, +) -> Result> { + let Some(jira_url) = args.input_specifier_args.jira_url.clone() else { + return Ok(Vec::new()); + }; + let Some(jql) = args.input_specifier_args.jql.as_deref() else { + return Ok(Vec::new()); + }; + let max_results = args.input_specifier_args.max_results; + let output_dir = { + let ds = datastore.lock().unwrap(); + ds.clone_root() + }; + let output_dir = output_dir.join("jira_issues"); + let _paths = jira::download_issues_to_dir( + jira_url, + jql, + max_results, + global_args.ignore_certs, + &output_dir, + ) + .await?; + Ok(vec![output_dir]) +} \ No newline at end of file diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 7c2f964..99ad466 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -18,7 +18,9 @@ use crate::{ rules_database::RulesDatabase, scanner::{ clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos, - repos::enumerate_gitlab_repos, run_secret_validation, summary::print_scan_summary, + repos::{enumerate_gitlab_repos, fetch_jira_issues}, + run_secret_validation, + summary::print_scan_summary, }, }; @@ -61,7 +63,11 @@ pub async fn run_async_scan( repo_urls.sort(); repo_urls.dedup(); - let input_roots = clone_or_update_git_repos(args, global_args, &repo_urls, &datastore)?; + let mut input_roots = 
clone_or_update_git_repos(args, global_args, &repo_urls, &datastore)?; + // Fetch Jira issues if requested + let jira_dirs = fetch_jira_issues(args, global_args, &datastore).await?; + input_roots.extend(jira_dirs); + if input_roots.is_empty() { bail!("No inputs to scan"); } diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 64c2c92..4c4975c 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -79,6 +79,9 @@ rules: gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + max_results: 50, // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_github.rs b/tests/int_github.rs index 330299b..c8256c8 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -66,6 +66,9 @@ fn test_github_remote_scan() -> Result<()> { gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + max_results: 50, // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 66a7f37..6ec6e19 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -64,6 +64,10 @@ fn test_gitlab_remote_scan() -> Result<()> { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/")?, gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 50, git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, scan_nested_repos: true, diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 7e422e1..933c068 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -122,6 +122,9 @@ async fn test_validation_cache_and_depvars() -> Result<()> { gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + max_results: 
50, // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index bb1d064..0da7868 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -65,6 +65,9 @@ impl TestContext { gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + max_results: 50, // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, @@ -130,6 +133,9 @@ impl TestContext { gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + max_results: 50, // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/vendor/jira_query/.gitignore b/vendor/jira_query/.gitignore new file mode 100644 index 0000000..96ef6c0 --- /dev/null +++ b/vendor/jira_query/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/vendor/jira_query/CONTRIBUTING.md b/vendor/jira_query/CONTRIBUTING.md new file mode 100644 index 0000000..f3d0489 --- /dev/null +++ b/vendor/jira_query/CONTRIBUTING.md @@ -0,0 +1,4 @@ +## Certificate of Origin + +By contributing to this project you agree to the Developer Certificate of Origin (DCO). This document was created by the Linux Kernel community and is a simple statement that you, as a contributor, have the legal right to make the contribution. See the [DCO.md](DCO.md) file for details. + diff --git a/vendor/jira_query/Cargo.toml b/vendor/jira_query/Cargo.toml new file mode 100644 index 0000000..6073d49 --- /dev/null +++ b/vendor/jira_query/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "jira_query" +authors = ["Marek Suchánek "] +version = "1.6.0" +edition = "2021" +# Check the Rust version using `cargo msrv verify`. 
+rust-version = "1.81" +license = "Apache-2.0" +description = "Access tickets on a remote Jira instance." +readme = "README.md" +documentation = "https://docs.rs/jira_query/" +homepage = "https://github.com/msuchane/jira_query" +repository = "https://github.com/msuchane/jira_query" +keywords = ["jira", "atlassian", "rest"] +categories = ["api-bindings"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +log = "0.4" +thiserror = "2.0" +reqwest = { version = "0.12", features = ["json"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +# Version with a security patch: +chrono = { version = ">=0.4.20", features = ["serde"] } + +[dev-dependencies] +tokio = { version = ">=1.45", features = ["full"] } diff --git a/vendor/jira_query/DCO.md b/vendor/jira_query/DCO.md new file mode 100644 index 0000000..f94b001 --- /dev/null +++ b/vendor/jira_query/DCO.md @@ -0,0 +1,14 @@ +## What is the DCO? + +The DCO is a certification normally associated with every contribution to a project made by every contributor. It signifies that the contributor has the right to submit the contribution under the applicable open source license of the project. 
The entire certification text (maintained by the Linux Foundation as version 1.1 at https://developercertificate.org/) is the following: + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or + +(b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or + +(c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. + +(d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. + diff --git a/vendor/jira_query/LICENSE b/vendor/jira_query/LICENSE new file mode 100644 index 0000000..59e493a --- /dev/null +++ b/vendor/jira_query/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2022 Marek Suchánek + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/jira_query/README.md b/vendor/jira_query/README.md new file mode 100644 index 0000000..0789130 --- /dev/null +++ b/vendor/jira_query/README.md @@ -0,0 +1,72 @@ +# jira_query + +[![Crates.io](https://img.shields.io/crates/v/jira_query.svg)](https://crates.io/crates/jira_query) +[![Apache-2.0 license](https://img.shields.io/crates/l/jira_query)](https://crates.io/crates/jira_query) +[![Documentation](https://docs.rs/jira_query/badge.svg)](https://docs.rs/jira_query) + +[![CI tests](https://github.com/msuchane/jira_query/actions/workflows/rust-tests.yml/badge.svg)](https://github.com/msuchane/jira_query/actions/workflows/rust-tests.yml) +[![Dependency status](https://deps.rs/repo/github/msuchane/jira_query/status.svg)](https://deps.rs/repo/github/msuchane/jira_query) + +Access issues on a remote Jira instance. + +## Description + +The `jira_query` crate is a Rust library that can query a Jira instance using its REST API. It returns a strongly typed representation of the requested issues. + +This library provides no functionality to create or modify issues. The access is read-only. 
+ +## Usage + +### Basic anonymous query + +Without logging in, search for a single ticket and check for its priority: + +```rust +use tokio; +use jira_query::JiraInstance; + +#[tokio::main] +async fn main() -> Result<(), Box<dyn std::error::Error>> { + let jira = JiraInstance::at("https://issues.redhat.com".to_string())?; + + let issue = jira.issue("CS-1113").await?; + + assert_eq!(issue.fields.priority.unwrap().name, "Normal"); + + Ok(()) +} +``` + +### Advanced query + +Use an API key to log into Jira. Search for all CentOS Stream tickets that are of the Blocker priority. Check that there is more than one ticket: + +```rust +use tokio; +use jira_query::{Auth, JiraInstance, Pagination}; + +#[tokio::main] +async fn main() -> Result<(), Box<dyn std::error::Error>> { + let jira = JiraInstance::at("https://issues.redhat.com".to_string())? + .authenticate(Auth::ApiKey("My API Key".to_string())) + .paginate(Pagination::ChunkSize(32)); + + let query = r#"project="CentOS Stream" AND priority=Blocker"#; + + let issues = jira.search(query).await?; + + assert!(issues.len() > 1); + + Ok(()) +} +``` + +## A note on semantic versioning + +This crate reserves the right to make limited breaking changes to the Jira structs in minor versions (`X.Y`). + +The reason is that the official Jira documentation does not specify which fields in the JSON body are optional (`Option<T>`) and which are mandatory (`T`). Rather than exposing all fields as optional, this crate tries to process fields as mandatory until proven otherwise in testing. As a consequence, minor releases must occasionally turn a mandatory field into an optional field. 
+ +## See also + +* [`bugzilla_query`](https://crates.io/crates/bugzilla_query), a similar interface to Bugzilla diff --git a/vendor/jira_query/src/access.rs b/vendor/jira_query/src/access.rs new file mode 100644 index 0000000..359e6ca --- /dev/null +++ b/vendor/jira_query/src/access.rs @@ -0,0 +1,297 @@ +/* +Copyright 2022 Marek Suchánek + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Jira API documentation: +// * https://docs.atlassian.com/software/jira/docs/api/REST/latest/ +// * https://docs.atlassian.com/jira-software/REST/latest/ + +use crate::errors::JiraQueryError; +use crate::issue_model::{Issue, JqlResults}; + +// The prefix of every subsequent REST request. +// This string comes directly after the host in the URL. +const REST_PREFIX: &str = "rest/api/2"; + +/// Configuration and credentials to access a Jira instance. +pub struct JiraInstance { + pub host: String, + pub auth: Auth, + pub pagination: Pagination, + client: reqwest::Client, +} + +/// The authentication method used to contact Jira. +pub enum Auth { + Anonymous, + ApiKey(String), + Basic { user: String, password: String }, +} + +// We could set a default enum variant and derive, but that raises the MSRV to 1.62. +impl Default for Auth { + fn default() -> Self { + Self::Anonymous + } +} + +/// Controls the upper limit of how many tickets the response from Jira can contain: +/// +/// * `Default`: Use the default settings of this instance, which sets an arbitrary limit on the number of tickets. 
+/// * `MaxResults`: Set the upper limit to this value. Note that each instance has a maximum allowed value, +/// and if you set `MaxResults` higher than that, the instance uses its own maximum allowed value. +/// * `ChunkSize`: Access the tickets in a series of requests, each accessing the number of tickets equal to the chunk size. +/// This enables you to access an unlimited number of tickets, as long as the chunk size is smaller +/// than the maximum allowed results size for the instance. +pub enum Pagination { + Default, + MaxResults(u32), + ChunkSize(u32), +} + +// We could set a default enum variant and derive, but that raises the MSRV to 1.62. +impl Default for Pagination { + fn default() -> Self { + Self::Default + } +} + +/// The method of the request to Jira. Either request specific IDs, +/// or use a free-form JQL search query. +enum Method<'a> { + Key(&'a str), + Keys(&'a [&'a str]), + Search(&'a str), +} + +impl<'a> Method<'a> { + fn url_fragment(&self) -> String { + match self { + Self::Key(id) => format!("issue/{id}"), + Self::Keys(ids) => format!("search?jql=id%20in%20({})", ids.join(",")), + Self::Search(query) => format!("search?jql={query}"), + } + } +} + +impl JiraInstance { + /// Create a new `JiraInstance` struct using a host URL, with default values + /// for all options. + pub fn at(host: String) -> Result { + // TODO: This function takes host as a String, even though client is happy with &str. + // The String is only used in the host struct attribute. + let client = reqwest::Client::new(); + + Ok(Self { + host, + client, + auth: Auth::default(), + pagination: Pagination::default(), + }) + } + + /// Set the authentication method of this `JiraInstance`. + #[must_use] + pub fn authenticate(mut self, auth: Auth) -> Self { + self.auth = auth; + self + } + + /// Set the HTTP client of this `JiraInstance`. 
+ #[must_use] + pub fn with_client(mut self, client: reqwest::Client) -> Self { + self.client = client; + self + } + + /// Set the pagination method of this `JiraInstance`. + #[must_use] + pub const fn paginate(mut self, pagination: Pagination) -> Self { + self.pagination = pagination; + self + } + + /// Based on the request method, form a complete, absolute URL + /// to download the tickets from the REST API. + #[must_use] + fn path(&self, method: &Method, start_at: u32) -> String { + let max_results = match self.pagination { + Pagination::Default => String::new(), + // For both MaxResults and ChunkSize, set the maxResults size to the value set in the variant. + // The maxResults size is relevant for ChunkSize in that each chunk requires its own results + // to be at least this large. + Pagination::MaxResults(n) | Pagination::ChunkSize(n) => format!("&maxResults={n}"), + }; + + // The `startAt` option is only valid with JQL. With a URL by key, it breaks the REST query. + let start_at = match method { + Method::Key(_) => String::new(), + Method::Keys(_) | Method::Search(_) => format!("&startAt={start_at}"), + }; + + format!( + "{}/{}/{}{}{}", + self.host, + REST_PREFIX, + method.url_fragment(), + max_results, + start_at, + ) + } + + /// Download the specified URL using the configured authentication. + async fn authenticated_get(&self, url: &str) -> Result { + let request_builder = self.client.get(url); + let authenticated = match &self.auth { + Auth::Anonymous => request_builder, + Auth::ApiKey(key) => request_builder.header("Authorization", &format!("Bearer {key}")), + Auth::Basic { user, password } => request_builder.basic_auth(user, Some(password)), + }; + authenticated.send().await + } + + // This method uses a separate implementation from `issues` because Jira provides a way + // to request a single ticket specifically. That conveniently handles error cases + // where no tickets might match, or more than one might. + /// Access a single issue by its key. 
+ pub async fn issue(&self, key: &str) -> Result { + let url = self.path(&Method::Key(key), 0); + + // Gets an issue by ID and deserializes the JSON to data variable + let issue = self.authenticated_get(&url).await?.json::().await?; + + log::debug!("{:#?}", issue); + + Ok(issue) + } + + /// Access several issues by their keys. + /// + /// If the list of keys is empty, returns an empty list back with no errors. + pub async fn issues(&self, keys: &[&str]) -> Result, JiraQueryError> { + // If the user specifies no keys, skip network requests and return no bugs. + // Returning an error could also be valid, but I believe that this behavior + // is less surprising and more practical. + if keys.is_empty() { + return Ok(Vec::new()); + } + + let method = Method::Keys(keys); + + // If Pagination is set to ChunkSize, split the issue keys into chunk by chunk size + // and request each chunk separately. + if let Pagination::ChunkSize(chunk_size) = self.pagination { + self.paginated_issues(&method, chunk_size).await + // If Pagination is not set to ChunkSize, use a single chunk request for all issues. + } else { + let issues = self.chunk_of_issues(&method, 0).await?; + + // If the resulting list is empty, return an error. + // TODO: The REST parsing above already results in an error if the results are empty. + // Try to catch the error there. + if issues.is_empty() { + Err(JiraQueryError::NoIssues) + } else { + Ok(issues) + } + } + } + + /// Download all issues specified in the request as a series of chunks or pages. + /// The request controls whether the download works with IDs or JQL. + /// This function only processes the resulting pages coming back from Jira + /// and stops the iteration at the last page. + /// + /// See the Jira documentation: + /// . 
+ async fn paginated_issues( + &self, + method: &Method<'_>, + chunk_size: u32, + ) -> Result, JiraQueryError> { + let mut all_issues = Vec::new(); + let mut start_at = 0; + + loop { + let mut chunk_issues = self.chunk_of_issues(method, start_at).await?; + // Calculate the length now before the content moves to `all_issues`. + let page_size = chunk_issues.len(); + all_issues.append(&mut chunk_issues); + + // If this page contains fewer issues than the chunk size, + // it's the last page. Stop the loop. + if page_size < chunk_size as usize { + break; + } + + start_at += chunk_size; + } + + Ok(all_issues) + } + + /// Download a specific list (chunk) of issues. + /// Reused elsewhere as a building block of different pagination methods. + async fn chunk_of_issues( + &self, + method: &Method<'_>, + start_at: u32, + ) -> Result, JiraQueryError> { + let url = self.path(method, start_at); + + let results = self + .authenticated_get(&url) + .await? + .json::() + .await?; + + log::debug!("{:#?}", results); + + Ok(results.issues) + } + + /// Access issues using a free-form JQL search. + /// + /// An example of a query: `project="CentOS Stream" AND priority = High`. + pub async fn search(&self, query: &str) -> Result, JiraQueryError> { + let method = Method::Search(query); + + // If Pagination is set to ChunkSize, split the issue keys into chunk by chunk size + // and request each chunk separately. + if let Pagination::ChunkSize(chunk_size) = self.pagination { + self.paginated_issues(&method, chunk_size).await + // If Pagination is not set to ChunkSize, use a single chunk request for all issues. 
+ } else { + let issues = self.chunk_of_issues(&method, 0).await?; + + Ok(issues) + } + } +} + +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + let result = 2 + 2; + assert_eq!(result, 4); + } + // #[test] + // fn issues() { + // let results = crate::issues("todo", &["todo"], "todo"); + // eprintln!("{:#?}", results); + // assert_eq!(results.issues.len(), todo); + // } +} diff --git a/vendor/jira_query/src/errors.rs b/vendor/jira_query/src/errors.rs new file mode 100644 index 0000000..67136fa --- /dev/null +++ b/vendor/jira_query/src/errors.rs @@ -0,0 +1,28 @@ +/* +Copyright 2022 Marek Suchánek + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +use thiserror::Error; + +/// All errors that might occur in this crate. +#[derive(Error, Debug)] +pub enum JiraQueryError { + #[error("Required issues are missing in the Jira response: {}.", .0.join(", "))] + MissingIssues(Vec), + #[error("The Jira query returned no issues.")] + NoIssues, + #[error("Error in accessing the Jira REST API.")] + Request(#[from] reqwest::Error), +} diff --git a/vendor/jira_query/src/issue_model.rs b/vendor/jira_query/src/issue_model.rs new file mode 100644 index 0000000..9d89317 --- /dev/null +++ b/vendor/jira_query/src/issue_model.rs @@ -0,0 +1,440 @@ +/* +Copyright 2022 Marek Suchánek + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +use chrono::{DateTime, NaiveDate, Utc}; +/// This module replicates the fields in a Jira issue as strongly typed structs. +/// Any extra fields that come from a custom Jira configuration are captured +/// in the `extra` hash map in the parent struct. +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +/// The response from Jira to a JQL query, +/// which includes the list of requested issues and additional metadata. +#[derive(Clone, Debug, Deserialize)] +pub struct JqlResults { + pub issues: Vec, + #[serde(flatten)] + #[allow(dead_code)] + pub extra: Value, +} + +/// A single Jira issue with all its fields. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Issue { + pub id: String, + pub key: String, + pub expand: String, + pub fields: Fields, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// A container for most fields of a Jira issue. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Fields { + #[serde(rename = "lastViewed")] + pub last_viewed: Option>, + pub labels: Vec, + #[serde(default)] + pub assignee: Option, + pub description: Option, + pub duedate: Option, + // Both `versions` and `fixVersions` are optional fields and they might + // either be missing or set to an empty list. + // I'm consolidating both cases as an empty list, because I don't believe + // that there's a meaningful semantic difference between them here. 
+ #[serde(default)] + pub versions: Vec, + #[serde(default)] + #[serde(rename = "fixVersions")] + pub fix_versions: Vec, + #[serde(default)] + pub reporter: Option, + pub status: Status, + pub created: DateTime, + pub updated: DateTime, + pub issuetype: IssueType, + pub timeestimate: Option, + pub aggregatetimeestimate: Option, + pub timeoriginalestimate: Option, + pub timespent: Option, + pub aggregatetimespent: Option, + pub aggregatetimeoriginalestimate: Option, + pub progress: Option, + pub aggregateprogress: Option, + pub workratio: i64, + pub summary: String, + #[serde(default)] + pub creator: Option, + pub project: Project, + pub priority: Option, + #[serde(default)] + pub components: Vec, + pub watches: Watches, + pub archiveddate: Option>, + pub archivedby: Option>, + pub resolution: Option, + pub resolutiondate: Option>, + pub comment: Option, + pub issuelinks: Vec, + pub votes: Votes, + pub parent: Option, + pub subtasks: Vec, + pub environment: Option, + pub security: Option, + #[serde(flatten)] + pub extra: Value, +} + +/// The representation of a Jira user account. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct User { + pub active: bool, + #[serde(rename = "displayName")] + pub display_name: String, + #[serde(rename = "emailAddress")] + pub email_address: Option, + pub key: Option, + pub name: Option, + #[serde(rename = "timeZone")] + pub time_zone: String, + #[serde(rename = "avatarUrls")] + pub avatar_urls: AvatarUrls, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, + #[serde(rename = "accountId")] + pub account_id: Option, +} + +/// The representation of a Jira product version. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Version { + pub id: String, + pub description: Option, + pub name: String, + pub archived: bool, + pub released: bool, + /// Jira stores `releaseDate` only as `YYYY-MM-DD`, so it can't Serialize, Deserialize to full `DateTime`. 
+ #[serde(rename = "releaseDate")] + pub release_date: Option, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The Jira issue status. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Status { + pub description: String, + #[serde(rename = "iconUrl")] + pub icon_url: String, + pub id: String, + pub name: String, + #[serde(rename = "statusCategory")] + pub status_category: StatusCategory, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The category of a Jira issue status. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct StatusCategory { + #[serde(rename = "colorName")] + pub color_name: String, + pub id: i32, + pub key: String, + pub name: String, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The resolution of a Jira issue when it's closed. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Resolution { + pub description: String, + pub id: String, + pub name: String, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The type of a Jira issue. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct IssueType { + #[serde(rename = "avatarId")] + pub avatar_id: Option, + pub description: String, + #[serde(rename = "iconUrl")] + pub icon_url: String, + pub id: String, + pub name: String, + pub subtask: bool, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// A project namespace that groups Jira issues. 
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Project { + pub id: String, + pub key: String, + pub name: String, + #[serde(rename = "projectTypeKey")] + pub project_type_key: String, + #[serde(rename = "projectCategory")] + pub project_category: Option, + #[serde(rename = "avatarUrls")] + pub avatar_urls: AvatarUrls, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The category of a Jira project. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct ProjectCategory { + pub description: String, + pub id: String, + pub name: String, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The priority of a Jira issue. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Priority { + #[serde(rename = "iconUrl")] + pub icon_url: String, + pub id: String, + pub name: String, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The component of a Jira issue. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Component { + pub description: Option, + pub id: String, + pub name: String, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// Users watching a Jira issue. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Watches { + #[serde(rename = "isWatching")] + pub is_watching: bool, + #[serde(rename = "watchCount")] + pub watch_count: i32, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The progress of a Jira issue. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Progress { + pub progress: i32, + pub total: i32, + #[serde(flatten)] + pub extra: Value, +} + +/// A comment below a Jira issue. 
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Comment { + #[serde(default)] + pub author: Option, + pub body: String, + pub created: DateTime, + pub id: String, + #[serde(rename = "updateAuthor")] + pub update_author: Option, + pub updated: DateTime, + pub visibility: Option, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// A container for all comments below a Jira issue. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Comments { + pub comments: Vec, + #[serde(rename = "maxResults")] + pub max_results: i32, + #[serde(rename = "startAt")] + pub start_at: i32, + pub total: i32, + #[serde(flatten)] + pub extra: Value, +} + +/// A link from one Jira issue to another. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct IssueLink { + pub id: String, + #[serde(rename = "outwardIssue")] + pub outward_issue: Option, + #[serde(rename = "inwardIssue")] + pub inward_issue: Option, + #[serde(rename = "type")] + pub link_type: IssueLinkType, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// A Jira issue linked from another one. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct LinkedIssue { + pub id: String, + pub key: String, + pub fields: LinkedIssueFields, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The reduced fields of a linked Jira issue. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct LinkedIssueFields { + pub issuetype: IssueType, + pub priority: Option, + pub status: Status, + pub summary: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The direction of a link to a Jira issue. 
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct IssueLinkType { + pub id: String, + pub inward: String, + pub name: String, + pub outward: String, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The votes for a Jira issue. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Votes { + #[serde(rename = "hasVoted")] + pub has_voted: bool, + pub votes: i32, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// A Jira avatar in several different sizes: +/// +/// * `xsmall` = 16x16 px +/// * `small` = 24x24 px +/// * `medium` = 48x48 px +/// * `full` = maximum +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct AvatarUrls { + #[serde(rename = "16x16")] + pub xsmall: String, + #[serde(rename = "24x24")] + pub small: String, + #[serde(rename = "32x32")] + pub medium: String, + #[serde(rename = "48x48")] + pub full: String, + #[serde(flatten)] + pub extra: Value, +} + +/// A minimal, reduced representation of a Jira issue. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct CondensedIssue { + pub fields: CondensedFields, + pub id: String, + pub key: String, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} + +/// A minimal, reduced listing of the fields of a Jira issue. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct CondensedFields { + pub issuetype: IssueType, + pub priority: Option, + pub status: Status, + pub summary: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The visibility of a Jira issue. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Visibility { + pub r#type: String, + pub value: String, + #[serde(flatten)] + pub extra: Value, +} + +/// The security level of a Jira issue. 
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +// TODO: This seems to be a generic container, similar to several other structs. +// In a future major release, try to consolidate them into one generic struct with: +// description, id, name. +// Also see if Serde can convert id to a number after all, somehow. +pub struct Security { + pub description: String, + pub id: String, + pub name: String, + #[serde(rename = "self")] + pub self_link: String, + #[serde(flatten)] + pub extra: Value, +} diff --git a/vendor/jira_query/src/lib.rs b/vendor/jira_query/src/lib.rs new file mode 100644 index 0000000..50edfd4 --- /dev/null +++ b/vendor/jira_query/src/lib.rs @@ -0,0 +1,41 @@ +/* +Copyright 2022 Marek Suchánek + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Enable additional clippy lints by default. +#![warn( + clippy::pedantic, + clippy::unwrap_used, + clippy::expect_used, + clippy::clone_on_ref_ptr, + clippy::todo +)] +// Forbid unsafe code in this program. 
+#![forbid(unsafe_code)] + +mod access; +mod errors; +mod issue_model; + +pub use access::{Auth, JiraInstance, Pagination}; +pub use errors::JiraQueryError; +pub use issue_model::{ + AvatarUrls, Comment, Comments, Component, CondensedFields, CondensedIssue, Fields, Issue, + IssueLink, IssueLinkType, IssueType, LinkedIssue, LinkedIssueFields, Priority, Progress, + Project, ProjectCategory, Resolution, Status, StatusCategory, User, Version, Visibility, Votes, + Watches, +}; +// Re-export JSON Value because it's an integral part of the issue model. +pub use serde_json::Value; diff --git a/vendor/jira_query/tests/integration.rs b/vendor/jira_query/tests/integration.rs new file mode 100644 index 0000000..5217d79 --- /dev/null +++ b/vendor/jira_query/tests/integration.rs @@ -0,0 +1,165 @@ +use tokio; + +use jira_query::*; + +/// A common convenience function to get anonymous access +/// to the Red Hat Jira instance. +fn rh_jira() -> JiraInstance { + JiraInstance::at("https://issues.redhat.com".to_string()).unwrap() +} + +/// A common convenience function to get anonymous access +/// to the Atlassian Jira instance. +fn atlassian_jira() -> JiraInstance { + JiraInstance::at("https://jira.atlassian.com/".to_string()).unwrap() +} + +/// A common convenience function to get anonymous access +/// to the Apache Jira instance. +fn apache_jira() -> JiraInstance { + JiraInstance::at("https://issues.apache.org/jira/".to_string()).unwrap() +} + +/// A common convenience function to get anonymous access +/// to the Whamcloud Jira instance. +fn whamcloud_jira() -> JiraInstance { + JiraInstance::at("https://jira.whamcloud.com".to_string()).unwrap() +} + +/// Try accessing several public issues separately +/// to test the client and the deserialization. 
+#[tokio::test] +async fn access_issue() { + let instance = rh_jira(); + let _issue1 = instance.issue("CS-1113").await.unwrap(); + let _issue2 = instance.issue("CS-1111").await.unwrap(); +} + +/// Try accessing several public issues at once +/// to test the client and the deserialization. +#[tokio::test] +async fn access_issues() { + let instance = rh_jira(); + let issues = instance.issues(&["CS-1086", "CS-1084"]).await.unwrap(); + + assert_eq!(issues.len(), 2); +} + +/// Try accessing an issue that does not exist +/// and check that the request results in an error. +#[tokio::test] +async fn access_missing_issue() { + let instance = rh_jira(); + let issues = instance.issues(&["CS-11111111111111111111"]).await; + + assert!(issues.is_err()); + // TODO: This case should actually match JiraQueryError::NoIssues, not JiraQueryError::Request. Fix it. + assert!(matches!(issues.unwrap_err(), JiraQueryError::Request(_))); +} + +/// Check that the issue fields contain the expected values. +/// Work with fields that are standard in Jira, rather than custom extensions. +#[tokio::test] +async fn check_standard_fields() { + let instance = rh_jira(); + let issue = instance.issue("CS-1113").await.unwrap(); + + assert_eq!(issue.id, "14658916"); + assert_eq!(issue.key, "CS-1113"); + assert_eq!( + issue.fields.summary, + "Set gitlab.com/redhat/centos-stream/tests to public" + ); + assert_eq!(issue.fields.assignee.unwrap().display_name, "aoife moloney"); + assert_eq!(issue.fields.reporter.display_name, "Donald Zickus"); + assert_eq!(issue.fields.issuetype.name, "Task"); + assert_eq!(issue.fields.project.key, "CS"); + assert_eq!(issue.fields.project.name, "CentOS Stream Pipeline"); + assert_eq!(issue.fields.priority.unwrap().name, "Normal"); +} + +/// Check that the issue was created at the expected date, and that time deserialization +/// works as expected. 
+#[tokio::test] +async fn check_time() { + let instance = rh_jira(); + let issue = instance.issue("CS-1113").await.unwrap(); + + let date_created = chrono::NaiveDate::from_ymd_opt(2022, 5, 24).unwrap(); + assert_eq!(issue.fields.created.date_naive(), date_created); +} + +/// Try accessing issues that match a JQL search. +#[tokio::test] +async fn search_for_issues() { + let instance = rh_jira(); + let query = r#"project="CentOS Stream Pipeline" AND priority=Blocker"#; + let issues = instance.search(query).await.unwrap(); + + // There should be at least a couple of blocker tickets for CentOS Stream. + assert!(issues.len() > 1); +} + +/// Make sure that no IDs on the input result in no bugs, without errors. +#[tokio::test] +async fn check_no_issues() { + let instance = rh_jira(); + let issues = instance.issues(&[]).await; + + assert_eq!(issues.ok(), Some(vec![])); +} + +/// Try accessing issues that match a JQL search. +/// Check that their number isn't limited by a page size. +#[tokio::test] +async fn search_for_issues_start_at() { + let instance = rh_jira().paginate(Pagination::ChunkSize(30)); + let query = r#"project="CentOS Stream Pipeline""#; + let issues = instance.search(query).await.unwrap(); + // The query should result in at least 1,000 issues. + assert!(issues.len() > 1000); +} + +/// Try accessing several public Atlassian issues +/// to test the client and the deserialization. +#[tokio::test] +async fn access_atlassian_issues() { + let instance = atlassian_jira(); + let _issues = instance + .issues(&["ACCESS-1427", "ACCESS-1364", "CLOUD-11546", "CLOUD-11236"]) + .await + .unwrap(); +} + +/// Try accessing Atlassian issues created between two dates, +/// using a JQL search. +#[tokio::test] +async fn search_for_atlassian_issues() { + let instance = atlassian_jira(); + // Search for all closed CONFCLOUD issues created between 2022-11-12 and 2023-02-12. 
+ let query = r#"project = CONFCLOUD AND status = Closed AND created >= 2022-11-12 AND created <= 2023-02-12"#; + let issues = instance.search(query).await.unwrap(); + + // There should be at least 39 such issues. + assert!(issues.len() >= 39); +} + +/// Try accessing several public Apache issues +/// to test the client and the deserialization. +#[tokio::test] +async fn access_apache_issues() { + let instance = apache_jira(); + let _issues = instance + .issues(&["SVN-748", "SVN-750", "SPARK-41075", "SLING-10585"]) + .await + .unwrap(); +} + +#[tokio::test] +async fn access_whamcloud_issues() { + let instance = whamcloud_jira(); + let _issues = instance + .issues(&["LU-10647", "LU-13009", "LU-8002", "LU-8874"]) + .await + .unwrap(); +} From 4c8677b7d4841562c6f52ae45db75c9e1f8eea6e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 25 Jul 2025 19:40:40 -0700 Subject: [PATCH 045/357] fixed failing test --- .gitignore | 1 + tests/cli_failure.rs | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index 5e9dde0..fb40dee 100644 --- a/.gitignore +++ b/.gitignore @@ -68,6 +68,7 @@ Cargo.lock !.vscode/launch.json !.vscode/extensions.json !.vscode/*.code-snippets +.vscode/launch.json # Local History for Visual Studio Code .history/ diff --git a/tests/cli_failure.rs b/tests/cli_failure.rs index 7e18195..6d67dbe 100644 --- a/tests/cli_failure.rs +++ b/tests/cli_failure.rs @@ -56,6 +56,11 @@ rules: request: method: BREW url: "https://example.com/" + response_matcher: + - report_response: true + - status: + - 200 + type: StatusMatch "#, ) .unwrap(); From 981ccf80a23b9a721a9a79b54439098cce5926d6 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 25 Jul 2025 21:04:47 -0700 Subject: [PATCH 046/357] Update src/reporter/sarif_format.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/reporter/sarif_format.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reporter/sarif_format.rs 
b/src/reporter/sarif_format.rs index de39fd0..f771c17 100644 --- a/src/reporter/sarif_format.rs +++ b/src/reporter/sarif_format.rs @@ -206,7 +206,7 @@ impl DetailsReporter { let p = first_match.origin.first(); match p { Origin::File(e) => { - // msg.push_str(&format!("Location: {}\n", e.path.display())); + let uri = if let Some(url) = self.jira_issue_url(&e.path, args) { url } else { From 86b3ba49477662f75b041de5522b6c4b8c8986c4 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 25 Jul 2025 21:50:28 -0700 Subject: [PATCH 047/357] removed openssl dependency from jira-query --- src/jira.rs | 2 +- src/reporter/pretty_format.rs | 2 -- vendor/jira_query/Cargo.toml | 3 ++- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/jira.rs b/src/jira.rs index 1b27462..9b9e4fb 100644 --- a/src/jira.rs +++ b/src/jira.rs @@ -19,7 +19,7 @@ pub async fn fetch_issues( .build() .context("Failed to build HTTP client")?; - let mut jira = JiraInstance::at(base.to_string())? // no trailing slash here + let mut jira = JiraInstance::at(base.to_string())? 
// no trailing slash here .with_client(client) .paginate(Pagination::MaxResults(max_results as u32)); diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index 02999e5..0b4a46c 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -223,10 +223,8 @@ impl<'a> Display for PrettyFinding<'a> { f, " |Path..........: {}", if rm.validation_success { - // reporter.style_active_creds(e.path.display()).to_string().to_string() reporter.style_active_creds(&display_path).to_string() } else { - // e.path.display().to_string() display_path } )?; diff --git a/vendor/jira_query/Cargo.toml b/vendor/jira_query/Cargo.toml index 6073d49..9140aff 100644 --- a/vendor/jira_query/Cargo.toml +++ b/vendor/jira_query/Cargo.toml @@ -19,7 +19,8 @@ categories = ["api-bindings"] [dependencies] log = "0.4" thiserror = "2.0" -reqwest = { version = "0.12", features = ["json"] } +reqwest = { version = "0.12", default-features = false, features = ["json","rustls-tls"] } + serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" # Version with a security patch: From 840b1c0c33358f0a1e2701527044627d82f4436b Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 26 Jul 2025 08:41:21 -0700 Subject: [PATCH 048/357] fixing docker build permission error in github actions --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index e662d7e..19777fb 100644 --- a/Makefile +++ b/Makefile @@ -241,6 +241,7 @@ endif linux-x64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/amd64 --rm \ + --user "$(shell id -u):$(shell id -g)" \ -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ apk add --no-cache \ musl-dev \ @@ -270,6 +271,7 @@ linux-x64: check-docker create-dockerignore linux-arm64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/arm64 --rm \ + --user "$(shell id -u):$(shell id -g)" \ -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ apk 
add --no-cache \ musl-dev \ From 3c112d919ab4d19316d6f088863b01e35268f0b0 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 26 Jul 2025 08:46:19 -0700 Subject: [PATCH 049/357] fixing docker build permission error in github actions --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 19777fb..33c3998 100644 --- a/Makefile +++ b/Makefile @@ -241,7 +241,7 @@ endif linux-x64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/amd64 --rm \ - --user "$(shell id -u):$(shell id -g)" \ + --user "$(shell id -u):$(shell id -g)" \ -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ apk add --no-cache \ musl-dev \ @@ -271,7 +271,7 @@ linux-x64: check-docker create-dockerignore linux-arm64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/arm64 --rm \ - --user "$(shell id -u):$(shell id -g)" \ + --user "$(shell id -u):$(shell id -g)" \ -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ apk add --no-cache \ musl-dev \ From 652abe94403f7b30b9c48ca847a129bf5ac140c4 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 26 Jul 2025 17:28:49 -0700 Subject: [PATCH 050/357] fixing docker build permission error in github actions --- .github/workflows/release.yml | 3 +++ Makefile | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5eb042c..56fb028 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -27,6 +27,9 @@ jobs: - name: Build (Makefile linux-x64) run: make linux-x64 + - name: Fix permissions + run: sudo chown -R $(id -u):$(id -g) target + - name: Build Debian package run: | cargo deb --no-build --target x86_64-unknown-linux-musl \ diff --git a/Makefile b/Makefile index 33c3998..e662d7e 100644 --- a/Makefile +++ b/Makefile @@ -241,7 +241,6 @@ endif linux-x64: check-docker create-dockerignore @mkdir -p target/release docker run 
--platform linux/amd64 --rm \ - --user "$(shell id -u):$(shell id -g)" \ -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ apk add --no-cache \ musl-dev \ @@ -271,7 +270,6 @@ linux-x64: check-docker create-dockerignore linux-arm64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/arm64 --rm \ - --user "$(shell id -u):$(shell id -g)" \ -v "$$(pwd):/src" -w /src rust:1.88-alpine sh -eu -c '\ apk add --no-cache \ musl-dev \ From 4143fc463f104ef3fdeacec3bc3f1c7b13b2a6fa Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 26 Jul 2025 21:01:35 -0700 Subject: [PATCH 051/357] updated README --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7ce5dad..8d398fa 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,13 @@ Kingfisher is a blazingly fast secret‑scanning and validation tool built in Ru Kingfisher originated as a fork of [Nosey Parker](https://github.com/praetorian-inc/noseyparker) by Praetorian Security, Inc, and is built atop their incredible work and the work contributed by the Nosey Parker community. -Kingfisher extends Nosey Parker with live secret validation via cloud-provider APIs, augments regex detection with tree-sitter for code parsing, adds GitLab support, and builds a Windows x64 binary. +Kingfisher extends Nosey Parker by: +1. Validating secrets in real time via cloud-provider APIs +2. Enhancing regex-based detection with source-code parsing for improved accuracy +3. Adding GitLab repository scanning support +4. Providing Jira scanning capabilities +5. Introducing a baseline feature that suppresses known secrets and reports only newly introduced ones +5. 
Offering native Windows environment support **MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) From e6693b480e6dfe8b1083b0da967f4f7738a584e7 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 26 Jul 2025 22:00:05 -0700 Subject: [PATCH 052/357] added buildkite rule --- CHANGELOG.md | 3 +++ Cargo.toml | 2 +- data/rules/buildkit.yml | 28 ++++++++++++++++++++++++++++ data/rules/datadog.yml | 13 ++++++------- 4 files changed, 38 insertions(+), 8 deletions(-) create mode 100644 data/rules/buildkit.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index f21fc28..a23a7ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.27.0] +- Added Buildkite rule + ## [1.26.0] - Added rule for ElevenLabs - Added support for scanning Jira issues via a given JQL (Jira Query Language) diff --git a/Cargo.toml b/Cargo.toml index eff7a26..28422bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.26.0" +version = "1.27.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/data/rules/buildkit.yml b/data/rules/buildkit.yml new file mode 100644 index 0000000..8add2f1 --- /dev/null +++ b/data/rules/buildkit.yml @@ -0,0 +1,28 @@ +rules: + - name: Buildkite API Key + id: kingfisher.buildkite.1 + pattern: | + (?xi) + \b + ( + bkua_[a-z0-9]{40} + ) + \b + min_entropy: 3.5 + confidence: medium + examples: + - bkua_3c7019c2e4b6e76fe2e8bdde7c154e3c1a211743 + validation: + type: Http + content: + request: + method: GET + url: https://api.buildkite.com/v2/access-token + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - 
type: WordMatch + words: ['"uuid"', '"user"'] \ No newline at end of file diff --git a/data/rules/datadog.yml b/data/rules/datadog.yml index c851ed8..718b282 100644 --- a/data/rules/datadog.yml +++ b/data/rules/datadog.yml @@ -43,12 +43,11 @@ rules: id: kingfisher.datadog.2 pattern: | (?xi) - \b - (?: - dd[_-]?\w{0,8}[_-]?(?:key|secret) | - datadog - ) - (?:.|[\n\r]){0,64}? + \b + datadog + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? \b ( [a-z0-9]{40} @@ -57,7 +56,7 @@ rules: min_entropy: 3.3 confidence: medium examples: - - dd_secret_key-3c0c3965368a6b10f7640dbda46abfdca981c2d3 + - datadog_secret_key-3c0c3965368a6b10f7640dbda46abfdca981c2d3 - datadog_token = BzHpkcs7LujMb3Q1vLRRjbpBNxxYV0ousumYoKJS references: - https://docs.datadoghq.com/account_management/api-app-keys/ \ No newline at end of file From 1e9acb39c1b69023f20692fb73fa9e210daf25a5 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 26 Jul 2025 22:00:35 -0700 Subject: [PATCH 053/357] added buildkite rule --- data/rules/{buildkit.yml => buildkite.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename data/rules/{buildkit.yml => buildkite.yml} (100%) diff --git a/data/rules/buildkit.yml b/data/rules/buildkite.yml similarity index 100% rename from data/rules/buildkit.yml rename to data/rules/buildkite.yml From 0dc236372b9caed12be9b18b85da6240576397da Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 26 Jul 2025 22:01:49 -0700 Subject: [PATCH 054/357] added buildkite rule --- data/rules/buildkite.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data/rules/buildkite.yml b/data/rules/buildkite.yml index 8add2f1..5405c59 100644 --- a/data/rules/buildkite.yml +++ b/data/rules/buildkite.yml @@ -12,6 +12,8 @@ rules: confidence: medium examples: - bkua_3c7019c2e4b6e76fe2e8bdde7c154e3c1a211743 + references: + - https://buildkite.com/docs/apis/rest-api/access-token validation: type: Http content: From 627ef98881486ae7c9d81ae417e5d88c897c66f4 Mon Sep 17 
00:00:00 2001 From: Mick Grove Date: Sun, 27 Jul 2025 12:20:20 -0700 Subject: [PATCH 055/357] WIP: Adding support for scanning Docker images --- CHANGELOG.md | 1 + Cargo.toml | 1 + README.md | 8 ++++ data/rules/buildkite.yml | 3 +- src/cli/commands/inputs.rs | 8 +++- src/main.rs | 4 +- src/reporter/json_format.rs | 4 +- src/reporter/pretty_format.rs | 4 +- src/scanner/docker.rs | 77 +++++++++++++++++++++++++++++++++++ src/scanner/mod.rs | 2 + src/scanner/runner.rs | 13 +++++- tests/int_dedup.rs | 4 +- tests/int_github.rs | 4 +- tests/int_gitlab.rs | 4 +- tests/int_validation_cache.rs | 4 +- tests/int_vulnerable_files.rs | 8 +++- 16 files changed, 137 insertions(+), 12 deletions(-) create mode 100644 src/scanner/docker.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index a23a7ae..5556a43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.27.0] - Added Buildkite rule +- Added support for scanning Docker images via `--docker-image` ## [1.26.0] - Added rule for ElevenLabs diff --git a/Cargo.toml b/Cargo.toml index 28422bd..14e4cfe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -186,6 +186,7 @@ globset = "0.4.16" jsonwebtoken = "9.3.1" ipnet = "2.11.0" jira_query = "1.6.0" +oci-distribution = "0.11.0" [dependencies.tikv-jemallocator] version = "0.6" diff --git a/README.md b/README.md index 8d398fa..b14ee12 100644 --- a/README.md +++ b/README.md @@ -197,6 +197,14 @@ kingfisher scan /path/to/repo --format sarif --output findings.sarif cat /path/to/file.py | kingfisher scan - ``` +### Scan a Docker image (without Docker installed) + +```bash +kingfisher scan --docker-image ubuntu:latest +``` + +### Sc + ### Scan using a rule _family_ with one flag _(prefix matching: `--rule kingfisher.aws` loads `kingfisher.aws._`)\* diff --git a/data/rules/buildkite.yml b/data/rules/buildkite.yml index 5405c59..3728e98 100644 --- a/data/rules/buildkite.yml +++ b/data/rules/buildkite.yml @@ -27,4 +27,5 @@ rules: 
- type: StatusMatch status: [200] - type: WordMatch - words: ['"uuid"', '"user"'] \ No newline at end of file + words: ['"uuid"', '"user"'] + \ No newline at end of file diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index c7a59bc..f698d87 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -26,7 +26,8 @@ pub struct InputSpecifierArgs { "git_url", "all_github_organizations", "all_gitlab_groups", - "jira_url" + "jira_url", + "docker_image" ]), value_hint = ValueHint::AnyPath )] @@ -97,6 +98,11 @@ pub struct InputSpecifierArgs { #[arg(long, default_value_t = 100)] pub max_results: usize, + /// Docker/OCI images to scan (no local Docker required) + #[arg(long = "docker-image")] + pub docker_image: Vec, + + /// Select how to clone Git repositories #[arg(long, default_value_t=GitCloneMode::Bare, alias="git-clone-mode")] pub git_clone: GitCloneMode, diff --git a/src/main.rs b/src/main.rs index 3b0d444..a85fb48 100644 --- a/src/main.rs +++ b/src/main.rs @@ -281,7 +281,9 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 31123b6..4132def 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -428,7 +428,9 @@ mod tests { // Jira options jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index 0b4a46c..5960538 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -344,7 +344,9 @@ fn test_pretty_format_with_nan_entropy_panics() { // Jira options jira_url: None, jql: None, - 
max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs new file mode 100644 index 0000000..f636e91 --- /dev/null +++ b/src/scanner/docker.rs @@ -0,0 +1,77 @@ +use std::io::Write; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result}; +use oci_distribution::client::{linux_amd64_resolver, Client, ClientConfig}; +use oci_distribution::{secrets::RegistryAuth, Reference}; +use tracing::debug; + +use crate::decompress::decompress_file; + +pub struct Docker; + +impl Docker { + pub fn new() -> Self { + Docker + } + + pub async fn save_image_to_dir(&self, image: &str, out_dir: &Path) -> Result<()> { + let reference: Reference = + image.parse().with_context(|| format!("invalid image reference {image}"))?; + debug!("Pulling {image}"); + let mut client = Client::new(ClientConfig { + platform_resolver: Some(Box::new(linux_amd64_resolver)), + ..Default::default() + }); + let auth = RegistryAuth::Anonymous; + let accepted = vec![ + oci_distribution::manifest::IMAGE_LAYER_MEDIA_TYPE, + oci_distribution::manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE, + oci_distribution::manifest::IMAGE_DOCKER_LAYER_TAR_MEDIA_TYPE, + oci_distribution::manifest::IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE, + ]; + let image = client.pull(&reference, &auth, accepted).await?; + + std::fs::create_dir_all(out_dir)?; + for (idx, layer) in image.layers.into_iter().enumerate() { + let ext = match layer.media_type.as_str() { + oci_distribution::manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE + | oci_distribution::manifest::IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE => "tar.gz", + oci_distribution::manifest::IMAGE_LAYER_MEDIA_TYPE + | oci_distribution::manifest::IMAGE_DOCKER_LAYER_TAR_MEDIA_TYPE => "tar", + _ => "bin", + }; + let file_name = format!("layer_{idx}.{ext}"); + let tmp_path = out_dir.join(file_name); + let mut tmp = 
std::fs::File::create(&tmp_path)?; + tmp.write_all(&layer.data)?; + decompress_file(&tmp_path, Some(out_dir))?; + } + Ok(()) + } +} + +pub async fn save_docker_images(images: &[String], clone_root: &Path) -> Result> { + let docker = Docker::new(); + let mut dirs = Vec::new(); + for image in images { + let dir_name = image.replace(['/', ':'], "_"); + let out_dir = clone_root.join(format!("docker_{dir_name}")); + docker + .save_image_to_dir(image, &out_dir) + .await + .with_context(|| format!("saving image {image}"))?; + dirs.push(out_dir); + } + Ok(dirs) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn docker_struct_new() { + let _ = Docker::new(); + } +} \ No newline at end of file diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index 0b4423f..fff9440 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -3,7 +3,9 @@ pub(crate) use enumerate::enumerate_filesystem_inputs; pub(crate) use repos::{clone_or_update_git_repos, enumerate_github_repos}; pub use runner::{load_and_record_rules, run_async_scan, run_scan}; pub(crate) use validation::run_secret_validation; +pub(crate) use docker::save_docker_images; +mod docker; mod enumerate; mod processing; mod repos; diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 99ad466..568b498 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -19,7 +19,7 @@ use crate::{ scanner::{ clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos, repos::{enumerate_gitlab_repos, fetch_jira_issues}, - run_secret_validation, + run_secret_validation, save_docker_images, summary::print_scan_summary, }, }; @@ -68,6 +68,17 @@ pub async fn run_async_scan( let jira_dirs = fetch_jira_issues(args, global_args, &datastore).await?; input_roots.extend(jira_dirs); + // Save Docker images if specified + if !args.input_specifier_args.docker_image.is_empty() { + let clone_root = { + let ds = datastore.lock().unwrap(); + ds.clone_root() + }; + let docker_dirs = + 
save_docker_images(&args.input_specifier_args.docker_image, &clone_root).await?; + input_roots.extend(docker_dirs); + } + if input_roots.is_empty() { bail!("No inputs to scan"); } diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 4c4975c..2763ebd 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -81,7 +81,9 @@ rules: jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_github.rs b/tests/int_github.rs index c8256c8..d4f7f25 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -68,7 +68,9 @@ fn test_github_remote_scan() -> Result<()> { jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 6ec6e19..67b1bc3 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -67,7 +67,9 @@ fn test_gitlab_remote_scan() -> Result<()> { jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, scan_nested_repos: true, diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 933c068..3e21947 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -124,7 +124,9 @@ async fn test_validation_cache_and_depvars() -> Result<()> { jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 0da7868..ad78192 100644 --- 
a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -67,7 +67,9 @@ impl TestContext { jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, @@ -135,7 +137,9 @@ impl TestContext { jira_url: None, jql: None, - max_results: 50, + max_results: 100, + // Docker image scanning + docker_image: Vec::new(), // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, From 17586c5b5792d8062c2d9e56d9e8037b9a28a343 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 27 Jul 2025 13:17:49 -0700 Subject: [PATCH 056/357] WIP: Adding support for scanning Docker images --- src/scanner/docker.rs | 38 ++++++++++++++++++++++++++++++++------ src/scanner/runner.rs | 8 ++++++-- tests/smoke_docker.rs | 20 ++++++++++++++++++++ 3 files changed, 58 insertions(+), 8 deletions(-) create mode 100644 tests/smoke_docker.rs diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs index f636e91..dd70bbe 100644 --- a/src/scanner/docker.rs +++ b/src/scanner/docker.rs @@ -1,9 +1,11 @@ use std::io::Write; use std::path::{Path, PathBuf}; +use std::time::Duration; use anyhow::{Context, Result}; use oci_distribution::client::{linux_amd64_resolver, Client, ClientConfig}; use oci_distribution::{secrets::RegistryAuth, Reference}; +use indicatif::{ProgressBar, ProgressStyle}; use tracing::debug; use crate::decompress::decompress_file; @@ -15,14 +17,30 @@ impl Docker { Docker } - pub async fn save_image_to_dir(&self, image: &str, out_dir: &Path) -> Result<()> { + pub async fn save_image_to_dir( + &self, + image: &str, + out_dir: &Path, + use_progress: bool, + ) -> Result<()> { let reference: Reference = image.parse().with_context(|| format!("invalid image reference {image}"))?; debug!("Pulling {image}"); - let mut client = Client::new(ClientConfig { + let pb = if use_progress { + let style = 
ProgressStyle::with_template("{spinner} {msg}") + .expect("progress template"); + let pb = ProgressBar::new_spinner().with_style(style); + pb.enable_steady_tick(Duration::from_millis(100)); + pb.set_message(format!("pulling {image}")); + pb + } else { + ProgressBar::hidden() + }; + let client = Client::new(ClientConfig { platform_resolver: Some(Box::new(linux_amd64_resolver)), ..Default::default() }); + let mut client = client; let auth = RegistryAuth::Anonymous; let accepted = vec![ oci_distribution::manifest::IMAGE_LAYER_MEDIA_TYPE, @@ -30,10 +48,12 @@ impl Docker { oci_distribution::manifest::IMAGE_DOCKER_LAYER_TAR_MEDIA_TYPE, oci_distribution::manifest::IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE, ]; - let image = client.pull(&reference, &auth, accepted).await?; + let pulled = client.pull(&reference, &auth, accepted).await?; + pb.set_length(pulled.layers.len() as u64); + pb.set_message("extracting layers"); std::fs::create_dir_all(out_dir)?; - for (idx, layer) in image.layers.into_iter().enumerate() { + for (idx, layer) in pulled.layers.into_iter().enumerate() { let ext = match layer.media_type.as_str() { oci_distribution::manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE | oci_distribution::manifest::IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE => "tar.gz", @@ -46,19 +66,25 @@ impl Docker { let mut tmp = std::fs::File::create(&tmp_path)?; tmp.write_all(&layer.data)?; decompress_file(&tmp_path, Some(out_dir))?; + pb.inc(1); } + pb.finish_with_message(format!("saved {image}")); Ok(()) } } -pub async fn save_docker_images(images: &[String], clone_root: &Path) -> Result> { +pub async fn save_docker_images( + images: &[String], + clone_root: &Path, + use_progress: bool, +) -> Result> { let docker = Docker::new(); let mut dirs = Vec::new(); for image in images { let dir_name = image.replace(['/', ':'], "_"); let out_dir = clone_root.join(format!("docker_{dir_name}")); docker - .save_image_to_dir(image, &out_dir) + .save_image_to_dir(image, &out_dir, use_progress) .await .with_context(|| 
format!("saving image {image}"))?; dirs.push(out_dir); diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 568b498..4dec5e7 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -74,8 +74,12 @@ pub async fn run_async_scan( let ds = datastore.lock().unwrap(); ds.clone_root() }; - let docker_dirs = - save_docker_images(&args.input_specifier_args.docker_image, &clone_root).await?; + let docker_dirs = save_docker_images( + &args.input_specifier_args.docker_image, + &clone_root, + progress_enabled, + ) + .await?; input_roots.extend(docker_dirs); } diff --git a/tests/smoke_docker.rs b/tests/smoke_docker.rs new file mode 100644 index 0000000..2fd61bc --- /dev/null +++ b/tests/smoke_docker.rs @@ -0,0 +1,20 @@ +use assert_cmd::prelude::*; +use predicates::prelude::*; +use std::process::Command; + +#[test] +fn smoke_scan_docker_image() -> anyhow::Result<()> { + Command::cargo_bin("kingfisher")? + .args([ + "scan", + "--docker-image", + "ghcr.io/owasp/wrongsecrets/wrongsecrets-master:latest-master", + "--format", + "json", + "--no-update-check", + ]) + .assert() + .code(205) + .stdout(predicate::str::contains("Active Credential")); + Ok(()) +} \ No newline at end of file From 8739c92572c5e3356f25e381c1723d08ece01b6a Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 27 Jul 2025 14:59:19 -0700 Subject: [PATCH 057/357] WIP: Adding support for scanning Docker images --- Cargo.toml | 1 + src/findings_store.rs | 9 ++++ src/reporter.rs | 15 +++++++ src/reporter/json_format.rs | 4 ++ src/reporter/pretty_format.rs | 2 + src/scanner/docker.rs | 85 ++++++++++++++++++++++++++++++----- src/scanner/runner.rs | 9 +++- 7 files changed, 114 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 14e4cfe..e80faa7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -187,6 +187,7 @@ jsonwebtoken = "9.3.1" ipnet = "2.11.0" jira_query = "1.6.0" oci-distribution = "0.11.0" +walkdir = "2.5.0" [dependencies.tikv-jemallocator] version = "0.6" diff --git 
a/src/findings_store.rs b/src/findings_store.rs index 7d3cd76..5972490 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -52,6 +52,7 @@ pub struct FindingsStore { bloom_items: usize, blob_meta: FxHashMap>, origin_meta: FxHashMap>, + docker_images: FxHashMap, } impl FindingsStore { pub fn new(clone_dir: PathBuf) -> Self { @@ -69,6 +70,7 @@ impl FindingsStore { clone_dir, seen_bloom, bloom_items: 0, + docker_images: FxHashMap::default(), } } @@ -286,6 +288,13 @@ impl FindingsStore { self.clone_dir.clone() } + pub fn register_docker_image(&mut self, dir: PathBuf, image: String) { + self.docker_images.insert(dir, image); + } + + pub fn docker_images(&self) -> &FxHashMap { + &self.docker_images + } pub fn get_finding_data_iter( &self, diff --git a/src/reporter.rs b/src/reporter.rs index f6a3331..91fcbd1 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -140,6 +140,21 @@ impl DetailsReporter { } } + fn docker_display_path(&self, path: &std::path::Path) -> Option { + let ds = self.datastore.lock().ok()?; + for (dir, image) in ds.docker_images().iter() { + if path.starts_with(dir) { + let rel = path.strip_prefix(dir).ok()?; + let mut rel_str = rel.display().to_string(); + rel_str = rel_str.replace(".decomp.tar!", ".tar.gz => "); + rel_str = rel_str.replace(".tar!", ".tar => "); + rel_str = rel_str.replace('!', " => "); + return Some(format!("{} => {}", image, rel_str)); + } + } + None + } + fn gather_findings(&self) -> Result> { let metadata_list = self.get_finding_data()?; let all_matches = self.get_filtered_matches()?; diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 4132def..6916337 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -103,6 +103,8 @@ impl DetailsReporter { if let Origin::File(e) = origin { if let Some(url) = self.jira_issue_url(&e.path, args) { Some(url) + } else if let Some(mapped) = self.docker_display_path(&e.path) { + Some(mapped) } else { Some(e.path.display().to_string()) } 
@@ -252,6 +254,8 @@ impl DetailsReporter { if let Origin::File(e) = origin { if let Some(url) = self.jira_issue_url(&e.path, args) { Some(url) + } else if let Some(mapped) = self.docker_display_path(&e.path) { + Some(mapped) } else { Some(e.path.display().to_string()) } diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index 5960538..ffa7cf0 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -216,6 +216,8 @@ impl<'a> Display for PrettyFinding<'a> { Origin::File(e) => { let display_path = if let Some(url) = reporter.jira_issue_url(&e.path, args) { url + } else if let Some(mapped) = reporter.docker_display_path(&e.path) { + mapped } else { e.path.display().to_string() }; diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs index dd70bbe..4a61ac3 100644 --- a/src/scanner/docker.rs +++ b/src/scanner/docker.rs @@ -1,12 +1,16 @@ -use std::io::Write; +use std::fs::File; +use std::io::{Read, Write}; use std::path::{Path, PathBuf}; +use std::process::Command; use std::time::Duration; -use anyhow::{Context, Result}; +use anyhow::{anyhow, Context, Result}; +use indicatif::{ProgressBar, ProgressStyle}; use oci_distribution::client::{linux_amd64_resolver, Client, ClientConfig}; use oci_distribution::{secrets::RegistryAuth, Reference}; -use indicatif::{ProgressBar, ProgressStyle}; +use sha2::{Digest, Sha256}; use tracing::debug; +use walkdir::WalkDir; use crate::decompress::decompress_file; @@ -17,19 +21,80 @@ impl Docker { Docker } + fn try_save_local_image(&self, image: &str, out_dir: &Path, use_progress: bool) -> Result<()> { + let docker = Command::new("docker") + .args(["image", "inspect", image]) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status(); + + if !matches!(docker, Ok(s) if s.success()) { + return Err(anyhow!("image not local")); + } + + let pb = if use_progress { + let style = ProgressStyle::with_template("{spinner} {msg} {pos}/{len}") + .expect("progress 
template"); + let pb = ProgressBar::new(0).with_style(style); + pb.enable_steady_tick(Duration::from_millis(100)); + pb + } else { + ProgressBar::hidden() + }; + pb.set_message(format!("saving local {image}")); + + std::fs::create_dir_all(out_dir)?; + let tar_path = out_dir.join("local_image.tar"); + let status = Command::new("docker") + .args(["image", "save", image, "-o", tar_path.to_str().unwrap()]) + .status() + .with_context(|| "running docker save")?; + if !status.success() { + pb.finish_with_message("docker save failed"); + return Err(anyhow!("failed to save local image")); + } + + pb.set_message("extracting layers"); + decompress_file(&tar_path, Some(out_dir))?; + + let mut layer_paths = Vec::new(); + for entry in WalkDir::new(out_dir) { + let entry = entry?; + if entry.file_name() == "layer.tar" { + layer_paths.push(entry.path().to_path_buf()); + } + } + + pb.set_length(layer_paths.len() as u64); + for p in layer_paths { + let mut data = Vec::new(); + File::open(&p)?.read_to_end(&mut data)?; + let digest = format!("{:x}", Sha256::digest(&data)); + let new_path = out_dir.join(format!("layer_{digest}.tar")); + std::fs::rename(&p, &new_path)?; + pb.inc(1); + } + + pb.finish_with_message(format!("saved {image}")); + Ok(()) + } + pub async fn save_image_to_dir( &self, image: &str, out_dir: &Path, use_progress: bool, ) -> Result<()> { + if self.try_save_local_image(image, out_dir, use_progress).is_ok() { + return Ok(()); + } let reference: Reference = image.parse().with_context(|| format!("invalid image reference {image}"))?; debug!("Pulling {image}"); let pb = if use_progress { - let style = ProgressStyle::with_template("{spinner} {msg}") + let style = ProgressStyle::with_template("{spinner} {msg} {pos}/{len}") .expect("progress template"); - let pb = ProgressBar::new_spinner().with_style(style); + let pb = ProgressBar::new(0).with_style(style); pb.enable_steady_tick(Duration::from_millis(100)); pb.set_message(format!("pulling {image}")); pb @@ -53,7 +118,7 @@ 
impl Docker { pb.set_message("extracting layers"); std::fs::create_dir_all(out_dir)?; - for (idx, layer) in pulled.layers.into_iter().enumerate() { + for layer in pulled.layers.into_iter() { let ext = match layer.media_type.as_str() { oci_distribution::manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE | oci_distribution::manifest::IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE => "tar.gz", @@ -61,11 +126,11 @@ impl Docker { | oci_distribution::manifest::IMAGE_DOCKER_LAYER_TAR_MEDIA_TYPE => "tar", _ => "bin", }; - let file_name = format!("layer_{idx}.{ext}"); + let digest = layer.sha256_digest(); + let file_name = format!("layer_{digest}.{ext}"); let tmp_path = out_dir.join(file_name); let mut tmp = std::fs::File::create(&tmp_path)?; tmp.write_all(&layer.data)?; - decompress_file(&tmp_path, Some(out_dir))?; pb.inc(1); } pb.finish_with_message(format!("saved {image}")); @@ -77,7 +142,7 @@ pub async fn save_docker_images( images: &[String], clone_root: &Path, use_progress: bool, -) -> Result> { +) -> Result> { let docker = Docker::new(); let mut dirs = Vec::new(); for image in images { @@ -87,7 +152,7 @@ pub async fn save_docker_images( .save_image_to_dir(image, &out_dir, use_progress) .await .with_context(|| format!("saving image {image}"))?; - dirs.push(out_dir); + dirs.push((out_dir, image.clone())); } Ok(dirs) } diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 4dec5e7..52c8004 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -80,9 +80,16 @@ pub async fn run_async_scan( progress_enabled, ) .await?; - input_roots.extend(docker_dirs); + for (dir, img) in docker_dirs { + { + let mut ds = datastore.lock().unwrap(); + ds.register_docker_image(dir.clone(), img); + } + input_roots.push(dir); + } } + if input_roots.is_empty() { bail!("No inputs to scan"); } From 7cf37969dd7de41e6a1638bcb5b87898816c1b22 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 27 Jul 2025 17:54:26 -0700 Subject: [PATCH 058/357] Adding support for scanning Docker images --- 
src/decompress.rs | 33 ++++++++++++++++++++++++++++++--- src/scanner/docker.rs | 5 +++++ src/scanner/runner.rs | 1 - 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/decompress.rs b/src/decompress.rs index 88a11ae..8bc43aa 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -256,12 +256,28 @@ fn make_output_path(path: &Path, base: Option<&Path>, extension: &str) -> PathBu } } -/* ───────────────────────────────────────────────────────────── */ pub fn decompress_file_to_temp(path: &Path) -> Result<(CompressedContent, TempDir)> { let temp_dir = tempdir()?; - let content = decompress_file(path, Some(temp_dir.path()))?; + let mut content = decompress_file(path, Some(temp_dir.path()))?; - if let CompressedContent::Archive(ref files) = content { + // if let CompressedContent::Archive(ref files) = content { + let mut prefix_for_replace = None; + if let Some(stem) = path.file_stem() { + let candidate = temp_dir.path().join(stem).with_extension("decomp.tar"); + prefix_for_replace = Some(candidate); + } + + if let CompressedContent::Archive(ref mut files) = content { + if let Some(prefix) = &prefix_for_replace { + let prefix_str = prefix.display().to_string(); + for (name, _) in files.iter_mut() { + if let Some(rest) = name.strip_prefix(&prefix_str) { + if let Some((_, suffix)) = rest.split_once('!') { + *name = format!("{}!{}", path.display(), suffix); + } + } + } + } for (name, data) in files { let rel = name.split_once('!').map(|(_, sub)| sub).unwrap_or(name); let p = temp_dir.path().join(rel.replace('\\', "/")); @@ -270,6 +286,17 @@ pub fn decompress_file_to_temp(path: &Path) -> Result<(CompressedContent, TempDi } fs::write(p, data)?; } + } else if let CompressedContent::ArchiveFiles(ref mut entries) = content { + if let Some(prefix) = &prefix_for_replace { + let prefix_str = prefix.display().to_string(); + for (name, _) in entries.iter_mut() { + if let Some(rest) = name.strip_prefix(&prefix_str) { + if let Some((_, suffix)) = rest.split_once('!') 
{ + *name = format!("{}!{}", path.display(), suffix); + } + } + } + } } Ok((content, temp_dir)) } diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs index 4a61ac3..f6e16ed 100644 --- a/src/scanner/docker.rs +++ b/src/scanner/docker.rs @@ -72,6 +72,9 @@ impl Docker { let digest = format!("{:x}", Sha256::digest(&data)); let new_path = out_dir.join(format!("layer_{digest}.tar")); std::fs::rename(&p, &new_path)?; + // extract layer contents so inner filenames appear in scan results + decompress_file(&new_path, Some(out_dir))?; + std::fs::remove_file(&new_path)?; pb.inc(1); } @@ -131,6 +134,8 @@ impl Docker { let tmp_path = out_dir.join(file_name); let mut tmp = std::fs::File::create(&tmp_path)?; tmp.write_all(&layer.data)?; + decompress_file(&tmp_path, Some(out_dir))?; + std::fs::remove_file(&tmp_path)?; pb.inc(1); } pb.finish_with_message(format!("saved {image}")); diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 52c8004..f1271cf 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -89,7 +89,6 @@ pub async fn run_async_scan( } } - if input_roots.is_empty() { bail!("No inputs to scan"); } From 112c8c305787bd6032dceb9b78bfb0bd775234df Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 27 Jul 2025 19:03:43 -0700 Subject: [PATCH 059/357] Adding support for scanning Docker images --- src/reporter.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/reporter.rs b/src/reporter.rs index 91fcbd1..ca69c97 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -146,10 +146,10 @@ impl DetailsReporter { if path.starts_with(dir) { let rel = path.strip_prefix(dir).ok()?; let mut rel_str = rel.display().to_string(); - rel_str = rel_str.replace(".decomp.tar!", ".tar.gz => "); - rel_str = rel_str.replace(".tar!", ".tar => "); - rel_str = rel_str.replace('!', " => "); - return Some(format!("{} => {}", image, rel_str)); + rel_str = rel_str.replace(".decomp.tar!", ".tar.gz | "); + rel_str = rel_str.replace(".tar!", ".tar | 
"); + rel_str = rel_str.replace('!', " | "); + return Some(format!("{} | {}", image, rel_str)); } } None From 34b533a59d82e7ad188d46cfc731b7954b143168 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 27 Jul 2025 20:25:45 -0700 Subject: [PATCH 060/357] Adding support for scanning Docker images --- README.md | 18 +++++++++++++----- src/scanner/docker.rs | 16 +++++++++++++++- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index b14ee12..6138ea9 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,10 @@ Kingfisher extends Nosey Parker by: 1. Validating secrets in real time via cloud-provider APIs 2. Enhancing regex-based detection with source-code parsing for improved accuracy 3. Adding GitLab repository scanning support -4. Providing Jira scanning capabilities -5. Introducing a baseline feature that suppresses known secrets and reports only newly introduced ones -5. Offering native Windows environment support +4. Adding support for scanning Docker images via `--docker-image` +5. Providing Jira scanning capabilities +6. Introducing a baseline feature that suppresses known secrets and reports only newly introduced ones +7. 
Offering native Windows support **MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) @@ -195,14 +196,20 @@ kingfisher scan /path/to/repo --format sarif --output findings.sarif ```bash cat /path/to/file.py | kingfisher scan - -``` -### Scan a Docker image (without Docker installed) +``` +### Scan a Docker image ```bash +# pulls from a registry if not available locally kingfisher scan --docker-image ubuntu:latest +# layers are automatically extracted so reported paths include files inside +# the container image + +# set KF_DOCKER_TOKEN for private registries ("user:pass" or just the token) ``` + ### Sc ### Scan using a rule _family_ with one flag @@ -328,6 +335,7 @@ KF_JIRA_TOKEN="token" kingfisher scan \ | `KF_GITHUB_TOKEN` | GitHub Personal Access Token | | `KF_GITLAB_TOKEN` | GitLab Personal Access Token | | `KF_JIRA_TOKEN` | Jira API token | +| `KF_DOCKER_TOKEN` | Docker token | Set them temporarily per command: diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs index f6e16ed..0588f56 100644 --- a/src/scanner/docker.rs +++ b/src/scanner/docker.rs @@ -1,5 +1,6 @@ use std::fs::File; use std::io::{Read, Write}; +use std::env; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Duration; @@ -14,6 +15,19 @@ use walkdir::WalkDir; use crate::decompress::decompress_file; +fn registry_auth_from_env() -> RegistryAuth { + match env::var("KF_DOCKER_TOKEN") { + Ok(token) => { + if let Some((user, pass)) = token.split_once(':') { + RegistryAuth::Basic(user.to_string(), pass.to_string()) + } else { + RegistryAuth::Basic(String::new(), token) + } + } + Err(_) => RegistryAuth::Anonymous, + } +} + pub struct Docker; impl Docker { @@ -109,7 +123,7 @@ impl Docker { ..Default::default() }); let mut client = client; - let auth = RegistryAuth::Anonymous; + let auth = registry_auth_from_env(); let 
accepted = vec![ oci_distribution::manifest::IMAGE_LAYER_MEDIA_TYPE, oci_distribution::manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE, From cdbf3ffac1b792b1b8f91e467ffad38ec302afa0 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 28 Jul 2025 09:55:48 -0700 Subject: [PATCH 061/357] changed from oci-distribution to newer oci-client --- Cargo.toml | 2 +- README.md | 2 +- src/scanner/docker.rs | 122 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 103 insertions(+), 23 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e80faa7..79595e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -186,7 +186,7 @@ globset = "0.4.16" jsonwebtoken = "9.3.1" ipnet = "2.11.0" jira_query = "1.6.0" -oci-distribution = "0.11.0" +oci-client = { version = "0.15", default-features = false, features = ["rustls-tls"] } walkdir = "2.5.0" [dependencies.tikv-jemallocator] diff --git a/README.md b/README.md index 6138ea9..059453c 100644 --- a/README.md +++ b/README.md @@ -335,7 +335,7 @@ KF_JIRA_TOKEN="token" kingfisher scan \ | `KF_GITHUB_TOKEN` | GitHub Personal Access Token | | `KF_GITLAB_TOKEN` | GitLab Personal Access Token | | `KF_JIRA_TOKEN` | Jira API token | -| `KF_DOCKER_TOKEN` | Docker token | +| `KF_DOCKER_TOKEN` | Docker registry token (`user:pass` or bearer token). 
If unset, credentials from the Docker keychain are used | Set them temporarily per command: diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs index 0588f56..c4915c2 100644 --- a/src/scanner/docker.rs +++ b/src/scanner/docker.rs @@ -1,30 +1,110 @@ +use std::env; use std::fs::File; use std::io::{Read, Write}; -use std::env; use std::path::{Path, PathBuf}; -use std::process::Command; +use std::process::{Command, Stdio}; use std::time::Duration; use anyhow::{anyhow, Context, Result}; +use base64::Engine; use indicatif::{ProgressBar, ProgressStyle}; -use oci_distribution::client::{linux_amd64_resolver, Client, ClientConfig}; -use oci_distribution::{secrets::RegistryAuth, Reference}; +use oci_client::client::{linux_amd64_resolver, Client, ClientConfig}; +use oci_client::secrets::RegistryAuth; +use oci_client::Reference; +use serde_json::Value; use sha2::{Digest, Sha256}; use tracing::debug; use walkdir::WalkDir; use crate::decompress::decompress_file; -fn registry_auth_from_env() -> RegistryAuth { - match env::var("KF_DOCKER_TOKEN") { - Ok(token) => { - if let Some((user, pass)) = token.split_once(':') { - RegistryAuth::Basic(user.to_string(), pass.to_string()) - } else { - RegistryAuth::Basic(String::new(), token) +fn helper_get_creds(helper: &str, registry: &str) -> Option<(String, String)> { + fn run(bin: &str, registry: &str) -> Option<(String, String)> { + let mut child = Command::new(bin) + .arg("get") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn() + .ok()?; + { + let stdin = child.stdin.as_mut()?; + let _ = stdin.write_all(format!("{registry}\n").as_bytes()); + } + let output = child.wait_with_output().ok()?; + if !output.status.success() { + return None; + } + let v: Value = serde_json::from_slice(&output.stdout).ok()?; + let user = v.get("Username")?.as_str()?.to_string(); + let secret = v.get("Secret")?.as_str()?.to_string(); + Some((user, secret)) + } + + let bin = format!("docker-credential-{helper}"); + if let 
Some(creds) = run(&bin, registry) { + return Some(creds); + } + if helper == "keychain" && bin != "docker-credential-osxkeychain" { + if let Some(creds) = run("docker-credential-osxkeychain", registry) { + return Some(creds); + } + } + None +} + +fn creds_from_docker_config(registry: &str) -> Option<(String, String)> { + let config_dir = env::var("DOCKER_CONFIG") + .map(PathBuf::from) + .or_else(|_| env::var("HOME").map(|h| PathBuf::from(h).join(".docker"))) + .ok()?; + let path = config_dir.join("config.json"); + let mut content = String::new(); + File::open(path).ok()?.read_to_string(&mut content).ok()?; + let json: Value = serde_json::from_str(&content).ok()?; + + if let Some(ch) = json.get("credHelpers").and_then(|v| v.get(registry)).and_then(|v| v.as_str()) + { + if let Some(creds) = helper_get_creds(ch, registry) { + return Some(creds); + } + } + if let Some(store) = json.get("credsStore").and_then(|v| v.as_str()) { + if let Some(creds) = helper_get_creds(store, registry) { + return Some(creds); + } + } + + if let Some(auths) = json.get("auths").and_then(|v| v.as_object()) { + if let Some(entry) = auths + .get(registry) + .or_else(|| auths.get(&format!("https://{registry}"))) + .or_else(|| auths.get(&format!("http://{registry}"))) + { + if let Some(auth) = entry.get("auth").and_then(|v| v.as_str()) { + let decoded = base64::engine::general_purpose::STANDARD.decode(auth).ok()?; + let cred = String::from_utf8(decoded).ok()?; + if let Some((u, p)) = cred.split_once(':') { + return Some((u.to_string(), p.to_string())); + } } } - Err(_) => RegistryAuth::Anonymous, + } + None +} + +fn registry_auth(reference: &Reference) -> RegistryAuth { + if let Ok(token) = env::var("KF_DOCKER_TOKEN") { + if let Some((user, pass)) = token.split_once(':') { + return RegistryAuth::Basic(user.to_string(), pass.to_string()); + } else { + return RegistryAuth::Bearer(token); + } + } + if let Some((user, pass)) = creds_from_docker_config(reference.registry()) { + 
RegistryAuth::Basic(user, pass) + } else { + RegistryAuth::Anonymous } } @@ -123,12 +203,12 @@ impl Docker { ..Default::default() }); let mut client = client; - let auth = registry_auth_from_env(); + let auth = registry_auth(&reference); let accepted = vec![ - oci_distribution::manifest::IMAGE_LAYER_MEDIA_TYPE, - oci_distribution::manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE, - oci_distribution::manifest::IMAGE_DOCKER_LAYER_TAR_MEDIA_TYPE, - oci_distribution::manifest::IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE, + oci_client::manifest::IMAGE_LAYER_MEDIA_TYPE, + oci_client::manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE, + oci_client::manifest::IMAGE_DOCKER_LAYER_TAR_MEDIA_TYPE, + oci_client::manifest::IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE, ]; let pulled = client.pull(&reference, &auth, accepted).await?; pb.set_length(pulled.layers.len() as u64); @@ -137,10 +217,10 @@ impl Docker { std::fs::create_dir_all(out_dir)?; for layer in pulled.layers.into_iter() { let ext = match layer.media_type.as_str() { - oci_distribution::manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE - | oci_distribution::manifest::IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE => "tar.gz", - oci_distribution::manifest::IMAGE_LAYER_MEDIA_TYPE - | oci_distribution::manifest::IMAGE_DOCKER_LAYER_TAR_MEDIA_TYPE => "tar", + oci_client::manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE + | oci_client::manifest::IMAGE_DOCKER_LAYER_GZIP_MEDIA_TYPE => "tar.gz", + oci_client::manifest::IMAGE_LAYER_MEDIA_TYPE + | oci_client::manifest::IMAGE_DOCKER_LAYER_TAR_MEDIA_TYPE => "tar", _ => "bin", }; let digest = layer.sha256_digest(); From 6a2648d606117b86c08176ecab8c80510df430dd Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 28 Jul 2025 10:07:55 -0700 Subject: [PATCH 062/357] improved authentication options for Docker support --- README.md | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 059453c..e2b982f 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ Kingfisher extends 
Nosey Parker by: - **Built-In Validation**: Hundreds of built-in detection rules, many with live-credential validators that call the relevant service APIs (AWS, Azure, GCP, Stripe, etc.) to confirm a secret is active. You can extend or override the library by adding YAML-defined rules on the command line—see [docs/RULES.md](/docs/RULES.md) for details - **Git History Scanning**: Scan local repos, remote GitHub/GitLab orgs/users, or arbitrary GitHub/GitLab repos - **Jira Scanning**: Scan issues returned from a JQL search using `--jira-url` and `--jql` +- **Docker Image Scanning**: Scan public or private docker images via `--docker-image` - **Baseline Support:** Generate and manage baseline files to ignore known secrets and report only newly introduced ones. See ([docs/BASELINE.md](docs/BASELINE.md)) for details. # Getting Started @@ -198,19 +199,6 @@ kingfisher scan /path/to/repo --format sarif --output findings.sarif cat /path/to/file.py | kingfisher scan - ``` -### Scan a Docker image - -```bash -# pulls from a registry if not available locally -kingfisher scan --docker-image ubuntu:latest -# layers are automatically extracted so reported paths include files inside -# the container image - -# set KF_DOCKER_TOKEN for private registries ("user:pass" or just the token) -``` - - -### Sc ### Scan using a rule _family_ with one flag @@ -258,8 +246,35 @@ kingfisher scan ./my-project \ --exclude tests \ -v ``` +## Scanning Docker Images ---- +Kingfisher will first try to use any locally available image, then fall back to pulling via OCI. + +Authentication happens *in this order*: + +1. **`KF_DOCKER_TOKEN`** env var + - If it contains `user:pass`, it’s used as Basic auth + - Otherwise it’s sent as a Bearer token +2. **Docker CLI credentials** + - Checks `credHelpers` (per-registry) and `credsStore` in `~/.docker/config.json`. + - Falls back to the legacy `auths` → `auth` (base64) entries. +3. 
**Anonymous** (no credentials) + + +```bash +# 1) Scan public or already-pulled image +kingfisher scan --docker-image ghcr.io/owasp/wrongsecrets/wrongsecrets-master:latest-master + +# 2) For private registries, explicitly set KF_DOCKER_TOKEN: +# - Basic auth: "user:pass" +# - Bearer only: "TOKEN" +export KF_DOCKER_TOKEN="AWS:$(aws ecr get-login-password --region us-east-1)" +kingfisher scan --docker-image some-private-registry.dkr.ecr.us-east-1.amazonaws.com/base/amazonlinux2023:latest + +# 3) Or rely on your Docker CLI login/keychain: +# (e.g. aws ecr get-login-password … | docker login …) +kingfisher scan --docker-image private.registry.example.com/my-image:tag +``` ## Scanning GitHub From 787a5aaf22a2ed0f378325f9e444c2e2e0129253 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 28 Jul 2025 10:25:11 -0700 Subject: [PATCH 063/357] improved precommit hook, to allow global installation --- README.md | 21 ++++++++++----- install-precommit-hook.sh | 53 ++++++++++++++++++++++++++++++++------ install-prereceive-hook.sh | 0 src/scanner/docker.rs | 4 +-- 4 files changed, 61 insertions(+), 17 deletions(-) mode change 100644 => 100755 install-precommit-hook.sh mode change 100644 => 100755 install-prereceive-hook.sh diff --git a/README.md b/README.md index e2b982f..8dab5ad 100644 --- a/README.md +++ b/README.md @@ -12,13 +12,13 @@ Kingfisher is a blazingly fast secret‑scanning and validation tool built in Ru Kingfisher originated as a fork of [Nosey Parker](https://github.com/praetorian-inc/noseyparker) by Praetorian Security, Inc, and is built atop their incredible work and the work contributed by the Nosey Parker community. Kingfisher extends Nosey Parker by: -1. Validating secrets in real time via cloud-provider APIs -2. Enhancing regex-based detection with source-code parsing for improved accuracy -3. Adding GitLab repository scanning support -4. Adding support for scanning Docker images via `--docker-image` -5. Providing Jira scanning capabilities +1. 
**Validating secrets** in real time via cloud-provider APIs +2. Enhancing regex-based detection with **source-code parsing** for improved accuracy +3. Adding **GitLab** repository scanning support +4. Adding support for scanning **Docker** images via `--docker-image` +5. Providing **Jira** scanning capabilities 6. Introducing a baseline feature that suppresses known secrets and reports only newly introduced ones -7. Offering native Windows support +7. Offering native **Windows** support **MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) @@ -387,12 +387,19 @@ _If no token is provided Kingfisher still works for public repositories._ Run the provided helper script to add a hook that scans staged files before each commit: ```bash -./install-precommit-hook.sh +# local (current repo only ─ default) +./install-kingfisher-hook.sh ``` This creates `.git/hooks/pre-commit` that scans the files staged for commit with `kingfisher scan --no-update-check` and blocks the commit if any secrets are found. +```bash +# global (every repo on this machine) +./install-kingfisher-hook.sh --global ### Install a Pre-Receive Hook +``` + +Installs a global pre-commit hook at `$HOME/.git/hooks/pre-commit`; for every Git repository you use, it runs `kingfisher scan --no-update-check` on the staged files and cancels the commit if any secrets are detected. To check incoming pushes on a server-side repository, install the pre-receive hook: diff --git a/install-precommit-hook.sh b/install-precommit-hook.sh old mode 100644 new mode 100755 index 6a6283f..7a81ba6 --- a/install-precommit-hook.sh +++ b/install-precommit-hook.sh @@ -1,17 +1,54 @@ #!/usr/bin/env bash +# +# Install a Git pre-commit hook that runs `kingfisher scan`. 
+# --global → install once for all repos using core.hooksPath +# --force → overwrite an existing pre-commit hook +# set -euo pipefail -HOOK_DIR="$(git rev-parse --git-dir)/hooks" +MODE="local" +FORCE=0 + +while [[ $# -gt 0 ]]; do + case "$1" in + -g|--global) MODE="global" ;; + -f|--force) FORCE=1 ;; + -h|--help) + echo "Usage: $0 [--global] [--force]" && exit 0 + ;; + *) echo "Unknown flag: $1" >&2; exit 1 ;; + esac + shift +done + +if [[ "$MODE" == "local" ]]; then + # ensure we're inside a Git repo + REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) \ + || { echo "Not inside a Git repository" >&2; exit 1; } + + HOOK_DIR="$(git rev-parse --git-dir)/hooks" +else + # global: honour existing core.hooksPath or default to ~/.git-hooks + HOOK_DIR=$(git config --global --get core.hooksPath || echo "$HOME/.git-hooks") + mkdir -p "$HOOK_DIR" + + # if the user hasn’t set core.hooksPath, do it now + if ! git config --global --get core.hooksPath >/dev/null; then + git config --global core.hooksPath "$HOOK_DIR" + echo "Set git config --global core.hooksPath to $HOOK_DIR" + fi +fi + HOOK_PATH="$HOOK_DIR/pre-commit" -if [ -e "$HOOK_PATH" ]; then - echo "Error: $HOOK_PATH already exists. Move or remove the existing hook to continue." >&2 +if [[ -e "$HOOK_PATH" && $FORCE -eq 0 ]]; then + echo "Error: $HOOK_PATH already exists. Use --force to overwrite." >&2 exit 1 fi -cat > "$HOOK_PATH" <<'HOOK' +cat >"$HOOK_PATH" <<'HOOK' #!/usr/bin/env bash -# Pre-commit hook to run Kingfisher scan on staged changes +# Git pre-commit hook to run Kingfisher on staged changes set -euo pipefail if ! command -v kingfisher >/dev/null 2>&1; then @@ -22,11 +59,11 @@ fi git diff --cached --name-only -z | \ xargs -0 --no-run-if-empty kingfisher scan --no-update-check status=$? -if [ "$status" -ne 0 ]; then +if [[ $status -ne 0 ]]; then echo "Kingfisher detected secrets in staged files. Commit aborted." 
>&2 - exit "$status" + exit $status fi HOOK chmod +x "$HOOK_PATH" -echo "Pre-commit hook installed to $HOOK_PATH" +echo "Pre-commit hook installed to $HOOK_PATH ($MODE mode)" diff --git a/install-prereceive-hook.sh b/install-prereceive-hook.sh old mode 100644 new mode 100755 diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs index c4915c2..a18ca24 100644 --- a/src/scanner/docker.rs +++ b/src/scanner/docker.rs @@ -140,7 +140,7 @@ impl Docker { std::fs::create_dir_all(out_dir)?; let tar_path = out_dir.join("local_image.tar"); let status = Command::new("docker") - .args(["image", "save", image, "-o", tar_path.to_str().unwrap()]) + .args(["image", "save", image, "-o", &tar_path.to_string_lossy()]) .status() .with_context(|| "running docker save")?; if !status.success() { @@ -202,7 +202,7 @@ impl Docker { platform_resolver: Some(Box::new(linux_amd64_resolver)), ..Default::default() }); - let mut client = client; + let client = client; let auth = registry_auth(&reference); let accepted = vec![ oci_client::manifest::IMAGE_LAYER_MEDIA_TYPE, From dcbdb51746d6cd11512d87ecd86593c5177165be Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 28 Jul 2025 10:26:32 -0700 Subject: [PATCH 064/357] improved precommit hook, to allow global installation --- src/scanner/docker.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs index a18ca24..775e687 100644 --- a/src/scanner/docker.rs +++ b/src/scanner/docker.rs @@ -53,6 +53,20 @@ fn helper_get_creds(helper: &str, registry: &str) -> Option<(String, String)> { None } +/// Turn `registry.example.com/foo/bar:latest` into something like +/// `registry.example.com_foo_bar_latest_4d3c9e83` +fn image_dir_name(reference: &str) -> String { + // keep it readable + let mut name = reference.replace(['/', ':'], "_"); + + // add a truncated SHA-256 to guarantee uniqueness + let hash = Sha256::digest(reference.as_bytes()); + let short = &hex::encode(hash)[..8]; 
// 8-char prefix is plenty + name.push('_'); + name.push_str(short); + name +} + fn creds_from_docker_config(registry: &str) -> Option<(String, String)> { let config_dir = env::var("DOCKER_CONFIG") .map(PathBuf::from) @@ -244,8 +258,9 @@ pub async fn save_docker_images( ) -> Result> { let docker = Docker::new(); let mut dirs = Vec::new(); + for image in images { - let dir_name = image.replace(['/', ':'], "_"); + let dir_name = image_dir_name(image); let out_dir = clone_root.join(format!("docker_{dir_name}")); docker .save_image_to_dir(image, &out_dir, use_progress) @@ -253,6 +268,7 @@ pub async fn save_docker_images( .with_context(|| format!("saving image {image}"))?; dirs.push((out_dir, image.clone())); } + Ok(dirs) } From d42098f187096f0bfa30e529172dd78f6139d69d Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 28 Jul 2025 11:04:24 -0700 Subject: [PATCH 065/357] fixed documentation typo --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8dab5ad..6b3728d 100644 --- a/README.md +++ b/README.md @@ -388,14 +388,14 @@ Run the provided helper script to add a hook that scans staged files before each ```bash # local (current repo only ─ default) -./install-kingfisher-hook.sh +./install-precommit-hook.sh ``` This creates `.git/hooks/pre-commit` that scans the files staged for commit with `kingfisher scan --no-update-check` and blocks the commit if any secrets are found. 
```bash # global (every repo on this machine) -./install-kingfisher-hook.sh --global +./install-precommit-hook.sh --global ### Install a Pre-Receive Hook ``` From 7983cacd2f3a99cf5e053e37e8231f8e5501d882 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 28 Jul 2025 11:05:15 -0700 Subject: [PATCH 066/357] updated precommit hook to only block on valid secrets --- install-precommit-hook.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install-precommit-hook.sh b/install-precommit-hook.sh index 7a81ba6..40cb95b 100755 --- a/install-precommit-hook.sh +++ b/install-precommit-hook.sh @@ -57,7 +57,7 @@ if ! command -v kingfisher >/dev/null 2>&1; then fi git diff --cached --name-only -z | \ - xargs -0 --no-run-if-empty kingfisher scan --no-update-check + xargs -0 --no-run-if-empty kingfisher scan --only-valid --no-update-check status=$? if [[ $status -ne 0 ]]; then echo "Kingfisher detected secrets in staged files. Commit aborted." >&2 From 95a97d06a33248732151ea1d7ee5a1c1ec00f110 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 28 Jul 2025 14:14:47 -0700 Subject: [PATCH 067/357] updated buildwin --- buildwin.bat | 123 +++++++++++++++++++++++++-------------------------- 1 file changed, 61 insertions(+), 62 deletions(-) diff --git a/buildwin.bat b/buildwin.bat index 1a1f77e..70762ec 100644 --- a/buildwin.bat +++ b/buildwin.bat @@ -1,42 +1,42 @@ @echo off -REM This script builds a Windows x64 release binary and creates a tarball with checksum. -REM It requires vcpkg to be installed at root of C: drive (https://github.com/microsoft/vcpkg). -REM This script will install Rust (using chocolatey) if it is not already installed. -REM -REM Call with -force to clone and bootstrap vcpkg if it is not found +REM --------------------------------------------------------------------------- +REM Build a Windows-x64 release of Kingfisher and package it with checksums. 
REM +REM • Installs Hyperscan statically via vcpkg so vectorscan-rs-sys can link +REM against hs.lib. +REM • Installs Rust (via Chocolatey) if missing. +REM • Call with -force to clone & bootstrap vcpkg if it isn’t found. +REM --------------------------------------------------------------------------- -setlocal +setlocal enabledelayedexpansion -REM Set your Cargo project name manually here if desired: +REM ── Project name ──────────────────────────────────────────────────────────── set "PROJECT_NAME=kingfisher" -REM Optional check for OS: +REM ── Require Windows ───────────────────────────────────────────────────────── if NOT "%OS%"=="Windows_NT" ( echo This script must be run on Windows. exit /b 1 ) + +REM ── Locate MSVC toolchain ─────────────────────────────────────────────────── if "%VCINSTALLDIR%"=="" ( - echo VCINSTALLDIR not set - attempting auto-detection… + echo VCINSTALLDIR not set — attempting auto-detection... for %%P in ( "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC" "C:\Program Files\Microsoft Visual Studio\2022\Professional\VC" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC" - ) do ( - if exist "%%~P\Auxiliary\Build\vcvars64.bat" ( - set "VCINSTALLDIR=%%~P" - echo Found Visual C++ Build Tools at: %%~P - goto :vc_found - ) + ) do if exist "%%~P\Auxiliary\Build\vcvars64.bat" ( + set "VCINSTALLDIR=%%~P" + echo Found Visual C++ Build Tools at: %%~P + goto :vc_found ) echo ERROR: Could not find a suitable Visual Studio installation. echo Install “Desktop development with C++” or set VCINSTALLDIR. 
exit /b 1 ) :vc_found - -REM Strip trailing backslash if present if "%VCINSTALLDIR:~-1%"=="\" set "VCINSTALLDIR=%VCINSTALLDIR:~0,-1%" echo Initialising MSVC environment… @@ -45,96 +45,95 @@ call "%VCINSTALLDIR%\Auxiliary\Build\vcvars64.bat" || ( exit /b 1 ) -REM Locate vcpkg.exe -where vcpkg.exe >nul 2>nul +REM ── Locate or bootstrap vcpkg ─────────────────────────────────────────────── +where vcpkg.exe >nul 2>&1 if %ERRORLEVEL% NEQ 0 ( if exist "%HOMEDRIVE%\vcpkg\vcpkg.exe" ( set "VCPKG_EXE=%HOMEDRIVE%\vcpkg\vcpkg.exe" echo Found vcpkg at: %VCPKG_EXE% + ) else if "%~1"=="-force" ( + echo Cloning and bootstrapping vcpkg… + if exist "%HOMEDRIVE%\vcpkg" rmdir /s /q "%HOMEDRIVE%\vcpkg" + git clone https://github.com/microsoft/vcpkg.git "%HOMEDRIVE%\vcpkg" + pushd "%HOMEDRIVE%\vcpkg" + call .\bootstrap-vcpkg.bat || (echo ERROR: vcpkg bootstrap failed.&exit /b 1) + set "VCPKG_EXE=%CD%\vcpkg.exe" + popd ) else ( - if "%~1"=="-force" ( - echo Cloning and bootstrapping vcpkg... - if exist "%HOMEDRIVE%\vcpkg" ( - rmdir /s /q "%HOMEDRIVE%\vcpkg" - ) - git clone https://github.com/microsoft/vcpkg.git "%HOMEDRIVE%\vcpkg" - pushd "%HOMEDRIVE%\vcpkg" - dir - call .\bootstrap-vcpkg.bat - set "VCPKG_EXE=%CD%\vcpkg.exe" - popd - echo Installed vcpkg at: %VCPKG_EXE% - ) else ( - echo ERROR: vcpkg not found. Please install it or re-run script with -force. - exit /b 1 - ) + echo ERROR: vcpkg not found. Install it or rerun with -force. + exit /b 1 ) ) else ( - for /f "tokens=*" %%i in ('where vcpkg.exe') do ( - set "VCPKG_EXE=%%i" - goto :found_vcpkg - ) - :found_vcpkg + for /f "tokens=*" %%i in ('where vcpkg.exe') do set "VCPKG_EXE=%%i" echo Found vcpkg at: %VCPKG_EXE% ) -REM Check if LOCALAPPDATA starts with a drive letter, if not set it to APPDATA +REM ── Ensure LOCALAPPDATA has a drive letter (GitHub Actions quirk) ─────────── if /I not "%LOCALAPPDATA:~1,1%"==":" ( - echo LOCALAPPDATA does not start with a drive letter. Setting it to APPDATA. 
+ echo LOCALAPPDATA lacks drive letter; pointing it at APPDATA. set "LOCALAPPDATA=%APPDATA%" ) -echo Installing hyperscan via vcpkg... -set -"%HOMEDRIVE%\vcpkg\vcpkg.exe" install hyperscan:x64-windows +REM ── Install Hyperscan statically ──────────────────────────────────────────── +set "VCPKG_TRIPLET=x64-windows-static" +echo Installing Hyperscan (%VCPKG_TRIPLET%) via vcpkg… +call "%VCPKG_EXE%" install hyperscan:%VCPKG_TRIPLET% --clean-after-build || ( + echo ERROR: vcpkg install failed. + exit /b 1 +) set "LIBHS_NO_PKG_CONFIG=1" -REM Point vectorscan-rs-sys at the Hyperscan install from vcpkg -set "HYPERSCAN_ROOT=%HOMEDRIVE%\vcpkg\installed\x64-windows" +REM Path hints for vectorscan-rs-sys +set "HYPERSCAN_ROOT=%HOMEDRIVE%\vcpkg\installed\%VCPKG_TRIPLET%" set "LIB=%HYPERSCAN_ROOT%\lib;%LIB%" set "INCLUDE=%HYPERSCAN_ROOT%\include;%INCLUDE%" -REM Check for Rust, install if missing -where rustc.exe >nul 2>nul -if %ERRORLEVEL% NEQ 0 ( - echo Installing Rust... - choco install rust-ms -y - choco install cmake -y --installargs "ADD_CMAKE_TO_PATH=System" - call refreshenv +REM Fallback: rename vectorscan.lib -> hs.lib if vcpkg changed the name +if not exist "%HYPERSCAN_ROOT%\lib\hs.lib" if exist "%HYPERSCAN_ROOT%\lib\vectorscan.lib" ( + copy "%HYPERSCAN_ROOT%\lib\vectorscan.lib" "%HYPERSCAN_ROOT%\lib\hs.lib" >nul +) +REM ── Install Rust toolchain if absent ──────────────────────────────────────── +where rustc.exe >nul 2>&1 +if %ERRORLEVEL% NEQ 0 ( + echo Installing Rust via Chocolatey… + choco install rust-ms -y || exit /b 1 + choco install cmake -y --installargs "ADD_CMAKE_TO_PATH=System" || exit /b 1 + call refreshenv ) else ( echo Rust is already installed. ) -echo Building for Windows x64... +REM ── Build ─────────────────────────────────────────────────────────────────── +echo Building for Windows x64… cargo build --release --target x86_64-pc-windows-msvc || ( - echo Cargo build failed. + echo ERROR: Cargo build failed. exit /b 1 ) -echo Generating CHECKSUM.txt... 
+REM ── Package & checksum ────────────────────────────────────────────────────── +echo Generating CHECKSUM.txt… powershell -Command ^ "Get-FileHash .\target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe -Algorithm SHA256 | Out-File .\target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" if not exist "target\release" mkdir "target\release" -copy /Y "target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe" "target\release\" >nul -copy /Y "target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" "target\release\CHECKSUM-windows-x64.txt" >nul +copy /Y "target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe" "target\release\" >nul +copy /Y "target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" "target\release\CHECKSUM-windows-x64.txt" >nul -cd target\release +pushd target\release echo Creating archive: %PROJECT_NAME%-windows-x64.zip if exist "%PROJECT_NAME%-windows-x64.zip" del /f /q "%PROJECT_NAME%-windows-x64.zip" powershell -Command "Compress-Archive -Path '%PROJECT_NAME%.exe','CHECKSUM-windows-x64.txt' -DestinationPath '%PROJECT_NAME%-windows-x64.zip' -Force" if exist "%PROJECT_NAME%-windows-x64.zip" ( - REM -- append the ZIP’s SHA-256 to the existing checksum file ---- certutil -hashfile "%PROJECT_NAME%-windows-x64.zip" SHA256 >> "CHECKSUM-windows-x64.txt" echo Created: %PROJECT_NAME%-windows-x64.zip ) else ( echo ERROR: Archive not created. ) - echo Archives in target\release: dir /b *.zip 2>nul || echo None found. 
+popd endlocal exit /b 0 From d3fbffcb84a8fd622d1f38b7348f9f0e098e4f25 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 29 Jul 2025 10:12:40 -0700 Subject: [PATCH 068/357] Fixed broken windows build script --- README.md | 26 +++++--- buildwin.bat | 123 +++++++++++++++++++------------------- install-precommit-hook.sh | 19 ++++-- src/decompress.rs | 46 ++++++++++---- src/scanner/docker.rs | 2 +- 5 files changed, 131 insertions(+), 85 deletions(-) diff --git a/README.md b/README.md index 6b3728d..ad65c78 100644 --- a/README.md +++ b/README.md @@ -43,19 +43,33 @@ brew install kingfisher Pre-built binaries are also available on the [Releases](https://github.com/mongodb/kingfisher/releases) section of this page. +You can also install using [ubi](https://github.com/houseabsolute/ubi), which downloads the correct binary for your platform: + +```bash +# Linux, macOS +curl --silent --location \ + https://raw.githubusercontent.com/houseabsolute/ubi/master/bootstrap/bootstrap-ubi.sh | \ + sh && \ + ubi --project mongodb/kingfisher --in "$HOME/bin" +``` + +```powershell +# Windows +powershell -exec bypass -c "Invoke-WebRequest -URI 'https://raw.githubusercontent.com/houseabsolute/ubi/master/bootstrap/bootstrap-ubi.ps1' -UseBasicParsing | Invoke-Expression" && ubi --project mongodb/kingfisher --in . +``` + +This installs `ubi` and then places the `kingfisher` executable in `~/bin` on Unix-like +systems (or the current directory on Windows). 
+ Or you may compile for your platform via `make`: ```bash # NOTE: Requires Docker make linux -``` -```bash -# macOS +# macOS --- must build from a macOS host make darwin -``` -```bash # Windows x64 --- requires building from a Windows host with Visual Studio installed ./buildwin.bat -force ``` @@ -67,10 +81,8 @@ make darwin-all # builds both x64 and arm64 make all # builds for every OS and architecture supported ``` - ### Run Kingfisher in Docker - Run the dockerized Kingfisher container: ```bash # GitHub Container Registry diff --git a/buildwin.bat b/buildwin.bat index 70762ec..baa2167 100644 --- a/buildwin.bat +++ b/buildwin.bat @@ -1,42 +1,42 @@ @echo off -REM --------------------------------------------------------------------------- -REM Build a Windows-x64 release of Kingfisher and package it with checksums. +REM This script builds a Windows x64 release binary and creates a tarball with checksum. +REM It requires vcpkg to be installed at root of C: drive (https://github.com/microsoft/vcpkg). +REM This script will install Rust (using chocolatey) if it is not already installed. +REM +REM Call with -force to clone and bootstrap vcpkg if it is not found REM -REM • Installs Hyperscan statically via vcpkg so vectorscan-rs-sys can link -REM against hs.lib. -REM • Installs Rust (via Chocolatey) if missing. -REM • Call with -force to clone & bootstrap vcpkg if it isn’t found. -REM --------------------------------------------------------------------------- -setlocal enabledelayedexpansion +setlocal -REM ── Project name ──────────────────────────────────────────────────────────── +REM Set your Cargo project name manually here if desired: set "PROJECT_NAME=kingfisher" -REM ── Require Windows ───────────────────────────────────────────────────────── +REM Optional check for OS: if NOT "%OS%"=="Windows_NT" ( echo This script must be run on Windows. 
exit /b 1 ) - -REM ── Locate MSVC toolchain ─────────────────────────────────────────────────── if "%VCINSTALLDIR%"=="" ( - echo VCINSTALLDIR not set — attempting auto-detection... + echo VCINSTALLDIR not set - attempting auto-detection… for %%P in ( "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC" "C:\Program Files\Microsoft Visual Studio\2022\Professional\VC" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC" - ) do if exist "%%~P\Auxiliary\Build\vcvars64.bat" ( - set "VCINSTALLDIR=%%~P" - echo Found Visual C++ Build Tools at: %%~P - goto :vc_found + ) do ( + if exist "%%~P\Auxiliary\Build\vcvars64.bat" ( + set "VCINSTALLDIR=%%~P" + echo Found Visual C++ Build Tools at: %%~P + goto :vc_found + ) ) echo ERROR: Could not find a suitable Visual Studio installation. echo Install “Desktop development with C++” or set VCINSTALLDIR. exit /b 1 ) :vc_found + +REM Strip trailing backslash if present if "%VCINSTALLDIR:~-1%"=="\" set "VCINSTALLDIR=%VCINSTALLDIR:~0,-1%" echo Initialising MSVC environment… @@ -45,95 +45,96 @@ call "%VCINSTALLDIR%\Auxiliary\Build\vcvars64.bat" || ( exit /b 1 ) -REM ── Locate or bootstrap vcpkg ─────────────────────────────────────────────── -where vcpkg.exe >nul 2>&1 +REM Locate vcpkg.exe +where vcpkg.exe >nul 2>nul if %ERRORLEVEL% NEQ 0 ( if exist "%HOMEDRIVE%\vcpkg\vcpkg.exe" ( set "VCPKG_EXE=%HOMEDRIVE%\vcpkg\vcpkg.exe" echo Found vcpkg at: %VCPKG_EXE% - ) else if "%~1"=="-force" ( - echo Cloning and bootstrapping vcpkg… - if exist "%HOMEDRIVE%\vcpkg" rmdir /s /q "%HOMEDRIVE%\vcpkg" - git clone https://github.com/microsoft/vcpkg.git "%HOMEDRIVE%\vcpkg" - pushd "%HOMEDRIVE%\vcpkg" - call .\bootstrap-vcpkg.bat || (echo ERROR: vcpkg bootstrap failed.&exit /b 1) - set "VCPKG_EXE=%CD%\vcpkg.exe" - popd ) else ( - echo ERROR: vcpkg not found. Install it or rerun with -force. 
- exit /b 1 + if "%~1"=="-force" ( + echo Cloning and bootstrapping vcpkg... + if exist "%HOMEDRIVE%\vcpkg" ( + rmdir /s /q "%HOMEDRIVE%\vcpkg" + ) + git clone https://github.com/microsoft/vcpkg.git "%HOMEDRIVE%\vcpkg" + pushd "%HOMEDRIVE%\vcpkg" + dir + call .\bootstrap-vcpkg.bat + set "VCPKG_EXE=%CD%\vcpkg.exe" + popd + echo Installed vcpkg at: %VCPKG_EXE% + ) else ( + echo ERROR: vcpkg not found. Please install it or re-run script with -force. + exit /b 1 + ) ) ) else ( - for /f "tokens=*" %%i in ('where vcpkg.exe') do set "VCPKG_EXE=%%i" + for /f "tokens=*" %%i in ('where vcpkg.exe') do ( + set "VCPKG_EXE=%%i" + goto :found_vcpkg + ) + :found_vcpkg echo Found vcpkg at: %VCPKG_EXE% ) -REM ── Ensure LOCALAPPDATA has a drive letter (GitHub Actions quirk) ─────────── +REM Check if LOCALAPPDATA starts with a drive letter, if not set it to APPDATA if /I not "%LOCALAPPDATA:~1,1%"==":" ( - echo LOCALAPPDATA lacks drive letter; pointing it at APPDATA. + echo LOCALAPPDATA does not start with a drive letter. Setting it to APPDATA. set "LOCALAPPDATA=%APPDATA%" ) -REM ── Install Hyperscan statically ──────────────────────────────────────────── -set "VCPKG_TRIPLET=x64-windows-static" -echo Installing Hyperscan (%VCPKG_TRIPLET%) via vcpkg… -call "%VCPKG_EXE%" install hyperscan:%VCPKG_TRIPLET% --clean-after-build || ( - echo ERROR: vcpkg install failed. - exit /b 1 -) +echo Installing hyperscan via vcpkg... 
+set +"%HOMEDRIVE%\vcpkg\vcpkg.exe" install hyperscan:x64-windows set "LIBHS_NO_PKG_CONFIG=1" -REM Path hints for vectorscan-rs-sys -set "HYPERSCAN_ROOT=%HOMEDRIVE%\vcpkg\installed\%VCPKG_TRIPLET%" +REM Point vectorscan-rs-sys at the Hyperscan install from vcpkg +set "HYPERSCAN_ROOT=%HOMEDRIVE%\vcpkg\installed\x64-windows" set "LIB=%HYPERSCAN_ROOT%\lib;%LIB%" set "INCLUDE=%HYPERSCAN_ROOT%\include;%INCLUDE%" -REM Fallback: rename vectorscan.lib -> hs.lib if vcpkg changed the name -if not exist "%HYPERSCAN_ROOT%\lib\hs.lib" if exist "%HYPERSCAN_ROOT%\lib\vectorscan.lib" ( - copy "%HYPERSCAN_ROOT%\lib\vectorscan.lib" "%HYPERSCAN_ROOT%\lib\hs.lib" >nul -) - -REM ── Install Rust toolchain if absent ──────────────────────────────────────── -where rustc.exe >nul 2>&1 +REM Check for Rust, install if missing +where rustc.exe >nul 2>nul if %ERRORLEVEL% NEQ 0 ( - echo Installing Rust via Chocolatey… - choco install rust-ms -y || exit /b 1 - choco install cmake -y --installargs "ADD_CMAKE_TO_PATH=System" || exit /b 1 + echo Installing Rust... + choco install rust-ms -y + choco install cmake -y --installargs "ADD_CMAKE_TO_PATH=System" call refreshenv + ) else ( echo Rust is already installed. ) -REM ── Build ─────────────────────────────────────────────────────────────────── -echo Building for Windows x64… +echo Building for Windows x64... cargo build --release --target x86_64-pc-windows-msvc || ( - echo ERROR: Cargo build failed. + echo Cargo build failed. exit /b 1 ) -REM ── Package & checksum ────────────────────────────────────────────────────── -echo Generating CHECKSUM.txt… +echo Generating CHECKSUM.txt... 
powershell -Command ^ "Get-FileHash .\target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe -Algorithm SHA256 | Out-File .\target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" if not exist "target\release" mkdir "target\release" -copy /Y "target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe" "target\release\" >nul -copy /Y "target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" "target\release\CHECKSUM-windows-x64.txt" >nul +copy /Y "target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe" "target\release\" >nul +copy /Y "target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" "target\release\CHECKSUM-windows-x64.txt" >nul -pushd target\release +cd target\release echo Creating archive: %PROJECT_NAME%-windows-x64.zip if exist "%PROJECT_NAME%-windows-x64.zip" del /f /q "%PROJECT_NAME%-windows-x64.zip" powershell -Command "Compress-Archive -Path '%PROJECT_NAME%.exe','CHECKSUM-windows-x64.txt' -DestinationPath '%PROJECT_NAME%-windows-x64.zip' -Force" if exist "%PROJECT_NAME%-windows-x64.zip" ( + REM -- append the ZIP’s SHA-256 to the existing checksum file ---- certutil -hashfile "%PROJECT_NAME%-windows-x64.zip" SHA256 >> "CHECKSUM-windows-x64.txt" echo Created: %PROJECT_NAME%-windows-x64.zip ) else ( echo ERROR: Archive not created. ) + echo Archives in target\release: dir /b *.zip 2>nul || echo None found. -popd endlocal -exit /b 0 +exit /b 0 \ No newline at end of file diff --git a/install-precommit-hook.sh b/install-precommit-hook.sh index 40cb95b..923fb94 100755 --- a/install-precommit-hook.sh +++ b/install-precommit-hook.sh @@ -1,8 +1,9 @@ #!/usr/bin/env bash # -# Install a Git pre-commit hook that runs `kingfisher scan`. -# --global → install once for all repos using core.hooksPath -# --force → overwrite an existing pre-commit hook +# Install a Git pre‑commit hook that runs `kingfisher scan`. 
+# +# --global → install once for all repos via core.hooksPath +# --force → overwrite an existing pre‑commit hook # set -euo pipefail @@ -48,7 +49,7 @@ fi cat >"$HOOK_PATH" <<'HOOK' #!/usr/bin/env bash -# Git pre-commit hook to run Kingfisher on staged changes +# Git pre‑commit hook to run Kingfisher on staged changes set -euo pipefail if ! command -v kingfisher >/dev/null 2>&1; then @@ -59,6 +60,14 @@ fi git diff --cached --name-only -z | \ xargs -0 --no-run-if-empty kingfisher scan --only-valid --no-update-check status=$? + +# ──────────────────────────────────────────────────────────────── +# Treat Kingfisher exit‑code 200 as success (map → 0) +# ──────────────────────────────────────────────────────────────── +if [[ $status -eq 200 ]]; then + status=0 +fi + if [[ $status -ne 0 ]]; then echo "Kingfisher detected secrets in staged files. Commit aborted." >&2 exit $status @@ -66,4 +75,4 @@ fi HOOK chmod +x "$HOOK_PATH" -echo "Pre-commit hook installed to $HOOK_PATH ($MODE mode)" +echo "Pre‑commit hook installed to $HOOK_PATH ($MODE mode)" diff --git a/src/decompress.rs b/src/decompress.rs index 8bc43aa..f91c8fa 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -91,16 +91,28 @@ fn handle_tar_archive_streaming( let out_path = base_dir.join(&path_in_tar); if let Some(parent) = out_path.parent() { - fs::create_dir_all(parent)?; + if let Err(e) = fs::create_dir_all(parent) { + tracing::debug!("failed to create directory {}: {}", parent.display(), e); + continue; + } } if !is_safe_extract_path(&out_path) { tracing::warn!("unsafe tar path: {}", out_path.display()); continue; } - let mut out_file = fs::File::create(&out_path)?; - std::io::copy(&mut entry, &mut out_file)?; - - entries_on_disk.push((logical_path, out_path)); + match fs::File::create(&out_path) { + Ok(mut out_file) => { + if let Err(e) = std::io::copy(&mut entry, &mut out_file) { + tracing::debug!("failed to extract {}: {}", out_path.display(), e); + continue; + } + 
entries_on_disk.push((logical_path, out_path)); + } + Err(e) => { + tracing::debug!("failed to create file {}: {}", out_path.display(), e); + continue; + } + } } } Ok(CompressedContent::ArchiveFiles(entries_on_disk)) @@ -122,16 +134,28 @@ fn handle_zip_archive_streaming( let out_path = base_dir.join(&name_in_zip); if let Some(parent) = out_path.parent() { - fs::create_dir_all(parent)?; + if let Err(e) = fs::create_dir_all(parent) { + println!("****************failed to create directory {}: {}", parent.display(), e); + continue; + } } if !is_safe_extract_path(&out_path) { - tracing::warn!("unsafe zip path: {}", out_path.display()); + println!("****************unsafe zip path: {}", out_path.display()); continue; } - let mut out_file = fs::File::create(&out_path)?; - std::io::copy(&mut zipped_file, &mut out_file)?; - - entries_on_disk.push((logical_path, out_path)); + match fs::File::create(&out_path) { + Ok(mut out_file) => { + if let Err(e) = std::io::copy(&mut zipped_file, &mut out_file) { + println!("****************failed to extract {}: {}", out_path.display(), e); + continue; + } + entries_on_disk.push((logical_path, out_path)); + } + Err(e) => { + println!("****************failed to create file {}: {}", out_path.display(), e); + continue; + } + } } } Ok(CompressedContent::ArchiveFiles(entries_on_disk)) diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs index 775e687..5a6daa4 100644 --- a/src/scanner/docker.rs +++ b/src/scanner/docker.rs @@ -238,7 +238,7 @@ impl Docker { _ => "bin", }; let digest = layer.sha256_digest(); - let file_name = format!("layer_{digest}.{ext}"); + let file_name = format!("layer_{}.{}", digest.replace(':', "_"), ext); let tmp_path = out_dir.join(file_name); let mut tmp = std::fs::File::create(&tmp_path)?; tmp.write_all(&layer.data)?; From fc1ac8500a6a751795d4133fe43457d540735540 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 29 Jul 2025 10:25:58 -0700 Subject: [PATCH 069/357] Removed println statements used for debugging 
--- src/decompress.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/decompress.rs b/src/decompress.rs index f91c8fa..49dc7ef 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -135,24 +135,24 @@ fn handle_zip_archive_streaming( let out_path = base_dir.join(&name_in_zip); if let Some(parent) = out_path.parent() { if let Err(e) = fs::create_dir_all(parent) { - println!("****************failed to create directory {}: {}", parent.display(), e); + tracing::debug!("failed to create directory {}: {}", parent.display(), e); continue; } } if !is_safe_extract_path(&out_path) { - println!("****************unsafe zip path: {}", out_path.display()); + tracing::warn!("unsafe zip path: {}", out_path.display()); continue; } match fs::File::create(&out_path) { Ok(mut out_file) => { if let Err(e) = std::io::copy(&mut zipped_file, &mut out_file) { - println!("****************failed to extract {}: {}", out_path.display(), e); + tracing::debug!("failed to extract {}: {}", out_path.display(), e); continue; } entries_on_disk.push((logical_path, out_path)); } Err(e) => { - println!("****************failed to create file {}: {}", out_path.display(), e); + tracing::debug!("failed to create file {}: {}", out_path.display(), e); continue; } } From b5babebae9c8d3c28a871af52528c4ac22f9fce9 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 29 Jul 2025 10:49:04 -0700 Subject: [PATCH 070/357] Fixed broken windows build script --- buildwin.bat | 140 +++++++++++++++++++++++++++------------------------ 1 file changed, 74 insertions(+), 66 deletions(-) diff --git a/buildwin.bat b/buildwin.bat index baa2167..012d1f0 100644 --- a/buildwin.bat +++ b/buildwin.bat @@ -1,42 +1,42 @@ @echo off -REM This script builds a Windows x64 release binary and creates a tarball with checksum. -REM It requires vcpkg to be installed at root of C: drive (https://github.com/microsoft/vcpkg). -REM This script will install Rust (using chocolatey) if it is not already installed. 
-REM -REM Call with -force to clone and bootstrap vcpkg if it is not found +REM --------------------------------------------------------------------------- +REM Build a Windows-x64 release of Kingfisher and package it with checksums. REM +REM • Installs Hyperscan statically via vcpkg so vectorscan-rs-sys can link +REM against hs.lib. +REM • Installs Rust (via Chocolatey) if missing. +REM • Call with -force to clone & bootstrap vcpkg if it isn’t found. +REM --------------------------------------------------------------------------- -setlocal +setlocal EnableDelayedExpansion -REM Set your Cargo project name manually here if desired: +REM ── Project name ──────────────────────────────────────────────────────────── set "PROJECT_NAME=kingfisher" -REM Optional check for OS: +REM ── Require Windows ───────────────────────────────────────────────────────── if NOT "%OS%"=="Windows_NT" ( echo This script must be run on Windows. exit /b 1 ) + +REM ── Locate MSVC toolchain ─────────────────────────────────────────────────── if "%VCINSTALLDIR%"=="" ( - echo VCINSTALLDIR not set - attempting auto-detection… + echo VCINSTALLDIR not set — attempting auto-detection... for %%P in ( "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC" "C:\Program Files\Microsoft Visual Studio\2022\Professional\VC" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC" - ) do ( - if exist "%%~P\Auxiliary\Build\vcvars64.bat" ( - set "VCINSTALLDIR=%%~P" - echo Found Visual C++ Build Tools at: %%~P - goto :vc_found - ) + ) do if exist "%%~P\Auxiliary\Build\vcvars64.bat" ( + set "VCINSTALLDIR=%%~P" + echo Found Visual C++ Build Tools at: %%~P + goto :vc_found ) echo ERROR: Could not find a suitable Visual Studio installation. echo Install “Desktop development with C++” or set VCINSTALLDIR. 
exit /b 1 ) :vc_found - -REM Strip trailing backslash if present if "%VCINSTALLDIR:~-1%"=="\" set "VCINSTALLDIR=%VCINSTALLDIR:~0,-1%" echo Initialising MSVC environment… @@ -45,96 +45,104 @@ call "%VCINSTALLDIR%\Auxiliary\Build\vcvars64.bat" || ( exit /b 1 ) -REM Locate vcpkg.exe -where vcpkg.exe >nul 2>nul +REM ── Locate or bootstrap vcpkg ─────────────────────────────────────────────── +where vcpkg.exe >nul 2>&1 if %ERRORLEVEL% NEQ 0 ( if exist "%HOMEDRIVE%\vcpkg\vcpkg.exe" ( set "VCPKG_EXE=%HOMEDRIVE%\vcpkg\vcpkg.exe" - echo Found vcpkg at: %VCPKG_EXE% + ) else if "%~1"=="-force" ( + echo Cloning and bootstrapping vcpkg… + if exist "%HOMEDRIVE%\vcpkg" rmdir /s /q "%HOMEDRIVE%\vcpkg" + git clone https://github.com/microsoft/vcpkg.git "%HOMEDRIVE%\vcpkg" + pushd "%HOMEDRIVE%\vcpkg" + call .\bootstrap-vcpkg.bat || (echo ERROR: vcpkg bootstrap failed.&exit /b 1) + set "VCPKG_EXE=%CD%\vcpkg.exe" + popd ) else ( - if "%~1"=="-force" ( - echo Cloning and bootstrapping vcpkg... - if exist "%HOMEDRIVE%\vcpkg" ( - rmdir /s /q "%HOMEDRIVE%\vcpkg" - ) - git clone https://github.com/microsoft/vcpkg.git "%HOMEDRIVE%\vcpkg" - pushd "%HOMEDRIVE%\vcpkg" - dir - call .\bootstrap-vcpkg.bat - set "VCPKG_EXE=%CD%\vcpkg.exe" - popd - echo Installed vcpkg at: %VCPKG_EXE% - ) else ( - echo ERROR: vcpkg not found. Please install it or re-run script with -force. - exit /b 1 - ) + echo ERROR: vcpkg not found. Install it or rerun with -force. + exit /b 1 ) ) else ( - for /f "tokens=*" %%i in ('where vcpkg.exe') do ( - set "VCPKG_EXE=%%i" - goto :found_vcpkg - ) - :found_vcpkg - echo Found vcpkg at: %VCPKG_EXE% + for /f "tokens=*" %%i in ('where vcpkg.exe') do set "VCPKG_EXE=%%i" ) -REM Check if LOCALAPPDATA starts with a drive letter, if not set it to APPDATA +echo Found vcpkg at: !VCPKG_EXE! + +REM Derive vcpkg root (%~dp will end with a backslash) +for %%i in ("!VCPKG_EXE!") do set "VCPKG_ROOT=%%~dpi" +if "!VCPKG_ROOT:~-1!"=="\" set "VCPKG_ROOT=!VCPKG_ROOT:~0,-1!" 
+ +REM ── Ensure LOCALAPPDATA has a drive letter (GitHub Actions quirk) ─────────── if /I not "%LOCALAPPDATA:~1,1%"==":" ( - echo LOCALAPPDATA does not start with a drive letter. Setting it to APPDATA. + echo LOCALAPPDATA lacks drive letter; pointing it at APPDATA. set "LOCALAPPDATA=%APPDATA%" ) -echo Installing hyperscan via vcpkg... -set -"%HOMEDRIVE%\vcpkg\vcpkg.exe" install hyperscan:x64-windows +REM ── Install Hyperscan statically ──────────────────────────────────────────── +set "VCPKG_TRIPLET=x64-windows-static" +echo Installing Hyperscan (!VCPKG_TRIPLET!) via vcpkg… + +pushd "!VCPKG_ROOT!" +"!VCPKG_EXE!" install hyperscan:!VCPKG_TRIPLET! --clean-after-build || ( + echo ERROR: vcpkg install failed. + popd + exit /b 1 +) +popd + set "LIBHS_NO_PKG_CONFIG=1" -REM Point vectorscan-rs-sys at the Hyperscan install from vcpkg -set "HYPERSCAN_ROOT=%HOMEDRIVE%\vcpkg\installed\x64-windows" -set "LIB=%HYPERSCAN_ROOT%\lib;%LIB%" -set "INCLUDE=%HYPERSCAN_ROOT%\include;%INCLUDE%" +REM Path hints for vectorscan-rs-sys +set "HYPERSCAN_ROOT=!VCPKG_ROOT!\installed\!VCPKG_TRIPLET!" +set "LIB=!HYPERSCAN_ROOT!\lib;%LIB%" +set "INCLUDE=!HYPERSCAN_ROOT!\include;%INCLUDE%" -REM Check for Rust, install if missing -where rustc.exe >nul 2>nul +REM Fallback: rename vectorscan.lib -> hs.lib if vcpkg changed the name +if not exist "!HYPERSCAN_ROOT!\lib\hs.lib" if exist "!HYPERSCAN_ROOT!\lib\vectorscan.lib" ( + copy "!HYPERSCAN_ROOT!\lib\vectorscan.lib" "!HYPERSCAN_ROOT!\lib\hs.lib" >nul +) + +REM ── Install Rust toolchain if absent ──────────────────────────────────────── +where rustc.exe >nul 2>&1 if %ERRORLEVEL% NEQ 0 ( - echo Installing Rust... - choco install rust-ms -y - choco install cmake -y --installargs "ADD_CMAKE_TO_PATH=System" + echo Installing Rust via Chocolatey… + choco install rust-ms -y || exit /b 1 + choco install cmake -y --installargs "ADD_CMAKE_TO_PATH=System" || exit /b 1 call refreshenv - ) else ( echo Rust is already installed. ) -echo Building for Windows x64... 
+REM ── Build ─────────────────────────────────────────────────────────────────── +echo Building for Windows x64… cargo build --release --target x86_64-pc-windows-msvc || ( - echo Cargo build failed. + echo ERROR: Cargo build failed. exit /b 1 ) -echo Generating CHECKSUM.txt... +REM ── Package & checksum ────────────────────────────────────────────────────── +echo Generating CHECKSUM.txt… powershell -Command ^ "Get-FileHash .\target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe -Algorithm SHA256 | Out-File .\target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" if not exist "target\release" mkdir "target\release" -copy /Y "target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe" "target\release\" >nul -copy /Y "target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" "target\release\CHECKSUM-windows-x64.txt" >nul +copy /Y "target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe" "target\release\" >nul +copy /Y "target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" "target\release\CHECKSUM-windows-x64.txt" >nul -cd target\release +pushd target\release echo Creating archive: %PROJECT_NAME%-windows-x64.zip if exist "%PROJECT_NAME%-windows-x64.zip" del /f /q "%PROJECT_NAME%-windows-x64.zip" powershell -Command "Compress-Archive -Path '%PROJECT_NAME%.exe','CHECKSUM-windows-x64.txt' -DestinationPath '%PROJECT_NAME%-windows-x64.zip' -Force" if exist "%PROJECT_NAME%-windows-x64.zip" ( - REM -- append the ZIP’s SHA-256 to the existing checksum file ---- certutil -hashfile "%PROJECT_NAME%-windows-x64.zip" SHA256 >> "CHECKSUM-windows-x64.txt" echo Created: %PROJECT_NAME%-windows-x64.zip ) else ( echo ERROR: Archive not created. ) - echo Archives in target\release: dir /b *.zip 2>nul || echo None found. 
+popd endlocal -exit /b 0 \ No newline at end of file +exit /b 0 From 91edd36cd120d761aa25379f4ef8c56922f0d330 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 29 Jul 2025 11:03:49 -0700 Subject: [PATCH 071/357] Fixed broken windows build script --- buildwin.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildwin.bat b/buildwin.bat index 012d1f0..0b02e04 100644 --- a/buildwin.bat +++ b/buildwin.bat @@ -68,7 +68,7 @@ if %ERRORLEVEL% NEQ 0 ( echo Found vcpkg at: !VCPKG_EXE! -REM Derive vcpkg root (%~dp will end with a backslash) +REM Derive vcpkg root – note: %%~dpi ends with a backslash for %%i in ("!VCPKG_EXE!") do set "VCPKG_ROOT=%%~dpi" if "!VCPKG_ROOT:~-1!"=="\" set "VCPKG_ROOT=!VCPKG_ROOT:~0,-1!" From f9a84a3105fa5646533cc57c8306f6a33cfe22b3 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 29 Jul 2025 11:05:27 -0700 Subject: [PATCH 072/357] Fixed broken windows build script --- buildwin.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildwin.bat b/buildwin.bat index 0b02e04..57e564d 100644 --- a/buildwin.bat +++ b/buildwin.bat @@ -68,7 +68,7 @@ if %ERRORLEVEL% NEQ 0 ( echo Found vcpkg at: !VCPKG_EXE! -REM Derive vcpkg root – note: %%~dpi ends with a backslash +REM Derive vcpkg root for %%i in ("!VCPKG_EXE!") do set "VCPKG_ROOT=%%~dpi" if "!VCPKG_ROOT:~-1!"=="\" set "VCPKG_ROOT=!VCPKG_ROOT:~0,-1!" From 1a5759da932c3c1e2026c69b06db05c6e585f465 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 29 Jul 2025 13:30:10 -0700 Subject: [PATCH 073/357] Fixed broken windows build script --- buildwin.bat | 105 ++++++++++++++++++++++----------------------------- 1 file changed, 46 insertions(+), 59 deletions(-) diff --git a/buildwin.bat b/buildwin.bat index 57e564d..9d74445 100644 --- a/buildwin.bat +++ b/buildwin.bat @@ -1,27 +1,23 @@ @echo off REM --------------------------------------------------------------------------- -REM Build a Windows-x64 release of Kingfisher and package it with checksums. 
-REM -REM • Installs Hyperscan statically via vcpkg so vectorscan-rs-sys can link -REM against hs.lib. -REM • Installs Rust (via Chocolatey) if missing. -REM • Call with -force to clone & bootstrap vcpkg if it isn’t found. +REM Build a Windows‑x64 release binary and package it with checksums. +REM ‑ Clones vcpkg (if requested) and pins it to commit 4887ad6d14. REM --------------------------------------------------------------------------- -setlocal EnableDelayedExpansion +setlocal -REM ── Project name ──────────────────────────────────────────────────────────── set "PROJECT_NAME=kingfisher" +set "VCPKG_COMMIT=4887ad6d14" REM ← known‑good vcpkg snapshot -REM ── Require Windows ───────────────────────────────────────────────────────── +REM ── Require Windows ──────────────────────────────────────────────────────── if NOT "%OS%"=="Windows_NT" ( echo This script must be run on Windows. exit /b 1 ) -REM ── Locate MSVC toolchain ─────────────────────────────────────────────────── +REM ── Locate MSVC toolchain ────────────────────────────────────────────────── if "%VCINSTALLDIR%"=="" ( - echo VCINSTALLDIR not set — attempting auto-detection... + echo VCINSTALLDIR not set - attempting auto-detection… for %%P in ( "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC" "C:\Program Files\Microsoft Visual Studio\2022\Professional\VC" @@ -29,11 +25,10 @@ if "%VCINSTALLDIR%"=="" ( "C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC" ) do if exist "%%~P\Auxiliary\Build\vcvars64.bat" ( set "VCINSTALLDIR=%%~P" - echo Found Visual C++ Build Tools at: %%~P + echo Found Visual C++ Build Tools at: %%~P goto :vc_found ) echo ERROR: Could not find a suitable Visual Studio installation. - echo Install “Desktop development with C++” or set VCINSTALLDIR. 
exit /b 1 ) :vc_found @@ -45,89 +40,81 @@ call "%VCINSTALLDIR%\Auxiliary\Build\vcvars64.bat" || ( exit /b 1 ) -REM ── Locate or bootstrap vcpkg ─────────────────────────────────────────────── +REM ── Locate or bootstrap vcpkg, then pin to commit ────────────────────────── where vcpkg.exe >nul 2>&1 if %ERRORLEVEL% NEQ 0 ( + REM ----- vcpkg.exe not on PATH ------------------------------------------ if exist "%HOMEDRIVE%\vcpkg\vcpkg.exe" ( - set "VCPKG_EXE=%HOMEDRIVE%\vcpkg\vcpkg.exe" + REM vcpkg folder exists → pin it + echo Found existing vcpkg tree, pinning to %VCPKG_COMMIT%… + git -C "%HOMEDRIVE%\vcpkg" fetch --depth=1 origin %VCPKG_COMMIT% + git -C "%HOMEDRIVE%\vcpkg" checkout %VCPKG_COMMIT% ^ + || (echo ERROR: checkout failed.&exit /b 1) ) else if "%~1"=="-force" ( - echo Cloning and bootstrapping vcpkg… + REM Fresh clone + echo Cloning and bootstrapping vcpkg at commit %VCPKG_COMMIT%… if exist "%HOMEDRIVE%\vcpkg" rmdir /s /q "%HOMEDRIVE%\vcpkg" - git clone https://github.com/microsoft/vcpkg.git "%HOMEDRIVE%\vcpkg" + git clone https://github.com/microsoft/vcpkg.git "%HOMEDRIVE%\vcpkg" ^ + || (echo ERROR: git clone failed.&exit /b 1) pushd "%HOMEDRIVE%\vcpkg" - call .\bootstrap-vcpkg.bat || (echo ERROR: vcpkg bootstrap failed.&exit /b 1) - set "VCPKG_EXE=%CD%\vcpkg.exe" + git checkout %VCPKG_COMMIT% || (echo ERROR: checkout failed.&exit /b 1) + call .\bootstrap-vcpkg.bat || (echo ERROR: bootstrap failed.&exit /b 1) popd ) else ( echo ERROR: vcpkg not found. Install it or rerun with -force. 
exit /b 1 ) ) else ( + REM ----- vcpkg.exe already on PATH --------------------------------------- for /f "tokens=*" %%i in ('where vcpkg.exe') do set "VCPKG_EXE=%%i" + echo Found vcpkg at: %VCPKG_EXE% + REM Ensure the tree is on the expected commit + git -C "%HOMEDRIVE%\vcpkg" fetch --depth=1 origin %VCPKG_COMMIT% + git -C "%HOMEDRIVE%\vcpkg" checkout %VCPKG_COMMIT% ^ + || (echo ERROR: checkout failed.&exit /b 1) ) +if not defined VCPKG_EXE set "VCPKG_EXE=%HOMEDRIVE%\vcpkg\vcpkg.exe" -echo Found vcpkg at: !VCPKG_EXE! - -REM Derive vcpkg root -for %%i in ("!VCPKG_EXE!") do set "VCPKG_ROOT=%%~dpi" -if "!VCPKG_ROOT:~-1!"=="\" set "VCPKG_ROOT=!VCPKG_ROOT:~0,-1!" - -REM ── Ensure LOCALAPPDATA has a drive letter (GitHub Actions quirk) ─────────── +REM ── LOCALAPPDATA fix for CI ------------------------------------------------ if /I not "%LOCALAPPDATA:~1,1%"==":" ( echo LOCALAPPDATA lacks drive letter; pointing it at APPDATA. set "LOCALAPPDATA=%APPDATA%" ) -REM ── Install Hyperscan statically ──────────────────────────────────────────── -set "VCPKG_TRIPLET=x64-windows-static" -echo Installing Hyperscan (!VCPKG_TRIPLET!) via vcpkg… - -pushd "!VCPKG_ROOT!" -"!VCPKG_EXE!" install hyperscan:!VCPKG_TRIPLET! --clean-after-build || ( - echo ERROR: vcpkg install failed. - popd - exit /b 1 -) -popd - +REM ── Install Hyperscan (unchanged) ----------------------------------------- +echo Installing hyperscan via vcpkg... +"%VCPKG_EXE%" install hyperscan:x64-windows set "LIBHS_NO_PKG_CONFIG=1" +set "HYPERSCAN_ROOT=%HOMEDRIVE%\vcpkg\installed\x64-windows" +set "LIB=%HYPERSCAN_ROOT%\lib;%LIB%" +set "INCLUDE=%HYPERSCAN_ROOT%\include;%INCLUDE%" -REM Path hints for vectorscan-rs-sys -set "HYPERSCAN_ROOT=!VCPKG_ROOT!\installed\!VCPKG_TRIPLET!" 
-set "LIB=!HYPERSCAN_ROOT!\lib;%LIB%" -set "INCLUDE=!HYPERSCAN_ROOT!\include;%INCLUDE%" - -REM Fallback: rename vectorscan.lib -> hs.lib if vcpkg changed the name -if not exist "!HYPERSCAN_ROOT!\lib\hs.lib" if exist "!HYPERSCAN_ROOT!\lib\vectorscan.lib" ( - copy "!HYPERSCAN_ROOT!\lib\vectorscan.lib" "!HYPERSCAN_ROOT!\lib\hs.lib" >nul -) - -REM ── Install Rust toolchain if absent ──────────────────────────────────────── +REM ── Check for Rust toolchain (unchanged) ----------------------------------- where rustc.exe >nul 2>&1 if %ERRORLEVEL% NEQ 0 ( - echo Installing Rust via Chocolatey… - choco install rust-ms -y || exit /b 1 - choco install cmake -y --installargs "ADD_CMAKE_TO_PATH=System" || exit /b 1 + echo Installing Rust... + choco install rust-ms -y + choco install cmake -y --installargs "ADD_CMAKE_TO_PATH=System" call refreshenv ) else ( echo Rust is already installed. ) -REM ── Build ─────────────────────────────────────────────────────────────────── -echo Building for Windows x64… +REM ── Build (unchanged) ------------------------------------------------------ +echo Building for Windows x64... cargo build --release --target x86_64-pc-windows-msvc || ( - echo ERROR: Cargo build failed. + echo Cargo build failed. exit /b 1 ) -REM ── Package & checksum ────────────────────────────────────────────────────── -echo Generating CHECKSUM.txt… +REM ── Package & checksum (unchanged) ---------------------------------------- +echo Generating CHECKSUM.txt... 
powershell -Command ^ "Get-FileHash .\target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe -Algorithm SHA256 | Out-File .\target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" if not exist "target\release" mkdir "target\release" -copy /Y "target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe" "target\release\" >nul -copy /Y "target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" "target\release\CHECKSUM-windows-x64.txt" >nul +copy /Y "target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe" "target\release\" >nul +copy /Y "target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" "target\release\CHECKSUM-windows-x64.txt" >nul pushd target\release echo Creating archive: %PROJECT_NAME%-windows-x64.zip From 2d214555c578c759d713fac30cd04aa1e58b63fd Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 29 Jul 2025 13:38:59 -0700 Subject: [PATCH 074/357] Fixed broken windows build script --- buildwin.bat | 113 ++++++++++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 51 deletions(-) diff --git a/buildwin.bat b/buildwin.bat index 9d74445..55ca71f 100644 --- a/buildwin.bat +++ b/buildwin.bat @@ -1,21 +1,21 @@ @echo off -REM --------------------------------------------------------------------------- -REM Build a Windows‑x64 release binary and package it with checksums. -REM ‑ Clones vcpkg (if requested) and pins it to commit 4887ad6d14. -REM --------------------------------------------------------------------------- +REM This script builds a Windows x64 release binary and creates a tarball with checksum. +REM It requires vcpkg to be installed at root of C: drive (https://github.com/microsoft/vcpkg). +REM This script will install Rust (using chocolatey) if it is not already installed. 
+REM +REM Call with -force to clone and bootstrap vcpkg if it is not found +REM setlocal +REM Set your Cargo project name manually here if desired: set "PROJECT_NAME=kingfisher" -set "VCPKG_COMMIT=4887ad6d14" REM ← known‑good vcpkg snapshot -REM ── Require Windows ──────────────────────────────────────────────────────── +REM Optional check for OS: if NOT "%OS%"=="Windows_NT" ( echo This script must be run on Windows. exit /b 1 ) - -REM ── Locate MSVC toolchain ────────────────────────────────────────────────── if "%VCINSTALLDIR%"=="" ( echo VCINSTALLDIR not set - attempting auto-detection… for %%P in ( @@ -23,15 +23,20 @@ if "%VCINSTALLDIR%"=="" ( "C:\Program Files\Microsoft Visual Studio\2022\Professional\VC" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC" - ) do if exist "%%~P\Auxiliary\Build\vcvars64.bat" ( - set "VCINSTALLDIR=%%~P" - echo Found Visual C++ Build Tools at: %%~P - goto :vc_found + ) do ( + if exist "%%~P\Auxiliary\Build\vcvars64.bat" ( + set "VCINSTALLDIR=%%~P" + echo Found Visual C++ Build Tools at: %%~P + goto :vc_found + ) ) echo ERROR: Could not find a suitable Visual Studio installation. + echo Install “Desktop development with C++” or set VCINSTALLDIR. 
exit /b 1 ) :vc_found + +REM Strip trailing backslash if present if "%VCINSTALLDIR:~-1%"=="\" set "VCINSTALLDIR=%VCINSTALLDIR:~0,-1%" echo Initialising MSVC environment… @@ -40,74 +45,79 @@ call "%VCINSTALLDIR%\Auxiliary\Build\vcvars64.bat" || ( exit /b 1 ) -REM ── Locate or bootstrap vcpkg, then pin to commit ────────────────────────── -where vcpkg.exe >nul 2>&1 +REM Locate vcpkg.exe +where vcpkg.exe >nul 2>nul if %ERRORLEVEL% NEQ 0 ( - REM ----- vcpkg.exe not on PATH ------------------------------------------ if exist "%HOMEDRIVE%\vcpkg\vcpkg.exe" ( - REM vcpkg folder exists → pin it - echo Found existing vcpkg tree, pinning to %VCPKG_COMMIT%… - git -C "%HOMEDRIVE%\vcpkg" fetch --depth=1 origin %VCPKG_COMMIT% - git -C "%HOMEDRIVE%\vcpkg" checkout %VCPKG_COMMIT% ^ - || (echo ERROR: checkout failed.&exit /b 1) - ) else if "%~1"=="-force" ( - REM Fresh clone - echo Cloning and bootstrapping vcpkg at commit %VCPKG_COMMIT%… - if exist "%HOMEDRIVE%\vcpkg" rmdir /s /q "%HOMEDRIVE%\vcpkg" - git clone https://github.com/microsoft/vcpkg.git "%HOMEDRIVE%\vcpkg" ^ - || (echo ERROR: git clone failed.&exit /b 1) - pushd "%HOMEDRIVE%\vcpkg" - git checkout %VCPKG_COMMIT% || (echo ERROR: checkout failed.&exit /b 1) - call .\bootstrap-vcpkg.bat || (echo ERROR: bootstrap failed.&exit /b 1) - popd + set "VCPKG_EXE=%HOMEDRIVE%\vcpkg\vcpkg.exe" + echo Found vcpkg at: %VCPKG_EXE% ) else ( - echo ERROR: vcpkg not found. Install it or rerun with -force. - exit /b 1 + if "%~1"=="-force" ( + echo Cloning and bootstrapping vcpkg... + if exist "%HOMEDRIVE%\vcpkg" ( + rmdir /s /q "%HOMEDRIVE%\vcpkg" + ) + git clone https://github.com/microsoft/vcpkg.git "%HOMEDRIVE%\vcpkg" + pushd "%HOMEDRIVE%\vcpkg" + dir + call .\bootstrap-vcpkg.bat + set "VCPKG_EXE=%CD%\vcpkg.exe" + popd + echo Installed vcpkg at: %VCPKG_EXE% + ) else ( + echo ERROR: vcpkg not found. Please install it or re-run script with -force. 
+ exit /b 1 + ) ) ) else ( - REM ----- vcpkg.exe already on PATH --------------------------------------- - for /f "tokens=*" %%i in ('where vcpkg.exe') do set "VCPKG_EXE=%%i" + for /f "tokens=*" %%i in ('where vcpkg.exe') do ( + set "VCPKG_EXE=%%i" + goto :found_vcpkg + ) + :found_vcpkg echo Found vcpkg at: %VCPKG_EXE% - REM Ensure the tree is on the expected commit - git -C "%HOMEDRIVE%\vcpkg" fetch --depth=1 origin %VCPKG_COMMIT% - git -C "%HOMEDRIVE%\vcpkg" checkout %VCPKG_COMMIT% ^ - || (echo ERROR: checkout failed.&exit /b 1) ) -if not defined VCPKG_EXE set "VCPKG_EXE=%HOMEDRIVE%\vcpkg\vcpkg.exe" -REM ── LOCALAPPDATA fix for CI ------------------------------------------------ +REM Check if LOCALAPPDATA starts with a drive letter, if not set it to APPDATA if /I not "%LOCALAPPDATA:~1,1%"==":" ( - echo LOCALAPPDATA lacks drive letter; pointing it at APPDATA. + echo LOCALAPPDATA does not start with a drive letter. Setting it to APPDATA. set "LOCALAPPDATA=%APPDATA%" ) -REM ── Install Hyperscan (unchanged) ----------------------------------------- -echo Installing hyperscan via vcpkg... -"%VCPKG_EXE%" install hyperscan:x64-windows +REM ── Install Hyperscan ------------------------------------------------------ +echo Installing Hyperscan via vcpkg... +pushd "%HOMEDRIVE%\vcpkg" REM ► work inside the vcpkg root +"%VCPKG_EXE%" install hyperscan:x64-windows || ( + echo ERROR: vcpkg install failed. + popd + exit /b 1 +) +popd set "LIBHS_NO_PKG_CONFIG=1" + +REM Point vectorscan‑rs‑sys at the Hyperscan install set "HYPERSCAN_ROOT=%HOMEDRIVE%\vcpkg\installed\x64-windows" set "LIB=%HYPERSCAN_ROOT%\lib;%LIB%" set "INCLUDE=%HYPERSCAN_ROOT%\include;%INCLUDE%" -REM ── Check for Rust toolchain (unchanged) ----------------------------------- -where rustc.exe >nul 2>&1 +REM Check for Rust, install if missing +where rustc.exe >nul 2>nul if %ERRORLEVEL% NEQ 0 ( echo Installing Rust... 
choco install rust-ms -y choco install cmake -y --installargs "ADD_CMAKE_TO_PATH=System" call refreshenv + ) else ( echo Rust is already installed. ) -REM ── Build (unchanged) ------------------------------------------------------ echo Building for Windows x64... cargo build --release --target x86_64-pc-windows-msvc || ( echo Cargo build failed. exit /b 1 ) -REM ── Package & checksum (unchanged) ---------------------------------------- echo Generating CHECKSUM.txt... powershell -Command ^ "Get-FileHash .\target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe -Algorithm SHA256 | Out-File .\target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" @@ -116,20 +126,21 @@ if not exist "target\release" mkdir "target\release" copy /Y "target\x86_64-pc-windows-msvc\release\%PROJECT_NAME%.exe" "target\release\" >nul copy /Y "target\x86_64-pc-windows-msvc\release\CHECKSUM.txt" "target\release\CHECKSUM-windows-x64.txt" >nul -pushd target\release +cd target\release echo Creating archive: %PROJECT_NAME%-windows-x64.zip if exist "%PROJECT_NAME%-windows-x64.zip" del /f /q "%PROJECT_NAME%-windows-x64.zip" powershell -Command "Compress-Archive -Path '%PROJECT_NAME%.exe','CHECKSUM-windows-x64.txt' -DestinationPath '%PROJECT_NAME%-windows-x64.zip' -Force" if exist "%PROJECT_NAME%-windows-x64.zip" ( + REM -- append the ZIP’s SHA-256 to the existing checksum file ---- certutil -hashfile "%PROJECT_NAME%-windows-x64.zip" SHA256 >> "CHECKSUM-windows-x64.txt" echo Created: %PROJECT_NAME%-windows-x64.zip ) else ( echo ERROR: Archive not created. ) + echo Archives in target\release: dir /b *.zip 2>nul || echo None found. 
-popd endlocal -exit /b 0 +exit /b 0 \ No newline at end of file From bcf2b60e0bf5816159d719b399591d0f44a84e8a Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 29 Jul 2025 19:00:49 -0700 Subject: [PATCH 075/357] Added support for Slack --- .github/workflows/release.yml | 1 - CHANGELOG.md | 3 + Cargo.toml | 2 +- README.md | 19 ++++-- src/cli/commands/inputs.rs | 14 +++- src/findings_store.rs | 10 +++ src/lib.rs | 1 + src/main.rs | 4 ++ src/reporter.rs | 16 +++-- src/reporter/json_format.rs | 8 +++ src/reporter/pretty_format.rs | 6 ++ src/reporter/sarif_format.rs | 4 ++ src/scanner/repos.rs | 34 +++++++++- src/scanner/runner.rs | 6 +- src/slack.rs | 118 ++++++++++++++++++++++++++++++++++ tests/int_dedup.rs | 2 + tests/int_github.rs | 2 + tests/int_gitlab.rs | 2 + tests/int_validation_cache.rs | 2 + tests/int_vulnerable_files.rs | 4 ++ 20 files changed, 240 insertions(+), 18 deletions(-) create mode 100644 src/slack.rs diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 56fb028..956bc1a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -189,7 +189,6 @@ jobs: - name: Cache vcpkg artifacts uses: actions/cache@v3 with: - # Adjust these paths if your vcpkg root is somewhere else path: | C:\vcpkg\buildtrees C:\vcpkg\packages diff --git a/CHANGELOG.md b/CHANGELOG.md index 5556a43..5c1e055 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. 
+## [1.28.0] +- Added support for scanning Slack + ## [1.27.0] - Added Buildkite rule - Added support for scanning Docker images via `--docker-image` diff --git a/Cargo.toml b/Cargo.toml index 79595e8..fa9cf8c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.27.0" +version = "1.28.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/README.md b/README.md index ad65c78..d459b5b 100644 --- a/README.md +++ b/README.md @@ -15,10 +15,11 @@ Kingfisher extends Nosey Parker by: 1. **Validating secrets** in real time via cloud-provider APIs 2. Enhancing regex-based detection with **source-code parsing** for improved accuracy 3. Adding **GitLab** repository scanning support -4. Adding support for scanning **Docker** images via `--docker-image` +4. Adding support for scanning **Docker** images 5. Providing **Jira** scanning capabilities -6. Introducing a baseline feature that suppresses known secrets and reports only newly introduced ones -7. Offering native **Windows** support +6. Adding **Slack** scanning capabilities +7. Introducing a baseline feature that suppresses known secrets and reports only newly introduced ones +8. Offering native **Windows** support **MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) @@ -29,6 +30,7 @@ Kingfisher extends Nosey Parker by: - **Built-In Validation**: Hundreds of built-in detection rules, many with live-credential validators that call the relevant service APIs (AWS, Azure, GCP, Stripe, etc.) to confirm a secret is active. 
You can extend or override the library by adding YAML-defined rules on the command line—see [docs/RULES.md](/docs/RULES.md) for details - **Git History Scanning**: Scan local repos, remote GitHub/GitLab orgs/users, or arbitrary GitHub/GitLab repos - **Jira Scanning**: Scan issues returned from a JQL search using `--jira-url` and `--jql` +- **Slack Scanning**: Scan messages returned from a Slack search query using `--slack-query` - **Docker Image Scanning**: Scan public or private docker images via `--docker-image` - **Baseline Support:** Generate and manage baseline files to ignore known secrets and report only newly introduced ones. See ([docs/BASELINE.md](docs/BASELINE.md)) for details. @@ -353,7 +355,16 @@ KF_JIRA_TOKEN="token" kingfisher scan \ --max-results 1000 ``` --- +## Scanning Slack +### Scan Slack messages matching a search query + +```bash +KF_SLACK_TOKEN="token" kingfisher scan \ + --slack-query "from:username has:link" \ + --max-results 1000 +``` +*The Slack token must be a user token with the `search:read` scope. Bot tokens (those beginning with `xoxb-`) cannot call the Slack search API.* ## Environment Variables for Tokens @@ -362,8 +373,8 @@ KF_JIRA_TOKEN="token" kingfisher scan \ | `KF_GITHUB_TOKEN` | GitHub Personal Access Token | | `KF_GITLAB_TOKEN` | GitLab Personal Access Token | | `KF_JIRA_TOKEN` | Jira API token | +| `KF_SLACK_TOKEN` | Slack API token | | `KF_DOCKER_TOKEN` | Docker registry token (`user:pass` or bearer token). 
If unset, credentials from the Docker keychain are used | - Set them temporarily per command: ```bash diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index f698d87..8a1c23d 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -27,7 +27,8 @@ pub struct InputSpecifierArgs { "all_github_organizations", "all_gitlab_groups", "jira_url", - "docker_image" + "docker_image", + "slack_query" ]), value_hint = ValueHint::AnyPath )] @@ -94,7 +95,15 @@ pub struct InputSpecifierArgs { #[arg(long, requires = "jira_url")] pub jql: Option, - /// Maximum number of Jira results to fetch + /// Slack search query + #[arg(long)] + pub slack_query: Option, + + /// Use the specified URL for Slack API access + #[arg(long, default_value = "https://slack.com/api/", value_hint = ValueHint::Url)] + pub slack_api_url: Url, + + /// Maximum number of Slack or Jira results to fetch #[arg(long, default_value_t = 100)] pub max_results: usize, @@ -102,7 +111,6 @@ pub struct InputSpecifierArgs { #[arg(long = "docker-image")] pub docker_image: Vec, - /// Select how to clone Git repositories #[arg(long, default_value_t=GitCloneMode::Bare, alias="git-clone-mode")] pub git_clone: GitCloneMode, diff --git a/src/findings_store.rs b/src/findings_store.rs index 5972490..93e9f1c 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -53,6 +53,7 @@ pub struct FindingsStore { blob_meta: FxHashMap>, origin_meta: FxHashMap>, docker_images: FxHashMap, + slack_links: FxHashMap, } impl FindingsStore { pub fn new(clone_dir: PathBuf) -> Self { @@ -71,6 +72,7 @@ impl FindingsStore { seen_bloom, bloom_items: 0, docker_images: FxHashMap::default(), + slack_links: FxHashMap::default(), } } @@ -296,6 +298,14 @@ impl FindingsStore { &self.docker_images } + pub fn register_slack_message(&mut self, path: PathBuf, permalink: String) { + self.slack_links.insert(path, permalink); + } + + pub fn slack_links(&self) -> &FxHashMap { + &self.slack_links + } + pub fn 
get_finding_data_iter( &self, ) -> impl Iterator + '_ { diff --git a/src/lib.rs b/src/lib.rs index af74e7b..85bc57c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,6 +33,7 @@ pub mod safe_list; pub mod scanner; pub mod scanner_pool; pub mod serde_utils; +pub mod slack; pub mod snippet; pub mod update; pub mod util; diff --git a/src/main.rs b/src/main.rs index a85fb48..56f1e15 100644 --- a/src/main.rs +++ b/src/main.rs @@ -282,6 +282,10 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { jira_url: None, jql: None, max_results: 100, + // Slack query + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // Docker image scanning docker_image: Vec::new(), diff --git a/src/reporter.rs b/src/reporter.rs index ca69c97..e6709dc 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -122,13 +122,8 @@ impl DetailsReporter { args: &cli::commands::scan::ScanArgs, ) -> Option { // drop any trailing slash so we don’t end up with “//browse/…” - let jira_url = args - .input_specifier_args - .jira_url - .as_ref()? - .as_str() - .trim_end_matches('/'); - + let jira_url = args.input_specifier_args.jira_url.as_ref()?.as_str().trim_end_matches('/'); + let ds = self.datastore.lock().ok()?; let root = ds.clone_root(); let jira_dir = root.join("jira_issues"); @@ -140,6 +135,13 @@ impl DetailsReporter { } } + /// If the given file path corresponds to a Slack message downloaded to disk, + /// return the permalink for that message. 
+ fn slack_message_url(&self, path: &std::path::Path) -> Option { + let ds = self.datastore.lock().ok()?; + ds.slack_links().get(path).cloned() + } + fn docker_display_path(&self, path: &std::path::Path) -> Option { let ds = self.datastore.lock().ok()?; for (dir, image) in ds.docker_images().iter() { diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 6916337..6bad0cb 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -103,6 +103,8 @@ impl DetailsReporter { if let Origin::File(e) = origin { if let Some(url) = self.jira_issue_url(&e.path, args) { Some(url) + } else if let Some(url) = self.slack_message_url(&e.path) { + Some(url) } else if let Some(mapped) = self.docker_display_path(&e.path) { Some(mapped) } else { @@ -254,6 +256,8 @@ impl DetailsReporter { if let Origin::File(e) = origin { if let Some(url) = self.jira_issue_url(&e.path, args) { Some(url) + } else if let Some(url) = self.slack_message_url(&e.path) { + Some(url) } else if let Some(mapped) = self.docker_display_path(&e.path) { Some(mapped) } else { @@ -434,6 +438,10 @@ mod tests { jql: None, max_results: 100, // Docker image scanning + // Slack options + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + docker_image: Vec::new(), // clone / history options git_clone: GitCloneMode::Bare, diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index ffa7cf0..62dd354 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -216,6 +216,8 @@ impl<'a> Display for PrettyFinding<'a> { Origin::File(e) => { let display_path = if let Some(url) = reporter.jira_issue_url(&e.path, args) { url + } else if let Some(url) = reporter.slack_message_url(&e.path) { + url } else if let Some(mapped) = reporter.docker_display_path(&e.path) { mapped } else { @@ -347,6 +349,10 @@ fn test_pretty_format_with_nan_entropy_panics() { jira_url: None, jql: None, max_results: 100, + + // Slack options + 
slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), // Docker image scanning docker_image: Vec::new(), // git clone / history options diff --git a/src/reporter/sarif_format.rs b/src/reporter/sarif_format.rs index f771c17..3db2d99 100644 --- a/src/reporter/sarif_format.rs +++ b/src/reporter/sarif_format.rs @@ -73,6 +73,8 @@ impl DetailsReporter { Origin::File(e) => { let uri = if let Some(url) = self.jira_issue_url(&e.path, args) { url + } else if let Some(url) = self.slack_message_url(&e.path) { + url } else { e.path.display().to_string() }; @@ -209,6 +211,8 @@ impl DetailsReporter { let uri = if let Some(url) = self.jira_issue_url(&e.path, args) { url + } else if let Some(url) = self.slack_message_url(&e.path) { + url } else { e.path.display().to_string() }; diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index f84b758..2a8044a 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -23,7 +23,7 @@ use crate::{ github, gitlab, jira, matcher::Match, origin::OriginSet, - PathBuf, + slack, PathBuf, }; pub type DatastoreMessage = (OriginSet, BlobMetadata, Vec<(Option, Match)>); @@ -252,4 +252,36 @@ pub async fn fetch_jira_issues( ) .await?; Ok(vec![output_dir]) +} + +pub async fn fetch_slack_messages( + args: &scan::ScanArgs, + global_args: &global::GlobalArgs, + datastore: &Arc>, +) -> Result> { + let Some(query) = args.input_specifier_args.slack_query.as_deref() else { + return Ok(Vec::new()); + }; + let api_url = args.input_specifier_args.slack_api_url.clone(); + let max_results = args.input_specifier_args.max_results; + let output_root = { + let ds = datastore.lock().unwrap(); + ds.clone_root() + }; + let output_dir = output_root.join("slack_messages"); + let paths = slack::download_messages_to_dir( + api_url, + query, + max_results, + global_args.ignore_certs, + &output_dir, + ) + .await?; + { + let mut ds = datastore.lock().unwrap(); + for (path, link) in &paths { + ds.register_slack_message(path.clone(), 
link.clone()); + } + } + Ok(vec![output_dir]) } \ No newline at end of file diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index f1271cf..b727718 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -18,7 +18,7 @@ use crate::{ rules_database::RulesDatabase, scanner::{ clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos, - repos::{enumerate_gitlab_repos, fetch_jira_issues}, + repos::{enumerate_gitlab_repos, fetch_jira_issues, fetch_slack_messages}, run_secret_validation, save_docker_images, summary::print_scan_summary, }, @@ -68,6 +68,10 @@ pub async fn run_async_scan( let jira_dirs = fetch_jira_issues(args, global_args, &datastore).await?; input_roots.extend(jira_dirs); + // Fetch Slack messages if requested + let slack_dirs = fetch_slack_messages(args, global_args, &datastore).await?; + input_roots.extend(slack_dirs); + // Save Docker images if specified if !args.input_specifier_args.docker_image.is_empty() { let clone_root = { diff --git a/src/slack.rs b/src/slack.rs new file mode 100644 index 0000000..69a02a0 --- /dev/null +++ b/src/slack.rs @@ -0,0 +1,118 @@ +use anyhow::{Context, Result}; +use reqwest::Client; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use url::Url; + +#[derive(Debug, Serialize, Deserialize)] +pub struct SlackMessage { + pub permalink: String, + pub text: Option, + pub ts: String, + pub channel: SlackChannel, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct SlackChannel { + pub id: String, + pub name: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +struct SlackPagination { + page: Option, + page_count: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +struct SlackMessages { + matches: Vec, + pagination: Option, +} + +#[derive(Debug, Serialize, Deserialize)] +struct SlackSearchResponse { + ok: bool, + error: Option, + messages: Option, +} + +pub async fn search_messages( + api_url: Url, + query: &str, + max_results: usize, + ignore_certs: bool, +) 
-> Result> { + let token = std::env::var("KF_SLACK_TOKEN") + .context("KF_SLACK_TOKEN environment variable must be set")?; + + let client = Client::builder() + .danger_accept_invalid_certs(ignore_certs) + .build() + .context("Failed to build HTTP client")?; + + let mut page = 1u32; + let mut messages = Vec::new(); + + loop { + let url = api_url.join("search.messages").context("Failed to build Slack API URL")?; + + let resp = client + .get(url) + .bearer_auth(&token) + .query(&[("query", query), ("count", "100"), ("page", &page.to_string())]) + .send() + .await + .context("Failed to send Slack request")?; + + let body: SlackSearchResponse = + resp.json().await.context("Failed to parse Slack response")?; + + if !body.ok { + let err = body.error.unwrap_or_else(|| "unknown".to_string()); + if err == "not_allowed_token_type" { + return Err(anyhow::anyhow!( + "Slack API error: not_allowed_token_type - use a user token with the `search:read` scope" + )); + } + return Err(anyhow::anyhow!("Slack API error: {}", err)); + } + + let Some(msgs) = body.messages else { + break; + }; + for m in msgs.matches { + messages.push(m); + if messages.len() >= max_results { + return Ok(messages); + } + } + let next_page = msgs.pagination.as_ref().and_then(|p| p.page).unwrap_or(page); + let page_count = msgs.pagination.as_ref().and_then(|p| p.page_count).unwrap_or(next_page); + if next_page >= page_count { + break; + } + page += 1; + } + + Ok(messages) +} + +pub async fn download_messages_to_dir( + api_url: Url, + query: &str, + max_results: usize, + ignore_certs: bool, + output_dir: &PathBuf, +) -> Result> { + std::fs::create_dir_all(output_dir)?; + let messages = search_messages(api_url, query, max_results, ignore_certs).await?; + let mut paths = Vec::new(); + for msg in messages { + let ts = msg.ts.replace('.', "_"); + let file = output_dir.join(format!("{}_{}.json", msg.channel.id, ts)); + std::fs::write(&file, serde_json::to_vec(&msg)?)?; + paths.push((file, msg.permalink)); + } + 
Ok(paths) +} \ No newline at end of file diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 2763ebd..c42967f 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -82,6 +82,8 @@ rules: jira_url: None, jql: None, max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), // Docker image scanning docker_image: Vec::new(), // git clone / history options diff --git a/tests/int_github.rs b/tests/int_github.rs index d4f7f25..4bda269 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -69,6 +69,8 @@ fn test_github_remote_scan() -> Result<()> { jira_url: None, jql: None, max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), // Docker image scanning docker_image: Vec::new(), // git clone / history options diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 67b1bc3..4087618 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -68,6 +68,8 @@ fn test_gitlab_remote_scan() -> Result<()> { jira_url: None, jql: None, max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), // Docker image scanning docker_image: Vec::new(), git_clone: GitCloneMode::Bare, diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 3e21947..ae8dd50 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -125,6 +125,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> { jira_url: None, jql: None, max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), // Docker image scanning docker_image: Vec::new(), // git clone / history options diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index ad78192..187427e 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -68,6 +68,8 @@ impl TestContext { jira_url: None, jql: None, max_results: 100, + slack_query: None, + 
slack_api_url: Url::parse("https://slack.com/api/").unwrap(), // Docker image scanning docker_image: Vec::new(), // git clone / history options @@ -138,6 +140,8 @@ impl TestContext { jira_url: None, jql: None, max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), // Docker image scanning docker_image: Vec::new(), // git clone / history options From 0f6f7abf371864aa1700636c09a8b28a35f5680e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 29 Jul 2025 19:51:02 -0700 Subject: [PATCH 076/357] Added support for Slack --- README.md | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index d459b5b..5ade809 100644 --- a/README.md +++ b/README.md @@ -6,33 +6,29 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) Kingfisher is a blazingly fast secret‑scanning and validation tool built in Rust. It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware parsing via Tree‑Sitter, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production -

-Kingfisher originated as a fork of [Nosey Parker](https://github.com/praetorian-inc/noseyparker) by Praetorian Security, Inc, and is built atop their incredible work and the work contributed by the Nosey Parker community. +Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.com/praetorian-inc/noseyparker), and is built atop their incredible work and the work contributed by the Nosey Parker community. -Kingfisher extends Nosey Parker by: -1. **Validating secrets** in real time via cloud-provider APIs -2. Enhancing regex-based detection with **source-code parsing** for improved accuracy -3. Adding **GitLab** repository scanning support -4. Adding support for scanning **Docker** images -5. Providing **Jira** scanning capabilities -6. Adding **Slack** scanning capabilities -7. Introducing a baseline feature that suppresses known secrets and reports only newly introduced ones -8. Offering native **Windows** support +## What Kingfisher Adds +- **Live validation** via cloud-provider APIs +- **Language-aware detection** (AST parsing) for ~20 languages +- **Extra targets**: GitLab repos, Docker images, Jira issues, and Slack messages +- **Baseline mode**: ignore known secrets, flag only new ones +- **Native Windows** binaries -**MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) ## Key Features +- **Performance**: multithreaded, Hyperscan‑powered scanning built for huge codebases +- **Extensible rules**: hundreds of built-in detectors plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) +- **Multiple targets**: + - **Git history**: local repos or GitHub/GitLab orgs/users + - **Docker images**: public or private via `--docker-image` + - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` + - **Slack messages**: query‑based scans with `--slack-query` +- **Baseline 
management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) -- **Performance**: Multi‑threaded, Hyperscan‑powered scanning for massive codebases -- **Language‑Aware Accuracy**: AST parsing in 20+ languages via Tree‑Sitter reduces contextless regex matches. see [docs/PARSING.md](/docs/PARSING.md) -- **Built-In Validation**: Hundreds of built-in detection rules, many with live-credential validators that call the relevant service APIs (AWS, Azure, GCP, Stripe, etc.) to confirm a secret is active. You can extend or override the library by adding YAML-defined rules on the command line—see [docs/RULES.md](/docs/RULES.md) for details -- **Git History Scanning**: Scan local repos, remote GitHub/GitLab orgs/users, or arbitrary GitHub/GitLab repos -- **Jira Scanning**: Scan issues returned from a JQL search using `--jira-url` and `--jql` -- **Slack Scanning**: Scan messages returned from a Slack search query using `--slack-query` -- **Docker Image Scanning**: Scan public or private docker images via `--docker-image` -- **Baseline Support:** Generate and manage baseline files to ignore known secrets and report only newly introduced ones. See ([docs/BASELINE.md](docs/BASELINE.md)) for details. +**Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) # Getting Started ## Installation @@ -360,9 +356,13 @@ KF_JIRA_TOKEN="token" kingfisher scan \ ### Scan Slack messages matching a search query ```bash -KF_SLACK_TOKEN="token" kingfisher scan \ +KF_SLACK_TOKEN="xoxp-1234..." kingfisher scan \ --slack-query "from:username has:link" \ --max-results 1000 + +KF_SLACK_TOKEN="xoxp-1234..." kingfisher scan \ + --slack-query "akia" \ + --max-results 1000 ``` *The Slack token must be a user token with the `search:read` scope. 
Bot tokens (those beginning with `xoxb-`) cannot call the Slack search API.* From 1b427d97ca7348148332045a6b31eaa64fc725c2 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 29 Jul 2025 20:20:33 -0700 Subject: [PATCH 077/357] Added support for Slack. Wrote a basic integration test --- README.md | 4 +- tests/int_slack.rs | 198 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+), 2 deletions(-) create mode 100644 tests/int_slack.rs diff --git a/README.md b/README.md index 5ade809..09fabc1 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,10 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co ## What Kingfisher Adds - **Live validation** via cloud-provider APIs -- **Language-aware detection** (AST parsing) for ~20 languages +- **Language-aware detection** (source-code parsing) for ~20 languages - **Extra targets**: GitLab repos, Docker images, Jira issues, and Slack messages - **Baseline mode**: ignore known secrets, flag only new ones -- **Native Windows** binaries +- **Native Windows** binary ## Key Features diff --git a/tests/int_slack.rs b/tests/int_slack.rs new file mode 100644 index 0000000..d238bce --- /dev/null +++ b/tests/int_slack.rs @@ -0,0 +1,198 @@ +use std::{ + env, + sync::{Arc, Mutex}, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; +use wiremock::{ + matchers::{method, path}, + Mock, MockServer, ResponseTemplate, +}; + +struct TestContext { + rules_db: Arc, +} + +impl TestContext { + fn new() -> Result { + 
let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + max_results: 10, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 128, + baseline_file: None, + manage_baseline: false, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; + Ok(Self { rules_db: Arc::new(rules_db) }) + } +} + +#[tokio::test] +async fn test_scan_slack_messages() -> Result<()> { + let ctx = TestContext::new()?; + + let server = MockServer::start().await; + let response = serde_json::json!({ + "ok": true, + "messages": { + "matches": [{ + "permalink": 
"https://example.slack.com/archives/C123/p1234", + "text": "This contains a github token ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa", + "ts": "1234.56", + "channel": {"id": "C123", "name": "general"} + }], + "pagination": {"page": 1, "page_count": 1} + } + }); + Mock::given(method("GET")) + .and(path("/search.messages")) + .respond_with(ResponseTemplate::new(200).set_body_json(response)) + .mount(&server) + .await; + + env::set_var("KF_SLACK_TOKEN", "xoxp-test"); + + let temp_dir = TempDir::new()?; + let clone_dir = temp_dir.path().to_path_buf(); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + slack_query: Some("test".into()), + slack_api_url: Url::parse(&format!("{}/", server.uri()))?, + max_results: 10, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 128, + baseline_file: None, + 
manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &ctx.rules_db).await?; + + let findings = { + let ds = datastore.lock().unwrap(); + ds.get_matches().len() + }; + assert!(findings > 0); + Ok(()) +} \ No newline at end of file From 1db01311412c4c739d4b7fcf2fbea0a28e0c5028 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 29 Jul 2025 20:54:22 -0700 Subject: [PATCH 078/357] Added support for Slack. Wrote a basic integration test --- README.md | 29 +++++++++------------------- install-prereceive-hook.sh | 39 -------------------------------------- 2 files changed, 9 insertions(+), 59 deletions(-) delete mode 100755 install-prereceive-hook.sh diff --git a/README.md b/README.md index 09fabc1..a7c668d 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,14 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) +# Benchmark Results + +See ([docs/COMPARISON.md](docs/COMPARISON.md)) + +

+ Kingfisher Runtime Comparison +

+ # Getting Started ## Installation @@ -424,15 +432,6 @@ This creates `.git/hooks/pre-commit` that scans the files staged for commit with Installs a global pre-commit hook at `$HOME/.git/hooks/pre-commit`; for every Git repository you use, it runs `kingfisher scan --no-update-check` on the staged files and cancels the commit if any secrets are detected. -To check incoming pushes on a server-side repository, install the pre-receive hook: - -```bash -./install-prereceive-hook.sh -``` - -The resulting `.git/hooks/pre-receive` script scans the files in each pushed commit and rejects the push if any secrets are detected. - - ## Update Checks Kingfisher automatically queries GitHub for a newer release when it starts and tells you whether an update is available. @@ -558,20 +557,10 @@ Real breaches show how one exposed key can snowball into a full-scale incident: Leaked secrets fuel unauthorized access, lateral movement, regulatory fines, and brand-damaging incident-response costs. -# Benchmark Results - -See ([docs/COMPARISON.md](docs/COMPARISON.md)) - - -

- Kingfisher Runtime Comparison -

- - # Roadmap - More rules -- Packages for Linux (deb, rpm) +- More targets - Please file a [feature request](https://github.com/mongodb/kingfisher/issues) if you have specific features you'd like added # License diff --git a/install-prereceive-hook.sh b/install-prereceive-hook.sh deleted file mode 100755 index f7a4d5e..0000000 --- a/install-prereceive-hook.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -HOOK_DIR="$(git rev-parse --git-dir)/hooks" -HOOK_PATH="$HOOK_DIR/pre-receive" - -if [ -e "$HOOK_PATH" ]; then - echo "Error: $HOOK_PATH already exists. Move or remove the existing hook to continue." >&2 - exit 1 -fi - -cat > "$HOOK_PATH" <<'HOOK' -#!/usr/bin/env bash -# Pre-receive hook to scan pushed commits with Kingfisher -set -euo pipefail - -if ! command -v kingfisher >/dev/null 2>&1; then - echo "kingfisher not found in PATH" >&2 - exit 1 -fi - -while read -r oldrev newrev refname; do - if [ "$oldrev" = "0000000000000000000000000000000000000000" ]; then - git diff-tree --name-only -r "$newrev" -z | - xargs -0 --no-run-if-empty kingfisher scan --no-update-check - else - git diff-tree --no-commit-id --name-only -r "$oldrev" "$newrev" -z | - xargs -0 --no-run-if-empty kingfisher scan --no-update-check - fi - status=$? - if [ "$status" -ne 0 ]; then - echo "Kingfisher detected secrets in push. Push rejected." >&2 - exit "$status" - fi -done -HOOK - -chmod +x "$HOOK_PATH" -echo "Pre-receive hook installed to $HOOK_PATH" From aaabcbd4997c99c23308ea0e08ace1a3fd3d9fa2 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 29 Jul 2025 20:55:44 -0700 Subject: [PATCH 079/357] Added support for Slack. 
Wrote a basic integration test --- src/slack.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/slack.rs b/src/slack.rs index 69a02a0..a0cd1f5 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -4,33 +4,32 @@ use serde::{Deserialize, Serialize}; use std::path::PathBuf; use url::Url; -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Deserialize, Serialize)] pub struct SlackMessage { pub permalink: String, pub text: Option, pub ts: String, pub channel: SlackChannel, } - -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Deserialize, Serialize)] pub struct SlackChannel { pub id: String, pub name: Option, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Deserialize)] struct SlackPagination { page: Option, page_count: Option, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Deserialize)] struct SlackMessages { matches: Vec, pagination: Option, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Deserialize)] struct SlackSearchResponse { ok: bool, error: Option, @@ -87,12 +86,13 @@ pub async fn search_messages( return Ok(messages); } } - let next_page = msgs.pagination.as_ref().and_then(|p| p.page).unwrap_or(page); + let next_page = + msgs.pagination.as_ref().and_then(|p| p.page).map(|p| p + 1).unwrap_or(page + 1); let page_count = msgs.pagination.as_ref().and_then(|p| p.page_count).unwrap_or(next_page); - if next_page >= page_count { + if next_page > page_count { break; } - page += 1; + page = next_page; } Ok(messages) From e73aec9d70f3b149cbaf7a4096eabbdfcde073dd Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 31 Jul 2025 16:49:46 -0700 Subject: [PATCH 080/357] - Fixed issue when more than 1 named capture group is used in a rule variable - Added 2 new liquid template filters: 'b64dec' and 'es256_sign' - Added custom validator for Coinbase, and a Coinbase rule that uses it --- CHANGELOG.md | 5 ++ Cargo.toml | 3 + data/rules/coinbase.yml | 67 ++++++++++++++++++++++ 
data/rules/privkey.yml | 6 +- docs/RULES.md | 2 + src/liquid_filters.rs | 86 ++++++++++++++++++++++++++++ src/main.rs | 26 +++++---- src/rules/rule.rs | 1 + src/validation.rs | 39 ++++++++++++- src/validation/coinbase.rs | 113 +++++++++++++++++++++++++++++++++++++ src/validation/utils.rs | 43 +++++++++++--- 11 files changed, 369 insertions(+), 22 deletions(-) create mode 100644 data/rules/coinbase.yml create mode 100644 src/validation/coinbase.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c1e055..3c2d7eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes to this project will be documented in this file. +## [1.29.0] +- Fixed issue when more than 1 named capture group is used in a rule variable +- Added 2 new liquid template filters: `b64dec` and `es256_sign` +- Added custom validator for Coinbase, and a Coinbase rule that uses it + ## [1.28.0] - Added support for scanning Slack diff --git a/Cargo.toml b/Cargo.toml index fa9cf8c..e10fc6b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -188,6 +188,9 @@ ipnet = "2.11.0" jira_query = "1.6.0" oci-client = { version = "0.15", default-features = false, features = ["rustls-tls"] } walkdir = "2.5.0" +p256 = "0.13.2" +sec1 = "0.7.3" +rand_core = "0.9.3" [dependencies.tikv-jemallocator] version = "0.6" diff --git a/data/rules/coinbase.yml b/data/rules/coinbase.yml new file mode 100644 index 0000000..4819820 --- /dev/null +++ b/data/rules/coinbase.yml @@ -0,0 +1,67 @@ +rules: + - name: Coinbase Access Token + id: kingfisher.coinbase.1 + pattern: | + (?xi) + \b + coinbase + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [a-zA-Z-0-9]{32} + ) + \b + min_entropy: 3.5 + examples: + - coinbase_token = 32iAkQCcHHYxXGx20VogBZoj27PC1ouI + references: + - https://docs.cloud.coinbase.com/wallet-sdk/docs/api-keys + validation: + type: Http + content: + request: + method: GET + url: https://api.coinbase.com/v2/user + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - name: Coinbase CDP API Key + id: kingfisher.coinbase.2 + pattern: | + (?xims) + "name"\s*:\s*" + (?Porganizations/[0-9a-f-]{36}/apiKeys/[0-9a-f-]{36})" + .*"privateKey"\s*:\s*" + (?P + -----BEGIN\sEC\s{0,1} + PRIVATE\sKEY + (\sBLOCK)? + ----- + [a-z0-9 /+=\r\n\\n]{32,}? + -----END\s + (?: + RSA | + PGP | + DSA | + OPENSSH | + ENCRYPTED | + EC + )? + \s{0,1} + PRIVATE\sKEY + (\sBLOCK)? + ----- + ) + validation: + type: Coinbase + examples: + - | + { + "name": "organizations/243873d8-c14e-436d-9cea-10d530cbe201/apiKeys/d29bb143-ad4c-234f-9bd7-c705c16b6d19", + "privateKey": "-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIDs+vr9F40Mer+qYksK3QhkSMfUXOZsbRVSrelWGnMh3oAoGCCqGSM49\nAwEHoUQDQgAEOXj2qKzLYx21D3plbOa81ilURS/4K1jzLXBvgwfUe4hWDgBdKQvq\nIiet5qqZEwVlR/LqKQEUlP8YLrjLFU8Unw==\n-----END EC PRIVATE KEY-----\n" + } \ No newline at end of file diff --git a/data/rules/privkey.yml b/data/rules/privkey.yml index b0e25b5..58dce65 100644 --- a/data/rules/privkey.yml +++ b/data/rules/privkey.yml @@ -45,8 +45,7 @@ rules: - name: Contains Private Key id: kingfisher.privkey.2 pattern: | - (?xi) - (?ims) + (?xims) ( -----BEGIN\s (?: @@ -68,7 +67,8 @@ rules: PGP | DSA | OPENSSH | - ENCRYPTED + ENCRYPTED | + EC )? 
\s{0,1} PRIVATE\sKEY diff --git a/docs/RULES.md b/docs/RULES.md index 01ae9ec..01cce17 100644 --- a/docs/RULES.md +++ b/docs/RULES.md @@ -107,6 +107,8 @@ Below is the complete list of Liquid filters available in Kingfisher, along with | --------------------- | -------------------------------------------- | -------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | | `b64enc` | – | Base64-encodes the input using the standard alphabet. | `{{ TOKEN \| b64enc }}` | | `b64url_enc` | – | URL-safe Base64 (no padding). Useful for JWT headers & payloads. | `{{ TOKEN \| b64url_enc }}` | +| `b64dec` | – | Decodes a Base64 string. | `{{ "aGVsbG8=" \| b64dec }}` | +| `es256_sign` | `key` (string) | Signs the input with an ECDSA P-256 private key and returns a Base64URL signature. | `{{ "data" \| es256_sign: PRIVKEY }}` | | `sha256` | – | Computes the SHA-256 hex digest of the input. | `{{ TOKEN \| sha256 }}` | | `hmac_sha1` | `key` (string) | Computes HMAC-SHA1 over the input, returns Base64-encoded result. | `{{ TOKEN \| hmac_sha1: "secret-key" }}` | | `hmac_sha256` | `key` (string) | Computes HMAC-SHA256 over the input, returns Base64-encoded result. 
| `{{ TOKEN \| hmac_sha256: "secret-key" }}` | diff --git a/src/liquid_filters.rs b/src/liquid_filters.rs index 89fb891..87428fb 100644 --- a/src/liquid_filters.rs +++ b/src/liquid_filters.rs @@ -6,6 +6,10 @@ use liquid_core::{ Display_filter, Error as LiquidError, Expression, Filter, FilterParameters, FilterReflection, FromFilterParameters, ParseFilter, Result, Runtime, Value, ValueView, }; + +use p256::ecdsa::{signature::Signer, SigningKey}; +use p256::pkcs8::DecodePrivateKey; +use sec1::DecodeEcPrivateKey; use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use rand::{distr::Alphanumeric, Rng}; use sha1::Sha1; @@ -267,6 +271,69 @@ impl Filter for B64EncFilter { } } +#[derive(Debug, Clone, Default, FilterReflection, ParseFilter)] +#[filter(name = "b64dec", description = "Decodes a Base64 string", parsed(B64DecFilter))] +pub struct B64DecFilter; + +impl std::fmt::Display for B64DecFilter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "b64dec") + } +} + +impl Filter for B64DecFilter { + fn evaluate( + &self, + input: &dyn ValueView, + _runtime: &dyn Runtime, + ) -> Result { + let input_str = input.to_kstr(); + match general_purpose::STANDARD.decode(input_str.as_bytes()) { + Ok(bytes) => Ok(Value::scalar(String::from_utf8_lossy(&bytes).to_string())), + Err(e) => Err(LiquidError::with_msg(e.to_string())), + } + } +} + +#[derive(Debug, FilterParameters)] +struct Es256Args { + #[parameter(description = "PEM EC private key", arg_type = "str")] + key: Expression, +} + +#[derive(Clone, ParseFilter, FilterReflection, Default)] +#[filter( + name = "es256_sign", + description = "ECDSA P-256 SHA-256 signature (Base64URL)", + parameters(Es256Args), + parsed(Es256SignFilter) +)] +pub struct Es256Sign; + +#[derive(Debug, FromFilterParameters, Display_filter)] +#[name = "es256_sign"] +struct Es256SignFilter { + #[parameters] + args: Es256Args, +} + +impl Filter for Es256SignFilter { + fn evaluate(&self, input: &dyn ValueView, 
runtime: &dyn Runtime) -> Result { + let args = self.args.evaluate(runtime)?; + let key_pem = args.key.to_kstr(); + let signing_key = SigningKey::from_sec1_pem(&key_pem) + .or_else(|_| SigningKey::from_pkcs8_pem(&key_pem)) + .map_err(|e| LiquidError::with_msg(e.to_string()))?; + let sig: p256::ecdsa::Signature = signing_key.sign(input.to_kstr().as_bytes()); + // turn the signature into raw bytes… + let raw = sig.to_bytes(); + // …then Base64-URL encode + let b64 = general_purpose::URL_SAFE_NO_PAD.encode(raw); + Ok(Value::scalar(b64)) + } +} + + // ----------------------------------------------------------------------------- // Authentication & Security // ----------------------------------------------------------------------------- @@ -388,6 +455,8 @@ pub fn register_all(builder: liquid::ParserBuilder) -> liquid::ParserBuilder { .filter(UuidFilter::default()) .filter(JwtHeaderFilter::default()) .filter(B64EncFilter::default()) + .filter(B64DecFilter::default()) + .filter(Es256Sign::default()) .filter(RandomStringFilter::default()) .filter(HmacSha256::default()) .filter(HmacSha1::default()) @@ -424,6 +493,11 @@ mod tests { assert_eq!(render(r#"{{ "hello" | b64enc }}"#), "aGVsbG8="); } + #[test] + fn b64dec_filter() { + assert_eq!(render(r#"{{ "aGVsbG8=" | b64dec }}"#), "hello"); + } + #[test] fn sha256_filter() { let expect = format!("{:x}", Sha256::digest(b"hello")); @@ -441,6 +515,18 @@ mod tests { assert_eq!(render(r#"{{ "data" | hmac_sha1: "key1" }}"#), expect); } + #[test] + fn es256_sign_filter() { + let key = "-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIDs+vr9F40Mer+qYksK3QhkSMfUXOZsbRVSrelWGnMh3oAoGCCqGSM49\nAwEHoUQDQgAEOXj2qKzLYx21D3plbOa81ilURS/4K1jzLXBvgwfUe4hWDgBdKQvq\nIiet5qqZEwVlR/LqKQEUlP8YLrjLFU8Unw==\n-----END EC PRIVATE KEY-----"; + use p256::ecdsa::{signature::Signer, SigningKey}; + let sk = SigningKey::from_sec1_pem(key).unwrap(); + let sig: p256::ecdsa::Signature = sk.sign(b"hello"); + let expect = 
general_purpose::URL_SAFE_NO_PAD.encode(sig.to_bytes()); + let tmpl = format!(r#"{{ "hello" | es256_sign: "{}" }}"#, key.replace('\n', "\\n")); + assert_eq!(render(&tmpl), expect); + } + + #[test] fn b64url_enc_filter() { assert_eq!( diff --git a/src/main.rs b/src/main.rs index 56f1e15..688f5de 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,20 +5,24 @@ // * Fallback - system allocator (`system-alloc` feature) // ──────────────────────────────────────────────────────────── -// --- jemalloc (opt-in) --- -#[cfg(feature = "use-jemalloc")] -#[global_allocator] -static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +// // --- jemalloc (opt-in) --- +// #[cfg(feature = "use-jemalloc")] +// #[global_allocator] +// static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -// --- mimalloc (default) --- -#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] -#[global_allocator] -static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; +// // --- mimalloc (default) --- +// #[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] +// #[global_allocator] +// static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + +// // --- system allocator (explicit opt-out) --- +// #[cfg(feature = "system-alloc")] +// use std::alloc::System; +// #[cfg(feature = "system-alloc")] +// #[global_allocator] +// static GLOBAL: System = System; -// --- system allocator (explicit opt-out) --- -#[cfg(feature = "system-alloc")] use std::alloc::System; -#[cfg(feature = "system-alloc")] #[global_allocator] static GLOBAL: System = System; diff --git a/src/rules/rule.rs b/src/rules/rule.rs index a301a09..a47b172 100644 --- a/src/rules/rule.rs +++ b/src/rules/rule.rs @@ -35,6 +35,7 @@ fn default_true() -> bool { pub enum Validation { AWS, AzureStorage, + Coinbase, GCP, MongoDB, Postgres, diff --git a/src/validation.rs b/src/validation.rs index 59f5362..4823c27 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -25,6 +25,7 @@ 
use crate::{ mod aws; mod azure; +mod coinbase; mod gcp; mod httpvalidation; mod jwt; @@ -254,7 +255,7 @@ async fn timed_validate_single_match<'a>( if !missing.is_empty() { m.validation_success = false; m.validation_response_body = - format!("Validation skipped – missing dependent rules: {}", missing.join(", ")); + format!("Validation skipped - missing dependent rules: {}", missing.join(", ")); m.validation_response_status = StatusCode::PRECONDITION_REQUIRED; commit_and_return(m); return; @@ -828,7 +829,43 @@ async fn timed_validate_single_match<'a>( }, ); } + // ----------------------------------------------------- Coinbase validator + Some(Validation::Coinbase) => { + let cred_name = globals + .get("CRED_NAME") + .and_then(|v| v.as_scalar()) + .map(|s| s.into_owned().to_kstr().to_string()) + .unwrap_or_default(); + let private_key = globals + .get("PRIVATE_KEY") + .and_then(|v| v.as_scalar()) + .map(|s| s.into_owned().to_kstr().to_string()) + .unwrap_or_default(); + if cred_name.is_empty() || private_key.is_empty() { + m.validation_success = false; + m.validation_response_body = "Missing key name or private key.".to_string(); + m.validation_response_status = StatusCode::BAD_REQUEST; + commit_and_return(m); + return; + } + + match coinbase::validate_cdp_api_key(&cred_name, &private_key, client, parser, cache) + .await + { + Ok((ok, msg)) => { + m.validation_success = ok; + m.validation_response_body = msg; + m.validation_response_status = + if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; + } + Err(e) => { + m.validation_success = false; + m.validation_response_body = format!("Coinbase validation error: {}", e); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } + } // --------------------------------------------------------- Raw / none Some(Validation::Raw(raw)) => { debug!("Raw validation not implemented: {}", raw); diff --git a/src/validation/coinbase.rs b/src/validation/coinbase.rs new file mode 100644 index 0000000..6cf1f1e --- 
/dev/null +++ b/src/validation/coinbase.rs @@ -0,0 +1,113 @@ +use std::collections::BTreeMap; +use std::time::Duration; + +use anyhow::{anyhow, Result}; +use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; +use chrono::Utc; +use p256::{ + ecdsa::{signature::Signer, SigningKey}, + pkcs8::DecodePrivateKey, + SecretKey, +}; +use rand::TryRngCore; + +use rand::rngs::OsRng; +use reqwest::{Client, StatusCode, Url}; +use sha1::{Digest, Sha1}; + +use crate::validation::{httpvalidation, Cache, CachedResponse, VALIDATION_CACHE_SECONDS}; + +pub fn generate_coinbase_cache_key(cred_name: &str, private_key: &str) -> String { + let mut h = Sha1::new(); + h.update(cred_name.as_bytes()); + h.update(b"\0"); + h.update(private_key.as_bytes()); + format!("COINBASE:{:x}", h.finalize()) +} + +pub async fn validate_cdp_api_key( + cred_name: &str, + private_key_pem: &str, + client: &Client, + parser: &liquid::Parser, + cache: &Cache, +) -> Result<(bool, String)> { + let cache_key = generate_coinbase_cache_key(cred_name, private_key_pem); + if let Some(entry) = cache.get(&cache_key) { + let c = entry.value(); + if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + return Ok((c.is_valid, c.body.clone())); + } + } + + let jwt = build_jwt("GET", "api.coinbase.com", "/v2/user", cred_name, private_key_pem)?; + + let url = Url::parse("https://api.coinbase.com/v2/user")?; + let headers = BTreeMap::from([("Authorization".to_string(), format!("Bearer {}", jwt))]); + let rb = httpvalidation::build_request_builder( + client, + "GET", + &url, + &headers, + &None, + parser, + &liquid::Object::new(), + ) + .map_err(|e| anyhow!(e))?; + let resp = + httpvalidation::retry_request(rb, 1, Duration::from_millis(500), Duration::from_secs(2)) + .await?; + + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + let ok = status == StatusCode::OK; + let msg = format!("{body}"); + + cache.insert(cache_key.clone(), CachedResponse::new(msg.clone(), 
status, ok)); + + Ok((ok, msg)) +} + +fn build_jwt( + method: &str, + host: &str, + endpoint: &str, + cred_name: &str, + pem: &str, +) -> Result { + let pem = + pem.replace("\r\n", "\n").replace("\\r\\n", "\n").replace("\\n", "\n").replace("\r", "\n"); + let secret_key = SecretKey::from_sec1_pem(&pem) + .or_else(|_| SecretKey::from_pkcs8_pem(&pem)) + .map_err(|e| anyhow!("invalid EC key: {e}"))?; + let signing_key = SigningKey::from(secret_key); + + let mut rng = OsRng; + let mut nonce = [0u8; 16]; + + let _ = rng.try_fill_bytes(&mut nonce); + + let header = serde_json::json!({ + "typ": "JWT", + "alg": "ES256", + "kid": cred_name, + "nonce": hex::encode(nonce), + }); + let header_b64 = URL_SAFE_NO_PAD.encode(header.to_string()); + + let now = Utc::now().timestamp(); + let claims = serde_json::json!({ + "sub": cred_name, + "iss": "cdp", + "nbf": now, + "exp": now + 60, + "uri": format!("{} {}{}", method, host, endpoint), + }); + let claims_b64 = URL_SAFE_NO_PAD.encode(claims.to_string()); + + let signing_input = format!("{header_b64}.{claims_b64}"); + let sig: p256::ecdsa::Signature = signing_key.sign(signing_input.as_bytes()); + let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes()); + + Ok(format!("{signing_input}.{sig_b64}")) +} diff --git a/src/validation/utils.rs b/src/validation/utils.rs index 1f2d8ee..87372d4 100644 --- a/src/validation/utils.rs +++ b/src/validation/utils.rs @@ -2,19 +2,47 @@ use reqwest::Url; use tokio::net::lookup_host; use crate::validation::SerializableCaptures; -pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> { - let has_multiple_captures = captures.captures.len() > 1; + +// pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> { +// let has_multiple_captures = captures.captures.len() > 1; +// captures +// .captures +// .iter() +// .enumerate() +// .filter(|(idx, _)| !has_multiple_captures || *idx > 0) +// .map(|(_, capture)| { +// let name = 
capture.name.as_ref().map_or("TOKEN".to_string(), |n| n.to_uppercase()); +// (name, capture.value.clone().into_owned(), capture.start, capture.end) +// }) +// .collect() +// } + +/// Return (NAME, value, start, end) for every capture we care about. +/// +/// * If a capture has a name, use that (upper-cased) +/// * If it’s unnamed, fall back to `"TOKEN"` +/// * Skip the unnamed “whole-match” capture **only when** there are +/// additional captures to return. +pub fn process_captures( + captures: &SerializableCaptures, +) -> Vec<(String, String, usize, usize)> { + let multiple = captures.captures.len() > 1; + captures .captures .iter() - .enumerate() - .filter(|(idx, _)| !has_multiple_captures || *idx > 0) - .map(|(_, capture)| { - let name = capture.name.as_ref().map_or("TOKEN".to_string(), |n| n.to_uppercase()); - (name, capture.value.clone().into_owned(), capture.start, capture.end) + .filter(|cap| multiple.then(|| cap.name.is_some()).unwrap_or(true)) + .map(|cap| { + let name = cap + .name + .as_ref() + .map(|n| n.to_uppercase()) + .unwrap_or_else(|| "TOKEN".to_string()); + (name, cap.value.clone().into_owned(), cap.start, cap.end) }) .collect() } + pub fn find_closest_variable( captures: &[(String, String, usize, usize)], target_value: &String, @@ -47,6 +75,7 @@ pub fn find_closest_variable( } closest_value } + pub async fn check_url_resolvable(url: &Url) -> Result<(), Box> { let host = url.host_str().ok_or("No host in URL")?; let port = url.port().unwrap_or(if url.scheme() == "https" { 443 } else { 80 }); From 1cd69dc26700009511c7faf508ed77ee8a0b2237 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 31 Jul 2025 16:53:52 -0700 Subject: [PATCH 081/357] updated version number --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e10fc6b..4cb3b18 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.28.0" +version = "1.29.0" description = 
"MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true From 902446d7543e76aee0f1023048a3a85c173d5d33 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 31 Jul 2025 18:29:21 -0700 Subject: [PATCH 082/357] bug fixes in response to code review. Also added support for ed25519 coinbase cdp api keys --- CHANGELOG.md | 2 +- Cargo.toml | 1 + data/rules/coinbase.yml | 15 +++- src/liquid_filters.rs | 51 ------------- src/main.rs | 30 ++++---- src/validation/coinbase.rs | 146 +++++++++++++++++++++++++++++-------- src/validation/utils.rs | 16 +--- 7 files changed, 148 insertions(+), 113 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c2d7eb..79a3ac4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.29.0] - Fixed issue when more than 1 named capture group is used in a rule variable -- Added 2 new liquid template filters: `b64dec` and `es256_sign` +- Added a new liquid template filters: `b64dec` - Added custom validator for Coinbase, and a Coinbase rule that uses it ## [1.28.0] diff --git a/Cargo.toml b/Cargo.toml index 4cb3b18..e4576b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -191,6 +191,7 @@ walkdir = "2.5.0" p256 = "0.13.2" sec1 = "0.7.3" rand_core = "0.9.3" +ed25519-dalek = { version = "2.2", features = ["pkcs8"] } [dependencies.tikv-jemallocator] version = "0.6" diff --git a/data/rules/coinbase.yml b/data/rules/coinbase.yml index 4819820..c5a5763 100644 --- a/data/rules/coinbase.yml +++ b/data/rules/coinbase.yml @@ -30,7 +30,7 @@ rules: - report_response: true - type: StatusMatch status: [200] - - name: Coinbase CDP API Key + - name: Coinbase CDP API Key (ECDSA) id: kingfisher.coinbase.2 pattern: | (?xims) @@ -64,4 +64,17 @@ rules: { "name": "organizations/243873d8-c14e-436d-9cea-10d530cbe201/apiKeys/d29bb143-ad4c-234f-9bd7-c705c16b6d19", "privateKey": "-----BEGIN EC PRIVATE 
KEY-----\nMHcCAQEEIDs+vr9F40Mer+qYksK3QhkSMfUXOZsbRVSrelWGnMh3oAoGCCqGSM49\nAwEHoUQDQgAEOXj2qKzLYx21D3plbOa81ilURS/4K1jzLXBvgwfUe4hWDgBdKQvq\nIiet5qqZEwVlR/LqKQEUlP8YLrjLFU8Unw==\n-----END EC PRIVATE KEY-----\n" + } + - name: Coinbase CDP API Key (Ed25519) + id: kingfisher.coinbase.3 + pattern: | + (?xis) + "id"\s*:\s*"(?P[0-9a-f-]{36})"[^{]*?"privateKey"\s*:\s*"(?P[A-Za-z0-9+/=]{88})" + validation: + type: Coinbase + examples: + - | + { + "id": "413b23bf-4582-4e57-b33a-85d9527d9972", + "privateKey": "ygWq07YCO8UkmC9BE0PDBJNGhiu80yslsMUF9WnjPaIF5DBxb/wljjRuHhfuR/AMPC+kdgtL+mWKq/HOnq/YcQ==" } \ No newline at end of file diff --git a/src/liquid_filters.rs b/src/liquid_filters.rs index 87428fb..2142d83 100644 --- a/src/liquid_filters.rs +++ b/src/liquid_filters.rs @@ -295,44 +295,6 @@ impl Filter for B64DecFilter { } } -#[derive(Debug, FilterParameters)] -struct Es256Args { - #[parameter(description = "PEM EC private key", arg_type = "str")] - key: Expression, -} - -#[derive(Clone, ParseFilter, FilterReflection, Default)] -#[filter( - name = "es256_sign", - description = "ECDSA P-256 SHA-256 signature (Base64URL)", - parameters(Es256Args), - parsed(Es256SignFilter) -)] -pub struct Es256Sign; - -#[derive(Debug, FromFilterParameters, Display_filter)] -#[name = "es256_sign"] -struct Es256SignFilter { - #[parameters] - args: Es256Args, -} - -impl Filter for Es256SignFilter { - fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result { - let args = self.args.evaluate(runtime)?; - let key_pem = args.key.to_kstr(); - let signing_key = SigningKey::from_sec1_pem(&key_pem) - .or_else(|_| SigningKey::from_pkcs8_pem(&key_pem)) - .map_err(|e| LiquidError::with_msg(e.to_string()))?; - let sig: p256::ecdsa::Signature = signing_key.sign(input.to_kstr().as_bytes()); - // turn the signature into raw bytes… - let raw = sig.to_bytes(); - // …then Base64-URL encode - let b64 = general_purpose::URL_SAFE_NO_PAD.encode(raw); - Ok(Value::scalar(b64)) - } -} - // 
----------------------------------------------------------------------------- // Authentication & Security @@ -456,7 +418,6 @@ pub fn register_all(builder: liquid::ParserBuilder) -> liquid::ParserBuilder { .filter(JwtHeaderFilter::default()) .filter(B64EncFilter::default()) .filter(B64DecFilter::default()) - .filter(Es256Sign::default()) .filter(RandomStringFilter::default()) .filter(HmacSha256::default()) .filter(HmacSha1::default()) @@ -515,18 +476,6 @@ mod tests { assert_eq!(render(r#"{{ "data" | hmac_sha1: "key1" }}"#), expect); } - #[test] - fn es256_sign_filter() { - let key = "-----BEGIN EC PRIVATE KEY-----\nMHcCAQEEIDs+vr9F40Mer+qYksK3QhkSMfUXOZsbRVSrelWGnMh3oAoGCCqGSM49\nAwEHoUQDQgAEOXj2qKzLYx21D3plbOa81ilURS/4K1jzLXBvgwfUe4hWDgBdKQvq\nIiet5qqZEwVlR/LqKQEUlP8YLrjLFU8Unw==\n-----END EC PRIVATE KEY-----"; - use p256::ecdsa::{signature::Signer, SigningKey}; - let sk = SigningKey::from_sec1_pem(key).unwrap(); - let sig: p256::ecdsa::Signature = sk.sign(b"hello"); - let expect = general_purpose::URL_SAFE_NO_PAD.encode(sig.to_bytes()); - let tmpl = format!(r#"{{ "hello" | es256_sign: "{}" }}"#, key.replace('\n', "\\n")); - assert_eq!(render(&tmpl), expect); - } - - #[test] fn b64url_enc_filter() { assert_eq!( diff --git a/src/main.rs b/src/main.rs index 688f5de..06ef3cf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,27 +5,27 @@ // * Fallback - system allocator (`system-alloc` feature) // ──────────────────────────────────────────────────────────── -// // --- jemalloc (opt-in) --- -// #[cfg(feature = "use-jemalloc")] -// #[global_allocator] -// static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; +// --- jemalloc (opt-in) --- +#[cfg(feature = "use-jemalloc")] +#[global_allocator] +static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; -// // --- mimalloc (default) --- -// #[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] -// #[global_allocator] -// static GLOBAL: mimalloc::MiMalloc = 
mimalloc::MiMalloc; - -// // --- system allocator (explicit opt-out) --- -// #[cfg(feature = "system-alloc")] -// use std::alloc::System; -// #[cfg(feature = "system-alloc")] -// #[global_allocator] -// static GLOBAL: System = System; +// --- mimalloc (default) --- +#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; +// --- system allocator (explicit opt-out) --- +#[cfg(feature = "system-alloc")] use std::alloc::System; +#[cfg(feature = "system-alloc")] #[global_allocator] static GLOBAL: System = System; +// use std::alloc::System; +// #[global_allocator] +// static GLOBAL: System = System; + use std::{ io::Read, sync::{Arc, Mutex}, diff --git a/src/validation/coinbase.rs b/src/validation/coinbase.rs index 6cf1f1e..be6045f 100644 --- a/src/validation/coinbase.rs +++ b/src/validation/coinbase.rs @@ -5,15 +5,15 @@ use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; use chrono::Utc; use p256::{ - ecdsa::{signature::Signer, SigningKey}, + ecdsa::{signature::Signer as _, SigningKey}, pkcs8::DecodePrivateKey, SecretKey, }; -use rand::TryRngCore; - +use ed25519_dalek::{SigningKey as Ed25519Key, Signer as _}; use rand::rngs::OsRng; use reqwest::{Client, StatusCode, Url}; use sha1::{Digest, Sha1}; +use rand::TryRngCore; use crate::validation::{httpvalidation, Cache, CachedResponse, VALIDATION_CACHE_SECONDS}; @@ -61,53 +61,139 @@ pub async fn validate_cdp_api_key( let status = resp.status(); let body = resp.text().await.unwrap_or_default(); let ok = status == StatusCode::OK; - let msg = format!("{body}"); + let msg = body; cache.insert(cache_key.clone(), CachedResponse::new(msg.clone(), status, ok)); Ok((ok, msg)) } + +// fn build_jwt( +// method: &str, +// host: &str, +// endpoint: &str, +// cred_name: &str, +// pem: &str, +// ) -> Result { +// let pem = +// pem.replace("\r\n", "\n").replace("\\r\\n", "\n").replace("\\n", 
"\n").replace("\r", "\n"); +// let secret_key = SecretKey::from_sec1_pem(&pem) +// .or_else(|_| SecretKey::from_pkcs8_pem(&pem)) +// .map_err(|e| anyhow!("invalid EC key: {e}"))?; +// let signing_key = SigningKey::from(secret_key); + +// let mut rng = OsRng; +// let mut nonce = [0u8; 16]; + +// let _ = rng.try_fill_bytes(&mut nonce); + +// let header = serde_json::json!({ +// "typ": "JWT", +// "alg": "ES256", +// "kid": cred_name, +// "nonce": hex::encode(nonce), +// }); +// let header_b64 = URL_SAFE_NO_PAD.encode(header.to_string()); + +// let now = Utc::now().timestamp(); +// let claims = serde_json::json!({ +// "sub": cred_name, +// "iss": "cdp", +// "nbf": now, +// "exp": now + 60, +// "uri": format!("{} {}{}", method, host, endpoint), +// }); +// let claims_b64 = URL_SAFE_NO_PAD.encode(claims.to_string()); + +// let signing_input = format!("{header_b64}.{claims_b64}"); +// let sig: p256::ecdsa::Signature = signing_key.sign(signing_input.as_bytes()); +// let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes()); + +// Ok(format!("{signing_input}.{sig_b64}")) +// } + fn build_jwt( method: &str, host: &str, endpoint: &str, - cred_name: &str, + key_name: &str, pem: &str, ) -> Result { let pem = pem.replace("\r\n", "\n").replace("\\r\\n", "\n").replace("\\n", "\n").replace("\r", "\n"); - let secret_key = SecretKey::from_sec1_pem(&pem) - .or_else(|_| SecretKey::from_pkcs8_pem(&pem)) - .map_err(|e| anyhow!("invalid EC key: {e}"))?; - let signing_key = SigningKey::from(secret_key); let mut rng = OsRng; let mut nonce = [0u8; 16]; let _ = rng.try_fill_bytes(&mut nonce); - let header = serde_json::json!({ - "typ": "JWT", - "alg": "ES256", - "kid": cred_name, - "nonce": hex::encode(nonce), - }); - let header_b64 = URL_SAFE_NO_PAD.encode(header.to_string()); + // Try ECDSA (PEM encoded EC key). Fallback to raw Ed25519 base64 key. 
+ if let Ok(secret_key) = SecretKey::from_sec1_pem(&pem) + .or_else(|_| SecretKey::from_pkcs8_pem(&pem)) + { + let signing_key = SigningKey::from(secret_key); + let header = serde_json::json!({ + "typ": "JWT", + "alg": "ES256", + "kid": key_name, + "nonce": hex::encode(nonce), + }); + let header_b64 = URL_SAFE_NO_PAD.encode(header.to_string()); - let now = Utc::now().timestamp(); - let claims = serde_json::json!({ - "sub": cred_name, - "iss": "cdp", - "nbf": now, - "exp": now + 60, - "uri": format!("{} {}{}", method, host, endpoint), - }); - let claims_b64 = URL_SAFE_NO_PAD.encode(claims.to_string()); + let now = Utc::now().timestamp(); + let claims = serde_json::json!({ + "sub": key_name, + "iss": "cdp", + "nbf": now, + "exp": now + 120, + "uri": format!("{} {}{}", method, host, endpoint), + }); + let claims_b64 = URL_SAFE_NO_PAD.encode(claims.to_string()); - let signing_input = format!("{header_b64}.{claims_b64}"); - let sig: p256::ecdsa::Signature = signing_key.sign(signing_input.as_bytes()); - let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes()); + let signing_input = format!("{header_b64}.{claims_b64}"); + let sig: p256::ecdsa::Signature = signing_key.sign(signing_input.as_bytes()); + let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes()); - Ok(format!("{signing_input}.{sig_b64}")) -} + return Ok(format!("{signing_input}.{sig_b64}")); + } else { + // Assume base64-encoded Ed25519 keypair + let key_bytes = base64::engine::general_purpose::STANDARD + .decode(pem.as_bytes()) + .map_err(|e| anyhow!("invalid base64 key: {e}"))?; + let signing_key = match key_bytes.len() { + 32 => { + let arr: [u8; 32] = key_bytes[..32].try_into().unwrap(); + Ed25519Key::from_bytes(&arr) + } + 64 => { + let arr: [u8; 64] = key_bytes[..64].try_into().unwrap(); + Ed25519Key::from_keypair_bytes(&arr).map_err(|e| anyhow!("invalid Ed25519 key: {e}"))? 
+ } + _ => return Err(anyhow!("invalid Ed25519 key length")), + }; + + let header = serde_json::json!({ + "typ": "JWT", + "alg": "EdDSA", + "kid": key_name, + "nonce": hex::encode(nonce), + }); + let header_b64 = URL_SAFE_NO_PAD.encode(header.to_string()); + + let now = Utc::now().timestamp(); + let claims = serde_json::json!({ + "sub": key_name, + "iss": "cdp", + "nbf": now, + "exp": now + 120, + "uri": format!("{} {}{}", method, host, endpoint), + }); + let claims_b64 = URL_SAFE_NO_PAD.encode(claims.to_string()); + + let signing_input = format!("{header_b64}.{claims_b64}"); + let sig: ed25519_dalek::Signature = signing_key.sign(signing_input.as_bytes()); + let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes()); + return Ok(format!("{signing_input}.{sig_b64}")); + } +} \ No newline at end of file diff --git a/src/validation/utils.rs b/src/validation/utils.rs index 87372d4..9736aab 100644 --- a/src/validation/utils.rs +++ b/src/validation/utils.rs @@ -3,20 +3,6 @@ use tokio::net::lookup_host; use crate::validation::SerializableCaptures; -// pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> { -// let has_multiple_captures = captures.captures.len() > 1; -// captures -// .captures -// .iter() -// .enumerate() -// .filter(|(idx, _)| !has_multiple_captures || *idx > 0) -// .map(|(_, capture)| { -// let name = capture.name.as_ref().map_or("TOKEN".to_string(), |n| n.to_uppercase()); -// (name, capture.value.clone().into_owned(), capture.start, capture.end) -// }) -// .collect() -// } - /// Return (NAME, value, start, end) for every capture we care about. 
/// /// * If a capture has a name, use that (upper-cased) @@ -31,7 +17,7 @@ pub fn process_captures( captures .captures .iter() - .filter(|cap| multiple.then(|| cap.name.is_some()).unwrap_or(true)) + .filter(|cap| !multiple || cap.name.is_some()) .map(|cap| { let name = cap .name From f48eeb79e20931d1faeb258af3336d1e12f151cc Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 1 Aug 2025 09:15:24 -0700 Subject: [PATCH 083/357] Fixed validation caching for HTTP validators to include rendered headers so inactive secrets no longer appear active, in some cases --- CHANGELOG.md | 3 + Cargo.toml | 2 +- install-precommit-hook.sh | 4 + src/validation.rs | 129 ++++--------------------------- src/validation/httpvalidation.rs | 8 +- 5 files changed, 28 insertions(+), 118 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 79a3ac4..b43faf6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.30.0] +- Fixed validation caching for HTTP validators to include rendered headers so inactive secrets no longer appear active. + ## [1.29.0] - Fixed issue when more than 1 named capture group is used in a rule variable - Added a new liquid template filters: `b64dec` diff --git a/Cargo.toml b/Cargo.toml index e4576b5..d6e7199 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.29.0" +version = "1.30.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/install-precommit-hook.sh b/install-precommit-hook.sh index 923fb94..3f852df 100755 --- a/install-precommit-hook.sh +++ b/install-precommit-hook.sh @@ -68,6 +68,10 @@ if [[ $status -eq 200 ]]; then status=0 fi +if [[ $status -eq 205 ]]; then + status=0 +fi + if [[ $status -ne 0 ]]; then echo "Kingfisher detected secrets in staged files. Commit aborted." 
>&2 exit $status diff --git a/src/validation.rs b/src/validation.rs index 4823c27..7ef8c24 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -1,6 +1,7 @@ use std::{ fs, hash::{Hash, Hasher}, + collections::BTreeMap, sync::Arc, time::{Duration, Instant}, }; @@ -345,10 +346,24 @@ async fn timed_validate_single_match<'a>( // old per-request cache (optional) if !is_multipart { + let rendered_headers = httpvalidation::process_headers( + &http_validation.request.headers, + parser, + &globals, + &url, + ) + .unwrap_or_default(); + + let mut header_map = BTreeMap::new(); + for (name, value) in rendered_headers.iter() { + if let Ok(v) = value.to_str() { + header_map.insert(name.as_str().to_string(), v.to_string()); + } + } cache_key = httpvalidation::generate_http_cache_key_parts( http_validation.request.method.as_str(), &url, - &http_validation.request.headers, + &header_map, ); if let Some(cached) = cache.get(&cache_key) { let c = cached.value(); @@ -1029,116 +1044,4 @@ rules: Ok(()) } - // // ──────────────────────────────────────────────────────────────── - // // Slack Webhook – end-to-end validation test - // // ──────────────────────────────────────────────────────────────── - // #[tokio::test] - // async fn test_actual_slack_webhook_validation() -> anyhow::Result<()> { - // use std::sync::Arc; - - // use crossbeam_skiplist::SkipMap; - // use http::StatusCode; - // use rustc_hash::FxHashMap; - - // use crate::{ - // blob::BlobId, - // liquid_filters::register_all, - // location::OffsetSpan, - // matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures}, - // rules::{ - // rule::{Confidence, Rule}, - // Rules, - // }, - // validation::{validate_single_match, Cache}, - // }; - - // // 1️⃣ YAML snippet with the **exact** Slack rule - // let slack_yaml = r#" - // rules: - // - name: Slack Webhook id: kingfisher.slack.4 pattern: | (?xi) \b ( https://hooks\.slack\.com/services/ - // T[a-z0-9_-]{8,12}/ B[a-z0-9_-]{8,12}/ [a-z0-9_-]{20,30} ) \b min_entropy: 
3.3 confidence: - // medium examples: - // - https://hooks.slack.com/services/TY40v9sZ9/BxIqhIXIi/NGUyXK6nK7HMAqd0ASzXluoV - // - https://hooks.slack.com/services/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 - // validation: - // type: Http - // content: - // request: - // headers: - // Content-Type: application/json - // method: POST - // response_matcher: - // - report_response: true - // - type: WordMatch words: - // - invalid_payload - // - type: WordMatch words: - // - "invalid_token" - // negative: true - // url: "{{ TOKEN }}" - // "#; - - // // 2️⃣ Load that YAML into a Rules object - // let data = vec![(std::path::Path::new("slack_test.yaml"), slack_yaml.as_bytes())]; - // let rules = Rules::from_paths_and_contents(data, Confidence::Low)?; - - // // 3️⃣ Pull the rule syntax & wrap into a Rule - // let slack_rule_syntax = rules - // .rules - // .iter() - // .find(|r| r.id == "kingfisher.slack.4") - // .expect("Slack rule not found") - // .clone(); - // let slack_rule = Rule::new(slack_rule_syntax); - - // // 4️⃣ Provide a real-looking webhook URL (use one of the examples) - // let token = "ENTER YOUR SLACK WEBHOOK URL HERE"; - - // // 5️⃣ Build OwnedBlobMatch stub - // let blob_id = BlobId::new(&token.as_bytes()); - // let mut owned_blob_match = OwnedBlobMatch { - // rule: slack_rule.into(), - // blob_id, - // finding_fingerprint: 0, - // matching_input_offset_span: OffsetSpan { start: 0, end: token.len() }, - // captures: SerializableCaptures { - // captures: vec![SerializableCapture { - // name: Some("TOKEN".to_string()), - // match_number: -1, - // start: 0, - // end: token.len(), - // value: token.into(), - // }], - // }, - // validation_response_body: String::new(), - // validation_response_status: StatusCode::OK, - // validation_success: false, - // calculated_entropy: 5.0, - // }; - - // // 6️⃣ Prepare helpers and run validation - // let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; - // let client = reqwest::Client::new(); - 
// let cache: Cache = Arc::new(SkipMap::new()); - // let dependent_vars = FxHashMap::default(); - // let missing_deps = FxHashMap::default(); - - // validate_single_match( - // &mut owned_blob_match, - // &parser, - // &client, - // &dependent_vars, - // &missing_deps, - // &cache, - // ) - // .await; - - // // 7️⃣ Inspect outcome (true ⇒ credential considered ACTIVE) - // assert!( - // owned_blob_match.validation_success, - // "Slack webhook should be reported ACTIVE; body was {:?}", - // owned_blob_match.validation_response_body - // ); - - // Ok(()) - // } } diff --git a/src/validation/httpvalidation.rs b/src/validation/httpvalidation.rs index 598da51..908de67 100644 --- a/src/validation/httpvalidation.rs +++ b/src/validation/httpvalidation.rs @@ -491,7 +491,7 @@ mod tests { } #[test] fn test_validate_response_slack_webhook() { - // 1️⃣ Build matchers equivalent to rule kingfisher.slack.4 + // 1Build matchers equivalent to rule kingfisher.slack.4 let matchers = vec![ ResponseMatcher::WordMatch { r#type: "word-match".to_string(), @@ -507,16 +507,16 @@ mod tests { }, ]; - // 2️⃣ Simulate the real Slack response you posted + // 2️Simulate the real Slack response you posted let body = "invalid_payload"; let status = StatusCode::BAD_REQUEST; // 400 let mut headers = HeaderMap::new(); headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("text/plain")); - // 3️⃣ Call validate_response with html_allowed = false + // 3️Call validate_response with html_allowed = false let ok = validate_response(&matchers, body, &status, &headers, false); - // 4️⃣ It *should* be valid (true) because all matcher conditions hold + // 4️It *should* be valid (true) because all matcher conditions hold assert!(ok, "Slack webhook response should be considered ACTIVE"); } } From d35c44a94a8d3a68ade5425a8afe9e6e812f543e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 1 Aug 2025 09:15:47 -0700 Subject: [PATCH 084/357] Fixed validation caching for HTTP validators to include rendered 
headers so inactive secrets no longer appear active, in some cases --- install-precommit-hook.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/install-precommit-hook.sh b/install-precommit-hook.sh index 3f852df..923fb94 100755 --- a/install-precommit-hook.sh +++ b/install-precommit-hook.sh @@ -68,10 +68,6 @@ if [[ $status -eq 200 ]]; then status=0 fi -if [[ $status -eq 205 ]]; then - status=0 -fi - if [[ $status -ne 0 ]]; then echo "Kingfisher detected secrets in staged files. Commit aborted." >&2 exit $status From c508befe63ca1ed007eaa4a4a31fd3002fb463e3 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 1 Aug 2025 09:17:04 -0700 Subject: [PATCH 085/357] Fixed validation caching for HTTP validators to include rendered headers so inactive secrets no longer appear active, in some cases. Removed pre-commit installation hook, due to bugs --- CHANGELOG.md | 1 + README.md | 20 ---------- install-precommit-hook.sh | 78 --------------------------------------- 3 files changed, 1 insertion(+), 98 deletions(-) delete mode 100755 install-precommit-hook.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index b43faf6..15ebdab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.30.0] - Fixed validation caching for HTTP validators to include rendered headers so inactive secrets no longer appear active. 
+- Removed pre-commit installation hook, due to bugs ## [1.29.0] - Fixed issue when more than 1 named capture group is used in a rule variable diff --git a/README.md b/README.md index a7c668d..a837351 100644 --- a/README.md +++ b/README.md @@ -412,26 +412,6 @@ _If no token is provided Kingfisher still works for public repositories._ | 200 | Findings discovered | | 205 | Validated findings discovered | - -## Install a Pre-Commit Hook - -Run the provided helper script to add a hook that scans staged files before each commit: - -```bash -# local (current repo only ─ default) -./install-precommit-hook.sh -``` - -This creates `.git/hooks/pre-commit` that scans the files staged for commit with `kingfisher scan --no-update-check` and blocks the commit if any secrets are found. - -```bash -# global (every repo on this machine) -./install-precommit-hook.sh --global -### Install a Pre-Receive Hook -``` - -Installs a global pre-commit hook at `$HOME/.git/hooks/pre-commit`; for every Git repository you use, it runs `kingfisher scan --no-update-check` on the staged files and cancels the commit if any secrets are detected. - ## Update Checks Kingfisher automatically queries GitHub for a newer release when it starts and tells you whether an update is available. diff --git a/install-precommit-hook.sh b/install-precommit-hook.sh deleted file mode 100755 index 923fb94..0000000 --- a/install-precommit-hook.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env bash -# -# Install a Git pre‑commit hook that runs `kingfisher scan`. 
-# -# --global → install once for all repos via core.hooksPath -# --force → overwrite an existing pre‑commit hook -# -set -euo pipefail - -MODE="local" -FORCE=0 - -while [[ $# -gt 0 ]]; do - case "$1" in - -g|--global) MODE="global" ;; - -f|--force) FORCE=1 ;; - -h|--help) - echo "Usage: $0 [--global] [--force]" && exit 0 - ;; - *) echo "Unknown flag: $1" >&2; exit 1 ;; - esac - shift -done - -if [[ "$MODE" == "local" ]]; then - # ensure we're inside a Git repo - REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) \ - || { echo "Not inside a Git repository" >&2; exit 1; } - - HOOK_DIR="$(git rev-parse --git-dir)/hooks" -else - # global: honour existing core.hooksPath or default to ~/.git-hooks - HOOK_DIR=$(git config --global --get core.hooksPath || echo "$HOME/.git-hooks") - mkdir -p "$HOOK_DIR" - - # if the user hasn’t set core.hooksPath, do it now - if ! git config --global --get core.hooksPath >/dev/null; then - git config --global core.hooksPath "$HOOK_DIR" - echo "Set git config --global core.hooksPath to $HOOK_DIR" - fi -fi - -HOOK_PATH="$HOOK_DIR/pre-commit" - -if [[ -e "$HOOK_PATH" && $FORCE -eq 0 ]]; then - echo "Error: $HOOK_PATH already exists. Use --force to overwrite." >&2 - exit 1 -fi - -cat >"$HOOK_PATH" <<'HOOK' -#!/usr/bin/env bash -# Git pre‑commit hook to run Kingfisher on staged changes -set -euo pipefail - -if ! command -v kingfisher >/dev/null 2>&1; then - echo "kingfisher not found in PATH" >&2 - exit 1 -fi - -git diff --cached --name-only -z | \ - xargs -0 --no-run-if-empty kingfisher scan --only-valid --no-update-check -status=$? - -# ──────────────────────────────────────────────────────────────── -# Treat Kingfisher exit‑code 200 as success (map → 0) -# ──────────────────────────────────────────────────────────────── -if [[ $status -eq 200 ]]; then - status=0 -fi - -if [[ $status -ne 0 ]]; then - echo "Kingfisher detected secrets in staged files. Commit aborted." 
>&2 - exit $status -fi -HOOK - -chmod +x "$HOOK_PATH" -echo "Pre‑commit hook installed to $HOOK_PATH ($MODE mode)" From 3ecce947ab7319dd14452d085c132b05f6aa6a0d Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 1 Aug 2025 09:18:29 -0700 Subject: [PATCH 086/357] Fixed validation caching for HTTP validators to include rendered headers so inactive secrets no longer appear active, in some cases. Removed pre-commit installation hook, due to bugs --- src/validation/httpvalidation.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/validation/httpvalidation.rs b/src/validation/httpvalidation.rs index 908de67..1e15605 100644 --- a/src/validation/httpvalidation.rs +++ b/src/validation/httpvalidation.rs @@ -491,7 +491,7 @@ mod tests { } #[test] fn test_validate_response_slack_webhook() { - // 1Build matchers equivalent to rule kingfisher.slack.4 + // Build matchers equivalent to rule kingfisher.slack.4 let matchers = vec![ ResponseMatcher::WordMatch { r#type: "word-match".to_string(), @@ -507,16 +507,16 @@ mod tests { }, ]; - // 2️Simulate the real Slack response you posted + // Simulate the real Slack response you posted let body = "invalid_payload"; let status = StatusCode::BAD_REQUEST; // 400 let mut headers = HeaderMap::new(); headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("text/plain")); - // 3️Call validate_response with html_allowed = false + // Call validate_response with html_allowed = false let ok = validate_response(&matchers, body, &status, &headers, false); - // 4️It *should* be valid (true) because all matcher conditions hold + // 4It *should* be valid (true) because all matcher conditions hold assert!(ok, "Slack webhook response should be considered ACTIVE"); } } From 87871055b98de7625744ef526cb4d20c8029b8f4 Mon Sep 17 00:00:00 2001 From: Josh Larsen <2565382+joshlarsen@users.noreply.github.com> Date: Fri, 1 Aug 2025 15:04:37 -0400 Subject: [PATCH 087/357] add newer OpenAI API key formats --- data/rules/openai.yml 
| 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/data/rules/openai.yml b/data/rules/openai.yml index ef78a3f..064278c 100644 --- a/data/rules/openai.yml +++ b/data/rules/openai.yml @@ -20,11 +20,42 @@ rules: content: request: headers: - Authorization: "Bearer {{ TOKEN }}" + Authorization: 'Bearer {{ TOKEN }}' method: GET response_matcher: - report_response: true - status: - 200 type: StatusMatch - url: https://api.openai.com/v1/me \ No newline at end of file + url: https://api.openai.com/v1/me + + - name: OpenAI API Key + id: kingfisher.openai.2 + pattern: | + (?xi) + \b + ( + (sk-(?:proj|svcacct|None)-[A-Z0-9_-]{100,}[A-Z0-9_-]*) + ) + \b + min_entropy: 4.0 + confidence: medium + examples: + - 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 + - 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 + - sk-None-JEBiV9H-bLEZoOhNvLWmCNR74dIbql-p3yWFmWpdYCjeR1PWM_PS40yTLowkF3VzXHYJ3VbFarT3BlbkFJYznRo8bADhczK0Ca7t-WRbdwRlC1DPc8P2EaJm03OIg01Uj0cQxRAPO-4Rjs_TNyKXnVePtkcA + references: + - https://help.openai.com/en/articles/9132009-how-can-i-view-the-users-or-organizations-associated-with-an-api-key + validation: + type: Http + content: + request: + headers: + Authorization: 'Bearer {{ TOKEN }}' + method: GET + response_matcher: + - report_response: true + - status: + - 200 + type: StatusMatch + url: https://api.openai.com/v1/models From e1533118c050536a1c7c17d43b9f0b3c89ee8099 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 1 Aug 2025 15:31:29 -0700 Subject: [PATCH 088/357] Update data/rules/openai.yml Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- data/rules/openai.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/rules/openai.yml 
b/data/rules/openai.yml index 064278c..d0b01cb 100644 --- a/data/rules/openai.yml +++ b/data/rules/openai.yml @@ -35,7 +35,7 @@ rules: (?xi) \b ( - (sk-(?:proj|svcacct|None)-[A-Z0-9_-]{100,}[A-Z0-9_-]*) + (sk-(?:proj|svcacct|None)-[A-Z0-9_-]{100,}) ) \b min_entropy: 4.0 From 46d0ecce3ba63fc28ca669938fd8b7860b575c3c Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 1 Aug 2025 16:56:04 -0700 Subject: [PATCH 089/357] - New rules: Telegram bot token, OpenWeatherMap, Apify - New OpenAI detectors added (@joshlarsen) - Fixed bug that broke validation when using unnamed group captures --- CHANGELOG.md | 5 ++ Cargo.toml | 8 +-- Makefile | 1 + data/rules/apify.yml | 35 ++++++++++++ data/rules/openweathermap.yml | 42 ++++++++++++++ data/rules/recaptcha.yml | 2 - data/rules/telegram.yml | 30 ++++++++++ docs/RULES.md | 1 - rustfmt.toml | 6 -- src/git_binary.rs | 6 +- src/git_metadata_graph.rs | 2 +- src/jira.rs | 2 +- src/liquid_filters.rs | 4 -- src/main.rs | 2 +- src/reporter.rs | 3 +- src/reporter/json_format.rs | 2 +- src/reporter/sarif_format.rs | 3 +- src/scanner/docker.rs | 6 +- src/scanner/mod.rs | 2 +- src/scanner/repos.rs | 3 +- src/scanner/runner.rs | 2 +- src/slack.rs | 2 +- src/validation.rs | 3 +- src/validation/coinbase.rs | 18 +++--- src/validation/jwt.rs | 6 +- src/validation/utils.rs | 103 +++++++++++++++++++++++++++++++--- tests/int_gitlab.rs | 2 +- tests/int_slack.rs | 2 +- tests/smoke_docker.rs | 2 +- 29 files changed, 241 insertions(+), 64 deletions(-) create mode 100644 data/rules/apify.yml create mode 100644 data/rules/openweathermap.yml create mode 100644 data/rules/telegram.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 15ebdab..427b527 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes to this project will be documented in this file. 
+## [1.31.0] +- New rules: Telegram bot token, OpenWeatherMap, Apify +- New OpenAI detectors added (@joshlarsen) +- Fixed bug that broke validation when using unnamed group captures + ## [1.30.0] - Fixed validation caching for HTTP validators to include rendered headers so inactive secrets no longer appear active. - Removed pre-commit installation hook, due to bugs diff --git a/Cargo.toml b/Cargo.toml index d6e7199..de43779 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.30.0" +version = "1.31.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -104,7 +104,6 @@ http = "1.3.1" liquid = "0.26.11" liquid-core = "0.26.11" flate2 = "1.1.2" -brotli = "6.0.0" thousands = "0.2.0" base32 = "0.5.1" crossbeam-skiplist = "0.1.3" @@ -172,13 +171,10 @@ color-backtrace = "0.7.0" gitlab = "0.1801.0" mimalloc = {version = "0.1.47", features = ["override"]} thread_local = "1.1.9" -crc32fast = "1.5.0" bloomfilter = "3.0.1" uuid = "1.17.0" -urlencoding = "2.1.3" rand = "0.9.1" percent-encoding = "2.3.1" -trust-dns-resolver = { version = "0.23.2", default-features = false, features = ["tokio-runtime"] } atty = "0.2.14" self_update = { version = "0.42.0", default-features = false, features = ["rustls", "archive-tar", "archive-zip", "compression-flate2"] } semver = "1.0.26" @@ -189,8 +185,6 @@ jira_query = "1.6.0" oci-client = { version = "0.15", default-features = false, features = ["rustls-tls"] } walkdir = "2.5.0" p256 = "0.13.2" -sec1 = "0.7.3" -rand_core = "0.9.3" ed25519-dalek = { version = "2.2", features = ["pkcs8"] } [dependencies.tikv-jemallocator] diff --git a/Makefile b/Makefile index e662d7e..b4d63a6 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ endif ifeq ($(OS),darwin) export HOMEBREW_NO_INSTALL_CLEANUP=1 export HOMEBREW_NO_ENV_HINTS=1 + export HOMEBREW_NO_AUTO_UPDATE=1 endif # detect host architecture and map 
to our target suffixes diff --git a/data/rules/apify.yml b/data/rules/apify.yml new file mode 100644 index 0000000..78c99dd --- /dev/null +++ b/data/rules/apify.yml @@ -0,0 +1,35 @@ +rules: + - name: Apify API Token + id: kingfisher.apify.1 + pattern: | + (?xi) + \b + ( + apify_api_[A-Z0-9]{34,38} + ) + \b + confidence: medium + min_entropy: 3.5 + validation: + type: Http + content: + request: + method: GET + url: "https://api.apify.com/v2/users/me" + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"data"' + - '"username"' + match_all_words: true + references: + - https://docs.gitguardian.com/secrets-detection/secrets-detection-engine/detectors/specifics/apify_token + - https://docs.apify.com/api/v2#/reference/users/user-object/get-user-public-profile-or-me + - https://docs.apify.com/api/v2/users-me-get + examples: + - "apify_api_NcjXcxEz2XL1irjppyWSHvjghalQOd1LXOHv" + - "apify_api_9uyewBxQUF1EXWdKVc4lNaTSM461Ls4oQouz" \ No newline at end of file diff --git a/data/rules/openweathermap.yml b/data/rules/openweathermap.yml new file mode 100644 index 0000000..c6b0251 --- /dev/null +++ b/data/rules/openweathermap.yml @@ -0,0 +1,42 @@ +rules: + # --------------------------------------------------------------------- + # 1. OpenWeather Map API Key (detector unchanged, new validation) + # --------------------------------------------------------------------- + - name: OpenWeather Map API Key + id: kingfisher.openweather.1 + pattern: | + (?xi) + \b + (?:pyowm|openweather|owm\b) + (?:.|[\n\r]){0,64}? 
+ \b + ( + [a-z0-9]{32} + | + APPID= + [a-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + examples: + - pyowm = '3k144a5af729351d0fc58bdrj9a21mkr' + - owm = '3k144a5af729351d0fc58bdrj9a21mkr' + - openweatherapikey=cd2b1d12d01ae2deffecfebafcc3c31d + - apikey=openweather:cd2b1d12d01ae2deffecfebafcc3c31d + validation: + type: Http + content: + request: + method: GET + url: https://api.openweathermap.org/data/2.5/forecast?q=London&appid={{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + words: ['"cod":"200"'] + references: + - https://openweathermap.org/forecast5 + - https://openweathermap.org/appid + - https://publicapi.dev/open-weather-map-api + diff --git a/data/rules/recaptcha.yml b/data/rules/recaptcha.yml index 4966857..e8f62a6 100644 --- a/data/rules/recaptcha.yml +++ b/data/rules/recaptcha.yml @@ -5,8 +5,6 @@ rules: (?xi) recaptcha (?:.|[\n\r]){0,16}? - (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) - (?:.|[\n\r]){0,32}? \b ( 6l[c-f][a-z0-9_-].{36} diff --git a/data/rules/telegram.yml b/data/rules/telegram.yml new file mode 100644 index 0000000..9b6dc98 --- /dev/null +++ b/data/rules/telegram.yml @@ -0,0 +1,30 @@ +rules: + - name: Telegram Bot Token + id: kingfisher.telegram.1 + pattern: | + (?xi) + \b + ( + [0-9]{8,10} + : + [A-Z0-9_-]{35} + ) + \b + confidence: medium + min_entropy: 3.5 + validation: + type: Http + content: + request: + method: GET + url: "https://api.telegram.org/bot{{TOKEN}}/getMe" + response_matcher: + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"ok":true' + examples: + - "110201543:AAHdqTcvCH1vGWJxfSeofSAs0K5PALDsawd" + - "508627689:AAEuLPKs-EhrjrYGnz60bnYNZqakf6HJxc0" + - "3628091811:BAG9RuJiqgOGIfFbOPBpAo6QhIJoD9mCdDs" diff --git a/docs/RULES.md b/docs/RULES.md index 01cce17..5a2fbfc 100644 --- a/docs/RULES.md +++ b/docs/RULES.md @@ -108,7 +108,6 @@ Below is the complete list of Liquid filters available in Kingfisher, along with | `b64enc` | – | Base64-encodes 
the input using the standard alphabet. | `{{ TOKEN \| b64enc }}` | | `b64url_enc` | – | URL-safe Base64 (no padding). Useful for JWT headers & payloads. | `{{ TOKEN \| b64url_enc }}` | | `b64dec` | – | Decodes a Base64 string. | `{{ "aGVsbG8=" \| b64dec }}` | -| `es256_sign` | `key` (string) | Signs the input with an ECDSA P-256 private key and returns a Base64URL signature. | `{{ "data" \| es256_sign: PRIVKEY }}` | | `sha256` | – | Computes the SHA-256 hex digest of the input. | `{{ TOKEN \| sha256 }}` | | `hmac_sha1` | `key` (string) | Computes HMAC-SHA1 over the input, returns Base64-encoded result. | `{{ TOKEN \| hmac_sha1: "secret-key" }}` | | `hmac_sha256` | `key` (string) | Computes HMAC-SHA256 over the input, returns Base64-encoded result. | `{{ TOKEN \| hmac_sha256: "secret-key" }}` | diff --git a/rustfmt.toml b/rustfmt.toml index 1f12150..5aede1f 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -5,11 +5,5 @@ tab_spaces = 4 use_small_heuristics = "Max" newline_style = "Unix" -imports_granularity = "Crate" -group_imports = "StdExternalCrate" reorder_imports = true -normalize_doc_attributes = true -format_code_in_doc_comments = true -wrap_comments = true -comment_width = 100 diff --git a/src/git_binary.rs b/src/git_binary.rs index b2b6918..fc2baa1 100644 --- a/src/git_binary.rs +++ b/src/git_binary.rs @@ -36,13 +36,11 @@ impl Git { /// Create a new `Git` instance. /// /// * `ignore_certs`: If `true`, disables SSL certificate verification for `git` operations. 
-pub fn new(ignore_certs: bool) -> Self { + pub fn new(ignore_certs: bool) -> Self { let mut credentials = Vec::new(); // If either GitHub or GitLab token is set, first clear existing credential.helpers - if std::env::var("KF_GITHUB_TOKEN").is_ok() - || std::env::var("KF_GITLAB_TOKEN").is_ok() - { + if std::env::var("KF_GITHUB_TOKEN").is_ok() || std::env::var("KF_GITLAB_TOKEN").is_ok() { credentials.push("-c".into()); credentials.push(r#"credential.helper="#.into()); } diff --git a/src/git_metadata_graph.rs b/src/git_metadata_graph.rs index 04d6729..4a45940 100644 --- a/src/git_metadata_graph.rs +++ b/src/git_metadata_graph.rs @@ -139,7 +139,7 @@ impl RepositoryIndex { let mut num_trees = 0; let mut num_blobs = 0; let mut num_commits = 0; - + for oid in odb .iter() .context("Failed to iterate object database")? diff --git a/src/jira.rs b/src/jira.rs index 9b9e4fb..e3c2adc 100644 --- a/src/jira.rs +++ b/src/jira.rs @@ -49,4 +49,4 @@ pub async fn download_issues_to_dir( paths.push(file); } Ok(paths) -} \ No newline at end of file +} diff --git a/src/liquid_filters.rs b/src/liquid_filters.rs index 2142d83..e9d6ea4 100644 --- a/src/liquid_filters.rs +++ b/src/liquid_filters.rs @@ -7,9 +7,6 @@ use liquid_core::{ FromFilterParameters, ParseFilter, Result, Runtime, Value, ValueView, }; -use p256::ecdsa::{signature::Signer, SigningKey}; -use p256::pkcs8::DecodePrivateKey; -use sec1::DecodeEcPrivateKey; use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use rand::{distr::Alphanumeric, Rng}; use sha1::Sha1; @@ -295,7 +292,6 @@ impl Filter for B64DecFilter { } } - // ----------------------------------------------------------------------------- // Authentication & Security // ----------------------------------------------------------------------------- diff --git a/src/main.rs b/src/main.rs index 06ef3cf..9c30b92 100644 --- a/src/main.rs +++ b/src/main.rs @@ -289,7 +289,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { // Slack query 
slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - + // Docker image scanning docker_image: Vec::new(), diff --git a/src/reporter.rs b/src/reporter.rs index e6709dc..210da31 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -113,7 +113,6 @@ impl DetailsReporter { } } - /// If the given file path corresponds to a Jira issue downloaded to disk, /// return the online Jira URL for that issue. fn jira_issue_url( @@ -123,7 +122,7 @@ impl DetailsReporter { ) -> Option { // drop any trailing slash so we don’t end up with “//browse/…” let jira_url = args.input_specifier_args.jira_url.as_ref()?.as_str().trim_end_matches('/'); - + let ds = self.datastore.lock().ok()?; let root = ds.clone_root(); let jira_dir = root.join("jira_issues"); diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 6bad0cb..5533b55 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -441,7 +441,7 @@ mod tests { // Slack options slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - + docker_image: Vec::new(), // clone / history options git_clone: GitCloneMode::Bare, diff --git a/src/reporter/sarif_format.rs b/src/reporter/sarif_format.rs index 3db2d99..5829bba 100644 --- a/src/reporter/sarif_format.rs +++ b/src/reporter/sarif_format.rs @@ -208,7 +208,6 @@ impl DetailsReporter { let p = first_match.origin.first(); match p { Origin::File(e) => { - let uri = if let Some(url) = self.jira_issue_url(&e.path, args) { url } else if let Some(url) = self.slack_message_url(&e.path) { @@ -351,7 +350,7 @@ impl DetailsReporter { .build()?, ) .build()?; - + let sarif_results: Vec = findings .par_iter() .filter_map(|f| self.make_sarif_result(f, no_dedup, args).ok()) diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs index 5a6daa4..b427a3b 100644 --- a/src/scanner/docker.rs +++ b/src/scanner/docker.rs @@ -61,7 +61,7 @@ fn image_dir_name(reference: &str) -> String { // add a truncated SHA-256 to 
guarantee uniqueness let hash = Sha256::digest(reference.as_bytes()); - let short = &hex::encode(hash)[..8]; // 8-char prefix is plenty + let short = &hex::encode(hash)[..8]; // 8-char prefix is plenty name.push('_'); name.push_str(short); name @@ -258,7 +258,7 @@ pub async fn save_docker_images( ) -> Result> { let docker = Docker::new(); let mut dirs = Vec::new(); - + for image in images { let dir_name = image_dir_name(image); let out_dir = clone_root.join(format!("docker_{dir_name}")); @@ -280,4 +280,4 @@ mod tests { fn docker_struct_new() { let _ = Docker::new(); } -} \ No newline at end of file +} diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index fff9440..8b905af 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -1,9 +1,9 @@ //! Public façade for the scanner subsystem. +pub(crate) use docker::save_docker_images; pub(crate) use enumerate::enumerate_filesystem_inputs; pub(crate) use repos::{clone_or_update_git_repos, enumerate_github_repos}; pub use runner::{load_and_record_rules, run_async_scan, run_scan}; pub(crate) use validation::run_secret_validation; -pub(crate) use docker::save_docker_images; mod docker; mod enumerate; diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 2a8044a..9d944ea 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -225,7 +225,6 @@ pub async fn enumerate_gitlab_repos( Ok(repo_urls) } - pub async fn fetch_jira_issues( args: &scan::ScanArgs, global_args: &global::GlobalArgs, @@ -284,4 +283,4 @@ pub async fn fetch_slack_messages( } } Ok(vec![output_dir]) -} \ No newline at end of file +} diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index b727718..0a880da 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -67,7 +67,7 @@ pub async fn run_async_scan( // Fetch Jira issues if requested let jira_dirs = fetch_jira_issues(args, global_args, &datastore).await?; input_roots.extend(jira_dirs); - + // Fetch Slack messages if requested let slack_dirs = fetch_slack_messages(args, 
global_args, &datastore).await?; input_roots.extend(slack_dirs); diff --git a/src/slack.rs b/src/slack.rs index a0cd1f5..ce6f90d 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -115,4 +115,4 @@ pub async fn download_messages_to_dir( paths.push((file, msg.permalink)); } Ok(paths) -} \ No newline at end of file +} diff --git a/src/validation.rs b/src/validation.rs index 7ef8c24..cff95c0 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -1,7 +1,7 @@ use std::{ + collections::BTreeMap, fs, hash::{Hash, Hasher}, - collections::BTreeMap, sync::Arc, time::{Duration, Instant}, }; @@ -1043,5 +1043,4 @@ rules: println!("Body: {:?}", owned_blob_match.validation_response_body); Ok(()) } - } diff --git a/src/validation/coinbase.rs b/src/validation/coinbase.rs index be6045f..d879601 100644 --- a/src/validation/coinbase.rs +++ b/src/validation/coinbase.rs @@ -4,16 +4,16 @@ use std::time::Duration; use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; use chrono::Utc; +use ed25519_dalek::SigningKey as Ed25519Key; use p256::{ ecdsa::{signature::Signer as _, SigningKey}, pkcs8::DecodePrivateKey, SecretKey, }; -use ed25519_dalek::{SigningKey as Ed25519Key, Signer as _}; use rand::rngs::OsRng; +use rand::TryRngCore; use reqwest::{Client, StatusCode, Url}; use sha1::{Digest, Sha1}; -use rand::TryRngCore; use crate::validation::{httpvalidation, Cache, CachedResponse, VALIDATION_CACHE_SECONDS}; @@ -68,7 +68,6 @@ pub async fn validate_cdp_api_key( Ok((ok, msg)) } - // fn build_jwt( // method: &str, // host: &str, @@ -85,7 +84,7 @@ pub async fn validate_cdp_api_key( // let mut rng = OsRng; // let mut nonce = [0u8; 16]; - + // let _ = rng.try_fill_bytes(&mut nonce); // let header = serde_json::json!({ @@ -125,12 +124,12 @@ fn build_jwt( let mut rng = OsRng; let mut nonce = [0u8; 16]; - + let _ = rng.try_fill_bytes(&mut nonce); // Try ECDSA (PEM encoded EC key). Fallback to raw Ed25519 base64 key. 
- if let Ok(secret_key) = SecretKey::from_sec1_pem(&pem) - .or_else(|_| SecretKey::from_pkcs8_pem(&pem)) + if let Ok(secret_key) = + SecretKey::from_sec1_pem(&pem).or_else(|_| SecretKey::from_pkcs8_pem(&pem)) { let signing_key = SigningKey::from(secret_key); let header = serde_json::json!({ @@ -168,7 +167,8 @@ fn build_jwt( } 64 => { let arr: [u8; 64] = key_bytes[..64].try_into().unwrap(); - Ed25519Key::from_keypair_bytes(&arr).map_err(|e| anyhow!("invalid Ed25519 key: {e}"))? + Ed25519Key::from_keypair_bytes(&arr) + .map_err(|e| anyhow!("invalid Ed25519 key: {e}"))? } _ => return Err(anyhow!("invalid Ed25519 key length")), }; @@ -196,4 +196,4 @@ fn build_jwt( let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes()); return Ok(format!("{signing_input}.{sig_b64}")); } -} \ No newline at end of file +} diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs index 327f0d2..25a7206 100644 --- a/src/validation/jwt.rs +++ b/src/validation/jwt.rs @@ -162,7 +162,11 @@ pub async fn validate_jwt(token: &str) -> Result<(bool, String)> { return Ok(( true, - format!("JWT valid (alg: {:?}, iss: {issuer}, aud: {:?})", alg, extract_aud_strings(&claims)), + format!( + "JWT valid (alg: {:?}, iss: {issuer}, aud: {:?})", + alg, + extract_aud_strings(&claims) + ), )); } diff --git a/src/validation/utils.rs b/src/validation/utils.rs index 9736aab..ee118c8 100644 --- a/src/validation/utils.rs +++ b/src/validation/utils.rs @@ -9,21 +9,19 @@ use crate::validation::SerializableCaptures; /// * If it’s unnamed, fall back to `"TOKEN"` /// * Skip the unnamed “whole-match” capture **only when** there are /// additional captures to return. 
-pub fn process_captures( - captures: &SerializableCaptures, -) -> Vec<(String, String, usize, usize)> { +pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> { let multiple = captures.captures.len() > 1; captures .captures .iter() - .filter(|cap| !multiple || cap.name.is_some()) + // Skip the whole-match capture (match_number == 0) only when there + // are additional captures. All other captures – named or unnamed – + // should be preserved. + .filter(|cap| !multiple || cap.match_number != 0) .map(|cap| { - let name = cap - .name - .as_ref() - .map(|n| n.to_uppercase()) - .unwrap_or_else(|| "TOKEN".to_string()); + let name = + cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string()); (name, cap.value.clone().into_owned(), cap.start, cap.end) }) .collect() @@ -68,3 +66,90 @@ pub async fn check_url_resolvable(url: &Url) -> Result<(), Box Result<()> { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/")?, gitlab_repo_type: GitLabRepoType::Owner, - + jira_url: None, jql: None, max_results: 100, diff --git a/tests/int_slack.rs b/tests/int_slack.rs index d238bce..699dad9 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -195,4 +195,4 @@ async fn test_scan_slack_messages() -> Result<()> { }; assert!(findings > 0); Ok(()) -} \ No newline at end of file +} diff --git a/tests/smoke_docker.rs b/tests/smoke_docker.rs index 2fd61bc..40cc420 100644 --- a/tests/smoke_docker.rs +++ b/tests/smoke_docker.rs @@ -17,4 +17,4 @@ fn smoke_scan_docker_image() -> anyhow::Result<()> { .code(205) .stdout(predicate::str::contains("Active Credential")); Ok(()) -} \ No newline at end of file +} From 11b7fac03b39f309e2b96a6692762d9a6987027e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 1 Aug 2025 17:07:13 -0700 Subject: [PATCH 090/357] New rule: Groq --- CHANGELOG.md | 3 +++ data/rules/groq.yml | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 
data/rules/groq.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 427b527..49a5b42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.32.0] +- New rules: Groq + ## [1.31.0] - New rules: Telegram bot token, OpenWeatherMap, Apify - New OpenAI detectors added (@joshlarsen) diff --git a/data/rules/groq.yml b/data/rules/groq.yml new file mode 100644 index 0000000..b232ddd --- /dev/null +++ b/data/rules/groq.yml @@ -0,0 +1,34 @@ +rules: + - name: Groq API Key + id: kingfisher.groq.1 + pattern: | + (?xi) + \b + ( + gsk_[a-zA-Z0-9]{52} + ) + \b + confidence: medium + min_entropy: 4.5 + validation: + type: Http + content: + request: + method: GET + url: "https://api.groq.com/openai/v1/models" + headers: + Authorization: "Bearer {{TOKEN}}" + response_matcher: + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"id"' + - '"data"' + match_all_words: true + references: + - https://console.groq.com/docs/api-keys + - https://console.groq.com/docs/api-reference#models + examples: + - "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" + From 280fd928682bcd15528faf5639ad49104f9ae780 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 1 Aug 2025 17:28:55 -0700 Subject: [PATCH 091/357] New rule: Groq --- CHANGELOG.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49a5b42..350a3fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,8 @@ All notable changes to this project will be documented in this file. 
-## [1.32.0] -- New rules: Groq - ## [1.31.0] -- New rules: Telegram bot token, OpenWeatherMap, Apify +- New rules: Telegram bot token, OpenWeatherMap, Apify, Groq - New OpenAI detectors added (@joshlarsen) - Fixed bug that broke validation when using unnamed group captures From 40e760ea2ceff7f3e6dd8b8323d3a525beaa1fda Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 2 Aug 2025 20:40:16 -0700 Subject: [PATCH 092/357] -Added support for scanning AWS S3 buckets via --s3-bucket and optional --s3-prefix - Added --role-arn and --aws-local-profile flags for S3 authentication alongside KF_AWS_KEY/KF_AWS_SECRET --- CHANGELOG.md | 4 ++ Cargo.toml | 5 +- README.md | 10 ++++ src/cli/commands/inputs.rs | 20 +++++++- src/findings_store.rs | 10 ++++ src/lib.rs | 1 + src/main.rs | 5 ++ src/reporter.rs | 11 +++++ src/reporter/json_format.rs | 22 ++++++--- src/reporter/pretty_format.rs | 25 ++++++++-- src/reporter/sarif_format.rs | 23 ++++++++-- src/s3.rs | 86 +++++++++++++++++++++++++++++++++++ src/scanner/repos.rs | 73 +++++++++++++++++++++++++++-- src/scanner/runner.rs | 43 ++++++++++++++---- tests/int_dedup.rs | 5 ++ tests/int_github.rs | 5 ++ tests/int_gitlab.rs | 5 ++ tests/int_slack.rs | 9 ++++ tests/int_validation_cache.rs | 5 ++ tests/int_vulnerable_files.rs | 10 ++++ 20 files changed, 347 insertions(+), 30 deletions(-) create mode 100644 src/s3.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 350a3fb..07607fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this project will be documented in this file. 
+## [1.32.0] +- Added support for scanning AWS S3 buckets via `--s3-bucket` and optional `--s3-prefix` +- Added `--role-arn` and `--aws-local-profile` flags for S3 authentication alongside `KF_AWS_KEY`/`KF_AWS_SECRET` +- ## [1.31.0] - New rules: Telegram bot token, OpenWeatherMap, Apify, Groq - New OpenAI detectors added (@joshlarsen) diff --git a/Cargo.toml b/Cargo.toml index de43779..425db27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.31.0" +version = "1.32.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -186,6 +186,7 @@ oci-client = { version = "0.15", default-features = false, features = ["rustls-t walkdir = "2.5.0" p256 = "0.13.2" ed25519-dalek = { version = "2.2", features = ["pkcs8"] } +aws-sdk-s3 = "1.100.0" [dependencies.tikv-jemallocator] version = "0.6" @@ -207,7 +208,7 @@ rand_chacha = "0.9.0" [profile.release] debug = false -strip = "debuginfo" +strip = true #"debuginfo" opt-level = 3 # Maximum optimization for performance lto = true # Enable Link Time Optimization codegen-units = 1 # Optimize for size but slower compilation diff --git a/README.md b/README.md index a837351..4af4295 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co - **Docker images**: public or private via `--docker-image` - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` - **Slack messages**: query‑based scans with `--slack-query` + - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, or `--aws-local-profile` - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and 
Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) @@ -109,6 +110,15 @@ docker run --rm \ ghcr.io/mongodb/kingfisher:latest \ scan --git-url https://github.com/org/private_repo.git +# Scan an S3 bucket +# Credentials can come from KF_AWS_KEY/KF_AWS_SECRET, --role-arn, or --aws-local-profile +docker run --rm \ + -e KF_AWS_KEY=AKIA... \ + -e KF_AWS_SECRET=g5nYW... \ + ghcr.io/mongodb/kingfisher:latest \ + scan --s3-bucket bucket-name + + # Scan and write a JSON report locally # Here we: # 1. Mount $PWD → /proj diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 8a1c23d..ea38722 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -28,7 +28,8 @@ pub struct InputSpecifierArgs { "all_gitlab_groups", "jira_url", "docker_image", - "slack_query" + "slack_query", + "s3_bucket" ]), value_hint = ValueHint::AnyPath )] @@ -107,6 +108,23 @@ pub struct InputSpecifierArgs { #[arg(long, default_value_t = 100)] pub max_results: usize, + /// Scan the specified S3 bucket + #[arg(long)] + pub s3_bucket: Option, + + /// Optional prefix within the S3 bucket + #[arg(long, requires = "s3_bucket")] + pub s3_prefix: Option, + + /// AWS IAM role ARN to assume for S3 access + #[arg(long, requires = "s3_bucket")] + pub role_arn: Option, + + /// Use credentials from a local AWS profile in ~/.aws/config + #[arg(long, requires = "s3_bucket")] + pub aws_local_profile: Option, + + /// Docker/OCI images to scan (no local Docker required) #[arg(long = "docker-image")] pub docker_image: Vec, diff --git a/src/findings_store.rs b/src/findings_store.rs index 93e9f1c..a1c94d4 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -54,6 +54,7 @@ pub struct FindingsStore { origin_meta: FxHashMap>, docker_images: FxHashMap, slack_links: FxHashMap, + s3_buckets: FxHashMap, } impl FindingsStore { pub fn new(clone_dir: PathBuf) -> Self { @@ -73,6 +74,7 @@ impl 
FindingsStore { bloom_items: 0, docker_images: FxHashMap::default(), slack_links: FxHashMap::default(), + s3_buckets: FxHashMap::default(), } } @@ -306,6 +308,14 @@ impl FindingsStore { &self.slack_links } + pub fn register_s3_bucket(&mut self, dir: PathBuf, bucket: String) { + self.s3_buckets.insert(dir, bucket); + } + + pub fn s3_buckets(&self) -> &FxHashMap { + &self.s3_buckets + } + pub fn get_finding_data_iter( &self, ) -> impl Iterator + '_ { diff --git a/src/lib.rs b/src/lib.rs index 85bc57c..90d0451 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,7 @@ pub mod rule_profiling; pub mod rules; pub mod rules_database; pub mod safe_list; +pub mod s3; pub mod scanner; pub mod scanner_pool; pub mod serde_utils; diff --git a/src/main.rs b/src/main.rs index 9c30b92..73c77a5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -286,6 +286,11 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { jira_url: None, jql: None, max_results: 100, + + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Slack query slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/src/reporter.rs b/src/reporter.rs index 210da31..ad0efe9 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -141,6 +141,17 @@ impl DetailsReporter { ds.slack_links().get(path).cloned() } + fn s3_display_path(&self, path: &std::path::Path) -> Option { + let ds = self.datastore.lock().ok()?; + for (dir, bucket) in ds.s3_buckets().iter() { + if path.starts_with(dir) { + let rel = path.strip_prefix(dir).ok()?; + return Some(format!("s3://{}/{}", bucket, rel.display())); + } + } + None + } + fn docker_display_path(&self, path: &std::path::Path) -> Option { let ds = self.datastore.lock().ok()?; for (dir, image) in ds.docker_images().iter() { diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 5533b55..9fcb1ec 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -99,20 +99,22 @@ impl 
DetailsReporter { let file_path = rm .origin .iter() - .find_map(|origin| { - if let Origin::File(e) = origin { + .find_map(|origin| match origin { + Origin::File(e) => { if let Some(url) = self.jira_issue_url(&e.path, args) { Some(url) } else if let Some(url) = self.slack_message_url(&e.path) { Some(url) + } else if let Some(mapped) = self.s3_display_path(&e.path) { + Some(mapped) } else if let Some(mapped) = self.docker_display_path(&e.path) { Some(mapped) } else { Some(e.path.display().to_string()) } - } else { - None - } + } + Origin::Extended(e) => e.path().map(|p| p.display().to_string()), + _ => None, }) .unwrap_or_default(); @@ -258,11 +260,15 @@ impl DetailsReporter { Some(url) } else if let Some(url) = self.slack_message_url(&e.path) { Some(url) + } else if let Some(mapped) = self.s3_display_path(&e.path) { + Some(mapped) } else if let Some(mapped) = self.docker_display_path(&e.path) { Some(mapped) } else { Some(e.path.display().to_string()) } + } else if let Origin::Extended(e) = origin { + e.path().map(|p| p.display().to_string()) } else { None } @@ -437,10 +443,14 @@ mod tests { jira_url: None, jql: None, max_results: 100, - // Docker image scanning // Slack options slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, docker_image: Vec::new(), // clone / history options diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index 62dd354..942e7ad 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -218,6 +218,8 @@ impl<'a> Display for PrettyFinding<'a> { url } else if let Some(url) = reporter.slack_message_url(&e.path) { url + } else if let Some(mapped) = reporter.s3_display_path(&e.path) { + mapped } else if let Some(mapped) = reporter.docker_display_path(&e.path) { mapped } else { @@ -233,13 +235,23 @@ impl<'a> Display for PrettyFinding<'a> { } )?; } + Origin::Extended(e) => { + if 
let Some(p) = e.path() { + let display_path = p.display().to_string(); + writeln!( + f, + " |Path..........: {}", + if rm.validation_success { + reporter.style_active_creds(&display_path).to_string() + } else { + display_path + } + )?; + } + } Origin::GitRepo(e) => { reporter.write_git_metadata(f, e, args, source_span.start.line)?; } - Origin::Extended(e) => { - writeln!(f, " |Extended......: {}", reporter.style_metadata(e).to_string())?; - // Convert StyledObject to String - } } } Ok(()) @@ -353,6 +365,11 @@ fn test_pretty_format_with_nan_entropy_panics() { // Slack options slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), // git clone / history options diff --git a/src/reporter/sarif_format.rs b/src/reporter/sarif_format.rs index 5829bba..033d37c 100644 --- a/src/reporter/sarif_format.rs +++ b/src/reporter/sarif_format.rs @@ -75,6 +75,8 @@ impl DetailsReporter { url } else if let Some(url) = self.slack_message_url(&e.path) { url + } else if let Some(mapped) = self.s3_display_path(&e.path) { + mapped } else { e.path.display().to_string() }; @@ -82,6 +84,16 @@ impl DetailsReporter { sarif::ArtifactLocationBuilder::default().uri(uri).build().ok()?, ); } + Origin::Extended(e) => { + if let Some(p) = e.path() { + artifact_locations.push( + sarif::ArtifactLocationBuilder::default() + .uri(p.display().to_string()) + .build() + .ok()?, + ); + } + } Origin::GitRepo(e) => { // Extract and store Git metadata if let Some(git_metadata) = self.extract_git_metadata(e, source_span) { @@ -111,7 +123,6 @@ impl DetailsReporter { ); } } - Origin::Extended(_) => (), } } @@ -212,11 +223,18 @@ impl DetailsReporter { url } else if let Some(url) = self.slack_message_url(&e.path) { url + } else if let Some(mapped) = self.s3_display_path(&e.path) { + mapped } else { e.path.display().to_string() }; 
msg.push_str(&format!("Location: {}\n", uri)); } + Origin::Extended(e) => { + if let Some(p) = e.path() { + msg.push_str(&format!("Location: {}\n", p.display())); + } + } Origin::GitRepo(e) => { if let Some(cs) = &e.first_commit { let repo_url = get_repo_url(&e.repo_path) @@ -235,9 +253,6 @@ impl DetailsReporter { msg.push_str(&format!("File: {}", cs.blob_path)); } } - Origin::Extended(e) => { - msg.push_str(&format!("Extended: {}\n", e)); - } } msg } else { diff --git a/src/s3.rs b/src/s3.rs new file mode 100644 index 0000000..5f35dde --- /dev/null +++ b/src/s3.rs @@ -0,0 +1,86 @@ +use anyhow::{Context, Result}; +use aws_config::{meta::region::RegionProviderChain, BehaviorVersion}; +use aws_credential_types::Credentials; +use aws_sdk_s3::Client; + +/// Visit all objects in the given S3 bucket (optionally under a prefix), +/// calling `visitor` with each object's key and bytes. +pub async fn visit_bucket_objects( + bucket: &str, + prefix: Option<&str>, + role_arn: Option<&str>, + profile: Option<&str>, + mut visitor: F, +) -> Result<()> +where + F: FnMut(String, Vec) -> Result<()>, +{ + let mut config_loader = aws_config::defaults(BehaviorVersion::latest()); + + if let Some(profile) = profile { + config_loader = config_loader.profile_name(profile); + } + + // If explicit credentials are provided via KF_AWS_KEY/KF_AWS_SECRET use them + if let (Ok(key), Ok(secret)) = (std::env::var("KF_AWS_KEY"), std::env::var("KF_AWS_SECRET")) { + let creds = Credentials::new(key, secret, None, None, "kf_env"); + config_loader = config_loader.credentials_provider(creds); + } + + // Resolve region using the default chain, falling back to us-east-1 + let region_provider = RegionProviderChain::default_provider().or_else("us-east-1"); + let base_config = config_loader.region(region_provider).load().await; + + let client = if let Some(role) = role_arn { + let assume_role = aws_config::sts::AssumeRoleProvider::builder(role.to_string()) + .session_name("kingfisher") + 
.configure(&base_config) + .build() + .await; + let conf = aws_sdk_s3::config::Builder::from(&base_config) + .credentials_provider(assume_role) + .build(); + Client::from_conf(conf) + } else { + Client::new(&base_config) + }; + + let mut continuation_token = None; + + loop { + let mut req = client.list_objects_v2().bucket(bucket.to_string()); + if let Some(p) = prefix { + req = req.prefix(p.to_string()); + } + if let Some(token) = continuation_token.clone() { + req = req.continuation_token(token); + } + + let resp = req.send().await.context("Failed to list objects in bucket")?; + + if let Some(objects) = resp.contents { + for obj in objects { + if let Some(key) = obj.key { + let get_resp = client + .get_object() + .bucket(bucket) + .key(&key) + .send() + .await + .with_context(|| format!("Failed to fetch object {key}"))?; + let data = + get_resp.body.collect().await.context("Failed to read S3 object body")?; + visitor(key, data.into_bytes().to_vec())?; + } + } + } + + if resp.is_truncated.unwrap_or(false) { + continuation_token = resp.next_continuation_token; + } else { + break; + } + } + + Ok(()) +} \ No newline at end of file diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 9d944ea..735e381 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -8,6 +8,7 @@ use indicatif::{HumanCount, ProgressBar, ProgressStyle}; use tokio::time::Duration; use tracing::{debug, error, info}; +use crate::blob::BlobIdMap; use crate::{ blob::BlobMetadata, cli::{ @@ -21,10 +22,15 @@ use crate::{ git_binary::{CloneMode, Git}, git_url::GitUrl, github, gitlab, jira, - matcher::Match, - origin::OriginSet, - slack, PathBuf, + matcher::{Match, Matcher, MatcherStats}, + origin::{Origin, OriginSet}, + rules_database::RulesDatabase, + s3, + scanner::processing::BlobProcessor, + scanner_pool::ScannerPool, + slack, guesser::Guesser, PathBuf, }; + pub type DatastoreMessage = (OriginSet, BlobMetadata, Vec<(Option, Match)>); pub fn clone_or_update_git_repos( @@ -284,3 +290,64 
@@ pub async fn fetch_slack_messages( } Ok(vec![output_dir]) } + + +pub async fn fetch_s3_objects( + args: &scan::ScanArgs, + datastore: &Arc>, + rules_db: &RulesDatabase, + matcher_stats: &Mutex, + enable_profiling: bool, + shared_profiler: Arc, +) -> Result<()> { + let Some(bucket) = args.input_specifier_args.s3_bucket.as_deref() else { + return Ok(()); + }; + let prefix = args.input_specifier_args.s3_prefix.as_deref(); + let role_arn = args.input_specifier_args.role_arn.as_deref(); + let profile = args.input_specifier_args.aws_local_profile.as_deref(); + + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); + let seen_blobs = BlobIdMap::new(); + let matcher = Matcher::new( + rules_db, + scanner_pool, + &seen_blobs, + Some(matcher_stats), + enable_profiling, + Some(shared_profiler.clone()), + )?; + let guesser = Guesser::new().expect("should be able to create filetype guesser"); + let mut processor = BlobProcessor { matcher, guesser }; + let bucket_name = bucket.to_string(); + + s3::visit_bucket_objects(bucket, prefix, role_arn, profile, |key, bytes| { + let origin = OriginSet::new( + Origin::from_extended(serde_json::json!({ + "path": format!("s3://{}/{}", bucket_name, key) + })), + Vec::new(), + ); + let blob = crate::blob::Blob::from_bytes(bytes); + + if let Some((origin, blob_md, scored_matches)) = processor.run(origin, blob, args.no_dedup)? 
{ + // Wrap origin & metadata once: + let origin_arc = Arc::new(origin); + let blob_arc = Arc::new(blob_md); + + // Now build a batch of exactly one FindingsStoreMessage per Match + let mut batch = Vec::with_capacity(scored_matches.len()); + for (_score, m) in scored_matches { + batch.push((origin_arc.clone(), blob_arc.clone(), m)); + } + + // Call record with the right type + let added = datastore.lock().unwrap().record(batch, !args.no_dedup); + debug!("Added {} new S3 blobs", added); + } + Ok(()) + }) + .await?; + + Ok(()) +} \ No newline at end of file diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 0a880da..f8dae87 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -18,7 +18,9 @@ use crate::{ rules_database::RulesDatabase, scanner::{ clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos, - repos::{enumerate_gitlab_repos, fetch_jira_issues, fetch_slack_messages}, + repos::{ + enumerate_gitlab_repos, fetch_jira_issues, fetch_s3_objects, fetch_slack_messages, + }, run_secret_validation, save_docker_images, summary::print_scan_summary, }, @@ -72,6 +74,7 @@ pub async fn run_async_scan( let slack_dirs = fetch_slack_messages(args, global_args, &datastore).await?; input_roots.extend(slack_dirs); + // Save Docker images if specified if !args.input_specifier_args.docker_image.is_empty() { let clone_root = { @@ -93,22 +96,42 @@ pub async fn run_async_scan( } } - if input_roots.is_empty() { - bail!("No inputs to scan"); - } + // if input_roots.is_empty() { + // bail!("No inputs to scan"); + // } let shared_profiler = Arc::new(ConcurrentRuleProfiler::new()); let enable_profiling = args.rule_stats; let matcher_stats = Mutex::new(MatcherStats::default()); - let _inputs = enumerate_filesystem_inputs( + + // Fetch S3 objects if requested (scanned immediately) + fetch_s3_objects( args, - datastore.clone(), - &input_roots, - progress_enabled, + &datastore, rules_db, + &matcher_stats, enable_profiling, 
Arc::clone(&shared_profiler), - &matcher_stats, - )?; + ) + .await?; + + let has_s3 = args.input_specifier_args.s3_bucket.is_some(); + if input_roots.is_empty() && !has_s3 { + bail!("No inputs to scan"); + } + + if !input_roots.is_empty() { + let _inputs = enumerate_filesystem_inputs( + args, + datastore.clone(), + &input_roots, + progress_enabled, + rules_db, + enable_profiling, + Arc::clone(&shared_profiler), + &matcher_stats, + )?; + } + if !args.no_dedup { // Final deduplication step before validation (or before reporting) diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index c42967f..0c93023 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -84,6 +84,11 @@ rules: max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), // git clone / history options diff --git a/tests/int_github.rs b/tests/int_github.rs index 4bda269..2892b91 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -71,6 +71,11 @@ fn test_github_remote_scan() -> Result<()> { max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), // git clone / history options diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index e53bbd8..0b55799 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -70,6 +70,11 @@ fn test_gitlab_remote_scan() -> Result<()> { max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), git_clone: GitCloneMode::Bare, diff --git a/tests/int_slack.rs b/tests/int_slack.rs index 699dad9..d22b8f0 100644 --- 
a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -59,6 +59,10 @@ impl TestContext { jql: None, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, max_results: 10, docker_image: Vec::new(), git_clone: GitCloneMode::Bare, @@ -147,6 +151,11 @@ async fn test_scan_slack_messages() -> Result<()> { slack_query: Some("test".into()), slack_api_url: Url::parse(&format!("{}/", server.uri()))?, max_results: 10, + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, docker_image: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index ae8dd50..a7ab9ea 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -127,6 +127,11 @@ async fn test_validation_cache_and_depvars() -> Result<()> { max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), // git clone / history options diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 187427e..abeb6f1 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -70,6 +70,11 @@ impl TestContext { max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), // git clone / history options @@ -142,6 +147,11 @@ impl TestContext { max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, // Docker image scanning docker_image: Vec::new(), 
// git clone / history options From 57cb4f320c3b434f674326b126667a176baee24b Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 3 Aug 2025 08:13:08 -0700 Subject: [PATCH 093/357] improving s3 bucket scanning feature --- Cargo.toml | 1 + src/s3.rs | 144 ++++++++++++------ .../{ => baseline}/archive/kfArchiveTest.7z | Bin .../{ => baseline}/archive/kfArchiveTest.tar | Bin .../archive/kfArchiveTest.tar.bz2 | Bin .../archive/kfArchiveTest.tar.gz | Bin .../archive/kfArchiveTest.tar.lz4 | Bin .../archive/kfArchiveTest.tar.xz | Bin .../{ => baseline}/archive/kfArchiveTest.zip | Bin .../archive/kfArchiveTest_zip_inside.zip | Bin .../{ => baseline}/archive/makeArchives.sh | 0 testdata/{ => baseline}/archive/template.zip | Bin 12 files changed, 102 insertions(+), 43 deletions(-) rename testdata/{ => baseline}/archive/kfArchiveTest.7z (100%) rename testdata/{ => baseline}/archive/kfArchiveTest.tar (100%) rename testdata/{ => baseline}/archive/kfArchiveTest.tar.bz2 (100%) rename testdata/{ => baseline}/archive/kfArchiveTest.tar.gz (100%) rename testdata/{ => baseline}/archive/kfArchiveTest.tar.lz4 (100%) rename testdata/{ => baseline}/archive/kfArchiveTest.tar.xz (100%) rename testdata/{ => baseline}/archive/kfArchiveTest.zip (100%) rename testdata/{ => baseline}/archive/kfArchiveTest_zip_inside.zip (100%) rename testdata/{ => baseline}/archive/makeArchives.sh (100%) rename testdata/{ => baseline}/archive/template.zip (100%) diff --git a/Cargo.toml b/Cargo.toml index 425db27..686812d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -187,6 +187,7 @@ walkdir = "2.5.0" p256 = "0.13.2" ed25519-dalek = { version = "2.2", features = ["pkcs8"] } aws-sdk-s3 = "1.100.0" +aws-smithy-http = "0.62.2" [dependencies.tikv-jemallocator] version = "0.6" diff --git a/src/s3.rs b/src/s3.rs index 5f35dde..7a180f4 100644 --- a/src/s3.rs +++ b/src/s3.rs @@ -1,10 +1,14 @@ use anyhow::{Context, Result}; -use aws_config::{meta::region::RegionProviderChain, BehaviorVersion}; +use aws_config::{defaults, 
meta::region::RegionProviderChain, BehaviorVersion, ConfigLoader}; use aws_credential_types::Credentials; -use aws_sdk_s3::Client; +use aws_sdk_s3::{ + Client, + operation::list_objects_v2::ListObjectsV2Error, // modeled service error + error::ProvideErrorMetadata, // for .code() :contentReference[oaicite:8]{index=8} +}; +use aws_types::region::Region; +use reqwest; // HTTP client for HEAD fallback -/// Visit all objects in the given S3 bucket (optionally under a prefix), -/// calling `visitor` with each object's key and bytes. pub async fn visit_bucket_objects( bucket: &str, prefix: Option<&str>, @@ -15,66 +19,120 @@ pub async fn visit_bucket_objects( where F: FnMut(String, Vec) -> Result<()>, { - let mut config_loader = aws_config::defaults(BehaviorVersion::latest()); + // Helper to build ConfigLoader with profile/creds/no_credentials + let build_loader = || { + let mut loader = defaults(BehaviorVersion::latest()); + if let Some(p) = profile { + loader = loader.profile_name(p); + } + if let (Ok(k), Ok(s)) = (std::env::var("KF_AWS_KEY"), std::env::var("KF_AWS_SECRET")) { + loader = loader.credentials_provider(Credentials::new(k, s, None, None, "kf_env")); + } + if profile.is_none() && std::env::var("KF_AWS_KEY").is_err() && role_arn.is_none() { + loader = loader.no_credentials(); + } + loader + }; - if let Some(profile) = profile { - config_loader = config_loader.profile_name(profile); - } - - // If explicit credentials are provided via KF_AWS_KEY/KF_AWS_SECRET use them - if let (Ok(key), Ok(secret)) = (std::env::var("KF_AWS_KEY"), std::env::var("KF_AWS_SECRET")) { - let creds = Credentials::new(key, secret, None, None, "kf_env"); - config_loader = config_loader.credentials_provider(creds); - } - - // Resolve region using the default chain, falling back to us-east-1 - let region_provider = RegionProviderChain::default_provider().or_else("us-east-1"); - let base_config = config_loader.region(region_provider).load().await; - - let client = if let Some(role) = 
role_arn { - let assume_role = aws_config::sts::AssumeRoleProvider::builder(role.to_string()) + // Initial client in default→us-east-1 + let default_region = RegionProviderChain::default_provider().or_else("us-east-1"); + let mut config = build_loader().region(default_region).load().await; + let mut client = if let Some(role) = role_arn { + let assume = aws_config::sts::AssumeRoleProvider::builder(role.to_string()) .session_name("kingfisher") - .configure(&base_config) + .configure(&config) .build() .await; - let conf = aws_sdk_s3::config::Builder::from(&base_config) - .credentials_provider(assume_role) + let conf = aws_sdk_s3::config::Builder::from(&config) + .credentials_provider(assume) .build(); Client::from_conf(conf) } else { - Client::new(&base_config) + Client::new(&config) }; - let mut continuation_token = None; - + let mut continuation_token: Option = None; loop { - let mut req = client.list_objects_v2().bucket(bucket.to_string()); + let mut req = client.list_objects_v2().bucket(bucket); if let Some(p) = prefix { - req = req.prefix(p.to_string()); + req = req.prefix(p); } - if let Some(token) = continuation_token.clone() { + if let Some(ref token) = continuation_token { req = req.continuation_token(token); } - let resp = req.send().await.context("Failed to list objects in bucket")?; + let resp = match req.send().await { + Ok(r) => r, - if let Some(objects) = resp.contents { - for obj in objects { - if let Some(key) = obj.key { - let get_resp = client - .get_object() - .bucket(bucket) - .key(&key) + // On error, extract the modeled service error + Err(err) => { + let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError :contentReference[oaicite:9]{index=9} + + // If the bucket must be addressed at another region... 
+ if svc_err.code() == Some("PermanentRedirect") { + // HEAD request to get x-amz-bucket-region header + let url = format!("https://{bucket}.s3.amazonaws.com"); + let head = reqwest::Client::new() + .head(&url) .send() .await - .with_context(|| format!("Failed to fetch object {key}"))?; - let data = - get_resp.body.collect().await.context("Failed to read S3 object body")?; - visitor(key, data.into_bytes().to_vec())?; + .context("Failed to HEAD bucket for region")?; + let region_str = head + .headers() + .get("x-amz-bucket-region") + .and_then(|v| v.to_str().ok()) + .unwrap_or("us-east-1") + .to_string(); + + // Rebuild client in the correct region + let override_region = RegionProviderChain::first_try(Region::new(region_str)) + .or_else("us-east-1"); + config = build_loader().region(override_region).load().await; + client = if let Some(r) = role_arn { + let assume = aws_config::sts::AssumeRoleProvider::builder(r.to_string()) + .session_name("kingfisher") + .configure(&config) + .build() + .await; + let conf = aws_sdk_s3::config::Builder::from(&config) + .credentials_provider(assume) + .build(); + Client::from_conf(conf) + } else { + Client::new(&config) + }; + + // Reset pagination and retry list + continuation_token = None; + continue; } + + // Any other error is fatal + return Err(svc_err).context("Failed to list objects in bucket"); + } + }; + + // Process objects + for obj in resp.contents.unwrap_or_default() { + if let Some(key) = obj.key { + let data = client + .get_object() + .bucket(bucket) + .key(&key) + .send() + .await + .with_context(|| format!("Failed to fetch object {}", key))? + .body + .collect() + .await + .context("Failed to read S3 object body")? 
+ .into_bytes() + .to_vec(); + visitor(key, data)?; } } + // Continue or finish pagination if resp.is_truncated.unwrap_or(false) { continuation_token = resp.next_continuation_token; } else { @@ -83,4 +141,4 @@ where } Ok(()) -} \ No newline at end of file +} diff --git a/testdata/archive/kfArchiveTest.7z b/testdata/baseline/archive/kfArchiveTest.7z similarity index 100% rename from testdata/archive/kfArchiveTest.7z rename to testdata/baseline/archive/kfArchiveTest.7z diff --git a/testdata/archive/kfArchiveTest.tar b/testdata/baseline/archive/kfArchiveTest.tar similarity index 100% rename from testdata/archive/kfArchiveTest.tar rename to testdata/baseline/archive/kfArchiveTest.tar diff --git a/testdata/archive/kfArchiveTest.tar.bz2 b/testdata/baseline/archive/kfArchiveTest.tar.bz2 similarity index 100% rename from testdata/archive/kfArchiveTest.tar.bz2 rename to testdata/baseline/archive/kfArchiveTest.tar.bz2 diff --git a/testdata/archive/kfArchiveTest.tar.gz b/testdata/baseline/archive/kfArchiveTest.tar.gz similarity index 100% rename from testdata/archive/kfArchiveTest.tar.gz rename to testdata/baseline/archive/kfArchiveTest.tar.gz diff --git a/testdata/archive/kfArchiveTest.tar.lz4 b/testdata/baseline/archive/kfArchiveTest.tar.lz4 similarity index 100% rename from testdata/archive/kfArchiveTest.tar.lz4 rename to testdata/baseline/archive/kfArchiveTest.tar.lz4 diff --git a/testdata/archive/kfArchiveTest.tar.xz b/testdata/baseline/archive/kfArchiveTest.tar.xz similarity index 100% rename from testdata/archive/kfArchiveTest.tar.xz rename to testdata/baseline/archive/kfArchiveTest.tar.xz diff --git a/testdata/archive/kfArchiveTest.zip b/testdata/baseline/archive/kfArchiveTest.zip similarity index 100% rename from testdata/archive/kfArchiveTest.zip rename to testdata/baseline/archive/kfArchiveTest.zip diff --git a/testdata/archive/kfArchiveTest_zip_inside.zip b/testdata/baseline/archive/kfArchiveTest_zip_inside.zip similarity index 100% rename from 
testdata/archive/kfArchiveTest_zip_inside.zip rename to testdata/baseline/archive/kfArchiveTest_zip_inside.zip diff --git a/testdata/archive/makeArchives.sh b/testdata/baseline/archive/makeArchives.sh similarity index 100% rename from testdata/archive/makeArchives.sh rename to testdata/baseline/archive/makeArchives.sh diff --git a/testdata/archive/template.zip b/testdata/baseline/archive/template.zip similarity index 100% rename from testdata/archive/template.zip rename to testdata/baseline/archive/template.zip From 047d58086028a157e572a989de39e286dc8be89d Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 3 Aug 2025 08:14:32 -0700 Subject: [PATCH 094/357] improving s3 bucket scanning feature --- .../{baseline => }/archive/kfArchiveTest.7z | Bin .../{baseline => }/archive/kfArchiveTest.tar | Bin .../archive/kfArchiveTest.tar.bz2 | Bin .../archive/kfArchiveTest.tar.gz | Bin .../archive/kfArchiveTest.tar.lz4 | Bin .../archive/kfArchiveTest.tar.xz | Bin .../{baseline => }/archive/kfArchiveTest.zip | Bin .../archive/kfArchiveTest_zip_inside.zip | Bin .../{baseline => }/archive/makeArchives.sh | 0 testdata/{baseline => }/archive/template.zip | Bin testdata/baseline/baseline_test.go | 95 ------------------ 11 files changed, 95 deletions(-) rename testdata/{baseline => }/archive/kfArchiveTest.7z (100%) rename testdata/{baseline => }/archive/kfArchiveTest.tar (100%) rename testdata/{baseline => }/archive/kfArchiveTest.tar.bz2 (100%) rename testdata/{baseline => }/archive/kfArchiveTest.tar.gz (100%) rename testdata/{baseline => }/archive/kfArchiveTest.tar.lz4 (100%) rename testdata/{baseline => }/archive/kfArchiveTest.tar.xz (100%) rename testdata/{baseline => }/archive/kfArchiveTest.zip (100%) rename testdata/{baseline => }/archive/kfArchiveTest_zip_inside.zip (100%) rename testdata/{baseline => }/archive/makeArchives.sh (100%) rename testdata/{baseline => }/archive/template.zip (100%) delete mode 100644 testdata/baseline/baseline_test.go diff --git 
a/testdata/baseline/archive/kfArchiveTest.7z b/testdata/archive/kfArchiveTest.7z similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.7z rename to testdata/archive/kfArchiveTest.7z diff --git a/testdata/baseline/archive/kfArchiveTest.tar b/testdata/archive/kfArchiveTest.tar similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.tar rename to testdata/archive/kfArchiveTest.tar diff --git a/testdata/baseline/archive/kfArchiveTest.tar.bz2 b/testdata/archive/kfArchiveTest.tar.bz2 similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.tar.bz2 rename to testdata/archive/kfArchiveTest.tar.bz2 diff --git a/testdata/baseline/archive/kfArchiveTest.tar.gz b/testdata/archive/kfArchiveTest.tar.gz similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.tar.gz rename to testdata/archive/kfArchiveTest.tar.gz diff --git a/testdata/baseline/archive/kfArchiveTest.tar.lz4 b/testdata/archive/kfArchiveTest.tar.lz4 similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.tar.lz4 rename to testdata/archive/kfArchiveTest.tar.lz4 diff --git a/testdata/baseline/archive/kfArchiveTest.tar.xz b/testdata/archive/kfArchiveTest.tar.xz similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.tar.xz rename to testdata/archive/kfArchiveTest.tar.xz diff --git a/testdata/baseline/archive/kfArchiveTest.zip b/testdata/archive/kfArchiveTest.zip similarity index 100% rename from testdata/baseline/archive/kfArchiveTest.zip rename to testdata/archive/kfArchiveTest.zip diff --git a/testdata/baseline/archive/kfArchiveTest_zip_inside.zip b/testdata/archive/kfArchiveTest_zip_inside.zip similarity index 100% rename from testdata/baseline/archive/kfArchiveTest_zip_inside.zip rename to testdata/archive/kfArchiveTest_zip_inside.zip diff --git a/testdata/baseline/archive/makeArchives.sh b/testdata/archive/makeArchives.sh similarity index 100% rename from testdata/baseline/archive/makeArchives.sh rename to 
testdata/archive/makeArchives.sh diff --git a/testdata/baseline/archive/template.zip b/testdata/archive/template.zip similarity index 100% rename from testdata/baseline/archive/template.zip rename to testdata/archive/template.zip diff --git a/testdata/baseline/baseline_test.go b/testdata/baseline/baseline_test.go deleted file mode 100644 index 00e7770..0000000 --- a/testdata/baseline/baseline_test.go +++ /dev/null @@ -1,95 +0,0 @@ -package core - -import ( - "io/ioutil" - "os" - "path" - "path/filepath" - "runtime" - "testing" - - "github.com/10gen/kingfisher/core" -) - -func rootDir() string { - _, b, _, _ := runtime.Caller(0) - return filepath.Dir(path.Dir(b)) -} - -func NewTestSession(baselineFilename string) (*core.Session, error) { - session := core.PrepareTestSession() - session.Testing = true - session.ReqScanMode = core.LocalFiles - session.Options.ValidateSecrets = true - session.Options.BaselineFilename = baselineFilename - session.Options.KingfisherTempDir = core.GetTempDir() - core.GlobalSessionRef = session - session.InitializeTargetModeClient() - return session, nil -} - -func beginTesting(t *testing.T, testfile string, expectedSkippedFindings, expectedFindingsSuppressKingfisher int) { - rootdir := rootDir() - testfilePath := filepath.Join(rootdir, testfile) - _, filename := filepath.Split(testfilePath) - - byteBaseLine := []byte(`FileContent: - matches: [] -FilePaths: - matches: [] -ExactFindings: - matches: - - filepath: testdata/ruby_vulnerable.rb - findinghash: 701c302855ecc97e8415c44f37123bc2ca0c3343bd87028682aaaeaa90568084 - linenum: 40 - lastupdated: Tue Apr 16 13:04:10 PDT 2024 - - filepath: testdata/ruby_vulnerable.rb - findinghash: 065d1e2faeae9328ca8b2f2754afa6c196d3ef2da2720dabca7e5161d67a6ca1 - linenum: 40 - lastupdated: Tue Apr 16 13:04:10 PDT 2024 -`) - - // Write byteBaseline to a file in a temp directory and give yaml extension - tempFile, err := ioutil.TempFile("", "baseline-*.yaml") - if err != nil { - t.Fatal(err) - } - defer 
os.Remove(tempFile.Name()) // Clean up the file after test - - if _, err := tempFile.Write(byteBaseLine); err != nil { - t.Fatal(err) - } - if err := tempFile.Close(); err != nil { - t.Fatal(err) - } - - sess, err := NewTestSession(tempFile.Name()) - if err != nil { - t.Fatal(err) - } - - matchFile := core.NewMatchFile(testfilePath, sess, nil) - core.BeginFileAnalysis(matchFile) - if sess.Stats.SkippedFindings != expectedSkippedFindings { - core.PrintSessionStats(sess) - t.Errorf("Expected %d findings, got %d -- file: <%s>", expectedSkippedFindings, sess.Stats.SkippedFindings, filename) - } -} - -func TestBaselineFeature(t *testing.T) { - - tests := []struct { - fileName string - expectedSkippedFindings int - expectedFindingsSuppressKingfisher int - }{ - {"ruby_vulnerable.rb", 3, 0}, - } - - for _, tt := range tests { - t.Run(tt.fileName, func(t *testing.T) { - beginTesting(t, tt.fileName, tt.expectedSkippedFindings, tt.expectedFindingsSuppressKingfisher) - }) - } - -} From 1b2f2b445270594f67239a7840c9aa990f1cccd1 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 3 Aug 2025 08:56:22 -0700 Subject: [PATCH 095/357] added integration test --- tests/int_s3.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/int_s3.rs diff --git a/tests/int_s3.rs b/tests/int_s3.rs new file mode 100644 index 0000000..948734c --- /dev/null +++ b/tests/int_s3.rs @@ -0,0 +1,21 @@ +use anyhow::Result; +use kingfisher::s3::visit_bucket_objects; + +#[tokio::test] +async fn test_visit_public_bucket() -> Result<()> { + let mut objects = Vec::new(); + visit_bucket_objects("wikisum", None, None, None, |key, data| { + objects.push((key, data)); + Ok(()) + }) + .await?; + + assert!(objects.iter().any(|(k, _)| k == "README.txt"), "README object not found"); + let creds = objects.iter().find(|(k, _)| k == "README.txt").expect("README object"); + let body = std::str::from_utf8(&creds.1)?; + assert!( + body.contains("This dataset provides how-to articles"), + 
"expected README file" + ); + Ok(()) +} \ No newline at end of file From 505d775302cea10ba7fab716a2b4a65eb58d0f00 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 3 Aug 2025 09:45:52 -0700 Subject: [PATCH 096/357] improved integration test and updated README --- README.md | 55 +++++++++++++++++++++++++++++++++++++++++++++++-- src/s3.rs | 6 +++--- tests/int_s3.rs | 17 ++++++++++----- 3 files changed, 68 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 4af4295..2297709 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co ## What Kingfisher Adds - **Live validation** via cloud-provider APIs - **Language-aware detection** (source-code parsing) for ~20 languages -- **Extra targets**: GitLab repos, Docker images, Jira issues, and Slack messages +- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages - **Baseline mode**: ignore known secrets, flag only new ones - **Native Windows** binary @@ -26,7 +26,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co - **Docker images**: public or private via `--docker-image` - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` - **Slack messages**: query‑based scans with `--slack-query` - - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, or `--aws-local-profile` + - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) @@ -274,6 +274,57 @@ 
kingfisher scan ./my-project \ --exclude tests \ -v ``` +## Scan an S3 bucket +You can scan S3 objects directly: + +```bash +kingfisher scan --s3-bucket bucket-name [--s3-prefix path/] +``` + +Credential resolution happens in this order: + +1. `KF_AWS_KEY` and `KF_AWS_SECRET` environment variables +2. `--aws-local-profile` pointing to a profile in `~/.aws/config` (works with AWS SSO) +3. anonymous access for public buckets + +If `--role-arn` is supplied, the credentials from steps 1–2 are used to assume that role. + +Examples: + +```bash +# using explicit keys +export KF_AWS_KEY=AKIA... +export KF_AWS_SECRET=g5nYW... +kingfisher scan --s3-bucket some-example-bucket + +# Above can also be run as: +KF_AWS_KEY=AKIA... KF_AWS_SECRET=g5nYW... kingfisher scan --s3-bucket some-example-bucket + +# using a local profile (e.g., SSO) that exists in your AWS profile (~/.aws/config) +kingfisher scan --s3-bucket some-example-bucket --aws-local-profile myprofile + +# anonymous scan of a bucket, while providing an object prefix to only scan subset of the s3 bucket +kingfisher scan \ + --s3-bucket awsglue-datasets \ + --s3-prefix examples/us-legislators/all + +# assuming a role when scanning +kingfisher scan --s3-bucket some-example-bucket \ + --role-arn arn:aws:iam::123456789012:role/MyRole + +# anonymous scan of a public bucket +kingfisher scan --s3-bucket some-example-bucket +``` + +Docker example: + +```bash +docker run --rm \ + -e KF_AWS_KEY=AKIA... \ + -e KF_AWS_SECRET=g5nYW... \ + ghcr.io/mongodb/kingfisher:latest \ + scan --s3-bucket bucket-name +``` ## Scanning Docker Images Kingfisher will first try to use any locally available image, then fall back to pulling via OCI. 
diff --git a/src/s3.rs b/src/s3.rs index 7a180f4..ed18a52 100644 --- a/src/s3.rs +++ b/src/s3.rs @@ -1,10 +1,10 @@ use anyhow::{Context, Result}; -use aws_config::{defaults, meta::region::RegionProviderChain, BehaviorVersion, ConfigLoader}; +use aws_config::{defaults, meta::region::RegionProviderChain, BehaviorVersion}; use aws_credential_types::Credentials; use aws_sdk_s3::{ Client, operation::list_objects_v2::ListObjectsV2Error, // modeled service error - error::ProvideErrorMetadata, // for .code() :contentReference[oaicite:8]{index=8} + error::ProvideErrorMetadata, // for .code() }; use aws_types::region::Region; use reqwest; // HTTP client for HEAD fallback @@ -66,7 +66,7 @@ where // On error, extract the modeled service error Err(err) => { - let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError :contentReference[oaicite:9]{index=9} + let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError // If the bucket must be addressed at another region... 
if svc_err.code() == Some("PermanentRedirect") { diff --git a/tests/int_s3.rs b/tests/int_s3.rs index 948734c..c44afe8 100644 --- a/tests/int_s3.rs +++ b/tests/int_s3.rs @@ -4,18 +4,25 @@ use kingfisher::s3::visit_bucket_objects; #[tokio::test] async fn test_visit_public_bucket() -> Result<()> { let mut objects = Vec::new(); - visit_bucket_objects("wikisum", None, None, None, |key, data| { + visit_bucket_objects("awsglue-datasets", Some("examples/us-legislators/all/"), None, None, |key, data| { objects.push((key, data)); Ok(()) }) .await?; - assert!(objects.iter().any(|(k, _)| k == "README.txt"), "README object not found"); - let creds = objects.iter().find(|(k, _)| k == "README.txt").expect("README object"); + assert!( + objects.iter().any(|(k, _)| k.ends_with("events.json")), + "events.json object not found" + ); + let creds = objects + .iter() + .find(|(k, _)| k.ends_with("events.json")) + .expect("events.json object"); + let body = std::str::from_utf8(&creds.1)?; assert!( - body.contains("This dataset provides how-to articles"), - "expected README file" + body.contains("Q4450263"), + "expected events.json file" ); Ok(()) } \ No newline at end of file From 1e466feee86aa949cadf6e2a403b10ce63d1474f Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 3 Aug 2025 10:35:52 -0700 Subject: [PATCH 097/357] Update src/scanner/runner.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/scanner/runner.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index f8dae87..63f7bee 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -96,9 +96,6 @@ pub async fn run_async_scan( } } - // if input_roots.is_empty() { - // bail!("No inputs to scan"); - // } let shared_profiler = Arc::new(ConcurrentRuleProfiler::new()); let enable_profiling = args.rule_stats; let matcher_stats = Mutex::new(MatcherStats::default()); From cb5595be2306a1234552688f117b1aba1bee8810 Mon Sep 17 00:00:00 2001 From: Mick 
Grove Date: Sun, 3 Aug 2025 10:37:02 -0700 Subject: [PATCH 098/357] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- CHANGELOG.md | 1 - Cargo.toml | 1 - 2 files changed, 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 07607fe..2c92a92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,6 @@ All notable changes to this project will be documented in this file. ## [1.32.0] - Added support for scanning AWS S3 buckets via `--s3-bucket` and optional `--s3-prefix` - Added `--role-arn` and `--aws-local-profile` flags for S3 authentication alongside `KF_AWS_KEY`/`KF_AWS_SECRET` -- ## [1.31.0] - New rules: Telegram bot token, OpenWeatherMap, Apify, Groq - New OpenAI detectors added (@joshlarsen) diff --git a/Cargo.toml b/Cargo.toml index 686812d..425db27 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -187,7 +187,6 @@ walkdir = "2.5.0" p256 = "0.13.2" ed25519-dalek = { version = "2.2", features = ["pkcs8"] } aws-sdk-s3 = "1.100.0" -aws-smithy-http = "0.62.2" [dependencies.tikv-jemallocator] version = "0.6" From bbe1c38c930f855cb0b4a969fb1dcb1b2a1e11a0 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 3 Aug 2025 20:59:58 -0700 Subject: [PATCH 099/357] updating s3 feature --- README.md | 2 +- data/rules/vmware.yml | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 data/rules/vmware.yml diff --git a/README.md b/README.md index 2297709..b60ae55 100644 --- a/README.md +++ b/README.md @@ -301,7 +301,7 @@ kingfisher scan --s3-bucket some-example-bucket KF_AWS_KEY=AKIA... KF_AWS_SECRET=g5nYW... 
kingfisher scan --s3-bucket some-example-bucket # using a local profile (e.g., SSO) that exists in your AWS profile (~/.aws/config) -kingfisher scan --s3-bucket some-example-bucket --aws-local-profile myprofile +kingfisher scan --s3-bucket some-example-bucket --aws-local-profile default # anonymous scan of a bucket, while providing an object prefix to only scan subset of the s3 bucket kingfisher scan \ diff --git a/data/rules/vmware.yml b/data/rules/vmware.yml new file mode 100644 index 0000000..9332a37 --- /dev/null +++ b/data/rules/vmware.yml @@ -0,0 +1,21 @@ +rules: + - name: Credentials in Connect-VIServer Invocation + id: kingfisher.vmware.1 + pattern: | + (?xi) + Connect-VIServer + .{0,50} + -User \s+ (\S{3,30}) \s+ (?# username ) + .{0,50} + -Password \s+ (\S{3,30}) (?# password ) + + examples: + - 'Connect-VIServer -Server 192.168.1.51 -User administrator@vSphere.local -Password VMware1!' + - | + #Set-PowerCLIConfiguration -InvalidCertificateAction:Ignore + Connect-VIServer "$endpoint" -User "$username" -Password "$password" | Out-Null + - 'Connect-VIServer $ESXiHost.EsxiHost -user $ESXiUser -password $ESXipass' + - '$null = connect-viserver vc.lab.local -user administrator@vsphere.local -password VMware1!' 
+ + references: + - https://developer.broadcom.com/powercli/latest/vmware.vimautomation.core/commands/connect-viserver \ No newline at end of file From bc05c3e5f2ab2caeb15d6403dd2986c68c568ba8 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 4 Aug 2025 08:58:06 -0700 Subject: [PATCH 100/357] refactored output reporting and formatting logic --- CHANGELOG.md | 2 + data/rules/credentials.yml | 2 +- data/rules/generic.yml | 1 - f1.patch | 719 ++++++++++++++++++++++++++++++++++ src/cli/commands/inputs.rs | 1 - src/lib.rs | 2 +- src/reporter.rs | 229 +++++++++-- src/reporter/bson_format.rs | 81 +--- src/reporter/json_format.rs | 500 ++--------------------- src/reporter/pretty_format.rs | 439 ++++----------------- src/reporter/sarif_format.rs | 350 +++-------------- src/s3.rs | 12 +- src/scanner/repos.rs | 15 +- src/scanner/runner.rs | 2 - tests/int_s3.rs | 27 +- 15 files changed, 1108 insertions(+), 1274 deletions(-) create mode 100644 f1.patch diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c92a92..6483f58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ All notable changes to this project will be documented in this file. ## [1.32.0] - Added support for scanning AWS S3 buckets via `--s3-bucket` and optional `--s3-prefix` - Added `--role-arn` and `--aws-local-profile` flags for S3 authentication alongside `KF_AWS_KEY`/`KF_AWS_SECRET` +- Refactored output reporting and formatting logic + ## [1.31.0] - New rules: Telegram bot token, OpenWeatherMap, Apify, Groq - New OpenAI detectors added (@joshlarsen) diff --git a/data/rules/credentials.yml b/data/rules/credentials.yml index 38c1057..cb7e866 100644 --- a/data/rules/credentials.yml +++ b/data/rules/credentials.yml @@ -20,6 +20,6 @@ rules: [a-z0-9\/._~-]* )? 
min_entropy: 3.0 - confidence: low + confidence: medium examples: - https://eaRIWNkE:qyOIhJiM@j2LYY414Q5cCYD \ No newline at end of file diff --git a/data/rules/generic.yml b/data/rules/generic.yml index 9338797..3f71d4a 100644 --- a/data/rules/generic.yml +++ b/data/rules/generic.yml @@ -96,7 +96,6 @@ rules: ["'] min_entropy: 3.3 confidence: low - categories: [fuzzy, generic, secret] examples: - | password = "super$ecret" diff --git a/f1.patch b/f1.patch new file mode 100644 index 0000000..a132a31 --- /dev/null +++ b/f1.patch @@ -0,0 +1,719 @@ +diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs +index 9fcb1ecdfe8decc60278848c4a7be43cc9ebee70..b600f9f65838e52ce5dc3d7bb3bb1a5d5ff2bcaf 100644 +--- a/src/reporter/json_format.rs ++++ b/src/reporter/json_format.rs +@@ -1,436 +1,80 @@ +-use http::StatusCode; +-use serde_json::json; +- + use super::*; +-use crate::bstring_escape::Escaped; + + impl DetailsReporter { +- pub fn deduplicate_matches( +- &self, +- matches: Vec, +- no_dedup: bool, +- ) -> Vec { +- if no_dedup { +- return matches; +- } +- +- use std::collections::HashMap; +- let mut by_fp: HashMap = HashMap::new(); +- +- for rm in matches { +- let fp = rm.m.finding_fingerprint; +- if let Some(existing) = by_fp.get_mut(&fp) { +- // merge origin sets (keep first origin, append the rest) +- for o in rm.origin.iter() { +- if !existing.origin.iter().any(|e| e == o) { +- existing.origin = OriginSet::new( +- existing.origin.first().clone(), +- existing +- .origin +- .iter() +- .skip(1) +- .cloned() +- .chain(std::iter::once(o.clone())) +- .collect(), +- ); +- } +- } +- continue; +- } +- by_fp.insert(fp, rm); +- } +- by_fp.into_values().collect() +- } +- +- pub fn gather_json_findings( +- &self, +- args: &cli::commands::scan::ScanArgs, +- ) -> Result> { +- let mut matches = self.get_filtered_matches()?; +- if !args.no_dedup { +- matches = self.deduplicate_matches(matches, args.no_dedup); +- } +- +- let mut json_findings = Vec::new(); +- for rm in 
matches { +- let source_span = &rm.m.location.source_span; +- let line_num = source_span.start.line; +- +- let snippet = Escaped( +- rm.m.groups +- .captures +- .get(1) +- .or_else(|| rm.m.groups.captures.get(0)) +- .map(|capture| capture.value.as_bytes()) +- .unwrap_or_default(), +- ) +- .to_string(); +- +- let validation_status = if rm.validation_success { +- "Active Credential" +- } else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() { +- "Not Attempted" +- } else { +- "Inactive Credential" +- }; +- +- const MAX_RESPONSE_LENGTH: usize = 512; +- let truncated_body: String = +- rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect(); +- let ellipsis = +- if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" }; +- let response_body = format!("{}{}", truncated_body, ellipsis); +- +- // Call extract_git_metadata on each GitRepo origin and take the first non-null result. +- let git_metadata_val = rm +- .origin +- .iter() +- .filter_map(|origin| { +- if let Origin::GitRepo(e) = origin { +- self.extract_git_metadata(e, source_span) +- } else { +- None +- } +- }) +- .next() +- .unwrap_or(serde_json::Value::Null); +- +- // Collect a file path from an Origin::File, if available. 
+- let file_path = rm +- .origin +- .iter() +- .find_map(|origin| match origin { +- Origin::File(e) => { +- if let Some(url) = self.jira_issue_url(&e.path, args) { +- Some(url) +- } else if let Some(url) = self.slack_message_url(&e.path) { +- Some(url) +- } else if let Some(mapped) = self.s3_display_path(&e.path) { +- Some(mapped) +- } else if let Some(mapped) = self.docker_display_path(&e.path) { +- Some(mapped) +- } else { +- Some(e.path.display().to_string()) +- } +- } +- Origin::Extended(e) => e.path().map(|p| p.display().to_string()), +- _ => None, +- }) +- .unwrap_or_default(); +- +- let match_json = json!({ +- "rule": { +- "name": rm.m.rule_name, +- "id": rm.m.rule_text_id, +- }, +- "finding": { +- "snippet": snippet, +- "fingerprint": rm.m.finding_fingerprint.to_string(), +- "confidence": rm.match_confidence.to_string(), +- "entropy": format!("{:.2}", rm.m.calculated_entropy), +- "validation": { +- "status": validation_status, +- "response": response_body, +- }, +- "language": rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()), +- "line": line_num, +- "column_start": source_span.start.column, +- "column_end": source_span.end.column, +- "path": file_path, +- "git_metadata": git_metadata_val +- } +- }); +- +- let finding_json = json!({ +- "id": rm.m.rule_text_id, +- "matches": [ match_json ] +- }); +- json_findings.push(finding_json); +- } +- Ok(json_findings) +- } + pub fn json_format( + &self, + mut writer: W, + args: &cli::commands::scan::ScanArgs, + ) -> Result<()> { +- let mut findings = Vec::new(); +- +- // Get filtered matches +- let mut matches = self.get_filtered_matches()?; +- +- // Apply deduplication only if requested +- if !args.no_dedup { +- matches = self.deduplicate_matches(matches, args.no_dedup); +- } +- +- // For each match, handle it based on the no_dedup flag +- for rm in matches { +- if args.no_dedup && rm.origin.len() > 1 { +- // For no_dedup and multiple origins, create separate findings for each origin +- for 
origin in rm.origin.iter() { +- // Create a single-origin version of this match +- let single_origin_rm = ReportMatch { +- origin: OriginSet::new(origin.clone(), Vec::new()), +- blob_metadata: rm.blob_metadata.clone(), +- m: rm.m.clone(), +- comment: rm.comment.clone(), +- visible: rm.visible, +- match_confidence: rm.match_confidence, +- validation_response_body: rm.validation_response_body.clone(), +- validation_response_status: rm.validation_response_status, +- validation_success: rm.validation_success, +- }; +- +- // Process this single-origin match into a JSON finding +- let json_finding = self.process_match_to_json(&single_origin_rm, args)?; +- findings.push(json_finding); +- } +- } else { +- // Process normally for deduped matches or matches with only one origin +- let json_finding = self.process_match_to_json(&rm, args)?; +- findings.push(json_finding); +- } +- } +- +- // Write the JSON output +- if !findings.is_empty() { +- serde_json::to_writer_pretty(&mut writer, &findings)?; ++ let records = self.build_finding_records(args)?; ++ if !records.is_empty() { ++ serde_json::to_writer_pretty(&mut writer, &records)?; + writeln!(writer)?; + } + Ok(()) + } + +- // Add a helper method to convert a ReportMatch to a JSON finding +- pub fn process_match_to_json( +- &self, +- rm: &ReportMatch, +- args: &cli::commands::scan::ScanArgs, +- ) -> Result { +- // Extract the relevant data from the match as you already do in your current implementation +- let source_span = &rm.m.location.source_span; +- let line_num = source_span.start.line; +- +- let snippet = Escaped( +- rm.m.groups +- .captures +- .get(1) +- .or_else(|| rm.m.groups.captures.get(0)) +- .map(|capture| capture.value.as_bytes()) +- .unwrap_or_default(), +- ) +- .to_string(); +- +- let validation_status = if rm.validation_success { +- "Active Credential" +- } else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() { +- "Not Attempted" +- } else { +- "Inactive Credential" +- }; +- +- const 
MAX_RESPONSE_LENGTH: usize = 512; +- let truncated_body: String = +- rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect(); +- let ellipsis = +- if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" }; +- let response_body = format!("{}{}", truncated_body, ellipsis); +- +- // Call extract_git_metadata on each GitRepo origin and take the first non-null result. +- let git_metadata_val = rm +- .origin +- .iter() +- .filter_map(|origin| { +- if let Origin::GitRepo(e) = origin { +- self.extract_git_metadata(e, source_span) +- } else { +- None +- } +- }) +- .next() +- .unwrap_or(serde_json::Value::Null); +- +- // Collect a file path from an Origin::File, if available. +- let file_path = rm +- .origin +- .iter() +- .find_map(|origin| { +- if let Origin::File(e) = origin { +- if let Some(url) = self.jira_issue_url(&e.path, args) { +- Some(url) +- } else if let Some(url) = self.slack_message_url(&e.path) { +- Some(url) +- } else if let Some(mapped) = self.s3_display_path(&e.path) { +- Some(mapped) +- } else if let Some(mapped) = self.docker_display_path(&e.path) { +- Some(mapped) +- } else { +- Some(e.path.display().to_string()) +- } +- } else if let Origin::Extended(e) = origin { +- e.path().map(|p| p.display().to_string()) +- } else { +- None +- } +- }) +- .unwrap_or_default(); +- +- let match_json = json!({ +- "rule": { +- "name": rm.m.rule_name, +- "id": rm.m.rule_text_id, +- }, +- "finding": { +- "snippet": snippet, +- "fingerprint": rm.m.finding_fingerprint.to_string(), +- "confidence": rm.match_confidence.to_string(), +- "entropy": format!("{:.2}", rm.m.calculated_entropy), +- "validation": { +- "status": validation_status, +- "response": response_body, +- }, +- "language": rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()), +- "line": line_num, +- "column_start": source_span.start.column, +- "column_end": source_span.end.column, +- "path": file_path, +- "git_metadata": git_metadata_val +- } +- }); +- 
+- let finding_json = json!({ +- "id": rm.m.rule_text_id, +- "matches": [ match_json ] +- }); +- +- Ok(finding_json) +- } +- // // Modified JSON format to pass args to gather_json_findings +- // pub fn json_format( +- // &self, +- // mut writer: W, +- // args: &cli::commands::scan::ScanArgs, +- // ) -> Result<()> { +- // let findings = self.gather_json_findings(args)?; +- // if !findings.is_empty() { +- // serde_json::to_writer_pretty(&mut writer, &findings)?; +- // writeln!(writer)?; +- // } +- // Ok(()) +- // } +- + pub fn jsonl_format( + &self, + mut writer: W, + args: &cli::commands::scan::ScanArgs, + ) -> Result<()> { +- // Get filtered matches +- let mut matches = self.get_filtered_matches()?; +- +- // Apply deduplication only if requested +- if !args.no_dedup { +- matches = self.deduplicate_matches(matches, args.no_dedup); +- } +- +- // For each match, handle it based on the no_dedup flag +- for rm in matches { +- if args.no_dedup && rm.origin.len() > 1 { +- // For no_dedup and multiple origins, create separate findings for each origin +- for origin in rm.origin.iter() { +- // Create a single-origin version of this match +- let single_origin_rm = ReportMatch { +- origin: OriginSet::new(origin.clone(), Vec::new()), +- blob_metadata: rm.blob_metadata.clone(), +- m: rm.m.clone(), +- comment: rm.comment.clone(), +- visible: rm.visible, +- match_confidence: rm.match_confidence, +- validation_response_body: rm.validation_response_body.clone(), +- validation_response_status: rm.validation_response_status, +- validation_success: rm.validation_success, +- }; +- +- // Process this single-origin match into a JSON finding and write it +- let json_finding = self.process_match_to_json(&single_origin_rm, args)?; +- serde_json::to_writer(&mut writer, &json_finding)?; +- writeln!(writer)?; +- } +- } else { +- // Process normally for deduped matches or matches with only one origin +- let json_finding = self.process_match_to_json(&rm, args)?; +- serde_json::to_writer(&mut 
writer, &json_finding)?; +- writeln!(writer)?; +- } ++ let records = self.build_finding_records(args)?; ++ for record in records { ++ serde_json::to_writer(&mut writer, &record)?; ++ writeln!(writer)?; + } + Ok(()) + } +- // // Modified JSONL format to pass args to gather_json_findings +- // pub fn jsonl_format( +- // &self, +- // mut writer: W, +- // args: &cli::commands::scan::ScanArgs, +- // ) -> Result<()> { +- // let findings = self.gather_json_findings(args)?; +- // for finding in findings { +- // serde_json::to_writer(&mut writer, &finding)?; +- // writeln!(writer)?; +- // } +- // Ok(()) +- // } + } + + #[cfg(test)] + mod tests { +- use std::{ +- io::Cursor, +- path::PathBuf, +- sync::{Arc, Mutex}, +- }; +- +- use anyhow::Result; +- use serde_json::Value; +- use url::Url; +- + use super::*; + use crate::{ + blob::BlobId, +- cli::commands::{ +- github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, +- inputs::{ContentFilteringArgs, InputSpecifierArgs}, +- output::OutputArgs, +- rules::RuleSpecifierArgs, +- scan::ConfidenceLevel, ++ cli::commands::github::GitHubRepoType, ++ cli::commands::output::{OutputArgs, ReportOutputFormat}, ++ cli::commands::scan::{ ++ ConfidenceLevel, ContentFilteringArgs, GitCloneMode, GitHistoryMode, ++ InputSpecifierArgs, RuleSpecifierArgs, + }, + findings_store::FindingsStore, + location::{Location, OffsetSpan, SourcePoint, SourceSpan}, +- matcher::{Match, SerializableCapture, SerializableCaptures}, +- origin::{Origin, OriginSet}, +- reporter::{ReportMatch, Styles}, +- rules::rule::Confidence, +- util::intern, ++ matcher::serializable::{SerializableCapture, SerializableCaptures}, ++ matcher::Match, ++ origin::Origin, ++ reporter::styles::Styles, ++ scanner::test_utils::intern, + }; ++ use std::{ ++ io::Cursor, ++ path::PathBuf, ++ sync::{Arc, Mutex}, ++ }; ++ use url::Url; + + fn create_default_args() -> cli::commands::scan::ScanArgs { + use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope + + 
cli::commands::scan::ScanArgs { + num_jobs: 1, + no_dedup: false, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + // local path / git URL inputs + path_inputs: Vec::new(), + git_url: Vec::new(), + + // GitHub + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + + // GitLab +diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs +index 9fcb1ecdfe8decc60278848c4a7be43cc9ebee70..b600f9f65838e52ce5dc3d7bb3bb1a5d5ff2bcaf 100644 +--- a/src/reporter/json_format.rs ++++ b/src/reporter/json_format.rs +@@ -458,240 +102,168 @@ mod tests { + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + exclude: Vec::new(), // Exclude patterns + no_binary: true, + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + } + } + +- // Helper function to create a mock Match + fn create_mock_match( + rule_name: &str, + rule_text_id: &str, + rule_finding_fingerprint: &str, + validation_success: bool, + ) -> Match { + Match { + location: Location { + offset_span: OffsetSpan { start: 10, end: 20 }, + source_span: SourceSpan { + start: SourcePoint { line: 5, column: 10 }, + end: SourcePoint { line: 5, column: 20 }, + }, + }, + groups: SerializableCaptures { + captures: vec![SerializableCapture { + name: Some("token".to_string()), + match_number: 1, + start: 10, + end: 20, + 
value: "mock_token".into(), + }], + }, + blob_id: BlobId::new(b"mock_blob"), + finding_fingerprint: 0123, + rule_finding_fingerprint: intern(rule_finding_fingerprint), + rule_text_id: intern(rule_text_id), +- rule_name: intern(rule_name), //.to_string(), ++ rule_name: intern(rule_name), + rule_confidence: Confidence::Medium, + validation_response_body: "validation response".to_string(), + validation_response_status: 200, + validation_success, + calculated_entropy: 4.5, + visible: true, + } + } + +- // Helper function to create a mock DetailsReporter + fn setup_mock_reporter(matches: Vec) -> DetailsReporter { + let mut datastore = FindingsStore::new(PathBuf::from("/tmp")); +- // Create mock origin and blob metadata for the first test match + if !matches.is_empty() { + let blob_metadata = BlobMetadata { + id: BlobId::new(b"mock_blob"), + num_bytes: 1024, + mime_essence: Some("text/plain".to_string()), + charset: Some("UTF-8".to_string()), + language: Some("Rust".to_string()), + }; + let dedup = true; +- // Add matches to datastore + for m in matches.clone() { + datastore.record( + vec![( + Arc::new(OriginSet::new( +- // OriginSet -- Arc<…> + Origin::from_file(PathBuf::from("/mock/path/file.rs")), + vec![], + )), +- Arc::new(blob_metadata.clone()), // BlobMetadata -- Arc<…> ++ Arc::new(blob_metadata.clone()), + m.m.clone(), + )], + dedup, + ); + } + } + DetailsReporter { + datastore: Arc::new(Mutex::new(datastore)), + styles: Styles::new(false), + only_valid: false, + } + } ++ + #[test] + fn test_json_format() -> Result<()> { +- // Create a mock match with successful validation + let mock_match = + create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true); + let matches = vec![ReportMatch { + origin: OriginSet::new(Origin::from_file(PathBuf::from("/mock/path/file.rs")), vec![]), + blob_metadata: BlobMetadata { + id: BlobId::new(b"mock_blob"), + num_bytes: 1024, + mime_essence: Some("text/plain".to_string()), + charset: Some("UTF-8".to_string()), + 
language: Some("Rust".to_string()), + }, + m: mock_match, + comment: None, + match_confidence: Confidence::Medium, + visible: true, + validation_response_body: "validation response".to_string(), + validation_response_status: 200, + validation_success: true, + }]; + let reporter = setup_mock_reporter(matches); + let mut output = Cursor::new(Vec::new()); +- // Call the json_format method + reporter.json_format(&mut output, &create_default_args())?; +- // Parse and validate JSON output +- let json_output: Vec = serde_json::from_slice(&output.into_inner())?; ++ let json_output: Vec = serde_json::from_slice(&output.into_inner())?; + assert!(!json_output.is_empty(), "JSON output should not be empty"); +- let first_finding = &json_output[0]; +- assert!(first_finding.get("id").is_some(), "Finding should have an 'id'"); +- assert!(first_finding.get("matches").is_some(), "Finding should have 'matches'"); +- // Validate the structure of the first match +- let matches = first_finding.get("matches").unwrap().as_array().unwrap(); +- let first_match = &matches[0]; +- assert_eq!(first_match.get("rule").unwrap().get("name").unwrap(), "MockRule"); +- assert_eq!(first_match.get("finding").unwrap().get("language").unwrap(), "Rust"); ++ let first = &json_output[0]; ++ assert_eq!(first["rule"]["name"], "MockRule"); ++ assert_eq!(first["finding"]["language"], "Rust"); + Ok(()) + } + +- // #[test] +- // fn test_jsonl_format() -> Result<()> { +- // // Create a mock match with successful validation +- // let mock_match = +- // create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true); +- // let matches = vec![ReportMatch { +- // origin: OriginSet::new( +- // Origin::from_file(PathBuf::from("/mock/path/file.rs")), +- // vec![], +- // ), +- // blob_metadata: BlobMetadata { +- // id: BlobId::new(b"mock_blob"), +- // num_bytes: 1024, +- // mime_essence: Some("text/plain".to_string()), +- // charset: Some("UTF-8".to_string()), +- // language: Some("Rust".to_string()), +- // 
}, +- // m: mock_match, +- // comment: None, +- // match_confidence: Confidence::Medium, +- // visible: true, +- // validation_response_body: "validation response".to_string(), +- // validation_response_status: 200, +- // validation_success: true, +- // }]; +- // let reporter = setup_mock_reporter(matches); +- // let mut output = Cursor::new(Vec::new()); +- // // Call the jsonl_format method +- // reporter.jsonl_format(&mut output, &create_default_args())?; +- // // Split output into lines and validate +- // let jsonl_output = String::from_utf8(output.into_inner())?; +- // let lines: Vec<&str> = jsonl_output.lines().collect(); +- // assert!(!lines.is_empty(), "JSONL output should not be empty"); +- // for line in &lines { +- // let json_value: serde_json::Value = serde_json::from_str(line)?; +- // assert!( +- // json_value.get("rule_name").is_some(), +- // "Each line should have a 'rule_name'" +- // ); +- // assert!( +- // json_value.get("matches").is_some(), +- // "Each line should have 'matches'" +- // ); +- // } +- // Ok(()) +- // } +- + #[test] + fn test_validation_status_in_json() -> Result<()> { +- // Test validation status in JSON output + let test_cases = vec![(true, "Active Credential"), (false, "Inactive Credential")]; + for (validation_success, expected_status) in test_cases { + let mock_match = create_mock_match( + "MockRule", + "mock_rule_1", + "mock_finding_fingerprint", + validation_success, + ); + let matches = vec![ReportMatch { + origin: OriginSet::new( + Origin::from_file(PathBuf::from("/mock/path/file.rs")), + vec![], + ), + blob_metadata: BlobMetadata { + id: BlobId::new(b"mock_blob"), + num_bytes: 1024, + mime_essence: Some("text/plain".to_string()), + charset: Some("UTF-8".to_string()), + language: Some("Rust".to_string()), + }, + m: mock_match, + comment: None, + match_confidence: Confidence::Medium, + visible: true, + validation_response_body: "validation response".to_string(), + validation_response_status: 200, + validation_success, + }]; 
+ let reporter = setup_mock_reporter(matches); + let mut output = Cursor::new(Vec::new()); +- // Call the json_format method + reporter.json_format(&mut output, &create_default_args())?; +- // Parse and validate JSON output +- let json_output: Vec = serde_json::from_slice(&output.into_inner())?; ++ let json_output: Vec = serde_json::from_slice(&output.into_inner())?; + assert!(!json_output.is_empty(), "JSON output should not be empty"); +- let first_finding = &json_output[0]; +- let matches = first_finding.get("matches").unwrap().as_array().unwrap(); +- let first_match = &matches[0]; +- let validation_status = first_match +- .get("finding") +- .unwrap() +- .get("validation") +- .unwrap() +- .get("status") +- .unwrap() +- .as_str() +- .unwrap(); ++ let first = &json_output[0]; ++ let validation_status = first["finding"]["validation"]["status"].as_str().unwrap(); + assert_eq!(validation_status, expected_status); + } + Ok(()) + } + } diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index ea38722..13bc78b 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -124,7 +124,6 @@ pub struct InputSpecifierArgs { #[arg(long, requires = "s3_bucket")] pub aws_local_profile: Option, - /// Docker/OCI images to scan (no local Docker required) #[arg(long = "docker-image")] pub docker_image: Vec, diff --git a/src/lib.rs b/src/lib.rs index 90d0451..04f7303 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,8 +29,8 @@ pub mod rule_loader; pub mod rule_profiling; pub mod rules; pub mod rules_database; -pub mod safe_list; pub mod s3; +pub mod safe_list; pub mod scanner; pub mod scanner_pool; pub mod serde_utils; diff --git a/src/reporter.rs b/src/reporter.rs index ad0efe9..5159651 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -5,12 +5,12 @@ use std::{ use anyhow::Result; use http::StatusCode; -use indenter::indented; use schemars::JsonSchema; use serde::Serialize; use crate::{ blob::BlobMetadata, + bstring_escape::Escaped, cli, 
cli::global::GlobalArgs, finding_data, findings_store, @@ -226,38 +226,6 @@ impl DetailsReporter { .collect()) } - // fn process_matches(&self, only_valid: bool) -> Result> { - // let datastore = self.datastore.lock().unwrap(); - // Ok(datastore - // .get_matches() - // .iter() - // .filter(|msg| { - // let (_origin, _blob_metadata, match_item) = &***msg; - // if only_valid { - // match_item.validation_success - // && match_item.validation_response_status != StatusCode::CONTINUE.as_u16() - // && match_item.visible - // } else { - // match_item.visible - // } - // }) - // .map(|msg| { - // let (origin, blob_metadata, match_item) = &**msg; - // ReportMatch { - // origin: origin.clone(), - // blob_metadata: blob_metadata.clone(), - // m: match_item.clone(), - // comment: None, - // visible: match_item.visible, - // match_confidence: match_item.rule_confidence, - // validation_response_body: match_item.validation_response_body.clone(), - // validation_response_status: match_item.validation_response_status, - // validation_success: match_item.validation_success, - // } - // }) - // .collect()) - // } - pub fn get_filtered_matches(&self) -> Result> { self.process_matches(self.only_valid, true) } @@ -266,6 +234,166 @@ impl DetailsReporter { self.process_matches(only_valid.unwrap_or(self.only_valid), false) } + pub fn deduplicate_matches( + &self, + matches: Vec, + no_dedup: bool, + ) -> Vec { + if no_dedup { + return matches; + } + + use std::collections::HashMap; + let mut by_fp: HashMap = HashMap::new(); + + for rm in matches { + let fp = rm.m.finding_fingerprint; + if let Some(existing) = by_fp.get_mut(&fp) { + // merge origin sets (keep first origin, append the rest) + for o in rm.origin.iter() { + if !existing.origin.iter().any(|e| e == o) { + existing.origin = OriginSet::new( + existing.origin.first().clone(), + existing + .origin + .iter() + .skip(1) + .cloned() + .chain(std::iter::once(o.clone())) + .collect(), + ); + } + } + continue; + } + by_fp.insert(fp, rm); 
+ } + by_fp.into_values().collect() + } + + fn matches_for_output(&self, args: &cli::commands::scan::ScanArgs) -> Result> { + let mut matches = self.get_filtered_matches()?; + if !args.no_dedup { + matches = self.deduplicate_matches(matches, args.no_dedup); + } + if args.no_dedup { + let mut expanded = Vec::new(); + for rm in matches { + if rm.origin.len() > 1 { + for origin in rm.origin.iter() { + let mut single = rm.clone(); + single.origin = OriginSet::new(origin.clone(), Vec::new()); + expanded.push(single); + } + } else { + expanded.push(rm); + } + } + matches = expanded; + } + Ok(matches) + } + + pub fn build_finding_record( + &self, + rm: &ReportMatch, + args: &cli::commands::scan::ScanArgs, + ) -> FindingReporterRecord { + let source_span = &rm.m.location.source_span; + let line_num = source_span.start.line; + + let snippet = Escaped( + rm.m.groups + .captures + .get(1) + .or_else(|| rm.m.groups.captures.get(0)) + .map(|capture| capture.value.as_bytes()) + .unwrap_or_default(), + ) + .to_string(); + + let validation_status = if rm.validation_success { + "Active Credential".to_string() + } else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() { + "Not Attempted".to_string() + } else { + "Inactive Credential".to_string() + }; + + const MAX_RESPONSE_LENGTH: usize = 512; + let truncated_body: String = + rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect(); + let ellipsis = + if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." 
} else { "" }; + let response_body = format!("{}{}", truncated_body, ellipsis); + + let git_metadata_val = rm + .origin + .iter() + .filter_map(|origin| { + if let Origin::GitRepo(e) = origin { + self.extract_git_metadata(e, source_span) + } else { + None + } + }) + .next(); + + let file_path = rm + .origin + .iter() + .find_map(|origin| match origin { + Origin::File(e) => { + if let Some(url) = self.jira_issue_url(&e.path, args) { + Some(url) + } else if let Some(url) = self.slack_message_url(&e.path) { + Some(url) + } else if let Some(mapped) = self.s3_display_path(&e.path) { + Some(mapped) + } else if let Some(mapped) = self.docker_display_path(&e.path) { + Some(mapped) + } else { + Some(e.path.display().to_string()) + } + } + Origin::Extended(e) => e.path().map(|p| p.display().to_string()), + _ => None, + }) + .unwrap_or_default(); + + FindingReporterRecord { + rule: RuleMetadata { + name: rm.m.rule_name.to_string().clone(), + id: rm.m.rule_text_id.to_string().clone(), + }, + finding: FindingRecordData { + snippet, + fingerprint: rm.m.finding_fingerprint.to_string(), + confidence: rm.match_confidence.to_string(), + entropy: format!("{:.2}", rm.m.calculated_entropy), + validation: ValidationInfo { status: validation_status, response: response_body }, + language: rm + .blob_metadata + .language + .clone() + .unwrap_or_else(|| "Unknown".to_string()), + line: line_num as u32, + column_start: source_span.start.column as u32, + column_end: source_span.end.column as u32, + path: file_path, + git_metadata: git_metadata_val, + }, + } + } + + pub fn build_finding_records( + &self, + args: &cli::commands::scan::ScanArgs, + ) -> Result> { + let matches = self.matches_for_output(args)?; + Ok(matches.iter().map(|rm| self.build_finding_record(rm, args)).collect()) + } + fn get_finding_data(&self) -> Result> { let datastore = self.datastore.lock().unwrap(); Ok(datastore @@ -388,6 +516,41 @@ pub struct ReportMatch { /// Validation Success pub validation_success: bool, } + 
+#[derive(Serialize, JsonSchema, Clone, Debug)] +pub struct FindingReporterRecord { + pub rule: RuleMetadata, + pub finding: FindingRecordData, +} + +#[derive(Serialize, JsonSchema, Clone, Debug)] +pub struct RuleMetadata { + pub name: String, + pub id: String, +} + +#[derive(Serialize, JsonSchema, Clone, Debug)] +pub struct ValidationInfo { + pub status: String, + pub response: String, +} + +#[derive(Serialize, JsonSchema, Clone, Debug)] +pub struct FindingRecordData { + pub snippet: String, + pub fingerprint: String, + pub confidence: String, + pub entropy: String, + pub validation: ValidationInfo, + pub language: String, + pub line: u32, + pub column_start: u32, + pub column_end: u32, + pub path: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub git_metadata: Option, +} + impl From for ReportMatch { fn from(e: finding_data::FindingDataEntry) -> Self { ReportMatch { diff --git a/src/reporter/bson_format.rs b/src/reporter/bson_format.rs index c1470d0..6691c3b 100644 --- a/src/reporter/bson_format.rs +++ b/src/reporter/bson_format.rs @@ -1,90 +1,17 @@ -use bson::Document; -use serde_json::Value; - use super::*; + impl DetailsReporter { /// Formats findings as BSON and writes them to the provided writer. - /// For testing purposes, prints the full JSON for each finding before converting. 
pub fn bson_format( &self, mut writer: W, args: &cli::commands::scan::ScanArgs, ) -> Result<()> { - // Get filtered matches - let mut matches = self.get_filtered_matches()?; - - // Apply deduplication only if requested - if !args.no_dedup { - matches = self.deduplicate_matches(matches, args.no_dedup); - } - - let mut bson_findings = Vec::new(); - - // For each match, handle it based on the no_dedup flag - for rm in matches { - if args.no_dedup && rm.origin.len() > 1 { - // For no_dedup and multiple origins, create separate findings for each origin - for origin in rm.origin.iter() { - // Create a single-origin version of this match - let single_origin_rm = ReportMatch { - origin: OriginSet::new(origin.clone(), Vec::new()), - blob_metadata: rm.blob_metadata.clone(), - m: rm.m.clone(), - comment: rm.comment.clone(), - visible: rm.visible, - match_confidence: rm.match_confidence, - validation_response_body: rm.validation_response_body.clone(), - validation_response_status: rm.validation_response_status, - validation_success: rm.validation_success, - }; - - // Process to JSON first, then convert to BSON - let json_finding = self.process_match_to_json(&single_origin_rm, args)?; - if let Ok(bson_doc) = json_to_bson_document(&json_finding) { - bson_findings.push(bson_doc); - } - } - } else { - // Process normally for deduped matches or matches with only one origin - let json_finding = self.process_match_to_json(&rm, args)?; - if let Ok(bson_doc) = json_to_bson_document(&json_finding) { - bson_findings.push(bson_doc); - } - } - } - - // Write each BSON document - for doc in bson_findings { + let records = self.build_finding_records(args)?; + for record in records { + let doc = bson::to_document(&record)?; doc.to_writer(&mut writer)?; } Ok(()) } - // pub fn bson_format( - // &self, - // mut writer: W, - // args: &cli::commands::scan::ScanArgs, - // ) -> Result<()> { - // let findings = self.gather_json_findings(args)?; - - // // Print the full JSON for each finding - // for 
finding in &findings { - // println!("Full JSON:\n{}", serde_json::to_string_pretty(finding)?); - // } - - // let bson_findings: Vec = findings - // .into_iter() - // .filter_map(|finding| json_to_bson_document(&finding).ok()) - // .collect(); - // for doc in bson_findings { - // doc.to_writer(&mut writer)?; - // } - // Ok(()) - // } -} - -fn json_to_bson_document(json: &Value) -> Result { - match bson::to_bson(json)? { - bson::Bson::Document(doc) => Ok(doc), - _ => Err(anyhow::anyhow!("Failed to convert JSON to BSON document")), - } } diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 9fcb1ec..b64a777 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -1,415 +1,61 @@ -use http::StatusCode; -use serde_json::json; - use super::*; -use crate::bstring_escape::Escaped; +use serde_json::Value; impl DetailsReporter { - pub fn deduplicate_matches( - &self, - matches: Vec, - no_dedup: bool, - ) -> Vec { - if no_dedup { - return matches; - } - - use std::collections::HashMap; - let mut by_fp: HashMap = HashMap::new(); - - for rm in matches { - let fp = rm.m.finding_fingerprint; - if let Some(existing) = by_fp.get_mut(&fp) { - // merge origin sets (keep first origin, append the rest) - for o in rm.origin.iter() { - if !existing.origin.iter().any(|e| e == o) { - existing.origin = OriginSet::new( - existing.origin.first().clone(), - existing - .origin - .iter() - .skip(1) - .cloned() - .chain(std::iter::once(o.clone())) - .collect(), - ); - } - } - continue; - } - by_fp.insert(fp, rm); - } - by_fp.into_values().collect() - } - - pub fn gather_json_findings( - &self, - args: &cli::commands::scan::ScanArgs, - ) -> Result> { - let mut matches = self.get_filtered_matches()?; - if !args.no_dedup { - matches = self.deduplicate_matches(matches, args.no_dedup); - } - - let mut json_findings = Vec::new(); - for rm in matches { - let source_span = &rm.m.location.source_span; - let line_num = source_span.start.line; - - let snippet = 
Escaped( - rm.m.groups - .captures - .get(1) - .or_else(|| rm.m.groups.captures.get(0)) - .map(|capture| capture.value.as_bytes()) - .unwrap_or_default(), - ) - .to_string(); - - let validation_status = if rm.validation_success { - "Active Credential" - } else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() { - "Not Attempted" - } else { - "Inactive Credential" - }; - - const MAX_RESPONSE_LENGTH: usize = 512; - let truncated_body: String = - rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect(); - let ellipsis = - if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" }; - let response_body = format!("{}{}", truncated_body, ellipsis); - - // Call extract_git_metadata on each GitRepo origin and take the first non-null result. - let git_metadata_val = rm - .origin - .iter() - .filter_map(|origin| { - if let Origin::GitRepo(e) = origin { - self.extract_git_metadata(e, source_span) - } else { - None - } - }) - .next() - .unwrap_or(serde_json::Value::Null); - - // Collect a file path from an Origin::File, if available. 
- let file_path = rm - .origin - .iter() - .find_map(|origin| match origin { - Origin::File(e) => { - if let Some(url) = self.jira_issue_url(&e.path, args) { - Some(url) - } else if let Some(url) = self.slack_message_url(&e.path) { - Some(url) - } else if let Some(mapped) = self.s3_display_path(&e.path) { - Some(mapped) - } else if let Some(mapped) = self.docker_display_path(&e.path) { - Some(mapped) - } else { - Some(e.path.display().to_string()) - } - } - Origin::Extended(e) => e.path().map(|p| p.display().to_string()), - _ => None, - }) - .unwrap_or_default(); - - let match_json = json!({ - "rule": { - "name": rm.m.rule_name, - "id": rm.m.rule_text_id, - }, - "finding": { - "snippet": snippet, - "fingerprint": rm.m.finding_fingerprint.to_string(), - "confidence": rm.match_confidence.to_string(), - "entropy": format!("{:.2}", rm.m.calculated_entropy), - "validation": { - "status": validation_status, - "response": response_body, - }, - "language": rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()), - "line": line_num, - "column_start": source_span.start.column, - "column_end": source_span.end.column, - "path": file_path, - "git_metadata": git_metadata_val - } - }); - - let finding_json = json!({ - "id": rm.m.rule_text_id, - "matches": [ match_json ] - }); - json_findings.push(finding_json); - } - Ok(json_findings) - } pub fn json_format( &self, mut writer: W, args: &cli::commands::scan::ScanArgs, ) -> Result<()> { - let mut findings = Vec::new(); - - // Get filtered matches - let mut matches = self.get_filtered_matches()?; - - // Apply deduplication only if requested - if !args.no_dedup { - matches = self.deduplicate_matches(matches, args.no_dedup); - } - - // For each match, handle it based on the no_dedup flag - for rm in matches { - if args.no_dedup && rm.origin.len() > 1 { - // For no_dedup and multiple origins, create separate findings for each origin - for origin in rm.origin.iter() { - // Create a single-origin version of this match 
- let single_origin_rm = ReportMatch { - origin: OriginSet::new(origin.clone(), Vec::new()), - blob_metadata: rm.blob_metadata.clone(), - m: rm.m.clone(), - comment: rm.comment.clone(), - visible: rm.visible, - match_confidence: rm.match_confidence, - validation_response_body: rm.validation_response_body.clone(), - validation_response_status: rm.validation_response_status, - validation_success: rm.validation_success, - }; - - // Process this single-origin match into a JSON finding - let json_finding = self.process_match_to_json(&single_origin_rm, args)?; - findings.push(json_finding); - } - } else { - // Process normally for deduped matches or matches with only one origin - let json_finding = self.process_match_to_json(&rm, args)?; - findings.push(json_finding); - } - } - - // Write the JSON output - if !findings.is_empty() { - serde_json::to_writer_pretty(&mut writer, &findings)?; + let records = self.build_finding_records(args)?; + if !records.is_empty() { + serde_json::to_writer_pretty(&mut writer, &records)?; writeln!(writer)?; } Ok(()) } - // Add a helper method to convert a ReportMatch to a JSON finding - pub fn process_match_to_json( - &self, - rm: &ReportMatch, - args: &cli::commands::scan::ScanArgs, - ) -> Result { - // Extract the relevant data from the match as you already do in your current implementation - let source_span = &rm.m.location.source_span; - let line_num = source_span.start.line; - - let snippet = Escaped( - rm.m.groups - .captures - .get(1) - .or_else(|| rm.m.groups.captures.get(0)) - .map(|capture| capture.value.as_bytes()) - .unwrap_or_default(), - ) - .to_string(); - - let validation_status = if rm.validation_success { - "Active Credential" - } else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() { - "Not Attempted" - } else { - "Inactive Credential" - }; - - const MAX_RESPONSE_LENGTH: usize = 512; - let truncated_body: String = - rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect(); - let ellipsis = 
- if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" }; - let response_body = format!("{}{}", truncated_body, ellipsis); - - // Call extract_git_metadata on each GitRepo origin and take the first non-null result. - let git_metadata_val = rm - .origin - .iter() - .filter_map(|origin| { - if let Origin::GitRepo(e) = origin { - self.extract_git_metadata(e, source_span) - } else { - None - } - }) - .next() - .unwrap_or(serde_json::Value::Null); - - // Collect a file path from an Origin::File, if available. - let file_path = rm - .origin - .iter() - .find_map(|origin| { - if let Origin::File(e) = origin { - if let Some(url) = self.jira_issue_url(&e.path, args) { - Some(url) - } else if let Some(url) = self.slack_message_url(&e.path) { - Some(url) - } else if let Some(mapped) = self.s3_display_path(&e.path) { - Some(mapped) - } else if let Some(mapped) = self.docker_display_path(&e.path) { - Some(mapped) - } else { - Some(e.path.display().to_string()) - } - } else if let Origin::Extended(e) = origin { - e.path().map(|p| p.display().to_string()) - } else { - None - } - }) - .unwrap_or_default(); - - let match_json = json!({ - "rule": { - "name": rm.m.rule_name, - "id": rm.m.rule_text_id, - }, - "finding": { - "snippet": snippet, - "fingerprint": rm.m.finding_fingerprint.to_string(), - "confidence": rm.match_confidence.to_string(), - "entropy": format!("{:.2}", rm.m.calculated_entropy), - "validation": { - "status": validation_status, - "response": response_body, - }, - "language": rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()), - "line": line_num, - "column_start": source_span.start.column, - "column_end": source_span.end.column, - "path": file_path, - "git_metadata": git_metadata_val - } - }); - - let finding_json = json!({ - "id": rm.m.rule_text_id, - "matches": [ match_json ] - }); - - Ok(finding_json) - } - // // Modified JSON format to pass args to gather_json_findings - // pub fn json_format( - // &self, - // 
mut writer: W, - // args: &cli::commands::scan::ScanArgs, - // ) -> Result<()> { - // let findings = self.gather_json_findings(args)?; - // if !findings.is_empty() { - // serde_json::to_writer_pretty(&mut writer, &findings)?; - // writeln!(writer)?; - // } - // Ok(()) - // } - pub fn jsonl_format( &self, mut writer: W, args: &cli::commands::scan::ScanArgs, ) -> Result<()> { - // Get filtered matches - let mut matches = self.get_filtered_matches()?; - - // Apply deduplication only if requested - if !args.no_dedup { - matches = self.deduplicate_matches(matches, args.no_dedup); - } - - // For each match, handle it based on the no_dedup flag - for rm in matches { - if args.no_dedup && rm.origin.len() > 1 { - // For no_dedup and multiple origins, create separate findings for each origin - for origin in rm.origin.iter() { - // Create a single-origin version of this match - let single_origin_rm = ReportMatch { - origin: OriginSet::new(origin.clone(), Vec::new()), - blob_metadata: rm.blob_metadata.clone(), - m: rm.m.clone(), - comment: rm.comment.clone(), - visible: rm.visible, - match_confidence: rm.match_confidence, - validation_response_body: rm.validation_response_body.clone(), - validation_response_status: rm.validation_response_status, - validation_success: rm.validation_success, - }; - - // Process this single-origin match into a JSON finding and write it - let json_finding = self.process_match_to_json(&single_origin_rm, args)?; - serde_json::to_writer(&mut writer, &json_finding)?; - writeln!(writer)?; - } - } else { - // Process normally for deduped matches or matches with only one origin - let json_finding = self.process_match_to_json(&rm, args)?; - serde_json::to_writer(&mut writer, &json_finding)?; - writeln!(writer)?; - } + let records = self.build_finding_records(args)?; + for record in records { + serde_json::to_writer(&mut writer, &record)?; + writeln!(writer)?; } Ok(()) } - // // Modified JSONL format to pass args to gather_json_findings - // pub fn 
jsonl_format( - // &self, - // mut writer: W, - // args: &cli::commands::scan::ScanArgs, - // ) -> Result<()> { - // let findings = self.gather_json_findings(args)?; - // for finding in findings { - // serde_json::to_writer(&mut writer, &finding)?; - // writeln!(writer)?; - // } - // Ok(()) - // } } #[cfg(test)] mod tests { + use super::*; + use crate::cli::commands::github::GitCloneMode; + use crate::cli::commands::github::GitHistoryMode; + use crate::cli::commands::rules::RuleSpecifierArgs; + use crate::matcher::{SerializableCapture, SerializableCaptures}; + use crate::util::intern; + use crate::{ + blob::BlobId, + cli::commands::github::GitHubRepoType, + cli::commands::inputs::ContentFilteringArgs, + cli::commands::inputs::InputSpecifierArgs, + cli::commands::output::{OutputArgs, ReportOutputFormat}, + cli::commands::scan::ConfidenceLevel, + findings_store::FindingsStore, + location::{Location, OffsetSpan, SourcePoint, SourceSpan}, + matcher::Match, + origin::Origin, + reporter::styles::Styles, + }; use std::{ io::Cursor, path::PathBuf, sync::{Arc, Mutex}, }; - - use anyhow::Result; - use serde_json::Value; use url::Url; - - use super::*; - use crate::{ - blob::BlobId, - cli::commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::OutputArgs, - rules::RuleSpecifierArgs, - scan::ConfidenceLevel, - }, - findings_store::FindingsStore, - location::{Location, OffsetSpan, SourcePoint, SourceSpan}, - matcher::{Match, SerializableCapture, SerializableCaptures}, - origin::{Origin, OriginSet}, - reporter::{ReportMatch, Styles}, - rules::rule::Confidence, - util::intern, - }; - fn create_default_args() -> cli::commands::scan::ScanArgs { use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope @@ -480,7 +126,6 @@ mod tests { } } - // Helper function to create a mock Match fn create_mock_match( rule_name: &str, rule_text_id: &str, @@ -508,7 +153,7 @@ mod tests { 
finding_fingerprint: 0123, rule_finding_fingerprint: intern(rule_finding_fingerprint), rule_text_id: intern(rule_text_id), - rule_name: intern(rule_name), //.to_string(), + rule_name: intern(rule_name), rule_confidence: Confidence::Medium, validation_response_body: "validation response".to_string(), validation_response_status: 200, @@ -518,10 +163,8 @@ mod tests { } } - // Helper function to create a mock DetailsReporter fn setup_mock_reporter(matches: Vec) -> DetailsReporter { let mut datastore = FindingsStore::new(PathBuf::from("/tmp")); - // Create mock origin and blob metadata for the first test match if !matches.is_empty() { let blob_metadata = BlobMetadata { id: BlobId::new(b"mock_blob"), @@ -531,16 +174,14 @@ mod tests { language: Some("Rust".to_string()), }; let dedup = true; - // Add matches to datastore for m in matches.clone() { datastore.record( vec![( Arc::new(OriginSet::new( - // OriginSet -- Arc<…> Origin::from_file(PathBuf::from("/mock/path/file.rs")), vec![], )), - Arc::new(blob_metadata.clone()), // BlobMetadata -- Arc<…> + Arc::new(blob_metadata.clone()), m.m.clone(), )], dedup, @@ -553,9 +194,9 @@ mod tests { only_valid: false, } } + #[test] fn test_json_format() -> Result<()> { - // Create a mock match with successful validation let mock_match = create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true); let matches = vec![ReportMatch { @@ -577,72 +218,17 @@ mod tests { }]; let reporter = setup_mock_reporter(matches); let mut output = Cursor::new(Vec::new()); - // Call the json_format method reporter.json_format(&mut output, &create_default_args())?; - // Parse and validate JSON output - let json_output: Vec = serde_json::from_slice(&output.into_inner())?; + let json_output: Vec = serde_json::from_slice(&output.into_inner())?; assert!(!json_output.is_empty(), "JSON output should not be empty"); - let first_finding = &json_output[0]; - assert!(first_finding.get("id").is_some(), "Finding should have an 'id'"); - 
assert!(first_finding.get("matches").is_some(), "Finding should have 'matches'"); - // Validate the structure of the first match - let matches = first_finding.get("matches").unwrap().as_array().unwrap(); - let first_match = &matches[0]; - assert_eq!(first_match.get("rule").unwrap().get("name").unwrap(), "MockRule"); - assert_eq!(first_match.get("finding").unwrap().get("language").unwrap(), "Rust"); + let first = &json_output[0]; + assert_eq!(first["rule"]["name"], "MockRule"); + assert_eq!(first["finding"]["language"], "Rust"); Ok(()) } - // #[test] - // fn test_jsonl_format() -> Result<()> { - // // Create a mock match with successful validation - // let mock_match = - // create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true); - // let matches = vec![ReportMatch { - // origin: OriginSet::new( - // Origin::from_file(PathBuf::from("/mock/path/file.rs")), - // vec![], - // ), - // blob_metadata: BlobMetadata { - // id: BlobId::new(b"mock_blob"), - // num_bytes: 1024, - // mime_essence: Some("text/plain".to_string()), - // charset: Some("UTF-8".to_string()), - // language: Some("Rust".to_string()), - // }, - // m: mock_match, - // comment: None, - // match_confidence: Confidence::Medium, - // visible: true, - // validation_response_body: "validation response".to_string(), - // validation_response_status: 200, - // validation_success: true, - // }]; - // let reporter = setup_mock_reporter(matches); - // let mut output = Cursor::new(Vec::new()); - // // Call the jsonl_format method - // reporter.jsonl_format(&mut output, &create_default_args())?; - // // Split output into lines and validate - // let jsonl_output = String::from_utf8(output.into_inner())?; - // let lines: Vec<&str> = jsonl_output.lines().collect(); - // assert!(!lines.is_empty(), "JSONL output should not be empty"); - // for line in &lines { - // let json_value: serde_json::Value = serde_json::from_str(line)?; - // assert!( - // json_value.get("rule_name").is_some(), - // "Each 
line should have a 'rule_name'" - // ); - // assert!( - // json_value.get("matches").is_some(), - // "Each line should have 'matches'" - // ); - // } - // Ok(()) - // } - #[test] fn test_validation_status_in_json() -> Result<()> { - // Test validation status in JSON output let test_cases = vec![(true, "Active Credential"), (false, "Inactive Credential")]; for (validation_success, expected_status) in test_cases { let mock_match = create_mock_match( @@ -673,23 +259,11 @@ mod tests { }]; let reporter = setup_mock_reporter(matches); let mut output = Cursor::new(Vec::new()); - // Call the json_format method reporter.json_format(&mut output, &create_default_args())?; - // Parse and validate JSON output - let json_output: Vec = serde_json::from_slice(&output.into_inner())?; + let json_output: Vec = serde_json::from_slice(&output.into_inner())?; assert!(!json_output.is_empty(), "JSON output should not be empty"); - let first_finding = &json_output[0]; - let matches = first_finding.get("matches").unwrap().as_array().unwrap(); - let first_match = &matches[0]; - let validation_status = first_match - .get("finding") - .unwrap() - .get("validation") - .unwrap() - .get("status") - .unwrap() - .as_str() - .unwrap(); + let first = &json_output[0]; + let validation_status = first["finding"]["validation"]["status"].as_str().unwrap(); assert_eq!(validation_status, expected_status); } Ok(()) diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index 942e7ad..6790a44 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -1,413 +1,126 @@ use std::fmt::{Display, Formatter, Result as FmtResult}; -use http::StatusCode; +use indenter::indented; use super::*; -use crate::{ - bstring_escape::Escaped, - origin::{get_repo_url, GitRepoOrigin}, -}; + impl DetailsReporter { - // Modified pretty format to use deduplicate_matches helper pub fn pretty_format( &self, mut writer: W, args: &cli::commands::scan::ScanArgs, ) -> Result<()> { - let mut 
matches = self.get_filtered_matches()?; - let num_findings = matches.len(); - - if !args.no_dedup { - matches = self.deduplicate_matches(matches, args.no_dedup); - } - - for (index, rm) in matches.into_iter().enumerate() { - // When no_dedup is true, we'll handle each origin separately - if args.no_dedup && rm.origin.len() > 1 { - // For each origin, create a separate "finding" - for origin in rm.origin.iter() { - // Create a new ReportMatch with just this single origin - let single_origin_rm = ReportMatch { - origin: OriginSet::new(origin.clone(), Vec::new()), - blob_metadata: rm.blob_metadata.clone(), - m: rm.m.clone(), - comment: rm.comment.clone(), - visible: rm.visible, - match_confidence: rm.match_confidence, - validation_response_body: rm.validation_response_body.clone(), - validation_response_status: rm.validation_response_status, - validation_success: rm.validation_success, - }; - - self.write_finding( - &mut writer, - &single_origin_rm, - index + 1, - num_findings, - args, - )?; - } - } else { - // Normal processing for deduped matches or matches with only one origin - self.write_finding(&mut writer, &rm, index + 1, num_findings, args)?; - } + let records = self.build_finding_records(args)?; + let num_findings = records.len(); + for (index, record) in records.iter().enumerate() { + self.write_finding_record(&mut writer, record, index + 1, num_findings)?; } Ok(()) } - fn write_finding( + fn write_finding_record( &self, writer: &mut W, - rm: &ReportMatch, + record: &FindingReporterRecord, _finding_num: usize, _num_findings: usize, - args: &cli::commands::scan::ScanArgs, ) -> Result<()> { - let lock_icon = if rm.validation_success { "🔓 " } else { "" }; + let is_active = record.finding.validation.status == "Active Credential"; + let lock_icon = if is_active { "🔓 " } else { "" }; let formatted_heading = format!( "{}{} => [{}]", lock_icon, - rm.m.rule_name.to_uppercase(), - rm.m.rule_text_id.to_uppercase() + record.rule.name.to_uppercase(), + 
record.rule.id.to_uppercase() ); - if rm.validation_success { + if is_active { writeln!(writer, "{}", self.style_finding_active_heading(formatted_heading))?; } else { writeln!(writer, "{}", self.style_finding_heading(formatted_heading))?; } - writeln!(writer, "{}", PrettyFinding(self, rm, args))?; + writeln!(writer, "{}", PrettyFindingRecord(self, record))?; writeln!(writer)?; Ok(()) } - fn write_git_metadata( + fn write_git_metadata_value( &self, f: &mut Formatter<'_>, - e: &GitRepoOrigin, - _args: &cli::commands::scan::ScanArgs, - line_num: usize, + git: &serde_json::Value, ) -> FmtResult { - // Check if this is a remote git scan - // let mut is_remote_git_scan = !args.input_specifier_args.git_url.is_empty(); - // let mut git_url_string = String::new(); - let repo_url = get_repo_url(&e.repo_path) - .unwrap_or_else(|_| e.repo_path.to_string_lossy().to_string().into()); - let mut git_url_string = repo_url.clone(); - if git_url_string.ends_with(".git") { - git_url_string = git_url_string.strip_suffix(".git").unwrap().to_string().into(); + let repo_url = git["repository_url"].as_str().unwrap_or(""); + writeln!(f, " |Git Repo......: {}", self.style_metadata(repo_url))?; + if let Some(commit) = git.get("commit") { + if let Some(url) = commit.get("url").and_then(|v| v.as_str()) { + writeln!(f, " |__Commit......: {}", self.style_metadata(url))?; + } + if let Some(committer) = commit.get("committer") { + let name = committer.get("name").and_then(|v| v.as_str()).unwrap_or(""); + let email = committer.get("email").and_then(|v| v.as_str()).unwrap_or(""); + writeln!(indented(f).with_str(" |__"), "Committer...: {} <{}>", name, email)?; + } + if let Some(date) = commit.get("date").and_then(|v| v.as_str()) { + writeln!(indented(f).with_str(" |__"), "Date........: {}", date)?; + } } - writeln!(f, " |Git Repo......: {}", self.style_metadata(&git_url_string),)?; - if let Some(cs) = &e.first_commit { - let cmd = &cs.commit_metadata; - - let atime = - 
cmd.committer_timestamp.format(gix::date::time::format::SHORT.clone()).to_string(); - - let commit_id = &cmd.commit_id; - let commit_url = format!("{}/commit/{}", &git_url_string, commit_id); - // Write Commit Information - writeln!(f, " |__Commit......: {}", self.style_metadata(&commit_url))?; - writeln!( - indented(f).with_str(" |__"), - "Committer...: {} <{}>", - cmd.committer_name, - cmd.committer_email - )?; - writeln!(indented(f).with_str(" |__"), "Date........: {}", atime)?; - // writeln!(indented(f).with_str(" |__"), "Summary.....: {}", msg)?; - writeln!(indented(f).with_str(" |__"), "Path........: {}", cs.blob_path)?; - // Construct Git Command - let git_link = - format!("{}/blob/{}/{}#L{}", &git_url_string, commit_id, cs.blob_path, line_num); - let git_command = - format!("git -C {} show {}:{}", e.repo_path.display(), commit_id, cs.blob_path); - writeln!( - indented(f).with_str(" |__"), - "Git Link....: {}", - self.style_metadata(&git_link) - )?; - writeln!( - indented(f).with_str(" |__"), - "Git Command.: {}", - self.style_metadata(&git_command) - )?; + if let Some(file) = git.get("file") { + if let Some(path) = file.get("path").and_then(|v| v.as_str()) { + writeln!(indented(f).with_str(" |__"), "Path........: {}", path)?; + } + if let Some(url) = file.get("url").and_then(|v| v.as_str()) { + writeln!( + indented(f).with_str(" |__"), + "Git Link....: {}", + self.style_metadata(url) + )?; + } + if let Some(cmd) = file.get("git_command").and_then(|v| v.as_str()) { + writeln!( + indented(f).with_str(" |__"), + "Git Command.: {}", + self.style_metadata(cmd) + )?; + } } Ok(()) } } -// pub struct PrettyFinding<'a>(&'a DetailsReporter, &'a Finding); -pub struct PrettyFinding<'a>( - &'a DetailsReporter, - &'a ReportMatch, - &'a cli::commands::scan::ScanArgs, -); -impl<'a> Display for PrettyFinding<'a> { + +pub struct PrettyFindingRecord<'a>(&'a DetailsReporter, &'a FindingReporterRecord); + +impl<'a> Display for PrettyFindingRecord<'a> { fn fmt(&self, f: &mut 
Formatter<'_>) -> FmtResult { - let PrettyFinding(reporter, rm, args) = self; - // Use Box String> to store the closure - let style_fn: Box String> = if rm.validation_success { - Box::new(|s: &str| reporter.style_active_creds(s).to_string()) // Convert StyledObject - // to String + let reporter = self.0; + let record = self.1; + let is_active = record.finding.validation.status == "Active Credential"; + let style_fn: Box String> = if is_active { + Box::new(|s| reporter.style_active_creds(s).to_string()) } else { - Box::new(|s: &str| reporter.style_match(s).to_string()) // Convert StyledObject to - // String + Box::new(|s| reporter.style_match(s).to_string()) }; - let matching_finding = - rm.m.groups - .captures - .get(1) - .or_else(|| rm.m.groups.captures.get(0)) - .map(|capture| capture.value.as_bytes()) - .unwrap_or(&[]); - writeln!(f, " |Finding.......: {}", style_fn(&Escaped(matching_finding).to_string()))?; - writeln!(f, " |Fingerprint...: {}", rm.m.finding_fingerprint)?; - writeln!(f, " |Confidence....: {}", rm.match_confidence.to_string())?; - writeln!(f, " |Entropy.......: {:.2}", rm.m.calculated_entropy)?; - let validation_status = if rm.validation_response_status == StatusCode::CONTINUE.as_u16() - || rm.validation_response_status == StatusCode::PRECONDITION_REQUIRED.as_u16() - { - "Not Attempted".to_string() - } else if rm.validation_success { - "Active Credential".to_string() - } else { - "Inactive Credential".to_string() - }; - writeln!( - f, - " |Validation....: {}", - if rm.validation_success { - reporter.style_finding_active_heading(&validation_status).to_string() - // Convert StyledObject to String - } else { - (&validation_status).to_string() - } - )?; - const MAX_RESPONSE_LENGTH: usize = 512; - if rm.validation_response_status != StatusCode::CONTINUE.as_u16() { - let truncated_body: String = - rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect(); - let ellipsis = - if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { 
"..." } else { "" }; + let finding = &record.finding; + writeln!(f, " |Finding.......: {}", style_fn(&finding.snippet))?; + writeln!(f, " |Fingerprint...: {}", finding.fingerprint)?; + writeln!(f, " |Confidence....: {}", finding.confidence)?; + writeln!(f, " |Entropy.......: {}", finding.entropy)?; + if is_active { writeln!( f, - " |__Response....: {}{}", - if rm.validation_success { - reporter.style_active_creds(&truncated_body).to_string() // Convert StyledObject - // to String - } else { - reporter.style_metadata(&truncated_body).to_string() // Convert StyledObject to - // String - }, - ellipsis + " |Validation....: {}", + reporter.style_finding_active_heading(&finding.validation.status).to_string() )?; + } else { + writeln!(f, " |Validation....: {}", finding.validation.status)?; } - writeln!( - f, - " |Language......: {}", - rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()) - )?; - - let source_span = &rm.m.location.source_span; - writeln!(f, " |Line Num......: {}", source_span.start.line)?; - - //print all the other areas where this was seen - for p in rm.origin.iter() { - match p { - Origin::File(e) => { - let display_path = if let Some(url) = reporter.jira_issue_url(&e.path, args) { - url - } else if let Some(url) = reporter.slack_message_url(&e.path) { - url - } else if let Some(mapped) = reporter.s3_display_path(&e.path) { - mapped - } else if let Some(mapped) = reporter.docker_display_path(&e.path) { - mapped - } else { - e.path.display().to_string() - }; - writeln!( - f, - " |Path..........: {}", - if rm.validation_success { - reporter.style_active_creds(&display_path).to_string() - } else { - display_path - } - )?; - } - Origin::Extended(e) => { - if let Some(p) = e.path() { - let display_path = p.display().to_string(); - writeln!( - f, - " |Path..........: {}", - if rm.validation_success { - reporter.style_active_creds(&display_path).to_string() - } else { - display_path - } - )?; - } - } - Origin::GitRepo(e) => { - 
reporter.write_git_metadata(f, e, args, source_span.start.line)?; - } - } + if finding.validation.status != "Not Attempted" { + writeln!(f, " |__Response....: {}", style_fn(&finding.validation.response))?; + } + writeln!(f, " |Language......: {}", finding.language)?; + writeln!(f, " |Line Num......: {}", finding.line)?; + writeln!(f, " |Path..........: {}", style_fn(&finding.path))?; + if let Some(git) = &finding.git_metadata { + reporter.write_git_metadata_value(f, git)?; } Ok(()) } } - -#[test] -fn test_pretty_format_with_nan_entropy_panics() { - use std::{ - io::Cursor, - sync::{Arc, Mutex}, - }; - - use http::StatusCode; - use url::Url; - - use crate::{ - blob::BlobMetadata, - cli::commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - location::{Location, OffsetSpan, SourcePoint, SourceSpan}, - matcher::{Match, SerializableCaptures}, - origin::{Origin, OriginSet}, - reporter::{DetailsReporter, Styles}, - }; - - // Construct a fake match with NaN entropy - let m = Match { - rule_name: "dummy_rule".into(), - rule_text_id: "dummy.id".into(), - finding_fingerprint: 123456789, - rule_finding_fingerprint: "abc".into(), - location: Location { - offset_span: OffsetSpan { start: 0, end: 1 }, - source_span: SourceSpan { - start: SourcePoint { line: 1, column: 0 }, - end: SourcePoint { line: 1, column: 10 }, - }, - }, - blob_id: crate::blob::BlobId::default(), - groups: SerializableCaptures { captures: vec![] }, - rule_confidence: crate::rules::rule::Confidence::Medium, - validation_success: true, - validation_response_status: StatusCode::OK.as_u16(), - validation_response_body: "OK".into(), - calculated_entropy: f32::NAN, // Here's the trigger - visible: true, - }; - - let _rm = crate::reporter::ReportMatch { - origin: 
OriginSet::new(Origin::from_file("dummy.txt".into()), vec![]), - blob_metadata: BlobMetadata { - id: m.blob_id, - num_bytes: 1, - mime_essence: None, - charset: None, - language: Some("Rust".into()), - }, - m, - comment: None, - visible: true, - match_confidence: crate::rules::rule::Confidence::Medium, - validation_response_body: "OK".into(), - validation_response_status: StatusCode::OK.as_u16(), - validation_success: true, - }; - - let store = Arc::new(Mutex::new(crate::findings_store::FindingsStore::new(".".into()))); - let reporter = - DetailsReporter { datastore: store, styles: Styles::new(false), only_valid: false }; - - let mut buf = Cursor::new(Vec::new()); - let args = ScanArgs { - // core execution / performance - num_jobs: 1, - no_dedup: false, - - // rule selection - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - - // input discovery - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: url::Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - // Jira options - jira_url: None, - jql: None, - max_results: 100, - - // Slack options - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - - // content filtering - content_filtering_args: ContentFilteringArgs { - 
max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - exclude: Vec::new(), // Exclude patterns - no_binary: true, - }, - - // scanning behaviour - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, // 30 minutes - - // output - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - - // display - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - - // This will panic if the entropy isn't checked for NaN - let _result = reporter.pretty_format(&mut buf, &args); - // assert!(result.is_err() || result.is_ok(), "Should not crash"); // remove this line if panic - // is expected pre-fix -} diff --git a/src/reporter/sarif_format.rs b/src/reporter/sarif_format.rs index 033d37c..ff771dd 100644 --- a/src/reporter/sarif_format.rs +++ b/src/reporter/sarif_format.rs @@ -1,274 +1,56 @@ -use std::collections::HashMap; +use std::collections::{BTreeMap, HashSet}; use rayon::prelude::*; use serde_sarif::sarif; use super::*; -use crate::{bstring_escape::Escaped, defaults::get_builtin_rules, origin::get_repo_url}; -#[derive(Hash, Eq, PartialEq)] -struct LocationKey { - file_path: String, - line: usize, - column_start: usize, - column_end: usize, - text: String, -} +use crate::defaults::get_builtin_rules; + impl DetailsReporter { - fn make_sarif_result( - &self, - finding: &Finding, - no_dedup: bool, - args: &cli::commands::scan::ScanArgs, - ) -> Result { - // Deduplicate exactly as in the JSON reporter - // let matches = self.deduplicate_matches(finding.matches.clone(), no_dedup); - // Deduplicate exactly as in the JSON reporter - but only if no_dedup is false - let matches = if no_dedup { - finding.matches.clone() - } else { - self.deduplicate_matches(finding.matches.clone(), no_dedup) - }; + fn record_to_sarif_result(&self, record: &FindingReporterRecord) -> Result { + let finding = 
&record.finding; + let artifact_location = + sarif::ArtifactLocationBuilder::default().uri(finding.path.clone()).build()?; + let region = sarif::RegionBuilder::default() + .start_line(finding.line as i64) + .start_column(finding.column_start as i64) + .end_line(finding.line as i64) + .end_column(finding.column_end as i64) + .snippet( + sarif::ArtifactContentBuilder::default().text(finding.snippet.clone()).build()?, + ) + .build()?; - let metadata = &finding.metadata; - - let mut location_map: HashMap> = HashMap::new(); - for rm in &matches { - let source_span = &rm.m.location.source_span; - let snippet = - rm.m.groups - .captures - .get(1) - .or_else(|| rm.m.groups.captures.get(0)) - .map(|capture| capture.value.as_bytes()) - .unwrap_or(&[]); - let key = LocationKey { - file_path: rm - .origin - .first() - .blob_path() - .map(|p| p.to_string_lossy().into_owned()) - .unwrap_or_default(), - line: source_span.start.line, - column_start: source_span.start.column, - column_end: source_span.end.column, - text: Escaped(snippet).to_string(), - }; - location_map.entry(key).or_default().push((&rm.origin, &rm.m)); + let mut props = BTreeMap::new(); + props.insert("validation_status".to_string(), serde_json::json!(finding.validation.status)); + props.insert("entropy".to_string(), serde_json::json!(finding.entropy)); + if let Some(git) = &finding.git_metadata { + props.insert("git_metadata".to_string(), git.clone()); } + let properties = + sarif::PropertyBagBuilder::default().additional_properties(props).build()?; - let mut fpu64: u64 = 0; + let location = sarif::LocationBuilder::default() + .physical_location( + sarif::PhysicalLocationBuilder::default() + .artifact_location(artifact_location) + .region(region) + .build()?, + ) + .properties(properties) + .build()?; - let locations: Vec = location_map - .into_iter() - .filter_map(|(key, matches)| { - let (prov, m) = matches[0]; - let source_span = &m.location.source_span; - let mut artifact_locations = Vec::new(); - let mut 
git_metadata_list = Vec::new(); - - fpu64 = m.finding_fingerprint; - - for p in prov.iter() { - match p { - Origin::File(e) => { - let uri = if let Some(url) = self.jira_issue_url(&e.path, args) { - url - } else if let Some(url) = self.slack_message_url(&e.path) { - url - } else if let Some(mapped) = self.s3_display_path(&e.path) { - mapped - } else { - e.path.display().to_string() - }; - artifact_locations.push( - sarif::ArtifactLocationBuilder::default().uri(uri).build().ok()?, - ); - } - Origin::Extended(e) => { - if let Some(p) = e.path() { - artifact_locations.push( - sarif::ArtifactLocationBuilder::default() - .uri(p.display().to_string()) - .build() - .ok()?, - ); - } - } - Origin::GitRepo(e) => { - // Extract and store Git metadata - if let Some(git_metadata) = self.extract_git_metadata(e, source_span) { - git_metadata_list.push(git_metadata); - } - - // Build Git artifact location - if let Some(cs) = &e.first_commit { - let repo_url = get_repo_url(&e.repo_path) - .unwrap_or_else(|_| { - e.repo_path.to_string_lossy().to_string().into() - }) - .trim_end_matches(".git") - .to_string(); - let git_url = format!( - "{}/blob/{}/{}#L{}", - repo_url, - cs.commit_metadata.commit_id, - cs.blob_path, - source_span.start.line - ); - artifact_locations.push( - sarif::ArtifactLocationBuilder::default() - .uri(git_url) - .build() - .ok()?, - ); - } - } - } - } - - if artifact_locations.is_empty() { - return None; - } - - let region = sarif::RegionBuilder::default() - .start_line(key.line as i64) - .start_column(key.column_start as i64) - .end_line(key.line as i64) - .end_column(key.column_end as i64) - .snippet(sarif::ArtifactContentBuilder::default().text(key.text).build().ok()?) 
- .build() - .ok()?; - - let logical_location = sarif::LogicalLocationBuilder::default() - .kind("blob") - .name(m.finding_fingerprint.to_string()) - .build() - .ok()?; - - let validation_status = - if m.validation_response_status == StatusCode::CONTINUE.as_u16() { - "Not Attempted" - } else if m.validation_success { - "Active Credential" - } else { - "Inactive Credential" - }; - - // Build combined properties including Git metadata and fingerprint - let mut props = std::collections::BTreeMap::new(); - props.insert("validation_status".to_string(), serde_json::json!(validation_status)); - - props.insert( - "entropy".to_string(), - serde_json::json!(format!("{:.2}", m.calculated_entropy)), - ); - - // Add the fingerprint property from the match - props.insert("fingerprint".to_string(), serde_json::json!(m.finding_fingerprint)); - - if !git_metadata_list.is_empty() { - props.insert("git_metadata".to_string(), serde_json::json!(git_metadata_list)); - } - - let properties = sarif::PropertyBagBuilder::default() - .additional_properties(props) - .build() - .ok()?; - - // Create locations for each artifact location - let locations = artifact_locations - .into_iter() - .map(|artifact_location| { - sarif::LocationBuilder::default() - .physical_location( - sarif::PhysicalLocationBuilder::default() - .artifact_location(artifact_location) - .region(region.clone()) - .build() - .ok()?, - ) - .logical_locations(vec![logical_location.clone()]) - .properties(properties.clone()) - .build() - .ok() - }) - .collect::>>()?; - Some(locations) - }) - .flatten() - .collect(); - // let message = sarif::MessageBuilder::default() - // .text(format!( - // "Rule {} found {} unique {}.\nFirst blob id matched: {}", - // metadata.rule_name, - // locations.len(), - // if locations.len() == 1 { "match" } else { "matches" }, - // first_match_blob_id - // )) - // .build()?; - // Create detailed message from first location's information - let detailed_msg = if let Some(first_match) = matches.first() { 
- let mut msg = format!( - "Rule {} found {} unique {}.\n", - metadata.rule_name, - locations.len(), - if locations.len() == 1 { "match" } else { "matches" } - ); - // Add file or Git information based on origin - // Get first origin of first match - we know this exists - let p = first_match.origin.first(); - match p { - Origin::File(e) => { - let uri = if let Some(url) = self.jira_issue_url(&e.path, args) { - url - } else if let Some(url) = self.slack_message_url(&e.path) { - url - } else if let Some(mapped) = self.s3_display_path(&e.path) { - mapped - } else { - e.path.display().to_string() - }; - msg.push_str(&format!("Location: {}\n", uri)); - } - Origin::Extended(e) => { - if let Some(p) = e.path() { - msg.push_str(&format!("Location: {}\n", p.display())); - } - } - Origin::GitRepo(e) => { - if let Some(cs) = &e.first_commit { - let repo_url = get_repo_url(&e.repo_path) - .unwrap_or_else(|_| e.repo_path.to_string_lossy().to_string().into()) - .trim_end_matches(".git") - .to_string(); - // Add commit and author information - let cmd = &cs.commit_metadata; - msg.push_str(&format!("Repository: {}\n", repo_url)); - msg.push_str(&format!("Commit: {}\n", cmd.commit_id)); - msg.push_str(&format!( - "Committer: {} <{}>\n", - String::from_utf8_lossy(&cmd.committer_name), - String::from_utf8_lossy(&cmd.committer_email) - )); - msg.push_str(&format!("File: {}", cs.blob_path)); - } - } - } - msg - } else { - format!("Rule {} found {} unique matches.", metadata.rule_name, locations.len(),) - }; - let message = sarif::MessageBuilder::default().text(detailed_msg).build()?; - let fingerprint_name = "fingerprint".to_string(); - let fingerprint = fpu64.to_string(); + let message = sarif::MessageBuilder::default() + .text(format!("Rule {} matched {}", record.rule.name, finding.path)) + .build()?; let result = sarif::ResultBuilder::default() - .rule_id(&metadata.rule_name) + .rule_id(&record.rule.name) .message(message) .kind(sarif::ResultKind::Review.to_string()) - 
.locations(locations) + .locations(vec![location]) .level(sarif::ResultLevel::Warning.to_string()) - .partial_fingerprints([(fingerprint_name, fingerprint)]) + .partial_fingerprints([("fingerprint".to_string(), finding.fingerprint.clone())]) .build()?; Ok(result) } @@ -276,54 +58,11 @@ impl DetailsReporter { pub fn sarif_format( &self, mut writer: W, - no_dedup: bool, + _no_dedup: bool, args: &cli::commands::scan::ScanArgs, ) -> Result<()> { - // Gather findings first - let mut findings = self.gather_findings()?; - - // If no_dedup is true, expand findings with multiple origins into separate findings - if no_dedup { - let mut expanded_findings = Vec::new(); - for finding in findings { - // Check matches with multiple origins - let matches_with_multiple_origins: Vec<_> = - finding.matches.iter().filter(|rm| rm.origin.len() > 1).collect(); - - if !matches_with_multiple_origins.is_empty() { - // For each match with multiple origins, create separate findings - for rm in matches_with_multiple_origins { - for origin in rm.origin.iter() { - // Create a single-origin match - let single_origin_rm = ReportMatch { - origin: OriginSet::new(origin.clone(), Vec::new()), - blob_metadata: rm.blob_metadata.clone(), - m: rm.m.clone(), - comment: rm.comment.clone(), - visible: rm.visible, - match_confidence: rm.match_confidence, - validation_response_body: rm.validation_response_body.clone(), - validation_response_status: rm.validation_response_status, - validation_success: rm.validation_success, - }; - - // Create a new finding with just this single-origin match - let new_finding = - Finding::new(finding.metadata.clone(), vec![single_origin_rm]); - expanded_findings.push(new_finding); - } - } - } else { - // If the finding has no matches with multiple origins, keep it as is - expanded_findings.push(finding); - } - } - findings = expanded_findings; - } - - // Filter only rules relevant to the findings - let finding_rule_ids: std::collections::HashSet<_> = - findings.iter().map(|f| 
f.metadata.rule_name.clone()).collect(); + let records = self.build_finding_records(args)?; + let finding_rule_ids: HashSet<_> = records.iter().map(|r| r.rule.name.clone()).collect(); let rules: Vec = get_builtin_rules(None)? .iter_rules() .par_bridge() @@ -366,10 +105,9 @@ impl DetailsReporter { ) .build()?; - let sarif_results: Vec = findings - .par_iter() - .filter_map(|f| self.make_sarif_result(f, no_dedup, args).ok()) - .collect(); + let sarif_results: Vec = + records.iter().filter_map(|r| self.record_to_sarif_result(r).ok()).collect(); + let run = sarif::RunBuilder::default().tool(tool).results(sarif_results).build()?; let sarif = sarif::SarifBuilder::default() .version(sarif::Version::V2_1_0.to_string()) diff --git a/src/s3.rs b/src/s3.rs index ed18a52..0f1fcef 100644 --- a/src/s3.rs +++ b/src/s3.rs @@ -2,12 +2,12 @@ use anyhow::{Context, Result}; use aws_config::{defaults, meta::region::RegionProviderChain, BehaviorVersion}; use aws_credential_types::Credentials; use aws_sdk_s3::{ + error::ProvideErrorMetadata, // for .code() + operation::list_objects_v2::ListObjectsV2Error, // modeled service error Client, - operation::list_objects_v2::ListObjectsV2Error, // modeled service error - error::ProvideErrorMetadata, // for .code() }; use aws_types::region::Region; -use reqwest; // HTTP client for HEAD fallback +use reqwest; // HTTP client for HEAD fallback pub async fn visit_bucket_objects( bucket: &str, @@ -43,9 +43,7 @@ where .configure(&config) .build() .await; - let conf = aws_sdk_s3::config::Builder::from(&config) - .credentials_provider(assume) - .build(); + let conf = aws_sdk_s3::config::Builder::from(&config).credentials_provider(assume).build(); Client::from_conf(conf) } else { Client::new(&config) @@ -66,7 +64,7 @@ where // On error, extract the modeled service error Err(err) => { - let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError + let svc_err: ListObjectsV2Error = err.into_service_error(); // from SdkError // If the bucket 
must be addressed at another region... if svc_err.code() == Some("PermanentRedirect") { diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 735e381..59bc536 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -21,14 +21,16 @@ use crate::{ findings_store, git_binary::{CloneMode, Git}, git_url::GitUrl, - github, gitlab, jira, + github, gitlab, + guesser::Guesser, + jira, matcher::{Match, Matcher, MatcherStats}, origin::{Origin, OriginSet}, rules_database::RulesDatabase, s3, scanner::processing::BlobProcessor, scanner_pool::ScannerPool, - slack, guesser::Guesser, PathBuf, + slack, PathBuf, }; pub type DatastoreMessage = (OriginSet, BlobMetadata, Vec<(Option, Match)>); @@ -291,7 +293,6 @@ pub async fn fetch_slack_messages( Ok(vec![output_dir]) } - pub async fn fetch_s3_objects( args: &scan::ScanArgs, datastore: &Arc>, @@ -330,10 +331,12 @@ pub async fn fetch_s3_objects( ); let blob = crate::blob::Blob::from_bytes(bytes); - if let Some((origin, blob_md, scored_matches)) = processor.run(origin, blob, args.no_dedup)? { + if let Some((origin, blob_md, scored_matches)) = + processor.run(origin, blob, args.no_dedup)? 
+ { // Wrap origin & metadata once: let origin_arc = Arc::new(origin); - let blob_arc = Arc::new(blob_md); + let blob_arc = Arc::new(blob_md); // Now build a batch of exactly one FindingsStoreMessage per Match let mut batch = Vec::with_capacity(scored_matches.len()); @@ -350,4 +353,4 @@ pub async fn fetch_s3_objects( .await?; Ok(()) -} \ No newline at end of file +} diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 63f7bee..33dc644 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -74,7 +74,6 @@ pub async fn run_async_scan( let slack_dirs = fetch_slack_messages(args, global_args, &datastore).await?; input_roots.extend(slack_dirs); - // Save Docker images if specified if !args.input_specifier_args.docker_image.is_empty() { let clone_root = { @@ -129,7 +128,6 @@ pub async fn run_async_scan( )?; } - if !args.no_dedup { // Final deduplication step before validation (or before reporting) let reporter = crate::reporter::DetailsReporter { diff --git a/tests/int_s3.rs b/tests/int_s3.rs index c44afe8..c8d6b9c 100644 --- a/tests/int_s3.rs +++ b/tests/int_s3.rs @@ -4,25 +4,26 @@ use kingfisher::s3::visit_bucket_objects; #[tokio::test] async fn test_visit_public_bucket() -> Result<()> { let mut objects = Vec::new(); - visit_bucket_objects("awsglue-datasets", Some("examples/us-legislators/all/"), None, None, |key, data| { - objects.push((key, data)); - Ok(()) - }) + visit_bucket_objects( + "awsglue-datasets", + Some("examples/us-legislators/all/"), + None, + None, + |key, data| { + objects.push((key, data)); + Ok(()) + }, + ) .await?; assert!( objects.iter().any(|(k, _)| k.ends_with("events.json")), "events.json object not found" ); - let creds = objects - .iter() - .find(|(k, _)| k.ends_with("events.json")) - .expect("events.json object"); + let creds = + objects.iter().find(|(k, _)| k.ends_with("events.json")).expect("events.json object"); let body = std::str::from_utf8(&creds.1)?; - assert!( - body.contains("Q4450263"), - "expected events.json 
file" - ); + assert!(body.contains("Q4450263"), "expected events.json file"); Ok(()) -} \ No newline at end of file +} From dcf31e8150620e5b718bcf05467e4ce55dc94aff Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 4 Aug 2025 08:58:20 -0700 Subject: [PATCH 101/357] refactored output reporting and formatting logic --- f1.patch | 719 ------------------------------------------------------- 1 file changed, 719 deletions(-) delete mode 100644 f1.patch diff --git a/f1.patch b/f1.patch deleted file mode 100644 index a132a31..0000000 --- a/f1.patch +++ /dev/null @@ -1,719 +0,0 @@ -diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs -index 9fcb1ecdfe8decc60278848c4a7be43cc9ebee70..b600f9f65838e52ce5dc3d7bb3bb1a5d5ff2bcaf 100644 ---- a/src/reporter/json_format.rs -+++ b/src/reporter/json_format.rs -@@ -1,436 +1,80 @@ --use http::StatusCode; --use serde_json::json; -- - use super::*; --use crate::bstring_escape::Escaped; - - impl DetailsReporter { -- pub fn deduplicate_matches( -- &self, -- matches: Vec, -- no_dedup: bool, -- ) -> Vec { -- if no_dedup { -- return matches; -- } -- -- use std::collections::HashMap; -- let mut by_fp: HashMap = HashMap::new(); -- -- for rm in matches { -- let fp = rm.m.finding_fingerprint; -- if let Some(existing) = by_fp.get_mut(&fp) { -- // merge origin sets (keep first origin, append the rest) -- for o in rm.origin.iter() { -- if !existing.origin.iter().any(|e| e == o) { -- existing.origin = OriginSet::new( -- existing.origin.first().clone(), -- existing -- .origin -- .iter() -- .skip(1) -- .cloned() -- .chain(std::iter::once(o.clone())) -- .collect(), -- ); -- } -- } -- continue; -- } -- by_fp.insert(fp, rm); -- } -- by_fp.into_values().collect() -- } -- -- pub fn gather_json_findings( -- &self, -- args: &cli::commands::scan::ScanArgs, -- ) -> Result> { -- let mut matches = self.get_filtered_matches()?; -- if !args.no_dedup { -- matches = self.deduplicate_matches(matches, args.no_dedup); -- } -- -- let mut json_findings 
= Vec::new(); -- for rm in matches { -- let source_span = &rm.m.location.source_span; -- let line_num = source_span.start.line; -- -- let snippet = Escaped( -- rm.m.groups -- .captures -- .get(1) -- .or_else(|| rm.m.groups.captures.get(0)) -- .map(|capture| capture.value.as_bytes()) -- .unwrap_or_default(), -- ) -- .to_string(); -- -- let validation_status = if rm.validation_success { -- "Active Credential" -- } else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() { -- "Not Attempted" -- } else { -- "Inactive Credential" -- }; -- -- const MAX_RESPONSE_LENGTH: usize = 512; -- let truncated_body: String = -- rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect(); -- let ellipsis = -- if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" }; -- let response_body = format!("{}{}", truncated_body, ellipsis); -- -- // Call extract_git_metadata on each GitRepo origin and take the first non-null result. -- let git_metadata_val = rm -- .origin -- .iter() -- .filter_map(|origin| { -- if let Origin::GitRepo(e) = origin { -- self.extract_git_metadata(e, source_span) -- } else { -- None -- } -- }) -- .next() -- .unwrap_or(serde_json::Value::Null); -- -- // Collect a file path from an Origin::File, if available. 
-- let file_path = rm -- .origin -- .iter() -- .find_map(|origin| match origin { -- Origin::File(e) => { -- if let Some(url) = self.jira_issue_url(&e.path, args) { -- Some(url) -- } else if let Some(url) = self.slack_message_url(&e.path) { -- Some(url) -- } else if let Some(mapped) = self.s3_display_path(&e.path) { -- Some(mapped) -- } else if let Some(mapped) = self.docker_display_path(&e.path) { -- Some(mapped) -- } else { -- Some(e.path.display().to_string()) -- } -- } -- Origin::Extended(e) => e.path().map(|p| p.display().to_string()), -- _ => None, -- }) -- .unwrap_or_default(); -- -- let match_json = json!({ -- "rule": { -- "name": rm.m.rule_name, -- "id": rm.m.rule_text_id, -- }, -- "finding": { -- "snippet": snippet, -- "fingerprint": rm.m.finding_fingerprint.to_string(), -- "confidence": rm.match_confidence.to_string(), -- "entropy": format!("{:.2}", rm.m.calculated_entropy), -- "validation": { -- "status": validation_status, -- "response": response_body, -- }, -- "language": rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()), -- "line": line_num, -- "column_start": source_span.start.column, -- "column_end": source_span.end.column, -- "path": file_path, -- "git_metadata": git_metadata_val -- } -- }); -- -- let finding_json = json!({ -- "id": rm.m.rule_text_id, -- "matches": [ match_json ] -- }); -- json_findings.push(finding_json); -- } -- Ok(json_findings) -- } - pub fn json_format( - &self, - mut writer: W, - args: &cli::commands::scan::ScanArgs, - ) -> Result<()> { -- let mut findings = Vec::new(); -- -- // Get filtered matches -- let mut matches = self.get_filtered_matches()?; -- -- // Apply deduplication only if requested -- if !args.no_dedup { -- matches = self.deduplicate_matches(matches, args.no_dedup); -- } -- -- // For each match, handle it based on the no_dedup flag -- for rm in matches { -- if args.no_dedup && rm.origin.len() > 1 { -- // For no_dedup and multiple origins, create separate findings for each origin -- for 
origin in rm.origin.iter() { -- // Create a single-origin version of this match -- let single_origin_rm = ReportMatch { -- origin: OriginSet::new(origin.clone(), Vec::new()), -- blob_metadata: rm.blob_metadata.clone(), -- m: rm.m.clone(), -- comment: rm.comment.clone(), -- visible: rm.visible, -- match_confidence: rm.match_confidence, -- validation_response_body: rm.validation_response_body.clone(), -- validation_response_status: rm.validation_response_status, -- validation_success: rm.validation_success, -- }; -- -- // Process this single-origin match into a JSON finding -- let json_finding = self.process_match_to_json(&single_origin_rm, args)?; -- findings.push(json_finding); -- } -- } else { -- // Process normally for deduped matches or matches with only one origin -- let json_finding = self.process_match_to_json(&rm, args)?; -- findings.push(json_finding); -- } -- } -- -- // Write the JSON output -- if !findings.is_empty() { -- serde_json::to_writer_pretty(&mut writer, &findings)?; -+ let records = self.build_finding_records(args)?; -+ if !records.is_empty() { -+ serde_json::to_writer_pretty(&mut writer, &records)?; - writeln!(writer)?; - } - Ok(()) - } - -- // Add a helper method to convert a ReportMatch to a JSON finding -- pub fn process_match_to_json( -- &self, -- rm: &ReportMatch, -- args: &cli::commands::scan::ScanArgs, -- ) -> Result { -- // Extract the relevant data from the match as you already do in your current implementation -- let source_span = &rm.m.location.source_span; -- let line_num = source_span.start.line; -- -- let snippet = Escaped( -- rm.m.groups -- .captures -- .get(1) -- .or_else(|| rm.m.groups.captures.get(0)) -- .map(|capture| capture.value.as_bytes()) -- .unwrap_or_default(), -- ) -- .to_string(); -- -- let validation_status = if rm.validation_success { -- "Active Credential" -- } else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() { -- "Not Attempted" -- } else { -- "Inactive Credential" -- }; -- -- const 
MAX_RESPONSE_LENGTH: usize = 512; -- let truncated_body: String = -- rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect(); -- let ellipsis = -- if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" }; -- let response_body = format!("{}{}", truncated_body, ellipsis); -- -- // Call extract_git_metadata on each GitRepo origin and take the first non-null result. -- let git_metadata_val = rm -- .origin -- .iter() -- .filter_map(|origin| { -- if let Origin::GitRepo(e) = origin { -- self.extract_git_metadata(e, source_span) -- } else { -- None -- } -- }) -- .next() -- .unwrap_or(serde_json::Value::Null); -- -- // Collect a file path from an Origin::File, if available. -- let file_path = rm -- .origin -- .iter() -- .find_map(|origin| { -- if let Origin::File(e) = origin { -- if let Some(url) = self.jira_issue_url(&e.path, args) { -- Some(url) -- } else if let Some(url) = self.slack_message_url(&e.path) { -- Some(url) -- } else if let Some(mapped) = self.s3_display_path(&e.path) { -- Some(mapped) -- } else if let Some(mapped) = self.docker_display_path(&e.path) { -- Some(mapped) -- } else { -- Some(e.path.display().to_string()) -- } -- } else if let Origin::Extended(e) = origin { -- e.path().map(|p| p.display().to_string()) -- } else { -- None -- } -- }) -- .unwrap_or_default(); -- -- let match_json = json!({ -- "rule": { -- "name": rm.m.rule_name, -- "id": rm.m.rule_text_id, -- }, -- "finding": { -- "snippet": snippet, -- "fingerprint": rm.m.finding_fingerprint.to_string(), -- "confidence": rm.match_confidence.to_string(), -- "entropy": format!("{:.2}", rm.m.calculated_entropy), -- "validation": { -- "status": validation_status, -- "response": response_body, -- }, -- "language": rm.blob_metadata.language.clone().unwrap_or_else(|| "Unknown".to_string()), -- "line": line_num, -- "column_start": source_span.start.column, -- "column_end": source_span.end.column, -- "path": file_path, -- "git_metadata": git_metadata_val -- } -- }); -- 
-- let finding_json = json!({ -- "id": rm.m.rule_text_id, -- "matches": [ match_json ] -- }); -- -- Ok(finding_json) -- } -- // // Modified JSON format to pass args to gather_json_findings -- // pub fn json_format( -- // &self, -- // mut writer: W, -- // args: &cli::commands::scan::ScanArgs, -- // ) -> Result<()> { -- // let findings = self.gather_json_findings(args)?; -- // if !findings.is_empty() { -- // serde_json::to_writer_pretty(&mut writer, &findings)?; -- // writeln!(writer)?; -- // } -- // Ok(()) -- // } -- - pub fn jsonl_format( - &self, - mut writer: W, - args: &cli::commands::scan::ScanArgs, - ) -> Result<()> { -- // Get filtered matches -- let mut matches = self.get_filtered_matches()?; -- -- // Apply deduplication only if requested -- if !args.no_dedup { -- matches = self.deduplicate_matches(matches, args.no_dedup); -- } -- -- // For each match, handle it based on the no_dedup flag -- for rm in matches { -- if args.no_dedup && rm.origin.len() > 1 { -- // For no_dedup and multiple origins, create separate findings for each origin -- for origin in rm.origin.iter() { -- // Create a single-origin version of this match -- let single_origin_rm = ReportMatch { -- origin: OriginSet::new(origin.clone(), Vec::new()), -- blob_metadata: rm.blob_metadata.clone(), -- m: rm.m.clone(), -- comment: rm.comment.clone(), -- visible: rm.visible, -- match_confidence: rm.match_confidence, -- validation_response_body: rm.validation_response_body.clone(), -- validation_response_status: rm.validation_response_status, -- validation_success: rm.validation_success, -- }; -- -- // Process this single-origin match into a JSON finding and write it -- let json_finding = self.process_match_to_json(&single_origin_rm, args)?; -- serde_json::to_writer(&mut writer, &json_finding)?; -- writeln!(writer)?; -- } -- } else { -- // Process normally for deduped matches or matches with only one origin -- let json_finding = self.process_match_to_json(&rm, args)?; -- serde_json::to_writer(&mut 
writer, &json_finding)?; -- writeln!(writer)?; -- } -+ let records = self.build_finding_records(args)?; -+ for record in records { -+ serde_json::to_writer(&mut writer, &record)?; -+ writeln!(writer)?; - } - Ok(()) - } -- // // Modified JSONL format to pass args to gather_json_findings -- // pub fn jsonl_format( -- // &self, -- // mut writer: W, -- // args: &cli::commands::scan::ScanArgs, -- // ) -> Result<()> { -- // let findings = self.gather_json_findings(args)?; -- // for finding in findings { -- // serde_json::to_writer(&mut writer, &finding)?; -- // writeln!(writer)?; -- // } -- // Ok(()) -- // } - } - - #[cfg(test)] - mod tests { -- use std::{ -- io::Cursor, -- path::PathBuf, -- sync::{Arc, Mutex}, -- }; -- -- use anyhow::Result; -- use serde_json::Value; -- use url::Url; -- - use super::*; - use crate::{ - blob::BlobId, -- cli::commands::{ -- github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, -- inputs::{ContentFilteringArgs, InputSpecifierArgs}, -- output::OutputArgs, -- rules::RuleSpecifierArgs, -- scan::ConfidenceLevel, -+ cli::commands::github::GitHubRepoType, -+ cli::commands::output::{OutputArgs, ReportOutputFormat}, -+ cli::commands::scan::{ -+ ConfidenceLevel, ContentFilteringArgs, GitCloneMode, GitHistoryMode, -+ InputSpecifierArgs, RuleSpecifierArgs, - }, - findings_store::FindingsStore, - location::{Location, OffsetSpan, SourcePoint, SourceSpan}, -- matcher::{Match, SerializableCapture, SerializableCaptures}, -- origin::{Origin, OriginSet}, -- reporter::{ReportMatch, Styles}, -- rules::rule::Confidence, -- util::intern, -+ matcher::serializable::{SerializableCapture, SerializableCaptures}, -+ matcher::Match, -+ origin::Origin, -+ reporter::styles::Styles, -+ scanner::test_utils::intern, - }; -+ use std::{ -+ io::Cursor, -+ path::PathBuf, -+ sync::{Arc, Mutex}, -+ }; -+ use url::Url; - - fn create_default_args() -> cli::commands::scan::ScanArgs { - use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope - - 
cli::commands::scan::ScanArgs { - num_jobs: 1, - no_dedup: false, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - // local path / git URL inputs - path_inputs: Vec::new(), - git_url: Vec::new(), - - // GitHub - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - - // GitLab -diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs -index 9fcb1ecdfe8decc60278848c4a7be43cc9ebee70..b600f9f65838e52ce5dc3d7bb3bb1a5d5ff2bcaf 100644 ---- a/src/reporter/json_format.rs -+++ b/src/reporter/json_format.rs -@@ -458,240 +102,168 @@ mod tests { - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - exclude: Vec::new(), // Exclude patterns - no_binary: true, - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - } - } - -- // Helper function to create a mock Match - fn create_mock_match( - rule_name: &str, - rule_text_id: &str, - rule_finding_fingerprint: &str, - validation_success: bool, - ) -> Match { - Match { - location: Location { - offset_span: OffsetSpan { start: 10, end: 20 }, - source_span: SourceSpan { - start: SourcePoint { line: 5, column: 10 }, - end: SourcePoint { line: 5, column: 20 }, - }, - }, - groups: SerializableCaptures { - captures: vec![SerializableCapture { - name: Some("token".to_string()), - match_number: 1, - start: 10, - end: 20, - 
value: "mock_token".into(), - }], - }, - blob_id: BlobId::new(b"mock_blob"), - finding_fingerprint: 0123, - rule_finding_fingerprint: intern(rule_finding_fingerprint), - rule_text_id: intern(rule_text_id), -- rule_name: intern(rule_name), //.to_string(), -+ rule_name: intern(rule_name), - rule_confidence: Confidence::Medium, - validation_response_body: "validation response".to_string(), - validation_response_status: 200, - validation_success, - calculated_entropy: 4.5, - visible: true, - } - } - -- // Helper function to create a mock DetailsReporter - fn setup_mock_reporter(matches: Vec) -> DetailsReporter { - let mut datastore = FindingsStore::new(PathBuf::from("/tmp")); -- // Create mock origin and blob metadata for the first test match - if !matches.is_empty() { - let blob_metadata = BlobMetadata { - id: BlobId::new(b"mock_blob"), - num_bytes: 1024, - mime_essence: Some("text/plain".to_string()), - charset: Some("UTF-8".to_string()), - language: Some("Rust".to_string()), - }; - let dedup = true; -- // Add matches to datastore - for m in matches.clone() { - datastore.record( - vec![( - Arc::new(OriginSet::new( -- // OriginSet -- Arc<…> - Origin::from_file(PathBuf::from("/mock/path/file.rs")), - vec![], - )), -- Arc::new(blob_metadata.clone()), // BlobMetadata -- Arc<…> -+ Arc::new(blob_metadata.clone()), - m.m.clone(), - )], - dedup, - ); - } - } - DetailsReporter { - datastore: Arc::new(Mutex::new(datastore)), - styles: Styles::new(false), - only_valid: false, - } - } -+ - #[test] - fn test_json_format() -> Result<()> { -- // Create a mock match with successful validation - let mock_match = - create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true); - let matches = vec![ReportMatch { - origin: OriginSet::new(Origin::from_file(PathBuf::from("/mock/path/file.rs")), vec![]), - blob_metadata: BlobMetadata { - id: BlobId::new(b"mock_blob"), - num_bytes: 1024, - mime_essence: Some("text/plain".to_string()), - charset: Some("UTF-8".to_string()), - 
language: Some("Rust".to_string()), - }, - m: mock_match, - comment: None, - match_confidence: Confidence::Medium, - visible: true, - validation_response_body: "validation response".to_string(), - validation_response_status: 200, - validation_success: true, - }]; - let reporter = setup_mock_reporter(matches); - let mut output = Cursor::new(Vec::new()); -- // Call the json_format method - reporter.json_format(&mut output, &create_default_args())?; -- // Parse and validate JSON output -- let json_output: Vec = serde_json::from_slice(&output.into_inner())?; -+ let json_output: Vec = serde_json::from_slice(&output.into_inner())?; - assert!(!json_output.is_empty(), "JSON output should not be empty"); -- let first_finding = &json_output[0]; -- assert!(first_finding.get("id").is_some(), "Finding should have an 'id'"); -- assert!(first_finding.get("matches").is_some(), "Finding should have 'matches'"); -- // Validate the structure of the first match -- let matches = first_finding.get("matches").unwrap().as_array().unwrap(); -- let first_match = &matches[0]; -- assert_eq!(first_match.get("rule").unwrap().get("name").unwrap(), "MockRule"); -- assert_eq!(first_match.get("finding").unwrap().get("language").unwrap(), "Rust"); -+ let first = &json_output[0]; -+ assert_eq!(first["rule"]["name"], "MockRule"); -+ assert_eq!(first["finding"]["language"], "Rust"); - Ok(()) - } - -- // #[test] -- // fn test_jsonl_format() -> Result<()> { -- // // Create a mock match with successful validation -- // let mock_match = -- // create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true); -- // let matches = vec![ReportMatch { -- // origin: OriginSet::new( -- // Origin::from_file(PathBuf::from("/mock/path/file.rs")), -- // vec![], -- // ), -- // blob_metadata: BlobMetadata { -- // id: BlobId::new(b"mock_blob"), -- // num_bytes: 1024, -- // mime_essence: Some("text/plain".to_string()), -- // charset: Some("UTF-8".to_string()), -- // language: Some("Rust".to_string()), -- // 
}, -- // m: mock_match, -- // comment: None, -- // match_confidence: Confidence::Medium, -- // visible: true, -- // validation_response_body: "validation response".to_string(), -- // validation_response_status: 200, -- // validation_success: true, -- // }]; -- // let reporter = setup_mock_reporter(matches); -- // let mut output = Cursor::new(Vec::new()); -- // // Call the jsonl_format method -- // reporter.jsonl_format(&mut output, &create_default_args())?; -- // // Split output into lines and validate -- // let jsonl_output = String::from_utf8(output.into_inner())?; -- // let lines: Vec<&str> = jsonl_output.lines().collect(); -- // assert!(!lines.is_empty(), "JSONL output should not be empty"); -- // for line in &lines { -- // let json_value: serde_json::Value = serde_json::from_str(line)?; -- // assert!( -- // json_value.get("rule_name").is_some(), -- // "Each line should have a 'rule_name'" -- // ); -- // assert!( -- // json_value.get("matches").is_some(), -- // "Each line should have 'matches'" -- // ); -- // } -- // Ok(()) -- // } -- - #[test] - fn test_validation_status_in_json() -> Result<()> { -- // Test validation status in JSON output - let test_cases = vec![(true, "Active Credential"), (false, "Inactive Credential")]; - for (validation_success, expected_status) in test_cases { - let mock_match = create_mock_match( - "MockRule", - "mock_rule_1", - "mock_finding_fingerprint", - validation_success, - ); - let matches = vec![ReportMatch { - origin: OriginSet::new( - Origin::from_file(PathBuf::from("/mock/path/file.rs")), - vec![], - ), - blob_metadata: BlobMetadata { - id: BlobId::new(b"mock_blob"), - num_bytes: 1024, - mime_essence: Some("text/plain".to_string()), - charset: Some("UTF-8".to_string()), - language: Some("Rust".to_string()), - }, - m: mock_match, - comment: None, - match_confidence: Confidence::Medium, - visible: true, - validation_response_body: "validation response".to_string(), - validation_response_status: 200, - validation_success, - }]; 
- let reporter = setup_mock_reporter(matches); - let mut output = Cursor::new(Vec::new()); -- // Call the json_format method - reporter.json_format(&mut output, &create_default_args())?; -- // Parse and validate JSON output -- let json_output: Vec = serde_json::from_slice(&output.into_inner())?; -+ let json_output: Vec = serde_json::from_slice(&output.into_inner())?; - assert!(!json_output.is_empty(), "JSON output should not be empty"); -- let first_finding = &json_output[0]; -- let matches = first_finding.get("matches").unwrap().as_array().unwrap(); -- let first_match = &matches[0]; -- let validation_status = first_match -- .get("finding") -- .unwrap() -- .get("validation") -- .unwrap() -- .get("status") -- .unwrap() -- .as_str() -- .unwrap(); -+ let first = &json_output[0]; -+ let validation_status = first["finding"]["validation"]["status"].as_str().unwrap(); - assert_eq!(validation_status, expected_status); - } - Ok(()) - } - } From 1f7653c28826199bbc7cedeaad49dfe8c54bf342 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 4 Aug 2025 09:09:51 -0700 Subject: [PATCH 102/357] Update src/reporter.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Mick Grove --- src/reporter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reporter.rs b/src/reporter.rs index 5159651..f639c6f 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -363,8 +363,8 @@ impl DetailsReporter { FindingReporterRecord { rule: RuleMetadata { - name: rm.m.rule_name.to_string().clone(), - id: rm.m.rule_text_id.to_string().clone(), + name: rm.m.rule_name.to_string(), + id: rm.m.rule_text_id.to_string(), }, finding: FindingRecordData { snippet, From 55f2c27ed5d6eae7326ad9e3b1c36e78ac048508 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 4 Aug 2025 09:21:49 -0700 Subject: [PATCH 103/357] fixed issues found by pr review --- src/reporter.rs | 57 ++++--------------------------------- src/reporter/json_format.rs | 1 - 2 files changed, 5 
insertions(+), 53 deletions(-) diff --git a/src/reporter.rs b/src/reporter.rs index f639c6f..4669484 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -23,7 +23,7 @@ mod json_format; mod pretty_format; mod sarif_format; pub mod styles; -use std::{hash::Hash, io::IsTerminal}; +use std::io::IsTerminal; use styles::{StyledObject, Styles}; @@ -167,19 +167,6 @@ impl DetailsReporter { None } - fn gather_findings(&self) -> Result> { - let metadata_list = self.get_finding_data()?; - let all_matches = self.get_filtered_matches()?; - let mut findings = Vec::new(); - for md in metadata_list { - // Filter matches that belong to this metadata if needed - let matches_for_md = - all_matches.iter().filter(|m| m.m.rule_name == md.rule_name).cloned().collect(); - findings.push(Finding::new(md.clone(), matches_for_md)); - } - Ok(findings) - } - fn process_matches(&self, only_valid: bool, filter_visible: bool) -> Result> { let datastore = self.datastore.lock().unwrap(); Ok(datastore @@ -394,26 +381,6 @@ impl DetailsReporter { Ok(matches.iter().map(|rm| self.build_finding_record(rm, args)).collect()) } - fn get_finding_data(&self) -> Result> { - let datastore = self.datastore.lock().unwrap(); - Ok(datastore - .get_finding_data_iter() - .filter(|metadata| { - if self.only_valid { - datastore.get_matches().iter().any(|msg| { - let (_, _, match_item) = &**msg; - match_item.rule_name == metadata.rule_name - && match_item.validation_success - && match_item.validation_response_status - != StatusCode::CONTINUE.as_u16() - }) - } else { - true - } - }) - .collect()) - } - fn style_finding_heading(&self, val: D) -> StyledObject { self.styles.style_finding_heading.apply_to(val) } @@ -475,13 +442,7 @@ impl Reportable for DetailsReporter { } } } -/// A group of matches that all have the same rule and capture group content -#[derive(Serialize, JsonSchema)] -pub(crate) struct Finding { - #[serde(flatten)] - metadata: finding_data::FindingMetadata, - matches: Vec, -} + /// A match produced by one 
of kingfisher's rules. /// This corresponds to a single location. #[derive(Serialize, JsonSchema, Clone)] @@ -494,18 +455,14 @@ pub struct ReportMatch { #[serde(flatten)] pub m: Match, - /// An optional score assigned to the match - // #[validate(range(min = 0.0, max = 1.0))] - // score: Option, - /// An optional comment assigned to the match pub comment: Option, + /// The confidence level of the match pub match_confidence: Confidence, + /// Whether the match is visible in the output pub visible: bool, - /// An optional status assigned to the match - // status: Option, /// Validation Body pub validation_response_body: String, @@ -566,8 +523,4 @@ impl From for ReportMatch { } } } -impl Finding { - fn new(metadata: finding_data::FindingMetadata, matches: Vec) -> Self { - Self { metadata, matches } - } -} + diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index b64a777..154bb58 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -1,5 +1,4 @@ use super::*; -use serde_json::Value; impl DetailsReporter { pub fn json_format( From be35bb532f7bdca0d5f9d5edbdddb7dee286ba62 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 4 Aug 2025 12:32:19 -0700 Subject: [PATCH 104/357] added progress bar to s3 downloads, and attempting to fix linux-arm64 test failure due to code 143 --- .github/workflows/ci.yml | 2 ++ CHANGELOG.md | 1 + src/scanner/repos.rs | 23 ++++++++++++++++++++++- src/scanner/runner.rs | 1 + 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 389b9c3..27e1f98 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,6 +24,8 @@ jobs: run: make ubuntu-arm64 - name: Run tests run: make tests + env: + CARGO_BUILD_JOBS: 1 macos-arm64: name: macOS arm64 diff --git a/CHANGELOG.md b/CHANGELOG.md index 6483f58..59f655a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ All notable changes to this project will be documented in this 
file. ## [1.32.0] - Added support for scanning AWS S3 buckets via `--s3-bucket` and optional `--s3-prefix` - Added `--role-arn` and `--aws-local-profile` flags for S3 authentication alongside `KF_AWS_KEY`/`KF_AWS_SECRET` +- Added progress bar for scanning s3 buckets - Refactored output reporting and formatting logic ## [1.31.0] diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 59bc536..ffa29ea 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -300,6 +300,7 @@ pub async fn fetch_s3_objects( matcher_stats: &Mutex, enable_profiling: bool, shared_profiler: Arc, + progress_enabled: bool, ) -> Result<()> { let Some(bucket) = args.input_specifier_args.s3_bucket.as_deref() else { return Ok(()); @@ -320,9 +321,25 @@ pub async fn fetch_s3_objects( )?; let guesser = Guesser::new().expect("should be able to create filetype guesser"); let mut processor = BlobProcessor { matcher, guesser }; + + let progress = if progress_enabled { + let style = + ProgressStyle::with_template("{spinner} {msg} ({pos} objects) [{elapsed_precise}]") + .expect("progress bar style template should compile"); + let pb = ProgressBar::new_spinner().with_style(style).with_message("Fetching S3 objects"); + pb.enable_steady_tick(Duration::from_millis(500)); + pb + } else { + ProgressBar::hidden() + }; + + let bucket_name = bucket.to_string(); + let pb = progress.clone(); + + let bucket_name = bucket.to_string(); - s3::visit_bucket_objects(bucket, prefix, role_arn, profile, |key, bytes| { + s3::visit_bucket_objects(bucket, prefix, role_arn, profile, move |key, bytes| { let origin = OriginSet::new( Origin::from_extended(serde_json::json!({ "path": format!("s3://{}/{}", bucket_name, key) @@ -348,9 +365,13 @@ pub async fn fetch_s3_objects( let added = datastore.lock().unwrap().record(batch, !args.no_dedup); debug!("Added {} new S3 blobs", added); } + pb.inc(1); Ok(()) }) .await?; + let total = progress.position(); + progress.finish_with_message(format!("Fetched {} S3 objects", total)); 
+ Ok(()) } diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 33dc644..10b6e51 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -107,6 +107,7 @@ pub async fn run_async_scan( &matcher_stats, enable_profiling, Arc::clone(&shared_profiler), + progress_enabled, ) .await?; From 28fd24c9b45ba843536e64e2d22b96a10a5bcd61 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 4 Aug 2025 19:32:19 -0700 Subject: [PATCH 105/357] - Fixed header precedence so custom HTTP validation headers like "Accept" are preserved - Added new Heroku rule --- CHANGELOG.md | 4 ++++ README.md | 2 ++ data/rules/heroku.yml | 33 ++++++++++++++++++++++++++++++++ src/scanner/repos.rs | 1 - src/validation/httpvalidation.rs | 23 ++++++++++++++++------ 5 files changed, 56 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59f655a..4f169a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this project will be documented in this file. +## [1.33.0] +- Fixed header precedence so custom HTTP validation headers like `Accept` are preserved +- Added new Heroku rule + ## [1.32.0] - Added support for scanning AWS S3 buckets via `--s3-bucket` and optional `--s3-prefix` - Added `--role-arn` and `--aws-local-profile` flags for S3 authentication alongside `KF_AWS_KEY`/`KF_AWS_SECRET` diff --git a/README.md b/README.md index b60ae55..ef68c1f 100644 --- a/README.md +++ b/README.md @@ -444,6 +444,8 @@ KF_SLACK_TOKEN="xoxp-1234..." kingfisher scan \ | `KF_JIRA_TOKEN` | Jira API token | | `KF_SLACK_TOKEN` | Slack API token | | `KF_DOCKER_TOKEN` | Docker registry token (`user:pass` or bearer token). 
If unset, credentials from the Docker keychain are used | +| `KF_AWS_KEY` and `KF_AWS_SECRET` | AWS Credentials to use with S3 bucket scanning | + Set them temporarily per command: ```bash diff --git a/data/rules/heroku.yml b/data/rules/heroku.yml index 817a2a4..3686e7a 100644 --- a/data/rules/heroku.yml +++ b/data/rules/heroku.yml @@ -32,3 +32,36 @@ rules: - report_response: true - type: StatusMatch status: [200] + - name: Heroku API Key (Platform Key) + id: kingfisher.heroku.2 + pattern: | + (?xi) + \b + ( + HRKU-[A-Z0-9_]{60} + ) + confidence: medium + min_entropy: 4.0 + validation: + type: Http + content: + request: + method: GET + url: "https://api.heroku.com/apps" + headers: + Authorization: "Bearer {{TOKEN}}" + Accept: "application/vnd.heroku+json;version=3" + response_matcher: + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"id":' + - '"name":' + match_all_words: true + references: + - https://devcenter.heroku.com/articles/platform-api-quickstart + examples: + - "HRKU-AADVTUYvfjT4nhuJ07bEfAUq9GS3PkTdyWuNBiXYmYMg_____wgAf6OTnGyh" + - "HRKU-AABW9W1iH9NHEIlAABq9nZUq9GS3PkTdyWuNBiXYmYMg_____wV2XYIXxm5p" + - "HRKU-AAWpqREEr2V1gqh6urSXWYUq9GS3PkTdyWuNBiXYmYMg_____wNI1VGijd8y" diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index ffa29ea..75fa919 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -333,7 +333,6 @@ pub async fn fetch_s3_objects( ProgressBar::hidden() }; - let bucket_name = bucket.to_string(); let pb = progress.clone(); diff --git a/src/validation/httpvalidation.rs b/src/validation/httpvalidation.rs index 1e15605..cc866b3 100644 --- a/src/validation/httpvalidation.rs +++ b/src/validation/httpvalidation.rs @@ -92,13 +92,17 @@ pub fn build_request_builder( (header::ACCEPT_ENCODING, "gzip, deflate, br"), (header::CONNECTION, "keep-alive"), ]; - // Extend custom headers with the standard ones (overwriting any duplicates). 
- let mut combined_headers = custom_headers; + // Start with the standard headers and then overlay any custom headers so + // caller-specified values take precedence over defaults. + let mut combined_headers = HeaderMap::new(); for (name, value) in &standard_headers { if let Ok(hv) = HeaderValue::from_str(value) { combined_headers.insert(name.clone(), hv); } } + for (name, value) in custom_headers.iter() { + combined_headers.insert(name.clone(), value.clone()); + } request_builder = request_builder.headers(combined_headers); // If a body template is provided, parse and render it @@ -437,12 +441,19 @@ mod tests { .expect("building reqwest client"); let parser = liquid::ParserBuilder::with_stdlib().build().unwrap(); let globals = liquid::Object::new(); - let headers = - BTreeMap::from([("Content-Type".to_string(), "application/json".to_string())]); + let headers = BTreeMap::from([ + ("Content-Type".to_string(), "application/json".to_string()), + ("Accept".to_string(), "application/custom".to_string()), + ]); let url = Url::from_str("https://example.com").unwrap(); let result = - build_request_builder(&client, "GET", &url, &headers, &None, &parser, &globals); - assert!(result.is_ok()); + build_request_builder(&client, "GET", &url, &headers, &None, &parser, &globals) + .expect("building request"); + let req = result.build().expect("finalizing request"); + assert_eq!( + req.headers().get(header::ACCEPT).and_then(|v| v.to_str().ok()), + Some("application/custom"), + ); } #[tokio::test] async fn test_retry_request() { From ff5250c9a2a81850985665daf3de480bb7d72b32 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 4 Aug 2025 19:32:42 -0700 Subject: [PATCH 106/357] - Fixed header precedence so custom HTTP validation headers like "Accept" are preserved - Added new Heroku rule --- src/reporter.rs | 1 - src/scanner/repos.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/src/reporter.rs b/src/reporter.rs index 4669484..ef4c763 100644 --- a/src/reporter.rs +++ 
b/src/reporter.rs @@ -523,4 +523,3 @@ impl From for ReportMatch { } } } - diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 75fa919..7999988 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -335,7 +335,6 @@ pub async fn fetch_s3_objects( let pb = progress.clone(); - let bucket_name = bucket.to_string(); s3::visit_bucket_objects(bucket, prefix, role_arn, profile, move |key, bytes| { From 8ff147c08c723e2c60f7fac9937366b647b274ad Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 4 Aug 2025 21:38:23 -0700 Subject: [PATCH 107/357] - Fixed header precedence so custom HTTP validation headers like "Accept" are preserved - Added new Heroku rule --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 425db27..c4328e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.32.0" +version = "1.33.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true From 593184730033cad10be3439aa4b01acf88bb3322 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 5 Aug 2025 14:45:51 -0700 Subject: [PATCH 108/357] - Use system TLS root certificates to support self-hosted GitLab instances with internal CAs - Added new rule: Coze personal access token --- CHANGELOG.md | 4 ++++ Cargo.toml | 4 ++-- data/rules/aws.yml | 2 +- data/rules/coze.yml | 31 +++++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 data/rules/coze.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f169a7..58c66e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this project will be documented in this file. 
+## [1.34.0] +- Use system TLS root certificates to support self-hosted GitLab instances with internal CAs +- Added new rule: Coze personal access token + ## [1.33.0] - Fixed header precedence so custom HTTP validation headers like `Accept` are preserved - Added new Heroku rule diff --git a/Cargo.toml b/Cargo.toml index c4328e0..4295167 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.33.0" +version = "1.34.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -82,9 +82,9 @@ reqwest = { version = "0.12", default-features = false, features = [ "deflate", "stream", "rustls-tls", + "rustls-tls-native-roots", "blocking", "multipart", - "rustls-tls", ] } diff --git a/data/rules/aws.yml b/data/rules/aws.yml index b324dcc..49fcbe3 100644 --- a/data/rules/aws.yml +++ b/data/rules/aws.yml @@ -6,7 +6,7 @@ rules: \b ( (?:AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) - [0-9A-Z]{16} + [2-7A-Z]{16} ) \b min_entropy: 3.2 diff --git a/data/rules/coze.yml b/data/rules/coze.yml new file mode 100644 index 0000000..df3692a --- /dev/null +++ b/data/rules/coze.yml @@ -0,0 +1,31 @@ +rules: + - name: Coze Personal Access Token + id: kingfisher.coze.1 + pattern: '(?i)\b(pat_[a-zA-Z0-9]{64})\b' + confidence: medium + min_entropy: 5.0 + validation: + type: Http + content: + request: + method: GET + url: "https://api.coze.com/v1/workspaces?" 
+ headers: + Authorization: "Bearer {{TOKEN}}" + Content-Type: application/json + response_matcher: + - type: StatusMatch + status: [200, 403] # API returns 403 for a valid token without permission to route + - type: JsonValid + - type: WordMatch + words: + - '"access token invalid"' + - '"does not have permission"' + negative: true + references: + - https://www.coze.com/docs/developer_guides/coze_api_overview + - https://www.coze.com/docs/developer_guides/retrieve_files + examples: + - "pat_DlOG7fNcVfmw8cYhPWNcdfwrjjzwDr9EkV8EBjzHdgRWU2DzqHC1pPe0x590NN5f" + - "pat_93QiTdIvZGuRCFcfGTQJJ1VIYZ9dNHanX88wKoMojwMk3tX5tKqfFtxUp0ux8CjI" + - "pat_WvUTLYq5yZyaqegkyLSxXJMjXAJotjYEuC1sqT8daFlfwM3BiaRVJIZsER42DnhV" From 646e6175d2fe6f67aa5ef0ee0db4787dd172f977 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 5 Aug 2025 16:25:22 -0700 Subject: [PATCH 109/357] Updated Supabase rule to detect project url's and validate their corresponding tokens --- CHANGELOG.md | 1 + data/rules/supabase.yml | 54 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58c66e5..4a982f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ All notable changes to this project will be documented in this file. 
## [1.34.0] - Use system TLS root certificates to support self-hosted GitLab instances with internal CAs - Added new rule: Coze personal access token +- Updated Supabase rule to detect project url's and validate their corresponding tokens ## [1.33.0] - Fixed header precedence so custom HTTP validation headers like `Accept` are preserved diff --git a/data/rules/supabase.yml b/data/rules/supabase.yml index 6cb4abb..c93c648 100644 --- a/data/rules/supabase.yml +++ b/data/rules/supabase.yml @@ -1,5 +1,5 @@ rules: - - name: Supabase API Key + - name: Supabase Management Token id: kingfisher.supabase.1 pattern: | (?xi) @@ -27,4 +27,54 @@ rules: - report_response: true - type: StatusMatch status: - - 200 \ No newline at end of file + - 200 + - name: Supabase Project API Key + id: kingfisher.supabase.2 + pattern: | + (?xi) + \b + ( + sb_secret_[a-z0-9_-]{31} + ) + \b + min_entropy: 4.0 + confidence: high + validation: + type: Http + content: + request: + method: GET + url: "{{SBPROJECTURL}}/rest/v1/?select=*" + headers: + Apikey: "{{TOKEN}}" + User-Agent: "" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"host":' + references: + - https://supabase.com/docs/reference/api/v1-get-an-organization + examples: + - "sb_secret_9uM4GhB0STF5R4K3HxQtlg_bzWW6DRj" + - "sb_secret_szE_jsbktD3pWgnfUjgahw_hcHEIOBH" + depends_on_rule: + - rule_id: "kingfisher.supabase.3" + variable: SBPROJECTURL + - name: Supabase Project URL + id: kingfisher.supabase.3 + pattern: + (?xi) + ( + https:\/\/[a-z0-9]{16,32}\.supabase\.co + ) + confidence: medium + min_entropy: 3.0 + visible: false + validation: + references: + - https://supabase.com/docs/guides/api + examples: + - "https://ejcvydfyxzmbtfbfstnq.supabase.co" From a502375f7887718949eed80fcb3fb5aca11eb5ea Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 5 Aug 2025 18:06:09 -0700 Subject: [PATCH 110/357] fixing github action failure for linux-arm6 when making deb --- 
.github/workflows/release.yml | 3 +++ data/rules/supabase.yml | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 956bc1a..abe38f9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -81,6 +81,9 @@ jobs: - name: Build (Makefile linux-arm64) run: make linux-arm64 + - name: Fix permissions + run: sudo chown -R $(id -u):$(id -g) target + - name: Build Debian package run: | cargo deb --no-build --target aarch64-unknown-linux-musl \ diff --git a/data/rules/supabase.yml b/data/rules/supabase.yml index c93c648..d297514 100644 --- a/data/rules/supabase.yml +++ b/data/rules/supabase.yml @@ -73,7 +73,6 @@ rules: confidence: medium min_entropy: 3.0 visible: false - validation: references: - https://supabase.com/docs/guides/api examples: From a81cfb963ada115ec71c8bc035fb7b282bf6bd61 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 6 Aug 2025 19:15:50 -0700 Subject: [PATCH 111/357] Remote scans with --git-history=none now clone repositories with a working tree and scan the current files instead of erroring with 'No inputs to scan.' --- CHANGELOG.md | 3 ++ Cargo.toml | 2 +- README.md | 6 ++- src/git_binary.rs | 27 ++++++++---- src/scanner/repos.rs | 18 ++++---- tests/int_gitlab.rs | 103 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 140 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a982f5..0ff7097 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.35.0] +- Remote scans with `--git-history=none` now clone repositories with a working tree and scan the current files instead of erroring with "No inputs to scan". 
+ ## [1.34.0] - Use system TLS root certificates to support self-hosted GitLab instances with internal CAs - Added new rule: Coze personal access token diff --git a/Cargo.toml b/Cargo.toml index 4295167..930a196 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.34.0" +version = "1.35.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/README.md b/README.md index ef68c1f..1fba1fb 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,17 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -Kingfisher is a blazingly fast secret‑scanning and validation tool built in Rust. It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware parsing via Tree‑Sitter, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production +Kingfisher is a blazingly fast secret‑scanning and live validation tool built in Rust. It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware parsing via Tree‑Sitter, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production

Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.com/praetorian-inc/noseyparker), and is built atop their incredible work and the work contributed by the Nosey Parker community. ## What Kingfisher Adds - **Live validation** via cloud-provider APIs -- **Language-aware detection** (source-code parsing) for ~20 languages - **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages +- **Compressed Files**: Supports extracting and scanning compressed files for secrets - **Baseline mode**: ignore known secrets, flag only new ones +- **Language-aware detection** (source-code parsing) for ~20 languages - **Native Windows** binary @@ -27,6 +28,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` - **Slack messages**: query‑based scans with `--slack-query` - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous +- **Compressed Files**: Supports extracting and scanning compressed files for secrets - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) diff --git a/src/git_binary.rs b/src/git_binary.rs index fc2baa1..6e9bd8d 100644 --- a/src/git_binary.rs +++ b/src/git_binary.rs @@ -105,8 +105,13 @@ impl Git { let _span = debug_span!("git_update", "{repo_url} {}", output_dir.display()).entered(); debug!("Attempting to update clone of {repo_url} at {}", output_dir.display()); let mut cmd = self.git(); - cmd.arg("--git-dir"); - cmd.arg(output_dir); + if output_dir.join(".git").is_dir() { + cmd.arg("-C"); + cmd.arg(output_dir); + } else { + 
cmd.arg("--git-dir"); + cmd.arg(output_dir); + } cmd.arg("remote"); cmd.arg("update"); cmd.arg("--prune"); @@ -129,7 +134,9 @@ impl Git { debug!("Attempting to create fresh clone of {} at {}", repo_url, output_dir.display()); let mut cmd = self.git(); cmd.arg("clone"); - cmd.arg(clone_mode.arg()); + if let Some(arg) = clone_mode.arg() { + cmd.arg(arg); + } cmd.arg(repo_url.as_str()); cmd.arg(output_dir); debug!("{cmd:#?}"); @@ -151,14 +158,17 @@ pub enum CloneMode { Bare, /// Equivalent to `git clone --mirror` Mirror, + /// Standard clone with a working tree + Checkout, } impl CloneMode { /// Return the CLI argument for this clone mode. - pub fn arg(&self) -> &str { + pub fn arg(&self) -> Option<&str> { match self { - Self::Bare => "--bare", - Self::Mirror => "--mirror", + Self::Bare => Some("--bare"), + Self::Mirror => Some("--mirror"), + Self::Checkout => None, } } } @@ -183,8 +193,9 @@ mod tests { #[test] fn test_clone_mode_arg() { - assert_eq!(CloneMode::Bare.arg(), "--bare"); - assert_eq!(CloneMode::Mirror.arg(), "--mirror"); + assert_eq!(CloneMode::Bare.arg(), Some("--bare")); + assert_eq!(CloneMode::Mirror.arg(), Some("--mirror")); + assert_eq!(CloneMode::Checkout.arg(), None); } #[test] diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 7999988..19f6b4d 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -12,10 +12,7 @@ use crate::blob::BlobIdMap; use crate::{ blob::BlobMetadata, cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode}, - scan, - }, + commands::{github::GitCloneMode, github::GitHistoryMode, scan}, global, }, findings_store, @@ -42,16 +39,20 @@ pub fn clone_or_update_git_repos( datastore: &Arc>, ) -> Result> { let mut input_roots = args.input_specifier_args.path_inputs.clone(); - if repo_urls.is_empty() || args.input_specifier_args.git_history == GitHistoryMode::None { + if repo_urls.is_empty() { return Ok(input_roots); } info!("{} Git URLs to fetch", repo_urls.len()); for repo_url in repo_urls { debug!("Need to 
fetch {repo_url}") } - let clone_mode = match args.input_specifier_args.git_clone { - GitCloneMode::Mirror => CloneMode::Mirror, - GitCloneMode::Bare => CloneMode::Bare, + let clone_mode = if args.input_specifier_args.git_history == GitHistoryMode::None { + CloneMode::Checkout + } else { + match args.input_specifier_args.git_clone { + GitCloneMode::Mirror => CloneMode::Mirror, + GitCloneMode::Bare => CloneMode::Bare, + } }; let git = Git::new(global_args.ignore_certs); @@ -68,6 +69,7 @@ pub fn clone_or_update_git_repos( } else { ProgressBar::hidden() }; + for repo_url in repo_urls { let output_dir = { let datastore = datastore.lock().unwrap(); diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 0b55799..3903ddb 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -139,3 +139,106 @@ fn test_gitlab_remote_scan() -> Result<()> { drop(rt); Ok(()) } + + +#[test] +fn test_gitlab_remote_scan_no_history() -> Result<()> { + let temp_dir = TempDir::new().context("tmp dir")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/")?, + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/")?, + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: 
None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::None, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16_384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + let rt = Runtime::new()?; + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + + rt.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + + let ds = datastore.lock().unwrap(); + let findings = ds.get_matches(); + let total = findings.len(); + let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); + + assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); + + let exit_code = determine_exit_code(total, validated); + assert!( + exit_code >= 200, + "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" + ); + + drop(rt); + Ok(()) +} \ No newline at end of file From 6cbde61099fa04f039e4691edcc3af317b7c7880 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 6 Aug 2025 19:16:22 -0700 Subject: [PATCH 112/357] Remote scans with --git-history=none now clone repositories with a working tree and scan the 
current files instead of erroring with 'No inputs to scan.' --- src/scanner/repos.rs | 2 +- tests/int_gitlab.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 19f6b4d..c3bb8ba 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -69,7 +69,7 @@ pub fn clone_or_update_git_repos( } else { ProgressBar::hidden() }; - + for repo_url in repo_urls { let output_dir = { let datastore = datastore.lock().unwrap(); diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 3903ddb..fa3e169 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -140,7 +140,6 @@ fn test_gitlab_remote_scan() -> Result<()> { Ok(()) } - #[test] fn test_gitlab_remote_scan_no_history() -> Result<()> { let temp_dir = TempDir::new().context("tmp dir")?; @@ -241,4 +240,4 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { drop(rt); Ok(()) -} \ No newline at end of file +} From 6fe4d0e7897a215287143313421782e2054f3d34 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 6 Aug 2025 21:23:27 -0700 Subject: [PATCH 113/357] fixed issue where --redact did not function properly --- CHANGELOG.md | 1 + src/scanner/enumerate.rs | 2 +- src/scanner/processing.rs | 3 +- src/scanner/repos.rs | 2 +- tests/int_redact.rs | 115 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 120 insertions(+), 3 deletions(-) create mode 100644 tests/int_redact.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ff7097..4201640 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.35.0] - Remote scans with `--git-history=none` now clone repositories with a working tree and scan the current files instead of erroring with "No inputs to scan". 
+- Fixed issue where `--redact` did not function properly ## [1.34.0] - Use system TLS root certificates to support self-hosted GitLab instances with internal CAs diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index 0e0c795..87a8011 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -183,7 +183,7 @@ pub fn enumerate_filesystem_inputs( return Ok(()); } progress.inc(blob.len().try_into().unwrap()); - match processor.run(origin, blob, args.no_dedup) { + match processor.run(origin, blob, args.no_dedup, args.redact) { Ok(None) => { // nothing to record } diff --git a/src/scanner/processing.rs b/src/scanner/processing.rs index 8c36514..e1551b0 100644 --- a/src/scanner/processing.rs +++ b/src/scanner/processing.rs @@ -25,11 +25,12 @@ impl<'a> BlobProcessor<'a> { origin: OriginSet, blob: Blob, no_dedup: bool, + redact: bool, ) -> Result> { let blob_id = blob.id.hex(); let _span = debug_span!("matcher", blob_id).entered(); let t1 = Instant::now(); - let res = self.matcher.scan_blob(&blob, &origin, None, false, no_dedup)?; + let res = self.matcher.scan_blob(&blob, &origin, None, redact, no_dedup)?; let scan_us = t1.elapsed().as_micros(); match res { // blob already seen, but with no matches; nothing to do! diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index c3bb8ba..7bd6b8d 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -349,7 +349,7 @@ pub async fn fetch_s3_objects( let blob = crate::blob::Blob::from_bytes(bytes); if let Some((origin, blob_md, scored_matches)) = - processor.run(origin, blob, args.no_dedup)? + processor.run(origin, blob, args.no_dedup, args.redact)? 
{ // Wrap origin & metadata once: let origin_arc = Arc::new(origin); diff --git a/tests/int_redact.rs b/tests/int_redact.rs new file mode 100644 index 0000000..5d72c61 --- /dev/null +++ b/tests/int_redact.rs @@ -0,0 +1,115 @@ +// Integration test to ensure --redact replaces secret values with hashes +use std::{ + path::PathBuf, + sync::{Arc, Mutex}, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, GlobalArgs, Mode}, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; + +#[tokio::test] +async fn test_redact_hashes_finding_values() -> Result<()> { + let temp_dir = TempDir::new()?; + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: vec![PathBuf::from("testdata/generic_secrets.py")], + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: 
GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: true, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Never, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(temp_dir.path().to_path_buf()))); + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &rules_db).await?; + + let ds = datastore.lock().unwrap(); + let matches = ds.get_matches(); + assert!(!matches.is_empty()); + for m_arc in matches { + let m = &m_arc.2; + assert!(m.groups.captures.iter().any(|cap| cap.value.starts_with("[REDACTED:"))); + } + + Ok(()) +} \ No newline at end of file From fb2b91595bbda08c2486dabe9276e9acedb36ca0 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 6 Aug 2025 21:31:02 -0700 Subject: [PATCH 114/357] Fixed validation logic for clarifai rule --- CHANGELOG.md | 1 + data/rules/clarifai.yml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4201640..482eca5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ All notable changes to this project will be documented in 
this file. ## [1.35.0] - Remote scans with `--git-history=none` now clone repositories with a working tree and scan the current files instead of erroring with "No inputs to scan". - Fixed issue where `--redact` did not function properly +- Fixed validation logic for clarifai rule ## [1.34.0] - Use system TLS root certificates to support self-hosted GitLab instances with internal CAs diff --git a/data/rules/clarifai.yml b/data/rules/clarifai.yml index e3f7167..d72c9f2 100644 --- a/data/rules/clarifai.yml +++ b/data/rules/clarifai.yml @@ -34,5 +34,5 @@ rules: - type: WordMatch match_all_words: true words: - - '"models"' - - '"status"' + - '"code":10000' + - '"description":"Ok"' From 63125b3a7fde452786e5ecde34df5da64649aaa6 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 7 Aug 2025 16:13:57 -0700 Subject: [PATCH 115/357] Fixed GitHub organization and GitLab group scans when using '--git-history=none' --- CHANGELOG.md | 3 +++ data/rules/onepassword.yml | 1 - src/cli/commands/github.rs | 2 +- src/cli/commands/inputs.rs | 4 ++-- src/gitlab.rs | 40 ++++++++++++++++++++++++++++++------- src/main.rs | 4 ++-- src/reporter/json_format.rs | 4 ++-- 7 files changed, 43 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 482eca5..b06639d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.36.0] +- Fixed GitHub organization and GitLab group scans when using `--git-history=none` + ## [1.35.0] - Remote scans with `--git-history=none` now clone repositories with a working tree and scan the current files instead of erroring with "No inputs to scan". 
- Fixed issue where `--redact` did not function properly diff --git a/data/rules/onepassword.yml b/data/rules/onepassword.yml index e521e01..b7cc0bc 100644 --- a/data/rules/onepassword.yml +++ b/data/rules/onepassword.yml @@ -44,7 +44,6 @@ rules: \b min_entropy: 3.8 confidence: medium - prevalidated: true examples: - A3-R69SQK-TZ9KPW-8MXYD-6W373-V7GHJ-EDJQW - A3-ASWWYB-798JRY-LJVD4-23DC2-86TVM-H43EB diff --git a/src/cli/commands/github.rs b/src/cli/commands/github.rs index 766df83..cea9a44 100644 --- a/src/cli/commands/github.rs +++ b/src/cli/commands/github.rs @@ -60,7 +60,7 @@ pub struct GitHubRepoSpecifiers { pub all_organizations: bool, /// Filter by repository type - #[arg(long, default_value_t = GitHubRepoType::Source, alias = "github-repo-type")] + #[arg(long, default_value_t = GitHubRepoType::All, alias = "github-repo-type")] pub repo_type: GitHubRepoType, } diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 13bc78b..4cf4f26 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -60,7 +60,7 @@ pub struct InputSpecifierArgs { )] pub github_api_url: Url, - #[arg(long, default_value_t = GitHubRepoType::Source)] + #[arg(long, default_value_t = GitHubRepoType::All)] pub github_repo_type: GitHubRepoType, // GitLab Options @@ -85,7 +85,7 @@ pub struct InputSpecifierArgs { )] pub gitlab_api_url: Url, - #[arg(long, default_value_t = GitLabRepoType::Owner)] + #[arg(long, default_value_t = GitLabRepoType::All)] pub gitlab_repo_type: GitLabRepoType, /// Jira base URL (e.g. 
https://jira.example.com) diff --git a/src/gitlab.rs b/src/gitlab.rs index c7b0549..d94f46c 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -88,9 +88,25 @@ pub async fn enumerate_repo_urls( hits.into_iter().next().context(format!("GitLab user `{}` not found", username))?; let user_id = user.id; - // b) List that user’s projects by ID - let projects_ep = UserProjects::builder().user(user_id).build()?; + // b) List that user's projects applying the requested filter + let mut builder = UserProjects::builder(); + builder.user(user_id); + + match repo_specifiers.repo_filter { + RepoType::Owner => { + builder.owned(true); + } + RepoType::Member => { + builder.membership(true); + } + RepoType::All => { + // nothing + } + } + + let projects_ep = builder.build()?; // now no borrows of a temporary let projects: Vec = projects_ep.query(&client)?; + for proj in projects { repo_urls.push(proj.http_url_to_repo); } @@ -102,19 +118,29 @@ pub async fn enumerate_repo_urls( // all groups let groups: Vec = if repo_specifiers.all_groups { - gitlab::api::groups::Groups::builder().build()?.query(&client.clone())? + gitlab::api::groups::Groups::builder() + .all_available(true) + .build()? + .query(&client.clone())? 
} else { let mut found: Vec = Vec::new(); for grp in &repo_specifiers.group { - let ep = gitlab::api::groups::Groups::builder().search(grp).build()?; - let page: Vec = ep.query(&client.clone())?; - found.extend(page); + let ep = gitlab::api::groups::Group::builder().group(grp).build()?; + let group: SimpleGroup = ep.query(&client.clone())?; + found.push(group); } found }; for group in groups { - let gp_ep = GroupProjects::builder().group(group.id).build()?; + let mut gp_builder = GroupProjects::builder(); + gp_builder.group(group.id); + + if matches!(repo_specifiers.repo_filter, RepoType::Owner) { + gp_builder.owned(true); + } + + let gp_ep = gp_builder.build()?; let projects: Vec = gp_ep.query(&client)?; for proj in projects { repo_urls.push(proj.http_url_to_repo); diff --git a/src/main.rs b/src/main.rs index 73c77a5..9c9c1bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -275,13 +275,13 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { github_organization: Vec::new(), all_github_organizations: false, github_api_url: url::Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, + github_repo_type: GitHubRepoType::All, // new GitLab defaults gitlab_user: Vec::new(), gitlab_group: Vec::new(), all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, + gitlab_repo_type: GitLabRepoType::All, jira_url: None, jql: None, diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 154bb58..0e5a845 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -76,14 +76,14 @@ mod tests { github_organization: Vec::new(), all_github_organizations: false, github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, + github_repo_type: GitHubRepoType::All, // GitLab gitlab_user: Vec::new(), gitlab_group: Vec::new(), all_gitlab_groups: false, gitlab_api_url: 
Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, + gitlab_repo_type: GitLabRepoType::All, // Jira options jira_url: None, jql: None, From ac5b9fb594b53f75438de7f319c036dd8957d6eb Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 7 Aug 2025 17:21:16 -0700 Subject: [PATCH 116/357] JWT tokens without both 'iss' and 'aud' are no longer reported as active credentials --- CHANGELOG.md | 1 + data/rules/jira.yml | 2 +- src/scanner/validation.rs | 6 -- src/validation/jwt.rs | 4 ++ tests/int_rules_no_validated_findings.rs | 79 ++++++++++++++++++++++++ 5 files changed, 85 insertions(+), 7 deletions(-) create mode 100644 tests/int_rules_no_validated_findings.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index b06639d..d6bc09d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.36.0] - Fixed GitHub organization and GitLab group scans when using `--git-history=none` +- JWT tokens without both `iss` and `aud` are no longer reported as active credentials ## [1.35.0] - Remote scans with `--git-history=none` now clone repositories with a working tree and scan the current files instead of erroring with "No inputs to scan". diff --git a/data/rules/jira.yml b/data/rules/jira.yml index 82ac195..a555d85 100644 --- a/data/rules/jira.yml +++ b/data/rules/jira.yml @@ -11,7 +11,7 @@ rules: visible: false confidence: medium examples: - - example-jira.atlassian.net + - examplefoo-jira.atlassian.net - jira.sprintUri= https://example.atlassian.net/rest - name: Jira Token diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index 210ef26..1ba02e1 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -335,12 +335,6 @@ pub async fn run_secret_validation( ds.replace_matches(updated_arcs); } - // ── 5. 
Done ───────────────────────────────────────────────────────────── - println!( - "Validation complete – {} succeeded, {} failed", - success_count.load(Ordering::Relaxed), - fail_count.load(Ordering::Relaxed) - ); Ok(()) } diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs index 25a7206..d5485d6 100644 --- a/src/validation/jwt.rs +++ b/src/validation/jwt.rs @@ -71,7 +71,11 @@ pub async fn validate_jwt(token: &str) -> Result<(bool, String)> { // --------------------------------------------------------------------------- let issuer = claims.iss.clone().unwrap_or_default(); + let aud_strings = extract_aud_strings(&claims); + if issuer.trim().is_empty() && aud_strings.iter().all(|s| s.trim().is_empty()) { + return Ok((false, "JWT missing issuer and audience".to_string())); + } if let Some(iss) = claims.iss.clone() { // parse header now (kid, alg) let header = decode_header(token).map_err(|e| anyhow!("decode header: {e}"))?; diff --git a/tests/int_rules_no_validated_findings.rs b/tests/int_rules_no_validated_findings.rs new file mode 100644 index 0000000..51d4a3b --- /dev/null +++ b/tests/int_rules_no_validated_findings.rs @@ -0,0 +1,79 @@ +use anyhow::Result; +use assert_cmd::Command; +use serde_json::Value; + +#[test] +fn scan_rules_has_no_validated_findings() -> Result<()> { + let output = Command::cargo_bin("kingfisher")? 
+ .args([ + "scan", "data/rules", + "--format", "json", + "--no-update-check", + "--only-valid", + ]) + .output()?; + + let stdout = String::from_utf8_lossy(&output.stdout); + + // Find the first '[' — start of array + let start = match stdout.find('[') { + Some(i) => i, + None => return Ok(()), // no array found + }; + + let mut depth = 0usize; + let mut end = None; + for (i, ch) in stdout.char_indices().skip(start) { + match ch { + '[' => depth += 1, + ']' => { + depth -= 1; + if depth == 0 { + end = Some(i); + break; + } + } + _ => {} + } + } + + let json_array_str = match end { + Some(end_idx) => &stdout[start..=end_idx], + None => return Ok(()), // no matching close found + }; + + if json_array_str.trim().is_empty() { + return Ok(()); + } + + let findings: Vec = serde_json::from_str(json_array_str)?; + + for finding in findings { + let rule_id = finding["rule"]["id"].as_str().unwrap_or("unknown"); + let rule_prevalidated = finding["rule"]["prevalidated"].as_bool().unwrap_or(false); + + let status = finding["finding"]["validation"]["status"] + .as_str() + .unwrap_or("") + .to_ascii_lowercase(); + + let response = finding["finding"]["validation"]["response"] + .as_str() + .unwrap_or("") + .to_ascii_lowercase(); + + // Skip anything intentionally marked as prevalidated + if rule_prevalidated || status == "prevalidated" || response == "prevalidated" { + continue; + } + + // Fail only on genuinely validated secrets + assert_ne!( + status.as_str(), + "active credential", + "Validated finding detected in rule {rule_id}" + ); + } + + Ok(()) +} From d8624972ecaea44eba6d00cd3eb4a2397d1bd2d2 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 7 Aug 2025 17:21:31 -0700 Subject: [PATCH 117/357] JWT tokens without both 'iss' and 'aud' are no longer reported as active credentials --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 930a196..4114e10 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = 
false [package] name = "kingfisher" -version = "1.35.0" +version = "1.36.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true From d4e8900d377327f2b2e5cf73749d95be9b8e1b21 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 7 Aug 2025 17:36:39 -0700 Subject: [PATCH 118/357] JWT tokens without both 'iss' and 'aud' are no longer reported as active credentials --- src/cli/commands/github.rs | 4 ++-- src/cli/commands/inputs.rs | 2 +- src/main.rs | 2 +- src/reporter/json_format.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/cli/commands/github.rs b/src/cli/commands/github.rs index cea9a44..50aa190 100644 --- a/src/cli/commands/github.rs +++ b/src/cli/commands/github.rs @@ -60,7 +60,7 @@ pub struct GitHubRepoSpecifiers { pub all_organizations: bool, /// Filter by repository type - #[arg(long, default_value_t = GitHubRepoType::All, alias = "github-repo-type")] + #[arg(long, default_value_t = GitHubRepoType::Source, alias = "github-repo-type")] pub repo_type: GitHubRepoType, } @@ -87,7 +87,7 @@ pub enum GitHubRepoType { impl From for crate::github::RepoType { fn from(val: GitHubRepoType) -> Self { match val { - GitHubRepoType::All => crate::github::RepoType::All, + GitHubRepoType::Source => crate::github::RepoType::All, GitHubRepoType::Source => crate::github::RepoType::Source, GitHubRepoType::Fork => crate::github::RepoType::Fork, } diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 4cf4f26..2249640 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -60,7 +60,7 @@ pub struct InputSpecifierArgs { )] pub github_api_url: Url, - #[arg(long, default_value_t = GitHubRepoType::All)] + #[arg(long, default_value_t = GitHubRepoType::Source)] pub github_repo_type: GitHubRepoType, // GitLab Options diff --git a/src/main.rs b/src/main.rs index 9c9c1bd..58145e6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -275,7 +275,7 @@ fn 
create_default_scan_args() -> cli::commands::scan::ScanArgs { github_organization: Vec::new(), all_github_organizations: false, github_api_url: url::Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::All, + github_repo_type: GitHubRepoType::Source, // new GitLab defaults gitlab_user: Vec::new(), gitlab_group: Vec::new(), diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 0e5a845..aae16fc 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -76,7 +76,7 @@ mod tests { github_organization: Vec::new(), all_github_organizations: false, github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::All, + github_repo_type: GitHubRepoType::Source, // GitLab gitlab_user: Vec::new(), From dafc123eb961ab14850bf7977eb39e0814e88d4a Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 7 Aug 2025 18:30:40 -0700 Subject: [PATCH 119/357] JWT tokens without both 'iss' and 'aud' are no longer reported as active credentials --- src/cli/commands/github.rs | 3 --- src/gitlab.rs | 2 +- src/validation/jwt.rs | 28 +++++++++++++++++++++++- tests/int_redact.rs | 2 +- tests/int_rules_no_validated_findings.rs | 13 +++-------- 5 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/cli/commands/github.rs b/src/cli/commands/github.rs index 50aa190..7537c2e 100644 --- a/src/cli/commands/github.rs +++ b/src/cli/commands/github.rs @@ -75,8 +75,6 @@ impl GitHubRepoSpecifiers { #[derive(Copy, Clone, Debug, Display, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] #[strum(serialize_all = "kebab-case")] pub enum GitHubRepoType { - /// Both source and fork repositories - All, /// Only source repositories (not forks) Source, /// Only fork repositories @@ -87,7 +85,6 @@ pub enum GitHubRepoType { impl From for crate::github::RepoType { fn from(val: GitHubRepoType) -> Self { match val { - GitHubRepoType::Source => crate::github::RepoType::All, GitHubRepoType::Source => 
crate::github::RepoType::Source, GitHubRepoType::Fork => crate::github::RepoType::Fork, } diff --git a/src/gitlab.rs b/src/gitlab.rs index d94f46c..c3bd5ea 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -104,7 +104,7 @@ pub async fn enumerate_repo_urls( } } - let projects_ep = builder.build()?; // now no borrows of a temporary + let projects_ep = builder.build()?; // now no borrows of a temporary let projects: Vec = projects_ep.query(&client)?; for proj in projects { diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs index d5485d6..c5649a9 100644 --- a/src/validation/jwt.rs +++ b/src/validation/jwt.rs @@ -69,6 +69,26 @@ pub async fn validate_jwt(token: &str) -> Result<(bool, String)> { } } + let header_b64 = token.split('.').next().ok_or_else(|| anyhow!("invalid JWT format"))?; + let header_json = + URL_SAFE_NO_PAD.decode(header_b64).map_err(|e| anyhow!("invalid base64 in header: {e}"))?; + let header_val: serde_json::Value = + serde_json::from_slice(&header_json).map_err(|e| anyhow!("invalid header json: {e}"))?; + let alg_str = header_val.get("alg").and_then(|v| v.as_str()).unwrap_or(""); + + // If alg is "none", skip signature/JWKS entirely + if alg_str.eq_ignore_ascii_case("none") { + // still enforce your time/claims checks that already ran + return Ok(( + true, + format!( + "JWT valid (alg: none, iss: {}, aud: {:?})", + claims.iss.clone().unwrap_or_default(), + extract_aud_strings(&claims), + ), + )); + } + // --------------------------------------------------------------------------- let issuer = claims.iss.clone().unwrap_or_default(); let aud_strings = extract_aud_strings(&claims); @@ -200,7 +220,13 @@ mod tests { fn build_token(exp_offset: i64) -> String { let header = URL_SAFE_NO_PAD.encode(r#"{"alg":"none"}"#); let exp = (Utc::now() + ChronoDuration::seconds(exp_offset)).timestamp(); - let payload = URL_SAFE_NO_PAD.encode(format!("{{\"exp\":{exp}}}")); + let payload = URL_SAFE_NO_PAD.encode(format!( + r#"{{ + "exp": {exp}, + "iss": 
"https://example.com", + "aud": ["test-audience"] + }}"# + )); format!("{header}.{payload}.") } diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 5d72c61..796d019 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -112,4 +112,4 @@ async fn test_redact_hashes_finding_values() -> Result<()> { } Ok(()) -} \ No newline at end of file +} diff --git a/tests/int_rules_no_validated_findings.rs b/tests/int_rules_no_validated_findings.rs index 51d4a3b..aea1cb0 100644 --- a/tests/int_rules_no_validated_findings.rs +++ b/tests/int_rules_no_validated_findings.rs @@ -5,12 +5,7 @@ use serde_json::Value; #[test] fn scan_rules_has_no_validated_findings() -> Result<()> { let output = Command::cargo_bin("kingfisher")? - .args([ - "scan", "data/rules", - "--format", "json", - "--no-update-check", - "--only-valid", - ]) + .args(["scan", "data/rules", "--format", "json", "--no-update-check", "--only-valid"]) .output()?; let stdout = String::from_utf8_lossy(&output.stdout); @@ -52,10 +47,8 @@ fn scan_rules_has_no_validated_findings() -> Result<()> { let rule_id = finding["rule"]["id"].as_str().unwrap_or("unknown"); let rule_prevalidated = finding["rule"]["prevalidated"].as_bool().unwrap_or(false); - let status = finding["finding"]["validation"]["status"] - .as_str() - .unwrap_or("") - .to_ascii_lowercase(); + let status = + finding["finding"]["validation"]["status"].as_str().unwrap_or("").to_ascii_lowercase(); let response = finding["finding"]["validation"]["response"] .as_str() From 5f1c4fb23660fc6719026e4ae1f41d0709a9120c Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 7 Aug 2025 18:45:46 -0700 Subject: [PATCH 120/357] changes in response to code review --- src/gitlab.rs | 6 ++++-- src/validation/jwt.rs | 2 +- tests/int_rules_no_validated_findings.rs | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/gitlab.rs b/src/gitlab.rs index c3bd5ea..f5b6ee3 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -103,8 +103,10 @@ pub async fn 
enumerate_repo_urls( // nothing } } - - let projects_ep = builder.build()?; // now no borrows of a temporary + + // Extract the builder to a separate variable to avoid borrowing a temporary, + // allowing us to modify its fields before building the endpoint. + let projects_ep = builder.build()?; let projects: Vec = projects_ep.query(&client)?; for proj in projects { diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs index c5649a9..bf4fc5f 100644 --- a/src/validation/jwt.rs +++ b/src/validation/jwt.rs @@ -94,7 +94,7 @@ pub async fn validate_jwt(token: &str) -> Result<(bool, String)> { let aud_strings = extract_aud_strings(&claims); if issuer.trim().is_empty() && aud_strings.iter().all(|s| s.trim().is_empty()) { - return Ok((false, "JWT missing issuer and audience".to_string())); + return Ok((false, "JWT missing issuer and audience".into())); } if let Some(iss) = claims.iss.clone() { // parse header now (kid, alg) diff --git a/tests/int_rules_no_validated_findings.rs b/tests/int_rules_no_validated_findings.rs index aea1cb0..a6d171d 100644 --- a/tests/int_rules_no_validated_findings.rs +++ b/tests/int_rules_no_validated_findings.rs @@ -62,7 +62,7 @@ fn scan_rules_has_no_validated_findings() -> Result<()> { // Fail only on genuinely validated secrets assert_ne!( - status.as_str(), + &status, "active credential", "Validated finding detected in rule {rule_id}" ); From 97956bcc3fff56339eae5e3c75f81f6fba0fb6ff Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 8 Aug 2025 15:11:36 -0700 Subject: [PATCH 121/357] GitLab: include nested subgroup projects when enumerating group repositories --- CHANGELOG.md | 3 +++ Cargo.toml | 2 +- src/cli/commands/inputs.rs | 2 +- src/gitlab.rs | 4 +++- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d6bc09d..44c6f4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. 
+## [1.37.0] +- GitLab: include nested subgroup projects when enumerating group repositories + ## [1.36.0] - Fixed GitHub organization and GitLab group scans when using `--git-history=none` - JWT tokens without both `iss` and `aud` are no longer reported as active credentials diff --git a/Cargo.toml b/Cargo.toml index 4114e10..aec70d4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.36.0" +version = "1.37.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 2249640..13bc78b 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -85,7 +85,7 @@ pub struct InputSpecifierArgs { )] pub gitlab_api_url: Url, - #[arg(long, default_value_t = GitLabRepoType::All)] + #[arg(long, default_value_t = GitLabRepoType::Owner)] pub gitlab_repo_type: GitLabRepoType, /// Jira base URL (e.g. 
https://jira.example.com) diff --git a/src/gitlab.rs b/src/gitlab.rs index f5b6ee3..dc0915a 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -100,7 +100,7 @@ pub async fn enumerate_repo_urls( builder.membership(true); } RepoType::All => { - // nothing + // this doesn’t set any owned() or membership() flags on the builder, which in GitLab’s API defaults to "all visible repos" } } @@ -137,6 +137,8 @@ pub async fn enumerate_repo_urls( for group in groups { let mut gp_builder = GroupProjects::builder(); gp_builder.group(group.id); + // Ensure projects from nested subgroups are also enumerated + gp_builder.include_subgroups(true); if matches!(repo_specifiers.repo_filter, RepoType::Owner) { gp_builder.owned(true); From 96a08ed8ede9fc41f5a3ae0966034937745aaae0 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 8 Aug 2025 15:11:44 -0700 Subject: [PATCH 122/357] GitLab: include nested subgroup projects when enumerating group repositories --- src/gitlab.rs | 2 +- tests/int_rules_no_validated_findings.rs | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/gitlab.rs b/src/gitlab.rs index dc0915a..c0a85e2 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -103,7 +103,7 @@ pub async fn enumerate_repo_urls( // this doesn’t set any owned() or membership() flags on the builder, which in GitLab’s API defaults to "all visible repos" } } - + // Extract the builder to a separate variable to avoid borrowing a temporary, // allowing us to modify its fields before building the endpoint. 
let projects_ep = builder.build()?; diff --git a/tests/int_rules_no_validated_findings.rs b/tests/int_rules_no_validated_findings.rs index a6d171d..01a6ad9 100644 --- a/tests/int_rules_no_validated_findings.rs +++ b/tests/int_rules_no_validated_findings.rs @@ -61,11 +61,7 @@ fn scan_rules_has_no_validated_findings() -> Result<()> { } // Fail only on genuinely validated secrets - assert_ne!( - &status, - "active credential", - "Validated finding detected in rule {rule_id}" - ); + assert_ne!(&status, "active credential", "Validated finding detected in rule {rule_id}"); } Ok(()) From b0ce44f7092f2aa3f5b56d8a0a6d38dca41b876e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 8 Aug 2025 15:12:33 -0700 Subject: [PATCH 123/357] GitLab: include nested subgroup projects when enumerating group repositories --- src/cli/commands/inputs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 13bc78b..2249640 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -85,7 +85,7 @@ pub struct InputSpecifierArgs { )] pub gitlab_api_url: Url, - #[arg(long, default_value_t = GitLabRepoType::Owner)] + #[arg(long, default_value_t = GitLabRepoType::All)] pub gitlab_repo_type: GitLabRepoType, /// Jira base URL (e.g. 
https://jira.example.com) From 2c7b0f770523217cf76eeb7fd4d954e8b436f571 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 8 Aug 2025 18:08:17 -0700 Subject: [PATCH 124/357] GitLab: include nested subgroup projects when enumerating group repositories --- src/gitlab.rs | 44 +++++++------------------------------------- 1 file changed, 7 insertions(+), 37 deletions(-) diff --git a/src/gitlab.rs b/src/gitlab.rs index c0a85e2..c7b0549 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -88,27 +88,9 @@ pub async fn enumerate_repo_urls( hits.into_iter().next().context(format!("GitLab user `{}` not found", username))?; let user_id = user.id; - // b) List that user's projects applying the requested filter - let mut builder = UserProjects::builder(); - builder.user(user_id); - - match repo_specifiers.repo_filter { - RepoType::Owner => { - builder.owned(true); - } - RepoType::Member => { - builder.membership(true); - } - RepoType::All => { - // this doesn’t set any owned() or membership() flags on the builder, which in GitLab’s API defaults to "all visible repos" - } - } - - // Extract the builder to a separate variable to avoid borrowing a temporary, - // allowing us to modify its fields before building the endpoint. - let projects_ep = builder.build()?; + // b) List that user’s projects by ID + let projects_ep = UserProjects::builder().user(user_id).build()?; let projects: Vec = projects_ep.query(&client)?; - for proj in projects { repo_urls.push(proj.http_url_to_repo); } @@ -120,31 +102,19 @@ pub async fn enumerate_repo_urls( // all groups let groups: Vec = if repo_specifiers.all_groups { - gitlab::api::groups::Groups::builder() - .all_available(true) - .build()? - .query(&client.clone())? + gitlab::api::groups::Groups::builder().build()?.query(&client.clone())? 
} else { let mut found: Vec = Vec::new(); for grp in &repo_specifiers.group { - let ep = gitlab::api::groups::Group::builder().group(grp).build()?; - let group: SimpleGroup = ep.query(&client.clone())?; - found.push(group); + let ep = gitlab::api::groups::Groups::builder().search(grp).build()?; + let page: Vec = ep.query(&client.clone())?; + found.extend(page); } found }; for group in groups { - let mut gp_builder = GroupProjects::builder(); - gp_builder.group(group.id); - // Ensure projects from nested subgroups are also enumerated - gp_builder.include_subgroups(true); - - if matches!(repo_specifiers.repo_filter, RepoType::Owner) { - gp_builder.owned(true); - } - - let gp_ep = gp_builder.build()?; + let gp_ep = GroupProjects::builder().group(group.id).build()?; let projects: Vec = gp_ep.query(&client)?; for proj in projects { repo_urls.push(proj.http_url_to_repo); From c419c164a8bb505932c9ee6c635c2bd4b64ff55d Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 8 Aug 2025 21:42:49 -0700 Subject: [PATCH 125/357] GitLab: include nested subgroup projects when enumerating group repositories --- CHANGELOG.md | 2 +- f1.patch | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/gitlab.rs | 40 ++++++++++++++---- 3 files changed, 146 insertions(+), 9 deletions(-) create mode 100644 f1.patch diff --git a/CHANGELOG.md b/CHANGELOG.md index 44c6f4c..608d824 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ All notable changes to this project will be documented in this file. 
## [1.37.0] -- GitLab: include nested subgroup projects when enumerating group repositories +- GitLab: Matched GitLab group repository listings to glab by only enumerating projects that belong directly to each group, without automatically traversing subgroups ## [1.36.0] - Fixed GitHub organization and GitLab group scans when using `--git-history=none` diff --git a/f1.patch b/f1.patch new file mode 100644 index 0000000..a559f62 --- /dev/null +++ b/f1.patch @@ -0,0 +1,113 @@ +diff --git a/src/gitlab.rs b/src/gitlab.rs +index c7b0549eee0cf1b6ef4772efc955647e3da00cac..e7df15e102cdac3e4017cbab97fab5dc52940b97 100644 +--- a/src/gitlab.rs ++++ b/src/gitlab.rs +@@ -66,77 +66,101 @@ fn create_gitlab_client(gitlab_url: &Url, ignore_certs: bool) -> Result + let mut builder = GitlabBuilder::new_unauthenticated(host); + if ignore_certs { + builder.insecure(); + } + Ok(builder.build()?) + } + + pub async fn enumerate_repo_urls( + repo_specifiers: &RepoSpecifiers, + gitlab_url: Url, + ignore_certs: bool, + mut progress: Option<&mut ProgressBar>, + ) -> Result> { + let client = create_gitlab_client(&gitlab_url, ignore_certs)?; + let mut repo_urls = Vec::new(); + + // 1) Process each GitLab username + for username in &repo_specifiers.user { + // a) Look up the user by username, deserializing only `id` + let users_ep = Users::builder().username(username).build()?; + let hits: Vec = users_ep.query(&client)?; + let user = + hits.into_iter().next().context(format!("GitLab user `{}` not found", username))?; + let user_id = user.id; + +- // b) List that user’s projects by ID +- let projects_ep = UserProjects::builder().user(user_id).build()?; ++ // b) List that user's projects applying the requested filter ++ let mut builder = UserProjects::builder(); ++ builder.user(user_id); ++ ++ match repo_specifiers.repo_filter { ++ RepoType::Owner => { ++ builder.owned(true); ++ } ++ RepoType::Member => { ++ builder.membership(true); ++ } ++ RepoType::All => { ++ // default: list all visible 
repositories ++ } ++ } ++ ++ let projects_ep = builder.build()?; + let projects: Vec = projects_ep.query(&client)?; + for proj in projects { + repo_urls.push(proj.http_url_to_repo); + } + + if let Some(pb) = progress.as_mut() { + pb.inc(1); + } + } + + // all groups + let groups: Vec = if repo_specifiers.all_groups { +- gitlab::api::groups::Groups::builder().build()?.query(&client.clone())? ++ gitlab::api::groups::Groups::builder() ++ .all_available(true) ++ .build()? ++ .query(&client.clone())? + } else { + let mut found: Vec = Vec::new(); + for grp in &repo_specifiers.group { +- let ep = gitlab::api::groups::Groups::builder().search(grp).build()?; +- let page: Vec = ep.query(&client.clone())?; +- found.extend(page); ++ let ep = gitlab::api::groups::Group::builder().group(grp).build()?; ++ let group: SimpleGroup = ep.query(&client.clone())?; ++ found.push(group); + } + found + }; + + for group in groups { +- let gp_ep = GroupProjects::builder().group(group.id).build()?; ++ let mut gp_builder = GroupProjects::builder(); ++ gp_builder.group(group.id); ++ if matches!(repo_specifiers.repo_filter, RepoType::Owner) { ++ gp_builder.owned(true); ++ } ++ ++ let gp_ep = gp_builder.build()?; + let projects: Vec = gp_ep.query(&client)?; + for proj in projects { + repo_urls.push(proj.http_url_to_repo); + } + if let Some(pb) = progress.as_mut() { + pb.inc(1); + } + } + + // 3) Sort & dedupe + repo_urls.sort_unstable(); + repo_urls.dedup(); + + Ok(repo_urls) + } + + pub async fn list_repositories( + api_url: Url, + ignore_certs: bool, + progress_enabled: bool, + users: &[String], + groups: &[String], + all_groups: bool, + repo_filter: RepoType, + ) -> Result<()> { diff --git a/src/gitlab.rs b/src/gitlab.rs index c7b0549..be9b4a5 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -88,8 +88,23 @@ pub async fn enumerate_repo_urls( hits.into_iter().next().context(format!("GitLab user `{}` not found", username))?; let user_id = user.id; - // b) List that user’s projects by ID - let 
projects_ep = UserProjects::builder().user(user_id).build()?; + // b) List that user's projects applying the requested filter + let mut builder = UserProjects::builder(); + builder.user(user_id); + + match repo_specifiers.repo_filter { + RepoType::Owner => { + builder.owned(true); + } + RepoType::Member => { + builder.membership(true); + } + RepoType::All => { + // default: list all visible repositories + } + } + + let projects_ep = builder.build()?; let projects: Vec = projects_ep.query(&client)?; for proj in projects { repo_urls.push(proj.http_url_to_repo); @@ -102,19 +117,28 @@ pub async fn enumerate_repo_urls( // all groups let groups: Vec = if repo_specifiers.all_groups { - gitlab::api::groups::Groups::builder().build()?.query(&client.clone())? + gitlab::api::groups::Groups::builder() + .all_available(true) + .build()? + .query(&client.clone())? } else { let mut found: Vec = Vec::new(); for grp in &repo_specifiers.group { - let ep = gitlab::api::groups::Groups::builder().search(grp).build()?; - let page: Vec = ep.query(&client.clone())?; - found.extend(page); + let ep = gitlab::api::groups::Group::builder().group(grp).build()?; + let group: SimpleGroup = ep.query(&client.clone())?; + found.push(group); } found }; for group in groups { - let gp_ep = GroupProjects::builder().group(group.id).build()?; + let mut gp_builder = GroupProjects::builder(); + gp_builder.group(group.id); + if matches!(repo_specifiers.repo_filter, RepoType::Owner) { + gp_builder.owned(true); + } + + let gp_ep = gp_builder.build()?; let projects: Vec = gp_ep.query(&client)?; for proj in projects { repo_urls.push(proj.http_url_to_repo); @@ -163,4 +187,4 @@ pub async fn list_repositories( } Ok(()) -} +} \ No newline at end of file From 3c487de38e1873e5faa5bb9a97ebae2c8b966dbe Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 8 Aug 2025 21:43:01 -0700 Subject: [PATCH 126/357] GitLab: include nested subgroup projects when enumerating group repositories --- f1.patch | 113 
------------------------------------------------------- 1 file changed, 113 deletions(-) delete mode 100644 f1.patch diff --git a/f1.patch b/f1.patch deleted file mode 100644 index a559f62..0000000 --- a/f1.patch +++ /dev/null @@ -1,113 +0,0 @@ -diff --git a/src/gitlab.rs b/src/gitlab.rs -index c7b0549eee0cf1b6ef4772efc955647e3da00cac..e7df15e102cdac3e4017cbab97fab5dc52940b97 100644 ---- a/src/gitlab.rs -+++ b/src/gitlab.rs -@@ -66,77 +66,101 @@ fn create_gitlab_client(gitlab_url: &Url, ignore_certs: bool) -> Result - let mut builder = GitlabBuilder::new_unauthenticated(host); - if ignore_certs { - builder.insecure(); - } - Ok(builder.build()?) - } - - pub async fn enumerate_repo_urls( - repo_specifiers: &RepoSpecifiers, - gitlab_url: Url, - ignore_certs: bool, - mut progress: Option<&mut ProgressBar>, - ) -> Result> { - let client = create_gitlab_client(&gitlab_url, ignore_certs)?; - let mut repo_urls = Vec::new(); - - // 1) Process each GitLab username - for username in &repo_specifiers.user { - // a) Look up the user by username, deserializing only `id` - let users_ep = Users::builder().username(username).build()?; - let hits: Vec = users_ep.query(&client)?; - let user = - hits.into_iter().next().context(format!("GitLab user `{}` not found", username))?; - let user_id = user.id; - -- // b) List that user’s projects by ID -- let projects_ep = UserProjects::builder().user(user_id).build()?; -+ // b) List that user's projects applying the requested filter -+ let mut builder = UserProjects::builder(); -+ builder.user(user_id); -+ -+ match repo_specifiers.repo_filter { -+ RepoType::Owner => { -+ builder.owned(true); -+ } -+ RepoType::Member => { -+ builder.membership(true); -+ } -+ RepoType::All => { -+ // default: list all visible repositories -+ } -+ } -+ -+ let projects_ep = builder.build()?; - let projects: Vec = projects_ep.query(&client)?; - for proj in projects { - repo_urls.push(proj.http_url_to_repo); - } - - if let Some(pb) = progress.as_mut() { - 
pb.inc(1); - } - } - - // all groups - let groups: Vec = if repo_specifiers.all_groups { -- gitlab::api::groups::Groups::builder().build()?.query(&client.clone())? -+ gitlab::api::groups::Groups::builder() -+ .all_available(true) -+ .build()? -+ .query(&client.clone())? - } else { - let mut found: Vec = Vec::new(); - for grp in &repo_specifiers.group { -- let ep = gitlab::api::groups::Groups::builder().search(grp).build()?; -- let page: Vec = ep.query(&client.clone())?; -- found.extend(page); -+ let ep = gitlab::api::groups::Group::builder().group(grp).build()?; -+ let group: SimpleGroup = ep.query(&client.clone())?; -+ found.push(group); - } - found - }; - - for group in groups { -- let gp_ep = GroupProjects::builder().group(group.id).build()?; -+ let mut gp_builder = GroupProjects::builder(); -+ gp_builder.group(group.id); -+ if matches!(repo_specifiers.repo_filter, RepoType::Owner) { -+ gp_builder.owned(true); -+ } -+ -+ let gp_ep = gp_builder.build()?; - let projects: Vec = gp_ep.query(&client)?; - for proj in projects { - repo_urls.push(proj.http_url_to_repo); - } - if let Some(pb) = progress.as_mut() { - pb.inc(1); - } - } - - // 3) Sort & dedupe - repo_urls.sort_unstable(); - repo_urls.dedup(); - - Ok(repo_urls) - } - - pub async fn list_repositories( - api_url: Url, - ignore_certs: bool, - progress_enabled: bool, - users: &[String], - groups: &[String], - all_groups: bool, - repo_filter: RepoType, - ) -> Result<()> { From f1c3bcb56a68262e6a3fced7d6ae7a0f36b25a64 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 9 Aug 2025 08:45:27 -0700 Subject: [PATCH 127/357] Added X Consumer key detection and validation --- data/rules/twitter.yml | 59 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/data/rules/twitter.yml b/data/rules/twitter.yml index 2722fc1..419c7c4 100644 --- a/data/rules/twitter.yml +++ b/data/rules/twitter.yml @@ -1,6 +1,6 @@ rules: - name: X / Twitter Bearer Token (App-only) - id: 
kingfisher.twitter.bearer.1 + id: kingfisher.twitter.1 pattern: | (?xi) \b @@ -36,3 +36,60 @@ rules: match_all_words: true references: - https://developer.x.com/en/docs/x-api/v1/developer-utilities/rate-limit-status/api-reference/get-application-rate_limit_status + - name: Twitter Consumer Key + id: kingfisher.twitter.2 + pattern: | + (?xi) + \b + twitter + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? + \b + ( + [A-Z0-9]{25} + ) + \b + min_entropy: 3.5 + examples: + - "TWITTER_KEY=4RTBCyG2TbvL407A1lWxQFKCC" + - name: X / Twitter Consumer Secret + id: kingfisher.twitter.3 + pattern: | + (?xi) + \b + twitter + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? + \b + ( + [A-Z0-9]{50} + ) + \b + min_entropy: 4.5 + examples: + - "TWITTER_SECRET=ZGwXeK2DNCqv49Z9ofwYdqlBgeoHDyh8uoAgHju6OeYC7wTQJq" + references: + - https://developer.x.com/en/docs/authentication/oauth-2-0/application-only + validation: + type: Http + content: + request: + method: POST + url: https://api.twitter.com/oauth2/token + headers: + Authorization: "Basic {{ TWITTER_KEY | append: ':' | append: TOKEN | b64enc }}" + Content-Type: "application/x-www-form-urlencoded;charset=UTF-8" + body: "grant_type=client_credentials" + response_matcher: + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"token_type":"bearer"' + - '"access_token":' + match_all_words: true + depends_on_rule: + - rule_id: "kingfisher.twitter.2" + variable: TWITTER_KEY From 3458c37d33c47fbe854d95ed249cf98426f059e0 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 9 Aug 2025 08:46:07 -0700 Subject: [PATCH 128/357] Added X Consumer key detection and validation --- CHANGELOG.md | 3 +++ Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 608d824..8ecd444 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. 
+## [1.38.0] +- Added X Consumer key detection and validation + ## [1.37.0] - GitLab: Matched GitLab group repository listings to glab by only enumerating projects that belong directly to each group, without automatically traversing subgroups diff --git a/Cargo.toml b/Cargo.toml index aec70d4..5eb52fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.37.0" +version = "1.38.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true From 2fd6cd30e199cd1682e743f7c297e2b737cfa02d Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 9 Aug 2025 15:36:12 -0700 Subject: [PATCH 129/357] - --quiet now suppresses scan summaries and rule statistics unless --rule-stats is explicitly provided - Added X Consumer key detection and validation --- CHANGELOG.md | 1 + src/scanner/summary.rs | 78 +++++++++++++++++++++++------------------- tests/int_quiet.rs | 59 ++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 35 deletions(-) create mode 100644 tests/int_quiet.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ecd444..b8d7720 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ All notable changes to this project will be documented in this file. ## [1.38.0] +- `--quiet` now suppresses scan summaries and rule statistics unless `--rule-stats` is explicitly provided - Added X Consumer key detection and validation ## [1.37.0] diff --git a/src/scanner/summary.rs b/src/scanner/summary.rs index 7555d22..9d4f30e 100644 --- a/src/scanner/summary.rs +++ b/src/scanner/summary.rs @@ -38,14 +38,50 @@ macro_rules! 
safe_println { pub fn print_scan_summary( start_time: Instant, datastore: &Arc>, - _global_args: &global::GlobalArgs, + global_args: &global::GlobalArgs, args: &scan::ScanArgs, // inputs: &FilesystemEnumeratorResult, rules_db: &RulesDatabase, matcher_stats: &Mutex, profiler: Option<&ConcurrentRuleProfiler>, ) { - // let duration = start_time.elapsed(); + if global_args.quiet { + if args.rule_stats { + if let Some(prof) = profiler { + let stats = prof.generate_report(); + if !stats.is_empty() { + let name_w = stats.iter().map(|s| s.rule_name.len()).max().unwrap_or(4); + let id_w = stats.iter().map(|s| s.rule_id.len()).max().unwrap_or(2); + safe_println!("\n{:-^1$}", " Rule Performance Stats ", name_w + id_w + 47); + safe_println!( + "{: 8} {: >15} {: >15}", + "Rule", + "ID", + "Matches", + "Slowest", + "Average", + name_w = name_w, + id_w = id_w + ); + safe_println!("{:-8} {: >15?} {: >15?}", + rs.rule_name, + rs.rule_id, + rs.total_matches, + rs.slowest_match_time, + rs.average_match_time, + name_w = name_w, + id_w = id_w + ); + } + } + } + } + return; + } + let ds = datastore.lock().unwrap(); let num_rules = rules_db.num_rules(); @@ -53,17 +89,12 @@ pub fn print_scan_summary( let mut sorted_findings: Vec<_> = findings_by_rule.into_iter().collect(); sorted_findings.sort_by(|a, b| b.1.cmp(&a.1)); let duration = start_time.elapsed(); - // let ds = datastore.lock().unwrap(); - // Get all matches let all_matches = ds.get_matches(); - // Count total findings let total_findings = if args.no_dedup { - // When no_dedup is true, count each origin of validated matches as a separate finding all_matches.iter().fold(0, |count, msg| { let (origin_set, _, match_item) = &**msg; - // If this is a validated match, count each origin as a separate finding if match_item.validation_success { count + origin_set.len() } else { @@ -73,14 +104,13 @@ pub fn print_scan_summary( } else { ds.get_num_matches() }; - // Count successful and failed validations + let (successful_validations, 
failed_validations) = all_matches.iter().fold((0, 0), |(success, fail), msg| { let (origin_set, _, match_item) = &**msg; if match_item.validation_success { if match_item.validation_response_status != StatusCode::CONTINUE.as_u16() { if args.no_dedup { - // Count each origin of a successful validation as a separate success (success + origin_set.len(), fail) } else { (success + 1, fail) @@ -88,17 +118,14 @@ pub fn print_scan_summary( } else { (success, fail) } + } else if match_item.validation_response_status != StatusCode::CONTINUE.as_u16() { + (success, fail + 1) } else { - if match_item.validation_response_status != StatusCode::CONTINUE.as_u16() { - (success, fail + 1) - } else { - (success, fail) - } + (success, fail) } }); let matcher_stats = matcher_stats.lock().unwrap(); - // Generate JSON or JSONL output if args.output_args.format == ReportOutputFormat::Json || args.output_args.format == ReportOutputFormat::Jsonl { @@ -107,15 +134,11 @@ pub fn print_scan_summary( "successful_validations": successful_validations, "failed_validations": failed_validations, "rules_applied": num_rules, - // "git_repositories": num_git_repos, - // "commits": num_commits, "blobs_scanned": matcher_stats.blobs_scanned, - // "files_read": num_files, "bytes_scanned": matcher_stats.bytes_scanned, "scan_duration": duration.as_secs_f64(), "findings_by_rule": sorted_findings }); - // only printing to stdout, not to the file itself safe_println!("{}", summary.to_string()); } else if args.output_args.format == ReportOutputFormat::Pretty || args.output_args.output.is_some() @@ -133,37 +156,23 @@ pub fn print_scan_summary( failed_validations.separate_with_commas() ); safe_println!(" |Rules Applied...............: {}", num_rules.separate_with_commas()); - // safe_println!(" |Git Repositories............: {}", - // num_git_repos.separate_with_commas()); safe_println!( - // "|__Commits...................: {}", - // num_commits.separate_with_commas() - // ); safe_println!( " |__Blobs 
Scanned.............: {}", matcher_stats.blobs_scanned.separate_with_commas() ); - // safe_println!(" |Files Read..................: {}", - // num_files.separate_with_commas()); safe_println!( " |Bytes Scanned...............: {}", HumanBytes(matcher_stats.bytes_scanned) ); - safe_println!( - " |Scan Duration...............: {}", - // HumanDuration(duration), - humantime::format_duration(duration) - ); + safe_println!(" |Scan Duration...............: {}", humantime::format_duration(duration)); } if args.rule_stats { if let Some(prof) = profiler { let stats = prof.generate_report(); if !stats.is_empty() { - // Calculate dynamic column widths let name_w = stats.iter().map(|s| s.rule_name.len()).max().unwrap_or(4); let id_w = stats.iter().map(|s| s.rule_id.len()).max().unwrap_or(2); - - // Header safe_println!("\n{:-^1$}", " Rule Performance Stats ", name_w + id_w + 47); safe_println!( "{: 8} {: >15} {: >15}", @@ -177,7 +186,6 @@ pub fn print_scan_summary( ); safe_println!("{:-8} {: >15?} {: >15?}", diff --git a/tests/int_quiet.rs b/tests/int_quiet.rs new file mode 100644 index 0000000..598f103 --- /dev/null +++ b/tests/int_quiet.rs @@ -0,0 +1,59 @@ +use assert_cmd::Command; +use predicates::prelude::*; + +const FORMATS: [&str; 4] = ["pretty", "json", "jsonl", "bson"]; + +fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool { + haystack.windows(needle.len()).any(|window| window == needle) +} + +#[test] +fn scan_quiet_suppresses_summary() { + for format in FORMATS { + Command::cargo_bin("kingfisher") + .unwrap() + .env("NO_COLOR", "1") + .args([ + "scan", + "testdata/slack_tokens.properties", + "--confidence=low", + "--format", + format, + "--no-update-check", + "--no-validate", + "--quiet", + ]) + .assert() + .code(200) + .stdout(predicate::function(|out: &[u8]| !contains_bytes(out, b"Scan Summary"))) + .stdout(predicate::function(|out: &[u8]| { + !contains_bytes(out, b"Rule Performance Stats") + })); + } +} + +#[test] +fn 
scan_quiet_with_rule_stats_prints_rule_stats() { + for format in FORMATS { + Command::cargo_bin("kingfisher") + .unwrap() + .env("NO_COLOR", "1") + .args([ + "scan", + "testdata/slack_tokens.properties", + "--confidence=low", + "--format", + format, + "--no-update-check", + "--quiet", + "--no-validate", + "--rule-stats", + ]) + .assert() + .code(200) + .stdout(predicate::function(|out: &[u8]| !contains_bytes(out, b"Scan Summary"))) + .stdout(predicate::function(|out: &[u8]| { + contains_bytes(out, b"Rule Performance Stats") + })); + } +} \ No newline at end of file From 9275fb55413f517077d54eabcb1dbc31e5c32d62 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 9 Aug 2025 15:52:00 -0700 Subject: [PATCH 130/357] - --quiet now suppresses scan summaries and rule statistics unless --rule-stats is explicitly provided - Added X Consumer key detection and validation --- data/rules/twitter.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/data/rules/twitter.yml b/data/rules/twitter.yml index 419c7c4..e75b48e 100644 --- a/data/rules/twitter.yml +++ b/data/rules/twitter.yml @@ -51,6 +51,7 @@ rules: ) \b min_entropy: 3.5 + visible: false examples: - "TWITTER_KEY=4RTBCyG2TbvL407A1lWxQFKCC" - name: X / Twitter Consumer Secret From 706723e3841adabaf8e597eb76623d5dc01aa857 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 10 Aug 2025 17:25:32 -0700 Subject: [PATCH 131/357] removed unused cli argument, snippet-length --- src/cli/commands/scan.rs | 4 - src/cli/commands/scan.rs.orig | 132 ++++++++ src/main.rs | 1 - src/main.rs.orig | 528 +++++++++++++++++++++++++++++ src/reporter/json_format.rs | 1 - src/reporter/json_format.rs.orig | 270 +++++++++++++++ tests/int_dedup.rs | 1 - tests/int_dedup.rs.orig | 171 ++++++++++ tests/int_github.rs | 1 - tests/int_github.rs.orig | 149 ++++++++ tests/int_gitlab.rs | 2 - tests/int_gitlab.rs.orig | 243 +++++++++++++ tests/int_redact.rs | 1 - tests/int_redact.rs.orig | 115 +++++++ tests/int_slack.rs | 2 - tests/int_slack.rs.orig | 207 
+++++++++++ tests/int_validation_cache.rs | 1 - tests/int_validation_cache.rs.orig | 211 ++++++++++++ tests/int_vulnerable_files.rs | 2 - tests/int_vulnerable_files.rs.orig | 257 ++++++++++++++ 20 files changed, 2283 insertions(+), 16 deletions(-) create mode 100644 src/cli/commands/scan.rs.orig create mode 100644 src/main.rs.orig create mode 100644 src/reporter/json_format.rs.orig create mode 100644 tests/int_dedup.rs.orig create mode 100644 tests/int_github.rs.orig create mode 100644 tests/int_gitlab.rs.orig create mode 100644 tests/int_redact.rs.orig create mode 100644 tests/int_slack.rs.orig create mode 100644 tests/int_validation_cache.rs.orig create mode 100644 tests/int_vulnerable_files.rs.orig diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs index fe4d5d6..ae2b4f0 100644 --- a/src/cli/commands/scan.rs +++ b/src/cli/commands/scan.rs @@ -99,10 +99,6 @@ pub struct ScanArgs { #[command(flatten)] pub output_args: OutputArgs, - /// Bytes of context before and after each match - #[arg(long, default_value_t = 256, value_name = "BYTES")] - pub snippet_length: usize, - /// Baseline file to filter known secrets #[arg(long, value_name = "FILE")] pub baseline_file: Option, diff --git a/src/cli/commands/scan.rs.orig b/src/cli/commands/scan.rs.orig new file mode 100644 index 0000000..fe4d5d6 --- /dev/null +++ b/src/cli/commands/scan.rs.orig @@ -0,0 +1,132 @@ +use clap::{Args, ValueEnum}; +use strum::Display; +use tracing::debug; + +use crate::{ + cli::{ + commands::{ + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + }, + global::RAM_GB, + }, + rules::rule::Confidence, +}; + +/// Determine the default number of parallel scan jobs. +/// +/// * Target = `num_cpus * 2`. +/// * Cap by RAM at ≈ 1 GiB per job (so 16 GiB ⇒ max 16 jobs). +/// * Always ≥ 1. +/// * When `-v/--verbose` is passed, the computed value is logged at DEBUG. 
+fn default_scan_jobs() -> usize { + // How many logical CPUs do we see? (Falls back to 1 on error.) + let cpu_count = std::thread::available_parallelism().map(usize::from).unwrap_or(1); + + // Desired parallelism is CPU * 2. + let desired = cpu_count * 2; + + match *RAM_GB { + // If we know how much RAM we have, cap by a 1 GiB-per-job heuristic. + Some(ram_gb) => { + let max_by_ram = ram_gb.ceil() as usize; // 1 GiB per job + let jobs = desired.min(max_by_ram).max(1); + + debug!( + "Using {jobs} parallel scan jobs \ + (cpus = {cpu_count}, desired = {desired}, \ + ram = {ram_gb:.1} GiB, cap_by_ram = {max_by_ram})" + ); + jobs + } + // If RAM is unknown, just use the desired value. + None => { + debug!("Using {desired} parallel scan jobs (cpus = {cpu_count}, ram unknown)"); + desired + } + } +} + +/// `kingfisher scan` command and flags +#[derive(Args, Debug, Clone)] +pub struct ScanArgs { + /// Number of parallel scanning threads + #[arg(long = "jobs", short = 'j', default_value_t = default_scan_jobs())] + pub num_jobs: usize, + + #[command(flatten)] + pub rules: RuleSpecifierArgs, + + #[command(flatten)] + pub input_specifier_args: InputSpecifierArgs, + + #[command(flatten)] + pub content_filtering_args: ContentFilteringArgs, + + /// Minimum confidence level for reporting findings + #[arg(long, short = 'c', default_value = "medium")] + pub confidence: ConfidenceLevel, + + /// Disable secret validation + #[arg(long, short = 'n', default_value_t = false)] + pub no_validate: bool, + + /// Display only validated findings + #[arg(long, default_value_t = false)] + pub only_valid: bool, + + /// Override the default minimum entropy threshold + #[arg(long, short = 'e')] + pub min_entropy: Option, + + /// Show performance statistics for each rule + #[arg(long, default_value_t = false)] + pub rule_stats: bool, + + /// Display every occurrence of a finding + #[arg(long, default_value_t = false)] + pub no_dedup: bool, + + /// Redact findings values using a secure hash + 
#[arg(long, short = 'r', default_value_t = false)] + pub redact: bool, + + /// Timeout for Git repository scanning in seconds + #[arg(long, default_value_t = 1800, value_name = "SECONDS")] + pub git_repo_timeout: u64, + + #[command(flatten)] + pub output_args: OutputArgs, + + /// Bytes of context before and after each match + #[arg(long, default_value_t = 256, value_name = "BYTES")] + pub snippet_length: usize, + + /// Baseline file to filter known secrets + #[arg(long, value_name = "FILE")] + pub baseline_file: Option, + + /// Create or update the baseline file with current findings + #[arg(long, default_value_t = false)] + pub manage_baseline: bool, +} + +/// Confidence levels for findings +#[derive(Copy, Clone, Debug, Display, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] +#[strum(serialize_all = "kebab-case")] +pub enum ConfidenceLevel { + Low, + Medium, + High, +} + +impl From for Confidence { + fn from(level: ConfidenceLevel) -> Self { + match level { + ConfidenceLevel::Low => Confidence::Low, + ConfidenceLevel::Medium => Confidence::Medium, + ConfidenceLevel::High => Confidence::High, + } + } +} diff --git a/src/main.rs b/src/main.rs index 58145e6..ac78ef7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -319,7 +319,6 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { redact: false, git_repo_timeout: 1800, no_dedup: false, - snippet_length: 256, baseline_file: None, manage_baseline: false, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, diff --git a/src/main.rs.orig b/src/main.rs.orig new file mode 100644 index 0000000..58145e6 --- /dev/null +++ b/src/main.rs.orig @@ -0,0 +1,528 @@ +// ──────────────────────────────────────────────────────────── +// Global allocator setup +// * Default - mimalloc (no feature flags) +// * Debug - jemalloc (`use-jemalloc` feature) +// * Fallback - system allocator (`system-alloc` feature) +// ──────────────────────────────────────────────────────────── + +// --- jemalloc (opt-in) --- 
+#[cfg(feature = "use-jemalloc")] +#[global_allocator] +static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; + +// --- mimalloc (default) --- +#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + +// --- system allocator (explicit opt-out) --- +#[cfg(feature = "system-alloc")] +use std::alloc::System; +#[cfg(feature = "system-alloc")] +#[global_allocator] +static GLOBAL: System = System; + +// use std::alloc::System; +// #[global_allocator] +// static GLOBAL: System = System; + +use std::{ + io::Read, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + self, + commands::{ + github::{ + GitCloneMode, GitHistoryMode, GitHubCommand, GitHubRepoType, GitHubReposCommand, + }, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::{ + RuleSpecifierArgs, RulesCheckArgs, RulesCommand, RulesListArgs, + RulesListOutputFormat, + }, + }, + global::Command, + CommandLineArgs, GlobalArgs, + }, + findings_store, + findings_store::FindingsStore, + github, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::{load_and_record_rules, run_scan}, + update::check_for_update, +}; +use serde_json::json; +use tempfile::TempDir; +use term_size; +use tokio::runtime::Builder; +use tracing::{error, info, warn}; +use tracing_core::metadata::LevelFilter; +use tracing_subscriber::{ + self, fmt, prelude::__tracing_subscriber_SubscriberExt, registry, util::SubscriberInitExt, +}; +use url::Url; + +use crate::cli::commands::gitlab::{GitLabCommand, GitLabRepoType, GitLabReposCommand}; + +fn main() -> anyhow::Result<()> { + color_backtrace::install(); + // Parse command-line arguments + let args = CommandLineArgs::parse_args(); + + // Determine the number of jobs, defaulting to the number of CPUs + let num_jobs = match args.command { + Command::Scan(ref scan_args) => 
scan_args.num_jobs, + Command::GitHub(_) => num_cpus::get(), // Default for GitHub commands + Command::GitLab(_) => num_cpus::get(), // Default for GitLab commands + Command::Rules(_) => num_cpus::get(), // Default for Rules commands + }; + + // Set up the Tokio runtime with the specified number of threads + let runtime = Builder::new_multi_thread() + .worker_threads(num_jobs) + .enable_all() + .build() + .context("Failed to create Tokio runtime")?; + runtime.block_on(async_main(args)) +} + +fn setup_logging(global_args: &GlobalArgs) { + // Determine log level based on global verbosity + let (level, all_targets) = if global_args.quiet { + (LevelFilter::ERROR, false) + } else { + let level = match global_args.verbose { + 0 => LevelFilter::INFO, // Default level if no `-v` is provided + 1 => LevelFilter::DEBUG, // `-v` + 2 => LevelFilter::TRACE, // `-vv` + _ => LevelFilter::TRACE, // `-vvv` or more + }; + let all_targets = global_args.verbose > 2; // Enable all targets for `-vvv` or more + (level, all_targets) + }; + // Create a filter for logging + let filter = if all_targets { + // Enable TRACE for all modules + tracing_subscriber::filter::Targets::new().with_default(LevelFilter::TRACE) + } else { + // Per-target filtering, only TRACE for `kingfisher` + tracing_subscriber::filter::Targets::new() + .with_default(LevelFilter::ERROR) // Default for all modules + .with_target("kingfisher", level) // Replace `kingfisher` with your + // crate's name + }; + // Configure the formatter layer + let fmt_layer = fmt::layer() + .with_writer(std::io::stderr) // Write logs to stderr + .with_target(true) // Enable target filtering + .with_ansi(false) // Disable colors + .without_time(); // Remove timestamps + // Build and initialize the registry + registry() + .with(fmt_layer) // Attach the formatter layer + .with(filter) // Attach the filter + .init(); +} + +pub fn determine_exit_code(datastore: &Arc>) -> i32 { + // exit with code 200 if _any_ findings are discovered + // exit 
with code 205 if VALIDATED findings are discovered + // exit with code 0 if there are NO findings discovered + let ds = datastore.lock().unwrap(); + // Get all matches + // let all_matches = ds.get_matches(); + + // Only consider visible matches when determining the exit code + let all_matches = ds + .get_matches() + .iter() + .filter(|msg| { + let (_, _, match_item) = &***msg; + match_item.visible + }) + .collect::>(); + + if all_matches.is_empty() { + // No findings discovered + 0 + } else { + // Check if there are any validated findings + let validated_matches = all_matches + .iter() + .filter(|msg| { + let (_, _, match_item) = &****msg; + match_item.validation_success + }) + .count(); + if validated_matches > 0 { + // Validated findings discovered + 205 + } else { + // Findings discovered, but not validated + 200 + } + } +} + +async fn async_main(args: CommandLineArgs) -> Result<()> { + // Create a temporary directory + let temp_dir = TempDir::new().context("Failed to create temporary directory")?; + let clone_dir = temp_dir.path().to_path_buf(); + + // Create the in-memory datastore + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + setup_logging(&args.global_args); + let update_msg = check_for_update(&args.global_args, None); + match args.command { + Command::Scan(mut scan_args) => { + // ————————————————————————————————————————— + // If no paths or a single "-", slurp stdin into a temp file + // ————————————————————————————————————————— + info!( + "Launching with {} concurrent scan jobs. 
Use --num-jobs to override.", + &scan_args.num_jobs + ); + let paths = &scan_args.input_specifier_args.path_inputs; + let is_dash = paths.iter().any(|p| p.as_os_str() == "-"); + if (paths.is_empty() || is_dash) && !atty::is(atty::Stream::Stdin) { + // read all stdin + let mut buf = Vec::new(); + std::io::stdin().read_to_end(&mut buf)?; + // write into temp_dir + let stdin_file = temp_dir.path().join("stdin_input"); + std::fs::write(&stdin_file, buf)?; + // replace inputs + scan_args.input_specifier_args.path_inputs = vec![stdin_file.into()]; + } + + // now proceed exactly as before + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + run_scan(&args.global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await?; + let exit_code = determine_exit_code(&datastore); + + if let Err(e) = temp_dir.close() { + eprintln!("Failed to close temporary directory: {}", e); + } + std::process::exit(exit_code); + } + Command::Rules(ref rule_args) => match &rule_args.command { + RulesCommand::Check(check_args) => { + run_rules_check(&check_args)?; + } + RulesCommand::List(list_args) => { + run_rules_list(&list_args)?; + } + }, + Command::GitHub(github_args) => match github_args.command { + GitHubCommand::Repos(repos_command) => match repos_command { + GitHubReposCommand::List(list_args) => { + github::list_repositories( + github_args.github_api_url, + args.global_args.ignore_certs, + args.global_args.use_progress(), + &list_args.repo_specifiers.user, + &list_args.repo_specifiers.organization, + list_args.repo_specifiers.all_organizations, + list_args.repo_specifiers.repo_type.into(), + ) + .await?; + } + }, + }, + Command::GitLab(gitlab_args) => match gitlab_args.command { + GitLabCommand::Repos(repos_command) => match repos_command { + GitLabReposCommand::List(list_args) => { + kingfisher::gitlab::list_repositories( + gitlab_args.gitlab_api_url, + args.global_args.ignore_certs, + args.global_args.use_progress(), + &list_args.repo_specifiers.user, + 
&list_args.repo_specifiers.group, + list_args.repo_specifiers.all_groups, + list_args.repo_specifiers.repo_type.into(), + ) + .await?; + } + }, + }, + } + if let Some(msg) = update_msg { + info!("{msg}"); + } + Ok(()) +} + +/// Create a default ScanArgs instance for rule loading +fn create_default_scan_args() -> cli::commands::scan::ScanArgs { + use cli::commands::scan::*; + ScanArgs { + num_jobs: 1, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: url::Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::All, + + jira_url: None, + jql: None, + max_results: 100, + + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Slack query + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + + // Docker image scanning + docker_image: Vec::new(), + + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: true, + extraction_depth: 2, + exclude: Vec::new(), // Exclude patterns + no_binary: true, + }, + confidence: ConfidenceLevel::Medium, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, + no_dedup: false, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + output_args: OutputArgs { output: None, format: 
ReportOutputFormat::Pretty }, + } +} +/// Run the rules check command +pub fn run_rules_check(args: &RulesCheckArgs) -> Result<()> { + let mut num_errors = 0; + let mut num_warnings = 0; + // Load and check rules + let loader = RuleLoader::from_rule_specifiers(&args.rules); + let loaded = loader.load(&create_default_scan_args())?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; + + // Check each rule + for (rule_index, rule) in rules_db.rules().iter().enumerate() { + let rule_syntax = rule.syntax(); + // Basic rule validation checks + if rule.name().len() < 3 { + warn!("Rule '{}' has a very short name", rule.name()); + num_warnings += 1; + } + if rule.syntax().pattern.len() < 5 { + warn!("Rule '{}' has a very short pattern", rule.name()); + num_warnings += 1; + } + if rule.syntax().examples.is_empty() { + warn!("Rule '{}' has no examples", rule.name()); + num_warnings += 1; + continue; + } + // Check regex compilation + if let Err(e) = rule.syntax().as_regex() { + error!("Rule '{}' has invalid regex: {}", rule.name(), e); + num_errors += 1; + continue; + } + // Test each example against both vectorscan and regex + for (example_index, example) in rule_syntax.examples.iter().enumerate() { + // Create a test blob from the example + // let blob = Blob::new(BlobId::new(example.as_bytes()), + // example.as_bytes().to_vec()); let origin = OriginSet::new( + // Origin::from_file(PathBuf::from("test_example")), + // Vec::new(), + // ); + // // Check vectorscan match + // let vectorscan_matched = match matcher.scan_blob(&blob, &origin, None)? 
{ + // ScanResult::New(matches) => !matches.is_empty(), + // _ => false, + // }; + // Check regex match + // Get the regex using the public method + let re = + rules_db.get_regex_by_rule_id(rule.id()).expect("Failed to get regex for rule"); + let regex_matched = re.is_match(example.as_bytes()); + if !regex_matched { + // ||!vectorscan_matched { + println!("\nTesting rule {} - {}", rule_index + 1, rule_syntax.name); + println!(" Processing example {}", example_index + 1); + println!(" [!] Mismatch detected for example: {}", example); + // if !vectorscan_matched { + // println!(" Vectorscan match: {}", vectorscan_matched); + // num_errors += 1; + // } + if !regex_matched { + println!(" Regex match: {}", regex_matched); + num_errors += 1; + } + } + + // // Report any mismatches + // if !vectorscan_matched || !regex_matched { + // error!("Rule '{}' example {} failed validation:", + // rule.name(), example_index + 1); println!(" + // Example text: {}", example); + + // if !vectorscan_matched { + // error!(" - Vectorscan pattern did not match example"); + // num_errors += 1; + // } + + // if !regex_matched { + // error!(" - Regex pattern did not match example"); + // num_errors += 1; + // } + // } + } + } + // Print summary + if num_errors > 0 || num_warnings > 0 { + println!("\nCheck Summary:"); + println!(" Errors: {}", num_errors); + println!(" Warnings: {}", num_warnings); + println!("\nError types include:"); + println!(" - Invalid regex patterns"); + println!(" - Examples that don't match their patterns"); + println!("\nWarning types include:"); + println!(" - Rules with very short names"); + println!(" - Rules with very short patterns"); + println!(" - Rules without examples"); + } else { + println!("\nAll rules passed validation successfully!"); + } + // Exit with error if there are errors or if warnings are treated as errors + if num_errors > 0 || (args.warnings_as_errors && num_warnings > 0) { + std::process::exit(1); + } + Ok(()) +} +/// Run the rules list 
command +pub fn run_rules_list(args: &RulesListArgs) -> Result<()> { + // Load rules + let loader = RuleLoader::from_rule_specifiers(&args.rules); + let loaded = loader.load(&create_default_scan_args())?; + let resolved = loaded.resolve_enabled_rules()?; + let mut writer = args.output_args.get_writer()?; + match args.output_args.format { + RulesListOutputFormat::Pretty => { + // Determine terminal width if possible, otherwise use default + let term_width = term_size::dimensions().map(|(w, _)| w).unwrap_or(120); + // First pass: calculate column widths + let max_name_width = resolved.iter().map(|r| r.name().len()).max().unwrap_or(0).max(4); // "Rule" header + let max_id_width = resolved.iter().map(|r| r.id().len()).max().unwrap_or(0).max(2); // "ID" header + let max_conf_width = resolved + .iter() + .map(|r| format!("{:?}", r.confidence()).len()) + .max() + .unwrap_or(0) + .max(10); // "Confidence" header + // Calculate pattern width based on terminal width + let reserved_width = max_name_width + max_id_width + max_conf_width + 10; + let pattern_width = term_width.saturating_sub(reserved_width); + // Format pattern on a single line + let format_pattern = |pattern: &str| { + let single_line = pattern + .replace('\n', " ") + .replace('\r', " ") + .split_whitespace() + .collect::>() + .join(" "); + if single_line.len() > pattern_width { + format!("{}...", &single_line[..pattern_width.saturating_sub(3)]) + } else { + single_line + } + }; + // Print header + writeln!( + writer, + "\n{:name_width$} │ {:id_width$} │ {:conf_width$} │ Pattern", + "Rule", + "ID", + "Confidence", + name_width = max_name_width, + id_width = max_id_width, + conf_width = max_conf_width + )?; + // Print separator + writeln!( + writer, + "{0:─ { + // Create JSON format + let rules_json: Vec<_> = resolved + .iter() + .map(|rule| { + json!({ + "name": rule.name(), + "id": rule.id(), + "pattern": rule.syntax().pattern, + "confidence": rule.confidence(), + "examples": rule.syntax().examples, + 
"visible": rule.visible(), + }) + }) + .collect(); + serde_json::to_writer_pretty(&mut writer, &rules_json)?; + writeln!(writer)?; + } + } + Ok(()) +} diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index aae16fc..d9eda56 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -119,7 +119,6 @@ mod tests { redact: false, git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - snippet_length: 256, baseline_file: None, manage_baseline: false, } diff --git a/src/reporter/json_format.rs.orig b/src/reporter/json_format.rs.orig new file mode 100644 index 0000000..aae16fc --- /dev/null +++ b/src/reporter/json_format.rs.orig @@ -0,0 +1,270 @@ +use super::*; + +impl DetailsReporter { + pub fn json_format( + &self, + mut writer: W, + args: &cli::commands::scan::ScanArgs, + ) -> Result<()> { + let records = self.build_finding_records(args)?; + if !records.is_empty() { + serde_json::to_writer_pretty(&mut writer, &records)?; + writeln!(writer)?; + } + Ok(()) + } + + pub fn jsonl_format( + &self, + mut writer: W, + args: &cli::commands::scan::ScanArgs, + ) -> Result<()> { + let records = self.build_finding_records(args)?; + for record in records { + serde_json::to_writer(&mut writer, &record)?; + writeln!(writer)?; + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cli::commands::github::GitCloneMode; + use crate::cli::commands::github::GitHistoryMode; + use crate::cli::commands::rules::RuleSpecifierArgs; + use crate::matcher::{SerializableCapture, SerializableCaptures}; + use crate::util::intern; + use crate::{ + blob::BlobId, + cli::commands::github::GitHubRepoType, + cli::commands::inputs::ContentFilteringArgs, + cli::commands::inputs::InputSpecifierArgs, + cli::commands::output::{OutputArgs, ReportOutputFormat}, + cli::commands::scan::ConfidenceLevel, + findings_store::FindingsStore, + location::{Location, OffsetSpan, SourcePoint, SourceSpan}, 
+ matcher::Match, + origin::Origin, + reporter::styles::Styles, + }; + use std::{ + io::Cursor, + path::PathBuf, + sync::{Arc, Mutex}, + }; + use url::Url; + fn create_default_args() -> cli::commands::scan::ScanArgs { + use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope + + cli::commands::scan::ScanArgs { + num_jobs: 1, + no_dedup: false, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + // local path / git URL inputs + path_inputs: Vec::new(), + git_url: Vec::new(), + + // GitHub + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + + // GitLab + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::All, + // Jira options + jira_url: None, + jql: None, + max_results: 100, + // Slack options + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + + docker_image: Vec::new(), + // clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + exclude: Vec::new(), // Exclude patterns + no_binary: true, + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + snippet_length: 256, + baseline_file: None, + manage_baseline: 
false, + } + } + + fn create_mock_match( + rule_name: &str, + rule_text_id: &str, + rule_finding_fingerprint: &str, + validation_success: bool, + ) -> Match { + Match { + location: Location { + offset_span: OffsetSpan { start: 10, end: 20 }, + source_span: SourceSpan { + start: SourcePoint { line: 5, column: 10 }, + end: SourcePoint { line: 5, column: 20 }, + }, + }, + groups: SerializableCaptures { + captures: vec![SerializableCapture { + name: Some("token".to_string()), + match_number: 1, + start: 10, + end: 20, + value: "mock_token".into(), + }], + }, + blob_id: BlobId::new(b"mock_blob"), + finding_fingerprint: 0123, + rule_finding_fingerprint: intern(rule_finding_fingerprint), + rule_text_id: intern(rule_text_id), + rule_name: intern(rule_name), + rule_confidence: Confidence::Medium, + validation_response_body: "validation response".to_string(), + validation_response_status: 200, + validation_success, + calculated_entropy: 4.5, + visible: true, + } + } + + fn setup_mock_reporter(matches: Vec) -> DetailsReporter { + let mut datastore = FindingsStore::new(PathBuf::from("/tmp")); + if !matches.is_empty() { + let blob_metadata = BlobMetadata { + id: BlobId::new(b"mock_blob"), + num_bytes: 1024, + mime_essence: Some("text/plain".to_string()), + charset: Some("UTF-8".to_string()), + language: Some("Rust".to_string()), + }; + let dedup = true; + for m in matches.clone() { + datastore.record( + vec![( + Arc::new(OriginSet::new( + Origin::from_file(PathBuf::from("/mock/path/file.rs")), + vec![], + )), + Arc::new(blob_metadata.clone()), + m.m.clone(), + )], + dedup, + ); + } + } + DetailsReporter { + datastore: Arc::new(Mutex::new(datastore)), + styles: Styles::new(false), + only_valid: false, + } + } + + #[test] + fn test_json_format() -> Result<()> { + let mock_match = + create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true); + let matches = vec![ReportMatch { + origin: OriginSet::new(Origin::from_file(PathBuf::from("/mock/path/file.rs")), 
vec![]), + blob_metadata: BlobMetadata { + id: BlobId::new(b"mock_blob"), + num_bytes: 1024, + mime_essence: Some("text/plain".to_string()), + charset: Some("UTF-8".to_string()), + language: Some("Rust".to_string()), + }, + m: mock_match, + comment: None, + match_confidence: Confidence::Medium, + visible: true, + validation_response_body: "validation response".to_string(), + validation_response_status: 200, + validation_success: true, + }]; + let reporter = setup_mock_reporter(matches); + let mut output = Cursor::new(Vec::new()); + reporter.json_format(&mut output, &create_default_args())?; + let json_output: Vec = serde_json::from_slice(&output.into_inner())?; + assert!(!json_output.is_empty(), "JSON output should not be empty"); + let first = &json_output[0]; + assert_eq!(first["rule"]["name"], "MockRule"); + assert_eq!(first["finding"]["language"], "Rust"); + Ok(()) + } + + #[test] + fn test_validation_status_in_json() -> Result<()> { + let test_cases = vec![(true, "Active Credential"), (false, "Inactive Credential")]; + for (validation_success, expected_status) in test_cases { + let mock_match = create_mock_match( + "MockRule", + "mock_rule_1", + "mock_finding_fingerprint", + validation_success, + ); + let matches = vec![ReportMatch { + origin: OriginSet::new( + Origin::from_file(PathBuf::from("/mock/path/file.rs")), + vec![], + ), + blob_metadata: BlobMetadata { + id: BlobId::new(b"mock_blob"), + num_bytes: 1024, + mime_essence: Some("text/plain".to_string()), + charset: Some("UTF-8".to_string()), + language: Some("Rust".to_string()), + }, + m: mock_match, + comment: None, + match_confidence: Confidence::Medium, + visible: true, + validation_response_body: "validation response".to_string(), + validation_response_status: 200, + validation_success, + }]; + let reporter = setup_mock_reporter(matches); + let mut output = Cursor::new(Vec::new()); + reporter.json_format(&mut output, &create_default_args())?; + let json_output: Vec = 
serde_json::from_slice(&output.into_inner())?; + assert!(!json_output.is_empty(), "JSON output should not be empty"); + let first = &json_output[0]; + let validation_status = first["finding"]["validation"]["status"].as_str().unwrap(); + assert_eq!(validation_status, expected_status); + } + Ok(()) + } +} diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 0c93023..4c3be19 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -113,7 +113,6 @@ rules: git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup, - snippet_length: 64, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_dedup.rs.orig b/tests/int_dedup.rs.orig new file mode 100644 index 0000000..0c93023 --- /dev/null +++ b/tests/int_dedup.rs.orig @@ -0,0 +1,171 @@ +//! Proves that run_async_scan collapses identical findings when +//! ── no_dedup == false ── +//! while keeping them separate when no_dedup == true. + +use std::{ + fs, + sync::{Arc, Mutex}, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use tokio::runtime::Runtime; +use url::Url; + +/// Helper: run a scan with the supplied `no_dedup` flag and return how many +/// findings the `FindingsStore` ends up containing. 
+fn run_scan(count_rt: &Runtime, no_dedup: bool) -> Result { + // ── temp workspace ────────────────────────────────────────────── + let work = TempDir::new()?; + let rules_dir = work.path().join("rules"); + fs::create_dir_all(&rules_dir)?; + let inputs_dir = work.path().join("in"); + fs::create_dir_all(&inputs_dir)?; + + // 1. Tiny custom rule that matches `secret_1234` + fs::write( + rules_dir.join("demo.yml"), + r#" +rules: + - id: demo.secret + name: Demo secret + pattern: "secret_[0-9]{4}" + confidence: low +"#, + )?; + + // 2. Two different blobs that both contain the SAME secret + fs::write(inputs_dir.join("a.txt"), "secret_1234\n")?; + fs::write(inputs_dir.join("b.txt"), "secret_1234\n")?; + + // ── build ScanArgs ────────────────────────────────────────────── + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: vec![rules_dir.clone()], + rule: vec!["all".into()], + load_builtins: false, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: vec![inputs_dir.join("a.txt"), inputs_dir.join("b.txt")], + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs 
{ + max_file_size_mb: 5.0, + extraction_depth: 1, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup, + snippet_length: 64, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Never, + progress: Mode::Never, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 8192 }, + }; + + // ── load rules once ───────────────────────────────────────────── + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = Arc::new(RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?); + + // Fresh FindingsStore for this run + let store_path = work.path().join("store"); + fs::create_dir_all(&store_path)?; + let datastore = Arc::new(Mutex::new(FindingsStore::new(store_path))); + + // run_async_scan is async – use the supplied Tokio runtime + count_rt.block_on(run_async_scan( + &global_args, + &scan_args, + Arc::clone(&datastore), + &rules_db, + ))?; + + let x = Ok(datastore.lock().unwrap().get_matches().len()); + x +} + +#[test] +fn test_dedup_branch() -> Result<()> { + // A *single* runtime reused for both scans keeps the test fast + let rt = Runtime::new().unwrap(); + + let findings_with_dups = run_scan(&rt, true)?; // keep duplicates + let findings_deduped = run_scan(&rt, false)?; // collapse duplicates + + assert!( + findings_with_dups > findings_deduped, + "expected deduplication to reduce finding count ({} -- {})", + findings_with_dups, + findings_deduped + ); + assert_eq!(findings_deduped, 1, "exactly one unique finding 
should remain after dedup"); + + Ok(()) +} diff --git a/tests/int_github.rs b/tests/int_github.rs index 2892b91..8edc022 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -100,7 +100,6 @@ fn test_github_remote_scan() -> Result<()> { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 256, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_github.rs.orig b/tests/int_github.rs.orig new file mode 100644 index 0000000..2892b91 --- /dev/null +++ b/tests/int_github.rs.orig @@ -0,0 +1,149 @@ +// tests/int_github.rs +use std::{ + str::FromStr, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + git_url::GitUrl, + scanner::{load_and_record_rules, run_scan}, +}; +use tempfile::TempDir; +use tokio::runtime::Runtime; +use url::Url; +/// Helper function to determine exit code based on findings +fn determine_exit_code(total_findings: usize, validated_findings: usize) -> i32 { + if total_findings == 0 { + 0 // No findings discovered + } else if validated_findings > 0 { + 205 // Validated findings discovered + } else { + 200 // Findings discovered but none validated + } +} +#[test] +fn test_github_remote_scan() -> Result<()> { + // Create a temporary directory for the scan + let temp_dir = TempDir::new().context("Failed to create temporary directory")?; + let clone_dir = temp_dir.path().to_path_buf(); + // Create test repository URL + let test_repo_url = "https://github.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("Failed to parse 
Git URL"); + // Create scan arguments + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + // Create global arguments + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + // Create in-memory 
datastore + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + // Create the runtime first + let runtime = Runtime::new().expect("Failed to create Tokio runtime"); + // Load rules + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + // Run the scan using runtime.block_on + runtime.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + // Get scan results + let ds = datastore.lock().unwrap(); + let matches = ds.get_matches(); + let total_findings = matches.len(); + let validated_findings = matches.iter().filter(|arc| arc.as_ref().2.validation_success).count(); + + // Print validation statistics + println!("Total findings: {}, Validated findings: {}", total_findings, validated_findings); + // Check total number of findings + assert!(total_findings >= 10, "Expected at least 10 findings, but got {}", total_findings); + // Determine exit code + let exit_code = determine_exit_code(total_findings, validated_findings); + // Test passes if we found some kind of findings (exit code >= 200) + assert!( + exit_code >= 200, + "Test failed: Expected to find vulnerabilities (exit code >= 200), got exit code {}", + exit_code + ); + // Drop the runtime explicitly here, outside of async context + drop(runtime); + Ok(()) +} diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index fa3e169..4668439 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -98,7 +98,6 @@ fn test_gitlab_remote_scan() -> Result<()> { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 256, baseline_file: None, manage_baseline: false, }; @@ -200,7 +199,6 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { git_repo_timeout: 1800, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 256, baseline_file: None, manage_baseline: false, }; diff --git 
a/tests/int_gitlab.rs.orig b/tests/int_gitlab.rs.orig new file mode 100644 index 0000000..fa3e169 --- /dev/null +++ b/tests/int_gitlab.rs.orig @@ -0,0 +1,243 @@ +// tests/int_gitlab.rs +use std::{ + str::FromStr, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + git_url::GitUrl, + scanner::{load_and_record_rules, run_scan}, +}; +use tempfile::TempDir; +use tokio::runtime::Runtime; +use url::Url; + +/// Derive process exit-codes from findings +fn determine_exit_code(total: usize, validated: usize) -> i32 { + match (total, validated) { + (0, _) => 0, + (_, v) if v > 0 => 205, + _ => 200, + } +} + +#[test] +fn test_gitlab_remote_scan() -> Result<()> { + let temp_dir = TempDir::new().context("tmp dir")?; + let clone_dir = temp_dir.path().to_path_buf(); + + // Public GitLab repo seeded with test secrets + let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/")?, + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/")?, + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, 
+ jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16_384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + let rt = Runtime::new()?; + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + + rt.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + + let ds = datastore.lock().unwrap(); + let findings = ds.get_matches(); + let total = findings.len(); + let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); + + assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); + + let exit_code = determine_exit_code(total, validated); + assert!( + exit_code >= 200, + "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" + ); + + drop(rt); + Ok(()) +} + +#[test] +fn test_gitlab_remote_scan_no_history() 
-> Result<()> { + let temp_dir = TempDir::new().context("tmp dir")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/")?, + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/")?, + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::None, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { 
rlimit_nofile: 16_384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + let rt = Runtime::new()?; + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + + rt.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + + let ds = datastore.lock().unwrap(); + let findings = ds.get_matches(); + let total = findings.len(); + let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); + + assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); + + let exit_code = determine_exit_code(total, validated); + assert!( + exit_code >= 200, + "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" + ); + + drop(rt); + Ok(()) +} diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 796d019..45f3767 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -80,7 +80,6 @@ async fn test_redact_hashes_finding_values() -> Result<()> { git_repo_timeout: 1800, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 256, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_redact.rs.orig b/tests/int_redact.rs.orig new file mode 100644 index 0000000..796d019 --- /dev/null +++ b/tests/int_redact.rs.orig @@ -0,0 +1,115 @@ +// Integration test to ensure --redact replaces secret values with hashes +use std::{ + path::PathBuf, + sync::{Arc, Mutex}, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, GlobalArgs, Mode}, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use 
tempfile::TempDir; +use url::Url; + +#[tokio::test] +async fn test_redact_hashes_finding_values() -> Result<()> { + let temp_dir = TempDir::new()?; + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: vec![PathBuf::from("testdata/generic_secrets.py")], + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: true, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Never, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + + let loaded = 
RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(temp_dir.path().to_path_buf()))); + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &rules_db).await?; + + let ds = datastore.lock().unwrap(); + let matches = ds.get_matches(); + assert!(!matches.is_empty()); + for m_arc in matches { + let m = &m_arc.2; + assert!(m.groups.captures.iter().any(|cap| cap.value.starts_with("[REDACTED:"))); + } + + Ok(()) +} diff --git a/tests/int_slack.rs b/tests/int_slack.rs index d22b8f0..7284e1d 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -86,7 +86,6 @@ impl TestContext { git_repo_timeout: 1800, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 128, baseline_file: None, manage_baseline: false, }; @@ -178,7 +177,6 @@ async fn test_scan_slack_messages() -> Result<()> { git_repo_timeout: 1800, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 128, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_slack.rs.orig b/tests/int_slack.rs.orig new file mode 100644 index 0000000..d22b8f0 --- /dev/null +++ b/tests/int_slack.rs.orig @@ -0,0 +1,207 @@ +use std::{ + env, + sync::{Arc, Mutex}, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use 
url::Url; +use wiremock::{ + matchers::{method, path}, + Mock, MockServer, ResponseTemplate, +}; + +struct TestContext { + rules_db: Arc, +} + +impl TestContext { + fn new() -> Result { + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + max_results: 10, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 128, + baseline_file: None, + manage_baseline: false, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; + Ok(Self { rules_db: Arc::new(rules_db) }) + } +} + 
+#[tokio::test] +async fn test_scan_slack_messages() -> Result<()> { + let ctx = TestContext::new()?; + + let server = MockServer::start().await; + let response = serde_json::json!({ + "ok": true, + "messages": { + "matches": [{ + "permalink": "https://example.slack.com/archives/C123/p1234", + "text": "This contains a github token ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa", + "ts": "1234.56", + "channel": {"id": "C123", "name": "general"} + }], + "pagination": {"page": 1, "page_count": 1} + } + }); + Mock::given(method("GET")) + .and(path("/search.messages")) + .respond_with(ResponseTemplate::new(200).set_body_json(response)) + .mount(&server) + .await; + + env::set_var("KF_SLACK_TOKEN", "xoxp-test"); + + let temp_dir = TempDir::new()?; + let clone_dir = temp_dir.path().to_path_buf(); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + slack_query: Some("test".into()), + slack_api_url: Url::parse(&format!("{}/", server.uri()))?, + max_results: 10, + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: 
Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 128, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &ctx.rules_db).await?; + + let findings = { + let ds = datastore.lock().unwrap(); + ds.get_matches().len() + }; + assert!(findings > 0); + Ok(()) +} diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index a7ab9ea..46d4521 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -156,7 +156,6 @@ async fn test_validation_cache_and_depvars() -> Result<()> { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, // keep duplicates so the cache is stressed - snippet_length: 128, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_validation_cache.rs.orig b/tests/int_validation_cache.rs.orig new file mode 100644 index 0000000..a7ab9ea --- /dev/null +++ b/tests/int_validation_cache.rs.orig @@ -0,0 +1,211 @@ +// tests/int_validation_cache.rs +use std::{ + fs, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, Mutex, + }, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + 
scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; +use wiremock::{ + matchers::{method, path}, + Mock, MockServer, Request, ResponseTemplate, +}; + +#[tokio::test] +async fn test_validation_cache_and_depvars() -> Result<()> { + /* --------------------------------------------------------- * + * 1. Spin-up Wiremock and count incoming validation calls * + * --------------------------------------------------------- */ + let server = MockServer::start().await; + let hit_counter = Arc::new(AtomicUsize::new(0)); + let counter_clone = Arc::clone(&hit_counter); + + Mock::given(method("GET")) + .and(path("/validate")) + .respond_with(move |_req: &Request| { + counter_clone.fetch_add(1, Ordering::SeqCst); + ResponseTemplate::new(200).set_body_string("ok") + }) + .mount(&server) + .await; + + /* --------------------------------------------------------- * + * 2. Synthetic rules exercising depends_on_rule + HTTP val * + * --------------------------------------------------------- */ + let rules_yaml = format!( + r#" + rules: + - name: Demo API Key + id: demo.key.1 + pattern: '(demokey_[a-z0-9]{{8}})' + confidence: low + min_entropy: 0.0 + + - name: Demo API Key Validation + id: demo.key.validation.1 + depends_on_rule: + - rule_id: demo.key.1 + variable: TOKEN + pattern: '(demokey_[a-z0-9]{{8}})' + confidence: low + validation: + type: Http + content: + request: + method: GET + url: '{base}/validate?token={{ {{ TOKEN }} }}' + response_matcher: + - report_response: true + - type: WordMatch + words: + - '"error_code":"403003"' + negative: true + "#, + base = server.uri() + ); + + /* --------------------------------------------------------- * + * 3. 
Temp workspace: rules file + input with 2 duplicates * + * --------------------------------------------------------- */ + let work_dir = TempDir::new()?; + let rules_file = work_dir.path().join("demo.yml"); + fs::write(&rules_file, rules_yaml)?; + + let secret_file = work_dir.path().join("secrets.txt"); + fs::write(&secret_file, "demokey_abcdefgh\ndemokey_abcdefgh")?; + + /* --------------------------------------------------------- * + * 4. Build Scan / Global args (no_dedup=true to keep dups) * + * --------------------------------------------------------- */ + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: vec![work_dir.path().to_path_buf()], + rule: vec!["all".into()], + load_builtins: false, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: vec![secret_file.clone()], + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + no_validate: false, + rule_stats: false, + 
only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, // keep duplicates so the cache is stressed + snippet_length: 128, + baseline_file: None, + manage_baseline: false, + }; + + /* --------------------------------------------------------- * + * 5. Load rules, run scan * + * --------------------------------------------------------- */ + // --------------------------------------------------------- + // 5. Load rules, record them, run scan + // --------------------------------------------------------- + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = Arc::new(RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?); + + let datastore = Arc::new(Mutex::new(FindingsStore::new(work_dir.path().to_path_buf()))); + + // NEW: make the datastore aware of every rule + { + let mut ds = datastore.lock().unwrap(); + ds.record_rules(rules_db.rules()); // <-- **add this line** + } + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + progress: Mode::Never, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 8192 }, + }; + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &rules_db).await?; + + /* --------------------------------------------------------- * + * 6. Assertions * + * --------------------------------------------------------- */ + // There are two matches for demo.key.validation.1, but the validator + // should have been called only once thanks to SkipMap caching. 
+ assert_eq!( + hit_counter.load(Ordering::SeqCst), + 1, + "validator endpoint should be hit exactly once" + ); + + let ds = datastore.lock().unwrap(); + let total_matches = ds.get_matches().len(); + assert_eq!(total_matches, 4, "expected 2 matches per rule (dup secrets)"); // 2 for each rule + + Ok(()) +} diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index abeb6f1..c53adae 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -99,7 +99,6 @@ impl TestContext { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 256, baseline_file: None, manage_baseline: false, }; @@ -176,7 +175,6 @@ impl TestContext { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 256, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_vulnerable_files.rs.orig b/tests/int_vulnerable_files.rs.orig new file mode 100644 index 0000000..abeb6f1 --- /dev/null +++ b/tests/int_vulnerable_files.rs.orig @@ -0,0 +1,257 @@ +// tests/integration_scan.rs + +use std::{ + path::{Path, PathBuf}, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; + +#[derive(Debug)] +struct TestCase { + file_name: &'static str, + min_expected_findings: usize, +} + +struct TestContext { + rules_db: Arc, +} + +fn root_dir() -> PathBuf { + 
PathBuf::from(env!("CARGO_MANIFEST_DIR")) +} + +impl TestContext { + fn new() -> Result { + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules) + .load(&scan_args) + .context("Failed to load rules")?; + + let resolved = loaded.resolve_enabled_rules().context("Failed to resolve rules")?; + + let rules_db = 
RulesDatabase::from_rules(resolved.into_iter().cloned().collect()) + .context("Failed to compile rules")?; + + Ok(Self { rules_db: Arc::new(rules_db) }) + } + + async fn scan_file(&self, file_path: &Path) -> Result { + let temp_dir = TempDir::new().context("Failed to create temporary directory")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: vec![file_path.to_path_buf()], + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: 
None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &self.rules_db).await?; + + let findings = { + let ds = datastore.lock().unwrap(); + ds.get_matches().len() + }; + + Ok(findings) + } +} + +#[tokio::test] +async fn test_scan_vulnerable_files() -> Result<()> { + let test_context = TestContext::new()?; + + let test_cases = vec![ + TestCase { file_name: "testdata/c_vulnerable.c", min_expected_findings: 3 }, + TestCase { file_name: "testdata/cpp_vulnerable.cpp", min_expected_findings: 3 }, + TestCase { file_name: "testdata/csharp_vulnerable.cs", min_expected_findings: 4 }, + TestCase { file_name: "testdata/elixir_vulnerable.exs", min_expected_findings: 1 }, + TestCase { file_name: "testdata/generic_secrets.py", min_expected_findings: 9 }, + TestCase { file_name: "testdata/go_vulnerable.go", min_expected_findings: 4 }, + TestCase { file_name: "testdata/java_vulnerable.java", min_expected_findings: 4 }, + TestCase { file_name: "testdata/javascript_vulnerable.js", min_expected_findings: 4 }, + TestCase { file_name: "testdata/json_vulnerable.json", min_expected_findings: 4 }, + TestCase { file_name: "testdata/kotlin_vulnerable.kt", min_expected_findings: 7 }, + TestCase { file_name: "testdata/objc_vulnerable.m", min_expected_findings: 4 }, + TestCase { file_name: "testdata/php_vulnerable.php", min_expected_findings: 5 }, + TestCase { file_name: "testdata/python_vulnerable.py", min_expected_findings: 10 }, + TestCase { file_name: "testdata/python2_vulnerable.py", min_expected_findings: 4 }, + TestCase { file_name: "testdata/ruby_vulnerable.rb", min_expected_findings: 6 }, + TestCase { file_name: 
"testdata/rust_vulnerable.rs", min_expected_findings: 3 }, + TestCase { file_name: "testdata/scala_vulnerable.scala", min_expected_findings: 3 }, + TestCase { file_name: "testdata/shell_vulnerable.sh", min_expected_findings: 2 }, + TestCase { file_name: "testdata/slack_tokens.properties", min_expected_findings: 17 }, + TestCase { file_name: "testdata/swift_vulnerable.swift", min_expected_findings: 2 }, + TestCase { file_name: "testdata/toml_vulnerable.toml", min_expected_findings: 4 }, + TestCase { file_name: "testdata/tsx_vulnerable.tsx", min_expected_findings: 1 }, + TestCase { file_name: "testdata/typescript_vulnerable.ts", min_expected_findings: 1 }, + TestCase { file_name: "testdata/yaml_vulnerable.yaml", min_expected_findings: 4 }, + ]; + + let root = root_dir(); + + for test_case in test_cases { + let test_file = root.join(test_case.file_name); + println!("Testing file: {}", test_case.file_name); + + let findings = test_context.scan_file(&test_file).await?; + + assert!( + findings >= test_case.min_expected_findings, + "File: {} - Expected >= {} findings, got {}", + test_case.file_name, + test_case.min_expected_findings, + findings + ); + } + + Ok(()) +} From f4a1e85b2653f6e5efe7a6ac28cb46e8d3cb88b5 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 10 Aug 2025 17:27:36 -0700 Subject: [PATCH 132/357] removed unused cli argument, snippet-length --- src/cli/commands/scan.rs.orig | 132 -------- src/main.rs.orig | 528 ----------------------------- src/reporter/json_format.rs.orig | 270 --------------- tests/int_dedup.rs.orig | 171 ---------- tests/int_github.rs.orig | 149 -------- tests/int_gitlab.rs.orig | 243 ------------- tests/int_redact.rs.orig | 115 ------- tests/int_slack.rs.orig | 207 ----------- tests/int_validation_cache.rs.orig | 211 ------------ tests/int_vulnerable_files.rs.orig | 257 -------------- 10 files changed, 2283 deletions(-) delete mode 100644 src/cli/commands/scan.rs.orig delete mode 100644 src/main.rs.orig delete mode 100644 
src/reporter/json_format.rs.orig delete mode 100644 tests/int_dedup.rs.orig delete mode 100644 tests/int_github.rs.orig delete mode 100644 tests/int_gitlab.rs.orig delete mode 100644 tests/int_redact.rs.orig delete mode 100644 tests/int_slack.rs.orig delete mode 100644 tests/int_validation_cache.rs.orig delete mode 100644 tests/int_vulnerable_files.rs.orig diff --git a/src/cli/commands/scan.rs.orig b/src/cli/commands/scan.rs.orig deleted file mode 100644 index fe4d5d6..0000000 --- a/src/cli/commands/scan.rs.orig +++ /dev/null @@ -1,132 +0,0 @@ -use clap::{Args, ValueEnum}; -use strum::Display; -use tracing::debug; - -use crate::{ - cli::{ - commands::{ - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - }, - global::RAM_GB, - }, - rules::rule::Confidence, -}; - -/// Determine the default number of parallel scan jobs. -/// -/// * Target = `num_cpus * 2`. -/// * Cap by RAM at ≈ 1 GiB per job (so 16 GiB ⇒ max 16 jobs). -/// * Always ≥ 1. -/// * When `-v/--verbose` is passed, the computed value is logged at DEBUG. -fn default_scan_jobs() -> usize { - // How many logical CPUs do we see? (Falls back to 1 on error.) - let cpu_count = std::thread::available_parallelism().map(usize::from).unwrap_or(1); - - // Desired parallelism is CPU * 2. - let desired = cpu_count * 2; - - match *RAM_GB { - // If we know how much RAM we have, cap by a 1 GiB-per-job heuristic. - Some(ram_gb) => { - let max_by_ram = ram_gb.ceil() as usize; // 1 GiB per job - let jobs = desired.min(max_by_ram).max(1); - - debug!( - "Using {jobs} parallel scan jobs \ - (cpus = {cpu_count}, desired = {desired}, \ - ram = {ram_gb:.1} GiB, cap_by_ram = {max_by_ram})" - ); - jobs - } - // If RAM is unknown, just use the desired value. 
- None => { - debug!("Using {desired} parallel scan jobs (cpus = {cpu_count}, ram unknown)"); - desired - } - } -} - -/// `kingfisher scan` command and flags -#[derive(Args, Debug, Clone)] -pub struct ScanArgs { - /// Number of parallel scanning threads - #[arg(long = "jobs", short = 'j', default_value_t = default_scan_jobs())] - pub num_jobs: usize, - - #[command(flatten)] - pub rules: RuleSpecifierArgs, - - #[command(flatten)] - pub input_specifier_args: InputSpecifierArgs, - - #[command(flatten)] - pub content_filtering_args: ContentFilteringArgs, - - /// Minimum confidence level for reporting findings - #[arg(long, short = 'c', default_value = "medium")] - pub confidence: ConfidenceLevel, - - /// Disable secret validation - #[arg(long, short = 'n', default_value_t = false)] - pub no_validate: bool, - - /// Display only validated findings - #[arg(long, default_value_t = false)] - pub only_valid: bool, - - /// Override the default minimum entropy threshold - #[arg(long, short = 'e')] - pub min_entropy: Option, - - /// Show performance statistics for each rule - #[arg(long, default_value_t = false)] - pub rule_stats: bool, - - /// Display every occurrence of a finding - #[arg(long, default_value_t = false)] - pub no_dedup: bool, - - /// Redact findings values using a secure hash - #[arg(long, short = 'r', default_value_t = false)] - pub redact: bool, - - /// Timeout for Git repository scanning in seconds - #[arg(long, default_value_t = 1800, value_name = "SECONDS")] - pub git_repo_timeout: u64, - - #[command(flatten)] - pub output_args: OutputArgs, - - /// Bytes of context before and after each match - #[arg(long, default_value_t = 256, value_name = "BYTES")] - pub snippet_length: usize, - - /// Baseline file to filter known secrets - #[arg(long, value_name = "FILE")] - pub baseline_file: Option, - - /// Create or update the baseline file with current findings - #[arg(long, default_value_t = false)] - pub manage_baseline: bool, -} - -/// Confidence levels for 
findings -#[derive(Copy, Clone, Debug, Display, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] -#[strum(serialize_all = "kebab-case")] -pub enum ConfidenceLevel { - Low, - Medium, - High, -} - -impl From for Confidence { - fn from(level: ConfidenceLevel) -> Self { - match level { - ConfidenceLevel::Low => Confidence::Low, - ConfidenceLevel::Medium => Confidence::Medium, - ConfidenceLevel::High => Confidence::High, - } - } -} diff --git a/src/main.rs.orig b/src/main.rs.orig deleted file mode 100644 index 58145e6..0000000 --- a/src/main.rs.orig +++ /dev/null @@ -1,528 +0,0 @@ -// ──────────────────────────────────────────────────────────── -// Global allocator setup -// * Default - mimalloc (no feature flags) -// * Debug - jemalloc (`use-jemalloc` feature) -// * Fallback - system allocator (`system-alloc` feature) -// ──────────────────────────────────────────────────────────── - -// --- jemalloc (opt-in) --- -#[cfg(feature = "use-jemalloc")] -#[global_allocator] -static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; - -// --- mimalloc (default) --- -#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] -#[global_allocator] -static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; - -// --- system allocator (explicit opt-out) --- -#[cfg(feature = "system-alloc")] -use std::alloc::System; -#[cfg(feature = "system-alloc")] -#[global_allocator] -static GLOBAL: System = System; - -// use std::alloc::System; -// #[global_allocator] -// static GLOBAL: System = System; - -use std::{ - io::Read, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use kingfisher::{ - cli::{ - self, - commands::{ - github::{ - GitCloneMode, GitHistoryMode, GitHubCommand, GitHubRepoType, GitHubReposCommand, - }, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::{ - RuleSpecifierArgs, RulesCheckArgs, RulesCommand, RulesListArgs, - RulesListOutputFormat, - }, - }, - global::Command, - 
CommandLineArgs, GlobalArgs, - }, - findings_store, - findings_store::FindingsStore, - github, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::{load_and_record_rules, run_scan}, - update::check_for_update, -}; -use serde_json::json; -use tempfile::TempDir; -use term_size; -use tokio::runtime::Builder; -use tracing::{error, info, warn}; -use tracing_core::metadata::LevelFilter; -use tracing_subscriber::{ - self, fmt, prelude::__tracing_subscriber_SubscriberExt, registry, util::SubscriberInitExt, -}; -use url::Url; - -use crate::cli::commands::gitlab::{GitLabCommand, GitLabRepoType, GitLabReposCommand}; - -fn main() -> anyhow::Result<()> { - color_backtrace::install(); - // Parse command-line arguments - let args = CommandLineArgs::parse_args(); - - // Determine the number of jobs, defaulting to the number of CPUs - let num_jobs = match args.command { - Command::Scan(ref scan_args) => scan_args.num_jobs, - Command::GitHub(_) => num_cpus::get(), // Default for GitHub commands - Command::GitLab(_) => num_cpus::get(), // Default for GitLab commands - Command::Rules(_) => num_cpus::get(), // Default for Rules commands - }; - - // Set up the Tokio runtime with the specified number of threads - let runtime = Builder::new_multi_thread() - .worker_threads(num_jobs) - .enable_all() - .build() - .context("Failed to create Tokio runtime")?; - runtime.block_on(async_main(args)) -} - -fn setup_logging(global_args: &GlobalArgs) { - // Determine log level based on global verbosity - let (level, all_targets) = if global_args.quiet { - (LevelFilter::ERROR, false) - } else { - let level = match global_args.verbose { - 0 => LevelFilter::INFO, // Default level if no `-v` is provided - 1 => LevelFilter::DEBUG, // `-v` - 2 => LevelFilter::TRACE, // `-vv` - _ => LevelFilter::TRACE, // `-vvv` or more - }; - let all_targets = global_args.verbose > 2; // Enable all targets for `-vvv` or more - (level, all_targets) - }; - // Create a filter for logging - let filter = if 
all_targets { - // Enable TRACE for all modules - tracing_subscriber::filter::Targets::new().with_default(LevelFilter::TRACE) - } else { - // Per-target filtering, only TRACE for `kingfisher` - tracing_subscriber::filter::Targets::new() - .with_default(LevelFilter::ERROR) // Default for all modules - .with_target("kingfisher", level) // Replace `kingfisher` with your - // crate's name - }; - // Configure the formatter layer - let fmt_layer = fmt::layer() - .with_writer(std::io::stderr) // Write logs to stderr - .with_target(true) // Enable target filtering - .with_ansi(false) // Disable colors - .without_time(); // Remove timestamps - // Build and initialize the registry - registry() - .with(fmt_layer) // Attach the formatter layer - .with(filter) // Attach the filter - .init(); -} - -pub fn determine_exit_code(datastore: &Arc>) -> i32 { - // exit with code 200 if _any_ findings are discovered - // exit with code 205 if VALIDATED findings are discovered - // exit with code 0 if there are NO findings discovered - let ds = datastore.lock().unwrap(); - // Get all matches - // let all_matches = ds.get_matches(); - - // Only consider visible matches when determining the exit code - let all_matches = ds - .get_matches() - .iter() - .filter(|msg| { - let (_, _, match_item) = &***msg; - match_item.visible - }) - .collect::>(); - - if all_matches.is_empty() { - // No findings discovered - 0 - } else { - // Check if there are any validated findings - let validated_matches = all_matches - .iter() - .filter(|msg| { - let (_, _, match_item) = &****msg; - match_item.validation_success - }) - .count(); - if validated_matches > 0 { - // Validated findings discovered - 205 - } else { - // Findings discovered, but not validated - 200 - } - } -} - -async fn async_main(args: CommandLineArgs) -> Result<()> { - // Create a temporary directory - let temp_dir = TempDir::new().context("Failed to create temporary directory")?; - let clone_dir = temp_dir.path().to_path_buf(); - - // Create 
the in-memory datastore - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - setup_logging(&args.global_args); - let update_msg = check_for_update(&args.global_args, None); - match args.command { - Command::Scan(mut scan_args) => { - // ————————————————————————————————————————— - // If no paths or a single "-", slurp stdin into a temp file - // ————————————————————————————————————————— - info!( - "Launching with {} concurrent scan jobs. Use --num-jobs to override.", - &scan_args.num_jobs - ); - let paths = &scan_args.input_specifier_args.path_inputs; - let is_dash = paths.iter().any(|p| p.as_os_str() == "-"); - if (paths.is_empty() || is_dash) && !atty::is(atty::Stream::Stdin) { - // read all stdin - let mut buf = Vec::new(); - std::io::stdin().read_to_end(&mut buf)?; - // write into temp_dir - let stdin_file = temp_dir.path().join("stdin_input"); - std::fs::write(&stdin_file, buf)?; - // replace inputs - scan_args.input_specifier_args.path_inputs = vec![stdin_file.into()]; - } - - // now proceed exactly as before - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - run_scan(&args.global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await?; - let exit_code = determine_exit_code(&datastore); - - if let Err(e) = temp_dir.close() { - eprintln!("Failed to close temporary directory: {}", e); - } - std::process::exit(exit_code); - } - Command::Rules(ref rule_args) => match &rule_args.command { - RulesCommand::Check(check_args) => { - run_rules_check(&check_args)?; - } - RulesCommand::List(list_args) => { - run_rules_list(&list_args)?; - } - }, - Command::GitHub(github_args) => match github_args.command { - GitHubCommand::Repos(repos_command) => match repos_command { - GitHubReposCommand::List(list_args) => { - github::list_repositories( - github_args.github_api_url, - args.global_args.ignore_certs, - args.global_args.use_progress(), - &list_args.repo_specifiers.user, - &list_args.repo_specifiers.organization, - 
list_args.repo_specifiers.all_organizations, - list_args.repo_specifiers.repo_type.into(), - ) - .await?; - } - }, - }, - Command::GitLab(gitlab_args) => match gitlab_args.command { - GitLabCommand::Repos(repos_command) => match repos_command { - GitLabReposCommand::List(list_args) => { - kingfisher::gitlab::list_repositories( - gitlab_args.gitlab_api_url, - args.global_args.ignore_certs, - args.global_args.use_progress(), - &list_args.repo_specifiers.user, - &list_args.repo_specifiers.group, - list_args.repo_specifiers.all_groups, - list_args.repo_specifiers.repo_type.into(), - ) - .await?; - } - }, - }, - } - if let Some(msg) = update_msg { - info!("{msg}"); - } - Ok(()) -} - -/// Create a default ScanArgs instance for rule loading -fn create_default_scan_args() -> cli::commands::scan::ScanArgs { - use cli::commands::scan::*; - ScanArgs { - num_jobs: 1, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: url::Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::All, - - jira_url: None, - jql: None, - max_results: 100, - - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Slack query - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - - // Docker image scanning - docker_image: Vec::new(), - - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: 
ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: true, - extraction_depth: 2, - exclude: Vec::new(), // Exclude patterns - no_binary: true, - }, - confidence: ConfidenceLevel::Medium, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, - no_dedup: false, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - } -} -/// Run the rules check command -pub fn run_rules_check(args: &RulesCheckArgs) -> Result<()> { - let mut num_errors = 0; - let mut num_warnings = 0; - // Load and check rules - let loader = RuleLoader::from_rule_specifiers(&args.rules); - let loaded = loader.load(&create_default_scan_args())?; - let resolved = loaded.resolve_enabled_rules()?; - let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; - - // Check each rule - for (rule_index, rule) in rules_db.rules().iter().enumerate() { - let rule_syntax = rule.syntax(); - // Basic rule validation checks - if rule.name().len() < 3 { - warn!("Rule '{}' has a very short name", rule.name()); - num_warnings += 1; - } - if rule.syntax().pattern.len() < 5 { - warn!("Rule '{}' has a very short pattern", rule.name()); - num_warnings += 1; - } - if rule.syntax().examples.is_empty() { - warn!("Rule '{}' has no examples", rule.name()); - num_warnings += 1; - continue; - } - // Check regex compilation - if let Err(e) = rule.syntax().as_regex() { - error!("Rule '{}' has invalid regex: {}", rule.name(), e); - num_errors += 1; - continue; - } - // Test each example against both vectorscan and regex - for (example_index, example) in rule_syntax.examples.iter().enumerate() { - // Create a test blob from the example - // let blob = Blob::new(BlobId::new(example.as_bytes()), - // example.as_bytes().to_vec()); let origin = OriginSet::new( - // Origin::from_file(PathBuf::from("test_example")), - // 
Vec::new(), - // ); - // // Check vectorscan match - // let vectorscan_matched = match matcher.scan_blob(&blob, &origin, None)? { - // ScanResult::New(matches) => !matches.is_empty(), - // _ => false, - // }; - // Check regex match - // Get the regex using the public method - let re = - rules_db.get_regex_by_rule_id(rule.id()).expect("Failed to get regex for rule"); - let regex_matched = re.is_match(example.as_bytes()); - if !regex_matched { - // ||!vectorscan_matched { - println!("\nTesting rule {} - {}", rule_index + 1, rule_syntax.name); - println!(" Processing example {}", example_index + 1); - println!(" [!] Mismatch detected for example: {}", example); - // if !vectorscan_matched { - // println!(" Vectorscan match: {}", vectorscan_matched); - // num_errors += 1; - // } - if !regex_matched { - println!(" Regex match: {}", regex_matched); - num_errors += 1; - } - } - - // // Report any mismatches - // if !vectorscan_matched || !regex_matched { - // error!("Rule '{}' example {} failed validation:", - // rule.name(), example_index + 1); println!(" - // Example text: {}", example); - - // if !vectorscan_matched { - // error!(" - Vectorscan pattern did not match example"); - // num_errors += 1; - // } - - // if !regex_matched { - // error!(" - Regex pattern did not match example"); - // num_errors += 1; - // } - // } - } - } - // Print summary - if num_errors > 0 || num_warnings > 0 { - println!("\nCheck Summary:"); - println!(" Errors: {}", num_errors); - println!(" Warnings: {}", num_warnings); - println!("\nError types include:"); - println!(" - Invalid regex patterns"); - println!(" - Examples that don't match their patterns"); - println!("\nWarning types include:"); - println!(" - Rules with very short names"); - println!(" - Rules with very short patterns"); - println!(" - Rules without examples"); - } else { - println!("\nAll rules passed validation successfully!"); - } - // Exit with error if there are errors or if warnings are treated as errors - if 
num_errors > 0 || (args.warnings_as_errors && num_warnings > 0) { - std::process::exit(1); - } - Ok(()) -} -/// Run the rules list command -pub fn run_rules_list(args: &RulesListArgs) -> Result<()> { - // Load rules - let loader = RuleLoader::from_rule_specifiers(&args.rules); - let loaded = loader.load(&create_default_scan_args())?; - let resolved = loaded.resolve_enabled_rules()?; - let mut writer = args.output_args.get_writer()?; - match args.output_args.format { - RulesListOutputFormat::Pretty => { - // Determine terminal width if possible, otherwise use default - let term_width = term_size::dimensions().map(|(w, _)| w).unwrap_or(120); - // First pass: calculate column widths - let max_name_width = resolved.iter().map(|r| r.name().len()).max().unwrap_or(0).max(4); // "Rule" header - let max_id_width = resolved.iter().map(|r| r.id().len()).max().unwrap_or(0).max(2); // "ID" header - let max_conf_width = resolved - .iter() - .map(|r| format!("{:?}", r.confidence()).len()) - .max() - .unwrap_or(0) - .max(10); // "Confidence" header - // Calculate pattern width based on terminal width - let reserved_width = max_name_width + max_id_width + max_conf_width + 10; - let pattern_width = term_width.saturating_sub(reserved_width); - // Format pattern on a single line - let format_pattern = |pattern: &str| { - let single_line = pattern - .replace('\n', " ") - .replace('\r', " ") - .split_whitespace() - .collect::>() - .join(" "); - if single_line.len() > pattern_width { - format!("{}...", &single_line[..pattern_width.saturating_sub(3)]) - } else { - single_line - } - }; - // Print header - writeln!( - writer, - "\n{:name_width$} │ {:id_width$} │ {:conf_width$} │ Pattern", - "Rule", - "ID", - "Confidence", - name_width = max_name_width, - id_width = max_id_width, - conf_width = max_conf_width - )?; - // Print separator - writeln!( - writer, - "{0:─ { - // Create JSON format - let rules_json: Vec<_> = resolved - .iter() - .map(|rule| { - json!({ - "name": rule.name(), - "id": 
rule.id(), - "pattern": rule.syntax().pattern, - "confidence": rule.confidence(), - "examples": rule.syntax().examples, - "visible": rule.visible(), - }) - }) - .collect(); - serde_json::to_writer_pretty(&mut writer, &rules_json)?; - writeln!(writer)?; - } - } - Ok(()) -} diff --git a/src/reporter/json_format.rs.orig b/src/reporter/json_format.rs.orig deleted file mode 100644 index aae16fc..0000000 --- a/src/reporter/json_format.rs.orig +++ /dev/null @@ -1,270 +0,0 @@ -use super::*; - -impl DetailsReporter { - pub fn json_format( - &self, - mut writer: W, - args: &cli::commands::scan::ScanArgs, - ) -> Result<()> { - let records = self.build_finding_records(args)?; - if !records.is_empty() { - serde_json::to_writer_pretty(&mut writer, &records)?; - writeln!(writer)?; - } - Ok(()) - } - - pub fn jsonl_format( - &self, - mut writer: W, - args: &cli::commands::scan::ScanArgs, - ) -> Result<()> { - let records = self.build_finding_records(args)?; - for record in records { - serde_json::to_writer(&mut writer, &record)?; - writeln!(writer)?; - } - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::cli::commands::github::GitCloneMode; - use crate::cli::commands::github::GitHistoryMode; - use crate::cli::commands::rules::RuleSpecifierArgs; - use crate::matcher::{SerializableCapture, SerializableCaptures}; - use crate::util::intern; - use crate::{ - blob::BlobId, - cli::commands::github::GitHubRepoType, - cli::commands::inputs::ContentFilteringArgs, - cli::commands::inputs::InputSpecifierArgs, - cli::commands::output::{OutputArgs, ReportOutputFormat}, - cli::commands::scan::ConfidenceLevel, - findings_store::FindingsStore, - location::{Location, OffsetSpan, SourcePoint, SourceSpan}, - matcher::Match, - origin::Origin, - reporter::styles::Styles, - }; - use std::{ - io::Cursor, - path::PathBuf, - sync::{Arc, Mutex}, - }; - use url::Url; - fn create_default_args() -> cli::commands::scan::ScanArgs { - use crate::cli::commands::gitlab::GitLabRepoType; // 
bring enum into scope - - cli::commands::scan::ScanArgs { - num_jobs: 1, - no_dedup: false, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - // local path / git URL inputs - path_inputs: Vec::new(), - git_url: Vec::new(), - - // GitHub - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - - // GitLab - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::All, - // Jira options - jira_url: None, - jql: None, - max_results: 100, - // Slack options - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - - docker_image: Vec::new(), - // clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - exclude: Vec::new(), // Exclude patterns - no_binary: true, - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - } - } - - fn create_mock_match( - rule_name: &str, - rule_text_id: &str, - rule_finding_fingerprint: &str, - validation_success: bool, - ) -> Match { - Match { - location: Location { - offset_span: OffsetSpan { start: 10, end: 20 }, - source_span: SourceSpan { - 
start: SourcePoint { line: 5, column: 10 }, - end: SourcePoint { line: 5, column: 20 }, - }, - }, - groups: SerializableCaptures { - captures: vec![SerializableCapture { - name: Some("token".to_string()), - match_number: 1, - start: 10, - end: 20, - value: "mock_token".into(), - }], - }, - blob_id: BlobId::new(b"mock_blob"), - finding_fingerprint: 0123, - rule_finding_fingerprint: intern(rule_finding_fingerprint), - rule_text_id: intern(rule_text_id), - rule_name: intern(rule_name), - rule_confidence: Confidence::Medium, - validation_response_body: "validation response".to_string(), - validation_response_status: 200, - validation_success, - calculated_entropy: 4.5, - visible: true, - } - } - - fn setup_mock_reporter(matches: Vec) -> DetailsReporter { - let mut datastore = FindingsStore::new(PathBuf::from("/tmp")); - if !matches.is_empty() { - let blob_metadata = BlobMetadata { - id: BlobId::new(b"mock_blob"), - num_bytes: 1024, - mime_essence: Some("text/plain".to_string()), - charset: Some("UTF-8".to_string()), - language: Some("Rust".to_string()), - }; - let dedup = true; - for m in matches.clone() { - datastore.record( - vec![( - Arc::new(OriginSet::new( - Origin::from_file(PathBuf::from("/mock/path/file.rs")), - vec![], - )), - Arc::new(blob_metadata.clone()), - m.m.clone(), - )], - dedup, - ); - } - } - DetailsReporter { - datastore: Arc::new(Mutex::new(datastore)), - styles: Styles::new(false), - only_valid: false, - } - } - - #[test] - fn test_json_format() -> Result<()> { - let mock_match = - create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true); - let matches = vec![ReportMatch { - origin: OriginSet::new(Origin::from_file(PathBuf::from("/mock/path/file.rs")), vec![]), - blob_metadata: BlobMetadata { - id: BlobId::new(b"mock_blob"), - num_bytes: 1024, - mime_essence: Some("text/plain".to_string()), - charset: Some("UTF-8".to_string()), - language: Some("Rust".to_string()), - }, - m: mock_match, - comment: None, - match_confidence: 
Confidence::Medium, - visible: true, - validation_response_body: "validation response".to_string(), - validation_response_status: 200, - validation_success: true, - }]; - let reporter = setup_mock_reporter(matches); - let mut output = Cursor::new(Vec::new()); - reporter.json_format(&mut output, &create_default_args())?; - let json_output: Vec = serde_json::from_slice(&output.into_inner())?; - assert!(!json_output.is_empty(), "JSON output should not be empty"); - let first = &json_output[0]; - assert_eq!(first["rule"]["name"], "MockRule"); - assert_eq!(first["finding"]["language"], "Rust"); - Ok(()) - } - - #[test] - fn test_validation_status_in_json() -> Result<()> { - let test_cases = vec![(true, "Active Credential"), (false, "Inactive Credential")]; - for (validation_success, expected_status) in test_cases { - let mock_match = create_mock_match( - "MockRule", - "mock_rule_1", - "mock_finding_fingerprint", - validation_success, - ); - let matches = vec![ReportMatch { - origin: OriginSet::new( - Origin::from_file(PathBuf::from("/mock/path/file.rs")), - vec![], - ), - blob_metadata: BlobMetadata { - id: BlobId::new(b"mock_blob"), - num_bytes: 1024, - mime_essence: Some("text/plain".to_string()), - charset: Some("UTF-8".to_string()), - language: Some("Rust".to_string()), - }, - m: mock_match, - comment: None, - match_confidence: Confidence::Medium, - visible: true, - validation_response_body: "validation response".to_string(), - validation_response_status: 200, - validation_success, - }]; - let reporter = setup_mock_reporter(matches); - let mut output = Cursor::new(Vec::new()); - reporter.json_format(&mut output, &create_default_args())?; - let json_output: Vec = serde_json::from_slice(&output.into_inner())?; - assert!(!json_output.is_empty(), "JSON output should not be empty"); - let first = &json_output[0]; - let validation_status = first["finding"]["validation"]["status"].as_str().unwrap(); - assert_eq!(validation_status, expected_status); - } - Ok(()) - } -} diff 
--git a/tests/int_dedup.rs.orig b/tests/int_dedup.rs.orig deleted file mode 100644 index 0c93023..0000000 --- a/tests/int_dedup.rs.orig +++ /dev/null @@ -1,171 +0,0 @@ -//! Proves that run_async_scan collapses identical findings when -//! ── no_dedup == false ── -//! while keeping them separate when no_dedup == true. - -use std::{ - fs, - sync::{Arc, Mutex}, -}; - -use anyhow::Result; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use tokio::runtime::Runtime; -use url::Url; - -/// Helper: run a scan with the supplied `no_dedup` flag and return how many -/// findings the `FindingsStore` ends up containing. -fn run_scan(count_rt: &Runtime, no_dedup: bool) -> Result { - // ── temp workspace ────────────────────────────────────────────── - let work = TempDir::new()?; - let rules_dir = work.path().join("rules"); - fs::create_dir_all(&rules_dir)?; - let inputs_dir = work.path().join("in"); - fs::create_dir_all(&inputs_dir)?; - - // 1. Tiny custom rule that matches `secret_1234` - fs::write( - rules_dir.join("demo.yml"), - r#" -rules: - - id: demo.secret - name: Demo secret - pattern: "secret_[0-9]{4}" - confidence: low -"#, - )?; - - // 2. 
Two different blobs that both contain the SAME secret - fs::write(inputs_dir.join("a.txt"), "secret_1234\n")?; - fs::write(inputs_dir.join("b.txt"), "secret_1234\n")?; - - // ── build ScanArgs ────────────────────────────────────────────── - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: vec![rules_dir.clone()], - rule: vec!["all".into()], - load_builtins: false, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: vec![inputs_dir.join("a.txt"), inputs_dir.join("b.txt")], - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 5.0, - extraction_depth: 1, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup, - snippet_length: 64, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - 
verbose: 0, - quiet: true, - color: Mode::Never, - progress: Mode::Never, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 8192 }, - }; - - // ── load rules once ───────────────────────────────────────────── - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; - let resolved = loaded.resolve_enabled_rules()?; - let rules_db = Arc::new(RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?); - - // Fresh FindingsStore for this run - let store_path = work.path().join("store"); - fs::create_dir_all(&store_path)?; - let datastore = Arc::new(Mutex::new(FindingsStore::new(store_path))); - - // run_async_scan is async – use the supplied Tokio runtime - count_rt.block_on(run_async_scan( - &global_args, - &scan_args, - Arc::clone(&datastore), - &rules_db, - ))?; - - let x = Ok(datastore.lock().unwrap().get_matches().len()); - x -} - -#[test] -fn test_dedup_branch() -> Result<()> { - // A *single* runtime reused for both scans keeps the test fast - let rt = Runtime::new().unwrap(); - - let findings_with_dups = run_scan(&rt, true)?; // keep duplicates - let findings_deduped = run_scan(&rt, false)?; // collapse duplicates - - assert!( - findings_with_dups > findings_deduped, - "expected deduplication to reduce finding count ({} -- {})", - findings_with_dups, - findings_deduped - ); - assert_eq!(findings_deduped, 1, "exactly one unique finding should remain after dedup"); - - Ok(()) -} diff --git a/tests/int_github.rs.orig b/tests/int_github.rs.orig deleted file mode 100644 index 2892b91..0000000 --- a/tests/int_github.rs.orig +++ /dev/null @@ -1,149 +0,0 @@ -// tests/int_github.rs -use std::{ - str::FromStr, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, 
ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - git_url::GitUrl, - scanner::{load_and_record_rules, run_scan}, -}; -use tempfile::TempDir; -use tokio::runtime::Runtime; -use url::Url; -/// Helper function to determine exit code based on findings -fn determine_exit_code(total_findings: usize, validated_findings: usize) -> i32 { - if total_findings == 0 { - 0 // No findings discovered - } else if validated_findings > 0 { - 205 // Validated findings discovered - } else { - 200 // Findings discovered but none validated - } -} -#[test] -fn test_github_remote_scan() -> Result<()> { - // Create a temporary directory for the scan - let temp_dir = TempDir::new().context("Failed to create temporary directory")?; - let clone_dir = temp_dir.path().to_path_buf(); - // Create test repository URL - let test_repo_url = "https://github.com/micksmix/SecretsTest.git"; - let git_url = GitUrl::from_str(test_repo_url).expect("Failed to parse Git URL"); - // Create scan arguments - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: vec![git_url], - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: 
None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - no_binary: true, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - // Create global arguments - let global_args = GlobalArgs { - verbose: 0, - quiet: false, - color: Mode::Auto, - progress: Mode::Auto, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16384 }, - }; - // Create in-memory datastore - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - // Create the runtime first - let runtime = Runtime::new().expect("Failed to create Tokio runtime"); - // Load rules - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - // Run the scan using runtime.block_on - runtime.block_on(async { - run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await - })?; - // Get scan results - let ds = datastore.lock().unwrap(); - let matches = ds.get_matches(); - let total_findings = matches.len(); - let validated_findings = matches.iter().filter(|arc| arc.as_ref().2.validation_success).count(); - - // Print validation statistics - println!("Total findings: {}, Validated findings: {}", total_findings, validated_findings); - // Check total number of findings - assert!(total_findings >= 10, "Expected at least 10 findings, but got {}", total_findings); - // Determine exit 
code - let exit_code = determine_exit_code(total_findings, validated_findings); - // Test passes if we found some kind of findings (exit code >= 200) - assert!( - exit_code >= 200, - "Test failed: Expected to find vulnerabilities (exit code >= 200), got exit code {}", - exit_code - ); - // Drop the runtime explicitly here, outside of async context - drop(runtime); - Ok(()) -} diff --git a/tests/int_gitlab.rs.orig b/tests/int_gitlab.rs.orig deleted file mode 100644 index fa3e169..0000000 --- a/tests/int_gitlab.rs.orig +++ /dev/null @@ -1,243 +0,0 @@ -// tests/int_gitlab.rs -use std::{ - str::FromStr, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - git_url::GitUrl, - scanner::{load_and_record_rules, run_scan}, -}; -use tempfile::TempDir; -use tokio::runtime::Runtime; -use url::Url; - -/// Derive process exit-codes from findings -fn determine_exit_code(total: usize, validated: usize) -> i32 { - match (total, validated) { - (0, _) => 0, - (_, v) if v > 0 => 205, - _ => 200, - } -} - -#[test] -fn test_gitlab_remote_scan() -> Result<()> { - let temp_dir = TempDir::new().context("tmp dir")?; - let clone_dir = temp_dir.path().to_path_buf(); - - // Public GitLab repo seeded with test secrets - let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; - let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: vec![git_url], - 
github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/")?, - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/")?, - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - no_binary: true, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: false, - color: Mode::Auto, - progress: Mode::Auto, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16_384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - let rt = Runtime::new()?; - - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - - rt.block_on(async { - run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await - })?; - - let ds = datastore.lock().unwrap(); - let findings = ds.get_matches(); - let total = findings.len(); - let validated = 
findings.iter().filter(|m| m.as_ref().2.validation_success).count(); - - assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); - - let exit_code = determine_exit_code(total, validated); - assert!( - exit_code >= 200, - "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" - ); - - drop(rt); - Ok(()) -} - -#[test] -fn test_gitlab_remote_scan_no_history() -> Result<()> { - let temp_dir = TempDir::new().context("tmp dir")?; - let clone_dir = temp_dir.path().to_path_buf(); - - let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; - let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: vec![git_url], - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/")?, - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/")?, - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::None, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - no_binary: true, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: 
false, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: false, - color: Mode::Auto, - progress: Mode::Auto, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16_384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - let rt = Runtime::new()?; - - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - - rt.block_on(async { - run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await - })?; - - let ds = datastore.lock().unwrap(); - let findings = ds.get_matches(); - let total = findings.len(); - let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); - - assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); - - let exit_code = determine_exit_code(total, validated); - assert!( - exit_code >= 200, - "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" - ); - - drop(rt); - Ok(()) -} diff --git a/tests/int_redact.rs.orig b/tests/int_redact.rs.orig deleted file mode 100644 index 796d019..0000000 --- a/tests/int_redact.rs.orig +++ /dev/null @@ -1,115 +0,0 @@ -// Integration test to ensure --redact replaces secret values with hashes -use std::{ - path::PathBuf, - sync::{Arc, Mutex}, -}; - -use anyhow::Result; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, GlobalArgs, Mode}, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - 
scanner::run_async_scan, -}; -use tempfile::TempDir; -use url::Url; - -#[tokio::test] -async fn test_redact_hashes_finding_values() -> Result<()> { - let temp_dir = TempDir::new()?; - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: vec![PathBuf::from("testdata/generic_secrets.py")], - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: true, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Never, - no_update_check: false, - self_update: false, - progress: Mode::Never, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16384 }, - }; - 
- let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; - let resolved = loaded.resolve_enabled_rules()?; - let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(temp_dir.path().to_path_buf()))); - run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &rules_db).await?; - - let ds = datastore.lock().unwrap(); - let matches = ds.get_matches(); - assert!(!matches.is_empty()); - for m_arc in matches { - let m = &m_arc.2; - assert!(m.groups.captures.iter().any(|cap| cap.value.starts_with("[REDACTED:"))); - } - - Ok(()) -} diff --git a/tests/int_slack.rs.orig b/tests/int_slack.rs.orig deleted file mode 100644 index d22b8f0..0000000 --- a/tests/int_slack.rs.orig +++ /dev/null @@ -1,207 +0,0 @@ -use std::{ - env, - sync::{Arc, Mutex}, -}; - -use anyhow::Result; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use url::Url; -use wiremock::{ - matchers::{method, path}, - Mock, MockServer, ResponseTemplate, -}; - -struct TestContext { - rules_db: Arc, -} - -impl TestContext { - fn new() -> Result { - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - 
github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - jira_url: None, - jql: None, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - max_results: 10, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 128, - baseline_file: None, - manage_baseline: false, - }; - - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; - let resolved = loaded.resolve_enabled_rules()?; - let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; - Ok(Self { rules_db: Arc::new(rules_db) }) - } -} - -#[tokio::test] -async fn test_scan_slack_messages() -> Result<()> { - let ctx = TestContext::new()?; - - let server = MockServer::start().await; - let response = serde_json::json!({ - "ok": true, - "messages": { - "matches": [{ - "permalink": "https://example.slack.com/archives/C123/p1234", - "text": "This contains a github token ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa", - "ts": "1234.56", - "channel": {"id": "C123", "name": "general"} - }], - "pagination": {"page": 1, "page_count": 1} - } - }); - Mock::given(method("GET")) - .and(path("/search.messages")) - 
.respond_with(ResponseTemplate::new(200).set_body_json(response)) - .mount(&server) - .await; - - env::set_var("KF_SLACK_TOKEN", "xoxp-test"); - - let temp_dir = TempDir::new()?; - let clone_dir = temp_dir.path().to_path_buf(); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - jira_url: None, - jql: None, - slack_query: Some("test".into()), - slack_api_url: Url::parse(&format!("{}/", server.uri()))?, - max_results: 10, - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 128, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Auto, - no_update_check: false, - self_update: false, - progress: Mode::Never, - ignore_certs: false, - advanced: AdvancedArgs { 
rlimit_nofile: 16384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - - run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &ctx.rules_db).await?; - - let findings = { - let ds = datastore.lock().unwrap(); - ds.get_matches().len() - }; - assert!(findings > 0); - Ok(()) -} diff --git a/tests/int_validation_cache.rs.orig b/tests/int_validation_cache.rs.orig deleted file mode 100644 index a7ab9ea..0000000 --- a/tests/int_validation_cache.rs.orig +++ /dev/null @@ -1,211 +0,0 @@ -// tests/int_validation_cache.rs -use std::{ - fs, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, Mutex, - }, -}; - -use anyhow::Result; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use url::Url; -use wiremock::{ - matchers::{method, path}, - Mock, MockServer, Request, ResponseTemplate, -}; - -#[tokio::test] -async fn test_validation_cache_and_depvars() -> Result<()> { - /* --------------------------------------------------------- * - * 1. Spin-up Wiremock and count incoming validation calls * - * --------------------------------------------------------- */ - let server = MockServer::start().await; - let hit_counter = Arc::new(AtomicUsize::new(0)); - let counter_clone = Arc::clone(&hit_counter); - - Mock::given(method("GET")) - .and(path("/validate")) - .respond_with(move |_req: &Request| { - counter_clone.fetch_add(1, Ordering::SeqCst); - ResponseTemplate::new(200).set_body_string("ok") - }) - .mount(&server) - .await; - - /* --------------------------------------------------------- * - * 2. 
Synthetic rules exercising depends_on_rule + HTTP val * - * --------------------------------------------------------- */ - let rules_yaml = format!( - r#" - rules: - - name: Demo API Key - id: demo.key.1 - pattern: '(demokey_[a-z0-9]{{8}})' - confidence: low - min_entropy: 0.0 - - - name: Demo API Key Validation - id: demo.key.validation.1 - depends_on_rule: - - rule_id: demo.key.1 - variable: TOKEN - pattern: '(demokey_[a-z0-9]{{8}})' - confidence: low - validation: - type: Http - content: - request: - method: GET - url: '{base}/validate?token={{ {{ TOKEN }} }}' - response_matcher: - - report_response: true - - type: WordMatch - words: - - '"error_code":"403003"' - negative: true - "#, - base = server.uri() - ); - - /* --------------------------------------------------------- * - * 3. Temp workspace: rules file + input with 2 duplicates * - * --------------------------------------------------------- */ - let work_dir = TempDir::new()?; - let rules_file = work_dir.path().join("demo.yml"); - fs::write(&rules_file, rules_yaml)?; - - let secret_file = work_dir.path().join("secrets.txt"); - fs::write(&secret_file, "demokey_abcdefgh\ndemokey_abcdefgh")?; - - /* --------------------------------------------------------- * - * 4. 
Build Scan / Global args (no_dedup=true to keep dups) * - * --------------------------------------------------------- */ - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: vec![work_dir.path().to_path_buf()], - rule: vec!["all".into()], - load_builtins: false, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: vec![secret_file.clone()], - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Low, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, // keep duplicates so the cache is stressed - snippet_length: 128, - baseline_file: None, - manage_baseline: false, - }; - - /* --------------------------------------------------------- * - * 5. 
Load rules, run scan * - * --------------------------------------------------------- */ - // --------------------------------------------------------- - // 5. Load rules, record them, run scan - // --------------------------------------------------------- - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; - let resolved = loaded.resolve_enabled_rules()?; - let rules_db = Arc::new(RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?); - - let datastore = Arc::new(Mutex::new(FindingsStore::new(work_dir.path().to_path_buf()))); - - // NEW: make the datastore aware of every rule - { - let mut ds = datastore.lock().unwrap(); - ds.record_rules(rules_db.rules()); // <-- **add this line** - } - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Auto, - progress: Mode::Never, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 8192 }, - }; - - run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &rules_db).await?; - - /* --------------------------------------------------------- * - * 6. Assertions * - * --------------------------------------------------------- */ - // There are two matches for demo.key.validation.1, but the validator - // should have been called only once thanks to SkipMap caching. 
- assert_eq!( - hit_counter.load(Ordering::SeqCst), - 1, - "validator endpoint should be hit exactly once" - ); - - let ds = datastore.lock().unwrap(); - let total_matches = ds.get_matches().len(); - assert_eq!(total_matches, 4, "expected 2 matches per rule (dup secrets)"); // 2 for each rule - - Ok(()) -} diff --git a/tests/int_vulnerable_files.rs.orig b/tests/int_vulnerable_files.rs.orig deleted file mode 100644 index abeb6f1..0000000 --- a/tests/int_vulnerable_files.rs.orig +++ /dev/null @@ -1,257 +0,0 @@ -// tests/integration_scan.rs - -use std::{ - path::{Path, PathBuf}, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use url::Url; - -#[derive(Debug)] -struct TestCase { - file_name: &'static str, - min_expected_findings: usize, -} - -struct TestContext { - rules_db: Arc, -} - -fn root_dir() -> PathBuf { - PathBuf::from(env!("CARGO_MANIFEST_DIR")) -} - -impl TestContext { - fn new() -> Result { - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - 
gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules) - .load(&scan_args) - .context("Failed to load rules")?; - - let resolved = loaded.resolve_enabled_rules().context("Failed to resolve rules")?; - - let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect()) - .context("Failed to compile rules")?; - - Ok(Self { rules_db: Arc::new(rules_db) }) - } - - async fn scan_file(&self, file_path: &Path) -> Result { - let temp_dir = TempDir::new().context("Failed to create temporary directory")?; - let clone_dir = temp_dir.path().to_path_buf(); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: vec![file_path.to_path_buf()], - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: 
Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Auto, - no_update_check: false, - self_update: false, - progress: Mode::Never, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - - run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &self.rules_db).await?; - - let findings = { - let ds = datastore.lock().unwrap(); - ds.get_matches().len() - }; - - Ok(findings) - } -} - -#[tokio::test] -async fn test_scan_vulnerable_files() -> Result<()> { - let test_context = TestContext::new()?; - 
- let test_cases = vec![ - TestCase { file_name: "testdata/c_vulnerable.c", min_expected_findings: 3 }, - TestCase { file_name: "testdata/cpp_vulnerable.cpp", min_expected_findings: 3 }, - TestCase { file_name: "testdata/csharp_vulnerable.cs", min_expected_findings: 4 }, - TestCase { file_name: "testdata/elixir_vulnerable.exs", min_expected_findings: 1 }, - TestCase { file_name: "testdata/generic_secrets.py", min_expected_findings: 9 }, - TestCase { file_name: "testdata/go_vulnerable.go", min_expected_findings: 4 }, - TestCase { file_name: "testdata/java_vulnerable.java", min_expected_findings: 4 }, - TestCase { file_name: "testdata/javascript_vulnerable.js", min_expected_findings: 4 }, - TestCase { file_name: "testdata/json_vulnerable.json", min_expected_findings: 4 }, - TestCase { file_name: "testdata/kotlin_vulnerable.kt", min_expected_findings: 7 }, - TestCase { file_name: "testdata/objc_vulnerable.m", min_expected_findings: 4 }, - TestCase { file_name: "testdata/php_vulnerable.php", min_expected_findings: 5 }, - TestCase { file_name: "testdata/python_vulnerable.py", min_expected_findings: 10 }, - TestCase { file_name: "testdata/python2_vulnerable.py", min_expected_findings: 4 }, - TestCase { file_name: "testdata/ruby_vulnerable.rb", min_expected_findings: 6 }, - TestCase { file_name: "testdata/rust_vulnerable.rs", min_expected_findings: 3 }, - TestCase { file_name: "testdata/scala_vulnerable.scala", min_expected_findings: 3 }, - TestCase { file_name: "testdata/shell_vulnerable.sh", min_expected_findings: 2 }, - TestCase { file_name: "testdata/slack_tokens.properties", min_expected_findings: 17 }, - TestCase { file_name: "testdata/swift_vulnerable.swift", min_expected_findings: 2 }, - TestCase { file_name: "testdata/toml_vulnerable.toml", min_expected_findings: 4 }, - TestCase { file_name: "testdata/tsx_vulnerable.tsx", min_expected_findings: 1 }, - TestCase { file_name: "testdata/typescript_vulnerable.ts", min_expected_findings: 1 }, - TestCase { file_name: 
"testdata/yaml_vulnerable.yaml", min_expected_findings: 4 }, - ]; - - let root = root_dir(); - - for test_case in test_cases { - let test_file = root.join(test_case.file_name); - println!("Testing file: {}", test_case.file_name); - - let findings = test_context.scan_file(&test_file).await?; - - assert!( - findings >= test_case.min_expected_findings, - "File: {} - Expected >= {} findings, got {}", - test_case.file_name, - test_case.min_expected_findings, - findings - ); - } - - Ok(()) -} From 22c5594b535f9b247ce0c8080b28e58956fc2c04 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 10 Aug 2025 21:51:31 -0700 Subject: [PATCH 133/357] Added support for scanning Confluence pages --- CHANGELOG.md | 3 + README.md | 46 +++++- src/cli/commands/inputs.rs | 11 +- src/confluence.rs | 142 ++++++++++++++++ src/findings_store.rs | 10 ++ src/gitlab.rs | 2 +- src/lib.rs | 1 + src/main.rs | 2 + src/reporter.rs | 9 + src/reporter/json_format.rs | 3 + src/scanner/repos.rs | 36 +++- src/scanner/runner.rs | 7 +- tests/int_dedup.rs | 2 + tests/int_github.rs | 2 + tests/int_gitlab.rs | 4 + tests/int_gitlab.rs.orig | 241 +++++++++++++++++++++++++++ tests/int_quiet.rs | 2 +- tests/int_redact.rs | 2 + tests/int_slack.rs | 4 + tests/int_slack.rs.orig | 205 +++++++++++++++++++++++ tests/int_validation_cache.rs | 2 + tests/int_vulnerable_files.rs | 4 + tests/int_vulnerable_files.rs.orig | 255 +++++++++++++++++++++++++++++ 23 files changed, 985 insertions(+), 10 deletions(-) create mode 100644 src/confluence.rs create mode 100644 tests/int_gitlab.rs.orig create mode 100644 tests/int_slack.rs.orig create mode 100644 tests/int_vulnerable_files.rs.orig diff --git a/CHANGELOG.md b/CHANGELOG.md index b8d7720..a4e50d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. 
+## [1.39.0] +- Added support for scanning Confluence pages via `--confluence-url` and `--cql` + ## [1.38.0] - `--quiet` now suppresses scan summaries and rule statistics unless `--rule-stats` is explicitly provided - Added X Consumer key detection and validation diff --git a/README.md b/README.md index 1fba1fb..18b940c 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co ## What Kingfisher Adds - **Live validation** via cloud-provider APIs -- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages +- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, Confluence pages, and Slack messages - **Compressed Files**: Supports extracting and scanning compressed files for secrets - **Baseline mode**: ignore known secrets, flag only new ones - **Language-aware detection** (source-code parsing) for ~20 languages @@ -24,9 +24,10 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co - **Extensible rules**: hundreds of built-in detectors plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) - **Multiple targets**: - **Git history**: local repos or GitHub/GitLab orgs/users - - **Docker images**: public or private via `--docker-image` - - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` - - **Slack messages**: query‑based scans with `--slack-query` + - **Docker images**: public or private via `--docker-image` + - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` + - **Confluence pages**: CQL‑driven scans with `--confluence-url` and `--cql` + - **Slack messages**: query‑based scans with `--slack-query` - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous - **Compressed Files**: Supports extracting and scanning compressed files for secrets - **Baseline management**: 
generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) @@ -421,7 +422,36 @@ KF_JIRA_TOKEN="token" kingfisher scan \ --jql 'ORDER BY created DESC' \ --max-results 1000 ``` ---- + +## Scanning Confluence + +### Scan Confluence pages matching a CQL query + +```bash +# Bearer token +KF_CONFLUENCE_TOKEN="token" kingfisher scan \ + --confluence-url https://confluence.company.com \ + --cql "label = secret" \ + --max-results 500 + +# Basic auth with username and token +KF_CONFLUENCE_USER="user@example.com" KF_CONFLUENCE_TOKEN="token" kingfisher scan \ + --confluence-url https://confluence.company.com \ + --cql "label = secret" \ + --max-results 500 +``` + +Use the base URL of your Confluence site for `--confluence-url`. Kingfisher +automatically adds `/rest/api` to the end, so `https://example.com/wiki` and +`https://example.com` both work depending on your server configuration. + +Generate a [personal access token](https://confluence.atlassian.com/doc/personal-access-tokens-938022290.html) and +set it in the `KF_CONFLUENCE_TOKEN` environment variable. By default, Kingfisher +sends the token as a bearer token in the `Authorization` header. To use basic +authentication instead, also set `KF_CONFLUENCE_USER` to your Confluence email +address; Kingfisher will then send the username and `KF_CONFLUENCE_TOKEN` as a +Basic auth header. If the server responds with a redirect to a login page, the +credentials are invalid or lack the required permissions. ## Scanning Slack ### Scan Slack messages matching a search query @@ -444,6 +474,7 @@ KF_SLACK_TOKEN="xoxp-1234..." kingfisher scan \ | `KF_GITHUB_TOKEN` | GitHub Personal Access Token | | `KF_GITLAB_TOKEN` | GitLab Personal Access Token | | `KF_JIRA_TOKEN` | Jira API token | +| `KF_CONFLUENCE_TOKEN` | Confluence API token | | `KF_SLACK_TOKEN` | Slack API token | | `KF_DOCKER_TOKEN` | Docker registry token (`user:pass` or bearer token). 
If unset, credentials from the Docker keychain are used | | `KF_AWS_KEY` and `KF_AWS_SECRET` | AWS Credentials to use with S3 bucket scanning | @@ -465,6 +496,11 @@ To authenticate Jira requests: export KF_JIRA_TOKEN="token" ``` +To authenticate Confluence requests: +```bash +export KF_CONFLUENCE_TOKEN="token" +``` + _If no token is provided Kingfisher still works for public repositories._ --- diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 2249640..e6f9168 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -27,6 +27,7 @@ pub struct InputSpecifierArgs { "all_github_organizations", "all_gitlab_groups", "jira_url", + "confluence_url", "docker_image", "slack_query", "s3_bucket" @@ -96,6 +97,14 @@ pub struct InputSpecifierArgs { #[arg(long, requires = "jira_url")] pub jql: Option, + /// Confluence base URL (e.g. https://confluence.example.com) + #[arg(long, value_hint = ValueHint::Url, requires = "cql")] + pub confluence_url: Option, + + /// CQL query to select Confluence pages + #[arg(long, requires = "confluence_url")] + pub cql: Option, + /// Slack search query #[arg(long)] pub slack_query: Option, @@ -104,7 +113,7 @@ pub struct InputSpecifierArgs { #[arg(long, default_value = "https://slack.com/api/", value_hint = ValueHint::Url)] pub slack_api_url: Url, - /// Maximum number of Slack or Jira results to fetch + /// Maximum number of Slack, Jira, or Confluence results to fetch #[arg(long, default_value_t = 100)] pub max_results: usize, diff --git a/src/confluence.rs b/src/confluence.rs new file mode 100644 index 0000000..50e525c --- /dev/null +++ b/src/confluence.rs @@ -0,0 +1,142 @@ +use anyhow::{bail, Context, Result}; +use reqwest::{header, Client}; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use url::Url; + +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfluencePage { + pub id: String, + pub title: String, + #[serde(default)] + pub body: Option, + #[serde(rename = "_links")] + pub 
links: ConfluenceLinks, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfluenceBody { + #[serde(default)] + pub storage: Option, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfluenceStorage { + #[serde(default)] + pub value: Option, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfluenceLinks { + pub webui: String, +} + +#[derive(Debug, Deserialize)] +struct ConfluenceSearchResponse { + results: Vec, + #[serde(rename = "_links")] + links: ConfluenceResultLinks, +} + +#[derive(Debug, Deserialize)] +struct ConfluenceResultLinks { + next: Option, +} + +pub async fn search_pages( + confluence_url: Url, + cql: &str, + max_results: usize, + ignore_certs: bool, +) -> Result> { + let token = std::env::var("KF_CONFLUENCE_TOKEN") + .context("KF_CONFLUENCE_TOKEN environment variable must be set")?; + let user = std::env::var("KF_CONFLUENCE_USER").ok(); + if let Some(ref u) = user { + if !u.contains('@') { + bail!("KF_CONFLUENCE_USER must be an email address"); + } + } + + let client = Client::builder() + .redirect(reqwest::redirect::Policy::none()) + .danger_accept_invalid_certs(ignore_certs) + .build() + .context("Failed to build HTTP client")?; + + let base = confluence_url.as_str().trim_end_matches('/'); + let api_base = format!("{}/rest/api/content/search", base); + + let mut pages = Vec::new(); + let mut start = 0usize; + while pages.len() < max_results { + let limit = std::cmp::min(100, max_results - pages.len()); + let url = Url::parse(&api_base)?; + let req = client.get(url).query(&[ + ("cql", cql), + ("limit", &limit.to_string()), + ("start", &start.to_string()), + ("expand", "body.storage"), + ]); + let req = if let Some(user) = &user { + req.basic_auth(user, Some(token.clone())) + } else { + req.bearer_auth(&token) + }; + let resp = req.send().await.context("Failed to send Confluence request")?; + + let status = resp.status(); + if !status.is_success() { + let location = resp + .headers() + .get(header::LOCATION) + 
.and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + let body = resp.text().await.unwrap_or_default(); + if let Some(loc) = location { + bail!( + "Confluence API request returned {} redirect to {}. Check KF_CONFLUENCE_TOKEN and KF_CONFLUENCE_USER", + status, + loc + ); + } else { + bail!("Confluence API request failed with status {}: {}", status, body); + } + } + + let body: ConfluenceSearchResponse = + resp.json().await.context("Failed to parse Confluence response")?; + for p in body.results { + pages.push(p); + if pages.len() >= max_results { + break; + } + } + if pages.len() >= max_results || body.links.next.is_none() { + break; + } + start += limit; + } + Ok(pages) +} + +pub async fn download_pages_to_dir( + confluence_url: Url, + cql: &str, + max_results: usize, + ignore_certs: bool, + output_dir: &PathBuf, +) -> Result> { + std::fs::create_dir_all(output_dir)?; + let pages = search_pages(confluence_url.clone(), cql, max_results, ignore_certs).await?; + let mut paths = Vec::new(); + let base = confluence_url.as_str().trim_end_matches('/'); + let web_base = base.to_string(); + for page in pages { + let file = output_dir.join(format!("{}.json", page.id)); + std::fs::write(&file, serde_json::to_vec(&page)?)?; + let link = format!("{}{}", web_base, page.links.webui); + paths.push((file, link)); + } + Ok(paths) +} \ No newline at end of file diff --git a/src/findings_store.rs b/src/findings_store.rs index a1c94d4..8b8e0a4 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -54,6 +54,7 @@ pub struct FindingsStore { origin_meta: FxHashMap>, docker_images: FxHashMap, slack_links: FxHashMap, + confluence_links: FxHashMap, s3_buckets: FxHashMap, } impl FindingsStore { @@ -74,6 +75,7 @@ impl FindingsStore { bloom_items: 0, docker_images: FxHashMap::default(), slack_links: FxHashMap::default(), + confluence_links: FxHashMap::default(), s3_buckets: FxHashMap::default(), } } @@ -308,6 +310,14 @@ impl FindingsStore { &self.slack_links } + pub fn 
register_confluence_page(&mut self, path: PathBuf, link: String) { + self.confluence_links.insert(path, link); + } + + pub fn confluence_links(&self) -> &FxHashMap { + &self.confluence_links + } + pub fn register_s3_bucket(&mut self, dir: PathBuf, bucket: String) { self.s3_buckets.insert(dir, bucket); } diff --git a/src/gitlab.rs b/src/gitlab.rs index be9b4a5..e7df15e 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -187,4 +187,4 @@ pub async fn list_repositories( } Ok(()) -} \ No newline at end of file +} diff --git a/src/lib.rs b/src/lib.rs index 04f7303..a531c4a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ pub mod blob; pub mod bstring_escape; pub mod bstring_table; pub mod cli; +pub mod confluence; pub mod content_type; pub mod decompress; pub mod defaults; diff --git a/src/main.rs b/src/main.rs index ac78ef7..3943b1e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -285,6 +285,8 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, s3_bucket: None, diff --git a/src/reporter.rs b/src/reporter.rs index ef4c763..13ca84c 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -134,6 +134,13 @@ impl DetailsReporter { } } + /// If the given file path corresponds to a Confluence page downloaded to disk, + /// return the URL for that page. + fn confluence_page_url(&self, path: &std::path::Path) -> Option { + let ds = self.datastore.lock().ok()?; + ds.confluence_links().get(path).cloned() + } + /// If the given file path corresponds to a Slack message downloaded to disk, /// return the permalink for that message. 
fn slack_message_url(&self, path: &std::path::Path) -> Option { @@ -333,6 +340,8 @@ impl DetailsReporter { Origin::File(e) => { if let Some(url) = self.jira_issue_url(&e.path, args) { Some(url) + } else if let Some(url) = self.confluence_page_url(&e.path) { + Some(url) } else if let Some(url) = self.slack_message_url(&e.path) { Some(url) } else if let Some(mapped) = self.s3_display_path(&e.path) { diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index d9eda56..10d7aee 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -87,6 +87,9 @@ mod tests { // Jira options jira_url: None, jql: None, + // Confluence options + confluence_url: None, + cql: None, max_results: 100, // Slack options slack_query: None, diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 7bd6b8d..a249f02 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -15,7 +15,7 @@ use crate::{ commands::{github::GitCloneMode, github::GitHistoryMode, scan}, global, }, - findings_store, + confluence, findings_store, git_binary::{CloneMode, Git}, git_url::GitUrl, github, gitlab, @@ -263,6 +263,40 @@ pub async fn fetch_jira_issues( Ok(vec![output_dir]) } +pub async fn fetch_confluence_pages( + args: &scan::ScanArgs, + global_args: &global::GlobalArgs, + datastore: &Arc>, +) -> Result> { + let Some(confluence_url) = args.input_specifier_args.confluence_url.clone() else { + return Ok(Vec::new()); + }; + let Some(cql) = args.input_specifier_args.cql.as_deref() else { + return Ok(Vec::new()); + }; + let max_results = args.input_specifier_args.max_results; + let output_root = { + let ds = datastore.lock().unwrap(); + ds.clone_root() + }; + let output_dir = output_root.join("confluence_pages"); + let paths = confluence::download_pages_to_dir( + confluence_url, + cql, + max_results, + global_args.ignore_certs, + &output_dir, + ) + .await?; + { + let mut ds = datastore.lock().unwrap(); + for (path, link) in &paths { + 
ds.register_confluence_page(path.clone(), link.clone()); + } + } + Ok(vec![output_dir]) +} + pub async fn fetch_slack_messages( args: &scan::ScanArgs, global_args: &global::GlobalArgs, diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 10b6e51..e389543 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -19,7 +19,8 @@ use crate::{ scanner::{ clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos, repos::{ - enumerate_gitlab_repos, fetch_jira_issues, fetch_s3_objects, fetch_slack_messages, + enumerate_gitlab_repos, fetch_confluence_pages, fetch_jira_issues, fetch_s3_objects, + fetch_slack_messages, }, run_secret_validation, save_docker_images, summary::print_scan_summary, @@ -70,6 +71,10 @@ pub async fn run_async_scan( let jira_dirs = fetch_jira_issues(args, global_args, &datastore).await?; input_roots.extend(jira_dirs); + // Fetch Confluence pages if requested + let confluence_dirs = fetch_confluence_pages(args, global_args, &datastore).await?; + input_roots.extend(confluence_dirs); + // Fetch Slack messages if requested let slack_dirs = fetch_slack_messages(args, global_args, &datastore).await?; input_roots.extend(slack_dirs); diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 4c3be19..68b9663 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -81,6 +81,8 @@ rules: jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_github.rs b/tests/int_github.rs index 8edc022..0bae089 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -68,6 +68,8 @@ fn test_github_remote_scan() -> Result<()> { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 4668439..7e48f60 100644 --- a/tests/int_gitlab.rs +++ 
b/tests/int_gitlab.rs @@ -67,6 +67,8 @@ fn test_gitlab_remote_scan() -> Result<()> { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), @@ -170,6 +172,8 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_gitlab.rs.orig b/tests/int_gitlab.rs.orig new file mode 100644 index 0000000..4668439 --- /dev/null +++ b/tests/int_gitlab.rs.orig @@ -0,0 +1,241 @@ +// tests/int_gitlab.rs +use std::{ + str::FromStr, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + git_url::GitUrl, + scanner::{load_and_record_rules, run_scan}, +}; +use tempfile::TempDir; +use tokio::runtime::Runtime; +use url::Url; + +/// Derive process exit-codes from findings +fn determine_exit_code(total: usize, validated: usize) -> i32 { + match (total, validated) { + (0, _) => 0, + (_, v) if v > 0 => 205, + _ => 200, + } +} + +#[test] +fn test_gitlab_remote_scan() -> Result<()> { + let temp_dir = TempDir::new().context("tmp dir")?; + let clone_dir = temp_dir.path().to_path_buf(); + + // Public GitLab repo seeded with test secrets + let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, 
+ input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/")?, + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/")?, + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16_384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + let rt = Runtime::new()?; + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + + rt.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + + let ds = datastore.lock().unwrap(); + let findings = 
ds.get_matches(); + let total = findings.len(); + let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); + + assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); + + let exit_code = determine_exit_code(total, validated); + assert!( + exit_code >= 200, + "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" + ); + + drop(rt); + Ok(()) +} + +#[test] +fn test_gitlab_remote_scan_no_history() -> Result<()> { + let temp_dir = TempDir::new().context("tmp dir")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/")?, + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/")?, + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::None, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + 
rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16_384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + let rt = Runtime::new()?; + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + + rt.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + + let ds = datastore.lock().unwrap(); + let findings = ds.get_matches(); + let total = findings.len(); + let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); + + assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); + + let exit_code = determine_exit_code(total, validated); + assert!( + exit_code >= 200, + "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" + ); + + drop(rt); + Ok(()) +} diff --git a/tests/int_quiet.rs b/tests/int_quiet.rs index 598f103..1968972 100644 --- a/tests/int_quiet.rs +++ b/tests/int_quiet.rs @@ -56,4 +56,4 @@ fn scan_quiet_with_rule_stats_prints_rule_stats() { contains_bytes(out, b"Rule Performance Stats") })); } -} \ No newline at end of file +} diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 45f3767..69a1061 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -51,6 +51,8 @@ async fn test_redact_hashes_finding_values() -> Result<()> { gitlab_repo_type: GitLabRepoType::Owner, jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git 
a/tests/int_slack.rs b/tests/int_slack.rs index 7284e1d..e94607f 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -57,6 +57,8 @@ impl TestContext { gitlab_repo_type: GitLabRepoType::Owner, jira_url: None, jql: None, + confluence_url: None, + cql: None, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), s3_bucket: None, @@ -147,6 +149,8 @@ async fn test_scan_slack_messages() -> Result<()> { gitlab_repo_type: GitLabRepoType::Owner, jira_url: None, jql: None, + confluence_url: None, + cql: None, slack_query: Some("test".into()), slack_api_url: Url::parse(&format!("{}/", server.uri()))?, max_results: 10, diff --git a/tests/int_slack.rs.orig b/tests/int_slack.rs.orig new file mode 100644 index 0000000..7284e1d --- /dev/null +++ b/tests/int_slack.rs.orig @@ -0,0 +1,205 @@ +use std::{ + env, + sync::{Arc, Mutex}, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; +use wiremock::{ + matchers::{method, path}, + Mock, MockServer, ResponseTemplate, +}; + +struct TestContext { + rules_db: Arc, +} + +impl TestContext { + fn new() -> Result { + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + 
github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + max_results: 10, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; + Ok(Self { rules_db: Arc::new(rules_db) }) + } +} + +#[tokio::test] +async fn test_scan_slack_messages() -> Result<()> { + let ctx = TestContext::new()?; + + let server = MockServer::start().await; + let response = serde_json::json!({ + "ok": true, + "messages": { + "matches": [{ + "permalink": "https://example.slack.com/archives/C123/p1234", + "text": "This contains a github token ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa", + "ts": "1234.56", + "channel": {"id": "C123", "name": "general"} + }], + "pagination": {"page": 1, "page_count": 1} + } + }); + Mock::given(method("GET")) + .and(path("/search.messages")) + 
.respond_with(ResponseTemplate::new(200).set_body_json(response)) + .mount(&server) + .await; + + env::set_var("KF_SLACK_TOKEN", "xoxp-test"); + + let temp_dir = TempDir::new()?; + let clone_dir = temp_dir.path().to_path_buf(); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + slack_query: Some("test".into()), + slack_api_url: Url::parse(&format!("{}/", server.uri()))?, + max_results: 10, + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; 
+ + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &ctx.rules_db).await?; + + let findings = { + let ds = datastore.lock().unwrap(); + ds.get_matches().len() + }; + assert!(findings > 0); + Ok(()) +} diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 46d4521..2caa10a 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -124,6 +124,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index c53adae..2478170 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -67,6 +67,8 @@ impl TestContext { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), @@ -143,6 +145,8 @@ impl TestContext { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_vulnerable_files.rs.orig b/tests/int_vulnerable_files.rs.orig new file mode 100644 index 0000000..c53adae --- /dev/null +++ b/tests/int_vulnerable_files.rs.orig @@ -0,0 +1,255 @@ +// tests/integration_scan.rs + +use std::{ + path::{Path, PathBuf}, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, 
+ rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; + +#[derive(Debug)] +struct TestCase { + file_name: &'static str, + min_expected_findings: usize, +} + +struct TestContext { + rules_db: Arc, +} + +fn root_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) +} + +impl TestContext { + fn new() -> Result { + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + 
baseline_file: None, + manage_baseline: false, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules) + .load(&scan_args) + .context("Failed to load rules")?; + + let resolved = loaded.resolve_enabled_rules().context("Failed to resolve rules")?; + + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect()) + .context("Failed to compile rules")?; + + Ok(Self { rules_db: Arc::new(rules_db) }) + } + + async fn scan_file(&self, file_path: &Path) -> Result { + let temp_dir = TempDir::new().context("Failed to create temporary directory")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: vec![file_path.to_path_buf()], + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + 
no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &self.rules_db).await?; + + let findings = { + let ds = datastore.lock().unwrap(); + ds.get_matches().len() + }; + + Ok(findings) + } +} + +#[tokio::test] +async fn test_scan_vulnerable_files() -> Result<()> { + let test_context = TestContext::new()?; + + let test_cases = vec![ + TestCase { file_name: "testdata/c_vulnerable.c", min_expected_findings: 3 }, + TestCase { file_name: "testdata/cpp_vulnerable.cpp", min_expected_findings: 3 }, + TestCase { file_name: "testdata/csharp_vulnerable.cs", min_expected_findings: 4 }, + TestCase { file_name: "testdata/elixir_vulnerable.exs", min_expected_findings: 1 }, + TestCase { file_name: "testdata/generic_secrets.py", min_expected_findings: 9 }, + TestCase { file_name: "testdata/go_vulnerable.go", min_expected_findings: 4 }, + TestCase { file_name: "testdata/java_vulnerable.java", min_expected_findings: 4 }, + TestCase { file_name: "testdata/javascript_vulnerable.js", min_expected_findings: 4 }, + TestCase { file_name: "testdata/json_vulnerable.json", min_expected_findings: 4 }, + TestCase { file_name: "testdata/kotlin_vulnerable.kt", min_expected_findings: 7 }, + TestCase { file_name: "testdata/objc_vulnerable.m", min_expected_findings: 4 }, + TestCase { file_name: "testdata/php_vulnerable.php", min_expected_findings: 5 }, + TestCase { file_name: 
"testdata/python_vulnerable.py", min_expected_findings: 10 }, + TestCase { file_name: "testdata/python2_vulnerable.py", min_expected_findings: 4 }, + TestCase { file_name: "testdata/ruby_vulnerable.rb", min_expected_findings: 6 }, + TestCase { file_name: "testdata/rust_vulnerable.rs", min_expected_findings: 3 }, + TestCase { file_name: "testdata/scala_vulnerable.scala", min_expected_findings: 3 }, + TestCase { file_name: "testdata/shell_vulnerable.sh", min_expected_findings: 2 }, + TestCase { file_name: "testdata/slack_tokens.properties", min_expected_findings: 17 }, + TestCase { file_name: "testdata/swift_vulnerable.swift", min_expected_findings: 2 }, + TestCase { file_name: "testdata/toml_vulnerable.toml", min_expected_findings: 4 }, + TestCase { file_name: "testdata/tsx_vulnerable.tsx", min_expected_findings: 1 }, + TestCase { file_name: "testdata/typescript_vulnerable.ts", min_expected_findings: 1 }, + TestCase { file_name: "testdata/yaml_vulnerable.yaml", min_expected_findings: 4 }, + ]; + + let root = root_dir(); + + for test_case in test_cases { + let test_file = root.join(test_case.file_name); + println!("Testing file: {}", test_case.file_name); + + let findings = test_context.scan_file(&test_file).await?; + + assert!( + findings >= test_case.min_expected_findings, + "File: {} - Expected >= {} findings, got {}", + test_case.file_name, + test_case.min_expected_findings, + findings + ); + } + + Ok(()) +} From 053d1a322452369035c5900ff98a0aabf189b1d3 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 10 Aug 2025 21:54:26 -0700 Subject: [PATCH 134/357] Added support for scanning Confluence pages --- tests/int_gitlab.rs.orig | 241 --------------------------- tests/int_slack.rs.orig | 205 ----------------------- tests/int_vulnerable_files.rs.orig | 255 ----------------------------- 3 files changed, 701 deletions(-) delete mode 100644 tests/int_gitlab.rs.orig delete mode 100644 tests/int_slack.rs.orig delete mode 100644 tests/int_vulnerable_files.rs.orig diff 
--git a/tests/int_gitlab.rs.orig b/tests/int_gitlab.rs.orig deleted file mode 100644 index 4668439..0000000 --- a/tests/int_gitlab.rs.orig +++ /dev/null @@ -1,241 +0,0 @@ -// tests/int_gitlab.rs -use std::{ - str::FromStr, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - git_url::GitUrl, - scanner::{load_and_record_rules, run_scan}, -}; -use tempfile::TempDir; -use tokio::runtime::Runtime; -use url::Url; - -/// Derive process exit-codes from findings -fn determine_exit_code(total: usize, validated: usize) -> i32 { - match (total, validated) { - (0, _) => 0, - (_, v) if v > 0 => 205, - _ => 200, - } -} - -#[test] -fn test_gitlab_remote_scan() -> Result<()> { - let temp_dir = TempDir::new().context("tmp dir")?; - let clone_dir = temp_dir.path().to_path_buf(); - - // Public GitLab repo seeded with test secrets - let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; - let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: vec![git_url], - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/")?, - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/")?, - gitlab_repo_type: GitLabRepoType::Owner, - - 
jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - no_binary: true, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: false, - color: Mode::Auto, - progress: Mode::Auto, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16_384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - let rt = Runtime::new()?; - - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - - rt.block_on(async { - run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await - })?; - - let ds = datastore.lock().unwrap(); - let findings = ds.get_matches(); - let total = findings.len(); - let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); - - assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); - - let exit_code = determine_exit_code(total, validated); - assert!( - exit_code >= 200, - "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" - ); - - drop(rt); - Ok(()) -} - -#[test] -fn test_gitlab_remote_scan_no_history() -> 
Result<()> { - let temp_dir = TempDir::new().context("tmp dir")?; - let clone_dir = temp_dir.path().to_path_buf(); - - let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; - let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: vec![git_url], - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/")?, - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/")?, - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::None, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - no_binary: true, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: false, - color: Mode::Auto, - progress: Mode::Auto, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16_384 }, 
- }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - let rt = Runtime::new()?; - - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - - rt.block_on(async { - run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await - })?; - - let ds = datastore.lock().unwrap(); - let findings = ds.get_matches(); - let total = findings.len(); - let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); - - assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); - - let exit_code = determine_exit_code(total, validated); - assert!( - exit_code >= 200, - "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" - ); - - drop(rt); - Ok(()) -} diff --git a/tests/int_slack.rs.orig b/tests/int_slack.rs.orig deleted file mode 100644 index 7284e1d..0000000 --- a/tests/int_slack.rs.orig +++ /dev/null @@ -1,205 +0,0 @@ -use std::{ - env, - sync::{Arc, Mutex}, -}; - -use anyhow::Result; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use url::Url; -use wiremock::{ - matchers::{method, path}, - Mock, MockServer, ResponseTemplate, -}; - -struct TestContext { - rules_db: Arc, -} - -impl TestContext { - fn new() -> Result { - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: 
Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - jira_url: None, - jql: None, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - max_results: 10, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - baseline_file: None, - manage_baseline: false, - }; - - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; - let resolved = loaded.resolve_enabled_rules()?; - let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; - Ok(Self { rules_db: Arc::new(rules_db) }) - } -} - -#[tokio::test] -async fn test_scan_slack_messages() -> Result<()> { - let ctx = TestContext::new()?; - - let server = MockServer::start().await; - let response = serde_json::json!({ - "ok": true, - "messages": { - "matches": [{ - "permalink": "https://example.slack.com/archives/C123/p1234", - "text": "This contains a github token ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa", - "ts": "1234.56", - "channel": {"id": "C123", "name": "general"} - }], - "pagination": {"page": 1, "page_count": 1} - } - }); - 
Mock::given(method("GET")) - .and(path("/search.messages")) - .respond_with(ResponseTemplate::new(200).set_body_json(response)) - .mount(&server) - .await; - - env::set_var("KF_SLACK_TOKEN", "xoxp-test"); - - let temp_dir = TempDir::new()?; - let clone_dir = temp_dir.path().to_path_buf(); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - jira_url: None, - jql: None, - slack_query: Some("test".into()), - slack_api_url: Url::parse(&format!("{}/", server.uri()))?, - max_results: 10, - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Auto, - no_update_check: false, - self_update: false, - progress: Mode::Never, - ignore_certs: 
false, - advanced: AdvancedArgs { rlimit_nofile: 16384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - - run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &ctx.rules_db).await?; - - let findings = { - let ds = datastore.lock().unwrap(); - ds.get_matches().len() - }; - assert!(findings > 0); - Ok(()) -} diff --git a/tests/int_vulnerable_files.rs.orig b/tests/int_vulnerable_files.rs.orig deleted file mode 100644 index c53adae..0000000 --- a/tests/int_vulnerable_files.rs.orig +++ /dev/null @@ -1,255 +0,0 @@ -// tests/integration_scan.rs - -use std::{ - path::{Path, PathBuf}, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use url::Url; - -#[derive(Debug)] -struct TestCase { - file_name: &'static str, - min_expected_findings: usize, -} - -struct TestContext { - rules_db: Arc, -} - -fn root_dir() -> PathBuf { - PathBuf::from(env!("CARGO_MANIFEST_DIR")) -} - -impl TestContext { - fn new() -> Result { - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: 
Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - baseline_file: None, - manage_baseline: false, - }; - - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules) - .load(&scan_args) - .context("Failed to load rules")?; - - let resolved = loaded.resolve_enabled_rules().context("Failed to resolve rules")?; - - let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect()) - .context("Failed to compile rules")?; - - Ok(Self { rules_db: Arc::new(rules_db) }) - } - - async fn scan_file(&self, file_path: &Path) -> Result { - let temp_dir = TempDir::new().context("Failed to create temporary directory")?; - let clone_dir = temp_dir.path().to_path_buf(); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: vec![file_path.to_path_buf()], - git_url: Vec::new(), - github_user: Vec::new(), - 
github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Auto, - no_update_check: false, - self_update: false, - progress: Mode::Never, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - - run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &self.rules_db).await?; - - let findings = { - let ds = datastore.lock().unwrap(); - ds.get_matches().len() - }; - - Ok(findings) - } -} - -#[tokio::test] -async fn test_scan_vulnerable_files() -> Result<()> { - let test_context = TestContext::new()?; - - 
let test_cases = vec![ - TestCase { file_name: "testdata/c_vulnerable.c", min_expected_findings: 3 }, - TestCase { file_name: "testdata/cpp_vulnerable.cpp", min_expected_findings: 3 }, - TestCase { file_name: "testdata/csharp_vulnerable.cs", min_expected_findings: 4 }, - TestCase { file_name: "testdata/elixir_vulnerable.exs", min_expected_findings: 1 }, - TestCase { file_name: "testdata/generic_secrets.py", min_expected_findings: 9 }, - TestCase { file_name: "testdata/go_vulnerable.go", min_expected_findings: 4 }, - TestCase { file_name: "testdata/java_vulnerable.java", min_expected_findings: 4 }, - TestCase { file_name: "testdata/javascript_vulnerable.js", min_expected_findings: 4 }, - TestCase { file_name: "testdata/json_vulnerable.json", min_expected_findings: 4 }, - TestCase { file_name: "testdata/kotlin_vulnerable.kt", min_expected_findings: 7 }, - TestCase { file_name: "testdata/objc_vulnerable.m", min_expected_findings: 4 }, - TestCase { file_name: "testdata/php_vulnerable.php", min_expected_findings: 5 }, - TestCase { file_name: "testdata/python_vulnerable.py", min_expected_findings: 10 }, - TestCase { file_name: "testdata/python2_vulnerable.py", min_expected_findings: 4 }, - TestCase { file_name: "testdata/ruby_vulnerable.rb", min_expected_findings: 6 }, - TestCase { file_name: "testdata/rust_vulnerable.rs", min_expected_findings: 3 }, - TestCase { file_name: "testdata/scala_vulnerable.scala", min_expected_findings: 3 }, - TestCase { file_name: "testdata/shell_vulnerable.sh", min_expected_findings: 2 }, - TestCase { file_name: "testdata/slack_tokens.properties", min_expected_findings: 17 }, - TestCase { file_name: "testdata/swift_vulnerable.swift", min_expected_findings: 2 }, - TestCase { file_name: "testdata/toml_vulnerable.toml", min_expected_findings: 4 }, - TestCase { file_name: "testdata/tsx_vulnerable.tsx", min_expected_findings: 1 }, - TestCase { file_name: "testdata/typescript_vulnerable.ts", min_expected_findings: 1 }, - TestCase { file_name: 
"testdata/yaml_vulnerable.yaml", min_expected_findings: 4 }, - ]; - - let root = root_dir(); - - for test_case in test_cases { - let test_file = root.join(test_case.file_name); - println!("Testing file: {}", test_case.file_name); - - let findings = test_context.scan_file(&test_file).await?; - - assert!( - findings >= test_case.min_expected_findings, - "File: {} - Expected >= {} findings, got {}", - test_case.file_name, - test_case.min_expected_findings, - findings - ); - } - - Ok(()) -} From 4489b576f88fe9e545f251a9e852d123d40ec339 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 10 Aug 2025 21:55:45 -0700 Subject: [PATCH 135/357] Added support for scanning Confluence pages --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 18b940c..06a0a70 100644 --- a/README.md +++ b/README.md @@ -437,7 +437,7 @@ KF_CONFLUENCE_TOKEN="token" kingfisher scan \ # Basic auth with username and token KF_CONFLUENCE_USER="user@example.com" KF_CONFLUENCE_TOKEN="token" kingfisher scan \ --confluence-url https://confluence.company.com \ - --cql "label = secret" \ + --cql "text ~ 'password'" \ --max-results 500 ``` From 0e7a0b57dd3426640f73ff92c11de009f3820bd3 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 10 Aug 2025 21:57:35 -0700 Subject: [PATCH 136/357] Added support for scanning Confluence pages --- README.md | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 06a0a70..d51f0b3 100644 --- a/README.md +++ b/README.md @@ -445,13 +445,10 @@ Use the base URL of your Confluence site for `--confluence-url`. Kingfisher automatically adds `/rest/api` to the end, so `https://example.com/wiki` and `https://example.com` both work depending on your server configuration. -Generate a [personal access token](https://confluence.atlassian.com/doc/personal-access-tokens-938022290.html) and -set it in the `KF_CONFLUENCE_TOKEN` environment variable. 
By default, Kingfisher -sends the token as a bearer token in the `Authorization` header. To use basic -authentication instead, also set `KF_CONFLUENCE_USER` to your Confluence email -address; Kingfisher will then send the username and `KF_CONFLUENCE_TOKEN` as a -Basic auth header. If the server responds with a redirect to a login page, the -credentials are invalid or lack the required permissions. +Generate a personal access token and set it in the `KF_CONFLUENCE_TOKEN` environment variable. By default, Kingfisher sends the token as a bearer token in the `Authorization` header. + +To use basic authentication instead, also set `KF_CONFLUENCE_USER` to your Confluence email address; Kingfisher will then send the username and `KF_CONFLUENCE_TOKEN` as a Basic auth header. If the server responds with a redirect to a login page, the credentials are invalid or lack the required permissions. + ## Scanning Slack ### Scan Slack messages matching a search query From 38371b538eb969418a74aa443c73e815b5ef8aca Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 11 Aug 2025 08:04:52 -0700 Subject: [PATCH 137/357] Added support for scanning Confluence pages --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 5eb52fc..90ac4a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.38.0" +version = "1.39.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true From 39588cc0988f58aaf61c6a2e86c92fe4add67587 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 11 Aug 2025 08:25:24 -0700 Subject: [PATCH 138/357] Added support for scanning Confluence pages --- src/confluence.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/confluence.rs b/src/confluence.rs index 50e525c..7150e90 100644 --- a/src/confluence.rs +++ b/src/confluence.rs @@ -67,11 +67,13 @@ pub async fn 
search_pages( let base = confluence_url.as_str().trim_end_matches('/'); let api_base = format!("{}/rest/api/content/search", base); + let api_url = Url::parse(&api_base)?; let mut pages = Vec::new(); let mut start = 0usize; + while pages.len() < max_results { let limit = std::cmp::min(100, max_results - pages.len()); - let url = Url::parse(&api_base)?; + let url = api_url.clone(); let req = client.get(url).query(&[ ("cql", cql), ("limit", &limit.to_string()), From 35e4b9011def97eb6bb9df0118fb62184792b13e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 11 Aug 2025 08:26:49 -0700 Subject: [PATCH 139/357] Added support for scanning Confluence pages --- src/confluence.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/confluence.rs b/src/confluence.rs index 7150e90..aecf637 100644 --- a/src/confluence.rs +++ b/src/confluence.rs @@ -94,7 +94,8 @@ pub async fn search_pages( .get(header::LOCATION) .and_then(|v| v.to_str().ok()) .map(|s| s.to_string()); - let body = resp.text().await.unwrap_or_default(); + let body = resp.text().await.unwrap_or_else(|e| format!("Failed to read response: {}", e)); + if let Some(loc) = location { bail!( "Confluence API request returned {} redirect to {}. 
Check KF_CONFLUENCE_TOKEN and KF_CONFLUENCE_USER", From 98ce4b9296ce9dd900c72de40b2a938c555be719 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 11 Aug 2025 09:03:58 -0700 Subject: [PATCH 140/357] Added support for scanning Confluence pages --- src/confluence.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/confluence.rs b/src/confluence.rs index aecf637..2a1bddb 100644 --- a/src/confluence.rs +++ b/src/confluence.rs @@ -81,7 +81,7 @@ pub async fn search_pages( ("expand", "body.storage"), ]); let req = if let Some(user) = &user { - req.basic_auth(user, Some(token.clone())) + req.basic_auth(user, Some(&token)) } else { req.bearer_auth(&token) }; @@ -95,7 +95,7 @@ pub async fn search_pages( .and_then(|v| v.to_str().ok()) .map(|s| s.to_string()); let body = resp.text().await.unwrap_or_else(|e| format!("Failed to read response: {}", e)); - + if let Some(loc) = location { bail!( "Confluence API request returned {} redirect to {}. Check KF_CONFLUENCE_TOKEN and KF_CONFLUENCE_USER", From 8c71eae231c5c3a147390cc4751a5638b7821058 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 13 Aug 2025 08:22:53 -0700 Subject: [PATCH 141/357] =?UTF-8?q?Dropped=20the=20=E2=80=9Cprevalidated?= =?UTF-8?q?=E2=80=9D=20flag=20from=20rule=20definitions=20and=20validation?= =?UTF-8?q?=20logic=20so=20every=20finding=20now=20flows=20through=20the?= =?UTF-8?q?=20standard=20active/inactive/unknown=20pipeline,=20simplifying?= =?UTF-8?q?=20rule=20configuration=20and=20preventing=20special=E2=80=91ca?= =?UTF-8?q?se=20bypasses?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 3 +++ Cargo.toml | 2 +- data/rules/pem.yml | 2 -- data/rules/privkey.yml | 2 -- src/matcher.rs | 3 --- src/rules/rule.rs | 3 --- src/scanner/validation.rs | 9 --------- tests/int_rules_no_validated_findings.rs | 17 ++++++----------- 8 files changed, 10 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
a4e50d6..8ed7595 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.40.0] +- Dropped the “prevalidated” flag from rule definitions and validation logic so every finding now flows through the standard active/inactive/unknown pipeline, simplifying rule configuration and preventing special‑case bypasses + ## [1.39.0] - Added support for scanning Confluence pages via `--confluence-url` and `--cql` diff --git a/Cargo.toml b/Cargo.toml index 90ac4a4..15ad49a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.39.0" +version = "1.40.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/data/rules/pem.yml b/data/rules/pem.yml index dc8e558..390171d 100644 --- a/data/rules/pem.yml +++ b/data/rules/pem.yml @@ -10,7 +10,6 @@ rules: -----END\ .{0,20}\ ?PRIVATE\ KEY\ ?.{0,20}----- min_entropy: 4.5 confidence: high - prevalidated: true examples: - | -----BEGIN RSA PRIVATE KEY----- @@ -62,7 +61,6 @@ rules: (?: [^a-zA-Z0-9+/=] | $ ) min_entropy: 4.5 confidence: high - prevalidated: true examples: - 
'PRIVATE_KEY_B64=LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb2dJQkFBS0NBUUVBb3kxWFh1VkFRcHFIYlFFMDVta2hyTmcvMTI0Ri8ySzlPYW5pelpUWlVVaEswOFU4CkxhaC9SbVVsWHFRMDEvU255aktGOWZqUDhFcU1OZ1dpamUzYmVwL3RPOVpTMEFUMi9PVlJXeS9TOG52RDQ5WTMKenMxMktSbERhR2lZc0RsYUZrbHJkeDQ4RWhRVmdHN3hmWE1jaC9OejJzc2FEby9kRkNBOW80TkZZQWUzM2UveApWNVo1UHNkWkl6dkNZQVlCNDRoUEtpN3JXRE1IbFdzM1kvVkVtQXMzSzVNK2QvL3QzRHB4WnBEbWJERGdYa2w2CjZUdDh3VXloUVZ3MkZpMStobTF1T2QwYjFkaW9aNko2OXNTT2JOZXpSR3YxYjdZaFltT0JKL1JBbHN5ZHoxTmgKVXpXT1lYV0Z1OGJrOU9JM3lQMEc0TE84QjhtbWRldE1RVVoyelFJREFRQUJBb0lCQUN2ckhUUHVVZ0JiSlE0QwpvQ0ZQdEgrWDZIN3NIdk1ndVR0VzdUTlYxN1BYMkVQdE53ZzI3S0tld0pNYmNSbWF3THBjSk5BU09xMDY4MGZxCjlsaHE1NEsybnB4WFVBeXErV3NSc1hid2hUODhibm5aQTBaRzZJR2hTaEpFN0t1cGxBU2htQ29FV2ppbmJTNFgKTGlvTW5HWSs4VFMzSzNrMTRWUDBaWUtuNXprMERHZnFBMEo0VTRXSmxUeGwrTWZxd0pJOTlrcTdHbFVlZkdncQpuK3Q1d2NrV3BPbTd5TUJjZTlTSXlmTm54bnU3TkZYQm50VTN5RGxSUThWUWZmNEtRMzJCaWNiYlJWemR1TThNCnNxMU5CZWNzL0EzUXRvdG1nWUc4d094ZXpNS3Iyays2QzB2NmlFc0h5T0lmR25GWktSZDJFd0dnWlo3aytURHUKUUYrcjd1VUNnWUVBMkRqNUJoYmpybDFRNTZya3BhTGFvVldRV1Y5YUYzUUJtNlNZM2VQYmlvY2JNR2k1ak1ESQpkSjdJVXlLYUljK3BNV1RQYlBmVUd2WmNENlczZDFBNUNUSnFuWHVuVlY3czRqaWJ6WDZUbjhNM3IrMHZTZnNZCmdPMHBtRFpndlNqaVZTRUNBQTZFOFUxQ1lFZU5KUDFDOW12cGJVNzJRTEpndWp3M3JMb2oyYmNDZ1lFQXdUSXYKOUNSeWNOQXRBbDcvUHdWZGh5eXRvVHBSRnZDSU1HSVk5SjMxZ3lva0ZlaFQvWjQ4WkF6anl6ZTBSUXYzdGUxTQoveVJMQkVETGkwbEtrZFVXckVkaVR3dm1KdkpwMDZ0OEdCbERsK25ycXVLWTFxVThDbTR5cis4QzZtRThkVnZrClNINXBhRXptOERFTE1wSjhGVTZFYnhmZHZjRzZmSGx6dnVnZmc1c0NnWUFFQ1BRa3QvS2h3MTRLSkxkRm5BZG0KY1ZsVFFhTkZ3c1Z3NlI1dExaNWdOR3MrZVFYVmFaZVVEWTZCZHFqWHJxOWltNVgvVzVTYXVEUTVtb2NVOCt0TQpqNk5Mc3c0SldzOGkzWm1TdVNUNkcwT0R4ZkpXK0JlWitGTUpZeUpsQlVsTCsyUzFLWkF6akpTTGhXcE40V2dKCmZ6UUk5U3RGUTg3b1NzMWpMTW9VZXdLQmdGOE9CMlFURHErTTdhaE4vejROc0wvU2JyZDJEdkcvZFBLQlFaQVIKcS90V0g1MGJ5ejlzdkgvcGk2YXdDS1UwUnpPZXh4UjkwZDhNMWxqNHZaVFZDQ3ZKajRnZTdhVlovbEdqL1JHSwpWS1NJOW1nRXgzaE1vaWJybzByR3lXTnlaaUhFRGFUUmRhRll2UU9PemRpYkZDd1RqcnR1UGE2Z2c5VzhtQU5sCkNDUmpBb0dBSTRIbnpyV3kzaU5kR2xqVnh
4bW1DN1V0c0MvajJBUEZpcHc0ZHJ0U2NsMDFRZzF5WkowbDNBTk4KOU5lTmVSUUFzN3pFTng2T1B1SzlxYy83T1ROMTJKaHdoUTIzdXZwNjZjV0krdTRjcVpOZTJyZVFVVWVmM3psbQpMcXRmOU50VHp5M3pjMGZQcGoxQnBlRmxHSG9SVDhjVHpBWjFTeGwyZWChazlqS2RVeDQ9Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0t' - ' "privateKey": "LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb2dJQkFBS0NBUUVBbUhKOEJHdTFYZUZ4aENVQXBrNHNSTVI4RnRTdGtyMEx0OWtWTGNSUjRFWitiOWhHCmR0blJpOFhqV3d5MU5zMHliMkJMdHBpVHZKSFVKTUphWXluZ2ZkZnZhcWhocm1yYm5vV0pLQkxmeUxwTXFNS1EKQ3RialFxbnVrQURJUWVQd2ZGeTNpVHkxd1JkRC9zTUs1U0VtV0Fxb0pZQk50eTFZZzA2UzVkYVlPM2xjY3hrYQpQWjRjcm9McWF6Ny9tU3dDVTR5VWRSb3h4WVF4VG1MZXg5M2tqU09TTmdpK0FXc0lCbjV3UHI0VHNuVHFSeWpIClN2aEdMdk9YREpRYWZRdk56WjFSL1FYMzlOQk9xOEVKZW5pWXdaUm9uNVcvNVhMYW94MFFyUGhrY1BES3A5SVUKeHpJakUwWlNmMStUK1FFbTQ3TkFtSnhvZjFhdGRFVzZDTCtheHdJREFRQUJBb0lCQUQ3enI4REhsWnFSK1NWZgpmbGd1bWRzLzVCb3Rjd3ZRWXlGbFZIaVV4RmEvNVlCY0tDVDJKN0QzWTc1NmplNTJaK2hVTkkvUGk5cG53ZG40CkpBa2xCdDRRcUg0NzBES05UK216TFFOT1gvanM3YkVXdnhLcTBDZjhNbFptN0V0QlRGS2VtdS9pRVJBT2duYVcKcGs0ZUZVNXdBQ1dVU1FObWgxR1p4ZEdCZjFXM1VjUnQxcFRvOEtQTDluZm4vSGJiRFNsQkNVL3VIcWd2TSt2cApmTE03bzRIVDZ1K1ZzU00rWGZqeDhpeE5ZRHdoalNuKzQyZm13d1d3ZzJISHUrdUozZ1pUSWQwRUI1VW9hdUNjCjZUTlVtcEJscjU5UGFmVkZRWUY1S3VxaHJXKzVQaWpHcHBZcXg4Ynl6aFpOQzkwZnl5V0NXcXg2eGFZVm5OdzgKNkJmUXM2a0NnWUVBeVlyRVg1NU1RTzJnWDY2TGwxaGJDMzNzWk1OZzloVG1SK1doSTFjNksvbFZ1TFoyL0RPdwpsYTZ6eHdBU204Z0ZyVUFYbUljV2h2b3FwWGVzNWZzOVZKeDlNT0ZVYVBrckRPQllnY1laMUR6VVNVOHc3SSttCnlyV3hRUkRNajhvSGpRbHVpM0s2MzZucm5RajhxOGkvQ2dranVPcHJGZnliMzVEMFlDdjVXZzBDZ1lFQXdhT3cKRWFhN0l1MjFGa08vbmFjdVhjSnBhNkVlUTNqZFNlNlRQaXZ6bVVXU0haeGJuUy9XSnJaRjQwSExzUWxOZHl0ZgpNTTBKZFU0VmMyR0NVc1pMYjdQSmJwdVRqRERSSHJXV1pCMnhiemF0K3A3N2RzNWlOcXFRcTZ6M0syUVh4Y3ZTCis5am5VZXpDU2Y0N1R1OWNTTW96V3hTMW82b1BPSFdHVFRvdHR5TUNnWUFQdWc1Y3o4TnZoWnR3Ry9TMG1LWnkKSFI5bk5YL0pkQlFNSkRVUXh1dTVKcm16c2psU3NNM2t3RDh6RmlSZGw1d3B5c2lNbEc0RGxsM2hqNWNrVXhpVQpFNm9KT0d3WHpPbTVGWUNTajl6UUhQY0x5V3d0NlgvQWJiRXBQS0JaMEJBS3gyT2k2ZzcvQ1FsanRhSFIzZFphCmVDQWJlOTlqVmRUcit5bTJuM2ZUdVFLQmdBMm5TZ2
5rbEx0Z3dXMEJkK2hZMm1jWUJ6RGttbXF0Z2dUdGdvcFcKdFFWd3AxM1pJWWlTeituSTNtS295QUVDbytpc01Ua1NyQUVPY1dyQ1RGc2p5anZsRkdYdEtGa3hNLzJUVmpoVwo4NlRnMlNHYnhpVlpaZ2x1dTJhdmVub2Z3NkZadnRXdE5KcE5OR0hkUURkUG4xVXVsTEp1WW1SWTRGdmR4WXQ2CmQ3QzdBb0dBRUsvalFiZ0l3OXFLQUNOZ0JySnB1cU5Ham9JajFoQTRlb29DMXp1bFEyZUpnZ2J5OTBpSDg2VzEKM0xyOVZMVFkyc2JKTzlqekZVR0lOL01BOEhYQTE1a2grZHRibkRsdFRFZGNnenBCRzhCQUZRQ3hQWnBGWHhtZgpDUmhXN1l6RW1IeWJ4R0toR3NOK2M3NUhKTHZFSWwrRTh6eitXRk9xT240dkJXU1ZwSnc9Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg==",' \ No newline at end of file diff --git a/data/rules/privkey.yml b/data/rules/privkey.yml index 58dce65..7b5e551 100644 --- a/data/rules/privkey.yml +++ b/data/rules/privkey.yml @@ -24,7 +24,6 @@ rules: ----- min_entropy: 4.5 confidence: high - prevalidated: true examples: - |- -----BEGIN RSA PRIVATE KEY----- @@ -77,7 +76,6 @@ rules: ) min_entropy: 4.5 confidence: high - prevalidated: true examples: - | -----BEGIN PRIVATE KEY----- diff --git a/src/matcher.rs b/src/matcher.rs index 33e2425..21f7dec 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -930,7 +930,6 @@ mod test { visible: true, examples: vec![], negative_examples: vec![], - prevalidated: false, references: vec![], validation: None::, // no HTTP validation needed depends_on_rule: vec![], @@ -972,7 +971,6 @@ mod test { visible: true, examples: vec![], negative_examples: vec![], - prevalidated: false, references: vec![], validation: Some(Validation::Http(HttpValidation { request: HttpRequest { @@ -1089,7 +1087,6 @@ mod test { visible: true, examples: vec![], negative_examples: vec![], - prevalidated: false, references: vec![], validation: None::, depends_on_rule: vec![], diff --git a/src/rules/rule.rs b/src/rules/rule.rs index a47b172..1947a0a 100644 --- a/src/rules/rule.rs +++ b/src/rules/rule.rs @@ -282,9 +282,6 @@ pub struct RuleSyntax { /// Optional dependencies on other rules. #[serde(default)] pub depends_on_rule: Vec>, - /// Whether matches should always be considered validated. 
- #[serde(default)] - pub prevalidated: bool, } lazy_static! { diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index 1ba02e1..05865b2 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -368,15 +368,6 @@ async fn validate_single( fail_count: &AtomicUsize, cache2: &Arc>, ) { - // Bypass validation if the rule is prevalidated (eg a Private Key) - if om.rule.syntax().prevalidated { - om.validation_success = true; - om.validation_response_status = http::StatusCode::OK; - om.validation_response_body = "Prevalidated".to_string(); - success_count.fetch_add(1, Ordering::Relaxed); - return; - } - // Build key let dep_vars_str = dep_vars .get(om.rule.id()) diff --git a/tests/int_rules_no_validated_findings.rs b/tests/int_rules_no_validated_findings.rs index 01a6ad9..e69e9bf 100644 --- a/tests/int_rules_no_validated_findings.rs +++ b/tests/int_rules_no_validated_findings.rs @@ -45,23 +45,18 @@ fn scan_rules_has_no_validated_findings() -> Result<()> { for finding in findings { let rule_id = finding["rule"]["id"].as_str().unwrap_or("unknown"); - let rule_prevalidated = finding["rule"]["prevalidated"].as_bool().unwrap_or(false); - let status = - finding["finding"]["validation"]["status"].as_str().unwrap_or("").to_ascii_lowercase(); - - let response = finding["finding"]["validation"]["response"] + let status = finding["finding"]["validation"]["status"] .as_str() .unwrap_or("") .to_ascii_lowercase(); - // Skip anything intentionally marked as prevalidated - if rule_prevalidated || status == "prevalidated" || response == "prevalidated" { - continue; - } - // Fail only on genuinely validated secrets - assert_ne!(&status, "active credential", "Validated finding detected in rule {rule_id}"); + assert_ne!( + &status, + "active credential", + "Validated finding detected in rule {rule_id}" + ); } Ok(()) From f90c0a6effc0808f379515ab74cb2aa953572f08 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 13 Aug 2025 09:13:50 -0700 Subject: [PATCH 
142/357] Improved Tailscale api key detectors --- CHANGELOG.md | 1 + data/rules/generic.yml | 2 +- data/rules/tailscale.yml | 2 +- src/safe_list.rs | 52 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ed7595..6526ee6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.40.0] - Dropped the “prevalidated” flag from rule definitions and validation logic so every finding now flows through the standard active/inactive/unknown pipeline, simplifying rule configuration and preventing special‑case bypasses +- Improved Tailscale api key detectors ## [1.39.0] - Added support for scanning Confluence pages via `--confluence-url` and `--cql` diff --git a/data/rules/generic.yml b/data/rules/generic.yml index 3f71d4a..73714b9 100644 --- a/data/rules/generic.yml +++ b/data/rules/generic.yml @@ -127,7 +127,7 @@ rules: 'policy_path': os.path.join(TEST_DIR, 'policy.json') }) - name: Weak Password Pattern - id: kingfisher.weak_password.1 + id: kingfisher.generic.6 pattern: | (?xi) \b diff --git a/data/rules/tailscale.yml b/data/rules/tailscale.yml index f393da7..3ddbc9a 100644 --- a/data/rules/tailscale.yml +++ b/data/rules/tailscale.yml @@ -5,7 +5,7 @@ rules: (?xi) \b ( - tskey-[a-z]+-[A-Z0-9_-]{20,24} + tskey-[a-z]{3,10}-[A-Z0-9_-]{20,36} ) \b min_entropy: 3.0 diff --git a/src/safe_list.rs b/src/safe_list.rs index 468ac2c..bbea073 100644 --- a/src/safe_list.rs +++ b/src/safe_list.rs @@ -1,46 +1,96 @@ use once_cell::sync::Lazy; use regex::bytes::Regex; use tracing::debug; + +/// Case-insensitive patterns that indicate a *benign* match (placeholders, examples, redactions, etc.). +/// `is_safe_match()` returns true if any of these are present. 
static SAFE_LIST_FILTER_REGEX: Lazy>> = Lazy::new(|| { vec![ + // Assignment-like value that ends with "EXAMPLEKEY" (common placeholder) + // e.g., "KEY=ABC_EXAMPLEKEY" or "key: fooEXAMPLEKEY" compile_regex(r"(?i)[:=][^:=]{0,64}EXAMPLEKEY"), + + // AWS-style AKIA keys explicitly marked as example/fake/test/sample + // e.g., "AKIA...EXAMPLE", "AKIA...FAKE", "AKIA...SAMPLE" compile_regex(r"(?i)\b(AKIA(?:.*?EXAMPLE|.*?FAKE|TEST|.*?SAMPLE))\b"), + + // Secret-y key name followed by short value and then "&&" / "||" or a run of asterisks + // e.g., "password=foo &&", "secret: *****" (redacted/masked) compile_regex( r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}\s(&&|\|\||\*{5,50})", ), + + // Secret-y key name with short value, then *another* short assignment on the same line + // Typical of docs/examples rather than hardcoded secrets compile_regex( r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}\b\w{4,12}\s{0,6}=\s{0,6}\D{0,3}\w{1,12}", ), + + // Secret-y key assigned to a shell variable reference (e.g., "$FOO") — not a literal secret compile_regex( r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}\$\w{4,30}", ), + + // Secret-y key set via command that *generates* randomness, not a literal value + // e.g., "password = openssl rand -base64 32" compile_regex( r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,16}[=:?][^=:?]{0,8}\bopenssl\s{0,4}rand\b", ), + + // Secret-y key assigned a value containing "encrypted" (marker/metadata, not a secret) compile_regex( r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}encrypted", ), + + // Secret-y key assigned boolean literals — not secrets + // e.g., "auth=false" compile_regex( r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}\b(?:false|true)\b", ), + + // Secret-y key assigned 
to null-ish or self-referential placeholders — not secrets + // e.g., "password: null", "secret = none" compile_regex( r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}\b(null|nil|none|password|pass|pwd|passwd|secret|cred|key|auth|authorization).{1,6}$", ), + + // The classic xkcd "hunter2" fake password compile_regex( r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}hunter2", ), + + // Obvious placeholder sequences + // (Consider grouping like (?i)(?:123456789|abcdefghij) for clarity.) compile_regex(r"(?i)123456789|abcdefghij"), + + // Literal placeholder tag often used in docs/config compile_regex(r"(?i)"), + + // OpenAPI schema references in assignment/query contexts — not secrets + // e.g., "password?ref=#/components/schemas/Credential" compile_regex(r"(?i)[=:?][^=:?]{0,8}#/components/schemas/"), + + // Example MongoDB URIs with placeholder user/pass like "user:pass" or "foo:bar" compile_regex( r"(?i)\b(mongodb(?:\+srv)?://(?:user|foo)[^:@]+:(?:pass|bar)[^@]+@[-\w.%+/:]{3,64}(?:/\w+)?)", ), + + // "classpath://" URIs — configuration references, not secrets compile_regex(r"(?i)\b(classpath://)"), + + // Assignment where the value dereferences a placeholder/property like ${env_var} + // e.g., "password=${db_password}" compile_regex(r"(?i)(\b[^\s\t]{0,16}[=:][^$]*\$\{[a-z_-]{5,30}\})"), + + // URLs with basic auth to hosts ending in "example" or "test" — placeholders + // e.g., "https://user:pass@example" compile_regex(r"(?i)\b((?:https?:)?//[^:@]{3,50}:[^:@]{3,50}@[\w.]{0,16}(?:example|test))"), + + // Assignment ending with "SECRETMANAGER" — explicit placeholder compile_regex(r"(?i)[:=][^:=]{0,32}\bSECRETMANAGER"), ] }); + fn compile_regex(pattern: &str) -> Option { match Regex::new(pattern) { Ok(regex) => Some(regex), @@ -50,6 +100,8 @@ fn compile_regex(pattern: &str) -> Option { } } } + +/// Returns true if the input likely contains *benign* placeholder/test strings. 
pub fn is_safe_match(input: &[u8]) -> bool { SAFE_LIST_FILTER_REGEX .iter() From deef5388356b1c3698d461f86cea584e2117e660 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 13 Aug 2025 09:20:36 -0700 Subject: [PATCH 143/357] fixed test --- data/rules/tailscale.yml | 4 ++-- tests/smoke_docker.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/data/rules/tailscale.yml b/data/rules/tailscale.yml index 3ddbc9a..3544b1f 100644 --- a/data/rules/tailscale.yml +++ b/data/rules/tailscale.yml @@ -11,8 +11,8 @@ rules: min_entropy: 3.0 confidence: medium examples: - - tskey-secret-12345678-abcdefghijkl - - tskey-api-abcdefg-1234567890123 + - tskey-secret-weRTWSfoeFKI-3480754342kDSFelW3 + - tskey-api-weRTWSfoeFKI-3480754342kDSFelW3 references: - https://tailscale.com/kb/1215/oauth-clients validation: diff --git a/tests/smoke_docker.rs b/tests/smoke_docker.rs index 40cc420..d931e87 100644 --- a/tests/smoke_docker.rs +++ b/tests/smoke_docker.rs @@ -15,6 +15,6 @@ fn smoke_scan_docker_image() -> anyhow::Result<()> { ]) .assert() .code(205) - .stdout(predicate::str::contains("Active Credential")); + .stdout(predicate::str::contains("Not Attempted")); Ok(()) } From 67b570816f84746c191bc2cf22c508af9fff2263 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 13 Aug 2025 09:23:03 -0700 Subject: [PATCH 144/357] fixed test --- tests/smoke_docker.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/smoke_docker.rs b/tests/smoke_docker.rs index d931e87..3caef75 100644 --- a/tests/smoke_docker.rs +++ b/tests/smoke_docker.rs @@ -14,7 +14,7 @@ fn smoke_scan_docker_image() -> anyhow::Result<()> { "--no-update-check", ]) .assert() - .code(205) + .code(200) .stdout(predicate::str::contains("Not Attempted")); Ok(()) } From e83b1716943f55f2f03a574d3ff0466d948d6baa Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 13 Aug 2025 15:35:04 -0700 Subject: [PATCH 145/357] added rule for Vercel --- CHANGELOG.md | 3 +++ Cargo.toml | 2 +- 
data/rules/airbrake.yml | 4 +--- data/rules/aiven.yml | 5 ++--- data/rules/asana.yml | 2 -- data/rules/atlassian.yml | 2 -- data/rules/baremetrics.yml | 2 -- data/rules/fastly.yml | 4 +++- data/rules/heroku.yml | 5 ++--- data/rules/vercel.yml | 39 ++++++++++++++++++++++++++++++++++++++ 10 files changed, 51 insertions(+), 17 deletions(-) create mode 100644 data/rules/vercel.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 6526ee6..aec63c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.41.0] +- Added rules for: Vercel + ## [1.40.0] - Dropped the “prevalidated” flag from rule definitions and validation logic so every finding now flows through the standard active/inactive/unknown pipeline, simplifying rule configuration and preventing special‑case bypasses - Improved Tailscale api key detectors diff --git a/Cargo.toml b/Cargo.toml index 15ad49a..db985c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.40.0" +version = "1.41.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/data/rules/airbrake.yml b/data/rules/airbrake.yml index 9d55e24..d54d261 100644 --- a/data/rules/airbrake.yml +++ b/data/rules/airbrake.yml @@ -5,9 +5,7 @@ rules: (?xi) \b airbrake - (?:.|[\n\r]){0,16}? - (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) - (?:.|[\n\r]){0,16}? + (?:.|[\n\r]){0,32}? ( [A-Z0-9-]{40} ) diff --git a/data/rules/aiven.yml b/data/rules/aiven.yml index 0f4c727..19f449a 100644 --- a/data/rules/aiven.yml +++ b/data/rules/aiven.yml @@ -2,11 +2,10 @@ rules: - name: Aiven API Key id: kingfisher.aiven.1 pattern: | - (?xi) + (?xi) + \b aiven (?:.|[\n\r]){0,32}? - (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) - (?:.|[\n\r]){0,32}? 
\b ( [a-z0-9/+=]{372} diff --git a/data/rules/asana.yml b/data/rules/asana.yml index 0824711..85e30c5 100644 --- a/data/rules/asana.yml +++ b/data/rules/asana.yml @@ -6,8 +6,6 @@ rules: \b asana (?:.|[\n\r]){0,32}? - (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) - (?:.|[\n\r]){0,32}? \b ( [0-9]{16} diff --git a/data/rules/atlassian.yml b/data/rules/atlassian.yml index 6d4ac9c..32dccb2 100644 --- a/data/rules/atlassian.yml +++ b/data/rules/atlassian.yml @@ -6,8 +6,6 @@ rules: \b atlassian (?:.|[\n\r]){0,32}? - (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) - (?:.|[\n\r]){0,32}? \b ( [a-z0-9]{24} diff --git a/data/rules/baremetrics.yml b/data/rules/baremetrics.yml index 415731e..2036d18 100644 --- a/data/rules/baremetrics.yml +++ b/data/rules/baremetrics.yml @@ -6,8 +6,6 @@ rules: \b baremetrics (?:.|[\n\r]){0,32}? - (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) - (?:.|[\n\r]){0,32}? \b ( [a-z0-9_-]{25} diff --git a/data/rules/fastly.yml b/data/rules/fastly.yml index c5d4fcb..77618a6 100644 --- a/data/rules/fastly.yml +++ b/data/rules/fastly.yml @@ -9,7 +9,9 @@ rules: (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? \b - ([a-z0-9_-]{32}) + ( + [a-z0-9_-]{32} + ) \b min_entropy: 3.5 confidence: medium diff --git a/data/rules/heroku.yml b/data/rules/heroku.yml index 3686e7a..b06d58e 100644 --- a/data/rules/heroku.yml +++ b/data/rules/heroku.yml @@ -2,10 +2,9 @@ rules: - name: Heroku API Key id: kingfisher.heroku.1 pattern: | - (?xi) + (?xi) + \b heroku - (?:.|[\n\r]){0,32}? - (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? \b ( diff --git a/data/rules/vercel.yml b/data/rules/vercel.yml new file mode 100644 index 0000000..d649b00 --- /dev/null +++ b/data/rules/vercel.yml @@ -0,0 +1,39 @@ +rules: + - name: Vercel API Token + id: kingfisher.vercel.1 + pattern: | + (?xi) + \b + vercel + (?:.|[\n\r]){0,32}? 
+ \b + ( + [a-zA-Z0-9]{24} + ) + \b + confidence: medium + min_entropy: 3.5 + validation: + type: Http + content: + request: + method: GET + url: https://api.vercel.com/v2/user + headers: + Authorization: "Bearer {{TOKEN}}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"user":' + - '"email":' + match_all_words: true + references: + - https://vercel.com/docs/rest-api#authentication + examples: + - "vercel-key = DdZV6ZDZW6Vpl7n7JqtrCE5i" + - "vercel_token = zyMBA1qVEMAf4UNNZtCAbg6u" + - "vercel_api_key = MTg0AW799OY1HmyDdn84or3C" + - "vercel_secret = A7n9Xfp3tBz7D0XpOTMWpiOM" From 14fccc9cc64916946b0f7d30caedefe2e99a9978 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 14 Aug 2025 09:25:18 -0700 Subject: [PATCH 146/357] - Added support for scanning gitlab subgroups, with 'kingfisher scan --gitlab-group my-group --gitlab-include-subgroups' --- README.md | 4 ++++ src/cli/commands/gitlab.rs | 4 ++++ src/cli/commands/inputs.rs | 4 ++++ src/gitlab.rs | 14 ++++++++++++-- src/main.rs | 2 ++ src/reporter/json_format.rs | 1 + src/scanner/repos.rs | 1 + tests/int_dedup.rs | 1 + tests/int_github.rs | 1 + tests/int_gitlab.rs | 2 ++ tests/int_redact.rs | 1 + tests/int_slack.rs | 2 ++ tests/int_validation_cache.rs | 1 + tests/int_vulnerable_files.rs | 2 ++ 14 files changed, 38 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d51f0b3..6873a6a 100644 --- a/README.md +++ b/README.md @@ -384,6 +384,8 @@ KF_GITHUB_TOKEN="ghp_…" kingfisher scan --git-url https://github.com/org/priva ```bash kingfisher scan --gitlab-group my-group +# include repositories from all nested subgroups +kingfisher scan --gitlab-group my-group --gitlab-include-subgroups ``` ### Scan GitLab user @@ -402,6 +404,8 @@ kingfisher scan --git-url https://gitlab.com/group/project.git ```bash kingfisher gitlab repos list --group my-group +# include repositories from all nested subgroups +kingfisher gitlab repos list --group 
my-group --include-subgroups ``` ## Scanning Jira diff --git a/src/cli/commands/gitlab.rs b/src/cli/commands/gitlab.rs index e1bbdc3..8765c87 100644 --- a/src/cli/commands/gitlab.rs +++ b/src/cli/commands/gitlab.rs @@ -56,6 +56,10 @@ pub struct GitLabRepoSpecifiers { /// Filter by repository type #[arg(long, default_value_t = GitLabRepoType::All, alias = "gitlab-repo-type")] pub repo_type: GitLabRepoType, + + /// Include repositories from subgroups of the specified groups + #[arg(long, alias = "gitlab-include-subgroups")] + pub include_subgroups: bool, } impl GitLabRepoSpecifiers { diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index e6f9168..7836d79 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -89,6 +89,10 @@ pub struct InputSpecifierArgs { #[arg(long, default_value_t = GitLabRepoType::All)] pub gitlab_repo_type: GitLabRepoType, + /// Include projects from GitLab subgroups when scanning groups + #[arg(long, alias = "include-subgroups")] + pub gitlab_include_subgroups: bool, + /// Jira base URL (e.g. 
https://jira.example.com) #[arg(long, value_hint = ValueHint::Url, requires = "jql")] pub jira_url: Option, diff --git a/src/gitlab.rs b/src/gitlab.rs index e7df15e..80b7751 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -42,6 +42,7 @@ pub struct RepoSpecifiers { pub user: Vec, pub group: Vec, pub all_groups: bool, + pub include_subgroups: bool, pub repo_filter: RepoType, } @@ -137,6 +138,9 @@ pub async fn enumerate_repo_urls( if matches!(repo_specifiers.repo_filter, RepoType::Owner) { gp_builder.owned(true); } + if repo_specifiers.include_subgroups { + gp_builder.include_subgroups(true); + } let gp_ep = gp_builder.build()?; let projects: Vec = gp_ep.query(&client)?; @@ -162,10 +166,16 @@ pub async fn list_repositories( users: &[String], groups: &[String], all_groups: bool, + include_subgroups: bool, repo_filter: RepoType, ) -> Result<()> { - let repo_specifiers = - RepoSpecifiers { user: users.to_vec(), group: groups.to_vec(), all_groups, repo_filter }; + let repo_specifiers = RepoSpecifiers { + user: users.to_vec(), + group: groups.to_vec(), + all_groups, + include_subgroups, + repo_filter, + }; // Create a progress bar for displaying status let mut progress = if progress_enabled { diff --git a/src/main.rs b/src/main.rs index 3943b1e..2e9b6e2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -245,6 +245,7 @@ async fn async_main(args: CommandLineArgs) -> Result<()> { &list_args.repo_specifiers.user, &list_args.repo_specifiers.group, list_args.repo_specifiers.all_groups, + list_args.repo_specifiers.include_subgroups, list_args.repo_specifiers.repo_type.into(), ) .await?; @@ -282,6 +283,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::All, + gitlab_include_subgroups: false, jira_url: None, jql: None, diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 10d7aee..4020cee 100644 --- 
a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -84,6 +84,7 @@ mod tests { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::All, + gitlab_include_subgroups: false, // Jira options jira_url: None, jql: None, diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index a249f02..e5052c1 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -182,6 +182,7 @@ pub async fn enumerate_gitlab_repos( user: args.input_specifier_args.gitlab_user.clone(), group: args.input_specifier_args.gitlab_group.clone(), all_groups: args.input_specifier_args.all_gitlab_groups, + include_subgroups: args.input_specifier_args.gitlab_include_subgroups, repo_filter: args.input_specifier_args.gitlab_repo_type.into(), }; diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 68b9663..090effa 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -78,6 +78,7 @@ rules: all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + gitlab_include_subgroups: false, jira_url: None, jql: None, diff --git a/tests/int_github.rs b/tests/int_github.rs index 0bae089..dbedcb5 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -65,6 +65,7 @@ fn test_github_remote_scan() -> Result<()> { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + gitlab_include_subgroups: false, jira_url: None, jql: None, diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 7e48f60..5a72ce5 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -64,6 +64,7 @@ fn test_gitlab_remote_scan() -> Result<()> { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/")?, gitlab_repo_type: GitLabRepoType::Owner, + gitlab_include_subgroups: false, jira_url: None, jql: None, @@ -169,6 +170,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { 
all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/")?, gitlab_repo_type: GitLabRepoType::Owner, + gitlab_include_subgroups: false, jira_url: None, jql: None, diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 69a1061..6bd97c7 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -49,6 +49,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + gitlab_include_subgroups: false, jira_url: None, jql: None, confluence_url: None, diff --git a/tests/int_slack.rs b/tests/int_slack.rs index e94607f..86cea6f 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -55,6 +55,7 @@ impl TestContext { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + gitlab_include_subgroups: false, jira_url: None, jql: None, confluence_url: None, @@ -147,6 +148,7 @@ async fn test_scan_slack_messages() -> Result<()> { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + gitlab_include_subgroups: false, jira_url: None, jql: None, confluence_url: None, diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 2caa10a..6e2cc6a 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -121,6 +121,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + gitlab_include_subgroups: false, jira_url: None, jql: None, diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 2478170..31a74ac 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -64,6 +64,7 @@ impl TestContext { all_gitlab_groups: false, gitlab_api_url: 
Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + gitlab_include_subgroups: false, jira_url: None, jql: None, @@ -142,6 +143,7 @@ impl TestContext { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, + gitlab_include_subgroups: false, jira_url: None, jql: None, From 737e4829acdaa6ed3d64c6652dd43c330f9735f4 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 14 Aug 2025 09:26:44 -0700 Subject: [PATCH 147/357] - Added support for scanning gitlab subgroups, with 'kingfisher scan --gitlab-group my-group --gitlab-include-subgroups' --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aec63c5..cb5b3e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,8 @@ All notable changes to this project will be documented in this file. ## [1.41.0] -- Added rules for: Vercel +- Added support for scanning gitlab subgroups, with `kingfisher scan --gitlab-group my-group --gitlab-include-subgroups` +- Added rule for Vercel ## [1.40.0] - Dropped the “prevalidated” flag from rule definitions and validation logic so every finding now flows through the standard active/inactive/unknown pipeline, simplifying rule configuration and preventing special‑case bypasses From a3a7efb96e9eaddb117a0908b1a760dc8ab75b7d Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 14 Aug 2025 10:17:16 -0700 Subject: [PATCH 148/357] fixed aiven regex to pass test --- data/rules/aiven.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/data/rules/aiven.yml b/data/rules/aiven.yml index 19f449a..09a7269 100644 --- a/data/rules/aiven.yml +++ b/data/rules/aiven.yml @@ -2,8 +2,7 @@ rules: - name: Aiven API Key id: kingfisher.aiven.1 pattern: | - (?xi) - \b + (?xi) aiven (?:.|[\n\r]){0,32}? 
\b From 9b282cb33f8eaa3bc0d42b7a1839c4ce12404616 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 15 Aug 2025 09:07:25 -0700 Subject: [PATCH 149/357] code cleanup' --- .gitignore | 1 + src/git_commit_metadata.rs | 11 +++++- src/git_metadata_graph.rs | 79 +++++++++++++++++++++++++------------- src/git_repo_enumerator.rs | 79 +++++++++++++++++--------------------- src/lib.rs | 28 -------------- 5 files changed, 99 insertions(+), 99 deletions(-) diff --git a/.gitignore b/.gitignore index fb40dee..8cd8f36 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ .prettierrc custom.py logs/* +*.patch ### macOS ### # General diff --git a/src/git_commit_metadata.rs b/src/git_commit_metadata.rs index 390e1f8..75540d3 100644 --- a/src/git_commit_metadata.rs +++ b/src/git_commit_metadata.rs @@ -5,7 +5,8 @@ use serde::{Deserialize, Serialize}; use crate::serde_utils::BStringLossyUtf8; -#[derive(Serialize, Deserialize)] +#[repr(transparent)] +#[derive(Serialize, Deserialize, Copy, Clone)] #[serde(remote = "Time")] struct TextTime( #[serde( @@ -27,10 +28,13 @@ impl From