From 8a74eba16075c44ca304eb7bed21fe1c3686a55e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 1 Aug 2025 16:56:04 -0700 Subject: [PATCH] - New rules: Telegram bot token, OpenWeatherMap, Apify - New OpenAI detectors added (@joshlarsen) - Fixed bug that broke validation when using unnamed group captures --- CHANGELOG.md | 5 ++ Cargo.toml | 8 +-- Makefile | 1 + data/rules/apify.yml | 35 ++++++++++++ data/rules/openweathermap.yml | 42 ++++++++++++++ data/rules/recaptcha.yml | 2 - data/rules/telegram.yml | 30 ++++++++++ docs/RULES.md | 1 - rustfmt.toml | 6 -- src/git_binary.rs | 6 +- src/git_metadata_graph.rs | 2 +- src/jira.rs | 2 +- src/liquid_filters.rs | 4 -- src/main.rs | 2 +- src/reporter.rs | 3 +- src/reporter/json_format.rs | 2 +- src/reporter/sarif_format.rs | 3 +- src/scanner/docker.rs | 6 +- src/scanner/mod.rs | 2 +- src/scanner/repos.rs | 3 +- src/scanner/runner.rs | 2 +- src/slack.rs | 2 +- src/validation.rs | 3 +- src/validation/coinbase.rs | 18 +++--- src/validation/jwt.rs | 6 +- src/validation/utils.rs | 103 +++++++++++++++++++++++++++++++--- tests/int_gitlab.rs | 2 +- tests/int_slack.rs | 2 +- tests/smoke_docker.rs | 2 +- 29 files changed, 241 insertions(+), 64 deletions(-) create mode 100644 data/rules/apify.yml create mode 100644 data/rules/openweathermap.yml create mode 100644 data/rules/telegram.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 15ebdab..427b527 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes to this project will be documented in this file. +## [1.31.0] +- New rules: Telegram bot token, OpenWeatherMap, Apify +- New OpenAI detectors added (@joshlarsen) +- Fixed bug that broke validation when using unnamed group captures + ## [1.30.0] - Fixed validation caching for HTTP validators to include rendered headers so inactive secrets no longer appear active. - Removed pre-commit installation hook, due to bugs diff --git a/Cargo.toml b/Cargo.toml index d6e7199..de43779 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.30.0" +version = "1.31.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -104,7 +104,6 @@ http = "1.3.1" liquid = "0.26.11" liquid-core = "0.26.11" flate2 = "1.1.2" -brotli = "6.0.0" thousands = "0.2.0" base32 = "0.5.1" crossbeam-skiplist = "0.1.3" @@ -172,13 +171,10 @@ color-backtrace = "0.7.0" gitlab = "0.1801.0" mimalloc = {version = "0.1.47", features = ["override"]} thread_local = "1.1.9" -crc32fast = "1.5.0" bloomfilter = "3.0.1" uuid = "1.17.0" -urlencoding = "2.1.3" rand = "0.9.1" percent-encoding = "2.3.1" -trust-dns-resolver = { version = "0.23.2", default-features = false, features = ["tokio-runtime"] } atty = "0.2.14" self_update = { version = "0.42.0", default-features = false, features = ["rustls", "archive-tar", "archive-zip", "compression-flate2"] } semver = "1.0.26" @@ -189,8 +185,6 @@ jira_query = "1.6.0" oci-client = { version = "0.15", default-features = false, features = ["rustls-tls"] } walkdir = "2.5.0" p256 = "0.13.2" -sec1 = "0.7.3" -rand_core = "0.9.3" ed25519-dalek = { version = "2.2", features = ["pkcs8"] } [dependencies.tikv-jemallocator] diff --git a/Makefile b/Makefile index e662d7e..b4d63a6 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ endif ifeq ($(OS),darwin) export HOMEBREW_NO_INSTALL_CLEANUP=1 export HOMEBREW_NO_ENV_HINTS=1 + export HOMEBREW_NO_AUTO_UPDATE=1 endif # detect host architecture and map to our target suffixes diff --git a/data/rules/apify.yml b/data/rules/apify.yml new file mode 100644 index 0000000..78c99dd --- /dev/null +++ b/data/rules/apify.yml @@ -0,0 +1,35 @@ +rules: + - name: Apify API Token + id: kingfisher.apify.1 + pattern: | + (?xi) + \b + ( + apify_api_[A-Z0-9]{34,38} + ) + \b + confidence: medium + min_entropy: 3.5 + validation: + type: Http + content: + request: + method: GET + url: "https://api.apify.com/v2/users/me" + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"data"' + - '"username"' + match_all_words: true + references: + - https://docs.gitguardian.com/secrets-detection/secrets-detection-engine/detectors/specifics/apify_token + - https://docs.apify.com/api/v2#/reference/users/user-object/get-user-public-profile-or-me + - https://docs.apify.com/api/v2/users-me-get + examples: + - "apify_api_NcjXcxEz2XL1irjppyWSHvjghalQOd1LXOHv" + - "apify_api_9uyewBxQUF1EXWdKVc4lNaTSM461Ls4oQouz" \ No newline at end of file diff --git a/data/rules/openweathermap.yml b/data/rules/openweathermap.yml new file mode 100644 index 0000000..c6b0251 --- /dev/null +++ b/data/rules/openweathermap.yml @@ -0,0 +1,42 @@ +rules: + # --------------------------------------------------------------------- + # 1. OpenWeather Map API Key (detector unchanged, new validation) + # --------------------------------------------------------------------- + - name: OpenWeather Map API Key + id: kingfisher.openweather.1 + pattern: | + (?xi) + \b + (?:pyowm|openweather|owm\b) + (?:.|[\n\r]){0,64}? + \b + ( + [a-z0-9]{32} + | + APPID= + [a-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + examples: + - pyowm = '3k144a5af729351d0fc58bdrj9a21mkr' + - owm = '3k144a5af729351d0fc58bdrj9a21mkr' + - openweatherapikey=cd2b1d12d01ae2deffecfebafcc3c31d + - apikey=openweather:cd2b1d12d01ae2deffecfebafcc3c31d + validation: + type: Http + content: + request: + method: GET + url: https://api.openweathermap.org/data/2.5/forecast?q=London&appid={{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + words: ['"cod":"200"'] + references: + - https://openweathermap.org/forecast5 + - https://openweathermap.org/appid + - https://publicapi.dev/open-weather-map-api + diff --git a/data/rules/recaptcha.yml b/data/rules/recaptcha.yml index 4966857..e8f62a6 100644 --- a/data/rules/recaptcha.yml +++ b/data/rules/recaptcha.yml @@ -5,8 +5,6 @@ rules: (?xi) recaptcha (?:.|[\n\r]){0,16}? - (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) - (?:.|[\n\r]){0,32}? \b ( 6l[c-f][a-z0-9_-].{36} diff --git a/data/rules/telegram.yml b/data/rules/telegram.yml new file mode 100644 index 0000000..9b6dc98 --- /dev/null +++ b/data/rules/telegram.yml @@ -0,0 +1,30 @@ +rules: + - name: Telegram Bot Token + id: kingfisher.telegram.1 + pattern: | + (?xi) + \b + ( + [0-9]{8,10} + : + [A-Z0-9_-]{35} + ) + \b + confidence: medium + min_entropy: 3.5 + validation: + type: Http + content: + request: + method: GET + url: "https://api.telegram.org/bot{{TOKEN}}/getMe" + response_matcher: + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"ok":true' + examples: + - "110201543:AAHdqTcvCH1vGWJxfSeofSAs0K5PALDsawd" + - "508627689:AAEuLPKs-EhrjrYGnz60bnYNZqakf6HJxc0" + - "3628091811:BAG9RuJiqgOGIfFbOPBpAo6QhIJoD9mCdDs" diff --git a/docs/RULES.md b/docs/RULES.md index 01cce17..5a2fbfc 100644 --- a/docs/RULES.md +++ b/docs/RULES.md @@ -108,7 +108,6 @@ Below is the complete list of Liquid filters available in Kingfisher, along with | `b64enc` | – | Base64-encodes the input using the standard alphabet. | `{{ TOKEN \| b64enc }}` | | `b64url_enc` | – | URL-safe Base64 (no padding). Useful for JWT headers & payloads. | `{{ TOKEN \| b64url_enc }}` | | `b64dec` | – | Decodes a Base64 string. | `{{ "aGVsbG8=" \| b64dec }}` | -| `es256_sign` | `key` (string) | Signs the input with an ECDSA P-256 private key and returns a Base64URL signature. | `{{ "data" \| es256_sign: PRIVKEY }}` | | `sha256` | – | Computes the SHA-256 hex digest of the input. | `{{ TOKEN \| sha256 }}` | | `hmac_sha1` | `key` (string) | Computes HMAC-SHA1 over the input, returns Base64-encoded result. | `{{ TOKEN \| hmac_sha1: "secret-key" }}` | | `hmac_sha256` | `key` (string) | Computes HMAC-SHA256 over the input, returns Base64-encoded result. | `{{ TOKEN \| hmac_sha256: "secret-key" }}` | diff --git a/rustfmt.toml b/rustfmt.toml index 1f12150..5aede1f 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -5,11 +5,5 @@ tab_spaces = 4 use_small_heuristics = "Max" newline_style = "Unix" -imports_granularity = "Crate" -group_imports = "StdExternalCrate" reorder_imports = true -normalize_doc_attributes = true -format_code_in_doc_comments = true -wrap_comments = true -comment_width = 100 diff --git a/src/git_binary.rs b/src/git_binary.rs index b2b6918..fc2baa1 100644 --- a/src/git_binary.rs +++ b/src/git_binary.rs @@ -36,13 +36,11 @@ impl Git { /// Create a new `Git` instance. /// /// * `ignore_certs`: If `true`, disables SSL certificate verification for `git` operations. -pub fn new(ignore_certs: bool) -> Self { + pub fn new(ignore_certs: bool) -> Self { let mut credentials = Vec::new(); // If either GitHub or GitLab token is set, first clear existing credential.helpers - if std::env::var("KF_GITHUB_TOKEN").is_ok() - || std::env::var("KF_GITLAB_TOKEN").is_ok() - { + if std::env::var("KF_GITHUB_TOKEN").is_ok() || std::env::var("KF_GITLAB_TOKEN").is_ok() { credentials.push("-c".into()); credentials.push(r#"credential.helper="#.into()); } diff --git a/src/git_metadata_graph.rs b/src/git_metadata_graph.rs index 04d6729..4a45940 100644 --- a/src/git_metadata_graph.rs +++ b/src/git_metadata_graph.rs @@ -139,7 +139,7 @@ impl RepositoryIndex { let mut num_trees = 0; let mut num_blobs = 0; let mut num_commits = 0; - + for oid in odb .iter() .context("Failed to iterate object database")? diff --git a/src/jira.rs b/src/jira.rs index 9b9e4fb..e3c2adc 100644 --- a/src/jira.rs +++ b/src/jira.rs @@ -49,4 +49,4 @@ pub async fn download_issues_to_dir( paths.push(file); } Ok(paths) -} \ No newline at end of file +} diff --git a/src/liquid_filters.rs b/src/liquid_filters.rs index 2142d83..e9d6ea4 100644 --- a/src/liquid_filters.rs +++ b/src/liquid_filters.rs @@ -7,9 +7,6 @@ use liquid_core::{ FromFilterParameters, ParseFilter, Result, Runtime, Value, ValueView, }; -use p256::ecdsa::{signature::Signer, SigningKey}; -use p256::pkcs8::DecodePrivateKey; -use sec1::DecodeEcPrivateKey; use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use rand::{distr::Alphanumeric, Rng}; use sha1::Sha1; @@ -295,7 +292,6 @@ impl Filter for B64DecFilter { } } - // ----------------------------------------------------------------------------- // Authentication & Security // ----------------------------------------------------------------------------- diff --git a/src/main.rs b/src/main.rs index 06ef3cf..9c30b92 100644 --- a/src/main.rs +++ b/src/main.rs @@ -289,7 +289,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { // Slack query slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - + // Docker image scanning docker_image: Vec::new(), diff --git a/src/reporter.rs b/src/reporter.rs index e6709dc..210da31 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -113,7 +113,6 @@ impl DetailsReporter { } } - /// If the given file path corresponds to a Jira issue downloaded to disk, /// return the online Jira URL for that issue. fn jira_issue_url( @@ -123,7 +122,7 @@ impl DetailsReporter { ) -> Option { // drop any trailing slash so we don’t end up with “//browse/…” let jira_url = args.input_specifier_args.jira_url.as_ref()?.as_str().trim_end_matches('/'); - + let ds = self.datastore.lock().ok()?; let root = ds.clone_root(); let jira_dir = root.join("jira_issues"); diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 6bad0cb..5533b55 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -441,7 +441,7 @@ mod tests { // Slack options slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - + docker_image: Vec::new(), // clone / history options git_clone: GitCloneMode::Bare, diff --git a/src/reporter/sarif_format.rs b/src/reporter/sarif_format.rs index 3db2d99..5829bba 100644 --- a/src/reporter/sarif_format.rs +++ b/src/reporter/sarif_format.rs @@ -208,7 +208,6 @@ impl DetailsReporter { let p = first_match.origin.first(); match p { Origin::File(e) => { - let uri = if let Some(url) = self.jira_issue_url(&e.path, args) { url } else if let Some(url) = self.slack_message_url(&e.path) { @@ -351,7 +350,7 @@ impl DetailsReporter { .build()?, ) .build()?; - + let sarif_results: Vec = findings .par_iter() .filter_map(|f| self.make_sarif_result(f, no_dedup, args).ok()) diff --git a/src/scanner/docker.rs b/src/scanner/docker.rs index 5a6daa4..b427a3b 100644 --- a/src/scanner/docker.rs +++ b/src/scanner/docker.rs @@ -61,7 +61,7 @@ fn image_dir_name(reference: &str) -> String { // add a truncated SHA-256 to guarantee uniqueness let hash = Sha256::digest(reference.as_bytes()); - let short = &hex::encode(hash)[..8]; // 8-char prefix is plenty + let short = &hex::encode(hash)[..8]; // 8-char prefix is plenty name.push('_'); name.push_str(short); name @@ -258,7 +258,7 @@ pub async fn save_docker_images( ) -> Result> { let docker = Docker::new(); let mut dirs = Vec::new(); - + for image in images { let dir_name = image_dir_name(image); let out_dir = clone_root.join(format!("docker_{dir_name}")); @@ -280,4 +280,4 @@ mod tests { fn docker_struct_new() { let _ = Docker::new(); } -} \ No newline at end of file +} diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index fff9440..8b905af 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -1,9 +1,9 @@ //! Public façade for the scanner subsystem. +pub(crate) use docker::save_docker_images; pub(crate) use enumerate::enumerate_filesystem_inputs; pub(crate) use repos::{clone_or_update_git_repos, enumerate_github_repos}; pub use runner::{load_and_record_rules, run_async_scan, run_scan}; pub(crate) use validation::run_secret_validation; -pub(crate) use docker::save_docker_images; mod docker; mod enumerate; diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 2a8044a..9d944ea 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -225,7 +225,6 @@ pub async fn enumerate_gitlab_repos( Ok(repo_urls) } - pub async fn fetch_jira_issues( args: &scan::ScanArgs, global_args: &global::GlobalArgs, @@ -284,4 +283,4 @@ pub async fn fetch_slack_messages( } } Ok(vec![output_dir]) -} \ No newline at end of file +} diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index b727718..0a880da 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -67,7 +67,7 @@ pub async fn run_async_scan( // Fetch Jira issues if requested let jira_dirs = fetch_jira_issues(args, global_args, &datastore).await?; input_roots.extend(jira_dirs); - + // Fetch Slack messages if requested let slack_dirs = fetch_slack_messages(args, global_args, &datastore).await?; input_roots.extend(slack_dirs); diff --git a/src/slack.rs b/src/slack.rs index a0cd1f5..ce6f90d 100644 --- a/src/slack.rs +++ b/src/slack.rs @@ -115,4 +115,4 @@ pub async fn download_messages_to_dir( paths.push((file, msg.permalink)); } Ok(paths) -} \ No newline at end of file +} diff --git a/src/validation.rs b/src/validation.rs index 7ef8c24..cff95c0 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -1,7 +1,7 @@ use std::{ + collections::BTreeMap, fs, hash::{Hash, Hasher}, - collections::BTreeMap, sync::Arc, time::{Duration, Instant}, }; @@ -1043,5 +1043,4 @@ rules: println!("Body: {:?}", owned_blob_match.validation_response_body); Ok(()) } - } diff --git a/src/validation/coinbase.rs b/src/validation/coinbase.rs index be6045f..d879601 100644 --- a/src/validation/coinbase.rs +++ b/src/validation/coinbase.rs @@ -4,16 +4,16 @@ use std::time::Duration; use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; use chrono::Utc; +use ed25519_dalek::SigningKey as Ed25519Key; use p256::{ ecdsa::{signature::Signer as _, SigningKey}, pkcs8::DecodePrivateKey, SecretKey, }; -use ed25519_dalek::{SigningKey as Ed25519Key, Signer as _}; use rand::rngs::OsRng; +use rand::TryRngCore; use reqwest::{Client, StatusCode, Url}; use sha1::{Digest, Sha1}; -use rand::TryRngCore; use crate::validation::{httpvalidation, Cache, CachedResponse, VALIDATION_CACHE_SECONDS}; @@ -68,7 +68,6 @@ pub async fn validate_cdp_api_key( Ok((ok, msg)) } - // fn build_jwt( // method: &str, // host: &str, @@ -85,7 +84,7 @@ pub async fn validate_cdp_api_key( // let mut rng = OsRng; // let mut nonce = [0u8; 16]; - + // let _ = rng.try_fill_bytes(&mut nonce); // let header = serde_json::json!({ @@ -125,12 +124,12 @@ fn build_jwt( let mut rng = OsRng; let mut nonce = [0u8; 16]; - + let _ = rng.try_fill_bytes(&mut nonce); // Try ECDSA (PEM encoded EC key). Fallback to raw Ed25519 base64 key. - if let Ok(secret_key) = SecretKey::from_sec1_pem(&pem) - .or_else(|_| SecretKey::from_pkcs8_pem(&pem)) + if let Ok(secret_key) = + SecretKey::from_sec1_pem(&pem).or_else(|_| SecretKey::from_pkcs8_pem(&pem)) { let signing_key = SigningKey::from(secret_key); let header = serde_json::json!({ @@ -168,7 +167,8 @@ fn build_jwt( } 64 => { let arr: [u8; 64] = key_bytes[..64].try_into().unwrap(); - Ed25519Key::from_keypair_bytes(&arr).map_err(|e| anyhow!("invalid Ed25519 key: {e}"))? + Ed25519Key::from_keypair_bytes(&arr) + .map_err(|e| anyhow!("invalid Ed25519 key: {e}"))? } _ => return Err(anyhow!("invalid Ed25519 key length")), }; @@ -196,4 +196,4 @@ fn build_jwt( let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes()); return Ok(format!("{signing_input}.{sig_b64}")); } -} \ No newline at end of file +} diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs index 327f0d2..25a7206 100644 --- a/src/validation/jwt.rs +++ b/src/validation/jwt.rs @@ -162,7 +162,11 @@ pub async fn validate_jwt(token: &str) -> Result<(bool, String)> { return Ok(( true, - format!("JWT valid (alg: {:?}, iss: {issuer}, aud: {:?})", alg, extract_aud_strings(&claims)), + format!( + "JWT valid (alg: {:?}, iss: {issuer}, aud: {:?})", + alg, + extract_aud_strings(&claims) + ), )); } diff --git a/src/validation/utils.rs b/src/validation/utils.rs index 9736aab..ee118c8 100644 --- a/src/validation/utils.rs +++ b/src/validation/utils.rs @@ -9,21 +9,19 @@ use crate::validation::SerializableCaptures; /// * If it’s unnamed, fall back to `"TOKEN"` /// * Skip the unnamed “whole-match” capture **only when** there are /// additional captures to return. -pub fn process_captures( - captures: &SerializableCaptures, -) -> Vec<(String, String, usize, usize)> { +pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> { let multiple = captures.captures.len() > 1; captures .captures .iter() - .filter(|cap| !multiple || cap.name.is_some()) + // Skip the whole-match capture (match_number == 0) only when there + // are additional captures. All other captures – named or unnamed – + // should be preserved. + .filter(|cap| !multiple || cap.match_number != 0) .map(|cap| { - let name = cap - .name - .as_ref() - .map(|n| n.to_uppercase()) - .unwrap_or_else(|| "TOKEN".to_string()); + let name = + cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string()); (name, cap.value.clone().into_owned(), cap.start, cap.end) }) .collect() @@ -68,3 +66,90 @@ pub async fn check_url_resolvable(url: &Url) -> Result<(), Box Result<()> { all_gitlab_groups: false, gitlab_api_url: Url::parse("https://gitlab.com/")?, gitlab_repo_type: GitLabRepoType::Owner, - + jira_url: None, jql: None, max_results: 100, diff --git a/tests/int_slack.rs b/tests/int_slack.rs index d238bce..699dad9 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -195,4 +195,4 @@ async fn test_scan_slack_messages() -> Result<()> { }; assert!(findings > 0); Ok(()) -} \ No newline at end of file +} diff --git a/tests/smoke_docker.rs b/tests/smoke_docker.rs index 2fd61bc..40cc420 100644 --- a/tests/smoke_docker.rs +++ b/tests/smoke_docker.rs @@ -17,4 +17,4 @@ fn smoke_scan_docker_image() -> anyhow::Result<()> { .code(205) .stdout(predicate::str::contains("Active Credential")); Ok(()) -} \ No newline at end of file +}