- New rules: Telegram bot token, OpenWeatherMap, Apify

- New OpenAI detectors added (@joshlarsen)
- Fixed bug that broke validation when using unnamed group captures
This commit is contained in:
Mick Grove 2025-08-01 16:56:04 -07:00
commit 46d0ecce3b
29 changed files with 241 additions and 64 deletions

View file

@ -2,6 +2,11 @@
All notable changes to this project will be documented in this file.
## [1.31.0]
- New rules: Telegram bot token, OpenWeatherMap, Apify
- New OpenAI detectors added (@joshlarsen)
- Fixed bug that broke validation when using unnamed group captures
## [1.30.0]
- Fixed validation caching for HTTP validators to include rendered headers so inactive secrets no longer appear active.
- Removed pre-commit installation hook, due to bugs

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.30.0"
version = "1.31.0"
description = "MongoDB's blazingly fast secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true
@ -104,7 +104,6 @@ http = "1.3.1"
liquid = "0.26.11"
liquid-core = "0.26.11"
flate2 = "1.1.2"
brotli = "6.0.0"
thousands = "0.2.0"
base32 = "0.5.1"
crossbeam-skiplist = "0.1.3"
@ -172,13 +171,10 @@ color-backtrace = "0.7.0"
gitlab = "0.1801.0"
mimalloc = {version = "0.1.47", features = ["override"]}
thread_local = "1.1.9"
crc32fast = "1.5.0"
bloomfilter = "3.0.1"
uuid = "1.17.0"
urlencoding = "2.1.3"
rand = "0.9.1"
percent-encoding = "2.3.1"
trust-dns-resolver = { version = "0.23.2", default-features = false, features = ["tokio-runtime"] }
atty = "0.2.14"
self_update = { version = "0.42.0", default-features = false, features = ["rustls", "archive-tar", "archive-zip", "compression-flate2"] }
semver = "1.0.26"
@ -189,8 +185,6 @@ jira_query = "1.6.0"
oci-client = { version = "0.15", default-features = false, features = ["rustls-tls"] }
walkdir = "2.5.0"
p256 = "0.13.2"
sec1 = "0.7.3"
rand_core = "0.9.3"
ed25519-dalek = { version = "2.2", features = ["pkcs8"] }
[dependencies.tikv-jemallocator]

View file

@ -24,6 +24,7 @@ endif
ifeq ($(OS),darwin)
export HOMEBREW_NO_INSTALL_CLEANUP=1
export HOMEBREW_NO_ENV_HINTS=1
export HOMEBREW_NO_AUTO_UPDATE=1
endif
# detect host architecture and map to our target suffixes

35
data/rules/apify.yml Normal file
View file

@ -0,0 +1,35 @@
rules:
# Detects Apify API tokens: the literal prefix `apify_api_` followed by 34-38
# alphanumeric characters. The pattern is case-insensitive and verbose (`(?xi)`),
# and the whole token is the single capture group.
- name: Apify API Token
id: kingfisher.apify.1
pattern: |
(?xi)
\b
(
apify_api_[A-Z0-9]{34,38}
)
\b
confidence: medium
min_entropy: 3.5
# Validation: GET the Apify "users/me" endpoint with the captured token sent as
# a Bearer credential. The token is reported as valid only when the response is
# HTTP 200 and the body contains both `"data"` and `"username"`.
validation:
type: Http
content:
request:
method: GET
url: "https://api.apify.com/v2/users/me"
headers:
Authorization: "Bearer {{ TOKEN }}"
response_matcher:
- type: StatusMatch
status: [200]
- type: WordMatch
words:
- '"data"'
- '"username"'
match_all_words: true
references:
- https://docs.gitguardian.com/secrets-detection/secrets-detection-engine/detectors/specifics/apify_token
- https://docs.apify.com/api/v2#/reference/users/user-object/get-user-public-profile-or-me
- https://docs.apify.com/api/v2/users-me-get
# Sample tokens that the pattern above is expected to match.
examples:
- "apify_api_NcjXcxEz2XL1irjppyWSHvjghalQOd1LXOHv"
- "apify_api_9uyewBxQUF1EXWdKVc4lNaTSM461Ls4oQouz"

View file

@ -0,0 +1,42 @@
rules:
  # ---------------------------------------------------------------------
  # OpenWeatherMap API key: keyword-anchored detection + live HTTP validation
  # ---------------------------------------------------------------------
  - name: OpenWeather Map API Key
    id: kingfisher.openweather.1
    # Matches a 32-character alphanumeric key that appears within 64 characters
    # of a `pyowm`, `openweather`, or `owm` keyword (case-insensitive).
    #
    # An optional `APPID=` prefix is consumed OUTSIDE the capture group so the
    # captured token is always the bare key. If the prefix were captured, the
    # validation URL below would render as `appid=APPID=<key>` and every such
    # finding would fail validation.
    pattern: |
      (?xi)
      \b
      (?:pyowm|openweather|owm\b)
      (?:.|[\n\r]){0,64}?
      \b
      (?:APPID=)?
      (
        [a-z0-9]{32}
      )
      \b
    min_entropy: 3.5
    confidence: medium
    examples:
      - pyowm = '3k144a5af729351d0fc58bdrj9a21mkr'
      - owm = '3k144a5af729351d0fc58bdrj9a21mkr'
      - openweatherapikey=cd2b1d12d01ae2deffecfebafcc3c31d
      - apikey=openweather:cd2b1d12d01ae2deffecfebafcc3c31d
    # Validation: request a forecast with the captured key as the `appid`
    # query parameter. The key is considered active only when the response is
    # HTTP 200 AND the body contains `"cod":"200"`.
    validation:
      type: Http
      content:
        request:
          method: GET
          url: "https://api.openweathermap.org/data/2.5/forecast?q=London&appid={{ TOKEN }}"
          response_matcher:
            - report_response: true
            - type: StatusMatch
              status: [200]
            # The body check needs its own matcher entry; `words` is not a
            # StatusMatch field.
            - type: WordMatch
              words: ['"cod":"200"']
    references:
      - https://openweathermap.org/forecast5
      - https://openweathermap.org/appid
      - https://publicapi.dev/open-weather-map-api

View file

@ -5,8 +5,6 @@ rules:
(?xi)
recaptcha
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,32}?
\b
(
6l[c-f][a-z0-9_-].{36}

30
data/rules/telegram.yml Normal file
View file

@ -0,0 +1,30 @@
rules:
# Detects Telegram bot tokens: 8-10 digits, a colon, then 35 characters drawn
# from letters, digits, `_` and `-`. The pattern is case-insensitive and verbose
# (`(?xi)`), and the whole `<id>:<secret>` string is the single capture group.
- name: Telegram Bot Token
id: kingfisher.telegram.1
pattern: |
(?xi)
\b
(
[0-9]{8,10}
:
[A-Z0-9_-]{35}
)
\b
confidence: medium
min_entropy: 3.5
# Validation: the captured token is embedded in the URL path of a `getMe`
# request. The token is reported as valid only when the response is HTTP 200
# and the body contains `"ok":true`.
validation:
type: Http
content:
request:
method: GET
url: "https://api.telegram.org/bot{{TOKEN}}/getMe"
response_matcher:
- type: StatusMatch
status: [200]
- type: WordMatch
words:
- '"ok":true'
# Sample tokens that the pattern above is expected to match.
examples:
- "110201543:AAHdqTcvCH1vGWJxfSeofSAs0K5PALDsawd"
- "508627689:AAEuLPKs-EhrjrYGnz60bnYNZqakf6HJxc0"
- "3628091811:BAG9RuJiqgOGIfFbOPBpAo6QhIJoD9mCdDs"

View file

@ -108,7 +108,6 @@ Below is the complete list of Liquid filters available in Kingfisher, along with
| `b64enc` | | Base64-encodes the input using the standard alphabet. | `{{ TOKEN \| b64enc }}` |
| `b64url_enc` | | URL-safe Base64 (no padding). Useful for JWT headers & payloads. | `{{ TOKEN \| b64url_enc }}` |
| `b64dec` | | Decodes a Base64 string. | `{{ "aGVsbG8=" \| b64dec }}` |
| `es256_sign` | `key` (string) | Signs the input with an ECDSA P-256 private key and returns a Base64URL signature. | `{{ "data" \| es256_sign: PRIVKEY }}` |
| `sha256` | | Computes the SHA-256 hex digest of the input. | `{{ TOKEN \| sha256 }}` |
| `hmac_sha1` | `key` (string) | Computes HMAC-SHA1 over the input, returns Base64-encoded result. | `{{ TOKEN \| hmac_sha1: "secret-key" }}` |
| `hmac_sha256` | `key` (string) | Computes HMAC-SHA256 over the input, returns Base64-encoded result. | `{{ TOKEN \| hmac_sha256: "secret-key" }}` |

View file

@ -5,11 +5,5 @@ tab_spaces = 4
use_small_heuristics = "Max"
newline_style = "Unix"
imports_granularity = "Crate"
group_imports = "StdExternalCrate"
reorder_imports = true
normalize_doc_attributes = true
format_code_in_doc_comments = true
wrap_comments = true
comment_width = 100

View file

@ -36,13 +36,11 @@ impl Git {
/// Create a new `Git` instance.
///
/// * `ignore_certs`: If `true`, disables SSL certificate verification for `git` operations.
pub fn new(ignore_certs: bool) -> Self {
pub fn new(ignore_certs: bool) -> Self {
let mut credentials = Vec::new();
// If either GitHub or GitLab token is set, first clear existing credential.helpers
if std::env::var("KF_GITHUB_TOKEN").is_ok()
|| std::env::var("KF_GITLAB_TOKEN").is_ok()
{
if std::env::var("KF_GITHUB_TOKEN").is_ok() || std::env::var("KF_GITLAB_TOKEN").is_ok() {
credentials.push("-c".into());
credentials.push(r#"credential.helper="#.into());
}

View file

@ -139,7 +139,7 @@ impl RepositoryIndex {
let mut num_trees = 0;
let mut num_blobs = 0;
let mut num_commits = 0;
for oid in odb
.iter()
.context("Failed to iterate object database")?

View file

@ -49,4 +49,4 @@ pub async fn download_issues_to_dir(
paths.push(file);
}
Ok(paths)
}
}

View file

@ -7,9 +7,6 @@ use liquid_core::{
FromFilterParameters, ParseFilter, Result, Runtime, Value, ValueView,
};
use p256::ecdsa::{signature::Signer, SigningKey};
use p256::pkcs8::DecodePrivateKey;
use sec1::DecodeEcPrivateKey;
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use rand::{distr::Alphanumeric, Rng};
use sha1::Sha1;
@ -295,7 +292,6 @@ impl Filter for B64DecFilter {
}
}
// -----------------------------------------------------------------------------
// Authentication & Security
// -----------------------------------------------------------------------------

View file

@ -289,7 +289,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
// Slack query
slack_query: None,
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
// Docker image scanning
docker_image: Vec::new(),

View file

@ -113,7 +113,6 @@ impl DetailsReporter {
}
}
/// If the given file path corresponds to a Jira issue downloaded to disk,
/// return the online Jira URL for that issue.
fn jira_issue_url(
@ -123,7 +122,7 @@ impl DetailsReporter {
) -> Option<String> {
// drop any trailing slash so we don't end up with “//browse/…”
let jira_url = args.input_specifier_args.jira_url.as_ref()?.as_str().trim_end_matches('/');
let ds = self.datastore.lock().ok()?;
let root = ds.clone_root();
let jira_dir = root.join("jira_issues");

View file

@ -441,7 +441,7 @@ mod tests {
// Slack options
slack_query: None,
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
docker_image: Vec::new(),
// clone / history options
git_clone: GitCloneMode::Bare,

View file

@ -208,7 +208,6 @@ impl DetailsReporter {
let p = first_match.origin.first();
match p {
Origin::File(e) => {
let uri = if let Some(url) = self.jira_issue_url(&e.path, args) {
url
} else if let Some(url) = self.slack_message_url(&e.path) {
@ -351,7 +350,7 @@ impl DetailsReporter {
.build()?,
)
.build()?;
let sarif_results: Vec<sarif::Result> = findings
.par_iter()
.filter_map(|f| self.make_sarif_result(f, no_dedup, args).ok())

View file

@ -61,7 +61,7 @@ fn image_dir_name(reference: &str) -> String {
// add a truncated SHA-256 to guarantee uniqueness
let hash = Sha256::digest(reference.as_bytes());
let short = &hex::encode(hash)[..8]; // 8-char prefix is plenty
let short = &hex::encode(hash)[..8]; // 8-char prefix is plenty
name.push('_');
name.push_str(short);
name
@ -258,7 +258,7 @@ pub async fn save_docker_images(
) -> Result<Vec<(PathBuf, String)>> {
let docker = Docker::new();
let mut dirs = Vec::new();
for image in images {
let dir_name = image_dir_name(image);
let out_dir = clone_root.join(format!("docker_{dir_name}"));
@ -280,4 +280,4 @@ mod tests {
fn docker_struct_new() {
// Smoke check only: call the constructor and discard the value; nothing is asserted.
let _ = Docker::new();
}
}
}

View file

@ -1,9 +1,9 @@
//! Public façade for the scanner subsystem.
pub(crate) use docker::save_docker_images;
pub(crate) use enumerate::enumerate_filesystem_inputs;
pub(crate) use repos::{clone_or_update_git_repos, enumerate_github_repos};
pub use runner::{load_and_record_rules, run_async_scan, run_scan};
pub(crate) use validation::run_secret_validation;
pub(crate) use docker::save_docker_images;
mod docker;
mod enumerate;

View file

@ -225,7 +225,6 @@ pub async fn enumerate_gitlab_repos(
Ok(repo_urls)
}
pub async fn fetch_jira_issues(
args: &scan::ScanArgs,
global_args: &global::GlobalArgs,
@ -284,4 +283,4 @@ pub async fn fetch_slack_messages(
}
}
Ok(vec![output_dir])
}
}

View file

@ -67,7 +67,7 @@ pub async fn run_async_scan(
// Fetch Jira issues if requested
let jira_dirs = fetch_jira_issues(args, global_args, &datastore).await?;
input_roots.extend(jira_dirs);
// Fetch Slack messages if requested
let slack_dirs = fetch_slack_messages(args, global_args, &datastore).await?;
input_roots.extend(slack_dirs);

View file

@ -115,4 +115,4 @@ pub async fn download_messages_to_dir(
paths.push((file, msg.permalink));
}
Ok(paths)
}
}

View file

@ -1,7 +1,7 @@
use std::{
collections::BTreeMap,
fs,
hash::{Hash, Hasher},
collections::BTreeMap,
sync::Arc,
time::{Duration, Instant},
};
@ -1043,5 +1043,4 @@ rules:
println!("Body: {:?}", owned_blob_match.validation_response_body);
Ok(())
}
}

View file

@ -4,16 +4,16 @@ use std::time::Duration;
use anyhow::{anyhow, Result};
use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
use chrono::Utc;
use ed25519_dalek::SigningKey as Ed25519Key;
use p256::{
ecdsa::{signature::Signer as _, SigningKey},
pkcs8::DecodePrivateKey,
SecretKey,
};
use ed25519_dalek::{SigningKey as Ed25519Key, Signer as _};
use rand::rngs::OsRng;
use rand::TryRngCore;
use reqwest::{Client, StatusCode, Url};
use sha1::{Digest, Sha1};
use rand::TryRngCore;
use crate::validation::{httpvalidation, Cache, CachedResponse, VALIDATION_CACHE_SECONDS};
@ -68,7 +68,6 @@ pub async fn validate_cdp_api_key(
Ok((ok, msg))
}
// fn build_jwt(
// method: &str,
// host: &str,
@ -85,7 +84,7 @@ pub async fn validate_cdp_api_key(
// let mut rng = OsRng;
// let mut nonce = [0u8; 16];
// let _ = rng.try_fill_bytes(&mut nonce);
// let header = serde_json::json!({
@ -125,12 +124,12 @@ fn build_jwt(
let mut rng = OsRng;
let mut nonce = [0u8; 16];
let _ = rng.try_fill_bytes(&mut nonce);
// Try ECDSA (PEM encoded EC key). Fallback to raw Ed25519 base64 key.
if let Ok(secret_key) = SecretKey::from_sec1_pem(&pem)
.or_else(|_| SecretKey::from_pkcs8_pem(&pem))
if let Ok(secret_key) =
SecretKey::from_sec1_pem(&pem).or_else(|_| SecretKey::from_pkcs8_pem(&pem))
{
let signing_key = SigningKey::from(secret_key);
let header = serde_json::json!({
@ -168,7 +167,8 @@ fn build_jwt(
}
64 => {
let arr: [u8; 64] = key_bytes[..64].try_into().unwrap();
Ed25519Key::from_keypair_bytes(&arr).map_err(|e| anyhow!("invalid Ed25519 key: {e}"))?
Ed25519Key::from_keypair_bytes(&arr)
.map_err(|e| anyhow!("invalid Ed25519 key: {e}"))?
}
_ => return Err(anyhow!("invalid Ed25519 key length")),
};
@ -196,4 +196,4 @@ fn build_jwt(
let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes());
return Ok(format!("{signing_input}.{sig_b64}"));
}
}
}

View file

@ -162,7 +162,11 @@ pub async fn validate_jwt(token: &str) -> Result<(bool, String)> {
return Ok((
true,
format!("JWT valid (alg: {:?}, iss: {issuer}, aud: {:?})", alg, extract_aud_strings(&claims)),
format!(
"JWT valid (alg: {:?}, iss: {issuer}, aud: {:?})",
alg,
extract_aud_strings(&claims)
),
));
}

View file

@ -9,21 +9,19 @@ use crate::validation::SerializableCaptures;
/// * If it's unnamed, fall back to `"TOKEN"`
/// * Skip the unnamed “whole-match” capture **only when** there are
/// additional captures to return.
pub fn process_captures(
captures: &SerializableCaptures,
) -> Vec<(String, String, usize, usize)> {
pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> {
let multiple = captures.captures.len() > 1;
captures
.captures
.iter()
.filter(|cap| !multiple || cap.name.is_some())
// Skip the whole-match capture (match_number == 0) only when there
// are additional captures. All other captures, named or unnamed,
// should be preserved.
.filter(|cap| !multiple || cap.match_number != 0)
.map(|cap| {
let name = cap
.name
.as_ref()
.map(|n| n.to_uppercase())
.unwrap_or_else(|| "TOKEN".to_string());
let name =
cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string());
(name, cap.value.clone().into_owned(), cap.start, cap.end)
})
.collect()
@ -68,3 +66,90 @@ pub async fn check_url_resolvable(url: &Url) -> Result<(), Box<dyn std::error::E
let addr = format!("{}:{}", host, port);
lookup_host(addr).await?.next().ok_or_else(|| "Failed to resolve URL".into()).map(|_| ())
}
// -----------------------------------------------------------------------------
// tests
// -----------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use crate::matcher::{SerializableCapture, SerializableCaptures};
    use pretty_assertions::assert_eq;

    /// When the whole-match capture is the only capture, it is kept and
    /// reported under the fallback name `TOKEN`.
    #[test]
    fn single_unnamed_capture_is_returned() {
        let whole_match = SerializableCapture {
            name: None,
            match_number: 0,
            start: 1,
            end: 4,
            value: "abc".into(),
        };
        let input = SerializableCaptures { captures: vec![whole_match] };

        let expected = vec![("TOKEN".to_string(), "abc".to_string(), 1usize, 4usize)];
        assert_eq!(process_captures(&input), expected);
    }

    /// With a named group alongside the whole match, only the named group is
    /// returned, and its name is upper-cased.
    #[test]
    fn skips_whole_match_when_multiple() {
        let whole_match = SerializableCapture {
            name: None,
            match_number: 0,
            start: 0,
            end: 5,
            value: "abcde".into(),
        };
        let named_group = SerializableCapture {
            name: Some("foo".to_string()),
            match_number: -1,
            start: 1,
            end: 4,
            value: "bcd".into(),
        };
        let input = SerializableCaptures { captures: vec![whole_match, named_group] };

        let expected = vec![("FOO".to_string(), "bcd".to_string(), 1usize, 4usize)];
        assert_eq!(process_captures(&input), expected);
    }

    /// Unnamed groups other than the whole match (`match_number != 0`) are
    /// preserved and fall back to the name `TOKEN`; the whole match is dropped.
    #[test]
    fn includes_unnamed_groups_but_skips_whole_match() {
        let whole_match = SerializableCapture {
            name: None,
            match_number: 0,
            start: 0,
            end: 6,
            value: "aabbcc".into(),
        };
        let named_group = SerializableCapture {
            name: Some("foo".to_string()),
            match_number: -1,
            start: 0,
            end: 2,
            value: "aa".into(),
        };
        let unnamed_group = SerializableCapture {
            name: None,
            match_number: 1,
            start: 4,
            end: 6,
            value: "cc".into(),
        };
        let input =
            SerializableCaptures { captures: vec![whole_match, named_group, unnamed_group] };

        let expected = vec![
            ("FOO".to_string(), "aa".to_string(), 0usize, 2usize),
            ("TOKEN".to_string(), "cc".to_string(), 4usize, 6usize),
        ];
        assert_eq!(process_captures(&input), expected);
    }
}

View file

@ -64,7 +64,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
all_gitlab_groups: false,
gitlab_api_url: Url::parse("https://gitlab.com/")?,
gitlab_repo_type: GitLabRepoType::Owner,
jira_url: None,
jql: None,
max_results: 100,

View file

@ -195,4 +195,4 @@ async fn test_scan_slack_messages() -> Result<()> {
};
assert!(findings > 0);
Ok(())
}
}

View file

@ -17,4 +17,4 @@ fn smoke_scan_docker_image() -> anyhow::Result<()> {
.code(205)
.stdout(predicate::str::contains("Active Credential"));
Ok(())
}
}