From 92f43d2e2998d716082fbad7834ba87c7af0bda0 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 24 Feb 2026 12:25:12 -0700 Subject: [PATCH] added --turbo mode --- CHANGELOG.md | 2 +- README.md | 36 ++++++++++----- crates/kingfisher-core/src/content_type.rs | 2 +- src/cli/commands/scan.rs | 8 ++-- src/direct_validate.rs | 2 +- src/findings_store.rs | 2 +- src/main.rs | 2 +- src/reporter.rs | 2 +- src/reporter/json_format.rs | 2 +- src/scanner/enumerate.rs | 9 +++- src/scanner/processing.rs | 28 ++++++++---- src/scanner/repos.rs | 4 +- tests/cli_failure.rs | 2 +- tests/cli_git_clone_flags.rs | 52 ++++++++++++++++++++++ tests/int_allowlist.rs | 2 +- tests/int_bitbucket.rs | 2 +- tests/int_dedup.rs | 4 +- tests/int_github.rs | 2 +- tests/int_gitlab.rs | 4 +- tests/int_redact.rs | 2 +- tests/int_slack.rs | 4 +- tests/int_validation_cache.rs | 2 +- tests/int_vulnerable_files.rs | 4 +- 23 files changed, 130 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9537da..49dfe11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ All notable changes to this project will be documented in this file. ## [v1.85.0] -- Added `--fast` mode: sets `--commit-metadata=false` and `--no-base64` for maximum scan speed. Findings will omit Git commit context (author, date, commit hash) and will not include Base64-decoded secrets. +- Added `--turbo` mode: sets `--commit-metadata=false` and `--no-base64`, and disables MIME sniffing, language detection, and tree-sitter parsing for maximum scan speed. Findings will omit Git commit context (author, date, commit hash), MIME type, and language metadata, and will not include Base64-decoded secrets. - SQLite database scanning: kingfisher now detects and extracts SQLite files (`.db`, `.sqlite`, `.sqlite3`, etc.), dumping each table as SQL text with named columns so secrets stored in database rows are scannable. Controlled by the existing `--extract-archives` flag. 
- Python bytecode (.pyc) scanning: extracts string constants from compiled Python (`.pyc`, `.pyo`) files via marshal parsing so secrets embedded in bytecode are scannable. Controlled by `--extract-archives`. - Performance: pipelined ODB enumeration — scanning now begins while blob OIDs are still being discovered, overlapping I/O with pattern matching. diff --git a/README.md b/README.md index c53a717..4f29db2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Kingfisher +# Kingfisher: Open Source Secret Scanner with Live Validation

Kingfisher Logo @@ -7,16 +7,25 @@ [![ghcr downloads](https://ghcr-badge.elias.eu.org/shield/mongodb/kingfisher/kingfisher)](https://github.com/mongodb/kingfisher/pkgs/container/kingfisher)
-Kingfisher is a blazingly fast secret-scanning and **live validation** tool built in Rust. +Kingfisher is an open source secret scanner and **live secret validation** tool built in Rust. -It combines Intel's SIMD-accelerated regex engine (Hyperscan) with language-aware parsing to achieve high accuracy at massive scale, and **ships with hundreds of built-in rules** to detect, **validate**, and triage secrets before they ever reach production. +It combines Intel's SIMD-accelerated regex engine (Hyperscan) with language-aware parsing to achieve high accuracy at massive scale, and **ships with hundreds of built-in rules** to detect, **validate**, and triage leaked API keys, tokens, and credentials before they ever reach production. -Designed for offensive security engineers and blue-teamers alike, Kingfisher helps you pivot across repo ecosystems, validate exposure paths, and hunt for developer-owned leaks that spill beyond the primary codebase. +Designed for offensive security engineers and blue-team defenders alike, Kingfisher helps you scan repositories, cloud storage, chat, docs, and CI pipelines to find and verify exposed secrets quickly.

**Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) +## What Is Kingfisher? + +Kingfisher is a high-performance, open source secret detection tool for source code and developer platforms. If you are searching for a "GitHub secret scanner," "API key scanner," "token leak detection," or "Git secrets scanner," this project is built for that workflow. + +- Scan code, Git history, and integrated platforms (GitHub, GitLab, Azure Repos, Bitbucket, Gitea, Hugging Face, Jira, Confluence, Slack, Docker, AWS S3, and Google Cloud Storage) +- Validate discovered credentials against provider APIs to reduce false positives +- Revoke supported secrets directly from the CLI +- Generate JSON, SARIF, and HTML outputs for security teams, compliance, and CI + ## Key Features ### Multiple Scan Targets @@ -60,7 +69,7 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md)) kingfisher scan /path/to/scan --view-report ``` NOTE: Replay has been slowed down for demo -![alt text](docs/kingfisher-usage-01.gif) +![Kingfisher secret scanning demo](docs/kingfisher-usage-01.gif) ## Report Viewer Demo Explore Kingfisher's built-in report viewer and its `--access-map`, which can show what the token (AWS, GCP, Azure, GitHub, GitLab, and Slack...more coming) can actually access. 
@@ -77,13 +86,14 @@ Serving access-map viewer at http://127.0.0.1:7890 (Ctrl+C to stop) kingfisher scan /path/to/scan --access-map --view-report ``` -![alt text](docs/kingfisher-usage-access-map-01.gif) +![Kingfisher access map and report viewer demo](docs/kingfisher-usage-access-map-01.gif) **Click to view video** [![Demo](docs/demos/findings-thumbnail.png)](https://github.com/user-attachments/assets/d33ee7a6-c60a-4e42-88e0-ac03cb429a46) # Table of Contents +- [What Is Kingfisher?](#what-is-kingfisher) - [Key Features](#key-features) - [Compliance and Audit-Ready Scans](#compliance-and-audit-ready-scans) - [Benchmark Results](#benchmark-results) @@ -312,9 +322,10 @@ kingfisher scan /path/to/code # Scan without validation kingfisher scan ~/src/myrepo --no-validate -# Fast mode: run as fast as possible by disabling Git commit metadata and Base64 decoding -# (findings omit commit context and Base64-encoded secrets) -kingfisher scan ~/src/myrepo --fast +# Turbo mode: run as fast as possible by disabling Git commit metadata, Base64 decoding, +# MIME sniffing, language detection, and tree-sitter parsing +# (findings omit commit context, Base64-only matches, MIME type, and language metadata) +kingfisher scan ~/src/myrepo --turbo # Display only secrets confirmed active by third‑party APIs kingfisher scan /path/to/repo --only-valid @@ -398,9 +409,10 @@ cat /path/to/file.py | kingfisher scan - # Limit maximum file size scanned (default: 256 MB) kingfisher scan /some/file --max-file-size 500 -# Fast mode: equivalent to --commit-metadata=false --no-base64 for maximum speed -# No Git commit metadata (author, date, hash) or Base64 decoding in findings -kingfisher scan /path/to/repo --fast +# Turbo mode: equivalent to --commit-metadata=false --no-base64 and disables MIME sniffing, +# language detection/tree-sitter parsing for maximum speed +# No Git commit metadata (author, date, hash), Base64 decoding, MIME, or language metadata in findings +kingfisher scan /path/to/repo 
--turbo # Scan using a rule family kingfisher scan /path/to/repo --rule kingfisher.aws diff --git a/crates/kingfisher-core/src/content_type.rs b/crates/kingfisher-core/src/content_type.rs index 611badb..2100f0c 100644 --- a/crates/kingfisher-core/src/content_type.rs +++ b/crates/kingfisher-core/src/content_type.rs @@ -110,7 +110,7 @@ impl ContentInspector { #[inline] #[must_use] pub fn guess_language(&self, path: &Path, content: &[u8]) -> Option { - // 1) Extension mapping (fast, no I/O). + // 1) Extension mapping (fast, no I/O). if let Some(ext) = path.extension().and_then(|e| e.to_str()) { if let Some(lang) = LanguageType::from_file_extension(&ext.to_ascii_lowercase()) { return Some(lang.name().to_string()); diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs index d77a8ac..b378d1f 100644 --- a/src/cli/commands/scan.rs +++ b/src/cli/commands/scan.rs @@ -151,9 +151,9 @@ pub struct ScanArgs { #[arg(global = true, long, default_value_t = false)] pub no_base64: bool, - /// Fast mode: equivalent to --commit-metadata=false --no-base64 - #[arg(global = true, long, default_value_t = false)] - pub fast: bool, + /// Turbo mode: equivalent to --commit-metadata=false --no-base64 and disables MIME sniffing, language detection, and tree-sitter parsing + #[arg(global = true, long = "turbo", default_value_t = false)] + pub turbo: bool, /// Timeout for Git repository scanning in seconds #[arg(global = true, long, default_value_t = 1800, value_name = "SECONDS")] @@ -490,7 +490,7 @@ impl ScanCommandArgs { self.scan_args.no_dedup = true; } - if self.scan_args.fast { + if self.scan_args.turbo { self.scan_args.no_base64 = true; self.scan_args.input_specifier_args.commit_metadata = false; } diff --git a/src/direct_validate.rs b/src/direct_validate.rs index a962a07..9c00929 100644 --- a/src/direct_validate.rs +++ b/src/direct_validate.rs @@ -961,7 +961,7 @@ pub(crate) fn create_minimal_scan_args() -> crate::cli::commands::scan::ScanArgs skip_aws_account_file: None, 
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_base64: false, - fast: false, + turbo: false, no_inline_ignore: false, no_ignore_if_contains: false, validation_timeout: 10, diff --git a/src/findings_store.rs b/src/findings_store.rs index d38c421..8785b56 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -262,7 +262,7 @@ impl FindingsStore { // Origin::Extended(_) => "ext", // }; - // // 64-bit key (fast, cheap, good dispersion) + // // 64-bit key (fast, cheap, good dispersion) // let key = xxh3_64( // format!( // "{}|{}|{}", diff --git a/src/main.rs b/src/main.rs index 77bebe8..22146c0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -577,7 +577,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { skip_aws_account_file: None, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_base64: false, - fast: false, + turbo: false, no_inline_ignore: false, no_ignore_if_contains: false, validation_timeout: 10, diff --git a/src/reporter.rs b/src/reporter.rs index 779a25b..e5b6366 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -1781,7 +1781,7 @@ mod tests { view_report: false, redact: false, no_base64: false, - fast: false, + turbo: false, git_repo_timeout: 1_800, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, baseline_file: None, diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 8fabe4e..62e636a 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -193,7 +193,7 @@ mod tests { skip_aws_account: Vec::new(), skip_aws_account_file: None, no_base64: false, - fast: false, + turbo: false, no_inline_ignore: false, no_ignore_if_contains: false, validation_timeout: 10, diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index 01075db..3144d65 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -238,7 +238,14 @@ pub fn enumerate_filesystem_inputs( return Ok(()); } 
progress.inc(blob.len().try_into().unwrap()); - match processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64) { + match processor.run( + origin, + blob, + args.no_dedup, + args.redact, + args.no_base64, + args.turbo, + ) { Ok(None) => { // nothing to record } diff --git a/src/scanner/processing.rs b/src/scanner/processing.rs index 3461eed..fc441b0 100644 --- a/src/scanner/processing.rs +++ b/src/scanner/processing.rs @@ -28,13 +28,18 @@ impl<'a> BlobProcessor<'a> { no_dedup: bool, redact: bool, no_base64: bool, + fast_mode: bool, ) -> Result> { let _span = debug_span!("matcher", temp_id = blob.temp_id()).entered(); let t1 = Instant::now(); - let language_hint = origin - .iter() - .find_map(|p| p.blob_path()) - .and_then(|path| ContentInspector::default().guess_language(path, blob.bytes())); + let language_hint = if fast_mode { + None + } else { + origin + .iter() + .find_map(|p| p.blob_path()) + .and_then(|path| ContentInspector::default().guess_language(path, blob.bytes())) + }; let res = self.matcher.scan_blob(&blob, &origin, language_hint, redact, no_dedup, no_base64)?; let scan_us = t1.elapsed().as_micros(); @@ -66,7 +71,7 @@ impl<'a> BlobProcessor<'a> { if matches.is_empty() { return Ok(None); } - let md = MetadataResult::from_blob_and_origin(&blob, &origin); + let md = MetadataResult::from_blob_and_origin(&blob, &origin, fast_mode); let metadata = BlobMetadata { id: blob.id(), num_bytes: blob.len(), @@ -117,12 +122,17 @@ struct MetadataResult { language: Option, } impl MetadataResult { - fn from_blob_and_origin(blob: &Blob, origin: &OriginSet) -> MetadataResult { + fn from_blob_and_origin(blob: &Blob, origin: &OriginSet, fast_mode: bool) -> MetadataResult { let blob_path: Option<&'_ Path> = origin.iter().find_map(|p| p.blob_path()); let bytes = blob.bytes(); - let mime_essence = Some(tree_magic_mini::from_u8(bytes).to_string()); - let inspector = ContentInspector::default(); - let language = blob_path.and_then(|p| inspector.guess_language(p, 
bytes)); + let mime_essence = + if fast_mode { None } else { Some(tree_magic_mini::from_u8(bytes).to_string()) }; + let language = if fast_mode { + None + } else { + let inspector = ContentInspector::default(); + blob_path.and_then(|p| inspector.guess_language(p, bytes)) + }; MetadataResult { mime_essence, language } } } diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 9076375..77562d4 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -863,7 +863,7 @@ pub async fn fetch_s3_objects( let blob = crate::blob::Blob::from_bytes(bytes); if let Some((origin, blob_md, scored_matches)) = - processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64)? + processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64, args.turbo)? { // Wrap origin & metadata once: let origin_arc = Arc::new(origin); @@ -945,7 +945,7 @@ pub async fn fetch_gcs_objects( let blob = crate::blob::Blob::from_bytes(bytes); if let Some((origin, blob_md, scored_matches)) = - processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64)? + processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64, args.turbo)? 
{ let origin_arc = Arc::new(origin); let blob_arc = Arc::new(blob_md); diff --git a/tests/cli_failure.rs b/tests/cli_failure.rs index 0968f4f..1629de5 100644 --- a/tests/cli_failure.rs +++ b/tests/cli_failure.rs @@ -27,7 +27,7 @@ fn scan_fails_for_bad_rule_yaml() { tmp.path().to_str().unwrap(), // dummy input dir (exists) "--rules-path", tmp.path().to_str().unwrap(), // point loader at bad YAML - "--no-validate", // keep the test fast + "--no-validate", // keep the test fast "--no-update-check", // skip update check to avoid network calls ]) .assert() diff --git a/tests/cli_git_clone_flags.rs b/tests/cli_git_clone_flags.rs index fc7d40a..3b67883 100644 --- a/tests/cli_git_clone_flags.rs +++ b/tests/cli_git_clone_flags.rs @@ -61,3 +61,55 @@ fn keep_clones_defaults_to_false() -> anyhow::Result<()> { Ok(()) } + +#[test] +fn turbo_mode_applies_speed_first_defaults() -> anyhow::Result<()> { + let args = CommandLineArgs::try_parse_from([ + "kingfisher", + "scan", + ".", + "--turbo", + "--no-update-check", + ])?; + + let command = match args.command { + Command::Scan(scan_args) => scan_args, + other => panic!("unexpected command parsed: {:?}", other), + }; + + let scan_args = match command.into_operation()? { + ScanOperation::Scan(scan_args) => scan_args, + op => panic!("expected scan operation, got {:?}", op), + }; + + assert!(scan_args.turbo); + assert!(scan_args.no_base64); + assert!(!scan_args.input_specifier_args.commit_metadata); + + Ok(()) +} + +#[test] +fn fast_alias_still_enables_turbo_mode() -> anyhow::Result<()> { + let args = CommandLineArgs::try_parse_from([ + "kingfisher", + "scan", + ".", + "--turbo", + "--no-update-check", + ])?; + + let command = match args.command { + Command::Scan(scan_args) => scan_args, + other => panic!("unexpected command parsed: {:?}", other), + }; + + let scan_args = match command.into_operation()? 
{ + ScanOperation::Scan(scan_args) => scan_args, + op => panic!("expected scan operation, got {:?}", op), + }; + + assert!(scan_args.turbo); + + Ok(()) +} diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs index e684b14..75bd34b 100644 --- a/tests/int_allowlist.rs +++ b/tests/int_allowlist.rs @@ -155,7 +155,7 @@ fn run_skiplist(skip_regex: Vec, skip_skipword: Vec) -> Result Result<()> { skip_aws_account: Vec::new(), skip_aws_account_file: None, no_base64: false, - fast: false, + turbo: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, no_ignore_if_contains: false, diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index cd9d498..8dcd771 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -174,7 +174,7 @@ rules: skip_aws_account: Vec::new(), skip_aws_account_file: None, no_base64: false, - fast: false, + turbo: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, no_ignore_if_contains: false, @@ -223,7 +223,7 @@ rules: #[test] fn test_dedup_branch() -> Result<()> { - // A *single* runtime reused for both scans keeps the test fast + // A *single* runtime reused for both scans keeps the test fast let rt = Runtime::new().unwrap(); let findings_with_dups = run_scan(&rt, true)?; // keep duplicates diff --git a/tests/int_github.rs b/tests/int_github.rs index 97a23c8..55299fc 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -161,7 +161,7 @@ fn test_github_remote_scan() -> Result<()> { skip_aws_account: Vec::new(), skip_aws_account_file: None, no_base64: false, - fast: false, + turbo: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, no_ignore_if_contains: false, diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 39e7b5b..f560e0c 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -160,7 +160,7 @@ fn test_gitlab_remote_scan() -> Result<()> { skip_aws_account: Vec::new(), skip_aws_account_file: None, no_base64: false, - fast: false, + turbo: false, no_inline_ignore: false, 
no_ignore_if_contains: false, validation_retries: 1, @@ -326,7 +326,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { skip_aws_account: Vec::new(), skip_aws_account_file: None, no_base64: false, - fast: false, + turbo: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, no_ignore_if_contains: false, diff --git a/tests/int_redact.rs b/tests/int_redact.rs index da52595..08e494e 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -137,7 +137,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> { skip_aws_account: Vec::new(), skip_aws_account_file: None, no_base64: false, - fast: false, + turbo: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, no_ignore_if_contains: false, diff --git a/tests/int_slack.rs b/tests/int_slack.rs index 7a2c44f..bd65ff9 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -143,7 +143,7 @@ impl TestContext { skip_aws_account: Vec::new(), skip_aws_account_file: None, no_base64: false, - fast: false, + turbo: false, no_inline_ignore: false, no_ignore_if_contains: false, validation_retries: 1, @@ -295,7 +295,7 @@ async fn test_scan_slack_messages() -> Result<()> { skip_aws_account: Vec::new(), skip_aws_account_file: None, no_base64: false, - fast: false, + turbo: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, no_ignore_if_contains: false, diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 4407ba5..0339a72 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -217,7 +217,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> { skip_aws_account: Vec::new(), skip_aws_account_file: None, no_base64: false, - fast: false, + turbo: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, no_ignore_if_contains: false, diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 4c9b86d..b4b9fbe 100644 --- a/tests/int_vulnerable_files.rs +++ 
b/tests/int_vulnerable_files.rs @@ -160,7 +160,7 @@ impl TestContext { skip_aws_account: Vec::new(), skip_aws_account_file: None, no_base64: false, - fast: false, + turbo: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, no_ignore_if_contains: false, @@ -302,7 +302,7 @@ impl TestContext { skip_aws_account: Vec::new(), skip_aws_account_file: None, no_base64: false, - fast: false, + turbo: false, no_inline_ignore: false, no_ignore_if_contains: false, validation_retries: 1,