From a81cfb963ada115ec71c8bc035fb7b282bf6bd61 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 6 Aug 2025 19:15:50 -0700 Subject: [PATCH] Remote scans with --git-history=none now clone repositories with a working tree and scan the current files instead of erroring with 'No inputs to scan.' --- CHANGELOG.md | 3 ++ Cargo.toml | 2 +- README.md | 6 ++- src/git_binary.rs | 27 ++++++++---- src/scanner/repos.rs | 18 ++++---- tests/int_gitlab.rs | 103 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 140 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a982f5..0ff7097 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.35.0] +- Remote scans with `--git-history=none` now clone repositories with a working tree and scan the current files instead of erroring with "No inputs to scan". + ## [1.34.0] - Use system TLS root certificates to support self-hosted GitLab instances with internal CAs - Added new rule: Coze personal access token diff --git a/Cargo.toml b/Cargo.toml index 4295167..930a196 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.34.0" +version = "1.35.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/README.md b/README.md index ef68c1f..1fba1fb 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,17 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -Kingfisher is a blazingly fast secret‑scanning and validation tool built in Rust. 
It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware parsing via Tree‑Sitter, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production +Kingfisher is a blazingly fast secret‑scanning and live validation tool built in Rust. It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware parsing via Tree‑Sitter, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production.

Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.com/praetorian-inc/noseyparker), and is built atop their incredible work and the work contributed by the Nosey Parker community. ## What Kingfisher Adds - **Live validation** via cloud-provider APIs -- **Language-aware detection** (source-code parsing) for ~20 languages - **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages +- **Compressed Files**: Supports extracting and scanning compressed files for secrets - **Baseline mode**: ignore known secrets, flag only new ones +- **Language-aware detection** (source-code parsing) for ~20 languages - **Native Windows** binary @@ -27,6 +28,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` - **Slack messages**: query‑based scans with `--slack-query` - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous +- **Compressed Files**: Supports extracting and scanning compressed files for secrets - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) diff --git a/src/git_binary.rs b/src/git_binary.rs index fc2baa1..6e9bd8d 100644 --- a/src/git_binary.rs +++ b/src/git_binary.rs @@ -105,8 +105,13 @@ impl Git { let _span = debug_span!("git_update", "{repo_url} {}", output_dir.display()).entered(); debug!("Attempting to update clone of {repo_url} at {}", output_dir.display()); let mut cmd = self.git(); - cmd.arg("--git-dir"); - cmd.arg(output_dir); + if output_dir.join(".git").is_dir() { + cmd.arg("-C"); + cmd.arg(output_dir); + } else { + 
cmd.arg("--git-dir"); + cmd.arg(output_dir); + } cmd.arg("remote"); cmd.arg("update"); cmd.arg("--prune"); @@ -129,7 +134,9 @@ impl Git { debug!("Attempting to create fresh clone of {} at {}", repo_url, output_dir.display()); let mut cmd = self.git(); cmd.arg("clone"); - cmd.arg(clone_mode.arg()); + if let Some(arg) = clone_mode.arg() { + cmd.arg(arg); + } cmd.arg(repo_url.as_str()); cmd.arg(output_dir); debug!("{cmd:#?}"); @@ -151,14 +158,17 @@ pub enum CloneMode { Bare, /// Equivalent to `git clone --mirror` Mirror, + /// Standard clone with a working tree + Checkout, } impl CloneMode { /// Return the CLI argument for this clone mode. - pub fn arg(&self) -> &str { + pub fn arg(&self) -> Option<&str> { match self { - Self::Bare => "--bare", - Self::Mirror => "--mirror", + Self::Bare => Some("--bare"), + Self::Mirror => Some("--mirror"), + Self::Checkout => None, } } } @@ -183,8 +193,9 @@ mod tests { #[test] fn test_clone_mode_arg() { - assert_eq!(CloneMode::Bare.arg(), "--bare"); - assert_eq!(CloneMode::Mirror.arg(), "--mirror"); + assert_eq!(CloneMode::Bare.arg(), Some("--bare")); + assert_eq!(CloneMode::Mirror.arg(), Some("--mirror")); + assert_eq!(CloneMode::Checkout.arg(), None); } #[test] diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 7999988..19f6b4d 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -12,10 +12,7 @@ use crate::blob::BlobIdMap; use crate::{ blob::BlobMetadata, cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode}, - scan, - }, + commands::{github::GitCloneMode, github::GitHistoryMode, scan}, global, }, findings_store, @@ -42,16 +39,20 @@ pub fn clone_or_update_git_repos( datastore: &Arc>, ) -> Result> { let mut input_roots = args.input_specifier_args.path_inputs.clone(); - if repo_urls.is_empty() || args.input_specifier_args.git_history == GitHistoryMode::None { + if repo_urls.is_empty() { return Ok(input_roots); } info!("{} Git URLs to fetch", repo_urls.len()); for repo_url in repo_urls { debug!("Need to 
fetch {repo_url}") } - let clone_mode = match args.input_specifier_args.git_clone { - GitCloneMode::Mirror => CloneMode::Mirror, - GitCloneMode::Bare => CloneMode::Bare, + let clone_mode = if args.input_specifier_args.git_history == GitHistoryMode::None { + CloneMode::Checkout + } else { + match args.input_specifier_args.git_clone { + GitCloneMode::Mirror => CloneMode::Mirror, + GitCloneMode::Bare => CloneMode::Bare, + } }; let git = Git::new(global_args.ignore_certs); @@ -68,6 +69,7 @@ pub fn clone_or_update_git_repos( } else { ProgressBar::hidden() }; + for repo_url in repo_urls { let output_dir = { let datastore = datastore.lock().unwrap(); diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 0b55799..3903ddb 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -139,3 +139,106 @@ fn test_gitlab_remote_scan() -> Result<()> { drop(rt); Ok(()) } + + +#[test] +fn test_gitlab_remote_scan_no_history() -> Result<()> { + let temp_dir = TempDir::new().context("tmp dir")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/")?, + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/")?, + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: 
None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::None, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16_384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + let rt = Runtime::new()?; + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + + rt.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + + let ds = datastore.lock().unwrap(); + let findings = ds.get_matches(); + let total = findings.len(); + let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); + + assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); + + let exit_code = determine_exit_code(total, validated); + assert!( + exit_code >= 200, + "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" + ); + + drop(rt); + Ok(()) +} \ No newline at end of file