From a81cfb963ada115ec71c8bc035fb7b282bf6bd61 Mon Sep 17 00:00:00 2001
From: Mick Grove
Date: Wed, 6 Aug 2025 19:15:50 -0700
Subject: [PATCH] Remote scans with --git-history=none now clone repositories
with a working tree and scan the current files instead of erroring with 'No
inputs to scan.'
---
CHANGELOG.md | 3 ++
Cargo.toml | 2 +-
README.md | 6 ++-
src/git_binary.rs | 27 ++++++++----
src/scanner/repos.rs | 18 ++++----
tests/int_gitlab.rs | 103 +++++++++++++++++++++++++++++++++++++++++++
6 files changed, 140 insertions(+), 19 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a982f5..0ff7097 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,9 @@
All notable changes to this project will be documented in this file.
+## [1.35.0]
+- Remote scans with `--git-history=none` now clone repositories with a working tree and scan the current files instead of erroring with "No inputs to scan".
+
## [1.34.0]
- Use system TLS root certificates to support self-hosted GitLab instances with internal CAs
- Added new rule: Coze personal access token
diff --git a/Cargo.toml b/Cargo.toml
index 4295167..930a196 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
-version = "1.34.0"
+version = "1.35.0"
description = "MongoDB's blazingly fast secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true
diff --git a/README.md b/README.md
index ef68c1f..1fba1fb 100644
--- a/README.md
+++ b/README.md
@@ -5,16 +5,17 @@
[](https://opensource.org/licenses/Apache-2.0)
-Kingfisher is a blazingly fast secret‑scanning and validation tool built in Rust. It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware parsing via Tree‑Sitter, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production
+Kingfisher is a blazingly fast secret‑scanning and live validation tool built in Rust. It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware parsing via Tree‑Sitter, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production.
Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.com/praetorian-inc/noseyparker), and is built atop their incredible work and the work contributed by the Nosey Parker community.
## What Kingfisher Adds
- **Live validation** via cloud-provider APIs
-- **Language-aware detection** (source-code parsing) for ~20 languages
- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages
+- **Compressed files**: extracts and scans compressed files for secrets
- **Baseline mode**: ignore known secrets, flag only new ones
+- **Language-aware detection** (source-code parsing) for ~20 languages
- **Native Windows** binary
@@ -27,6 +28,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co
- **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql`
- **Slack messages**: query‑based scans with `--slack-query`
- **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous
+- **Compressed files**: extracts and scans compressed files for secrets
- **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md))
**Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation)
diff --git a/src/git_binary.rs b/src/git_binary.rs
index fc2baa1..6e9bd8d 100644
--- a/src/git_binary.rs
+++ b/src/git_binary.rs
@@ -105,8 +105,13 @@ impl Git {
let _span = debug_span!("git_update", "{repo_url} {}", output_dir.display()).entered();
debug!("Attempting to update clone of {repo_url} at {}", output_dir.display());
let mut cmd = self.git();
- cmd.arg("--git-dir");
- cmd.arg(output_dir);
+ if output_dir.join(".git").is_dir() {
+ cmd.arg("-C");
+ cmd.arg(output_dir);
+ } else {
+ cmd.arg("--git-dir");
+ cmd.arg(output_dir);
+ }
cmd.arg("remote");
cmd.arg("update");
cmd.arg("--prune");
@@ -129,7 +134,9 @@ impl Git {
debug!("Attempting to create fresh clone of {} at {}", repo_url, output_dir.display());
let mut cmd = self.git();
cmd.arg("clone");
- cmd.arg(clone_mode.arg());
+ if let Some(arg) = clone_mode.arg() {
+ cmd.arg(arg);
+ }
cmd.arg(repo_url.as_str());
cmd.arg(output_dir);
debug!("{cmd:#?}");
@@ -151,14 +158,17 @@ pub enum CloneMode {
Bare,
/// Equivalent to `git clone --mirror`
Mirror,
+ /// Standard clone with a working tree
+ Checkout,
}
impl CloneMode {
/// Return the CLI argument for this clone mode.
- pub fn arg(&self) -> &str {
+ pub fn arg(&self) -> Option<&str> {
match self {
- Self::Bare => "--bare",
- Self::Mirror => "--mirror",
+ Self::Bare => Some("--bare"),
+ Self::Mirror => Some("--mirror"),
+ Self::Checkout => None,
}
}
}
@@ -183,8 +193,9 @@ mod tests {
#[test]
fn test_clone_mode_arg() {
- assert_eq!(CloneMode::Bare.arg(), "--bare");
- assert_eq!(CloneMode::Mirror.arg(), "--mirror");
+ assert_eq!(CloneMode::Bare.arg(), Some("--bare"));
+ assert_eq!(CloneMode::Mirror.arg(), Some("--mirror"));
+ assert_eq!(CloneMode::Checkout.arg(), None);
}
#[test]
diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs
index 7999988..19f6b4d 100644
--- a/src/scanner/repos.rs
+++ b/src/scanner/repos.rs
@@ -12,10 +12,7 @@ use crate::blob::BlobIdMap;
use crate::{
blob::BlobMetadata,
cli::{
- commands::{
- github::{GitCloneMode, GitHistoryMode},
- scan,
- },
+ commands::{github::GitCloneMode, github::GitHistoryMode, scan},
global,
},
findings_store,
@@ -42,16 +39,20 @@ pub fn clone_or_update_git_repos(
datastore: &Arc>,
) -> Result> {
let mut input_roots = args.input_specifier_args.path_inputs.clone();
- if repo_urls.is_empty() || args.input_specifier_args.git_history == GitHistoryMode::None {
+ if repo_urls.is_empty() {
return Ok(input_roots);
}
info!("{} Git URLs to fetch", repo_urls.len());
for repo_url in repo_urls {
debug!("Need to fetch {repo_url}")
}
- let clone_mode = match args.input_specifier_args.git_clone {
- GitCloneMode::Mirror => CloneMode::Mirror,
- GitCloneMode::Bare => CloneMode::Bare,
+ let clone_mode = if args.input_specifier_args.git_history == GitHistoryMode::None {
+ CloneMode::Checkout
+ } else {
+ match args.input_specifier_args.git_clone {
+ GitCloneMode::Mirror => CloneMode::Mirror,
+ GitCloneMode::Bare => CloneMode::Bare,
+ }
};
let git = Git::new(global_args.ignore_certs);
@@ -68,6 +69,7 @@ pub fn clone_or_update_git_repos(
} else {
ProgressBar::hidden()
};
+
for repo_url in repo_urls {
let output_dir = {
let datastore = datastore.lock().unwrap();
diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs
index 0b55799..3903ddb 100644
--- a/tests/int_gitlab.rs
+++ b/tests/int_gitlab.rs
@@ -139,3 +139,106 @@ fn test_gitlab_remote_scan() -> Result<()> {
drop(rt);
Ok(())
}
+
+
+#[test]
+fn test_gitlab_remote_scan_no_history() -> Result<()> {
+ let temp_dir = TempDir::new().context("tmp dir")?;
+ let clone_dir = temp_dir.path().to_path_buf();
+
+ let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git";
+ let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL");
+
+ let scan_args = ScanArgs {
+ num_jobs: 2,
+ rules: RuleSpecifierArgs {
+ rules_path: Vec::new(),
+ rule: vec!["all".into()],
+ load_builtins: true,
+ },
+ input_specifier_args: InputSpecifierArgs {
+ path_inputs: Vec::new(),
+ git_url: vec![git_url],
+ github_user: Vec::new(),
+ github_organization: Vec::new(),
+ all_github_organizations: false,
+ github_api_url: Url::parse("https://api.github.com/")?,
+ github_repo_type: GitHubRepoType::Source,
+ gitlab_user: Vec::new(),
+ gitlab_group: Vec::new(),
+ all_gitlab_groups: false,
+ gitlab_api_url: Url::parse("https://gitlab.com/")?,
+ gitlab_repo_type: GitLabRepoType::Owner,
+
+ jira_url: None,
+ jql: None,
+ max_results: 100,
+ slack_query: None,
+ slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
+ s3_bucket: None,
+ s3_prefix: None,
+ role_arn: None,
+ aws_local_profile: None,
+ docker_image: Vec::new(),
+ git_clone: GitCloneMode::Bare,
+ git_history: GitHistoryMode::None,
+ scan_nested_repos: true,
+ commit_metadata: true,
+ },
+ content_filtering_args: ContentFilteringArgs {
+ max_file_size_mb: 25.0,
+ no_extract_archives: false,
+ extraction_depth: 2,
+ no_binary: true,
+ exclude: Vec::new(),
+ },
+ confidence: ConfidenceLevel::Medium,
+ no_validate: false,
+ rule_stats: false,
+ only_valid: false,
+ min_entropy: None,
+ redact: false,
+ git_repo_timeout: 1800,
+ output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
+ no_dedup: true,
+ snippet_length: 256,
+ baseline_file: None,
+ manage_baseline: false,
+ };
+
+ let global_args = GlobalArgs {
+ verbose: 0,
+ quiet: false,
+ color: Mode::Auto,
+ progress: Mode::Auto,
+ no_update_check: false,
+ self_update: false,
+ ignore_certs: false,
+ advanced: AdvancedArgs { rlimit_nofile: 16_384 },
+ };
+
+ let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir)));
+ let rt = Runtime::new()?;
+
+ let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?);
+
+ rt.block_on(async {
+ run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await
+ })?;
+
+ let ds = datastore.lock().unwrap();
+ let findings = ds.get_matches();
+ let total = findings.len();
+ let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count();
+
+ assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}");
+
+ let exit_code = determine_exit_code(total, validated);
+ assert!(
+ exit_code >= 200,
+ "expected kingfisher to report findings (exit_code >= 200), got {exit_code}"
+ );
+
+ drop(rt);
+ Ok(())
+}
\ No newline at end of file