Remote scans with --git-history=none now clone repositories with a working tree and scan the current files instead of erroring with 'No inputs to scan.'

This commit is contained in:
Mick Grove 2025-08-06 19:15:50 -07:00
commit a81cfb963a
6 changed files with 140 additions and 19 deletions

View file

@ -2,6 +2,9 @@
All notable changes to this project will be documented in this file.
## [1.35.0]
- Remote scans with `--git-history=none` now clone repositories with a working tree and scan the current files instead of erroring with "No inputs to scan".
## [1.34.0]
- Use system TLS root certificates to support self-hosted GitLab instances with internal CAs
- Added new rule: Coze personal access token

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.34.0"
version = "1.35.0"
description = "MongoDB's blazingly fast secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true

View file

@ -5,16 +5,17 @@
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
Kingfisher is a blazingly fast secret-scanning and validation tool built in Rust. It combines Intel's hardware-accelerated Hyperscan regex engine with language-aware parsing via Tree-Sitter, and **ships with hundreds of built-in rules** to detect, validate, and triage secrets before they ever reach production.
Kingfisher is a blazingly fast secret-scanning and live validation tool built in Rust. It combines Intel's hardware-accelerated Hyperscan regex engine with language-aware parsing via Tree-Sitter, and **ships with hundreds of built-in rules** to detect, validate, and triage secrets before they ever reach production.
</p>
Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.com/praetorian-inc/noseyparker), and is built atop their incredible work and the work contributed by the Nosey Parker community.
## What Kingfisher Adds
- **Live validation** via cloud-provider APIs
- **Language-aware detection** (source-code parsing) for ~20 languages
- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages
- **Compressed Files**: Supports extracting and scanning compressed files for secrets
- **Baseline mode**: ignore known secrets, flag only new ones
- **Language-aware detection** (source-code parsing) for ~20 languages
- **Native Windows** binary
@ -27,6 +28,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co
- **Jira issues**: JQL-driven scans with `--jira-url` and `--jql`
- **Slack messages**: query-based scans with `--slack-query`
- **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous
- **Compressed Files**: Supports extracting and scanning compressed files for secrets
- **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md))
**Learn more:** [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation)

View file

@ -105,8 +105,13 @@ impl Git {
let _span = debug_span!("git_update", "{repo_url} {}", output_dir.display()).entered();
debug!("Attempting to update clone of {repo_url} at {}", output_dir.display());
let mut cmd = self.git();
cmd.arg("--git-dir");
cmd.arg(output_dir);
if output_dir.join(".git").is_dir() {
cmd.arg("-C");
cmd.arg(output_dir);
} else {
cmd.arg("--git-dir");
cmd.arg(output_dir);
}
cmd.arg("remote");
cmd.arg("update");
cmd.arg("--prune");
@ -129,7 +134,9 @@ impl Git {
debug!("Attempting to create fresh clone of {} at {}", repo_url, output_dir.display());
let mut cmd = self.git();
cmd.arg("clone");
cmd.arg(clone_mode.arg());
if let Some(arg) = clone_mode.arg() {
cmd.arg(arg);
}
cmd.arg(repo_url.as_str());
cmd.arg(output_dir);
debug!("{cmd:#?}");
@ -151,14 +158,17 @@ pub enum CloneMode {
Bare,
/// Equivalent to `git clone --mirror`
Mirror,
/// Standard clone with a working tree
Checkout,
}
impl CloneMode {
/// Return the CLI argument for this clone mode.
///
/// `Bare` and `Mirror` map to the `--bare` and `--mirror` flags. `Checkout` has no
/// flag of its own, so in the `Option`-returning form it maps to `None`.
pub fn arg(&self) -> &str {
pub fn arg(&self) -> Option<&str> {
match self {
Self::Bare => "--bare",
Self::Mirror => "--mirror",
Self::Bare => Some("--bare"),
Self::Mirror => Some("--mirror"),
Self::Checkout => None,
}
}
}
@ -183,8 +193,9 @@ mod tests {
// Pins the value `CloneMode::arg` yields for each variant: a flag string for `Bare` and
// `Mirror`, and `None` for `Checkout`.
#[test]
fn test_clone_mode_arg() {
assert_eq!(CloneMode::Bare.arg(), "--bare");
assert_eq!(CloneMode::Mirror.arg(), "--mirror");
assert_eq!(CloneMode::Bare.arg(), Some("--bare"));
assert_eq!(CloneMode::Mirror.arg(), Some("--mirror"));
assert_eq!(CloneMode::Checkout.arg(), None);
}
#[test]

View file

@ -12,10 +12,7 @@ use crate::blob::BlobIdMap;
use crate::{
blob::BlobMetadata,
cli::{
commands::{
github::{GitCloneMode, GitHistoryMode},
scan,
},
commands::{github::GitCloneMode, github::GitHistoryMode, scan},
global,
},
findings_store,
@ -42,16 +39,20 @@ pub fn clone_or_update_git_repos(
datastore: &Arc<Mutex<findings_store::FindingsStore>>,
) -> Result<Vec<PathBuf>> {
let mut input_roots = args.input_specifier_args.path_inputs.clone();
if repo_urls.is_empty() || args.input_specifier_args.git_history == GitHistoryMode::None {
if repo_urls.is_empty() {
return Ok(input_roots);
}
info!("{} Git URLs to fetch", repo_urls.len());
for repo_url in repo_urls {
debug!("Need to fetch {repo_url}")
}
let clone_mode = match args.input_specifier_args.git_clone {
GitCloneMode::Mirror => CloneMode::Mirror,
GitCloneMode::Bare => CloneMode::Bare,
let clone_mode = if args.input_specifier_args.git_history == GitHistoryMode::None {
CloneMode::Checkout
} else {
match args.input_specifier_args.git_clone {
GitCloneMode::Mirror => CloneMode::Mirror,
GitCloneMode::Bare => CloneMode::Bare,
}
};
let git = Git::new(global_args.ignore_certs);
@ -68,6 +69,7 @@ pub fn clone_or_update_git_repos(
} else {
ProgressBar::hidden()
};
for repo_url in repo_urls {
let output_dir = {
let datastore = datastore.lock().unwrap();

View file

@ -139,3 +139,106 @@ fn test_gitlab_remote_scan() -> Result<()> {
drop(rt);
Ok(())
}
/// End-to-end check that a remote Git URL can be scanned with
/// `git_history: GitHistoryMode::None`.
///
/// The scan targets a live repository URL on gitlab.com, so this test presumably needs
/// network access. It passes when the scan records at least 10 findings and
/// `determine_exit_code` reports a value of 200 or more.
///
/// # Errors
///
/// Returns an error if the temporary directory, any of the fixed URLs, the Tokio runtime,
/// rule loading, or the scan itself fails.
#[test]
fn test_gitlab_remote_scan_no_history() -> Result<()> {
    let temp_dir = TempDir::new().context("tmp dir")?;
    let clone_dir = temp_dir.path().to_path_buf();

    let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git";
    let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL");

    let scan_args = ScanArgs {
        num_jobs: 2,
        rules: RuleSpecifierArgs {
            rules_path: Vec::new(),
            rule: vec!["all".into()],
            load_builtins: true,
        },
        input_specifier_args: InputSpecifierArgs {
            path_inputs: Vec::new(),
            git_url: vec![git_url],
            github_user: Vec::new(),
            github_organization: Vec::new(),
            all_github_organizations: false,
            github_api_url: Url::parse("https://api.github.com/")?,
            github_repo_type: GitHubRepoType::Source,
            gitlab_user: Vec::new(),
            gitlab_group: Vec::new(),
            all_gitlab_groups: false,
            gitlab_api_url: Url::parse("https://gitlab.com/")?,
            gitlab_repo_type: GitLabRepoType::Owner,
            jira_url: None,
            jql: None,
            max_results: 100,
            slack_query: None,
            // Propagate with `?` like the other URL fields instead of panicking.
            slack_api_url: Url::parse("https://slack.com/api/")?,
            s3_bucket: None,
            s3_prefix: None,
            role_arn: None,
            aws_local_profile: None,
            docker_image: Vec::new(),
            // `Bare` is requested here, but `clone_or_update_git_repos` selects a
            // working-tree checkout whenever the history mode is `None`.
            git_clone: GitCloneMode::Bare,
            git_history: GitHistoryMode::None,
            scan_nested_repos: true,
            commit_metadata: true,
        },
        content_filtering_args: ContentFilteringArgs {
            max_file_size_mb: 25.0,
            no_extract_archives: false,
            extraction_depth: 2,
            no_binary: true,
            exclude: Vec::new(),
        },
        confidence: ConfidenceLevel::Medium,
        no_validate: false,
        rule_stats: false,
        only_valid: false,
        min_entropy: None,
        redact: false,
        git_repo_timeout: 1800,
        output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
        no_dedup: true,
        snippet_length: 256,
        baseline_file: None,
        manage_baseline: false,
    };

    let global_args = GlobalArgs {
        verbose: 0,
        quiet: false,
        color: Mode::Auto,
        progress: Mode::Auto,
        no_update_check: false,
        self_update: false,
        ignore_certs: false,
        advanced: AdvancedArgs { rlimit_nofile: 16_384 },
    };

    let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir)));
    let rt = Runtime::new()?;
    let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?);

    // `run_scan` is async; drive it to completion on the runtime created above.
    rt.block_on(async {
        run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await
    })?;

    let ds = datastore.lock().unwrap();
    let findings = ds.get_matches();
    let total = findings.len();
    let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count();

    assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}");

    let exit_code = determine_exit_code(total, validated);
    assert!(
        exit_code >= 200,
        "expected kingfisher to report findings (exit_code >= 200), got {exit_code}"
    );

    drop(rt);
    Ok(())
}