From 9f91cbdab63c2c1a8dbae596a48cce6433aa45a9 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 23 Oct 2025 17:02:31 -0700 Subject: [PATCH] added tests for --branch and --since-commit feature --- src/cli/commands/bitbucket.rs | 2 +- src/cli/commands/github.rs | 2 +- src/cli/commands/gitlab.rs | 2 +- src/cli/commands/scan.rs | 22 ++++-- src/gitlab.rs | 30 ++++++-- src/scanner/repos.rs | 2 +- tests/cli_subcommands.rs | 6 +- tests/int_git_diff.rs | 128 ++++++++++++++++++++++++++++++++++ tests/smoke_branch.rs | 119 +++++++++++++++++++++++++++++++ 9 files changed, 296 insertions(+), 17 deletions(-) create mode 100644 tests/int_git_diff.rs create mode 100644 tests/smoke_branch.rs diff --git a/src/cli/commands/bitbucket.rs b/src/cli/commands/bitbucket.rs index 009d51e..247cf13 100644 --- a/src/cli/commands/bitbucket.rs +++ b/src/cli/commands/bitbucket.rs @@ -71,7 +71,7 @@ pub struct BitbucketRepoSpecifiers { pub exclude_repos: Vec, /// Enumerate all accessible workspaces or projects - #[arg(long, alias = "all-bitbucket-workspaces", requires = "bitbucket_api_url")] + #[arg(long, alias = "all-bitbucket-workspaces", requires = "api_url")] pub all_workspaces: bool, /// Filter repositories by type diff --git a/src/cli/commands/github.rs b/src/cli/commands/github.rs index 62194f5..8551fde 100644 --- a/src/cli/commands/github.rs +++ b/src/cli/commands/github.rs @@ -59,7 +59,7 @@ pub struct GitHubRepoSpecifiers { alias = "all-orgs", alias = "all-github-organizations", alias = "all-github-orgs", - requires = "github_api_url" + requires = "api_url" )] pub all_organizations: bool, diff --git a/src/cli/commands/gitlab.rs b/src/cli/commands/gitlab.rs index 73f44a4..bda7e8a 100644 --- a/src/cli/commands/gitlab.rs +++ b/src/cli/commands/gitlab.rs @@ -59,7 +59,7 @@ pub struct GitLabRepoSpecifiers { pub exclude_repos: Vec, /// Repositories for all groups (Enterprise only) - #[arg(long, alias = "all-groups", alias = "all-gitlab-groups", requires = "gitlab_api_url")] + #[arg(long, alias = "all-groups", alias = "all-gitlab-groups", requires = "api_url")] pub all_groups: bool, /// Filter by repository type diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs index 1402f47..8deb595 100644 --- a/src/cli/commands/scan.rs +++ b/src/cli/commands/scan.rs @@ -1,6 +1,6 @@ use anyhow::bail; use clap::{Args, Subcommand, ValueEnum, ValueHint}; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use strum::Display; use tracing::debug; use url::Url; @@ -210,7 +210,7 @@ impl ScanCommandArgs { ScanInputCommand::Github(args) => { if args.specifiers.is_empty() { bail!( - "Specify at least one --user, --org, or use --all-orgs when scanning GitHub" + "You must specify at least one --user, --org, or use --all-orgs when scanning GitHub" ); } if args.list_only { @@ -234,7 +234,7 @@ impl ScanCommandArgs { ScanInputCommand::Gitlab(args) => { if args.specifiers.is_empty() { bail!( - "Specify at least one --user, --group, or use --all-groups when scanning GitLab" + "You must specify at least one --user, --group, or use --all-groups when scanning GitLab" ); } if args.list_only { @@ -283,7 +283,7 @@ impl ScanCommandArgs { ScanInputCommand::Bitbucket(args) => { if args.specifiers.is_empty() { bail!( - "Specify at least one --user, --workspace, --project, or use --all-workspaces when scanning Bitbucket" + "You must specify at least one --user, --workspace, --project, or use --all-workspaces when scanning Bitbucket" ); } if args.list_only { @@ -309,7 +309,7 @@ impl ScanCommandArgs { ScanInputCommand::Azure(args) => { if args.specifiers.is_empty() { bail!( - "Specify at least one --organization, --project, or use --all-projects when scanning Azure DevOps" + "You must specify at least one --organization, --project, or use --all-projects when scanning Azure DevOps" ); } if args.list_only { @@ -333,7 +333,7 @@ impl ScanCommandArgs { ScanInputCommand::Huggingface(args) => { if args.specifiers.is_empty() { bail!( - "Specify at least one --user, --org, --model, --dataset, or --space when scanning Hugging Face" + "You must specify at least one --user, --org, --model, --dataset, or --space when scanning Hugging Face" ); } if args.list_only { @@ -402,6 +402,16 @@ impl ScanCommandArgs { ); } + for path in &self.scan_args.input_specifier_args.path_inputs { + if path.as_path() == Path::new("-") { + continue; + } + + if !path.exists() { + bail!("Error: unrecognized scan target or path does not exist: {}", path.display()); + } + } + if !used_provider_subcommand { self.scan_args.input_specifier_args.emit_deprecated_warnings(); } diff --git a/src/gitlab.rs b/src/gitlab.rs index 17926e3..f5cdbfb 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -20,6 +20,7 @@ use globset::{Glob, GlobSet, GlobSetBuilder}; use indicatif::{ProgressBar, ProgressStyle}; use serde::Deserialize; use serde_json::Value; +use tokio::task; use tracing::warn; use url::{form_urlencoded, Url}; @@ -50,7 +51,7 @@ pub enum RepoType { } /// A struct to hold GitLab repository query specifications -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct RepoSpecifiers { pub user: Vec, pub group: Vec, @@ -209,7 +210,23 @@ pub async fn enumerate_repo_urls( repo_specifiers: &RepoSpecifiers, gitlab_url: Url, ignore_certs: bool, - mut progress: Option<&mut ProgressBar>, + progress: Option, +) -> Result> { + let specifiers = repo_specifiers.clone(); + let repo_urls = task::spawn_blocking(move || { + let progress_ref = progress.as_ref(); + enumerate_repo_urls_blocking(&specifiers, gitlab_url, ignore_certs, progress_ref) + }) + .await??; + + Ok(repo_urls) +} + +fn enumerate_repo_urls_blocking( + repo_specifiers: &RepoSpecifiers, + gitlab_url: Url, + ignore_certs: bool, + progress: Option<&ProgressBar>, ) -> Result> { let client = create_gitlab_client(&gitlab_url, ignore_certs)?; let mut repo_urls = Vec::new(); @@ -249,7 +266,7 @@ pub async fn enumerate_repo_urls( repo_urls.push(proj.http_url_to_repo); } - if let Some(pb) = progress.as_mut() { + if let Some(pb) = progress { pb.inc(1); } } @@ -286,7 +303,7 @@ pub async fn enumerate_repo_urls( } repo_urls.push(proj.http_url_to_repo); } - if let Some(pb) = progress.as_mut() { + if let Some(pb) = progress { pb.inc(1); } } @@ -319,7 +336,7 @@ pub async fn list_repositories( }; // Create a progress bar for displaying status - let mut progress = if progress_enabled { + let progress = if progress_enabled { let style = ProgressStyle::with_template("{spinner} {msg} [{elapsed_precise}]") .expect("progress bar style template should compile"); let pb = ProgressBar::new_spinner().with_style(style).with_message("Fetching repositories"); @@ -330,7 +347,8 @@ pub async fn list_repositories( }; let repo_urls = - enumerate_repo_urls(&repo_specifiers, api_url, ignore_certs, Some(&mut progress)).await?; + enumerate_repo_urls(&repo_specifiers, api_url, ignore_certs, Some(progress.clone())) + .await?; // Print repositories for url in repo_urls { diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index e9fc34d..f041049 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -214,7 +214,7 @@ pub async fn enumerate_gitlab_repos( &repo_specifiers, api_url, global_args.ignore_certs, - Some(&mut progress), + Some(progress.clone()), ) .await .context("Failed to enumerate GitLab repositories")?; diff --git a/tests/cli_subcommands.rs b/tests/cli_subcommands.rs index f7af898..d97e713 100644 --- a/tests/cli_subcommands.rs +++ b/tests/cli_subcommands.rs @@ -1056,13 +1056,17 @@ mod cross_platform { mod legacy_compatibility { use super::*; + use std::path::{Path, PathBuf}; #[test] fn scan_path_still_works() { // The old syntax of scanning a local path should still work + let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + + let test_file = root.join("testdata").join("generic_secrets.py"); Command::cargo_bin("kingfisher") .unwrap() - .args(["scan", ".", "--no-validate", "--no-update-check"]) + .args(["scan", test_file.to_str().expect("REASON"), "--no-validate", "--no-update-check"]) .assert() .code(predicates::function::function(|code: &i32| { // May succeed or fail depending on rules, but shouldn't error on syntax diff --git a/tests/int_git_diff.rs b/tests/int_git_diff.rs new file mode 100644 index 0000000..801eb0f --- /dev/null +++ b/tests/int_git_diff.rs @@ -0,0 +1,128 @@ +use std::{collections::HashSet, fs, path::Path, process::Command}; + +use anyhow::Result; +use assert_cmd::prelude::*; +use git2::{Repository, Signature}; +use serde_json::Value; +use tempfile::TempDir; + +const CONFIG_PY: &str = r"# Configuration file packed with representative secrets\n\nAWS_ACCESS_SECRET_KEY = 'UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgT5D/'\nGCP_PRIVATE_KEY_ID = 'c4c474d61701fd6fd4191883b8fea9a8411bf771'\nGCP_PRIVATE_KEY = '-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQChoGF4j4AUnAfj\nbVGP/tSJqAyeYiZfOf4UCwd9+B/2oej3rsiuZmx506kuWVN4Jhg8UocLn5l/OfqU\n2MyV3Mq5VjtGQjYWF7a/Y04yEMRWf+spiJp1iYGS1vTOVjuyYyMa9h+8sbDiBFAD\nBcZejB4FQHxstFtmlnehf7cieMLTa3Wezv8LX8pH0q+pEynuvusQkhe8uPmjUsuo\nWG5W5CgVchQVzQf9eB5xtyt85t6VozMvAEI4h+WwZRdn+EWrQi+z8A8vXF7iUDmu\n2lpypLExcZBrZINMh8ecs8B34JNIYzO4Hod7RB4IwXN8PG/5RHlb7qQbzXSxir2B\n17gPPf8JAgMBAAECggEAHbkdG7sGIqQkJjypInpKc0tKkMj7hgkn8t8pYE7kb+qM\nKZqE0N/IpKnaY8ntGfwlelhx+d7+r0FGFh/9lbTOOkHDslLEWBFB3BYC4B2pwb+S\nC2gSAboJMGwkBpsgrNhi8RcgtIaYASSqYzfpaGNLtQsMJsCPS4Ex3GscjnQXXiJK\n5MExF8VYZVvT8Hq2lvECUpFMTWwM2o/QndwjLrEq/vRI3n7PmweXZGKgLuyOjpWk\ny80qa/IUlB6xO4XHvjnaEGxRq1LSF8hgEGU2Nmd8GDRT5ZLkSk+TMtqPrEbHEi6n\n4pZGndX0XmttWkKcUX/NwB/WZC5ROEsUl8Fyw+T5RQKBgQDMfgFB6Xx+Na2iB33w\nkhzNxo4HPCJzxeAB0zCRpfDpM1GtqK6JsIxvrci5lDAKaP8TQTr/gQxXpbJjE1Dl\n3VWGzFbW4czSw+AqBFl1he20RZhGjATcDCCzSOyEiRhqoJwTPTvqcXRK8NbKGfJR\nV6b4Auw+McNhnEUyfrZzguV93QKBgQDKVlLPhb4O84mINKFK73QFf2xlns0IHI0m\nWqNvY7HxJP9WUH5FgX4r/cO6aIafg+u5j0gNPDd2JD67htnY85EH/n5KNhb9ytsN\n+hkDeidFvdOrD+h9YFHkNoNy3XHwrQ0mtYRj2FBWhhpBsVlHVO2KcLe0TvivinN2\nfIac2uZhHQKBgAYE23KeNbzdRZwUTl+rXU+tPXb3DSiNNXe4SKCw2rNygD/1TBXf\nbXLIEbVsqDFWP9PIQr1Mhhl6VhLWebYaWq8aCqBOiyHVBB8Ye62a4JFCzyWcb3Qu\nozPDvLp18pMI4S8ryTywVDT0e839D4XXZ6G7LEr0WgTgfaTr1+D0hF69AoGBAKIQ\nxKGeAV6eaOGlLjAEXgztRFic+qLto409+jyFQQji1nY/YPSxROtdhkGv6WypUM0/\nW7nmKpJBc9HmsGUaqmcZy/QLIR1FN3IZiaGEXSJ6aqlQw6pw1QcTNvRxNQtOwQLp\nT1Jd9/Nl1HAb6mO9PcqugCY3Pu/z2InmMjg/CVptAoGAMpwMsoen4xEHv4uGZVt8\n8wlvQ2fYnso4wgRSYAkjh8cOHjB85eazlSAsaJvmQ9D1rV086Re5zKxKjrjQWdaT\nRMyIZJMJYZr6c8RKmabOfO1oc5urDdETQjGi3qXJuiu86wp7IoBINdmBEPRl6+m3\nGqJA6hgV5niKAq4sJtv9EW4='\nGOOGLE_API_KEY = 'AIzaSyBUPHAjZl3n8Eza66ka6B78iVyPteC5MgM'\nGITHUB_KEY = '88df97769ab3185f2c0b2a73fdae1b27d89409ca'\nGITHUB_APP_SECRET = '895b1da4051440395f90e1411c4a1150e423c922'\nSLACK_APP_TOKEN = 'xapp-1-A01C259PH2A-1440755929120-7d5241948a2cc1b464add85df8a8e75f9040ae2869f6599926ed0b9dcafdb32b'\nSLACK_OAUTH_ACCESS_TOKEN = 'xoxb-730191371696-1413868247813-IG7Z6nYevC2hdviE3aJhb5kY'\nSLACK_WEBHOOK = 'https://hooks.slack.com/services/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'\nSTRIPE_RESTRICTED_KEY = 'rk_live_z59MoCJoFc114PpJlP1OnB1O'\nTWILIO_API_KEY = 'SK5d1d319A6Acf7EC9BDeDb8CCe4D76BA8'\n"; + +const CANARY_TOKEN: &str = r"[default]\naws_access_key_id = AKIAX24QKKOLDJMZ5Y2T\naws_secret_access_key = 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\noutput = json\nregion = us-east-2\n"; + +#[test] +fn scan_branch_and_since_commit_diff_behaviour() -> Result<()> { + let workspace = TempDir::new()?; + let repo_dir = workspace.path().join("SecretsTest"); + fs::create_dir(&repo_dir)?; + let repo = Repository::init(&repo_dir)?; + let sig = Signature::now("tester", "tester@example.com")?; + + fs::write(repo_dir.join("config.py"), CONFIG_PY)?; + + let mut index = repo.index()?; + index.add_path(Path::new("config.py"))?; + let tree_id = index.write_tree()?; + let tree = repo.find_tree(tree_id)?; + let initial_commit = repo.commit(Some("HEAD"), &sig, &sig, "seed secrets", &tree, &[])?; + + let base_commit = repo.find_commit(initial_commit)?; + repo.branch("main", &base_commit, true)?; + repo.set_head("refs/heads/main")?; + + repo.branch("feature-1", &base_commit, true)?; + repo.set_head("refs/heads/feature-1")?; + + fs::write(repo_dir.join("canary-token"), CANARY_TOKEN)?; + let mut index = repo.index()?; + index.add_path(Path::new("canary-token"))?; + let tree_id = index.write_tree()?; + let tree = repo.find_tree(tree_id)?; + let feature_commit = + repo.commit(Some("HEAD"), &sig, &sig, "add canary token", &tree, &[&base_commit])?; + + let repo_path = repo_dir.to_string_lossy().to_string(); + let base_commit_hex = initial_commit.to_string(); + let feature_commit_hex = feature_commit.to_string(); + + let branch_scan = Command::cargo_bin("kingfisher")? + .args([ + "scan", + &repo_path, + "--branch", + &base_commit_hex, + "--no-validate", + "--no-update-check", + "--format", + "json", + ]) + .output()?; + assert_eq!(branch_scan.status.code(), Some(200)); + + let findings: Vec = serde_json::from_slice(&branch_scan.stdout)?; + let expected_rules: HashSet = [ + "KINGFISHER.AWS.2", + "KINGFISHER.GCP.3", + "KINGFISHER.PRIVKEY.2", + "KINGFISHER.PEM.1", + "KINGFISHER.GOOGLE.7", + "KINGFISHER.GITHUB.6", + "KINGFISHER.SLACK.1", + "KINGFISHER.SLACK.2", + "KINGFISHER.SLACK.4", + "KINGFISHER.STRIPE.2", + "KINGFISHER.TWILIO.1", + ] + .into_iter() + .map(|s| s.to_string()) + .collect(); + + + // KINGFISHER.GITHUB.6 should appear twice (two separate secrets) + assert_eq!(findings.len(), expected_rules.len() + 1); + + let mut rule_hits: HashSet = HashSet::new(); + let mut github_hit_count = 0usize; + for finding in &findings { + let rule_id = finding["rule"]["id"].as_str().unwrap(); + if rule_id == "KINGFISHER.GITHUB.6" { + github_hit_count += 1; + } else { + rule_hits.insert(rule_id.to_string()); + } + + assert_eq!(finding["finding"]["path"].as_str().unwrap(), "config.py"); + assert_eq!( + finding["finding"]["git_metadata"]["commit"]["id"].as_str().unwrap(), + base_commit_hex + ); + } + + assert_eq!(github_hit_count, 2, "expected two GitHub secret detections"); + assert_eq!(rule_hits, expected_rules); + + let diff_scan = Command::cargo_bin("kingfisher")? + .args([ + "scan", + &repo_path, + "--branch", + "feature-1", + "--since-commit", + &base_commit_hex, + "--no-validate", + "--no-update-check", + "--format", + "json", + ]) + .output()?; + assert_eq!(diff_scan.status.code(), Some(200)); + + let diff_findings: Vec = serde_json::from_slice(&diff_scan.stdout)?; + assert_eq!(diff_findings.len(), 1, "expected only the canary secret in diff scan"); + let diff_finding = &diff_findings[0]; + assert_eq!(diff_finding["rule"]["id"], "KINGFISHER.AWS.2"); + assert_eq!(diff_finding["finding"]["path"], "canary-token"); + assert_eq!(diff_finding["finding"]["git_metadata"]["commit"]["id"], feature_commit_hex); + + Ok(()) +} diff --git a/tests/smoke_branch.rs b/tests/smoke_branch.rs new file mode 100644 index 0000000..a08e73a --- /dev/null +++ b/tests/smoke_branch.rs @@ -0,0 +1,119 @@ +// tests/smoke_branch.rs +// +// Integration tests that exercise `kingfisher scan` against Git branches and commit +// references using locally constructed repositories. These ensure that the +// `--branch` and `--since-commit` flags behave as expected when scanning a repo +// without validation. + +use std::fs; +use std::path::Path; + +use assert_cmd::Command; +use git2::{build::CheckoutBuilder, BranchType, Repository, Signature}; +use predicates::{prelude::PredicateBooleanExt, str::contains}; +use tempfile::tempdir; + +#[test] +fn scan_by_commit_and_branch_diff() -> anyhow::Result<()> { + let dir = tempdir()?; + let repo_dir = dir.path().join("repo"); + let repo = Repository::init(&repo_dir)?; + let signature = Signature::now("tester", "tester@example.com")?; + + // Commit an initial config file packed with known test secrets. We'll scan + // this commit directly via `--branch ` in the first assertion. + let config_path = repo_dir.join("config.py"); + let config_contents = r"# test configuration with multiple secrets +AWS_ACCESS_SECRET_KEY = 'UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgT5D/' +GCP_PRIVATE_KEY_ID = 'c4c474d61701fd6fd4191883b8fea9a8411bf771' +GOOGLE_API_KEY = 'AIzaSyBUPHAjZl3n8Eza66ka6B78iVyPteC5MgM' +"; + fs::create_dir_all(config_path.parent().unwrap())?; + fs::write(&config_path, config_contents)?; + + let mut index = repo.index()?; + index.add_path(Path::new("config.py"))?; + let tree_id = index.write_tree()?; + let tree = repo.find_tree(tree_id)?; + let initial_commit_id = + repo.commit(Some("HEAD"), &signature, &signature, "initial", &tree, &[])?; + let initial_commit = repo.find_commit(initial_commit_id)?; + let initial_commit_hex = initial_commit_id.to_string(); + + // Create a "main" branch pointing at the initial commit to mirror the + // documented example, but keep the default branch checkout untouched. Some + // Git installations already default to `main`, so only create the branch + // if it does not exist yet. + if repo.find_branch("main", BranchType::Local).is_err() { + repo.branch("main", &initial_commit, false)?; + } + + // Create a feature branch that introduces a new secret file. The diff based + // scan later on should report only this file when paired with --since-commit. + repo.branch("feature-1", &initial_commit, true)?; + repo.set_head("refs/heads/feature-1")?; + repo.checkout_head(Some(CheckoutBuilder::new().force()))?; + + let canary_path = repo_dir.join("canary-token"); + let canary_contents = r"[default] +aws_access_key_id = AKIAX24QKKOLDJMZ5Y2T +aws_secret_access_key = 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 +"; + fs::write(&canary_path, canary_contents)?; + + let mut index = repo.index()?; + index.add_path(Path::new("config.py"))?; + index.add_path(Path::new("canary-token"))?; + let tree_id = index.write_tree()?; + let tree = repo.find_tree(tree_id)?; + let parent_commit = repo.head()?.peel_to_commit()?; + repo.commit( + Some("HEAD"), + &signature, + &signature, + "add canary token", + &tree, + &[&parent_commit], + )?; + + // ── scan the repository by commit hash ─────────────────────────────────── + Command::cargo_bin("kingfisher")? + .args([ + "scan", + repo_dir.to_str().unwrap(), + "--branch", + initial_commit_hex.as_str(), + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout( + contains("AWS SECRET ACCESS KEY") + .and(contains("config.py")) + .and(contains(initial_commit_hex.as_str())), + ); + + // ── scan only the diff between feature-1 and the merge base ───────────── + Command::cargo_bin("kingfisher")? + .args([ + "scan", + repo_dir.to_str().unwrap(), + "--branch", + "feature-1", + "--since-commit", + initial_commit_hex.as_str(), + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout( + contains("canary-token") + .and(contains("AWS SECRET ACCESS KEY")) + .and(contains("efnegoUp/WXc3XwlL77dXu1aKIICzvz+n+7Sz88i")), + ) + .stdout(contains("config.py").not()); + + Ok(()) +} \ No newline at end of file