added tests for --branch and --since-commit feature

This commit is contained in:
Mick Grove 2025-10-23 17:02:31 -07:00
commit 9f91cbdab6
9 changed files with 296 additions and 17 deletions

View file

@ -71,7 +71,7 @@ pub struct BitbucketRepoSpecifiers {
pub exclude_repos: Vec<String>,
/// Enumerate all accessible workspaces or projects
#[arg(long, alias = "all-bitbucket-workspaces", requires = "bitbucket_api_url")]
#[arg(long, alias = "all-bitbucket-workspaces", requires = "api_url")]
pub all_workspaces: bool,
/// Filter repositories by type

View file

@ -59,7 +59,7 @@ pub struct GitHubRepoSpecifiers {
alias = "all-orgs",
alias = "all-github-organizations",
alias = "all-github-orgs",
requires = "github_api_url"
requires = "api_url"
)]
pub all_organizations: bool,

View file

@ -59,7 +59,7 @@ pub struct GitLabRepoSpecifiers {
pub exclude_repos: Vec<String>,
/// Repositories for all groups (Enterprise only)
#[arg(long, alias = "all-groups", alias = "all-gitlab-groups", requires = "gitlab_api_url")]
#[arg(long, alias = "all-groups", alias = "all-gitlab-groups", requires = "api_url")]
pub all_groups: bool,
/// Filter by repository type

View file

@ -1,6 +1,6 @@
use anyhow::bail;
use clap::{Args, Subcommand, ValueEnum, ValueHint};
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use strum::Display;
use tracing::debug;
use url::Url;
@ -210,7 +210,7 @@ impl ScanCommandArgs {
ScanInputCommand::Github(args) => {
if args.specifiers.is_empty() {
bail!(
"Specify at least one --user, --org, or use --all-orgs when scanning GitHub"
"You must specify at least one --user, --org, or use --all-orgs when scanning GitHub"
);
}
if args.list_only {
@ -234,7 +234,7 @@ impl ScanCommandArgs {
ScanInputCommand::Gitlab(args) => {
if args.specifiers.is_empty() {
bail!(
"Specify at least one --user, --group, or use --all-groups when scanning GitLab"
"You must specify at least one --user, --group, or use --all-groups when scanning GitLab"
);
}
if args.list_only {
@ -283,7 +283,7 @@ impl ScanCommandArgs {
ScanInputCommand::Bitbucket(args) => {
if args.specifiers.is_empty() {
bail!(
"Specify at least one --user, --workspace, --project, or use --all-workspaces when scanning Bitbucket"
"You must specify at least one --user, --workspace, --project, or use --all-workspaces when scanning Bitbucket"
);
}
if args.list_only {
@ -309,7 +309,7 @@ impl ScanCommandArgs {
ScanInputCommand::Azure(args) => {
if args.specifiers.is_empty() {
bail!(
"Specify at least one --organization, --project, or use --all-projects when scanning Azure DevOps"
"You must specify at least one --organization, --project, or use --all-projects when scanning Azure DevOps"
);
}
if args.list_only {
@ -333,7 +333,7 @@ impl ScanCommandArgs {
ScanInputCommand::Huggingface(args) => {
if args.specifiers.is_empty() {
bail!(
"Specify at least one --user, --org, --model, --dataset, or --space when scanning Hugging Face"
"You must specify at least one --user, --org, --model, --dataset, or --space when scanning Hugging Face"
);
}
if args.list_only {
@ -402,6 +402,16 @@ impl ScanCommandArgs {
);
}
for path in &self.scan_args.input_specifier_args.path_inputs {
if path.as_path() == Path::new("-") {
continue;
}
if !path.exists() {
bail!("Error: unrecognized scan target or path does not exist: {}", path.display());
}
}
if !used_provider_subcommand {
self.scan_args.input_specifier_args.emit_deprecated_warnings();
}

View file

@ -20,6 +20,7 @@ use globset::{Glob, GlobSet, GlobSetBuilder};
use indicatif::{ProgressBar, ProgressStyle};
use serde::Deserialize;
use serde_json::Value;
use tokio::task;
use tracing::warn;
use url::{form_urlencoded, Url};
@ -50,7 +51,7 @@ pub enum RepoType {
}
/// A struct to hold GitLab repository query specifications
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct RepoSpecifiers {
pub user: Vec<String>,
pub group: Vec<String>,
@ -209,7 +210,23 @@ pub async fn enumerate_repo_urls(
repo_specifiers: &RepoSpecifiers,
gitlab_url: Url,
ignore_certs: bool,
mut progress: Option<&mut ProgressBar>,
progress: Option<ProgressBar>,
) -> Result<Vec<String>> {
let specifiers = repo_specifiers.clone();
let repo_urls = task::spawn_blocking(move || {
let progress_ref = progress.as_ref();
enumerate_repo_urls_blocking(&specifiers, gitlab_url, ignore_certs, progress_ref)
})
.await??;
Ok(repo_urls)
}
fn enumerate_repo_urls_blocking(
repo_specifiers: &RepoSpecifiers,
gitlab_url: Url,
ignore_certs: bool,
progress: Option<&ProgressBar>,
) -> Result<Vec<String>> {
let client = create_gitlab_client(&gitlab_url, ignore_certs)?;
let mut repo_urls = Vec::new();
@ -249,7 +266,7 @@ pub async fn enumerate_repo_urls(
repo_urls.push(proj.http_url_to_repo);
}
if let Some(pb) = progress.as_mut() {
if let Some(pb) = progress {
pb.inc(1);
}
}
@ -286,7 +303,7 @@ pub async fn enumerate_repo_urls(
}
repo_urls.push(proj.http_url_to_repo);
}
if let Some(pb) = progress.as_mut() {
if let Some(pb) = progress {
pb.inc(1);
}
}
@ -319,7 +336,7 @@ pub async fn list_repositories(
};
// Create a progress bar for displaying status
let mut progress = if progress_enabled {
let progress = if progress_enabled {
let style = ProgressStyle::with_template("{spinner} {msg} [{elapsed_precise}]")
.expect("progress bar style template should compile");
let pb = ProgressBar::new_spinner().with_style(style).with_message("Fetching repositories");
@ -330,7 +347,8 @@ pub async fn list_repositories(
};
let repo_urls =
enumerate_repo_urls(&repo_specifiers, api_url, ignore_certs, Some(&mut progress)).await?;
enumerate_repo_urls(&repo_specifiers, api_url, ignore_certs, Some(progress.clone()))
.await?;
// Print repositories
for url in repo_urls {

View file

@ -214,7 +214,7 @@ pub async fn enumerate_gitlab_repos(
&repo_specifiers,
api_url,
global_args.ignore_certs,
Some(&mut progress),
Some(progress.clone()),
)
.await
.context("Failed to enumerate GitLab repositories")?;

View file

@ -1056,13 +1056,17 @@ mod cross_platform {
mod legacy_compatibility {
use super::*;
use std::path::{Path, PathBuf};
#[test]
fn scan_path_still_works() {
// The old syntax of scanning a local path should still work
let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let test_file = root.join("testdata").join("generic_secrets.py");
Command::cargo_bin("kingfisher")
.unwrap()
.args(["scan", ".", "--no-validate", "--no-update-check"])
.args(["scan", test_file.to_str().expect("REASON"), "--no-validate", "--no-update-check"])
.assert()
.code(predicates::function::function(|code: &i32| {
// May succeed or fail depending on rules, but shouldn't error on syntax

128
tests/int_git_diff.rs Normal file
View file

@ -0,0 +1,128 @@
use std::{collections::HashSet, fs, path::Path, process::Command};
use anyhow::Result;
use assert_cmd::prelude::*;
use git2::{Repository, Signature};
use serde_json::Value;
use tempfile::TempDir;
const CONFIG_PY: &str = r"# Configuration file packed with representative secrets\n\nAWS_ACCESS_SECRET_KEY = 'UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgT5D/'\nGCP_PRIVATE_KEY_ID = 'c4c474d61701fd6fd4191883b8fea9a8411bf771'\nGCP_PRIVATE_KEY = '-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQChoGF4j4AUnAfj\nbVGP/tSJqAyeYiZfOf4UCwd9+B/2oej3rsiuZmx506kuWVN4Jhg8UocLn5l/OfqU\n2MyV3Mq5VjtGQjYWF7a/Y04yEMRWf+spiJp1iYGS1vTOVjuyYyMa9h+8sbDiBFAD\nBcZejB4FQHxstFtmlnehf7cieMLTa3Wezv8LX8pH0q+pEynuvusQkhe8uPmjUsuo\nWG5W5CgVchQVzQf9eB5xtyt85t6VozMvAEI4h+WwZRdn+EWrQi+z8A8vXF7iUDmu\n2lpypLExcZBrZINMh8ecs8B34JNIYzO4Hod7RB4IwXN8PG/5RHlb7qQbzXSxir2B\n17gPPf8JAgMBAAECggEAHbkdG7sGIqQkJjypInpKc0tKkMj7hgkn8t8pYE7kb+qM\nKZqE0N/IpKnaY8ntGfwlelhx+d7+r0FGFh/9lbTOOkHDslLEWBFB3BYC4B2pwb+S\nC2gSAboJMGwkBpsgrNhi8RcgtIaYASSqYzfpaGNLtQsMJsCPS4Ex3GscjnQXXiJK\n5MExF8VYZVvT8Hq2lvECUpFMTWwM2o/QndwjLrEq/vRI3n7PmweXZGKgLuyOjpWk\ny80qa/IUlB6xO4XHvjnaEGxRq1LSF8hgEGU2Nmd8GDRT5ZLkSk+TMtqPrEbHEi6n\n4pZGndX0XmttWkKcUX/NwB/WZC5ROEsUl8Fyw+T5RQKBgQDMfgFB6Xx+Na2iB33w\nkhzNxo4HPCJzxeAB0zCRpfDpM1GtqK6JsIxvrci5lDAKaP8TQTr/gQxXpbJjE1Dl\n3VWGzFbW4czSw+AqBFl1he20RZhGjATcDCCzSOyEiRhqoJwTPTvqcXRK8NbKGfJR\nV6b4Auw+McNhnEUyfrZzguV93QKBgQDKVlLPhb4O84mINKFK73QFf2xlns0IHI0m\nWqNvY7HxJP9WUH5FgX4r/cO6aIafg+u5j0gNPDd2JD67htnY85EH/n5KNhb9ytsN\n+hkDeidFvdOrD+h9YFHkNoNy3XHwrQ0mtYRj2FBWhhpBsVlHVO2KcLe0TvivinN2\nfIac2uZhHQKBgAYE23KeNbzdRZwUTl+rXU+tPXb3DSiNNXe4SKCw2rNygD/1TBXf\nbXLIEbVsqDFWP9PIQr1Mhhl6VhLWebYaWq8aCqBOiyHVBB8Ye62a4JFCzyWcb3Qu\nozPDvLp18pMI4S8ryTywVDT0e839D4XXZ6G7LEr0WgTgfaTr1+D0hF69AoGBAKIQ\nxKGeAV6eaOGlLjAEXgztRFic+qLto409+jyFQQji1nY/YPSxROtdhkGv6WypUM0/\nW7nmKpJBc9HmsGUaqmcZy/QLIR1FN3IZiaGEXSJ6aqlQw6pw1QcTNvRxNQtOwQLp\nT1Jd9/Nl1HAb6mO9PcqugCY3Pu/z2InmMjg/CVptAoGAMpwMsoen4xEHv4uGZVt8\n8wlvQ2fYnso4wgRSYAkjh8cOHjB85eazlSAsaJvmQ9D1rV086Re5zKxKjrjQWdaT\nRMyIZJMJYZr6c8RKmabOfO1oc5urDdETQjGi3qXJuiu86wp7IoBINdmBEPRl6+m3\nGqJA6hgV5niKAq4sJtv9EW4='\nGOOGLE_API_KEY = 'AIzaSyBUPHAjZl3n8Eza66ka6B78iVyPteC5MgM'\nGITHUB_KEY = '88df97769ab3185f2c0b2a73fdae1b27d89409ca'\nGITHUB_APP_SECRET = '895b1da4051440395f90e1411c4a1150e423c922'\nSLACK_APP_TOKEN = 'xapp-1-A01C259PH2A-1440755929120-7d5241948a2cc1b464add85df8a8e75f9040ae2869f6599926ed0b9dcafdb32b'\nSLACK_OAUTH_ACCESS_TOKEN = 'xoxb-730191371696-1413868247813-IG7Z6nYevC2hdviE3aJhb5kY'\nSLACK_WEBHOOK = 'https://hooks.slack.com/services/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'\nSTRIPE_RESTRICTED_KEY = 'rk_live_z59MoCJoFc114PpJlP1OnB1O'\nTWILIO_API_KEY = 'SK5d1d319A6Acf7EC9BDeDb8CCe4D76BA8'\n";
const CANARY_TOKEN: &str = r"[default]\naws_access_key_id = AKIAX24QKKOLDJMZ5Y2T\naws_secret_access_key = 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\noutput = json\nregion = us-east-2\n";
#[test]
fn scan_branch_and_since_commit_diff_behaviour() -> Result<()> {
let workspace = TempDir::new()?;
let repo_dir = workspace.path().join("SecretsTest");
fs::create_dir(&repo_dir)?;
let repo = Repository::init(&repo_dir)?;
let sig = Signature::now("tester", "tester@example.com")?;
fs::write(repo_dir.join("config.py"), CONFIG_PY)?;
let mut index = repo.index()?;
index.add_path(Path::new("config.py"))?;
let tree_id = index.write_tree()?;
let tree = repo.find_tree(tree_id)?;
let initial_commit = repo.commit(Some("HEAD"), &sig, &sig, "seed secrets", &tree, &[])?;
let base_commit = repo.find_commit(initial_commit)?;
repo.branch("main", &base_commit, true)?;
repo.set_head("refs/heads/main")?;
repo.branch("feature-1", &base_commit, true)?;
repo.set_head("refs/heads/feature-1")?;
fs::write(repo_dir.join("canary-token"), CANARY_TOKEN)?;
let mut index = repo.index()?;
index.add_path(Path::new("canary-token"))?;
let tree_id = index.write_tree()?;
let tree = repo.find_tree(tree_id)?;
let feature_commit =
repo.commit(Some("HEAD"), &sig, &sig, "add canary token", &tree, &[&base_commit])?;
let repo_path = repo_dir.to_string_lossy().to_string();
let base_commit_hex = initial_commit.to_string();
let feature_commit_hex = feature_commit.to_string();
let branch_scan = Command::cargo_bin("kingfisher")?
.args([
"scan",
&repo_path,
"--branch",
&base_commit_hex,
"--no-validate",
"--no-update-check",
"--format",
"json",
])
.output()?;
assert_eq!(branch_scan.status.code(), Some(200));
let findings: Vec<Value> = serde_json::from_slice(&branch_scan.stdout)?;
let expected_rules: HashSet<String> = [
"KINGFISHER.AWS.2",
"KINGFISHER.GCP.3",
"KINGFISHER.PRIVKEY.2",
"KINGFISHER.PEM.1",
"KINGFISHER.GOOGLE.7",
"KINGFISHER.GITHUB.6",
"KINGFISHER.SLACK.1",
"KINGFISHER.SLACK.2",
"KINGFISHER.SLACK.4",
"KINGFISHER.STRIPE.2",
"KINGFISHER.TWILIO.1",
]
.into_iter()
.map(|s| s.to_string())
.collect();
// KINGFISHER.GITHUB.6 should appear twice (two separate secrets)
assert_eq!(findings.len(), expected_rules.len() + 1);
let mut rule_hits: HashSet<String> = HashSet::new();
let mut github_hit_count = 0usize;
for finding in &findings {
let rule_id = finding["rule"]["id"].as_str().unwrap();
if rule_id == "KINGFISHER.GITHUB.6" {
github_hit_count += 1;
} else {
rule_hits.insert(rule_id.to_string());
}
assert_eq!(finding["finding"]["path"].as_str().unwrap(), "config.py");
assert_eq!(
finding["finding"]["git_metadata"]["commit"]["id"].as_str().unwrap(),
base_commit_hex
);
}
assert_eq!(github_hit_count, 2, "expected two GitHub secret detections");
assert_eq!(rule_hits, expected_rules);
let diff_scan = Command::cargo_bin("kingfisher")?
.args([
"scan",
&repo_path,
"--branch",
"feature-1",
"--since-commit",
&base_commit_hex,
"--no-validate",
"--no-update-check",
"--format",
"json",
])
.output()?;
assert_eq!(diff_scan.status.code(), Some(200));
let diff_findings: Vec<Value> = serde_json::from_slice(&diff_scan.stdout)?;
assert_eq!(diff_findings.len(), 1, "expected only the canary secret in diff scan");
let diff_finding = &diff_findings[0];
assert_eq!(diff_finding["rule"]["id"], "KINGFISHER.AWS.2");
assert_eq!(diff_finding["finding"]["path"], "canary-token");
assert_eq!(diff_finding["finding"]["git_metadata"]["commit"]["id"], feature_commit_hex);
Ok(())
}

119
tests/smoke_branch.rs Normal file
View file

@ -0,0 +1,119 @@
// tests/smoke_branch.rs
//
// Integration tests that exercise `kingfisher scan` against Git branches and commit
// references using locally constructed repositories. These ensure that the
// `--branch` and `--since-commit` flags behave as expected when scanning a repo
// without validation.
use std::fs;
use std::path::Path;
use assert_cmd::Command;
use git2::{build::CheckoutBuilder, BranchType, Repository, Signature};
use predicates::{prelude::PredicateBooleanExt, str::contains};
use tempfile::tempdir;
#[test]
fn scan_by_commit_and_branch_diff() -> anyhow::Result<()> {
let dir = tempdir()?;
let repo_dir = dir.path().join("repo");
let repo = Repository::init(&repo_dir)?;
let signature = Signature::now("tester", "tester@example.com")?;
// Commit an initial config file packed with known test secrets. We'll scan
// this commit directly via `--branch <commit-hash>` in the first assertion.
let config_path = repo_dir.join("config.py");
let config_contents = r"# test configuration with multiple secrets
AWS_ACCESS_SECRET_KEY = 'UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgT5D/'
GCP_PRIVATE_KEY_ID = 'c4c474d61701fd6fd4191883b8fea9a8411bf771'
GOOGLE_API_KEY = 'AIzaSyBUPHAjZl3n8Eza66ka6B78iVyPteC5MgM'
";
fs::create_dir_all(config_path.parent().unwrap())?;
fs::write(&config_path, config_contents)?;
let mut index = repo.index()?;
index.add_path(Path::new("config.py"))?;
let tree_id = index.write_tree()?;
let tree = repo.find_tree(tree_id)?;
let initial_commit_id =
repo.commit(Some("HEAD"), &signature, &signature, "initial", &tree, &[])?;
let initial_commit = repo.find_commit(initial_commit_id)?;
let initial_commit_hex = initial_commit_id.to_string();
// Create a "main" branch pointing at the initial commit to mirror the
// documented example, but keep the default branch checkout untouched. Some
// Git installations already default to `main`, so only create the branch
// if it does not exist yet.
if repo.find_branch("main", BranchType::Local).is_err() {
repo.branch("main", &initial_commit, false)?;
}
// Create a feature branch that introduces a new secret file. The diff based
// scan later on should report only this file when paired with --since-commit.
repo.branch("feature-1", &initial_commit, true)?;
repo.set_head("refs/heads/feature-1")?;
repo.checkout_head(Some(CheckoutBuilder::new().force()))?;
let canary_path = repo_dir.join("canary-token");
let canary_contents = r"[default]
aws_access_key_id = AKIAX24QKKOLDJMZ5Y2T
aws_secret_access_key = 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
";
fs::write(&canary_path, canary_contents)?;
let mut index = repo.index()?;
index.add_path(Path::new("config.py"))?;
index.add_path(Path::new("canary-token"))?;
let tree_id = index.write_tree()?;
let tree = repo.find_tree(tree_id)?;
let parent_commit = repo.head()?.peel_to_commit()?;
repo.commit(
Some("HEAD"),
&signature,
&signature,
"add canary token",
&tree,
&[&parent_commit],
)?;
// ── scan the repository by commit hash ───────────────────────────────────
Command::cargo_bin("kingfisher")?
.args([
"scan",
repo_dir.to_str().unwrap(),
"--branch",
initial_commit_hex.as_str(),
"--no-validate",
"--no-update-check",
])
.assert()
.code(200)
.stdout(
contains("AWS SECRET ACCESS KEY")
.and(contains("config.py"))
.and(contains(initial_commit_hex.as_str())),
);
// ── scan only the diff between feature-1 and the merge base ─────────────
Command::cargo_bin("kingfisher")?
.args([
"scan",
repo_dir.to_str().unwrap(),
"--branch",
"feature-1",
"--since-commit",
initial_commit_hex.as_str(),
"--no-validate",
"--no-update-check",
])
.assert()
.code(200)
.stdout(
contains("canary-token")
.and(contains("AWS SECRET ACCESS KEY"))
.and(contains("efnegoUp/WXc3XwlL77dXu1aKIICzvz+n+7Sz88i")),
)
.stdout(contains("config.py").not());
Ok(())
}