kingfisher/tests/smoke_branch.rs

282 lines
10 KiB
Rust

// tests/smoke_branch.rs
//
// Integration tests that exercise `kingfisher scan` against Git branches and commit
// references using locally constructed repositories. These ensure that the
// branch-focused flags behave as expected when scanning a repo without
// validation, including the ability to resume from a specific commit.
use std::fs;
use std::path::Path;
use anyhow::Result;
use assert_cmd::Command;
use git2::{build::CheckoutBuilder, BranchType, Repository, Signature};
use predicates::{prelude::PredicateBooleanExt, str::contains};
use tempfile::{tempdir, TempDir};
const AWS_SECRET_VALUE: &str = "UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgTs5D";
const GCP_PRIVATE_KEY_VALUE: &str = "c4c474d61701fd6fd4191883b8fea9a8411bf771";
const SLACK_TOKEN_VALUE: &str = "xoxb-123465789012-0987654321123-AbDcEfGhIjKlMnOpQrStUvWx";
const STRIPE_SECRET_VALUE: &str = "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";
const AWS_SECRET_LINE: &str = "AWS_SECRET_ACCESS_KEY = 'UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgTs5D/'";
const GCP_PRIVATE_KEY_LINE: &str =
"GCP_PRIVATE_KEY_ID = 'c4c474d61701fd6fd4191883b8fea9a8411bf771'";
const SLACK_TOKEN_LINE: &str =
"SLACK_BOT_TOKEN = 'xoxb-123465789012-0987654321123-AbDcEfGhIjKlMnOpQrStUvWx'";
const STRIPE_SECRET_LINE: &str = concat!(
"STRIPE_SECRET_KEY = '",
"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",
"'",
);
#[test]
fn scan_by_commit_and_branch_diff() -> anyhow::Result<()> {
let dir = tempdir()?;
let repo_dir = dir.path().join("repo");
let repo = Repository::init(&repo_dir)?;
let signature = Signature::now("tester", "tester@example.com")?;
// Commit an initial config file packed with known test secrets. We'll scan
// this commit directly via `--branch <commit-hash>` in the first assertion.
let config_path = repo_dir.join("config.py");
let config_contents = r"# test configuration with multiple secrets
AWS_ACCESS_SECRET_KEY = 'UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgT5D/'
GCP_PRIVATE_KEY_ID = 'c4c474d61701fd6fd4191883b8fea9a8411bf771'
GOOGLE_API_KEY = 'AIzaSyBUPHAjZl3n8Eza66ka6B78iVyPteC5MgM'
";
fs::create_dir_all(config_path.parent().unwrap())?;
fs::write(&config_path, config_contents)?;
let mut index = repo.index()?;
index.add_path(Path::new("config.py"))?;
let tree_id = index.write_tree()?;
let tree = repo.find_tree(tree_id)?;
let initial_commit_id =
repo.commit(Some("HEAD"), &signature, &signature, "initial", &tree, &[])?;
let initial_commit = repo.find_commit(initial_commit_id)?;
let initial_commit_hex = initial_commit_id.to_string();
// Create a "main" branch pointing at the initial commit to mirror the
// documented example, but keep the default branch checkout untouched. Some
// Git installations already default to `main`, so only create the branch
// if it does not exist yet.
if repo.find_branch("main", BranchType::Local).is_err() {
repo.branch("main", &initial_commit, false)?;
}
// Create a feature branch that introduces a new secret file. The diff based
// scan later on should report only this file when paired with --since-commit.
repo.branch("feature-1", &initial_commit, true)?;
repo.set_head("refs/heads/feature-1")?;
repo.checkout_head(Some(CheckoutBuilder::new().force()))?;
let canary_path = repo_dir.join("canary-token");
let canary_contents = r"[default]
aws_access_key_id = AKIAX24QKKOLDJMZ5Y2T
aws_secret_access_key = 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
";
fs::write(&canary_path, canary_contents)?;
let mut index = repo.index()?;
index.add_path(Path::new("config.py"))?;
index.add_path(Path::new("canary-token"))?;
let tree_id = index.write_tree()?;
let tree = repo.find_tree(tree_id)?;
let parent_commit = repo.head()?.peel_to_commit()?;
repo.commit(
Some("HEAD"),
&signature,
&signature,
"add canary token",
&tree,
&[&parent_commit],
)?;
// ── scan the repository by commit hash ───────────────────────────────────
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
.args([
"scan",
repo_dir.to_str().unwrap(),
"--branch",
initial_commit_hex.as_str(),
"--no-validate",
"--no-update-check",
])
.assert()
.code(200)
.stdout(
contains("AWS SECRET ACCESS KEY")
.and(contains("config.py"))
.and(contains(initial_commit_hex.as_str())),
);
// ── scan only the diff between feature-1 and the merge base ─────────────
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
.args([
"scan",
repo_dir.to_str().unwrap(),
"--branch",
"feature-1",
"--since-commit",
initial_commit_hex.as_str(),
"--no-validate",
"--no-update-check",
])
.assert()
.code(200)
.stdout(
contains("canary-token")
.and(contains("AWS SECRET ACCESS KEY"))
.and(contains("efnegoUp/WXc3XwlL77dXu1aKIICzvz+n+7Sz88i")),
)
.stdout(contains("config.py").not());
Ok(())
}
///
///
///
///
///
/// Create a repo with a single file `secrets.txt` and five commits that append
/// lines in order, exactly like the provided shell script. Returns the repo dir
/// and the vector of commit IDs (oldest → newest).
fn setup_linear_repo_with_secrets() -> Result<(TempDir, std::path::PathBuf, Vec<git2::Oid>)> {
let dir = tempdir()?;
let repo_dir = dir.path().join("repo");
let repo = Repository::init(&repo_dir)?;
let sig = Signature::now("tester", "tester@example.com")?;
let secrets_path = repo_dir.join("secrets.txt");
// Commit #1 — AWS
fs::write(&secrets_path, AWS_SECRET_LINE)?;
let mut index = repo.index()?;
index.add_path(Path::new("secrets.txt"))?;
let tree_id = index.write_tree()?;
let tree = repo.find_tree(tree_id)?;
let mut commits = Vec::new();
let c1 = repo.commit(Some("HEAD"), &sig, &sig, "Add AWS secret", &tree, &[])?;
commits.push(c1);
let mut parent_commit = repo.find_commit(c1)?;
let mut contents = String::from(AWS_SECRET_LINE);
// Remaining commits mirror the shell script example.
let additions = [
("Add GCP private key id", GCP_PRIVATE_KEY_LINE),
("Add Slack bot token", SLACK_TOKEN_LINE),
("Add Stripe API key", STRIPE_SECRET_LINE),
];
for (message, line) in additions {
contents.push('\n');
contents.push_str(line);
fs::write(&secrets_path, &contents)?;
let mut index = repo.index()?;
index.add_path(Path::new("secrets.txt"))?;
let tree_id = index.write_tree()?;
let tree = repo.find_tree(tree_id)?;
let oid = repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent_commit])?;
commits.push(oid);
parent_commit = repo.find_commit(oid)?;
}
// Create a named branch to mirror long-lived branch workflows.
repo.branch("long-lived", &parent_commit, true)?;
Ok((dir, repo_dir, commits))
}
#[test]
fn scan_specific_commit_reports_only_that_commit() -> Result<()> {
let (_temp_dir, repo_dir, commits) = setup_linear_repo_with_secrets()?;
let c1_hex = commits[0].to_string(); // first commit (AWS only)
// Scan exactly the initial commit via --branch <commit>
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
.args([
"scan",
repo_dir.to_str().unwrap(),
"--branch",
c1_hex.as_str(),
"--no-validate",
"--no-update-check",
])
.assert()
.code(200)
.stdout(
// Must contain AWS, must NOT contain the later secrets
contains("AWS SECRET ACCESS KEY")
.and(contains(AWS_SECRET_VALUE))
.and(contains(GCP_PRIVATE_KEY_VALUE).not())
.and(contains(SLACK_TOKEN_VALUE).not())
.and(contains(STRIPE_SECRET_VALUE).not()),
);
Ok(())
}
#[test]
fn scan_with_branch_root_includes_descendants() -> Result<()> {
let (_temp_dir, repo_dir, commits) = setup_linear_repo_with_secrets()?;
let c1_hex = commits[0].to_string(); // start from first commit
// Using --branch-root should include the selected commit and remaining history up to HEAD
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
.args([
"scan",
repo_dir.to_str().unwrap(),
"--branch",
c1_hex.as_str(),
"--branch-root",
"--no-validate",
"--no-update-check",
])
.assert()
.code(200)
.stdout(
contains("AWS SECRET ACCESS KEY")
.and(contains(AWS_SECRET_VALUE))
.and(contains(GCP_PRIVATE_KEY_VALUE))
.and(contains(SLACK_TOKEN_VALUE))
.and(contains(STRIPE_SECRET_VALUE)),
);
Ok(())
}
#[test]
fn scan_branch_tip_with_branch_root_commit() -> Result<()> {
let (_temp_dir, repo_dir, commits) = setup_linear_repo_with_secrets()?;
let root_commit_hex = commits[0].to_string();
let latest_commit_hex = commits.last().expect("expected at least one commit").to_string();
// Passing --branch-root-commit should implicitly enable inclusive scanning even
// without the legacy --branch-root flag when targeting a named branch tip.
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
.args([
"scan",
repo_dir.to_str().unwrap(),
"--branch",
"long-lived",
"--branch-root-commit",
root_commit_hex.as_str(),
"--no-validate",
"--no-update-check",
])
.assert()
.code(200)
.stdout(
contains("AWS SECRET ACCESS KEY")
.and(contains(AWS_SECRET_VALUE))
.and(contains(GCP_PRIVATE_KEY_VALUE))
.and(contains(SLACK_TOKEN_VALUE))
.and(contains(STRIPE_SECRET_VALUE))
.and(contains(latest_commit_hex.as_str())),
);
Ok(())
}