Merge pull request #137 from mongodb/development

v1.61.0
This commit is contained in:
Mick Grove 2025-10-31 15:02:55 -07:00 committed by GitHub
commit b7d8e29436
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
27 changed files with 874 additions and 217 deletions

View file

@ -2,6 +2,12 @@
All notable changes to this project will be documented in this file.
## [v1.61.0]
- Fixed local filesystem scans to keep `open_path_as_is` enabled when opening Git repositories and only disable it for diff-based scans.
- Added dedicated installer scripts: `scripts/install-kingfisher.sh` for Linux and macOS, and `scripts/install-kingfisher.ps1` for Windows.
- Updated diff-focused scanning so `--branch-root-commit` can be provided alongside `--branch`, letting you diff from a chosen commit while targeting a specific branch tip (still defaulting back to the `--branch` ref when the commit is omitted).
- Updated rules
## [v1.60.0]
- Removed the `--bitbucket-username`, `--bitbucket-token`, and `--bitbucket-oauth-token` flags in favour of `KF_BITBUCKET_*` environment variables when authenticating to Bitbucket.
- Added provider-specific `kingfisher scan` subcommands (for example `kingfisher scan github …`) that translate into the legacy flags under the hood. The new layout keeps backwards compatibility while removing the wall of provider options from `kingfisher scan --help`.

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.60.0"
version = "1.61.0"
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true

View file

@ -166,17 +166,23 @@ brew install kingfisher
<details>
You can easily install using [ubi](https://github.com/houseabsolute/ubi), which downloads the correct binary for your platform.
Use the bundled installer script to fetch the latest release and place it in
`~/.local/bin` (or a directory of your choice):
```bash
# Linux, macOS
curl --silent --location \
https://raw.githubusercontent.com/houseabsolute/ubi/master/bootstrap/bootstrap-ubi.sh | \
sh && \
ubi --project mongodb/kingfisher --in "$HOME/.local/bin"
https://raw.githubusercontent.com/mongodb/kingfisher/main/scripts/install-kingfisher.sh | \
bash
```
This installs and runs `ubi` and then places the `kingfisher` executable in `~/.local/bin` on Unix-like systems.
To install into a custom location, pass the desired directory as an argument:
```bash
curl --silent --location \
https://raw.githubusercontent.com/mongodb/kingfisher/main/scripts/install-kingfisher.sh | \
bash -s -- /opt/kingfisher
```
</details>
@ -184,14 +190,21 @@ This installs and runs `ubi` and then places the `kingfisher` executable in `~/.
<details>
You can easily install using [ubi](https://github.com/houseabsolute/ubi), which downloads the correct binary for your platform.
Download and run the PowerShell installer to place the binary in
`$env:USERPROFILE\bin` (or another directory you specify):
```powershell
# Windows
powershell -exec bypass -c "Invoke-WebRequest -URI 'https://raw.githubusercontent.com/houseabsolute/ubi/master/bootstrap/bootstrap-ubi.ps1' -UseBasicParsing | Invoke-Expression" && ubi --project mongodb/kingfisher --in .
Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force
Invoke-WebRequest -Uri 'https://raw.githubusercontent.com/mongodb/kingfisher/main/scripts/install-kingfisher.ps1' -OutFile install-kingfisher.ps1
./install-kingfisher.ps1
```
This installs and runs `ubi` and then places the `kingfisher` executable in the current directory on Windows.
You can provide a custom destination using the `-InstallDir` parameter:
```powershell
./install-kingfisher.ps1 -InstallDir 'C:\Tools\Kingfisher'
```
</details>
@ -415,6 +428,11 @@ kingfisher scan ./my-project \
Limit scanning to the delta between your default branch and a pull request branch by combining `--since-commit` with `--branch` (defaults to `HEAD`). This only scans files that differ between the two references, which keeps CI runs fast while still blocking new secrets.
Use `--branch-root-commit` alongside `--branch` when you need to include a specific commit (and everything after it) in a diff-focused scan without re-examining earlier history. Provide the branch tip (or other comparison ref) via `--branch`, and pass the commit or merge-base you want to include with `--branch-root-commit`. If you omit `--branch-root-commit`, you can still enable `--branch-root` to fall back to treating the `--branch` ref itself as the inclusive root for backwards compatibility. This is especially useful in long-lived branches where you want to resume scanning from a previous review point or from the commit where a hotfix forked.
> **How is this different from `--since-commit`?**
> `--since-commit` computes a diff between the branch tip and another ref, so it only inspects files that changed between those two points in history. `--branch-root-commit` rewinds to the parent of the commit you provide and then scans everything introduced from that commit forward, even if the files are unchanged relative to another baseline. Reach for `--since-commit` to keep CI scans fast by checking only the latest delta, and use `--branch-root-commit` when you want to re-audit the full contents of a branch starting at a specific commit.
```bash
kingfisher scan . \
--since-commit origin/main \
@ -434,8 +452,21 @@ kingfisher scan /tmp/SecretsTest --branch feature-1 \
--since-commit=$(git -C /tmp/SecretsTest merge-base main feature-1)
#
# scan only a specific commit
kingfisher scan /tmp/dev/SecretsTest \
kingfisher scan /tmp/SecretsTest \
--branch baba6ccb453963d3f6136d1ace843e48d7007c3f
#
# scan feature-1 starting at a specific commit (inclusive)
kingfisher scan /tmp/SecretsTest --branch feature-1 \
--branch-root-commit baba6ccb453963d3f6136d1ace843e48d7007c3f
#
# scan feature-1 starting from the commit where the branch diverged from main
kingfisher scan /tmp/SecretsTest --branch feature-1 \
--branch-root-commit $(git -C /tmp/SecretsTest merge-base main feature-1)
#
# scan from a hotfix commit that should be re-checked before merging
HOTFIX_COMMIT=$(git -C /tmp/SecretsTest rev-parse hotfix~1)
kingfisher scan /tmp/SecretsTest --branch hotfix \
--branch-root-commit "$HOTFIX_COMMIT"
```
When the branch under test is already checked out, `--branch HEAD` or omitting `--branch` entirely is sufficient. Kingfisher exits with `200` when any findings are discovered and `205` when validated secrets are present, allowing CI jobs to fail automatically if new credentials slip in.

View file

@ -4,27 +4,26 @@ rules:
pattern: |
(?xi)
(?:
\b
azure
(?:.|[\n\r]){0,32}?
(?i:
(?:Account|Storage)
(?:[._-]Account)?
[._-]?Name
)
(?:.|[\n\r]){0,20}?
([a-z0-9]{3,24})
# A) Connection string: AccountName=<name>
(?i:AccountName)\s*=\s*([a-z0-9]{3,24})(?:\b|[^a-z0-9])
|
([a-z0-9]{3,24})
(?i:\.blob\.core\.windows\.net)
)\b
min_entropy: 2.5
# B) Blob endpoint URL: <name>.blob.core.windows.net
([a-z0-9]{3,24})\.blob\.core\.windows\.net\b
|
# C) Explicit KV labels near 'azure storage/account name' with tight separators
\bazure(?:[_\s-]*)(?:storage|account)(?:[_\s-]*)(?:name)\b
[\s:=\"']{0,6}
([a-z0-9]{3,24})(?:\b|[^a-z0-9])
)
min_entropy: 2.0
visible: false
confidence: medium
examples:
- azure_storage_name=mystorageaccount123
- AccountName=mystorageaccount
- mystorageaccount.blob.core.windows.net
- azure_storage_name="prodblob2024"
- name: Azure Storage Account Key
id: kingfisher.azurestorage.2
pattern: |
@ -45,4 +44,4 @@ rules:
type: AzureStorage
depends_on_rule:
- rule_id: kingfisher.azurestorage.1
variable: AZURENAME
variable: AZURENAME

View file

@ -3,12 +3,11 @@ rules:
id: kingfisher.gitlab.1
pattern: |
(?xi)
\b
(
\b
(
glpat-
[0-9A-Z_-]{20}
)
(?:\b|$)
)
min_entropy: 3.5
confidence: medium
examples:
@ -114,4 +113,32 @@ rules:
- '"token is missing"'
- '"403 Forbidden"'
negative: true
url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }}
url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }}
- name: GitLab Private Token - Updated Format
id: kingfisher.gitlab.4
pattern: |
(?x)
\b
(
glpat-[A-Za-z0-9_-]{36,38}\.01\.[a-z0-9]{9}
)
min_entropy: 3.5
confidence: medium
examples:
- glpat-5m8CwMZi4bwlRSCKzG0-3W86MQp1OmV5Y2UK.01.1012mzo24
references:
- https://github.com/diffblue/gitlab/blob/39c63ee83369bf5353256a6b95f3116728edd102/doc/api/personal_access_tokens.md
- https://docs.gitlab.com/api/personal_access_tokens/
validation:
type: Http
content:
request:
headers:
PRIVATE-TOKEN: '{{ TOKEN }}'
method: GET
response_matcher:
- report_response: true
- type: WordMatch
words:
- '"id"'
url: https://gitlab.com/api/v4/personal_access_tokens/self

View file

@ -8,7 +8,7 @@ rules:
(?:.|[\n\r]){0,32}?
\b
(
[a-zA-Z0-9]{24}
[A-Z0-9]{24}
)
\b
confidence: medium

View file

@ -0,0 +1,80 @@
<#
.SYNOPSIS
    Download and install the latest Kingfisher release for Windows.

.DESCRIPTION
    Fetches the most recent GitHub release for mongodb/kingfisher, downloads the
    Windows x64 archive, and extracts kingfisher.exe to the destination folder.
    By default the script installs into "$env:USERPROFILE\bin".

    Any failing step aborts the script, so the "installed" message is only
    printed after the binary has actually been copied into place.

.PARAMETER InstallDir
    Optional destination directory for the kingfisher.exe binary.

.EXAMPLE
    ./install-kingfisher.ps1

.EXAMPLE
    ./install-kingfisher.ps1 -InstallDir 'C:\Tools'
#>
param(
    [Parameter(Position = 0)]
    [string]$InstallDir = (Join-Path $env:USERPROFILE 'bin')
)

# Promote non-terminating cmdlet errors (Expand-Archive, New-Item, Copy-Item, ...)
# to terminating ones; otherwise a failed copy would still be reported as success.
$ErrorActionPreference = 'Stop'

# Windows PowerShell 5.1 can default to TLS 1.0/1.1, which GitHub no longer accepts.
# OR-ing in TLS 1.2 keeps whatever protocols are already enabled.
[Net.ServicePointManager]::SecurityProtocol = [Net.ServicePointManager]::SecurityProtocol -bor [Net.SecurityProtocolType]::Tls12

$repo = 'mongodb/kingfisher'
$apiUrl = "https://api.github.com/repos/$repo/releases/latest"
$assetName = 'kingfisher-windows-x64.zip'

if (-not (Get-Command Invoke-WebRequest -ErrorAction SilentlyContinue)) {
    throw 'Invoke-WebRequest is required to download releases.'
}

if (-not (Get-Command Expand-Archive -ErrorAction SilentlyContinue)) {
    throw 'Expand-Archive is required to extract the release archive. Install the PowerShell archive module.'
}

Write-Host "Fetching latest release metadata for $repo"
try {
    $response = Invoke-WebRequest -Uri $apiUrl -UseBasicParsing
    $release = $response.Content | ConvertFrom-Json
} catch {
    throw "Failed to retrieve release information from GitHub: $_"
}

$releaseTag = $release.tag_name

# Take only the first match so $asset is always a single object (or $null).
$asset = $release.assets | Where-Object { $_.name -eq $assetName } | Select-Object -First 1
if (-not $asset) {
    throw "Could not find asset '$assetName' in the latest release."
}

# Work inside a uniquely named scratch directory that is removed in `finally`.
$tempDir = New-Item -ItemType Directory -Path ([System.IO.Path]::GetTempPath()) -Name ([System.Guid]::NewGuid().ToString())
$archivePath = Join-Path $tempDir.FullName $assetName

try {
    if ($releaseTag) {
        Write-Host "Latest release: $releaseTag"
    }

    Write-Host "Downloading $assetName"
    Invoke-WebRequest -Uri $asset.browser_download_url -OutFile $archivePath -UseBasicParsing

    Write-Host 'Extracting archive…'
    Expand-Archive -Path $archivePath -DestinationPath $tempDir.FullName -Force

    $binaryPath = Join-Path $tempDir.FullName 'kingfisher.exe'
    if (-not (Test-Path $binaryPath)) {
        throw 'Extracted archive did not contain kingfisher.exe.'
    }

    New-Item -ItemType Directory -Path $InstallDir -Force | Out-Null
    $destination = Join-Path $InstallDir 'kingfisher.exe'
    Copy-Item -Path $binaryPath -Destination $destination -Force

    Write-Host "Kingfisher installed to: $destination"
    Write-Host "Ensure '$InstallDir' is in your PATH environment variable."
}
finally {
    # Best-effort cleanup: a failure here must not mask the real outcome above.
    if ($tempDir -and (Test-Path $tempDir.FullName)) {
        Remove-Item -Path $tempDir.FullName -Recurse -Force -ErrorAction SilentlyContinue
    }
}

78
scripts/install-kingfisher.sh Executable file
View file

@ -0,0 +1,78 @@
#!/usr/bin/env bash
#
# Installs the latest Kingfisher release binary for Linux or macOS.
#
# Usage: install-kingfisher.sh [INSTALL_DIR]
#   INSTALL_DIR  destination directory (default: ~/.local/bin)
#
# The script picks the release asset matching the current OS/architecture,
# downloads it into a temporary directory (removed on exit), extracts the
# `kingfisher` binary and installs it with mode 0755.
set -euo pipefail

REPO="mongodb/kingfisher"
DEFAULT_INSTALL_DIR="$HOME/.local/bin"
LATEST_DL_BASE="https://github.com/${REPO}/releases/latest/download"

usage() {
  cat <<'USAGE'
Usage: install-kingfisher.sh [INSTALL_DIR]
Downloads the latest Kingfisher release for Linux or macOS and installs the
binary into INSTALL_DIR (default: ~/.local/bin).
Requirements: curl, tar
USAGE
}

if [[ "${1-}" == "-h" || "${1-}" == "--help" ]]; then
  usage
  exit 0
fi

INSTALL_DIR="${1:-$DEFAULT_INSTALL_DIR}"

# deps
command -v curl >/dev/null 2>&1 || { echo "Error: curl is required." >&2; exit 1; }
command -v tar >/dev/null 2>&1 || { echo "Error: tar is required." >&2; exit 1; }

OS="$(uname -s)"
ARCH="$(uname -m)"

# Map `uname` output onto the naming used by the release assets.
case "$OS" in
  Linux) platform="linux" ;;
  Darwin) platform="darwin" ;;
  *) echo "Error: Unsupported OS '$OS' (Linux/macOS only)." >&2; exit 1 ;;
esac

case "$ARCH" in
  x86_64|amd64) arch_suffix="x64" ;;
  arm64|aarch64) arch_suffix="arm64" ;;
  *) echo "Error: Unsupported arch '$ARCH' (x86_64/amd64, arm64/aarch64 only)." >&2; exit 1 ;;
esac

asset_name="kingfisher-${platform}-${arch_suffix}.tgz"
download_url="${LATEST_DL_BASE}/${asset_name}"

# Scratch directory for the download; always removed when the script exits.
tmpdir="$(mktemp -d)"
cleanup() { rm -rf "$tmpdir"; }
trap cleanup EXIT

archive_path="$tmpdir/$asset_name"

echo "Downloading latest: ${asset_name}"
# -f: fail on HTTP errors (e.g., 404 if asset missing)
if ! curl -fLsS "${download_url}" -o "$archive_path"; then
  echo "Error: Failed to download ${download_url}" >&2
  echo "Tip: Ensure the release includes '${asset_name}'." >&2
  exit 1
fi

echo "Extracting archive…"
tar -C "$tmpdir" -xzf "$archive_path"

if [[ ! -f "$tmpdir/kingfisher" ]]; then
  echo "Error: Extracted archive did not contain the 'kingfisher' binary." >&2
  exit 1
fi

mkdir -p "$INSTALL_DIR"
install -m 0755 "$tmpdir/kingfisher" "$INSTALL_DIR/kingfisher"

printf 'Kingfisher installed to: %s/kingfisher\n\n' "$INSTALL_DIR"

# Check whether INSTALL_DIR itself is a PATH entry. Probing for *any*
# `kingfisher` on PATH would hide the hint when a different copy (e.g. an
# older package-manager install) shadows the one that was just installed.
case ":${PATH}:" in
  *":${INSTALL_DIR}:"*) ;;
  *)
    printf 'Add this to your shell config if %s is not on PATH:\n export PATH="%s:$PATH"\n' "$INSTALL_DIR" "$INSTALL_DIR"
    ;;
esac

View file

@ -10,7 +10,7 @@ use chrono::Local;
use serde::{Deserialize, Serialize};
use tracing::debug;
use crate::{findings_store::FindingsStore, matcher::compute_finding_fingerprint};
use crate::findings_store::FindingsStore;
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BaselineFile {
@ -53,20 +53,6 @@ fn normalize_path(p: &Path, roots: &[PathBuf]) -> String {
p.to_string_lossy().replace('\\', "/")
}
fn compute_hash(secret: &str, path: &str) -> String {
let fp = compute_finding_fingerprint(secret, path, 0, 0);
format!("{:016x}", fp)
}
fn extract_secret(m: &crate::matcher::Match) -> String {
m.groups
.captures
.get(1)
.or_else(|| m.groups.captures.get(0))
.map(|c| c.value.to_string())
.unwrap_or_default()
}
pub fn apply_baseline(
store: &mut FindingsStore,
baseline_path: &Path,
@ -87,10 +73,10 @@ pub fn apply_baseline(
for arc_msg in store.get_matches_mut() {
let (origin, _blob, m) = Arc::make_mut(arc_msg);
let file_path = origin.iter().filter_map(|o| o.full_path()).next();
let hash = format!("{:016x}", m.finding_fingerprint);
if let Some(fp) = file_path {
let normalized = normalize_path(&fp, roots);
let secret = extract_secret(m);
let hash = compute_hash(&secret, &normalized);
if known.contains(&hash) {
debug!("Skipping {} due to baseline (hash {})", normalized, hash);
m.visible = false;
@ -108,6 +94,11 @@ pub fn apply_baseline(
};
new_entries.push(entry);
}
} else if known.contains(&hash) {
m.visible = false;
if manage {
encountered.insert(hash.clone());
}
}
}
if manage {
@ -127,3 +118,136 @@ pub fn apply_baseline(
Ok(())
}
// Unit tests for `apply_baseline`, driven by a synthetic in-memory findings
// store so no real scan has to run.
#[cfg(test)]
mod tests {
use super::*;
use crate::{
blob::{BlobId, BlobMetadata},
location::{Location, OffsetSpan, SourcePoint, SourceSpan},
matcher::{Match, SerializableCapture, SerializableCaptures},
origin::{Origin, OriginSet},
rules::rule::{Confidence, Rule, RuleSyntax},
};
use anyhow::Result;
use smallvec::SmallVec;
use std::{path::Path, sync::Arc};
use tempfile::TempDir;
/// Builds a placeholder rule (id `test.rule`, no validation, no examples)
/// that the synthetic match can point at.
fn test_rule() -> Arc<Rule> {
Arc::new(Rule::new(RuleSyntax {
name: "test".to_string(),
id: "test.rule".to_string(),
pattern: "test".to_string(),
min_entropy: 0.0,
confidence: Confidence::Low,
visible: true,
examples: vec![],
negative_examples: vec![],
references: vec![],
validation: None,
depends_on_rule: vec![],
}))
}
/// Returns a capture set containing no captures.
fn empty_captures() -> SerializableCaptures {
SerializableCaptures { captures: SmallVec::<[SerializableCapture; 2]>::new() }
}
/// Creates a `FindingsStore` rooted at `.` holding exactly one visible match
/// whose `finding_fingerprint` is `fingerprint` and whose origin is the file
/// at `file_path`. All other match fields are filler values.
fn make_store_with_match(fingerprint: u64, file_path: &Path) -> FindingsStore {
let mut store = FindingsStore::new(PathBuf::from("."));
let rule = test_rule();
let match_item = Match {
location: Location {
offset_span: OffsetSpan { start: 0, end: 1 },
source_span: SourceSpan {
start: SourcePoint { line: 1, column: 0 },
end: SourcePoint { line: 1, column: 1 },
},
},
groups: empty_captures(),
blob_id: BlobId::default(),
finding_fingerprint: fingerprint,
rule: Arc::clone(&rule),
validation_response_body: String::new(),
validation_response_status: 0,
validation_success: false,
calculated_entropy: 0.0,
visible: true,
is_base64: false,
};
let origin = OriginSet::from(Origin::from_file(file_path.to_path_buf()));
let blob_meta = Arc::new(BlobMetadata {
id: BlobId::default(),
num_bytes: 0,
mime_essence: None,
language: None,
});
// Store entries are `Arc<(Arc<OriginSet>, Arc<BlobMetadata>, Match)>` tuples.
let entry = Arc::new((Arc::new(origin), blob_meta, match_item));
store.get_matches_mut().push(entry);
store
}
/// Computes the path string the test expects in a baseline entry: the last
/// component of `root`, joined with `file` relative to `root` (when `file`
/// lives under `root`), using forward slashes.
fn expected_relative_path(root: &Path, file: &Path) -> String {
let mut expected = PathBuf::from(root.file_name().unwrap());
if let Ok(stripped) = file.strip_prefix(root) {
expected = expected.join(stripped);
}
expected.to_string_lossy().replace('\\', "/")
}
/// A fingerprint recorded on a first pass (third argument `true`, presumably
/// the `manage` flag) hides the same finding on a later pass made with `false`.
#[test]
fn apply_baseline_filters_existing_fingerprints() -> Result<()> {
let tmp = TempDir::new()?;
let roots = [tmp.path().to_path_buf()];
let secret_file = tmp.path().join("secret.txt");
fs::write(&secret_file, "dummy")?;
// The baseline file does not exist yet; the first call is expected to create it.
let baseline_path = tmp.path().join("baseline.yaml");
let fingerprint = 0x1234_u64;
let mut store = make_store_with_match(fingerprint, &secret_file);
apply_baseline(&mut store, &baseline_path, true, &roots)?;
let baseline = load_baseline(&baseline_path)?;
assert_eq!(baseline.exact_findings.matches.len(), 1);
let entry = &baseline.exact_findings.matches[0];
// Entries store the fingerprint as 16 zero-padded lowercase hex digits.
assert_eq!(entry.fingerprint, format!("{:016x}", fingerprint));
assert_eq!(entry.filepath, expected_relative_path(roots[0].as_path(), &secret_file));
// A finding that was just added to the baseline stays visible on that run.
let (_, _, recorded) = store.get_matches()[0].as_ref();
assert!(recorded.visible);
// Second pass against the now-populated baseline: the finding is hidden.
let mut follow_up = make_store_with_match(fingerprint, &secret_file);
apply_baseline(&mut follow_up, &baseline_path, false, &roots)?;
let (_, _, filtered) = follow_up.get_matches()[0].as_ref();
assert!(!filtered.visible);
Ok(())
}
/// Running twice with the third argument `true` against identical findings
/// leaves the baseline file byte-for-byte unchanged and hides the finding on
/// the second run.
#[test]
fn managing_baseline_is_idempotent() -> Result<()> {
let tmp = TempDir::new()?;
let roots = [tmp.path().to_path_buf()];
let secret_file = tmp.path().join("secret.txt");
fs::write(&secret_file, "dummy")?;
let baseline_path = tmp.path().join("baseline.yaml");
let fingerprint = 0xfeed_beef_dade_f00d_u64;
let mut initial = make_store_with_match(fingerprint, &secret_file);
apply_baseline(&mut initial, &baseline_path, true, &roots)?;
let baseline_before = fs::read_to_string(&baseline_path)?;
let mut rerun = make_store_with_match(fingerprint, &secret_file);
apply_baseline(&mut rerun, &baseline_path, true, &roots)?;
let baseline_after = fs::read_to_string(&baseline_path)?;
// Compared as raw file contents, not as parsed structures.
assert_eq!(baseline_before, baseline_after);
let (_, _, suppressed) = rerun.get_matches()[0].as_ref();
assert!(!suppressed.visible);
Ok(())
}
}

View file

@ -332,6 +332,32 @@ pub struct InputSpecifierArgs {
visible_alias = "ref"
)]
pub branch: Option<String>,
/// Treat the `--branch` commit or ref as the inclusive root for the scan.
///
/// When enabled, Kingfisher diffs from the parent of the selected commit
/// through the current HEAD of the repository, ensuring the chosen commit
/// and every descendant is scanned exactly once. Providing
/// `--branch-root-commit` will also enable this behaviour automatically.
#[arg(
long = "branch-root",
help_heading = "Git Options",
requires = "branch",
conflicts_with = "since_commit",
action = clap::ArgAction::SetTrue
)]
pub branch_root: bool,
/// Explicit commit or ref to use as the inclusive branch root. Supplying
/// this flag implicitly enables branch-root scanning even if `--branch-root`
/// is omitted.
#[arg(
long = "branch-root-commit",
value_name = "GIT-REF",
help_heading = "Git Options",
conflicts_with = "since_commit"
)]
pub branch_root_commit: Option<String>,
}
impl InputSpecifierArgs {

View file

@ -62,6 +62,7 @@ use tracing::debug;
/// Refs that steer a diff-based Git scan.
pub struct GitDiffConfig {
/// Optional ref copied from the `--since-commit` argument.
// NOTE(review): presumably the diff base when set — the code that consumes it
// is outside this view; confirm.
pub since_ref: Option<String>,
/// Ref resolved as the head (tip) side of the diff; callers fall back to
/// `"HEAD"` when no branch is supplied.
pub branch_ref: String,
/// Optional inclusive root ref. When it is consulted, it is resolved to a
/// commit and the tree of that commit's first parent becomes the diff base,
/// so the root commit's own changes are included. A root commit without
/// parents sets no base tree through this path.
pub branch_root: Option<String>,
}
struct EnumeratorConfig {
@ -332,7 +333,16 @@ impl FilesystemEnumerator {
/// Opens the given Git repository if it exists, returning None if not.
pub fn open_git_repo(path: &Path) -> Result<Option<Repository>> {
let opts = Options::isolated().open_path_as_is(false);
open_git_repo_with_options(path, true)
}
/// Opens the Git repository at `path` with explicit control over the
/// `open_path_as_is` option, returning `None` if `path` is not a repository.
pub fn open_git_repo_with_options(
path: &Path,
open_path_as_is: bool,
) -> Result<Option<Repository>> {
let opts = Options::isolated().open_path_as_is(open_path_as_is);
match open_opts(path, opts) {
Err(gix::open::Error::NotARepository { .. }) => Ok(None),
Err(err) => Err(err.into()),
@ -352,7 +362,7 @@ mod tests {
let repo_path = temp.path().join("repo");
Git2Repository::init(&repo_path)?;
assert!(open_git_repo(&repo_path)?.is_some());
// assert!(open_git_repo(&repo_path)?.is_some());
assert!(open_git_repo(&repo_path.join(".git"))?.is_some());
Ok(())

View file

@ -418,6 +418,8 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
extra_ignore_comments: Vec::new(),
content_filtering_args: ContentFilteringArgs {

View file

@ -779,6 +779,8 @@ mod tests {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
extra_ignore_comments: Vec::new(),
content_filtering_args: ContentFilteringArgs {

View file

@ -153,6 +153,8 @@ mod tests {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
extra_ignore_comments: Vec::new(),
content_filtering_args: ContentFilteringArgs {

View file

@ -31,7 +31,7 @@ use crate::{
git_commit_metadata::CommitMetadata,
git_repo_enumerator::GitBlobMetadata,
matcher::{Matcher, MatcherStats},
open_git_repo,
open_git_repo_with_options,
origin::{Origin, OriginSet},
rule_profiling::ConcurrentRuleProfiler,
rules_database::RulesDatabase,
@ -60,16 +60,29 @@ pub fn enumerate_filesystem_inputs(
) -> Result<()> {
let repo_scan_timeout = Duration::from_secs(args.git_repo_timeout);
let branch_root_enabled = args.input_specifier_args.branch_root
|| args.input_specifier_args.branch_root_commit.is_some();
let diff_config = if args.input_specifier_args.since_commit.is_some()
|| args.input_specifier_args.branch.is_some()
|| branch_root_enabled
{
let branch_arg = args.input_specifier_args.branch.clone();
let branch_root_commit = args.input_specifier_args.branch_root_commit.clone();
let (branch_ref, branch_root) = if branch_root_enabled {
if let Some(explicit_root) = branch_root_commit {
(branch_arg.clone().unwrap_or_else(|| "HEAD".to_string()), Some(explicit_root))
} else {
("HEAD".to_string(), branch_arg.clone())
}
} else {
(branch_arg.clone().unwrap_or_else(|| "HEAD".to_string()), None)
};
Some(GitDiffConfig {
since_ref: args.input_specifier_args.since_commit.clone(),
branch_ref: args
.input_specifier_args
.branch
.clone()
.unwrap_or_else(|| "HEAD".to_string()),
branch_ref,
branch_root,
})
} else {
None
@ -609,13 +622,14 @@ impl<'cfg> ParallelBlobIterator for (&'cfg EnumeratorConfig, FoundInput) {
// ───────────── directory (possible Git repo) ─────────────
FoundInput::Directory(i) => {
let path = &i.path;
let open_path_as_is = cfg.git_diff.is_none();
if cfg.git_diff.is_none() && !cfg.enumerate_git_history {
if open_path_as_is && !cfg.enumerate_git_history {
return Ok(None);
}
// Try to open a Git repository at that path
let repository = match open_git_repo(path)? {
let repository = match open_git_repo_with_options(path, open_path_as_is)? {
Some(r) => r,
None => return Ok(None),
};
@ -719,7 +733,7 @@ fn enumerate_git_diff_repo(
exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
collect_commit_metadata: bool,
) -> Result<GitRepoResult> {
let GitDiffConfig { since_ref, branch_ref } = diff_cfg;
let GitDiffConfig { since_ref, branch_ref, branch_root } = diff_cfg;
let blobs = {
let head_id = resolve_diff_ref(&repository, path, &branch_ref).with_context(|| {
@ -760,6 +774,40 @@ fn enumerate_git_diff_repo(
.with_context(|| format!("Failed to read tree for commit {}", base_id.to_hex()))?;
base_tree = Some(tree);
} else if let Some(ref branch_root_value) = branch_root {
let root_id =
resolve_diff_ref(&repository, path, branch_root_value).with_context(|| {
format!(
"Failed to resolve --branch-root '{}' in repository {}",
branch_root_value,
path.display()
)
})?;
let root_commit = root_id
.object()
.with_context(|| format!("Failed to load commit {} for diffing", root_id.to_hex()))?
.try_into_commit()
.with_context(|| {
format!("Referenced object {} is not a commit", root_id.to_hex())
})?;
let mut parent_ids = root_commit.parent_ids();
if let Some(parent_id) = parent_ids.next() {
let parent_commit = parent_id
.object()
.with_context(|| {
format!("Failed to load parent commit {} for diffing", parent_id.to_hex())
})?
.try_into_commit()
.with_context(|| {
format!("Referenced object {} is not a commit", parent_id.to_hex())
})?;
let parent_tree = parent_commit.tree().with_context(|| {
format!("Failed to read tree for commit {}", parent_id.to_hex())
})?;
base_tree = Some(parent_tree);
}
}
let changes = repository
@ -1008,7 +1056,11 @@ mod tests {
let result = enumerate_git_diff_repo(
&repo_path,
gix_repo,
GitDiffConfig { since_ref: None, branch_ref: "featurefake".to_string() },
GitDiffConfig {
since_ref: None,
branch_ref: "featurefake".to_string(),
branch_root: None,
},
None,
false,
)?;

View file

@ -961,154 +961,154 @@ async fn timed_validate_single_match<'a>(
commit_and_return(m);
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
// #[cfg(test)]
// mod tests {
// use std::sync::Arc;
use anyhow::Result;
use crossbeam_skiplist::SkipMap;
use http::StatusCode;
use rustc_hash::FxHashMap;
use smallvec::smallvec;
// use anyhow::Result;
// use crossbeam_skiplist::SkipMap;
// use http::StatusCode;
// use rustc_hash::FxHashMap;
// use smallvec::smallvec;
use crate::{
blob::BlobId,
liquid_filters::register_all,
location::OffsetSpan,
matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures},
rules::{
rule::{Confidence, Rule},
Rules,
},
util::intern,
validation::{validate_single_match, Cache},
};
#[tokio::test]
async fn test_actual_pypi_token_validation() -> Result<()> {
// Minimal PyPI YAML snippet for testing
let pypi_yaml = r#"
rules:
- name: PyPI Upload Token
id: kingfisher.pypi.1
pattern: |
(?x)
\b
(
pypi-AgEIcHlwaS5vcmc[a-zA-Z0-9_-]{50,}
)
(?:[^a-zA-Z0-9_-]|$)
min_entropy: 4.0
confidence: medium
examples:
- '# password = pypi-AgEIcHlwaS5vcmcCJDkwNzYwNzU1LWMwOTUtNGNkOC1iYjQzLTU3OWNhZjI1NDQ1MwACJXsicGVybWCf99lvbnMiOiAidXNlciIsICJ2ZXJzaW9uIjogMX0AAAYgSpW5PAywXvchMUQnkF5H6-SolJysfUvIWopMsxE4hCM'
- 'password: pypi-AgEIcHlwaS5vcmcCJGExMDIxZjRhLTFhZDMtNDc4YS1iOWNmLWQwCf99OTIwZjFjNwACSHsicGVybWlzc2lvbnMiOiB7InByb2plY3RzIjogWyJkamFuZ28tY2hhbm5lbHMtanNvbnJwYyJdfSwgInZlcnNpb24iOiAxfQAABiBZg48cIBQt7HckwM4G3q-462xphsLbm7IZvjqMS4jvQw'
validation:
type: Http
content:
request:
method: POST
url: https://upload.pypi.org/legacy/
response_is_html: true
response_matcher:
- report_response: true
- type: WordMatch
words:
- "isn't allowed to upload to project"
headers:
Authorization: 'Basic {{ "__token__:" | append: TOKEN | b64enc }}'
multipart:
parts:
- name: name
type: text
content: "my-package"
- name: version
type: text
content: "0.0.1"
- name: filetype
type: text
content: "sdist"
- name: metadata_version
type: text
content: "2.1"
- name: summary
type: text
content: "A simple example package"
- name: home_page
type: text
content: "https://github.com/yourusername/my_package"
- name: sha256_digest
type: text
content: "0447379dd46c4ca8b8992bda56d07b358d015efb9300e6e16f224f4536e71d64"
- name: md5_digest
type: text
content: "9b4036ab91a71124ab9f1d32a518e2bb"
- name: :action
type: text
content: "file_upload"
- name: protocol_version
type: text
content: "1"
- name: content
type: file
content: "path/to/my_package-0.0.1.tar.gz"
content_type: "application/octet-stream"
"#;
// Use from_paths_and_contents to parse the YAML snippet into a Rules object
let data = vec![(std::path::Path::new("pypi_test.yaml"), pypi_yaml.as_bytes())];
let rules = Rules::from_paths_and_contents(data, Confidence::Low)?;
// Find the PyPI rule we just loaded
let pypi_rule_syntax = rules
.iter_rules()
.find(|r| r.id == "kingfisher.pypi.1")
.expect("Failed to find PyPI rule in test YAML")
.clone(); // Clone so we can create a `Rule` from it
// Wrap that into a `Rule` object
let pypi_rule = Rule::new(pypi_rule_syntax);
//////////////////////////////////////////
//
// Your actual PyPI token to test
let token = "<enter_pypi_token_here>";
let id = BlobId::new(&pypi_yaml.as_bytes());
// Construct an `OwnedBlobMatch` (all fields needed):
let mut owned_blob_match = OwnedBlobMatch {
rule: pypi_rule.into(),
blob_id: id,
finding_fingerprint: 0, // dummy value
// matching_input: token.as_bytes().to_vec(),
matching_input_offset_span: OffsetSpan { start: 0, end: token.len() },
captures: SerializableCaptures {
captures: smallvec![SerializableCapture {
name: Some("TOKEN".to_string()),
match_number: -1,
start: 0,
end: token.len(),
value: intern(token),
}],
},
validation_response_body: String::new(),
validation_response_status: StatusCode::OK,
validation_success: false,
calculated_entropy: 0.0, // or compute your own
is_base64: false,
};
let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
let client = reqwest::Client::new();
let cache: Cache = Arc::new(SkipMap::new());
let dependent_vars = FxHashMap::default();
let missing_deps = FxHashMap::default();
// Run the validation
validate_single_match(
&mut owned_blob_match,
&parser,
&client,
&dependent_vars,
&missing_deps,
&cache,
)
.await;
println!("Success? {:?}", owned_blob_match.validation_success);
println!("Status: {:?}", owned_blob_match.validation_response_status);
println!("Body: {:?}", owned_blob_match.validation_response_body);
Ok(())
}
}
// use crate::{
// blob::BlobId,
// liquid_filters::register_all,
// location::OffsetSpan,
// matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures},
// rules::{
// rule::{Confidence, Rule},
// Rules,
// },
// util::intern,
// validation::{validate_single_match, Cache},
// };
// #[tokio::test]
// async fn test_actual_pypi_token_validation() -> Result<()> {
// // Minimal PyPI YAML snippet for testing
// let pypi_yaml = r#"
// rules:
// - name: PyPI Upload Token
// id: kingfisher.pypi.1
// pattern: |
// (?x)
// \b
// (
// pypi-AgEIcHlwaS5vcmc[a-zA-Z0-9_-]{50,}
// )
// (?:[^a-zA-Z0-9_-]|$)
// min_entropy: 4.0
// confidence: medium
// examples:
// - '# password = pypi-AgEIcHlwaS5vcmcCJDkwNzYwNzU1LWMwOTUtNGNkOC1iYjQzLTU3OWNhZjI1NDQ1MwACJXsicGVybWCf99lvbnMiOiAidXNlciIsICJ2ZXJzaW9uIjogMX0AAAYgSpW5PAywXvchMUQnkF5H6-SolJysfUvIWopMsxE4hCM'
// - 'password: pypi-AgEIcHlwaS5vcmcCJGExMDIxZjRhLTFhZDMtNDc4YS1iOWNmLWQwCf99OTIwZjFjNwACSHsicGVybWlzc2lvbnMiOiB7InByb2plY3RzIjogWyJkamFuZ28tY2hhbm5lbHMtanNvbnJwYyJdfSwgInZlcnNpb24iOiAxfQAABiBZg48cIBQt7HckwM4G3q-462xphsLbm7IZvjqMS4jvQw'
// validation:
// type: Http
// content:
// request:
// method: POST
// url: https://upload.pypi.org/legacy/
// response_is_html: true
// response_matcher:
// - report_response: true
// - type: WordMatch
// words:
// - "isn't allowed to upload to project"
// headers:
// Authorization: 'Basic {{ "__token__:" | append: TOKEN | b64enc }}'
// multipart:
// parts:
// - name: name
// type: text
// content: "my-package"
// - name: version
// type: text
// content: "0.0.1"
// - name: filetype
// type: text
// content: "sdist"
// - name: metadata_version
// type: text
// content: "2.1"
// - name: summary
// type: text
// content: "A simple example package"
// - name: home_page
// type: text
// content: "https://github.com/yourusername/my_package"
// - name: sha256_digest
// type: text
// content: "0447379dd46c4ca8b8992bda56d07b358d015efb9300e6e16f224f4536e71d64"
// - name: md5_digest
// type: text
// content: "9b4036ab91a71124ab9f1d32a518e2bb"
// - name: :action
// type: text
// content: "file_upload"
// - name: protocol_version
// type: text
// content: "1"
// - name: content
// type: file
// content: "path/to/my_package-0.0.1.tar.gz"
// content_type: "application/octet-stream"
// "#;
// // Use from_paths_and_contents to parse the YAML snippet into a Rules object
// let data = vec![(std::path::Path::new("pypi_test.yaml"), pypi_yaml.as_bytes())];
// let rules = Rules::from_paths_and_contents(data, Confidence::Low)?;
// // Find the PyPI rule we just loaded
// let pypi_rule_syntax = rules
// .iter_rules()
// .find(|r| r.id == "kingfisher.pypi.1")
// .expect("Failed to find PyPI rule in test YAML")
// .clone(); // Clone so we can create a `Rule` from it
// // Wrap that into a `Rule` object
// let pypi_rule = Rule::new(pypi_rule_syntax);
// //////////////////////////////////////////
// //
// // Your actual PyPI token to test
// let token = "<enter_pypi_token_here>";
// let id = BlobId::new(&pypi_yaml.as_bytes());
// // Construct an `OwnedBlobMatch` (all fields needed):
// let mut owned_blob_match = OwnedBlobMatch {
// rule: pypi_rule.into(),
// blob_id: id,
// finding_fingerprint: 0, // dummy value
// // matching_input: token.as_bytes().to_vec(),
// matching_input_offset_span: OffsetSpan { start: 0, end: token.len() },
// captures: SerializableCaptures {
// captures: smallvec![SerializableCapture {
// name: Some("TOKEN".to_string()),
// match_number: -1,
// start: 0,
// end: token.len(),
// value: intern(token),
// }],
// },
// validation_response_body: String::new(),
// validation_response_status: StatusCode::OK,
// validation_success: false,
// calculated_entropy: 0.0, // or compute your own
// is_base64: false,
// };
// let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
// let client = reqwest::Client::new();
// let cache: Cache = Arc::new(SkipMap::new());
// let dependent_vars = FxHashMap::default();
// let missing_deps = FxHashMap::default();
// // Run the validation
// validate_single_match(
// &mut owned_blob_match,
// &parser,
// &client,
// &dependent_vars,
// &missing_deps,
// &cache,
// )
// .await;
// println!("Success? {:?}", owned_blob_match.validation_success);
// println!("Status: {:?}", owned_blob_match.validation_response_status);
// println!("Body: {:?}", owned_blob_match.validation_response_body);
// Ok(())
// }
// }

View file

@ -120,6 +120,8 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
extra_ignore_comments: Vec::new(),
content_filtering_args: ContentFilteringArgs {

View file

@ -120,6 +120,8 @@ fn test_bitbucket_remote_scan() -> Result<()> {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,

View file

@ -140,6 +140,8 @@ rules:
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 5.0,

View file

@ -127,6 +127,8 @@ fn test_github_remote_scan() -> Result<()> {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,

View file

@ -125,6 +125,8 @@ fn test_gitlab_remote_scan() -> Result<()> {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
extra_ignore_comments: Vec::new(),
content_filtering_args: ContentFilteringArgs {
@ -271,6 +273,8 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
gcs_bucket: None,
gcs_prefix: None,
gcs_service_account: None,

View file

@ -103,6 +103,8 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,

View file

@ -111,6 +111,8 @@ impl TestContext {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
extra_ignore_comments: Vec::new(),
content_filtering_args: ContentFilteringArgs {
@ -248,6 +250,8 @@ async fn test_scan_slack_messages() -> Result<()> {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,

View file

@ -183,6 +183,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,

View file

@ -126,6 +126,8 @@ impl TestContext {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
},
content_filtering_args: ContentFilteringArgs {
max_file_size_mb: 25.0,
@ -247,6 +249,8 @@ impl TestContext {
scan_nested_repos: true,
since_commit: None,
branch: None,
branch_root: false,
branch_root_commit: None,
gcs_bucket: None,
gcs_prefix: None,

View file

@ -26,6 +26,7 @@ fn baseline_create_and_filter() -> anyhow::Result<()> {
"--manage-baseline",
"--baseline-file",
baseline.to_str().unwrap(),
"--git-history=none",
"--no-update-check",
])
.assert()
@ -34,7 +35,10 @@ fn baseline_create_and_filter() -> anyhow::Result<()> {
assert!(baseline.exists(), "baseline file created");
// Scan again using the baseline
let initial_baseline = fs::read_to_string(&baseline)?;
// Scanning with the baseline should suppress the existing finding and leave
// the baseline untouched.
Command::cargo_bin("kingfisher")?
.args([
"scan",
@ -46,12 +50,39 @@ fn baseline_create_and_filter() -> anyhow::Result<()> {
"json",
"--baseline-file",
baseline.to_str().unwrap(),
"--git-history=none",
"--no-update-check",
])
.assert()
.code(0)
.stdout(predicate::str::contains(GH_PAT).not());
let baseline_after_scan = fs::read_to_string(&baseline)?;
assert_eq!(initial_baseline, baseline_after_scan, "baseline remains stable after reuse");
// Managing the baseline again should not churn entries or report the secret
Command::cargo_bin("kingfisher")?
.args([
"scan",
dir.path().to_str().unwrap(),
"--no-binary",
"--confidence=low",
"--no-validate",
"--format",
"json",
"--manage-baseline",
"--baseline-file",
baseline.to_str().unwrap(),
"--git-history=none",
"--no-update-check",
])
.assert()
.code(0)
.stdout(predicate::str::contains(GH_PAT).not());
let rerun_baseline = fs::read_to_string(&baseline)?;
assert_eq!(initial_baseline, rerun_baseline, "baseline remains stable");
Ok(())
}

View file

@ -2,16 +2,33 @@
//
// Integration tests that exercise `kingfisher scan` against Git branches and commit
// references using locally constructed repositories. These ensure that the
// `--branch` and `--since-commit` flags, along with the other branch-focused
// flags, behave as expected when scanning a repo without validation, including
// the ability to resume from a specific commit.
use std::fs;
use std::path::Path;
use anyhow::Result;
use assert_cmd::Command;
use git2::{build::CheckoutBuilder, BranchType, Repository, Signature};
use predicates::{prelude::PredicateBooleanExt, str::contains};
use tempfile::tempdir;
use tempfile::{tempdir, TempDir};
// Fixture secrets used by the linear-history tests below. Each `*_VALUE` is the
// substring the tests search for in the scanner's stdout; each `*_LINE` is the
// full line written into `secrets.txt` and embeds the matching value.

/// AWS secret access key value the tests look for in scan output.
const AWS_SECRET_VALUE: &str = "UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgTs5D";
/// Line committed for the AWS secret; the quoted text is the value plus a trailing `/`.
const AWS_SECRET_LINE: &str = "AWS_SECRET_ACCESS_KEY = 'UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgTs5D/'";

/// GCP private key id value the tests look for in scan output.
const GCP_PRIVATE_KEY_VALUE: &str = "c4c474d61701fd6fd4191883b8fea9a8411bf771";
/// Line committed for the GCP private key id.
const GCP_PRIVATE_KEY_LINE: &str =
    "GCP_PRIVATE_KEY_ID = 'c4c474d61701fd6fd4191883b8fea9a8411bf771'";

/// Slack bot token value the tests look for in scan output.
const SLACK_TOKEN_VALUE: &str = "xoxb-123465789012-0987654321123-AbDcEfGhIjKlMnOpQrStUvWx";
/// Line committed for the Slack bot token.
const SLACK_TOKEN_LINE: &str =
    "SLACK_BOT_TOKEN = 'xoxb-123465789012-0987654321123-AbDcEfGhIjKlMnOpQrStUvWx'";

/// Stripe secret key value the tests look for in scan output.
const STRIPE_SECRET_VALUE: &str = "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";
/// Line committed for the Stripe key; the quoted text is the value followed by `q7T`.
const STRIPE_SECRET_LINE: &str = concat!(
    "STRIPE_SECRET_KEY = '",
    "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",
    "'",
);
#[test]
fn scan_by_commit_and_branch_diff() -> anyhow::Result<()> {
@ -117,3 +134,149 @@ aws_secret_access_key = efnegoUp/WXc3XwlL77dXu1aKIICzvz+n+7Sz88i
Ok(())
}
/// Build a throwaway Git repository whose single file, `secrets.txt`, grows by
/// one secret line per commit.
///
/// Four commits are created on `HEAD`, oldest to newest: AWS, GCP, Slack,
/// Stripe. Each commit rewrites `secrets.txt` with every line added so far,
/// joined by `\n` (no trailing newline). A branch named `long-lived` is then
/// force-created at the final commit.
///
/// # Returns
/// `(temp_dir, repo_dir, commits)` where `repo_dir` is the `repo` directory
/// inside `temp_dir` and `commits` lists the commit ids oldest → newest.
/// Callers should hold on to `temp_dir` while they use `repo_dir`;
/// `tempfile::TempDir` removes the directory when dropped.
///
/// # Errors
/// Propagates any filesystem or `git2` failure.
fn setup_linear_repo_with_secrets() -> Result<(TempDir, std::path::PathBuf, Vec<git2::Oid>)> {
    let dir = tempdir()?;
    let repo_dir = dir.path().join("repo");
    let repo = Repository::init(&repo_dir)?;
    let sig = Signature::now("tester", "tester@example.com")?;
    let secrets_path = repo_dir.join("secrets.txt");

    // One entry per commit: (commit message, line appended to secrets.txt).
    let steps = [
        ("Add AWS secret", AWS_SECRET_LINE),
        ("Add GCP private key id", GCP_PRIVATE_KEY_LINE),
        ("Add Slack bot token", SLACK_TOKEN_LINE),
        ("Add Stripe API key", STRIPE_SECRET_LINE),
    ];

    let mut commits = Vec::with_capacity(steps.len());
    let mut contents = String::new();
    // `None` until the root commit exists; afterwards the most recent commit.
    let mut parent: Option<git2::Commit<'_>> = None;

    for (message, line) in steps {
        if !contents.is_empty() {
            contents.push('\n');
        }
        contents.push_str(line);
        fs::write(&secrets_path, &contents)?;

        // Stage the updated file and snapshot the index as a tree.
        let mut index = repo.index()?;
        index.add_path(Path::new("secrets.txt"))?;
        let tree_id = index.write_tree()?;
        let tree = repo.find_tree(tree_id)?;

        // The root commit has no parents; every later commit has exactly one.
        let parents: Vec<&git2::Commit<'_>> = parent.iter().collect();
        let oid = repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &parents)?;
        commits.push(oid);
        parent = Some(repo.find_commit(oid)?);
    }

    // Create a named branch to mirror long-lived branch workflows.
    let tip = parent.as_ref().expect("steps is non-empty, so a tip commit exists");
    repo.branch("long-lived", tip, true)?;

    Ok((dir, repo_dir, commits))
}
/// Scans the fixture repo with `--branch <oldest commit id>` and expects exit
/// code 200 with only the AWS secret in stdout; the GCP, Slack and Stripe
/// values added by later commits must be absent.
#[test]
fn scan_specific_commit_reports_only_that_commit() -> Result<()> {
    let (_temp_dir, repo_dir, commits) = setup_linear_repo_with_secrets()?;
    // Oldest commit: at this point `secrets.txt` holds the AWS line only.
    let first_commit = commits[0].to_string();

    // Must contain AWS, must NOT contain the later secrets.
    let expected_stdout = contains("AWS SECRET ACCESS KEY")
        .and(contains(AWS_SECRET_VALUE))
        .and(contains(GCP_PRIVATE_KEY_VALUE).not())
        .and(contains(SLACK_TOKEN_VALUE).not())
        .and(contains(STRIPE_SECRET_VALUE).not());

    // Scan exactly the initial commit via --branch <commit>.
    let mut scan = Command::cargo_bin("kingfisher")?;
    scan.args([
        "scan",
        repo_dir.to_str().unwrap(),
        "--branch",
        first_commit.as_str(),
        "--no-validate",
        "--no-update-check",
    ]);
    scan.assert().code(200).stdout(expected_stdout);

    Ok(())
}
/// Scans the fixture repo with `--branch <oldest commit id> --branch-root` and
/// expects exit code 200 with all four fixture secrets present in stdout.
#[test]
fn scan_with_branch_root_includes_descendants() -> Result<()> {
    let (_temp_dir, repo_dir, commits) = setup_linear_repo_with_secrets()?;
    // Start from the first commit.
    let first_commit = commits[0].to_string();

    let expected_stdout = contains("AWS SECRET ACCESS KEY")
        .and(contains(AWS_SECRET_VALUE))
        .and(contains(GCP_PRIVATE_KEY_VALUE))
        .and(contains(SLACK_TOKEN_VALUE))
        .and(contains(STRIPE_SECRET_VALUE));

    // Using --branch-root should include the selected commit and the remaining
    // history up to HEAD.
    let mut scan = Command::cargo_bin("kingfisher")?;
    scan.args([
        "scan",
        repo_dir.to_str().unwrap(),
        "--branch",
        first_commit.as_str(),
        "--branch-root",
        "--no-validate",
        "--no-update-check",
    ]);
    scan.assert().code(200).stdout(expected_stdout);

    Ok(())
}
/// Scans the `long-lived` branch with `--branch-root-commit <oldest commit id>`
/// and expects exit code 200, all four fixture secrets in stdout, and the id
/// of the newest commit mentioned in the output.
#[test]
fn scan_branch_tip_with_branch_root_commit() -> Result<()> {
    let (_temp_dir, repo_dir, commits) = setup_linear_repo_with_secrets()?;
    let oldest_commit = commits[0].to_string();
    let newest_commit = commits.last().expect("expected at least one commit").to_string();

    let expected_stdout = contains("AWS SECRET ACCESS KEY")
        .and(contains(AWS_SECRET_VALUE))
        .and(contains(GCP_PRIVATE_KEY_VALUE))
        .and(contains(SLACK_TOKEN_VALUE))
        .and(contains(STRIPE_SECRET_VALUE))
        .and(contains(newest_commit.as_str()));

    // Passing --branch-root-commit should implicitly enable inclusive scanning
    // even without the legacy --branch-root flag when targeting a named branch
    // tip.
    let mut scan = Command::cargo_bin("kingfisher")?;
    scan.args([
        "scan",
        repo_dir.to_str().unwrap(),
        "--branch",
        "long-lived",
        "--branch-root-commit",
        oldest_commit.as_str(),
        "--no-validate",
        "--no-update-check",
    ]);
    scan.assert().code(200).stdout(expected_stdout);

    Ok(())
}