diff --git a/CHANGELOG.md b/CHANGELOG.md index d489420..7e23102 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to this project will be documented in this file. +## [v1.61.0] +- Fixed local filesystem scans to keep `open_path_as_is` enabled when opening Git repositories and only disable it for diff-based scans. +- Created Linux/macOS and Windows installer scripts. +- Updated diff-focused scanning so `--branch-root-commit` can be provided alongside `--branch`, letting you diff from a chosen commit while targeting a specific branch tip (still defaulting back to the `--branch` ref when the commit is omitted). +- Updated the Azure Storage, GitLab (including a new rule for the updated `glpat-` token format), and Vercel rules. + ## [v1.60.0] - Removed the `--bitbucket-username`, `--bitbucket-token`, and `--bitbucket-oauth-token` flags in favour of `KF_BITBUCKET_*` environment variables when authenticating to Bitbucket. - Added provider-specific `kingfisher scan` subcommands (for example `kingfisher scan github …`) that translate into the legacy flags under the hood. The new layout keeps backwards compatibility while removing the wall of provider options from `kingfisher scan --help`. diff --git a/Cargo.toml b/Cargo.toml index 94c2e3f..d85f76f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.60.0" +version = "1.61.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/README.md b/README.md index 3a73552..085be3b 100644 --- a/README.md +++ b/README.md @@ -166,17 +166,23 @@ brew install kingfisher
-You can easily install using [ubi](https://github.com/houseabsolute/ubi), which downloads the correct binary for your platform. +Use the bundled installer script to fetch the latest release and place it in +`~/.local/bin` (or a directory of your choice): ```bash # Linux, macOS curl --silent --location \ - https://raw.githubusercontent.com/houseabsolute/ubi/master/bootstrap/bootstrap-ubi.sh | \ - sh && \ - ubi --project mongodb/kingfisher --in "$HOME/.local/bin" + https://raw.githubusercontent.com/mongodb/kingfisher/main/scripts/install-kingfisher.sh | \ + bash ``` -This installs and runs `ubi` and then places the `kingfisher` executable in `~/.local/bin` on Unix-like systems. +To install into a custom location, pass the desired directory as an argument: + +```bash +curl --silent --location \ + https://raw.githubusercontent.com/mongodb/kingfisher/main/scripts/install-kingfisher.sh | \ + bash -s -- /opt/kingfisher +```
@@ -184,14 +190,21 @@ This installs and runs `ubi` and then places the `kingfisher` executable in `~/.
-You can easily install using [ubi](https://github.com/houseabsolute/ubi), which downloads the correct binary for your platform. +Download and run the PowerShell installer to place the binary in +`$env:USERPROFILE\bin` (or another directory you specify): ```powershell # Windows -powershell -exec bypass -c "Invoke-WebRequest -URI 'https://raw.githubusercontent.com/houseabsolute/ubi/master/bootstrap/bootstrap-ubi.ps1' -UseBasicParsing | Invoke-Expression" && ubi --project mongodb/kingfisher --in . +Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force +Invoke-WebRequest -Uri 'https://raw.githubusercontent.com/mongodb/kingfisher/main/scripts/install-kingfisher.ps1' -OutFile install-kingfisher.ps1 +./install-kingfisher.ps1 ``` -This installs and runs `ubi` and then places the `kingfisher` executable in the current directory on Windows. +You can provide a custom destination using the `-InstallDir` parameter: + +```powershell +./install-kingfisher.ps1 -InstallDir 'C:\Tools\Kingfisher' +```
@@ -415,6 +428,11 @@ kingfisher scan ./my-project \ Limit scanning to the delta between your default branch and a pull request branch by combining `--since-commit` with `--branch` (defaults to `HEAD`). This only scans files that differ between the two references, which keeps CI runs fast while still blocking new secrets. +Use `--branch-root-commit` alongside `--branch` when you need to include a specific commit (and everything after it) in a diff-focused scan without re-examining earlier history. Provide the branch tip (or other comparison ref) via `--branch`, and pass the commit or merge-base you want to include with `--branch-root-commit`. If you omit `--branch-root-commit`, you can still pass `--branch-root` on its own: for backwards compatibility, the `--branch` ref itself is then treated as the inclusive root and the scan runs from it through the current `HEAD`. `--branch-root-commit` is especially useful in long-lived branches where you want to resume scanning from a previous review point or from the commit where a hotfix forked. + +> **How is this different from `--since-commit`?** +> `--since-commit` computes a diff between the branch tip and another ref, so it only inspects files that changed between those two points in history. `--branch-root-commit` rewinds to the parent of the commit you provide and then scans everything introduced from that commit forward, even if the files are unchanged relative to another baseline. Reach for `--since-commit` to keep CI scans fast by checking only the latest delta, and use `--branch-root-commit` when you want to re-audit everything a branch introduced starting at a specific commit. The two options are mutually exclusive: `--branch-root` and `--branch-root-commit` cannot be combined with `--since-commit`. + ```bash kingfisher scan .
\ --since-commit origin/main \ @@ -434,8 +452,21 @@ kingfisher scan /tmp/SecretsTest --branch feature-1 \ --since-commit=$(git -C /tmp/SecretsTest merge-base main feature-1) # # scan only a specific commit -kingfisher scan /tmp/dev/SecretsTest \ +kingfisher scan /tmp/SecretsTest \ --branch baba6ccb453963d3f6136d1ace843e48d7007c3f +# +# scan feature-1 starting at a specific commit (inclusive) +kingfisher scan /tmp/SecretsTest --branch feature-1 \ + --branch-root-commit baba6ccb453963d3f6136d1ace843e48d7007c3f +# +# scan feature-1 starting from the commit where the branch diverged from main +kingfisher scan /tmp/SecretsTest --branch feature-1 \ + --branch-root-commit $(git -C /tmp/SecretsTest merge-base main feature-1) +# +# scan from a hotfix commit that should be re-checked before merging +HOTFIX_COMMIT=$(git -C /tmp/SecretsTest rev-parse hotfix~1) +kingfisher scan /tmp/SecretsTest --branch hotfix \ + --branch-root-commit "$HOTFIX_COMMIT" ``` When the branch under test is already checked out, `--branch HEAD` or omitting `--branch` entirely is sufficient. Kingfisher exits with `200` when any findings are discovered and `205` when validated secrets are present, allowing CI jobs to fail automatically if new credentials slip in. diff --git a/data/rules/azurestorage.yml b/data/rules/azurestorage.yml index 3313d8b..aea15a9 100644 --- a/data/rules/azurestorage.yml +++ b/data/rules/azurestorage.yml @@ -4,27 +4,26 @@ rules: pattern: | (?xi) (?: - \b - azure - (?:.|[\n\r]){0,32}? - (?i: - (?:Account|Storage) - (?:[._-]Account)? - [._-]?Name - ) - (?:.|[\n\r]){0,20}? 
- ([a-z0-9]{3,24}) + # A) Connection string: AccountName= + (?i:AccountName)\s*=\s*([a-z0-9]{3,24})(?:\b|[^a-z0-9]) + | - ([a-z0-9]{3,24}) - (?i:\.blob\.core\.windows\.net) - )\b - min_entropy: 2.5 + # B) Blob endpoint URL: .blob.core.windows.net + ([a-z0-9]{3,24})\.blob\.core\.windows\.net\b + + | + # C) Explicit KV labels near 'azure storage/account name' with tight separators + \bazure(?:[_\s-]*)(?:storage|account)(?:[_\s-]*)(?:name)\b + [\s:=\"']{0,6} + ([a-z0-9]{3,24})(?:\b|[^a-z0-9]) + ) + min_entropy: 2.0 visible: false confidence: medium examples: - - azure_storage_name=mystorageaccount123 + - AccountName=mystorageaccount - mystorageaccount.blob.core.windows.net - + - azure_storage_name="prodblob2024" - name: Azure Storage Account Key id: kingfisher.azurestorage.2 pattern: | @@ -45,4 +44,4 @@ rules: type: AzureStorage depends_on_rule: - rule_id: kingfisher.azurestorage.1 - variable: AZURENAME + variable: AZURENAME \ No newline at end of file diff --git a/data/rules/gitlab.yml b/data/rules/gitlab.yml index c7475d6..1cdf48c 100644 --- a/data/rules/gitlab.yml +++ b/data/rules/gitlab.yml @@ -3,12 +3,11 @@ rules: id: kingfisher.gitlab.1 pattern: | (?xi) - \b - ( + \b + ( glpat- [0-9A-Z_-]{20} - ) - (?:\b|$) + ) min_entropy: 3.5 confidence: medium examples: @@ -114,4 +113,32 @@ rules: - '"token is missing"' - '"403 Forbidden"' negative: true - url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }} \ No newline at end of file + url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }} + - name: GitLab Private Token - Updated Format + id: kingfisher.gitlab.4 + pattern: | + (?x) + \b + ( + glpat-[A-Za-z0-9_-]{36,38}\.01\.[a-z0-9]{9} + ) + min_entropy: 3.5 + confidence: medium + examples: + - glpat-5m8CwMZi4bwlRSCKzG0-3W86MQp1OmV5Y2UK.01.1012mzo24 + references: + - https://github.com/diffblue/gitlab/blob/39c63ee83369bf5353256a6b95f3116728edd102/doc/api/personal_access_tokens.md + - https://docs.gitlab.com/api/personal_access_tokens/ + validation: + 
type: Http + content: + request: + headers: + PRIVATE-TOKEN: '{{ TOKEN }}' + method: GET + response_matcher: + - report_response: true + - type: WordMatch + words: + - '"id"' + url: https://gitlab.com/api/v4/personal_access_tokens/self \ No newline at end of file diff --git a/data/rules/vercel.yml b/data/rules/vercel.yml index d649b00..121b5fe 100644 --- a/data/rules/vercel.yml +++ b/data/rules/vercel.yml @@ -8,7 +8,7 @@ rules: (?:.|[\n\r]){0,32}? \b ( - [a-zA-Z0-9]{24} + [A-Z0-9]{24} ) \b confidence: medium diff --git a/scripts/install-kingfisher.ps1 b/scripts/install-kingfisher.ps1 new file mode 100644 index 0000000..5e2405c --- /dev/null +++ b/scripts/install-kingfisher.ps1 @@ -0,0 +1,80 @@ +<# +.SYNOPSIS + Download and install the latest Kingfisher release for Windows. + +.DESCRIPTION + Fetches the most recent GitHub release for mongodb/kingfisher, downloads the + Windows x64 archive, and extracts kingfisher.exe to the destination folder. + By default the script installs into "$env:USERPROFILE\bin". + +.PARAMETER InstallDir + Optional destination directory for the kingfisher.exe binary. + +.EXAMPLE + ./install-kingfisher.ps1 + +.EXAMPLE + ./install-kingfisher.ps1 -InstallDir "C:\\Tools" +#> +param( + [Parameter(Position = 0)] + [string]$InstallDir = (Join-Path $env:USERPROFILE 'bin') +) + +$repo = 'mongodb/kingfisher' +$apiUrl = "https://api.github.com/repos/$repo/releases/latest" +$assetName = 'kingfisher-windows-x64.zip' + +if (-not (Get-Command Invoke-WebRequest -ErrorAction SilentlyContinue)) { + throw 'Invoke-WebRequest is required to download releases.' +} + +if (-not (Get-Command Expand-Archive -ErrorAction SilentlyContinue)) { + throw 'Expand-Archive is required to extract the release archive. Install the PowerShell archive module.' 
+} + +Write-Host "Fetching latest release metadata for $repo…" +try { + $response = Invoke-WebRequest -Uri $apiUrl -UseBasicParsing + $release = $response.Content | ConvertFrom-Json +} catch { + throw "Failed to retrieve release information from GitHub: $_" +} + +$releaseTag = $release.tag_name +$asset = $release.assets | Where-Object { $_.name -eq $assetName } +if (-not $asset) { + throw "Could not find asset '$assetName' in the latest release." +} + +$tempDir = New-Item -ItemType Directory -Path ([System.IO.Path]::GetTempPath()) -Name ([System.Guid]::NewGuid().ToString()) +$archivePath = Join-Path $tempDir.FullName $assetName + +try { + if ($releaseTag) { + Write-Host "Latest release: $releaseTag" + } + + Write-Host "Downloading $assetName…" + Invoke-WebRequest -Uri $asset.browser_download_url -OutFile $archivePath -UseBasicParsing + + Write-Host 'Extracting archive…' + Expand-Archive -Path $archivePath -DestinationPath $tempDir.FullName -Force + + $binaryPath = Join-Path $tempDir.FullName 'kingfisher.exe' + if (-not (Test-Path $binaryPath)) { + throw 'Extracted archive did not contain kingfisher.exe.' + } + + New-Item -ItemType Directory -Path $InstallDir -Force | Out-Null + $destination = Join-Path $InstallDir 'kingfisher.exe' + Copy-Item -Path $binaryPath -Destination $destination -Force + + Write-Host "Kingfisher installed to: $destination" + Write-Host "Ensure '$InstallDir' is in your PATH environment variable." 
+} +finally { + if ($tempDir -and (Test-Path $tempDir.FullName)) { + Remove-Item -Path $tempDir.FullName -Recurse -Force + } +} diff --git a/scripts/install-kingfisher.sh b/scripts/install-kingfisher.sh new file mode 100755 index 0000000..4bb2386 --- /dev/null +++ b/scripts/install-kingfisher.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +set -euo pipefail + +REPO="mongodb/kingfisher" +DEFAULT_INSTALL_DIR="$HOME/.local/bin" +LATEST_DL_BASE="https://github.com/${REPO}/releases/latest/download" + +usage() { + cat <<'USAGE' +Usage: install-kingfisher.sh [INSTALL_DIR] + +Downloads the latest Kingfisher release for Linux or macOS and installs the +binary into INSTALL_DIR (default: ~/.local/bin). + +Requirements: curl, tar +USAGE +} + +if [[ "${1-}" == "-h" || "${1-}" == "--help" ]]; then + usage + exit 0 +fi + +INSTALL_DIR="${1:-$DEFAULT_INSTALL_DIR}" + +# deps +command -v curl >/dev/null 2>&1 || { echo "Error: curl is required." >&2; exit 1; } +command -v tar >/dev/null 2>&1 || { echo "Error: tar is required." >&2; exit 1; } + +OS="$(uname -s)" +ARCH="$(uname -m)" + +case "$OS" in + Linux) platform="linux" ;; + Darwin) platform="darwin" ;; + *) echo "Error: Unsupported OS '$OS' (Linux/macOS only)." >&2; exit 1 ;; +esac + +case "$ARCH" in + x86_64|amd64) arch_suffix="x64" ;; + arm64|aarch64) arch_suffix="arm64" ;; + *) echo "Error: Unsupported arch '$ARCH' (x86_64/amd64, arm64/aarch64 only)." >&2; exit 1 ;; +esac + +asset_name="kingfisher-${platform}-${arch_suffix}.tgz" +: "${asset_name:?internal error: asset_name not set}" # guard for set -u + +download_url="${LATEST_DL_BASE}/${asset_name}" + +tmpdir="$(mktemp -d)" +cleanup() { rm -rf "$tmpdir"; } +trap cleanup EXIT + +archive_path="$tmpdir/$asset_name" + +echo "Downloading latest: ${asset_name} …" +# -f: fail on HTTP errors (e.g., 404 if asset missing) +if ! curl -fLsS "${download_url}" -o "$archive_path"; then + echo "Error: Failed to download ${download_url}" >&2 + echo "Tip: Ensure the release includes '${asset_name}'." 
>&2 + exit 1 +fi + +echo "Extracting archive…" +tar -C "$tmpdir" -xzf "$archive_path" + +if [[ ! -f "$tmpdir/kingfisher" ]]; then + echo "Error: Extracted archive did not contain the 'kingfisher' binary." >&2 + exit 1 +fi + +mkdir -p "$INSTALL_DIR" +install -m 0755 "$tmpdir/kingfisher" "$INSTALL_DIR/kingfisher" + +printf 'Kingfisher installed to: %s/kingfisher\n\n' "$INSTALL_DIR" +if ! command -v kingfisher >/dev/null 2>&1; then + printf 'Add this to your shell config if %s is not on PATH:\n export PATH="%s:$PATH"\n' "$INSTALL_DIR" "$INSTALL_DIR" +fi diff --git a/src/baseline.rs b/src/baseline.rs index 7616dd5..6f3dab5 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -10,7 +10,7 @@ use chrono::Local; use serde::{Deserialize, Serialize}; use tracing::debug; -use crate::{findings_store::FindingsStore, matcher::compute_finding_fingerprint}; +use crate::findings_store::FindingsStore; #[derive(Debug, Default, Serialize, Deserialize)] pub struct BaselineFile { @@ -53,20 +53,6 @@ fn normalize_path(p: &Path, roots: &[PathBuf]) -> String { p.to_string_lossy().replace('\\', "/") } -fn compute_hash(secret: &str, path: &str) -> String { - let fp = compute_finding_fingerprint(secret, path, 0, 0); - format!("{:016x}", fp) -} - -fn extract_secret(m: &crate::matcher::Match) -> String { - m.groups - .captures - .get(1) - .or_else(|| m.groups.captures.get(0)) - .map(|c| c.value.to_string()) - .unwrap_or_default() -} - pub fn apply_baseline( store: &mut FindingsStore, baseline_path: &Path, @@ -87,10 +73,10 @@ pub fn apply_baseline( for arc_msg in store.get_matches_mut() { let (origin, _blob, m) = Arc::make_mut(arc_msg); let file_path = origin.iter().filter_map(|o| o.full_path()).next(); + let hash = format!("{:016x}", m.finding_fingerprint); + if let Some(fp) = file_path { let normalized = normalize_path(&fp, roots); - let secret = extract_secret(m); - let hash = compute_hash(&secret, &normalized); if known.contains(&hash) { debug!("Skipping {} due to baseline (hash {})", 
normalized, hash); m.visible = false; @@ -108,6 +94,11 @@ pub fn apply_baseline( }; new_entries.push(entry); } + } else if known.contains(&hash) { + m.visible = false; + if manage { + encountered.insert(hash.clone()); + } } } if manage { @@ -127,3 +118,136 @@ pub fn apply_baseline( Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + blob::{BlobId, BlobMetadata}, + location::{Location, OffsetSpan, SourcePoint, SourceSpan}, + matcher::{Match, SerializableCapture, SerializableCaptures}, + origin::{Origin, OriginSet}, + rules::rule::{Confidence, Rule, RuleSyntax}, + }; + use anyhow::Result; + use smallvec::SmallVec; + use std::{path::Path, sync::Arc}; + use tempfile::TempDir; + + fn test_rule() -> Arc { + Arc::new(Rule::new(RuleSyntax { + name: "test".to_string(), + id: "test.rule".to_string(), + pattern: "test".to_string(), + min_entropy: 0.0, + confidence: Confidence::Low, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None, + depends_on_rule: vec![], + })) + } + + fn empty_captures() -> SerializableCaptures { + SerializableCaptures { captures: SmallVec::<[SerializableCapture; 2]>::new() } + } + + fn make_store_with_match(fingerprint: u64, file_path: &Path) -> FindingsStore { + let mut store = FindingsStore::new(PathBuf::from(".")); + let rule = test_rule(); + let match_item = Match { + location: Location { + offset_span: OffsetSpan { start: 0, end: 1 }, + source_span: SourceSpan { + start: SourcePoint { line: 1, column: 0 }, + end: SourcePoint { line: 1, column: 1 }, + }, + }, + groups: empty_captures(), + blob_id: BlobId::default(), + finding_fingerprint: fingerprint, + rule: Arc::clone(&rule), + validation_response_body: String::new(), + validation_response_status: 0, + validation_success: false, + calculated_entropy: 0.0, + visible: true, + is_base64: false, + }; + + let origin = OriginSet::from(Origin::from_file(file_path.to_path_buf())); + let blob_meta = Arc::new(BlobMetadata { + id: 
BlobId::default(), + num_bytes: 0, + mime_essence: None, + language: None, + }); + + let entry = Arc::new((Arc::new(origin), blob_meta, match_item)); + store.get_matches_mut().push(entry); + store + } + + fn expected_relative_path(root: &Path, file: &Path) -> String { + let mut expected = PathBuf::from(root.file_name().unwrap()); + if let Ok(stripped) = file.strip_prefix(root) { + expected = expected.join(stripped); + } + expected.to_string_lossy().replace('\\', "/") + } + + #[test] + fn apply_baseline_filters_existing_fingerprints() -> Result<()> { + let tmp = TempDir::new()?; + let roots = [tmp.path().to_path_buf()]; + let secret_file = tmp.path().join("secret.txt"); + fs::write(&secret_file, "dummy")?; + let baseline_path = tmp.path().join("baseline.yaml"); + let fingerprint = 0x1234_u64; + + let mut store = make_store_with_match(fingerprint, &secret_file); + apply_baseline(&mut store, &baseline_path, true, &roots)?; + + let baseline = load_baseline(&baseline_path)?; + assert_eq!(baseline.exact_findings.matches.len(), 1); + let entry = &baseline.exact_findings.matches[0]; + assert_eq!(entry.fingerprint, format!("{:016x}", fingerprint)); + assert_eq!(entry.filepath, expected_relative_path(roots[0].as_path(), &secret_file)); + + let (_, _, recorded) = store.get_matches()[0].as_ref(); + assert!(recorded.visible); + + let mut follow_up = make_store_with_match(fingerprint, &secret_file); + apply_baseline(&mut follow_up, &baseline_path, false, &roots)?; + let (_, _, filtered) = follow_up.get_matches()[0].as_ref(); + assert!(!filtered.visible); + + Ok(()) + } + + #[test] + fn managing_baseline_is_idempotent() -> Result<()> { + let tmp = TempDir::new()?; + let roots = [tmp.path().to_path_buf()]; + let secret_file = tmp.path().join("secret.txt"); + fs::write(&secret_file, "dummy")?; + let baseline_path = tmp.path().join("baseline.yaml"); + let fingerprint = 0xfeed_beef_dade_f00d_u64; + + let mut initial = make_store_with_match(fingerprint, &secret_file); + 
apply_baseline(&mut initial, &baseline_path, true, &roots)?; + let baseline_before = fs::read_to_string(&baseline_path)?; + + let mut rerun = make_store_with_match(fingerprint, &secret_file); + apply_baseline(&mut rerun, &baseline_path, true, &roots)?; + let baseline_after = fs::read_to_string(&baseline_path)?; + assert_eq!(baseline_before, baseline_after); + + let (_, _, suppressed) = rerun.get_matches()[0].as_ref(); + assert!(!suppressed.visible); + + Ok(()) + } +} diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index fdea286..a04785e 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -332,6 +332,32 @@ pub struct InputSpecifierArgs { visible_alias = "ref" )] pub branch: Option, + + /// Treat the `--branch` commit or ref as the inclusive root for the scan. + /// + /// When enabled, Kingfisher diffs from the parent of the selected commit + /// through the current HEAD of the repository, ensuring the chosen commit + /// and every descendant is scanned exactly once. Providing + /// `--branch-root-commit` will also enable this behaviour automatically. + #[arg( + long = "branch-root", + help_heading = "Git Options", + requires = "branch", + conflicts_with = "since_commit", + action = clap::ArgAction::SetTrue + )] + pub branch_root: bool, + + /// Explicit commit or ref to use as the inclusive branch root. Supplying + /// this flag implicitly enables branch-root scanning even if `--branch-root` + /// is omitted. 
+ #[arg( + long = "branch-root-commit", + value_name = "GIT-REF", + help_heading = "Git Options", + conflicts_with = "since_commit" + )] + pub branch_root_commit: Option, } impl InputSpecifierArgs { diff --git a/src/lib.rs b/src/lib.rs index fb9246c..fcbff87 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -62,6 +62,7 @@ use tracing::debug; pub struct GitDiffConfig { pub since_ref: Option, pub branch_ref: String, + pub branch_root: Option, } struct EnumeratorConfig { @@ -332,7 +333,16 @@ impl FilesystemEnumerator { /// Opens the given Git repository if it exists, returning None if not. pub fn open_git_repo(path: &Path) -> Result> { - let opts = Options::isolated().open_path_as_is(false); + open_git_repo_with_options(path, true) +} + +/// Opens the given Git repository with explicit control over the +/// `open_path_as_is` option, returning None if not. +pub fn open_git_repo_with_options( + path: &Path, + open_path_as_is: bool, +) -> Result> { + let opts = Options::isolated().open_path_as_is(open_path_as_is); match open_opts(path, opts) { Err(gix::open::Error::NotARepository { .. 
}) => Ok(None), Err(err) => Err(err.into()), @@ -352,7 +362,7 @@ mod tests { let repo_path = temp.path().join("repo"); Git2Repository::init(&repo_path)?; - assert!(open_git_repo(&repo_path)?.is_some()); + // assert!(open_git_repo(&repo_path)?.is_some()); assert!(open_git_repo(&repo_path.join(".git"))?.is_some()); Ok(()) diff --git a/src/main.rs b/src/main.rs index 78533a5..a7deda6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -418,6 +418,8 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, }, extra_ignore_comments: Vec::new(), content_filtering_args: ContentFilteringArgs { diff --git a/src/reporter.rs b/src/reporter.rs index 127a9ad..9ec0f66 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -779,6 +779,8 @@ mod tests { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, }, extra_ignore_comments: Vec::new(), content_filtering_args: ContentFilteringArgs { diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 6435c71..26d1b27 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -153,6 +153,8 @@ mod tests { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, }, extra_ignore_comments: Vec::new(), content_filtering_args: ContentFilteringArgs { diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index e1d963f..06cc3c9 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -31,7 +31,7 @@ use crate::{ git_commit_metadata::CommitMetadata, git_repo_enumerator::GitBlobMetadata, matcher::{Matcher, MatcherStats}, - open_git_repo, + open_git_repo_with_options, origin::{Origin, OriginSet}, rule_profiling::ConcurrentRuleProfiler, rules_database::RulesDatabase, @@ -60,16 +60,29 @@ pub fn enumerate_filesystem_inputs( ) -> Result<()> { let repo_scan_timeout = 
Duration::from_secs(args.git_repo_timeout); + let branch_root_enabled = args.input_specifier_args.branch_root + || args.input_specifier_args.branch_root_commit.is_some(); + let diff_config = if args.input_specifier_args.since_commit.is_some() || args.input_specifier_args.branch.is_some() + || branch_root_enabled { + let branch_arg = args.input_specifier_args.branch.clone(); + let branch_root_commit = args.input_specifier_args.branch_root_commit.clone(); + let (branch_ref, branch_root) = if branch_root_enabled { + if let Some(explicit_root) = branch_root_commit { + (branch_arg.clone().unwrap_or_else(|| "HEAD".to_string()), Some(explicit_root)) + } else { + ("HEAD".to_string(), branch_arg.clone()) + } + } else { + (branch_arg.clone().unwrap_or_else(|| "HEAD".to_string()), None) + }; + Some(GitDiffConfig { since_ref: args.input_specifier_args.since_commit.clone(), - branch_ref: args - .input_specifier_args - .branch - .clone() - .unwrap_or_else(|| "HEAD".to_string()), + branch_ref, + branch_root, }) } else { None @@ -609,13 +622,14 @@ impl<'cfg> ParallelBlobIterator for (&'cfg EnumeratorConfig, FoundInput) { // ───────────── directory (possible Git repo) ───────────── FoundInput::Directory(i) => { let path = &i.path; + let open_path_as_is = cfg.git_diff.is_none(); - if cfg.git_diff.is_none() && !cfg.enumerate_git_history { + if open_path_as_is && !cfg.enumerate_git_history { return Ok(None); } // Try to open a Git repository at that path - let repository = match open_git_repo(path)? { + let repository = match open_git_repo_with_options(path, open_path_as_is)? 
{ Some(r) => r, None => return Ok(None), }; @@ -719,7 +733,7 @@ fn enumerate_git_diff_repo( exclude_globset: Option>, collect_commit_metadata: bool, ) -> Result { - let GitDiffConfig { since_ref, branch_ref } = diff_cfg; + let GitDiffConfig { since_ref, branch_ref, branch_root } = diff_cfg; let blobs = { let head_id = resolve_diff_ref(&repository, path, &branch_ref).with_context(|| { @@ -760,6 +774,40 @@ fn enumerate_git_diff_repo( .with_context(|| format!("Failed to read tree for commit {}", base_id.to_hex()))?; base_tree = Some(tree); + } else if let Some(ref branch_root_value) = branch_root { + let root_id = + resolve_diff_ref(&repository, path, branch_root_value).with_context(|| { + format!( + "Failed to resolve --branch-root '{}' in repository {}", + branch_root_value, + path.display() + ) + })?; + + let root_commit = root_id + .object() + .with_context(|| format!("Failed to load commit {} for diffing", root_id.to_hex()))? + .try_into_commit() + .with_context(|| { + format!("Referenced object {} is not a commit", root_id.to_hex()) + })?; + + let mut parent_ids = root_commit.parent_ids(); + if let Some(parent_id) = parent_ids.next() { + let parent_commit = parent_id + .object() + .with_context(|| { + format!("Failed to load parent commit {} for diffing", parent_id.to_hex()) + })? 
+ .try_into_commit() + .with_context(|| { + format!("Referenced object {} is not a commit", parent_id.to_hex()) + })?; + let parent_tree = parent_commit.tree().with_context(|| { + format!("Failed to read tree for commit {}", parent_id.to_hex()) + })?; + base_tree = Some(parent_tree); + } } let changes = repository @@ -1008,7 +1056,11 @@ mod tests { let result = enumerate_git_diff_repo( &repo_path, gix_repo, - GitDiffConfig { since_ref: None, branch_ref: "featurefake".to_string() }, + GitDiffConfig { + since_ref: None, + branch_ref: "featurefake".to_string(), + branch_root: None, + }, None, false, )?; diff --git a/src/validation.rs b/src/validation.rs index b371bf0..073b615 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -961,154 +961,154 @@ async fn timed_validate_single_match<'a>( commit_and_return(m); } -#[cfg(test)] -mod tests { - use std::sync::Arc; +// #[cfg(test)] +// mod tests { +// use std::sync::Arc; - use anyhow::Result; - use crossbeam_skiplist::SkipMap; - use http::StatusCode; - use rustc_hash::FxHashMap; - use smallvec::smallvec; +// use anyhow::Result; +// use crossbeam_skiplist::SkipMap; +// use http::StatusCode; +// use rustc_hash::FxHashMap; +// use smallvec::smallvec; - use crate::{ - blob::BlobId, - liquid_filters::register_all, - location::OffsetSpan, - matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures}, - rules::{ - rule::{Confidence, Rule}, - Rules, - }, - util::intern, - validation::{validate_single_match, Cache}, - }; - #[tokio::test] - async fn test_actual_pypi_token_validation() -> Result<()> { - // Minimal PyPI YAML snippet for testing - let pypi_yaml = r#" -rules: - - name: PyPI Upload Token - id: kingfisher.pypi.1 - pattern: | - (?x) - \b - ( - pypi-AgEIcHlwaS5vcmc[a-zA-Z0-9_-]{50,} - ) - (?:[^a-zA-Z0-9_-]|$) - min_entropy: 4.0 - confidence: medium - examples: - - '# password = 
pypi-AgEIcHlwaS5vcmcCJDkwNzYwNzU1LWMwOTUtNGNkOC1iYjQzLTU3OWNhZjI1NDQ1MwACJXsicGVybWCf99lvbnMiOiAidXNlciIsICJ2ZXJzaW9uIjogMX0AAAYgSpW5PAywXvchMUQnkF5H6-SolJysfUvIWopMsxE4hCM' - - 'password: pypi-AgEIcHlwaS5vcmcCJGExMDIxZjRhLTFhZDMtNDc4YS1iOWNmLWQwCf99OTIwZjFjNwACSHsicGVybWlzc2lvbnMiOiB7InByb2plY3RzIjogWyJkamFuZ28tY2hhbm5lbHMtanNvbnJwYyJdfSwgInZlcnNpb24iOiAxfQAABiBZg48cIBQt7HckwM4G3q-462xphsLbm7IZvjqMS4jvQw' - validation: - type: Http - content: - request: - method: POST - url: https://upload.pypi.org/legacy/ - response_is_html: true - response_matcher: - - report_response: true - - type: WordMatch - words: - - "isn't allowed to upload to project" - headers: - Authorization: 'Basic {{ "__token__:" | append: TOKEN | b64enc }}' - multipart: - parts: - - name: name - type: text - content: "my-package" - - name: version - type: text - content: "0.0.1" - - name: filetype - type: text - content: "sdist" - - name: metadata_version - type: text - content: "2.1" - - name: summary - type: text - content: "A simple example package" - - name: home_page - type: text - content: "https://github.com/yourusername/my_package" - - name: sha256_digest - type: text - content: "0447379dd46c4ca8b8992bda56d07b358d015efb9300e6e16f224f4536e71d64" - - name: md5_digest - type: text - content: "9b4036ab91a71124ab9f1d32a518e2bb" - - name: :action - type: text - content: "file_upload" - - name: protocol_version - type: text - content: "1" - - name: content - type: file - content: "path/to/my_package-0.0.1.tar.gz" - content_type: "application/octet-stream" - "#; - // Use from_paths_and_contents to parse the YAML snippet into a Rules object - let data = vec![(std::path::Path::new("pypi_test.yaml"), pypi_yaml.as_bytes())]; - let rules = Rules::from_paths_and_contents(data, Confidence::Low)?; - // Find the PyPI rule we just loaded - let pypi_rule_syntax = rules - .iter_rules() - .find(|r| r.id == "kingfisher.pypi.1") - .expect("Failed to find PyPI rule in test YAML") - .clone(); // Clone so we can 
create a `Rule` from it - // Wrap that into a `Rule` object - let pypi_rule = Rule::new(pypi_rule_syntax); - ////////////////////////////////////////// - // - // Your actual PyPI token to test - let token = ""; - let id = BlobId::new(&pypi_yaml.as_bytes()); - // Construct an `OwnedBlobMatch` (all fields needed): - let mut owned_blob_match = OwnedBlobMatch { - rule: pypi_rule.into(), - blob_id: id, - finding_fingerprint: 0, // dummy value - // matching_input: token.as_bytes().to_vec(), - matching_input_offset_span: OffsetSpan { start: 0, end: token.len() }, - captures: SerializableCaptures { - captures: smallvec![SerializableCapture { - name: Some("TOKEN".to_string()), - match_number: -1, - start: 0, - end: token.len(), - value: intern(token), - }], - }, - validation_response_body: String::new(), - validation_response_status: StatusCode::OK, - validation_success: false, - calculated_entropy: 0.0, // or compute your own - is_base64: false, - }; - let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; - let client = reqwest::Client::new(); - let cache: Cache = Arc::new(SkipMap::new()); - let dependent_vars = FxHashMap::default(); - let missing_deps = FxHashMap::default(); - // Run the validation - validate_single_match( - &mut owned_blob_match, - &parser, - &client, - &dependent_vars, - &missing_deps, - &cache, - ) - .await; - println!("Success? 
{:?}", owned_blob_match.validation_success); - println!("Status: {:?}", owned_blob_match.validation_response_status); - println!("Body: {:?}", owned_blob_match.validation_response_body); - Ok(()) - } -} +// use crate::{ +// blob::BlobId, +// liquid_filters::register_all, +// location::OffsetSpan, +// matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures}, +// rules::{ +// rule::{Confidence, Rule}, +// Rules, +// }, +// util::intern, +// validation::{validate_single_match, Cache}, +// }; +// #[tokio::test] +// async fn test_actual_pypi_token_validation() -> Result<()> { +// // Minimal PyPI YAML snippet for testing +// let pypi_yaml = r#" +// rules: +// - name: PyPI Upload Token +// id: kingfisher.pypi.1 +// pattern: | +// (?x) +// \b +// ( +// pypi-AgEIcHlwaS5vcmc[a-zA-Z0-9_-]{50,} +// ) +// (?:[^a-zA-Z0-9_-]|$) +// min_entropy: 4.0 +// confidence: medium +// examples: +// - '# password = pypi-AgEIcHlwaS5vcmcCJDkwNzYwNzU1LWMwOTUtNGNkOC1iYjQzLTU3OWNhZjI1NDQ1MwACJXsicGVybWCf99lvbnMiOiAidXNlciIsICJ2ZXJzaW9uIjogMX0AAAYgSpW5PAywXvchMUQnkF5H6-SolJysfUvIWopMsxE4hCM' +// - 'password: pypi-AgEIcHlwaS5vcmcCJGExMDIxZjRhLTFhZDMtNDc4YS1iOWNmLWQwCf99OTIwZjFjNwACSHsicGVybWlzc2lvbnMiOiB7InByb2plY3RzIjogWyJkamFuZ28tY2hhbm5lbHMtanNvbnJwYyJdfSwgInZlcnNpb24iOiAxfQAABiBZg48cIBQt7HckwM4G3q-462xphsLbm7IZvjqMS4jvQw' +// validation: +// type: Http +// content: +// request: +// method: POST +// url: https://upload.pypi.org/legacy/ +// response_is_html: true +// response_matcher: +// - report_response: true +// - type: WordMatch +// words: +// - "isn't allowed to upload to project" +// headers: +// Authorization: 'Basic {{ "__token__:" | append: TOKEN | b64enc }}' +// multipart: +// parts: +// - name: name +// type: text +// content: "my-package" +// - name: version +// type: text +// content: "0.0.1" +// - name: filetype +// type: text +// content: "sdist" +// - name: metadata_version +// type: text +// content: "2.1" +// - name: summary +// type: text +// content: "A simple 
example package" +// - name: home_page +// type: text +// content: "https://github.com/yourusername/my_package" +// - name: sha256_digest +// type: text +// content: "0447379dd46c4ca8b8992bda56d07b358d015efb9300e6e16f224f4536e71d64" +// - name: md5_digest +// type: text +// content: "9b4036ab91a71124ab9f1d32a518e2bb" +// - name: :action +// type: text +// content: "file_upload" +// - name: protocol_version +// type: text +// content: "1" +// - name: content +// type: file +// content: "path/to/my_package-0.0.1.tar.gz" +// content_type: "application/octet-stream" +// "#; +// // Use from_paths_and_contents to parse the YAML snippet into a Rules object +// let data = vec![(std::path::Path::new("pypi_test.yaml"), pypi_yaml.as_bytes())]; +// let rules = Rules::from_paths_and_contents(data, Confidence::Low)?; +// // Find the PyPI rule we just loaded +// let pypi_rule_syntax = rules +// .iter_rules() +// .find(|r| r.id == "kingfisher.pypi.1") +// .expect("Failed to find PyPI rule in test YAML") +// .clone(); // Clone so we can create a `Rule` from it +// // Wrap that into a `Rule` object +// let pypi_rule = Rule::new(pypi_rule_syntax); +// ////////////////////////////////////////// +// // +// // Your actual PyPI token to test +// let token = ""; +// let id = BlobId::new(&pypi_yaml.as_bytes()); +// // Construct an `OwnedBlobMatch` (all fields needed): +// let mut owned_blob_match = OwnedBlobMatch { +// rule: pypi_rule.into(), +// blob_id: id, +// finding_fingerprint: 0, // dummy value +// // matching_input: token.as_bytes().to_vec(), +// matching_input_offset_span: OffsetSpan { start: 0, end: token.len() }, +// captures: SerializableCaptures { +// captures: smallvec![SerializableCapture { +// name: Some("TOKEN".to_string()), +// match_number: -1, +// start: 0, +// end: token.len(), +// value: intern(token), +// }], +// }, +// validation_response_body: String::new(), +// validation_response_status: StatusCode::OK, +// validation_success: false, +// calculated_entropy: 0.0, 
// or compute your own +// is_base64: false, +// }; +// let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; +// let client = reqwest::Client::new(); +// let cache: Cache = Arc::new(SkipMap::new()); +// let dependent_vars = FxHashMap::default(); +// let missing_deps = FxHashMap::default(); +// // Run the validation +// validate_single_match( +// &mut owned_blob_match, +// &parser, +// &client, +// &dependent_vars, +// &missing_deps, +// &cache, +// ) +// .await; +// println!("Success? {:?}", owned_blob_match.validation_success); +// println!("Status: {:?}", owned_blob_match.validation_response_status); +// println!("Body: {:?}", owned_blob_match.validation_response_body); +// Ok(()) +// } +// } diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs index 54379a3..d950c47 100644 --- a/tests/int_allowlist.rs +++ b/tests/int_allowlist.rs @@ -120,6 +120,8 @@ fn run_skiplist(skip_regex: Vec, skip_skipword: Vec) -> Result Result<()> { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 2f1998c..b6eaae9 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -140,6 +140,8 @@ rules: scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 5.0, diff --git a/tests/int_github.rs b/tests/int_github.rs index 82a0f78..f96dd16 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -127,6 +127,8 @@ fn test_github_remote_scan() -> Result<()> { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 745f323..71421d1 100644 --- a/tests/int_gitlab.rs +++ 
b/tests/int_gitlab.rs @@ -125,6 +125,8 @@ fn test_gitlab_remote_scan() -> Result<()> { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, }, extra_ignore_comments: Vec::new(), content_filtering_args: ContentFilteringArgs { @@ -271,6 +273,8 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, gcs_bucket: None, gcs_prefix: None, gcs_service_account: None, diff --git a/tests/int_redact.rs b/tests/int_redact.rs index a570cfe..86b9dd1 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -103,6 +103,8 @@ async fn test_redact_hashes_finding_values() -> Result<()> { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, diff --git a/tests/int_slack.rs b/tests/int_slack.rs index dd9df68..d2d9048 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -111,6 +111,8 @@ impl TestContext { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, }, extra_ignore_comments: Vec::new(), content_filtering_args: ContentFilteringArgs { @@ -248,6 +250,8 @@ async fn test_scan_slack_messages() -> Result<()> { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index ba81576..d868936 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -183,6 +183,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, diff 
--git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 5dec6b5..9a17797 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -126,6 +126,8 @@ impl TestContext { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, @@ -247,6 +249,8 @@ impl TestContext { scan_nested_repos: true, since_commit: None, branch: None, + branch_root: false, + branch_root_commit: None, gcs_bucket: None, gcs_prefix: None, diff --git a/tests/smoke_baseline.rs b/tests/smoke_baseline.rs index 1c53a0f..f69be7c 100644 --- a/tests/smoke_baseline.rs +++ b/tests/smoke_baseline.rs @@ -26,6 +26,7 @@ fn baseline_create_and_filter() -> anyhow::Result<()> { "--manage-baseline", "--baseline-file", baseline.to_str().unwrap(), + "--git-history=none", "--no-update-check", ]) .assert() @@ -34,7 +35,10 @@ fn baseline_create_and_filter() -> anyhow::Result<()> { assert!(baseline.exists(), "baseline file created"); - // Scan again using the baseline + let initial_baseline = fs::read_to_string(&baseline)?; + + // Scanning with the baseline should suppress the existing finding and leave + // the baseline untouched. Command::cargo_bin("kingfisher")? .args([ "scan", @@ -46,12 +50,39 @@ fn baseline_create_and_filter() -> anyhow::Result<()> { "json", "--baseline-file", baseline.to_str().unwrap(), + "--git-history=none", "--no-update-check", ]) .assert() .code(0) .stdout(predicate::str::contains(GH_PAT).not()); + let baseline_after_scan = fs::read_to_string(&baseline)?; + assert_eq!(initial_baseline, baseline_after_scan, "baseline remains stable after reuse"); + + // Managing the baseline again should not churn entries or report the secret + Command::cargo_bin("kingfisher")? 
+ .args([ + "scan", + dir.path().to_str().unwrap(), + "--no-binary", + "--confidence=low", + "--no-validate", + "--format", + "json", + "--manage-baseline", + "--baseline-file", + baseline.to_str().unwrap(), + "--git-history=none", + "--no-update-check", + ]) + .assert() + .code(0) + .stdout(predicate::str::contains(GH_PAT).not()); + + let rerun_baseline = fs::read_to_string(&baseline)?; + assert_eq!(initial_baseline, rerun_baseline, "baseline remains stable"); + Ok(()) } diff --git a/tests/smoke_branch.rs b/tests/smoke_branch.rs index f79402b..96b9d23 100644 --- a/tests/smoke_branch.rs +++ b/tests/smoke_branch.rs @@ -2,16 +2,33 @@ // // Integration tests that exercise `kingfisher scan` against Git branches and commit // references using locally constructed repositories. These ensure that the -// `--branch` and `--since-commit` flags behave as expected when scanning a repo -// without validation. +// branch-focused flags behave as expected when scanning a repo without +// validation, including the ability to resume from a specific commit. 
use std::fs; use std::path::Path; +use anyhow::Result; use assert_cmd::Command; use git2::{build::CheckoutBuilder, BranchType, Repository, Signature}; use predicates::{prelude::PredicateBooleanExt, str::contains}; -use tempfile::tempdir; +use tempfile::{tempdir, TempDir}; + +const AWS_SECRET_VALUE: &str = "UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgTs5D"; +const GCP_PRIVATE_KEY_VALUE: &str = "c4c474d61701fd6fd4191883b8fea9a8411bf771"; +const SLACK_TOKEN_VALUE: &str = "xoxb-123465789012-0987654321123-AbDcEfGhIjKlMnOpQrStUvWx"; +const STRIPE_SECRET_VALUE: &str = "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"; + +const AWS_SECRET_LINE: &str = "AWS_SECRET_ACCESS_KEY = 'UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgTs5D/'"; +const GCP_PRIVATE_KEY_LINE: &str = + "GCP_PRIVATE_KEY_ID = 'c4c474d61701fd6fd4191883b8fea9a8411bf771'"; +const SLACK_TOKEN_LINE: &str = + "SLACK_BOT_TOKEN = 'xoxb-123465789012-0987654321123-AbDcEfGhIjKlMnOpQrStUvWx'"; +const STRIPE_SECRET_LINE: &str = concat!( + "STRIPE_SECRET_KEY = '", + "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", + "'", +); #[test] fn scan_by_commit_and_branch_diff() -> anyhow::Result<()> { @@ -117,3 +134,149 @@ aws_secret_access_key = efnegoUp/WXc3XwlL77dXu1aKIICzvz+n+7Sz88i Ok(()) } + +/// +/// +/// +/// +/// +/// Create a repo with a single file `secrets.txt` and four commits that append +/// lines in order, exactly like the provided shell script. Returns the repo dir +/// and the vector of commit IDs (oldest → newest). 
+fn setup_linear_repo_with_secrets() -> Result<(TempDir, std::path::PathBuf, Vec<git2::Oid>)> { + let dir = tempdir()?; + let repo_dir = dir.path().join("repo"); + let repo = Repository::init(&repo_dir)?; + let sig = Signature::now("tester", "tester@example.com")?; + + let secrets_path = repo_dir.join("secrets.txt"); + + // Commit #1 — AWS + fs::write(&secrets_path, AWS_SECRET_LINE)?; + let mut index = repo.index()?; + index.add_path(Path::new("secrets.txt"))?; + let tree_id = index.write_tree()?; + let tree = repo.find_tree(tree_id)?; + let mut commits = Vec::new(); + let c1 = repo.commit(Some("HEAD"), &sig, &sig, "Add AWS secret", &tree, &[])?; + commits.push(c1); + let mut parent_commit = repo.find_commit(c1)?; + let mut contents = String::from(AWS_SECRET_LINE); + + // Remaining commits mirror the shell script example. + let additions = [ + ("Add GCP private key id", GCP_PRIVATE_KEY_LINE), + ("Add Slack bot token", SLACK_TOKEN_LINE), + ("Add Stripe API key", STRIPE_SECRET_LINE), + ]; + + for (message, line) in additions { + contents.push('\n'); + contents.push_str(line); + fs::write(&secrets_path, &contents)?; + + let mut index = repo.index()?; + index.add_path(Path::new("secrets.txt"))?; + let tree_id = index.write_tree()?; + let tree = repo.find_tree(tree_id)?; + let oid = repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent_commit])?; + commits.push(oid); + parent_commit = repo.find_commit(oid)?; + } + + // Create a named branch to mirror long-lived branch workflows. + repo.branch("long-lived", &parent_commit, true)?; + + Ok((dir, repo_dir, commits)) +} + +#[test] +fn scan_specific_commit_reports_only_that_commit() -> Result<()> { + let (_temp_dir, repo_dir, commits) = setup_linear_repo_with_secrets()?; + let c1_hex = commits[0].to_string(); // first commit (AWS only) + + // Scan exactly the initial commit via --branch + Command::cargo_bin("kingfisher")? 
+ .args([ + "scan", + repo_dir.to_str().unwrap(), + "--branch", + c1_hex.as_str(), + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout( + // Must contain AWS, must NOT contain the later secrets + contains("AWS SECRET ACCESS KEY") + .and(contains(AWS_SECRET_VALUE)) + .and(contains(GCP_PRIVATE_KEY_VALUE).not()) + .and(contains(SLACK_TOKEN_VALUE).not()) + .and(contains(STRIPE_SECRET_VALUE).not()), + ); + + Ok(()) +} + +#[test] +fn scan_with_branch_root_includes_descendants() -> Result<()> { + let (_temp_dir, repo_dir, commits) = setup_linear_repo_with_secrets()?; + let c1_hex = commits[0].to_string(); // start from first commit + + // Using --branch-root should include the selected commit and remaining history up to HEAD + Command::cargo_bin("kingfisher")? + .args([ + "scan", + repo_dir.to_str().unwrap(), + "--branch", + c1_hex.as_str(), + "--branch-root", + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout( + contains("AWS SECRET ACCESS KEY") + .and(contains(AWS_SECRET_VALUE)) + .and(contains(GCP_PRIVATE_KEY_VALUE)) + .and(contains(SLACK_TOKEN_VALUE)) + .and(contains(STRIPE_SECRET_VALUE)), + ); + + Ok(()) +} + +#[test] +fn scan_branch_tip_with_branch_root_commit() -> Result<()> { + let (_temp_dir, repo_dir, commits) = setup_linear_repo_with_secrets()?; + let root_commit_hex = commits[0].to_string(); + let latest_commit_hex = commits.last().expect("expected at least one commit").to_string(); + + // Passing --branch-root-commit should implicitly enable inclusive scanning even + // without the legacy --branch-root flag when targeting a named branch tip. + Command::cargo_bin("kingfisher")? 
+ .args([ + "scan", + repo_dir.to_str().unwrap(), + "--branch", + "long-lived", + "--branch-root-commit", + root_commit_hex.as_str(), + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200) + .stdout( + contains("AWS SECRET ACCESS KEY") + .and(contains(AWS_SECRET_VALUE)) + .and(contains(GCP_PRIVATE_KEY_VALUE)) + .and(contains(SLACK_TOKEN_VALUE)) + .and(contains(STRIPE_SECRET_VALUE)) + .and(contains(latest_commit_hex.as_str())), + ); + + Ok(()) +}