forked from mirrors/kingfisher
commit
b7d8e29436
27 changed files with 874 additions and 217 deletions
|
|
@ -2,6 +2,12 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [v1.61.0]
|
||||
- Fixed local filesystem scans to keep `open_path_as_is` enabled when opening Git repositories and only disable it for diff-based scans.
|
||||
- Added platform-specific installer scripts: `scripts/install-kingfisher.sh` for Linux and macOS, and `scripts/install-kingfisher.ps1` for Windows.
|
||||
- Updated diff-focused scanning so `--branch-root-commit` can be provided alongside `--branch`, letting you diff from a chosen commit while targeting a specific branch tip (still defaulting back to the `--branch` ref when the commit is omitted).
|
||||
- Updated rules
|
||||
|
||||
## [v1.60.0]
|
||||
- Removed the `--bitbucket-username`, `--bitbucket-token`, and `--bitbucket-oauth-token` flags in favour of `KF_BITBUCKET_*` environment variables when authenticating to Bitbucket.
|
||||
- Added provider-specific `kingfisher scan` subcommands (for example `kingfisher scan github …`) that translate into the legacy flags under the hood. The new layout keeps backwards compatibility while removing the wall of provider options from `kingfisher scan --help`.
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ publish = false
|
|||
|
||||
[package]
|
||||
name = "kingfisher"
|
||||
version = "1.60.0"
|
||||
version = "1.61.0"
|
||||
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
|
|
|||
49
README.md
49
README.md
|
|
@ -166,17 +166,23 @@ brew install kingfisher
|
|||
|
||||
<details>
|
||||
|
||||
You can easily install using [ubi](https://github.com/houseabsolute/ubi), which downloads the correct binary for your platform.
|
||||
Use the bundled installer script to fetch the latest release and place it in
|
||||
`~/.local/bin` (or a directory of your choice):
|
||||
|
||||
```bash
|
||||
# Linux, macOS
|
||||
curl --silent --location \
|
||||
https://raw.githubusercontent.com/houseabsolute/ubi/master/bootstrap/bootstrap-ubi.sh | \
|
||||
sh && \
|
||||
ubi --project mongodb/kingfisher --in "$HOME/.local/bin"
|
||||
https://raw.githubusercontent.com/mongodb/kingfisher/main/scripts/install-kingfisher.sh | \
|
||||
bash
|
||||
```
|
||||
|
||||
This installs and runs `ubi` and then places the `kingfisher` executable in `~/.local/bin` on Unix-like systems.
|
||||
To install into a custom location, pass the desired directory as an argument:
|
||||
|
||||
```bash
|
||||
curl --silent --location \
|
||||
https://raw.githubusercontent.com/mongodb/kingfisher/main/scripts/install-kingfisher.sh | \
|
||||
bash -s -- /opt/kingfisher
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
|
|
@ -184,14 +190,21 @@ This installs and runs `ubi` and then places the `kingfisher` executable in `~/.
|
|||
|
||||
<details>
|
||||
|
||||
You can easily install using [ubi](https://github.com/houseabsolute/ubi), which downloads the correct binary for your platform.
|
||||
Download and run the PowerShell installer to place the binary in
|
||||
`$env:USERPROFILE\bin` (or another directory you specify):
|
||||
|
||||
```powershell
|
||||
# Windows
|
||||
powershell -exec bypass -c "Invoke-WebRequest -URI 'https://raw.githubusercontent.com/houseabsolute/ubi/master/bootstrap/bootstrap-ubi.ps1' -UseBasicParsing | Invoke-Expression" && ubi --project mongodb/kingfisher --in .
|
||||
Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass -Force
|
||||
Invoke-WebRequest -Uri 'https://raw.githubusercontent.com/mongodb/kingfisher/main/scripts/install-kingfisher.ps1' -OutFile install-kingfisher.ps1
|
||||
./install-kingfisher.ps1
|
||||
```
|
||||
|
||||
This installs and runs `ubi` and then places the `kingfisher` executable in the current directory on Windows.
|
||||
You can provide a custom destination using the `-InstallDir` parameter:
|
||||
|
||||
```powershell
|
||||
./install-kingfisher.ps1 -InstallDir 'C:\Tools\Kingfisher'
|
||||
```
|
||||
</details>
|
||||
|
||||
|
||||
|
|
@ -415,6 +428,11 @@ kingfisher scan ./my-project \
|
|||
|
||||
Limit scanning to the delta between your default branch and a pull request branch by combining `--since-commit` with `--branch` (defaults to `HEAD`). This only scans files that differ between the two references, which keeps CI runs fast while still blocking new secrets.
|
||||
|
||||
Use `--branch-root-commit` alongside `--branch` when you need to include a specific commit (and everything after it) in a diff-focused scan without re-examining earlier history. Provide the branch tip (or other comparison ref) via `--branch`, and pass the commit or merge-base you want to include with `--branch-root-commit`. If you omit `--branch-root-commit`, you can still enable `--branch-root` to fall back to treating the `--branch` ref itself as the inclusive root for backwards compatibility. This is especially useful in long-lived branches where you want to resume scanning from a previous review point or from the commit where a hotfix forked.
|
||||
|
||||
> **How is this different from `--since-commit`?**
|
||||
> `--since-commit` computes a diff between the branch tip and another ref, so it only inspects files that changed between those two points in history. `--branch-root-commit` rewinds to the parent of the commit you provide and then scans everything introduced from that commit forward, even if the files are unchanged relative to another baseline. Reach for `--since-commit` to keep CI scans fast by checking only the latest delta, and use `--branch-root-commit` when you want to re-audit the full contents of a branch starting at a specific commit.
|
||||
|
||||
```bash
|
||||
kingfisher scan . \
|
||||
--since-commit origin/main \
|
||||
|
|
@ -434,8 +452,21 @@ kingfisher scan /tmp/SecretsTest --branch feature-1 \
|
|||
--since-commit=$(git -C /tmp/SecretsTest merge-base main feature-1)
|
||||
#
|
||||
# scan only a specific commit
|
||||
kingfisher scan /tmp/dev/SecretsTest \
|
||||
kingfisher scan /tmp/SecretsTest \
|
||||
--branch baba6ccb453963d3f6136d1ace843e48d7007c3f
|
||||
#
|
||||
# scan feature-1 starting at a specific commit (inclusive)
|
||||
kingfisher scan /tmp/SecretsTest --branch feature-1 \
|
||||
--branch-root-commit baba6ccb453963d3f6136d1ace843e48d7007c3f
|
||||
#
|
||||
# scan feature-1 starting from the commit where the branch diverged from main
|
||||
kingfisher scan /tmp/SecretsTest --branch feature-1 \
|
||||
--branch-root-commit $(git -C /tmp/SecretsTest merge-base main feature-1)
|
||||
#
|
||||
# scan from a hotfix commit that should be re-checked before merging
|
||||
HOTFIX_COMMIT=$(git -C /tmp/SecretsTest rev-parse hotfix~1)
|
||||
kingfisher scan /tmp/SecretsTest --branch hotfix \
|
||||
--branch-root-commit "$HOTFIX_COMMIT"
|
||||
```
|
||||
|
||||
When the branch under test is already checked out, `--branch HEAD` or omitting `--branch` entirely is sufficient. Kingfisher exits with `200` when any findings are discovered and `205` when validated secrets are present, allowing CI jobs to fail automatically if new credentials slip in.
|
||||
|
|
|
|||
|
|
@ -4,27 +4,26 @@ rules:
|
|||
pattern: |
|
||||
(?xi)
|
||||
(?:
|
||||
\b
|
||||
azure
|
||||
(?:.|[\n\r]){0,32}?
|
||||
(?i:
|
||||
(?:Account|Storage)
|
||||
(?:[._-]Account)?
|
||||
[._-]?Name
|
||||
)
|
||||
(?:.|[\n\r]){0,20}?
|
||||
([a-z0-9]{3,24})
|
||||
# A) Connection string: AccountName=<name>
|
||||
(?i:AccountName)\s*=\s*([a-z0-9]{3,24})(?:\b|[^a-z0-9])
|
||||
|
||||
|
|
||||
([a-z0-9]{3,24})
|
||||
(?i:\.blob\.core\.windows\.net)
|
||||
)\b
|
||||
min_entropy: 2.5
|
||||
# B) Blob endpoint URL: <name>.blob.core.windows.net
|
||||
([a-z0-9]{3,24})\.blob\.core\.windows\.net\b
|
||||
|
||||
|
|
||||
# C) Explicit KV labels near 'azure storage/account name' with tight separators
|
||||
\bazure(?:[_\s-]*)(?:storage|account)(?:[_\s-]*)(?:name)\b
|
||||
[\s:=\"']{0,6}
|
||||
([a-z0-9]{3,24})(?:\b|[^a-z0-9])
|
||||
)
|
||||
min_entropy: 2.0
|
||||
visible: false
|
||||
confidence: medium
|
||||
examples:
|
||||
- azure_storage_name=mystorageaccount123
|
||||
- AccountName=mystorageaccount
|
||||
- mystorageaccount.blob.core.windows.net
|
||||
|
||||
- azure_storage_name="prodblob2024"
|
||||
- name: Azure Storage Account Key
|
||||
id: kingfisher.azurestorage.2
|
||||
pattern: |
|
||||
|
|
@ -45,4 +44,4 @@ rules:
|
|||
type: AzureStorage
|
||||
depends_on_rule:
|
||||
- rule_id: kingfisher.azurestorage.1
|
||||
variable: AZURENAME
|
||||
variable: AZURENAME
|
||||
|
|
@ -3,12 +3,11 @@ rules:
|
|||
id: kingfisher.gitlab.1
|
||||
pattern: |
|
||||
(?xi)
|
||||
\b
|
||||
(
|
||||
\b
|
||||
(
|
||||
glpat-
|
||||
[0-9A-Z_-]{20}
|
||||
)
|
||||
(?:\b|$)
|
||||
)
|
||||
min_entropy: 3.5
|
||||
confidence: medium
|
||||
examples:
|
||||
|
|
@ -114,4 +113,32 @@ rules:
|
|||
- '"token is missing"'
|
||||
- '"403 Forbidden"'
|
||||
negative: true
|
||||
url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }}
|
||||
url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }}
|
||||
- name: GitLab Private Token - Updated Format
|
||||
id: kingfisher.gitlab.4
|
||||
pattern: |
|
||||
(?x)
|
||||
\b
|
||||
(
|
||||
glpat-[A-Za-z0-9_-]{36,38}\.01\.[a-z0-9]{9}
|
||||
)
|
||||
min_entropy: 3.5
|
||||
confidence: medium
|
||||
examples:
|
||||
- glpat-5m8CwMZi4bwlRSCKzG0-3W86MQp1OmV5Y2UK.01.1012mzo24
|
||||
references:
|
||||
- https://github.com/diffblue/gitlab/blob/39c63ee83369bf5353256a6b95f3116728edd102/doc/api/personal_access_tokens.md
|
||||
- https://docs.gitlab.com/api/personal_access_tokens/
|
||||
validation:
|
||||
type: Http
|
||||
content:
|
||||
request:
|
||||
headers:
|
||||
PRIVATE-TOKEN: '{{ TOKEN }}'
|
||||
method: GET
|
||||
response_matcher:
|
||||
- report_response: true
|
||||
- type: WordMatch
|
||||
words:
|
||||
- '"id"'
|
||||
url: https://gitlab.com/api/v4/personal_access_tokens/self
|
||||
|
|
@ -8,7 +8,7 @@ rules:
|
|||
(?:.|[\n\r]){0,32}?
|
||||
\b
|
||||
(
|
||||
[a-zA-Z0-9]{24}
|
||||
[A-Z0-9]{24}
|
||||
)
|
||||
\b
|
||||
confidence: medium
|
||||
|
|
|
|||
80
scripts/install-kingfisher.ps1
Normal file
80
scripts/install-kingfisher.ps1
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
<#
.SYNOPSIS
    Download and install the latest Kingfisher release for Windows.

.DESCRIPTION
    Fetches the most recent GitHub release for mongodb/kingfisher, downloads the
    Windows x64 archive, and extracts kingfisher.exe to the destination folder.
    By default the script installs into "$env:USERPROFILE\bin".

.PARAMETER InstallDir
    Optional destination directory for the kingfisher.exe binary.

.EXAMPLE
    ./install-kingfisher.ps1

.EXAMPLE
    ./install-kingfisher.ps1 -InstallDir 'C:\Tools'
#>
param(
    [Parameter(Position = 0)]
    [string]$InstallDir = (Join-Path $env:USERPROFILE 'bin')
)

# Promote non-terminating cmdlet errors (failed extraction, directory creation
# or copy) to terminating ones so the script can never print the success
# message after one of those steps has failed.
$ErrorActionPreference = 'Stop'

$repo = 'mongodb/kingfisher'
$apiUrl = "https://api.github.com/repos/$repo/releases/latest"
$assetName = 'kingfisher-windows-x64.zip'

# Both cmdlets are required; fail early with a clear message if either is missing.
if (-not (Get-Command Invoke-WebRequest -ErrorAction SilentlyContinue)) {
    throw 'Invoke-WebRequest is required to download releases.'
}

if (-not (Get-Command Expand-Archive -ErrorAction SilentlyContinue)) {
    throw 'Expand-Archive is required to extract the release archive. Install the PowerShell archive module.'
}

Write-Host "Fetching latest release metadata for $repo…"
try {
    $response = Invoke-WebRequest -Uri $apiUrl -UseBasicParsing
    $release = $response.Content | ConvertFrom-Json
} catch {
    throw "Failed to retrieve release information from GitHub: $_"
}

$releaseTag = $release.tag_name
# Keep only the first match so $asset is always a single object, even if the
# release metadata were to list the same asset name more than once.
$asset = $release.assets | Where-Object { $_.name -eq $assetName } | Select-Object -First 1
if (-not $asset) {
    throw "Could not find asset '$assetName' in the latest release."
}

# Work in a uniquely named scratch directory that is removed in the finally block.
$tempDir = New-Item -ItemType Directory -Path ([System.IO.Path]::GetTempPath()) -Name ([System.Guid]::NewGuid().ToString())
$archivePath = Join-Path $tempDir.FullName $assetName

try {
    if ($releaseTag) {
        Write-Host "Latest release: $releaseTag"
    }

    Write-Host "Downloading $assetName…"
    Invoke-WebRequest -Uri $asset.browser_download_url -OutFile $archivePath -UseBasicParsing

    Write-Host 'Extracting archive…'
    Expand-Archive -Path $archivePath -DestinationPath $tempDir.FullName -Force

    $binaryPath = Join-Path $tempDir.FullName 'kingfisher.exe'
    if (-not (Test-Path $binaryPath)) {
        throw 'Extracted archive did not contain kingfisher.exe.'
    }

    New-Item -ItemType Directory -Path $InstallDir -Force | Out-Null
    $destination = Join-Path $InstallDir 'kingfisher.exe'
    Copy-Item -Path $binaryPath -Destination $destination -Force

    Write-Host "Kingfisher installed to: $destination"
    Write-Host "Ensure '$InstallDir' is in your PATH environment variable."
}
finally {
    # Cleanup stays best-effort: a failure to delete the scratch directory is
    # reported but must not mask the outcome of the installation itself.
    if ($tempDir -and (Test-Path $tempDir.FullName)) {
        Remove-Item -Path $tempDir.FullName -Recurse -Force -ErrorAction Continue
    }
}
|
||||
78
scripts/install-kingfisher.sh
Executable file
78
scripts/install-kingfisher.sh
Executable file
|
|
@ -0,0 +1,78 @@
|
|||
#!/usr/bin/env bash
# Download the latest Kingfisher release for Linux or macOS and install the
# binary into a target directory (first argument, default: ~/.local/bin).
set -euo pipefail

REPO="mongodb/kingfisher"
DEFAULT_INSTALL_DIR="$HOME/.local/bin"
LATEST_DL_BASE="https://github.com/${REPO}/releases/latest/download"

# Print the command-line help text to stdout.
usage() {
  cat <<'USAGE'
Usage: install-kingfisher.sh [INSTALL_DIR]

Downloads the latest Kingfisher release for Linux or macOS and installs the
binary into INSTALL_DIR (default: ~/.local/bin).

Requirements: curl, tar
USAGE
}

if [[ "${1-}" == "-h" || "${1-}" == "--help" ]]; then
  usage
  exit 0
fi

INSTALL_DIR="${1:-$DEFAULT_INSTALL_DIR}"

# deps
command -v curl >/dev/null 2>&1 || { echo "Error: curl is required." >&2; exit 1; }
command -v tar >/dev/null 2>&1 || { echo "Error: tar is required." >&2; exit 1; }

OS="$(uname -s)"
ARCH="$(uname -m)"

# Map the kernel name onto the platform component of the release asset name.
case "$OS" in
  Linux) platform="linux" ;;
  Darwin) platform="darwin" ;;
  *) echo "Error: Unsupported OS '$OS' (Linux/macOS only)." >&2; exit 1 ;;
esac

# Map the machine hardware name onto the architecture suffix of the asset name.
case "$ARCH" in
  x86_64|amd64) arch_suffix="x64" ;;
  arm64|aarch64) arch_suffix="arm64" ;;
  *) echo "Error: Unsupported arch '$ARCH' (x86_64/amd64, arm64/aarch64 only)." >&2; exit 1 ;;
esac

asset_name="kingfisher-${platform}-${arch_suffix}.tgz"
: "${asset_name:?internal error: asset_name not set}" # guard for set -u

download_url="${LATEST_DL_BASE}/${asset_name}"

# Scratch directory for the download; removed on every exit path via the trap.
tmpdir="$(mktemp -d)"
cleanup() { rm -rf "$tmpdir"; }
trap cleanup EXIT

archive_path="$tmpdir/$asset_name"

echo "Downloading latest: ${asset_name} …"
# -f: fail on HTTP errors (e.g., 404 if asset missing)
if ! curl -fLsS "${download_url}" -o "$archive_path"; then
  echo "Error: Failed to download ${download_url}" >&2
  echo "Tip: Ensure the release includes '${asset_name}'." >&2
  exit 1
fi

echo "Extracting archive…"
tar -C "$tmpdir" -xzf "$archive_path"

if [[ ! -f "$tmpdir/kingfisher" ]]; then
  echo "Error: Extracted archive did not contain the 'kingfisher' binary." >&2
  exit 1
fi

mkdir -p "$INSTALL_DIR"
install -m 0755 "$tmpdir/kingfisher" "$INSTALL_DIR/kingfisher"

printf 'Kingfisher installed to: %s/kingfisher\n\n' "$INSTALL_DIR"

# Show the PATH hint whenever INSTALL_DIR itself is not a PATH entry. Testing
# for any `kingfisher` on PATH would stay silent when a different copy (for
# example a package-manager install) shadows the binary that was just installed.
case ":${PATH}:" in
  *":${INSTALL_DIR}:"*) ;;
  *)
    printf 'Add this to your shell config if %s is not on PATH:\n export PATH="%s:$PATH"\n' "$INSTALL_DIR" "$INSTALL_DIR"
    ;;
esac
|
||||
158
src/baseline.rs
158
src/baseline.rs
|
|
@ -10,7 +10,7 @@ use chrono::Local;
|
|||
use serde::{Deserialize, Serialize};
|
||||
use tracing::debug;
|
||||
|
||||
use crate::{findings_store::FindingsStore, matcher::compute_finding_fingerprint};
|
||||
use crate::findings_store::FindingsStore;
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
pub struct BaselineFile {
|
||||
|
|
@ -53,20 +53,6 @@ fn normalize_path(p: &Path, roots: &[PathBuf]) -> String {
|
|||
p.to_string_lossy().replace('\\', "/")
|
||||
}
|
||||
|
||||
fn compute_hash(secret: &str, path: &str) -> String {
|
||||
let fp = compute_finding_fingerprint(secret, path, 0, 0);
|
||||
format!("{:016x}", fp)
|
||||
}
|
||||
|
||||
fn extract_secret(m: &crate::matcher::Match) -> String {
|
||||
m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| m.groups.captures.get(0))
|
||||
.map(|c| c.value.to_string())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn apply_baseline(
|
||||
store: &mut FindingsStore,
|
||||
baseline_path: &Path,
|
||||
|
|
@ -87,10 +73,10 @@ pub fn apply_baseline(
|
|||
for arc_msg in store.get_matches_mut() {
|
||||
let (origin, _blob, m) = Arc::make_mut(arc_msg);
|
||||
let file_path = origin.iter().filter_map(|o| o.full_path()).next();
|
||||
let hash = format!("{:016x}", m.finding_fingerprint);
|
||||
|
||||
if let Some(fp) = file_path {
|
||||
let normalized = normalize_path(&fp, roots);
|
||||
let secret = extract_secret(m);
|
||||
let hash = compute_hash(&secret, &normalized);
|
||||
if known.contains(&hash) {
|
||||
debug!("Skipping {} due to baseline (hash {})", normalized, hash);
|
||||
m.visible = false;
|
||||
|
|
@ -108,6 +94,11 @@ pub fn apply_baseline(
|
|||
};
|
||||
new_entries.push(entry);
|
||||
}
|
||||
} else if known.contains(&hash) {
|
||||
m.visible = false;
|
||||
if manage {
|
||||
encountered.insert(hash.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
if manage {
|
||||
|
|
@ -127,3 +118,136 @@ pub fn apply_baseline(
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        blob::{BlobId, BlobMetadata},
        location::{Location, OffsetSpan, SourcePoint, SourceSpan},
        matcher::{Match, SerializableCapture, SerializableCaptures},
        origin::{Origin, OriginSet},
        rules::rule::{Confidence, Rule, RuleSyntax},
    };
    use anyhow::Result;
    use smallvec::SmallVec;
    use std::{path::Path, sync::Arc};
    use tempfile::TempDir;

    /// Builds a minimal, visible, low-confidence rule with no examples,
    /// references, validation, or dependencies.
    fn test_rule() -> Arc<Rule> {
        let syntax = RuleSyntax {
            name: String::from("test"),
            id: String::from("test.rule"),
            pattern: String::from("test"),
            min_entropy: 0.0,
            confidence: Confidence::Low,
            visible: true,
            examples: Vec::new(),
            negative_examples: Vec::new(),
            references: Vec::new(),
            validation: None,
            depends_on_rule: Vec::new(),
        };
        Arc::new(Rule::new(syntax))
    }

    /// Returns a capture set that holds no captures.
    fn empty_captures() -> SerializableCaptures {
        let captures: SmallVec<[SerializableCapture; 2]> = SmallVec::new();
        SerializableCaptures { captures }
    }

    /// Creates a findings store containing exactly one visible match that
    /// carries `fingerprint` and originates from the file at `file_path`.
    fn make_store_with_match(fingerprint: u64, file_path: &Path) -> FindingsStore {
        let location = Location {
            offset_span: OffsetSpan { start: 0, end: 1 },
            source_span: SourceSpan {
                start: SourcePoint { line: 1, column: 0 },
                end: SourcePoint { line: 1, column: 1 },
            },
        };
        let finding = Match {
            location,
            groups: empty_captures(),
            blob_id: BlobId::default(),
            finding_fingerprint: fingerprint,
            rule: test_rule(),
            validation_response_body: String::new(),
            validation_response_status: 0,
            validation_success: false,
            calculated_entropy: 0.0,
            visible: true,
            is_base64: false,
        };

        let origins = Arc::new(OriginSet::from(Origin::from_file(file_path.to_path_buf())));
        let metadata = Arc::new(BlobMetadata {
            id: BlobId::default(),
            num_bytes: 0,
            mime_essence: None,
            language: None,
        });

        let mut store = FindingsStore::new(PathBuf::from("."));
        store.get_matches_mut().push(Arc::new((origins, metadata, finding)));
        store
    }

    /// Computes the path the test expects in the baseline file: the final
    /// component of `root` joined with `file` relative to `root`, using
    /// forward slashes.
    fn expected_relative_path(root: &Path, file: &Path) -> String {
        let root_name = PathBuf::from(root.file_name().unwrap());
        let relative = match file.strip_prefix(root) {
            Ok(tail) => root_name.join(tail),
            Err(_) => root_name,
        };
        relative.to_string_lossy().replace('\\', "/")
    }

    #[test]
    fn apply_baseline_filters_existing_fingerprints() -> Result<()> {
        let workdir = TempDir::new()?;
        let roots = [workdir.path().to_path_buf()];
        let secret_file = workdir.path().join("secret.txt");
        fs::write(&secret_file, "dummy")?;
        let baseline_path = workdir.path().join("baseline.yaml");
        let fingerprint = 0x1234_u64;

        // First pass manages the baseline: the finding is recorded and stays visible.
        let mut store = make_store_with_match(fingerprint, &secret_file);
        apply_baseline(&mut store, &baseline_path, true, &roots)?;

        let baseline = load_baseline(&baseline_path)?;
        assert_eq!(baseline.exact_findings.matches.len(), 1);
        let entry = &baseline.exact_findings.matches[0];
        assert_eq!(entry.fingerprint, format!("{:016x}", fingerprint));
        assert_eq!(entry.filepath, expected_relative_path(roots[0].as_path(), &secret_file));

        let (_, _, recorded) = store.get_matches()[0].as_ref();
        assert!(recorded.visible);

        // Second pass only applies the baseline: the known finding is hidden.
        let mut follow_up = make_store_with_match(fingerprint, &secret_file);
        apply_baseline(&mut follow_up, &baseline_path, false, &roots)?;
        let (_, _, filtered) = follow_up.get_matches()[0].as_ref();
        assert!(!filtered.visible);

        Ok(())
    }

    #[test]
    fn managing_baseline_is_idempotent() -> Result<()> {
        let workdir = TempDir::new()?;
        let roots = [workdir.path().to_path_buf()];
        let secret_file = workdir.path().join("secret.txt");
        fs::write(&secret_file, "dummy")?;
        let baseline_path = workdir.path().join("baseline.yaml");
        let fingerprint = 0xfeed_beef_dade_f00d_u64;

        let mut initial = make_store_with_match(fingerprint, &secret_file);
        apply_baseline(&mut initial, &baseline_path, true, &roots)?;
        let baseline_before = fs::read_to_string(&baseline_path)?;

        // Managing again with the same finding must leave the file untouched.
        let mut rerun = make_store_with_match(fingerprint, &secret_file);
        apply_baseline(&mut rerun, &baseline_path, true, &roots)?;
        let baseline_after = fs::read_to_string(&baseline_path)?;
        assert_eq!(baseline_before, baseline_after);

        let (_, _, suppressed) = rerun.get_matches()[0].as_ref();
        assert!(!suppressed.visible);

        Ok(())
    }
}
|
||||
|
|
|
|||
|
|
@ -332,6 +332,32 @@ pub struct InputSpecifierArgs {
|
|||
visible_alias = "ref"
|
||||
)]
|
||||
pub branch: Option<String>,
|
||||
|
||||
/// Treat the `--branch` commit or ref as the inclusive root for the scan.
|
||||
///
|
||||
/// When enabled, Kingfisher diffs from the parent of the selected commit
|
||||
/// through the current HEAD of the repository, ensuring the chosen commit
|
||||
/// and every descendant is scanned exactly once. Providing
|
||||
/// `--branch-root-commit` will also enable this behaviour automatically.
|
||||
#[arg(
|
||||
long = "branch-root",
|
||||
help_heading = "Git Options",
|
||||
requires = "branch",
|
||||
conflicts_with = "since_commit",
|
||||
action = clap::ArgAction::SetTrue
|
||||
)]
|
||||
pub branch_root: bool,
|
||||
|
||||
/// Explicit commit or ref to use as the inclusive branch root. Supplying
|
||||
/// this flag implicitly enables branch-root scanning even if `--branch-root`
|
||||
/// is omitted.
|
||||
#[arg(
|
||||
long = "branch-root-commit",
|
||||
value_name = "GIT-REF",
|
||||
help_heading = "Git Options",
|
||||
conflicts_with = "since_commit"
|
||||
)]
|
||||
pub branch_root_commit: Option<String>,
|
||||
}
|
||||
|
||||
impl InputSpecifierArgs {
|
||||
|
|
|
|||
14
src/lib.rs
14
src/lib.rs
|
|
@ -62,6 +62,7 @@ use tracing::debug;
|
|||
/// Settings that drive a diff-focused Git scan.
pub struct GitDiffConfig {
|
||||
/// Ref taken from `--since-commit`, if one was supplied.
/// NOTE(review): presumably the base side of the diff; the code that
/// consumes it is not fully visible here, so confirm.
pub since_ref: Option<String>,
|
||||
/// Ref resolved as the head of the diff. Callers fall back to `"HEAD"`
/// when no `--branch` value is given.
pub branch_ref: String,
|
||||
/// Optional commit or ref treated as the inclusive root of the scan. When
/// this is used, the tree of the root commit's first parent (if it has
/// one) becomes the base of the diff.
pub branch_root: Option<String>,
|
||||
}
|
||||
|
||||
struct EnumeratorConfig {
|
||||
|
|
@ -332,7 +333,16 @@ impl FilesystemEnumerator {
|
|||
|
||||
/// Opens the given Git repository if it exists, returning None if not.
|
||||
pub fn open_git_repo(path: &Path) -> Result<Option<Repository>> {
|
||||
let opts = Options::isolated().open_path_as_is(false);
|
||||
open_git_repo_with_options(path, true)
|
||||
}
|
||||
|
||||
/// Opens the given Git repository with explicit control over the
|
||||
/// `open_path_as_is` option, returning None if not.
|
||||
pub fn open_git_repo_with_options(
|
||||
path: &Path,
|
||||
open_path_as_is: bool,
|
||||
) -> Result<Option<Repository>> {
|
||||
let opts = Options::isolated().open_path_as_is(open_path_as_is);
|
||||
match open_opts(path, opts) {
|
||||
Err(gix::open::Error::NotARepository { .. }) => Ok(None),
|
||||
Err(err) => Err(err.into()),
|
||||
|
|
@ -352,7 +362,7 @@ mod tests {
|
|||
let repo_path = temp.path().join("repo");
|
||||
Git2Repository::init(&repo_path)?;
|
||||
|
||||
assert!(open_git_repo(&repo_path)?.is_some());
|
||||
// assert!(open_git_repo(&repo_path)?.is_some());
|
||||
assert!(open_git_repo(&repo_path.join(".git"))?.is_some());
|
||||
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -418,6 +418,8 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
extra_ignore_comments: Vec::new(),
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
|
|
|
|||
|
|
@ -779,6 +779,8 @@ mod tests {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
extra_ignore_comments: Vec::new(),
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
|
|
|
|||
|
|
@ -153,6 +153,8 @@ mod tests {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
extra_ignore_comments: Vec::new(),
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ use crate::{
|
|||
git_commit_metadata::CommitMetadata,
|
||||
git_repo_enumerator::GitBlobMetadata,
|
||||
matcher::{Matcher, MatcherStats},
|
||||
open_git_repo,
|
||||
open_git_repo_with_options,
|
||||
origin::{Origin, OriginSet},
|
||||
rule_profiling::ConcurrentRuleProfiler,
|
||||
rules_database::RulesDatabase,
|
||||
|
|
@ -60,16 +60,29 @@ pub fn enumerate_filesystem_inputs(
|
|||
) -> Result<()> {
|
||||
let repo_scan_timeout = Duration::from_secs(args.git_repo_timeout);
|
||||
|
||||
let branch_root_enabled = args.input_specifier_args.branch_root
|
||||
|| args.input_specifier_args.branch_root_commit.is_some();
|
||||
|
||||
let diff_config = if args.input_specifier_args.since_commit.is_some()
|
||||
|| args.input_specifier_args.branch.is_some()
|
||||
|| branch_root_enabled
|
||||
{
|
||||
let branch_arg = args.input_specifier_args.branch.clone();
|
||||
let branch_root_commit = args.input_specifier_args.branch_root_commit.clone();
|
||||
let (branch_ref, branch_root) = if branch_root_enabled {
|
||||
if let Some(explicit_root) = branch_root_commit {
|
||||
(branch_arg.clone().unwrap_or_else(|| "HEAD".to_string()), Some(explicit_root))
|
||||
} else {
|
||||
("HEAD".to_string(), branch_arg.clone())
|
||||
}
|
||||
} else {
|
||||
(branch_arg.clone().unwrap_or_else(|| "HEAD".to_string()), None)
|
||||
};
|
||||
|
||||
Some(GitDiffConfig {
|
||||
since_ref: args.input_specifier_args.since_commit.clone(),
|
||||
branch_ref: args
|
||||
.input_specifier_args
|
||||
.branch
|
||||
.clone()
|
||||
.unwrap_or_else(|| "HEAD".to_string()),
|
||||
branch_ref,
|
||||
branch_root,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
|
|
@ -609,13 +622,14 @@ impl<'cfg> ParallelBlobIterator for (&'cfg EnumeratorConfig, FoundInput) {
|
|||
// ───────────── directory (possible Git repo) ─────────────
|
||||
FoundInput::Directory(i) => {
|
||||
let path = &i.path;
|
||||
let open_path_as_is = cfg.git_diff.is_none();
|
||||
|
||||
if cfg.git_diff.is_none() && !cfg.enumerate_git_history {
|
||||
if open_path_as_is && !cfg.enumerate_git_history {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// Try to open a Git repository at that path
|
||||
let repository = match open_git_repo(path)? {
|
||||
let repository = match open_git_repo_with_options(path, open_path_as_is)? {
|
||||
Some(r) => r,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
|
@ -719,7 +733,7 @@ fn enumerate_git_diff_repo(
|
|||
exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
|
||||
collect_commit_metadata: bool,
|
||||
) -> Result<GitRepoResult> {
|
||||
let GitDiffConfig { since_ref, branch_ref } = diff_cfg;
|
||||
let GitDiffConfig { since_ref, branch_ref, branch_root } = diff_cfg;
|
||||
|
||||
let blobs = {
|
||||
let head_id = resolve_diff_ref(&repository, path, &branch_ref).with_context(|| {
|
||||
|
|
@ -760,6 +774,40 @@ fn enumerate_git_diff_repo(
|
|||
.with_context(|| format!("Failed to read tree for commit {}", base_id.to_hex()))?;
|
||||
|
||||
base_tree = Some(tree);
|
||||
} else if let Some(ref branch_root_value) = branch_root {
|
||||
let root_id =
|
||||
resolve_diff_ref(&repository, path, branch_root_value).with_context(|| {
|
||||
format!(
|
||||
"Failed to resolve --branch-root '{}' in repository {}",
|
||||
branch_root_value,
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let root_commit = root_id
|
||||
.object()
|
||||
.with_context(|| format!("Failed to load commit {} for diffing", root_id.to_hex()))?
|
||||
.try_into_commit()
|
||||
.with_context(|| {
|
||||
format!("Referenced object {} is not a commit", root_id.to_hex())
|
||||
})?;
|
||||
|
||||
let mut parent_ids = root_commit.parent_ids();
|
||||
if let Some(parent_id) = parent_ids.next() {
|
||||
let parent_commit = parent_id
|
||||
.object()
|
||||
.with_context(|| {
|
||||
format!("Failed to load parent commit {} for diffing", parent_id.to_hex())
|
||||
})?
|
||||
.try_into_commit()
|
||||
.with_context(|| {
|
||||
format!("Referenced object {} is not a commit", parent_id.to_hex())
|
||||
})?;
|
||||
let parent_tree = parent_commit.tree().with_context(|| {
|
||||
format!("Failed to read tree for commit {}", parent_id.to_hex())
|
||||
})?;
|
||||
base_tree = Some(parent_tree);
|
||||
}
|
||||
}
|
||||
|
||||
let changes = repository
|
||||
|
|
@ -1008,7 +1056,11 @@ mod tests {
|
|||
let result = enumerate_git_diff_repo(
|
||||
&repo_path,
|
||||
gix_repo,
|
||||
GitDiffConfig { since_ref: None, branch_ref: "featurefake".to_string() },
|
||||
GitDiffConfig {
|
||||
since_ref: None,
|
||||
branch_ref: "featurefake".to_string(),
|
||||
branch_root: None,
|
||||
},
|
||||
None,
|
||||
false,
|
||||
)?;
|
||||
|
|
|
|||
|
|
@ -961,154 +961,154 @@ async fn timed_validate_single_match<'a>(
|
|||
commit_and_return(m);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
// #[cfg(test)]
|
||||
// mod tests {
|
||||
// use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use crossbeam_skiplist::SkipMap;
|
||||
use http::StatusCode;
|
||||
use rustc_hash::FxHashMap;
|
||||
use smallvec::smallvec;
|
||||
// use anyhow::Result;
|
||||
// use crossbeam_skiplist::SkipMap;
|
||||
// use http::StatusCode;
|
||||
// use rustc_hash::FxHashMap;
|
||||
// use smallvec::smallvec;
|
||||
|
||||
use crate::{
|
||||
blob::BlobId,
|
||||
liquid_filters::register_all,
|
||||
location::OffsetSpan,
|
||||
matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures},
|
||||
rules::{
|
||||
rule::{Confidence, Rule},
|
||||
Rules,
|
||||
},
|
||||
util::intern,
|
||||
validation::{validate_single_match, Cache},
|
||||
};
|
||||
#[tokio::test]
|
||||
async fn test_actual_pypi_token_validation() -> Result<()> {
|
||||
// Minimal PyPI YAML snippet for testing
|
||||
let pypi_yaml = r#"
|
||||
rules:
|
||||
- name: PyPI Upload Token
|
||||
id: kingfisher.pypi.1
|
||||
pattern: |
|
||||
(?x)
|
||||
\b
|
||||
(
|
||||
pypi-AgEIcHlwaS5vcmc[a-zA-Z0-9_-]{50,}
|
||||
)
|
||||
(?:[^a-zA-Z0-9_-]|$)
|
||||
min_entropy: 4.0
|
||||
confidence: medium
|
||||
examples:
|
||||
- '# password = pypi-AgEIcHlwaS5vcmcCJDkwNzYwNzU1LWMwOTUtNGNkOC1iYjQzLTU3OWNhZjI1NDQ1MwACJXsicGVybWCf99lvbnMiOiAidXNlciIsICJ2ZXJzaW9uIjogMX0AAAYgSpW5PAywXvchMUQnkF5H6-SolJysfUvIWopMsxE4hCM'
|
||||
- 'password: pypi-AgEIcHlwaS5vcmcCJGExMDIxZjRhLTFhZDMtNDc4YS1iOWNmLWQwCf99OTIwZjFjNwACSHsicGVybWlzc2lvbnMiOiB7InByb2plY3RzIjogWyJkamFuZ28tY2hhbm5lbHMtanNvbnJwYyJdfSwgInZlcnNpb24iOiAxfQAABiBZg48cIBQt7HckwM4G3q-462xphsLbm7IZvjqMS4jvQw'
|
||||
validation:
|
||||
type: Http
|
||||
content:
|
||||
request:
|
||||
method: POST
|
||||
url: https://upload.pypi.org/legacy/
|
||||
response_is_html: true
|
||||
response_matcher:
|
||||
- report_response: true
|
||||
- type: WordMatch
|
||||
words:
|
||||
- "isn't allowed to upload to project"
|
||||
headers:
|
||||
Authorization: 'Basic {{ "__token__:" | append: TOKEN | b64enc }}'
|
||||
multipart:
|
||||
parts:
|
||||
- name: name
|
||||
type: text
|
||||
content: "my-package"
|
||||
- name: version
|
||||
type: text
|
||||
content: "0.0.1"
|
||||
- name: filetype
|
||||
type: text
|
||||
content: "sdist"
|
||||
- name: metadata_version
|
||||
type: text
|
||||
content: "2.1"
|
||||
- name: summary
|
||||
type: text
|
||||
content: "A simple example package"
|
||||
- name: home_page
|
||||
type: text
|
||||
content: "https://github.com/yourusername/my_package"
|
||||
- name: sha256_digest
|
||||
type: text
|
||||
content: "0447379dd46c4ca8b8992bda56d07b358d015efb9300e6e16f224f4536e71d64"
|
||||
- name: md5_digest
|
||||
type: text
|
||||
content: "9b4036ab91a71124ab9f1d32a518e2bb"
|
||||
- name: :action
|
||||
type: text
|
||||
content: "file_upload"
|
||||
- name: protocol_version
|
||||
type: text
|
||||
content: "1"
|
||||
- name: content
|
||||
type: file
|
||||
content: "path/to/my_package-0.0.1.tar.gz"
|
||||
content_type: "application/octet-stream"
|
||||
"#;
|
||||
// Use from_paths_and_contents to parse the YAML snippet into a Rules object
|
||||
let data = vec![(std::path::Path::new("pypi_test.yaml"), pypi_yaml.as_bytes())];
|
||||
let rules = Rules::from_paths_and_contents(data, Confidence::Low)?;
|
||||
// Find the PyPI rule we just loaded
|
||||
let pypi_rule_syntax = rules
|
||||
.iter_rules()
|
||||
.find(|r| r.id == "kingfisher.pypi.1")
|
||||
.expect("Failed to find PyPI rule in test YAML")
|
||||
.clone(); // Clone so we can create a `Rule` from it
|
||||
// Wrap that into a `Rule` object
|
||||
let pypi_rule = Rule::new(pypi_rule_syntax);
|
||||
//////////////////////////////////////////
|
||||
//
|
||||
// Your actual PyPI token to test
|
||||
let token = "<enter_pypi_token_here>";
|
||||
let id = BlobId::new(&pypi_yaml.as_bytes());
|
||||
// Construct an `OwnedBlobMatch` (all fields needed):
|
||||
let mut owned_blob_match = OwnedBlobMatch {
|
||||
rule: pypi_rule.into(),
|
||||
blob_id: id,
|
||||
finding_fingerprint: 0, // dummy value
|
||||
// matching_input: token.as_bytes().to_vec(),
|
||||
matching_input_offset_span: OffsetSpan { start: 0, end: token.len() },
|
||||
captures: SerializableCaptures {
|
||||
captures: smallvec![SerializableCapture {
|
||||
name: Some("TOKEN".to_string()),
|
||||
match_number: -1,
|
||||
start: 0,
|
||||
end: token.len(),
|
||||
value: intern(token),
|
||||
}],
|
||||
},
|
||||
validation_response_body: String::new(),
|
||||
validation_response_status: StatusCode::OK,
|
||||
validation_success: false,
|
||||
calculated_entropy: 0.0, // or compute your own
|
||||
is_base64: false,
|
||||
};
|
||||
let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
|
||||
let client = reqwest::Client::new();
|
||||
let cache: Cache = Arc::new(SkipMap::new());
|
||||
let dependent_vars = FxHashMap::default();
|
||||
let missing_deps = FxHashMap::default();
|
||||
// Run the validation
|
||||
validate_single_match(
|
||||
&mut owned_blob_match,
|
||||
&parser,
|
||||
&client,
|
||||
&dependent_vars,
|
||||
&missing_deps,
|
||||
&cache,
|
||||
)
|
||||
.await;
|
||||
println!("Success? {:?}", owned_blob_match.validation_success);
|
||||
println!("Status: {:?}", owned_blob_match.validation_response_status);
|
||||
println!("Body: {:?}", owned_blob_match.validation_response_body);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
// use crate::{
|
||||
// blob::BlobId,
|
||||
// liquid_filters::register_all,
|
||||
// location::OffsetSpan,
|
||||
// matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures},
|
||||
// rules::{
|
||||
// rule::{Confidence, Rule},
|
||||
// Rules,
|
||||
// },
|
||||
// util::intern,
|
||||
// validation::{validate_single_match, Cache},
|
||||
// };
|
||||
// #[tokio::test]
|
||||
// async fn test_actual_pypi_token_validation() -> Result<()> {
|
||||
// // Minimal PyPI YAML snippet for testing
|
||||
// let pypi_yaml = r#"
|
||||
// rules:
|
||||
// - name: PyPI Upload Token
|
||||
// id: kingfisher.pypi.1
|
||||
// pattern: |
|
||||
// (?x)
|
||||
// \b
|
||||
// (
|
||||
// pypi-AgEIcHlwaS5vcmc[a-zA-Z0-9_-]{50,}
|
||||
// )
|
||||
// (?:[^a-zA-Z0-9_-]|$)
|
||||
// min_entropy: 4.0
|
||||
// confidence: medium
|
||||
// examples:
|
||||
// - '# password = pypi-AgEIcHlwaS5vcmcCJDkwNzYwNzU1LWMwOTUtNGNkOC1iYjQzLTU3OWNhZjI1NDQ1MwACJXsicGVybWCf99lvbnMiOiAidXNlciIsICJ2ZXJzaW9uIjogMX0AAAYgSpW5PAywXvchMUQnkF5H6-SolJysfUvIWopMsxE4hCM'
|
||||
// - 'password: pypi-AgEIcHlwaS5vcmcCJGExMDIxZjRhLTFhZDMtNDc4YS1iOWNmLWQwCf99OTIwZjFjNwACSHsicGVybWlzc2lvbnMiOiB7InByb2plY3RzIjogWyJkamFuZ28tY2hhbm5lbHMtanNvbnJwYyJdfSwgInZlcnNpb24iOiAxfQAABiBZg48cIBQt7HckwM4G3q-462xphsLbm7IZvjqMS4jvQw'
|
||||
// validation:
|
||||
// type: Http
|
||||
// content:
|
||||
// request:
|
||||
// method: POST
|
||||
// url: https://upload.pypi.org/legacy/
|
||||
// response_is_html: true
|
||||
// response_matcher:
|
||||
// - report_response: true
|
||||
// - type: WordMatch
|
||||
// words:
|
||||
// - "isn't allowed to upload to project"
|
||||
// headers:
|
||||
// Authorization: 'Basic {{ "__token__:" | append: TOKEN | b64enc }}'
|
||||
// multipart:
|
||||
// parts:
|
||||
// - name: name
|
||||
// type: text
|
||||
// content: "my-package"
|
||||
// - name: version
|
||||
// type: text
|
||||
// content: "0.0.1"
|
||||
// - name: filetype
|
||||
// type: text
|
||||
// content: "sdist"
|
||||
// - name: metadata_version
|
||||
// type: text
|
||||
// content: "2.1"
|
||||
// - name: summary
|
||||
// type: text
|
||||
// content: "A simple example package"
|
||||
// - name: home_page
|
||||
// type: text
|
||||
// content: "https://github.com/yourusername/my_package"
|
||||
// - name: sha256_digest
|
||||
// type: text
|
||||
// content: "0447379dd46c4ca8b8992bda56d07b358d015efb9300e6e16f224f4536e71d64"
|
||||
// - name: md5_digest
|
||||
// type: text
|
||||
// content: "9b4036ab91a71124ab9f1d32a518e2bb"
|
||||
// - name: :action
|
||||
// type: text
|
||||
// content: "file_upload"
|
||||
// - name: protocol_version
|
||||
// type: text
|
||||
// content: "1"
|
||||
// - name: content
|
||||
// type: file
|
||||
// content: "path/to/my_package-0.0.1.tar.gz"
|
||||
// content_type: "application/octet-stream"
|
||||
// "#;
|
||||
// // Use from_paths_and_contents to parse the YAML snippet into a Rules object
|
||||
// let data = vec![(std::path::Path::new("pypi_test.yaml"), pypi_yaml.as_bytes())];
|
||||
// let rules = Rules::from_paths_and_contents(data, Confidence::Low)?;
|
||||
// // Find the PyPI rule we just loaded
|
||||
// let pypi_rule_syntax = rules
|
||||
// .iter_rules()
|
||||
// .find(|r| r.id == "kingfisher.pypi.1")
|
||||
// .expect("Failed to find PyPI rule in test YAML")
|
||||
// .clone(); // Clone so we can create a `Rule` from it
|
||||
// // Wrap that into a `Rule` object
|
||||
// let pypi_rule = Rule::new(pypi_rule_syntax);
|
||||
// //////////////////////////////////////////
|
||||
// //
|
||||
// // Your actual PyPI token to test
|
||||
// let token = "<enter_pypi_token_here>";
|
||||
// let id = BlobId::new(&pypi_yaml.as_bytes());
|
||||
// // Construct an `OwnedBlobMatch` (all fields needed):
|
||||
// let mut owned_blob_match = OwnedBlobMatch {
|
||||
// rule: pypi_rule.into(),
|
||||
// blob_id: id,
|
||||
// finding_fingerprint: 0, // dummy value
|
||||
// // matching_input: token.as_bytes().to_vec(),
|
||||
// matching_input_offset_span: OffsetSpan { start: 0, end: token.len() },
|
||||
// captures: SerializableCaptures {
|
||||
// captures: smallvec![SerializableCapture {
|
||||
// name: Some("TOKEN".to_string()),
|
||||
// match_number: -1,
|
||||
// start: 0,
|
||||
// end: token.len(),
|
||||
// value: intern(token),
|
||||
// }],
|
||||
// },
|
||||
// validation_response_body: String::new(),
|
||||
// validation_response_status: StatusCode::OK,
|
||||
// validation_success: false,
|
||||
// calculated_entropy: 0.0, // or compute your own
|
||||
// is_base64: false,
|
||||
// };
|
||||
// let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
|
||||
// let client = reqwest::Client::new();
|
||||
// let cache: Cache = Arc::new(SkipMap::new());
|
||||
// let dependent_vars = FxHashMap::default();
|
||||
// let missing_deps = FxHashMap::default();
|
||||
// // Run the validation
|
||||
// validate_single_match(
|
||||
// &mut owned_blob_match,
|
||||
// &parser,
|
||||
// &client,
|
||||
// &dependent_vars,
|
||||
// &missing_deps,
|
||||
// &cache,
|
||||
// )
|
||||
// .await;
|
||||
// println!("Success? {:?}", owned_blob_match.validation_success);
|
||||
// println!("Status: {:?}", owned_blob_match.validation_response_status);
|
||||
// println!("Body: {:?}", owned_blob_match.validation_response_body);
|
||||
// Ok(())
|
||||
// }
|
||||
// }
|
||||
|
|
|
|||
|
|
@ -120,6 +120,8 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
extra_ignore_comments: Vec::new(),
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
|
|
|
|||
|
|
@ -120,6 +120,8 @@ fn test_bitbucket_remote_scan() -> Result<()> {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
max_file_size_mb: 25.0,
|
||||
|
|
|
|||
|
|
@ -140,6 +140,8 @@ rules:
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
max_file_size_mb: 5.0,
|
||||
|
|
|
|||
|
|
@ -127,6 +127,8 @@ fn test_github_remote_scan() -> Result<()> {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
max_file_size_mb: 25.0,
|
||||
|
|
|
|||
|
|
@ -125,6 +125,8 @@ fn test_gitlab_remote_scan() -> Result<()> {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
extra_ignore_comments: Vec::new(),
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
|
|
@ -271,6 +273,8 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
gcs_bucket: None,
|
||||
gcs_prefix: None,
|
||||
gcs_service_account: None,
|
||||
|
|
|
|||
|
|
@ -103,6 +103,8 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
max_file_size_mb: 25.0,
|
||||
|
|
|
|||
|
|
@ -111,6 +111,8 @@ impl TestContext {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
extra_ignore_comments: Vec::new(),
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
|
|
@ -248,6 +250,8 @@ async fn test_scan_slack_messages() -> Result<()> {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
max_file_size_mb: 25.0,
|
||||
|
|
|
|||
|
|
@ -183,6 +183,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
max_file_size_mb: 25.0,
|
||||
|
|
|
|||
|
|
@ -126,6 +126,8 @@ impl TestContext {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
},
|
||||
content_filtering_args: ContentFilteringArgs {
|
||||
max_file_size_mb: 25.0,
|
||||
|
|
@ -247,6 +249,8 @@ impl TestContext {
|
|||
scan_nested_repos: true,
|
||||
since_commit: None,
|
||||
branch: None,
|
||||
branch_root: false,
|
||||
branch_root_commit: None,
|
||||
|
||||
gcs_bucket: None,
|
||||
gcs_prefix: None,
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ fn baseline_create_and_filter() -> anyhow::Result<()> {
|
|||
"--manage-baseline",
|
||||
"--baseline-file",
|
||||
baseline.to_str().unwrap(),
|
||||
"--git-history=none",
|
||||
"--no-update-check",
|
||||
])
|
||||
.assert()
|
||||
|
|
@ -34,7 +35,10 @@ fn baseline_create_and_filter() -> anyhow::Result<()> {
|
|||
|
||||
assert!(baseline.exists(), "baseline file created");
|
||||
|
||||
// Scan again using the baseline
|
||||
let initial_baseline = fs::read_to_string(&baseline)?;
|
||||
|
||||
// Scanning with the baseline should suppress the existing finding and leave
|
||||
// the baseline untouched.
|
||||
Command::cargo_bin("kingfisher")?
|
||||
.args([
|
||||
"scan",
|
||||
|
|
@ -46,12 +50,39 @@ fn baseline_create_and_filter() -> anyhow::Result<()> {
|
|||
"json",
|
||||
"--baseline-file",
|
||||
baseline.to_str().unwrap(),
|
||||
"--git-history=none",
|
||||
"--no-update-check",
|
||||
])
|
||||
.assert()
|
||||
.code(0)
|
||||
.stdout(predicate::str::contains(GH_PAT).not());
|
||||
|
||||
let baseline_after_scan = fs::read_to_string(&baseline)?;
|
||||
assert_eq!(initial_baseline, baseline_after_scan, "baseline remains stable after reuse");
|
||||
|
||||
// Managing the baseline again should not churn entries or report the secret
|
||||
Command::cargo_bin("kingfisher")?
|
||||
.args([
|
||||
"scan",
|
||||
dir.path().to_str().unwrap(),
|
||||
"--no-binary",
|
||||
"--confidence=low",
|
||||
"--no-validate",
|
||||
"--format",
|
||||
"json",
|
||||
"--manage-baseline",
|
||||
"--baseline-file",
|
||||
baseline.to_str().unwrap(),
|
||||
"--git-history=none",
|
||||
"--no-update-check",
|
||||
])
|
||||
.assert()
|
||||
.code(0)
|
||||
.stdout(predicate::str::contains(GH_PAT).not());
|
||||
|
||||
let rerun_baseline = fs::read_to_string(&baseline)?;
|
||||
assert_eq!(initial_baseline, rerun_baseline, "baseline remains stable");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,16 +2,33 @@
|
|||
//
|
||||
// Integration tests that exercise `kingfisher scan` against Git branches and commit
|
||||
// references using locally constructed repositories. These ensure that the
|
||||
// `--branch` and `--since-commit` flags behave as expected when scanning a repo
|
||||
// without validation.
|
||||
// branch-focused flags behave as expected when scanning a repo without
|
||||
// validation, including the ability to resume from a specific commit.
|
||||
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::Result;
|
||||
use assert_cmd::Command;
|
||||
use git2::{build::CheckoutBuilder, BranchType, Repository, Signature};
|
||||
use predicates::{prelude::PredicateBooleanExt, str::contains};
|
||||
use tempfile::tempdir;
|
||||
use tempfile::{tempdir, TempDir};
|
||||
|
||||
const AWS_SECRET_VALUE: &str = "UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgTs5D";
|
||||
const GCP_PRIVATE_KEY_VALUE: &str = "c4c474d61701fd6fd4191883b8fea9a8411bf771";
|
||||
const SLACK_TOKEN_VALUE: &str = "xoxb-123465789012-0987654321123-AbDcEfGhIjKlMnOpQrStUvWx";
|
||||
const STRIPE_SECRET_VALUE: &str = "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";
|
||||
|
||||
const AWS_SECRET_LINE: &str = "AWS_SECRET_ACCESS_KEY = 'UpUbsQANRHLf2uuQ7QOlNXPbbtV5fmseW/GgTs5D/'";
|
||||
const GCP_PRIVATE_KEY_LINE: &str =
|
||||
"GCP_PRIVATE_KEY_ID = 'c4c474d61701fd6fd4191883b8fea9a8411bf771'";
|
||||
const SLACK_TOKEN_LINE: &str =
|
||||
"SLACK_BOT_TOKEN = 'xoxb-123465789012-0987654321123-AbDcEfGhIjKlMnOpQrStUvWx'";
|
||||
const STRIPE_SECRET_LINE: &str = concat!(
|
||||
"STRIPE_SECRET_KEY = '",
|
||||
"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",
|
||||
"'",
|
||||
);
|
||||
|
||||
#[test]
|
||||
fn scan_by_commit_and_branch_diff() -> anyhow::Result<()> {
|
||||
|
|
@ -117,3 +134,149 @@ aws_secret_access_key = efnegoUp/WXc3XwlL77dXu1aKIICzvz+n+7Sz88i
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
///
|
||||
///
|
||||
///
|
||||
///
|
||||
/// Create a repo with a single file `secrets.txt` and five commits that append
|
||||
/// lines in order, exactly like the provided shell script. Returns the repo dir
|
||||
/// and the vector of commit IDs (oldest → newest).
|
||||
fn setup_linear_repo_with_secrets() -> Result<(TempDir, std::path::PathBuf, Vec<git2::Oid>)> {
|
||||
let dir = tempdir()?;
|
||||
let repo_dir = dir.path().join("repo");
|
||||
let repo = Repository::init(&repo_dir)?;
|
||||
let sig = Signature::now("tester", "tester@example.com")?;
|
||||
|
||||
let secrets_path = repo_dir.join("secrets.txt");
|
||||
|
||||
// Commit #1 — AWS
|
||||
fs::write(&secrets_path, AWS_SECRET_LINE)?;
|
||||
let mut index = repo.index()?;
|
||||
index.add_path(Path::new("secrets.txt"))?;
|
||||
let tree_id = index.write_tree()?;
|
||||
let tree = repo.find_tree(tree_id)?;
|
||||
let mut commits = Vec::new();
|
||||
let c1 = repo.commit(Some("HEAD"), &sig, &sig, "Add AWS secret", &tree, &[])?;
|
||||
commits.push(c1);
|
||||
let mut parent_commit = repo.find_commit(c1)?;
|
||||
let mut contents = String::from(AWS_SECRET_LINE);
|
||||
|
||||
// Remaining commits mirror the shell script example.
|
||||
let additions = [
|
||||
("Add GCP private key id", GCP_PRIVATE_KEY_LINE),
|
||||
("Add Slack bot token", SLACK_TOKEN_LINE),
|
||||
("Add Stripe API key", STRIPE_SECRET_LINE),
|
||||
];
|
||||
|
||||
for (message, line) in additions {
|
||||
contents.push('\n');
|
||||
contents.push_str(line);
|
||||
fs::write(&secrets_path, &contents)?;
|
||||
|
||||
let mut index = repo.index()?;
|
||||
index.add_path(Path::new("secrets.txt"))?;
|
||||
let tree_id = index.write_tree()?;
|
||||
let tree = repo.find_tree(tree_id)?;
|
||||
let oid = repo.commit(Some("HEAD"), &sig, &sig, message, &tree, &[&parent_commit])?;
|
||||
commits.push(oid);
|
||||
parent_commit = repo.find_commit(oid)?;
|
||||
}
|
||||
|
||||
// Create a named branch to mirror long-lived branch workflows.
|
||||
repo.branch("long-lived", &parent_commit, true)?;
|
||||
|
||||
Ok((dir, repo_dir, commits))
|
||||
}
|
||||
|
||||
/// Scanning with `--branch <commit>` pointed at the very first commit must
/// report the AWS secret introduced there and none of the secrets added by
/// later commits.
#[test]
fn scan_specific_commit_reports_only_that_commit() -> Result<()> {
    let (_temp_dir, repo_dir, commits) = setup_linear_repo_with_secrets()?;
    // Oldest commit: only the AWS line exists at this point in history.
    let first_commit = commits[0].to_string();

    let mut cmd = Command::cargo_bin("kingfisher")?;
    cmd.args([
        "scan",
        repo_dir.to_str().unwrap(),
        "--branch",
        first_commit.as_str(),
        "--no-validate",
        "--no-update-check",
    ]);

    // AWS must be present; everything committed afterwards must be absent.
    let aws_only = contains("AWS SECRET ACCESS KEY")
        .and(contains(AWS_SECRET_VALUE))
        .and(contains(GCP_PRIVATE_KEY_VALUE).not())
        .and(contains(SLACK_TOKEN_VALUE).not())
        .and(contains(STRIPE_SECRET_VALUE).not());

    cmd.assert().code(200).stdout(aws_only);

    Ok(())
}
|
||||
|
||||
/// With `--branch-root`, the commit passed to `--branch` is treated as the
/// inclusive starting point: the scan must surface that commit's secret as
/// well as every secret added in the remaining history up to HEAD.
#[test]
fn scan_with_branch_root_includes_descendants() -> Result<()> {
    let (_temp_dir, repo_dir, commits) = setup_linear_repo_with_secrets()?;
    // Start from the oldest commit.
    let first_commit = commits[0].to_string();

    let mut cmd = Command::cargo_bin("kingfisher")?;
    cmd.args([
        "scan",
        repo_dir.to_str().unwrap(),
        "--branch",
        first_commit.as_str(),
        "--branch-root",
        "--no-validate",
        "--no-update-check",
    ]);

    // All four secrets across the history are expected in the output.
    let every_secret = contains("AWS SECRET ACCESS KEY")
        .and(contains(AWS_SECRET_VALUE))
        .and(contains(GCP_PRIVATE_KEY_VALUE))
        .and(contains(SLACK_TOKEN_VALUE))
        .and(contains(STRIPE_SECRET_VALUE));

    cmd.assert().code(200).stdout(every_secret);

    Ok(())
}
|
||||
|
||||
/// `--branch-root-commit` combined with a named `--branch` should enable
/// inclusive scanning on its own, without the legacy `--branch-root` flag:
/// every secret from the root commit through the branch tip is reported, and
/// the tip commit's ID appears in the output.
#[test]
fn scan_branch_tip_with_branch_root_commit() -> Result<()> {
    let (_temp_dir, repo_dir, commits) = setup_linear_repo_with_secrets()?;
    let root_commit = commits[0].to_string();
    let tip_commit = commits.last().expect("expected at least one commit").to_string();

    let mut cmd = Command::cargo_bin("kingfisher")?;
    cmd.args([
        "scan",
        repo_dir.to_str().unwrap(),
        "--branch",
        "long-lived",
        "--branch-root-commit",
        root_commit.as_str(),
        "--no-validate",
        "--no-update-check",
    ]);

    // Every secret in the history plus the branch-tip commit hash must show up.
    let full_history = contains("AWS SECRET ACCESS KEY")
        .and(contains(AWS_SECRET_VALUE))
        .and(contains(GCP_PRIVATE_KEY_VALUE))
        .and(contains(SLACK_TOKEN_VALUE))
        .and(contains(STRIPE_SECRET_VALUE))
        .and(contains(tip_commit.as_str()));

    cmd.assert().code(200).stdout(full_history);

    Ok(())
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue