Merge pull request #169 from mongodb/development

v1.72.0
This commit is contained in:
Mick Grove 2025-12-22 13:39:56 -08:00 committed by GitHub
commit bc0080b4e2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 396 additions and 44 deletions

View file

@ -83,16 +83,43 @@ jobs:
vcpkg-
# Ensure downloads dir exists and seed PCRE 8.45 zip from a working mirror
- name: Pre-seed PCRE 8.45 for vcpkg (bypass SourceForge redirect)
- name: Pre-seed PCRE 8.45 for vcpkg
shell: pwsh
run: |
New-Item -ItemType Directory -Force -Path "$env:VCPKG_DOWNLOADS" | Out-Null
$dst = Join-Path $env:VCPKG_DOWNLOADS "pcre-8.45.zip"
if (-not (Test-Path $dst)) {
Invoke-WebRequest `
-Uri "https://versaweb.dl.sourceforge.net/project/pcre/pcre/8.45/pcre-8.45.zip" `
-OutFile $dst -UseBasicParsing
$sf = "https://sourceforge.net/projects/pcre/files/pcre/8.45/pcre-8.45.zip/download"
# Resolve to the final mirror URL (follow redirects without downloading the whole file)
$handler = New-Object System.Net.Http.HttpClientHandler
$handler.AllowAutoRedirect = $true
$client = New-Object System.Net.Http.HttpClient($handler)
try {
$req = New-Object System.Net.Http.HttpRequestMessage([System.Net.Http.HttpMethod]::Head, $sf)
$resp = $client.SendAsync($req).GetAwaiter().GetResult()
# Some mirrors don't like HEAD; fall back to a GET that reads only the response headers.
if (-not $resp.IsSuccessStatusCode) {
$req.Dispose()
$req = New-Object System.Net.Http.HttpRequestMessage([System.Net.Http.HttpMethod]::Get, $sf)
$resp = $client.SendAsync($req, [System.Net.Http.HttpCompletionOption]::ResponseHeadersRead).GetAwaiter().GetResult()
}
$finalUrl = $resp.RequestMessage.RequestUri.AbsoluteUri
Write-Host "Resolved SourceForge URL to: $finalUrl"
# Download the actual file
Invoke-WebRequest -Uri $finalUrl -OutFile $dst
}
finally {
$client.Dispose()
$handler.Dispose()
}
}
Get-ChildItem $env:VCPKG_DOWNLOADS
- uses: swatinem/rust-cache@v2

View file

@ -137,8 +137,6 @@ jobs:
- name: Build Darwin x64
run: make darwin-x64
- name: Run tests
run: make tests
- name: Move artifacts to dist
shell: bash
@ -218,18 +216,46 @@ jobs:
vcpkg-
# Ensure downloads dir exists and seed PCRE 8.45 zip from a working mirror
- name: Pre-seed PCRE 8.45 for vcpkg (bypass SourceForge redirect)
- name: Pre-seed PCRE 8.45 for vcpkg
shell: pwsh
run: |
New-Item -ItemType Directory -Force -Path "$env:VCPKG_DOWNLOADS" | Out-Null
$dst = Join-Path $env:VCPKG_DOWNLOADS "pcre-8.45.zip"
if (-not (Test-Path $dst)) {
Invoke-WebRequest `
-Uri "https://versaweb.dl.sourceforge.net/project/pcre/pcre/8.45/pcre-8.45.zip" `
-OutFile $dst -UseBasicParsing
$sf = "https://sourceforge.net/projects/pcre/files/pcre/8.45/pcre-8.45.zip/download"
# Resolve to the final mirror URL (follow redirects without downloading the whole file)
$handler = New-Object System.Net.Http.HttpClientHandler
$handler.AllowAutoRedirect = $true
$client = New-Object System.Net.Http.HttpClient($handler)
try {
$req = New-Object System.Net.Http.HttpRequestMessage([System.Net.Http.HttpMethod]::Head, $sf)
$resp = $client.SendAsync($req).GetAwaiter().GetResult()
# Some mirrors don't like HEAD; fall back to a GET that reads only the response headers.
if (-not $resp.IsSuccessStatusCode) {
$req.Dispose()
$req = New-Object System.Net.Http.HttpRequestMessage([System.Net.Http.HttpMethod]::Get, $sf)
$resp = $client.SendAsync($req, [System.Net.Http.HttpCompletionOption]::ResponseHeadersRead).GetAwaiter().GetResult()
}
$finalUrl = $resp.RequestMessage.RequestUri.AbsoluteUri
Write-Host "Resolved SourceForge URL to: $finalUrl"
# Download the actual file
Invoke-WebRequest -Uri $finalUrl -OutFile $dst
}
finally {
$client.Dispose()
$handler.Dispose()
}
}
Get-ChildItem $env:VCPKG_DOWNLOADS
- uses: swatinem/rust-cache@v2
- name: Build

View file

@ -2,6 +2,13 @@
All notable changes to this project will be documented in this file.
## [v1.72.0]
- Fixed deduplication for dependency-provider rules so dependent validations run per blob
- Updated Artifactory rule entropy and added a new Artifactory rule
- Aliased "kingfisher self-update" as "kingfisher update"
- Mapped SARIF result levels from rule confidence
- Added tag selection support to the bash and PowerShell install scripts
## [v1.71.0]
- Improved Report Viewer layout
- Improved Salesforce rule

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.71.0"
version = "1.72.0"
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true
@ -185,7 +185,7 @@ atty = "0.2.14"
self_update = { version = "0.42.0", default-features = false, features = ["rustls", "archive-tar", "archive-zip", "compression-flate2"] }
semver = "1.0.27"
globset = "0.4.18"
jsonwebtoken = "9.3.1"
jsonwebtoken = { version = "10.2.0", features = ["aws-lc-rs"] }
ipnet = "2.11.0"
gouqi = { version = "0.20.0", features = ["async"] }
oci-client = { version = "0.15", default-features = false, features = ["rustls-tls"] }
@ -202,6 +202,7 @@ aws-sdk-secretsmanager = "1.96.0"
gcloud-storage = { version = "1.1.1", default-features = false, features = [
"rustls-tls",
"auth",
"jwt-aws-lc-rs",
] }
tokei = "12.1.2"
crc32fast = "1.5.0"

View file

@ -209,6 +209,14 @@ curl --silent --location \
bash -s -- /opt/kingfisher
```
To install a specific tag:
```bash
curl --silent --location \
https://raw.githubusercontent.com/mongodb/kingfisher/main/scripts/install-kingfisher.sh | \
bash -s -- --tag v1.71.0
```
</details>
### Windows
@ -230,6 +238,12 @@ You can provide a custom destination using the `-InstallDir` parameter:
```powershell
./install-kingfisher.ps1 -InstallDir 'C:\Tools\Kingfisher'
```
To install a specific tag:
```powershell
./install-kingfisher.ps1 -Tag v1.71.0
```
</details>
@ -1280,12 +1294,7 @@ _If no token is provided Kingfisher still works for public repositories._
Kingfisher automatically queries GitHub for a newer release when it starts and tells you whether an update is available.
- **Hands-free updates** Add `--self-update` to any Kingfisher command
* If a newer version exists, Kingfisher will download it, replace the running binary, and re-launch itself with the **exact same arguments**.
* If the update fails or no newer release is found, the current run proceeds as normal
- **Manual update** Run `kingfisher self-update` to update the binary without scanning
- **Manual update** Run `kingfisher update` to update the binary without scanning
- **Disable version checks** Pass `--no-update-check` to skip both the startup and shutdown checks entirely

View file

@ -51,7 +51,7 @@ rules:
\.jfrog\.io
)
\b
min_entropy: 3.5
min_entropy: 2.5
visible: false
confidence: medium
examples:

View file

@ -1,6 +1,5 @@
# syntax=docker/dockerfile:1
FROM alpine:3.22
# FROM alpine:latest
FROM alpine:latest
RUN apk add --no-cache curl tar git

29
scripts/install-kingfisher.ps1 Normal file → Executable file
View file

@ -1,28 +1,35 @@
<#
.SYNOPSIS
Download and install the latest Kingfisher release for Windows.
Download and install a Kingfisher release for Windows.
.DESCRIPTION
Fetches the most recent GitHub release for mongodb/kingfisher, downloads the
Windows x64 archive, and extracts kingfisher.exe to the destination folder.
By default the script installs into "$env:USERPROFILE\bin".
Fetches a GitHub release for mongodb/kingfisher, downloads the Windows x64
archive, and extracts kingfisher.exe to the destination folder. By default
the script installs into "$env:USERPROFILE\bin".
.PARAMETER InstallDir
Optional destination directory for the kingfisher.exe binary.
.PARAMETER Tag
Optional GitHub release tag (e.g., v1.71.0). Defaults to the latest release.
.EXAMPLE
./install-kingfisher.ps1
.EXAMPLE
./install-kingfisher.ps1 -InstallDir "C:\\Tools"
.EXAMPLE
./install-kingfisher.ps1 -Tag v1.71.0
#>
param(
[Parameter(Position = 0)]
[string]$InstallDir = (Join-Path $env:USERPROFILE 'bin')
[string]$InstallDir = (Join-Path $env:USERPROFILE 'bin'),
[string]$Tag
)
$repo = 'mongodb/kingfisher'
$apiUrl = "https://api.github.com/repos/$repo/releases/latest"
$assetName = 'kingfisher-windows-x64.zip'
if (-not (Get-Command Invoke-WebRequest -ErrorAction SilentlyContinue)) {
@ -33,7 +40,13 @@ if (-not (Get-Command Expand-Archive -ErrorAction SilentlyContinue)) {
throw 'Expand-Archive is required to extract the release archive. Install the PowerShell archive module.'
}
Write-Host "Fetching latest release metadata for $repo"
if ($Tag) {
$apiUrl = "https://api.github.com/repos/$repo/releases/tags/$Tag"
Write-Host "Fetching release metadata for $repo tag $Tag"
} else {
$apiUrl = "https://api.github.com/repos/$repo/releases/latest"
Write-Host "Fetching latest release metadata for $repo"
}
try {
$response = Invoke-WebRequest -Uri $apiUrl -UseBasicParsing
$release = $response.Content | ConvertFrom-Json
@ -44,7 +57,7 @@ try {
$releaseTag = $release.tag_name
$asset = $release.assets | Where-Object { $_.name -eq $assetName }
if (-not $asset) {
throw "Could not find asset '$assetName' in the latest release."
throw "Could not find asset '$assetName' in the release metadata."
}
$tempDir = New-Item -ItemType Directory -Path ([System.IO.Path]::GetTempPath()) -Name ([System.Guid]::NewGuid().ToString())

View file

@ -3,16 +3,19 @@ set -euo pipefail
REPO="mongodb/kingfisher"
DEFAULT_INSTALL_DIR="$HOME/.local/bin"
LATEST_DL_BASE="https://github.com/${REPO}/releases/latest/download"
TAG=""
usage() {
cat <<'USAGE'
Usage: install-kingfisher.sh [INSTALL_DIR]
Usage: install-kingfisher.sh [OPTIONS] [INSTALL_DIR]
Downloads the latest Kingfisher release for Linux or macOS and installs the
binary into INSTALL_DIR (default: ~/.local/bin).
Downloads a Kingfisher release for Linux or macOS and installs the binary into
INSTALL_DIR (default: ~/.local/bin).
Requirements: curl, tar
Options:
-t, --tag TAG Install a specific release tag (e.g., v1.71.0).
USAGE
}
@ -21,7 +24,35 @@ if [[ "${1-}" == "-h" || "${1-}" == "--help" ]]; then
exit 0
fi
INSTALL_DIR="${1:-$DEFAULT_INSTALL_DIR}"
# Parse command-line arguments.
#
# Options and the optional positional INSTALL_DIR may appear in any order.
# Only one positional argument is accepted; a second one is an error, even
# when the first happened to equal the default directory.
INSTALL_DIR="$DEFAULT_INSTALL_DIR"
install_dir_given=0

while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      # Honor help anywhere on the command line, not only as the first argument.
      usage
      exit 0
      ;;
    -t|--tag)
      # "${2-}" keeps `set -u` from aborting when --tag is the last argument.
      if [[ -z "${2-}" ]]; then
        echo "Error: --tag requires a value." >&2
        usage
        exit 1
      fi
      TAG="$2"
      shift 2
      ;;
    -*)
      echo "Error: Unknown option '$1'." >&2
      usage
      exit 1
      ;;
    *)
      # Track explicitly whether INSTALL_DIR was supplied, rather than
      # comparing against the default value, so that passing the default
      # path followed by another path is still rejected.
      if (( install_dir_given )); then
        echo "Error: INSTALL_DIR specified multiple times." >&2
        usage
        exit 1
      fi
      INSTALL_DIR="$1"
      install_dir_given=1
      shift
      ;;
  esac
done
# deps
command -v curl >/dev/null 2>&1 || { echo "Error: curl is required." >&2; exit 1; }
@ -45,7 +76,15 @@ esac
asset_name="kingfisher-${platform}-${arch_suffix}.tgz"
: "${asset_name:?internal error: asset_name not set}" # guard for set -u
download_url="${LATEST_DL_BASE}/${asset_name}"
if [[ -n "$TAG" ]]; then
dl_base="https://github.com/${REPO}/releases/download/${TAG}"
release_label="release tag ${TAG}"
else
dl_base="https://github.com/${REPO}/releases/latest/download"
release_label="latest release"
fi
download_url="${dl_base}/${asset_name}"
tmpdir="$(mktemp -d)"
cleanup() { rm -rf "$tmpdir"; }
@ -53,7 +92,7 @@ trap cleanup EXIT
archive_path="$tmpdir/$asset_name"
echo "Downloading latest: ${asset_name}"
echo "Downloading ${release_label}: ${asset_name}"
# -f: fail on HTTP errors (e.g., 404 if asset missing)
if ! curl -fLsS "${download_url}" -o "$archive_path"; then
echo "Error: Failed to download ${download_url}" >&2

View file

@ -72,7 +72,7 @@ pub enum Command {
View(ViewArgs),
/// Update the Kingfisher binary
#[command(name = "self-update")]
#[command(name = "self-update", alias = "update")]
SelfUpdate,
}
@ -104,7 +104,7 @@ pub struct GlobalArgs {
pub ignore_certs: bool,
/// Update the Kingfisher binary to the latest release
#[arg(global = true, long = "self-update", default_value_t = false)]
#[arg(global = true, long = "self-update", alias = "update", default_value_t = false)]
pub self_update: bool,
/// Disable automatic update checks

View file

@ -52,6 +52,7 @@ pub struct FindingsStore {
clone_dir: PathBuf,
seen_bloom: Bloom<u64>,
bloom_items: usize,
dependent_rule_ids: FxHashSet<String>,
blob_meta: FxHashMap<BlobId, Arc<BlobMetadata>>,
origin_meta: FxHashMap<u64, Arc<OriginSet>>,
docker_images: FxHashMap<PathBuf, String>,
@ -78,6 +79,7 @@ impl FindingsStore {
clone_dir,
seen_bloom,
bloom_items: 0,
dependent_rule_ids: FxHashSet::default(),
docker_images: FxHashMap::default(),
slack_links: FxHashMap::default(),
confluence_links: FxHashMap::default(),
@ -143,6 +145,12 @@ impl FindingsStore {
// Clear existing data and extend in place
self.rules.clear();
self.rules.extend_from_slice(rules);
self.dependent_rule_ids.clear();
for rule in rules {
for dependency in rule.syntax().depends_on_rule.iter().flatten() {
self.dependent_rule_ids.insert(dependency.rule_id.to_uppercase());
}
}
}
/// Insert a batch of findings.
@ -183,10 +191,13 @@ impl FindingsStore {
Origin::Extended(_) => "ext",
};
let key = xxh3_64(
format!("{}|{}|{}", m.rule.id().to_uppercase(), origin_kind, snippet)
.as_bytes(),
);
let rule_id = m.rule.id().to_uppercase();
let key_string = if self.dependent_rule_ids.contains(&rule_id) {
format!("{}|{}|{}|{}", rule_id, origin_kind, snippet, blob_md.id.hex())
} else {
format!("{}|{}|{}", rule_id, origin_kind, snippet)
};
let key = xxh3_64(key_string.as_bytes());
if self.seen_bloom.check(&key) {
continue; // very likely a duplicate

View file

@ -7,6 +7,16 @@ use super::*;
use crate::defaults::get_builtin_rules;
impl DetailsReporter {
fn sarif_level_for_confidence(confidence: &str) -> sarif::ResultLevel {
// println!("Mapping confidence '{}' to SARIF level", confidence);
match confidence.to_ascii_lowercase().as_str() {
"low" => sarif::ResultLevel::Note,
"medium" => sarif::ResultLevel::Warning,
"high" => sarif::ResultLevel::Error,
_ => sarif::ResultLevel::Warning,
}
}
fn record_to_sarif_result(&self, record: &FindingReporterRecord) -> Result<sarif::Result> {
let finding = &record.finding;
let artifact_location =
@ -49,7 +59,7 @@ impl DetailsReporter {
.message(message)
.kind(sarif::ResultKind::Review.to_string())
.locations(vec![location])
.level(sarif::ResultLevel::Warning.to_string())
.level(Self::sarif_level_for_confidence(&finding.confidence).to_string())
.partial_fingerprints([("fingerprint".to_string(), finding.fingerprint.clone())])
.build()?;
Ok(result)
@ -132,3 +142,69 @@ impl DetailsReporter {
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{findings_store::FindingsStore, reporter::styles::Styles};
    use std::sync::{Arc, Mutex};
    use tempfile::tempdir;

    /// Builds a `DetailsReporter` over a fresh, empty `FindingsStore`.
    ///
    /// The store is handed the path of a temporary directory. The `TempDir`
    /// guard is dropped when this function returns, so only the path value
    /// outlives the call.
    fn test_reporter() -> DetailsReporter {
        let tmp = tempdir().expect("tempdir");
        let store = FindingsStore::new(tmp.path().to_path_buf());
        DetailsReporter {
            datastore: Arc::new(Mutex::new(store)),
            styles: Styles::new(false),
            only_valid: false,
        }
    }

    /// Builds a reporter record whose only varying field is `confidence`.
    fn sample_record(confidence: &str) -> FindingReporterRecord {
        FindingReporterRecord {
            rule: RuleMetadata { name: "test-rule".to_string(), id: "rule-1".to_string() },
            finding: FindingRecordData {
                snippet: "secret".to_string(),
                fingerprint: "fingerprint".to_string(),
                confidence: confidence.to_string(),
                entropy: "0.0".to_string(),
                validation: ValidationInfo {
                    status: "unknown".to_string(),
                    response: "n/a".to_string(),
                },
                language: "Rust".to_string(),
                line: 1,
                column_start: 1,
                column_end: 5,
                path: "src/lib.rs".to_string(),
                encoding: None,
                git_metadata: None,
            },
        }
    }

    /// Checks the confidence-to-level mapping through the full SARIF result
    /// builder, including mixed-case labels and the fallback for unknown ones.
    #[test]
    fn sarif_level_maps_from_confidence() {
        let reporter = test_reporter();

        let cases = [
            ("low", sarif::ResultLevel::Note),
            ("medium", sarif::ResultLevel::Warning),
            ("high", sarif::ResultLevel::Error),
            // Labels are matched case-insensitively.
            ("Low", sarif::ResultLevel::Note),
            ("HIGH", sarif::ResultLevel::Error),
            // Anything unrecognized falls back to the default level.
            ("unexpected", sarif::ResultLevel::Warning),
        ];

        for (confidence, expected_level) in cases {
            let result = reporter.record_to_sarif_result(&sample_record(confidence)).unwrap();
            let expected = expected_level.to_string();
            assert_eq!(
                result.level.as_ref().and_then(|level| level.as_str()),
                Some(expected.as_str()),
                "unexpected SARIF level for confidence {:?}",
                confidence
            );
        }
    }
}

View file

@ -215,7 +215,7 @@ async fn render_and_parse_url(
// Check if the URL is resolvable.
utils::check_url_resolvable(&url).await.map_err(|e| {
let error_msg = format!("URL resolution failed: {}", e);
let error_msg = format!("URL <{}> resolution failed: {}", &url, e);
error_msg
})?;

View file

@ -0,0 +1,144 @@
use std::{path::PathBuf, sync::Arc};
use anyhow::Result;
use kingfisher::{
blob::{BlobId, BlobMetadata},
findings_store::{FindingsStore, FindingsStoreMessage},
location::{Location, OffsetSpan, SourcePoint, SourceSpan},
matcher::{Match, SerializableCapture, SerializableCaptures},
origin::{Origin, OriginSet},
rules::rule::{Confidence, DependsOnRule, Rule, RuleSyntax},
util::intern,
};
use smallvec::smallvec;
/// Builds a minimal shared [`Rule`] for store tests.
///
/// Only the identifier and the `depends_on_rule` list vary; every other field
/// is a fixed placeholder (dummy pattern, zero entropy threshold, low
/// confidence, visible, no examples, references, or validation).
fn make_rule(rule_id: &str, depends_on_rule: Vec<Option<DependsOnRule>>) -> Arc<Rule> {
    let syntax = RuleSyntax {
        name: format!("{rule_id} rule"),
        id: rule_id.to_owned(),
        pattern: String::from("dummy"),
        min_entropy: 0.0,
        confidence: Confidence::Low,
        visible: true,
        examples: Vec::new(),
        negative_examples: Vec::new(),
        references: Vec::new(),
        validation: None,
        depends_on_rule,
        pattern_requirements: None,
    };
    Arc::new(Rule::new(syntax))
}
fn make_match(rule: Arc<Rule>, blob_id: BlobId, value: &str) -> Match {
Match {
location: Location::with_source_span(
OffsetSpan { start: 0, end: value.len() },
Some(SourceSpan {
start: SourcePoint { line: 1, column: 0 },
end: SourcePoint { line: 1, column: value.len() },
}),
),
groups: SerializableCaptures {
captures: smallvec![SerializableCapture {
name: None,
match_number: 0,
start: 0,
end: value.len(),
value: intern(value),
}],
},
blob_id,
finding_fingerprint: 123,
rule,
validation_response_body: None,
validation_response_status: 0,
validation_success: false,
calculated_entropy: 0.0,
visible: true,
is_base64: false,
}
}
/// Bundles a match with shared handles to its origin set and blob metadata,
/// producing the tuple form the findings store consumes.
fn record_match(
    origin: &Arc<OriginSet>,
    blob_metadata: &Arc<BlobMetadata>,
    m: Match,
) -> FindingsStoreMessage {
    let origin_handle = Arc::clone(origin);
    let blob_handle = Arc::clone(blob_metadata);
    (origin_handle, blob_handle, m)
}
/// A rule that another rule depends on must keep one match per blob, even
/// when the matched value is identical across blobs.
#[test]
fn dedup_preserves_dependency_provider_matches_per_blob() -> Result<()> {
    let provider_rule = make_rule("RULE.PROVIDER", Vec::new());
    let dependency = DependsOnRule {
        rule_id: "RULE.PROVIDER".to_string(),
        variable: "TOKEN".into(),
    };
    let dependent_rule = make_rule("RULE.DEPENDENT", vec![Some(dependency)]);

    let mut store = FindingsStore::new(PathBuf::from("/tmp"));
    store.record_rules(&[Arc::clone(&provider_rule), dependent_rule]);

    let origin = Arc::new(OriginSet::single(Origin::from_file(PathBuf::from("a.txt"))));

    // Two distinct blobs that differ only in their id.
    let new_blob = |id: BlobId| {
        Arc::new(BlobMetadata { id, num_bytes: 10, mime_essence: None, language: None })
    };
    let blob_a = new_blob(BlobId::new(b"blob-a"));
    let blob_b = new_blob(BlobId::new(b"blob-b"));

    // The same provider-rule value found once in each blob.
    let matches: Vec<FindingsStoreMessage> = [&blob_a, &blob_b]
        .iter()
        .map(|&blob| {
            let found = make_match(Arc::clone(&provider_rule), blob.id, "shared_token");
            record_match(&origin, blob, found)
        })
        .collect();

    store.record(matches, true);

    assert_eq!(store.get_matches().len(), 2);
    Ok(())
}
/// A rule that no other rule depends on is still deduplicated across blobs:
/// the same value found in two blobs is stored as a single match.
#[test]
fn dedup_still_merges_non_dependency_rules_across_blobs() -> Result<()> {
    let rule = make_rule("RULE.SIMPLE", Vec::new());

    let mut store = FindingsStore::new(PathBuf::from("/tmp"));
    store.record_rules(&[Arc::clone(&rule)]);

    let origin = Arc::new(OriginSet::single(Origin::from_file(PathBuf::from("b.txt"))));

    // Two distinct blobs that differ only in their id.
    let new_blob = |id: BlobId| {
        Arc::new(BlobMetadata { id, num_bytes: 10, mime_essence: None, language: None })
    };
    let blob_a = new_blob(BlobId::new(b"blob-a"));
    let blob_b = new_blob(BlobId::new(b"blob-b"));

    // The same value found once in each blob.
    let matches: Vec<FindingsStoreMessage> = [&blob_a, &blob_b]
        .iter()
        .map(|&blob| {
            let found = make_match(Arc::clone(&rule), blob.id, "shared_token");
            record_match(&origin, blob, found)
        })
        .collect();

    store.record(matches, true);

    assert_eq!(store.get_matches().len(), 1);
    Ok(())
}