Merge pull request #112 from mongodb/development

v1.52.0
This commit is contained in:
Mick Grove 2025-09-18 19:03:02 -07:00 committed by GitHub
commit 41b6772b91
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 95 additions and 51 deletions

View file

@ -1,6 +1,10 @@
# Changelog
All notable changes to this project will be documented in this file.
## [v1.52.0]
- Enabled ANSI formatting in the tracing formatter whenever stderr is attached to a terminal so colorized updater messages render correctly instead of showing escape sequences.
- Added a new CLI flag, `--user-agent-suffix`, that lets developers append additional information to the user-agent.
- Removed the unused `--rlimit-nofile` flag.
## [1.51.0]
- Added diff-only Git scanning via `--since-commit` and `--branch`, including remote-aware ref resolution so CI jobs can pair `--git-url` clones with pull request branches

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.51.0"
version = "1.52.0"
description = "MongoDB's blazingly fast secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true

View file

@ -87,6 +87,7 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md))
- [To scan using **only** your own `my_rules.yaml` you could run:](#to-scan-using-only-your-own-my_rulesyaml-you-could-run)
- [To add your rules alongside the builtins:](#to-add-your-rules-alongside-the-builtins)
- [Other Examples](#other-examples)
- [Customize the HTTP User-Agent](#customize-the-http-user-agent)
- [Notable Scan Options](#notable-scan-options)
- [Understanding `--confidence`](#understanding---confidence)
- [Ignore known false positives](#ignore-known-false-positives)
@ -732,6 +733,23 @@ kingfisher github repos list --organization my-org --github-exclude my-org/*-arc
```
## Customize the HTTP User-Agent
Kingfisher identifies its HTTP requests with a user-agent that includes the binary name and version followed by a browser-style
string. Some environments require extra context, such as a contact address, a change-ticket number, or a temporary test label.
Use the global `--user-agent-suffix` flag to append this information between the Kingfisher identifier and the browser portion:
```bash
# Attach a contact email to all outbound validation requests
kingfisher --user-agent-suffix "contact=security@example.com" scan path/
# Label a one-off experiment
kingfisher --user-agent-suffix "Sept 2025 testing" github repos list --user my-user
```
When omitted, Kingfisher defaults to `kingfisher/<version> Mozilla/5.0 ...`. Leading and trailing whitespace is trimmed from
the suffix; passing an empty or whitespace-only string leaves the default unchanged.
## Notable Scan Options
- `--no-dedup`: Report every occurrence of a finding (disable the default de-duplicate behavior)

View file

@ -41,6 +41,15 @@ impl CommandLineArgs {
args.global_args.progress = Mode::Never;
}
if let Some(suffix) = args.global_args.user_agent_suffix.as_mut() {
let trimmed = suffix.trim();
if trimmed.is_empty() {
args.global_args.user_agent_suffix = None;
} else if trimmed.len() != suffix.len() {
*suffix = trimmed.to_string();
}
}
args
}
}
@ -79,15 +88,6 @@ pub static RAM_GB: Lazy<Option<f64>> = Lazy::new(|| {
}
});
/// Advanced global options unlikely to be used in normal scenarios.
#[derive(Args, Debug, Clone)]
#[command(next_help_heading = "Advanced Global Options")]
pub struct AdvancedArgs {
/// Set the rlimit for the number of open files
#[arg(long, default_value_t = 16384, value_name = "LIMIT")]
pub rlimit_nofile: u64,
}
/// Top-level global CLI arguments
#[derive(Args, Debug, Clone)]
#[command(next_help_heading = "Global Options")]
@ -112,8 +112,9 @@ pub struct GlobalArgs {
#[arg(global = true, long = "no-update-check", default_value_t = false)]
pub no_update_check: bool,
#[command(flatten)]
pub advanced: AdvancedArgs,
/// Append a custom suffix to the default Kingfisher user-agent string
#[arg(global = true, long = "user-agent-suffix", value_name = "SUFFIX")]
pub user_agent_suffix: Option<String>,
// Internal fields (not CLI arguments)
#[clap(skip)]
@ -131,7 +132,7 @@ impl Default for GlobalArgs {
ignore_certs: false,
self_update: false,
no_update_check: false,
advanced: AdvancedArgs { rlimit_nofile: 16384 },
user_agent_suffix: None,
color: Mode::Auto,
progress: Mode::Auto,
}

View file

@ -18,7 +18,7 @@ use serde_json::Value;
use tracing::warn;
use url::Url;
use crate::{findings_store, git_url::GitUrl};
use crate::{findings_store, git_url::GitUrl, validation::GLOBAL_USER_AGENT};
use std::str::FromStr;
#[derive(Debug)]
@ -356,7 +356,7 @@ pub async fn fetch_repo_items(
let url = format!(
"https://api.github.com/repos/{owner}/{repo}/issues?state=all&per_page=100&page={page}"
);
let mut req = client.get(&url).header("User-Agent", "kingfisher");
let mut req = client.get(&url).header("User-Agent", GLOBAL_USER_AGENT.as_str());
if let Ok(token) = env::var("KF_GITHUB_TOKEN") {
if !token.is_empty() {
req = req.bearer_auth(token);
@ -396,7 +396,7 @@ pub async fn fetch_repo_items(
page = 1;
loop {
let url = format!("https://api.github.com/users/{owner}/gists?per_page=100&page={page}");
let mut req = client.get(&url).header("User-Agent", "kingfisher");
let mut req = client.get(&url).header("User-Agent", GLOBAL_USER_AGENT.as_str());
if let Ok(token) = env::var("KF_GITHUB_TOKEN") {
if !token.is_empty() {
req = req.bearer_auth(&token);
@ -415,7 +415,7 @@ pub async fn fetch_repo_items(
if seen.insert(id.to_string()) {
let mut req_g = client
.get(&format!("https://api.github.com/gists/{id}"))
.header("User-Agent", "kingfisher");
.header("User-Agent", GLOBAL_USER_AGENT.as_str());
if let Ok(token) = env::var("KF_GITHUB_TOKEN") {
if !token.is_empty() {
req_g = req_g.bearer_auth(&token);
@ -449,7 +449,7 @@ pub async fn fetch_repo_items(
let url = format!("https://api.github.com/gists?per_page=100&page={page}");
let resp = client
.get(&url)
.header("User-Agent", "kingfisher")
.header("User-Agent", GLOBAL_USER_AGENT.as_str())
.bearer_auth(&token)
.send()
.await?;
@ -468,7 +468,7 @@ pub async fn fetch_repo_items(
if seen.insert(id.to_string()) {
let detail: Value = client
.get(&format!("https://api.github.com/gists/{id}"))
.header("User-Agent", "kingfisher")
.header("User-Agent", GLOBAL_USER_AGENT.as_str())
.bearer_auth(&token)
.send()
.await?

View file

@ -27,7 +27,7 @@ static GLOBAL: System = System;
// static GLOBAL: System = System;
use std::{
io::Read,
io::{IsTerminal, Read},
sync::{Arc, Mutex},
};
@ -56,6 +56,7 @@ use kingfisher::{
rules_database::RulesDatabase,
scanner::{load_and_record_rules, run_scan},
update::check_for_update,
validation::set_user_agent_suffix,
};
use serde_json::json;
use tempfile::TempDir;
@ -75,6 +76,8 @@ fn main() -> anyhow::Result<()> {
// Parse command-line arguments
let args = CommandLineArgs::parse_args();
set_user_agent_suffix(args.global_args.user_agent_suffix.clone());
// Determine the number of jobs, defaulting to the number of CPUs
let num_jobs = match args.command {
Command::Scan(ref scan_args) => scan_args.num_jobs,
@ -122,7 +125,7 @@ fn setup_logging(global_args: &GlobalArgs) {
let fmt_layer = fmt::layer()
.with_writer(std::io::stderr) // Write logs to stderr
.with_target(true) // Enable target filtering
.with_ansi(false) // Disable colors
.with_ansi(std::io::stderr().is_terminal()) // Emit ANSI colours when stderr is a TTY
.without_time(); // Remove timestamps
// Build and initialize the registry
registry()

View file

@ -771,14 +771,10 @@ impl SerializableCaptures {
// -------------------------------------------------------------------------------------------------
#[derive(Debug, Clone, Serialize, JsonSchema)]
pub struct Match {
/// The blob this match comes from
// pub blob_id: BlobId,
/// The location of the entire matching content
pub location: Location,
/// The capture groups
// pub groups: Groups,
pub groups: SerializableCaptures, // Store serialized captures
/// unique identifier of file / blob where this match was found

View file

@ -37,16 +37,38 @@ mod utils;
const VALIDATION_CACHE_SECONDS: u64 = 1200; // 20 minutes
const MAX_VALIDATION_BODY_LEN: usize = 2048;
pub static GLOBAL_USER_AGENT: Lazy<String> = Lazy::new(|| {
format!(
"{}/{} {}",
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
static USER_AGENT_SUFFIX: OnceCell<String> = OnceCell::new();
const BROWSER_USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
AppleWebKit/537.36 (KHTML, like Gecko) \
Chrome/140.0.0.0 Safari/537.36"
)
});
Chrome/140.0.0.0 Safari/537.36";
/// Assemble the user-agent string sent with outbound HTTP requests.
///
/// The result always starts with `<package name>/<package version>` (taken from Cargo at
/// build time) and ends with [`BROWSER_USER_AGENT`]. When a suffix has been stored in
/// `USER_AGENT_SUFFIX`, it is placed between those two parts, separated by single spaces.
fn build_user_agent() -> String {
    let identity = format!("{}/{}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"));
    match USER_AGENT_SUFFIX.get() {
        // A suffix was configured: identity, then suffix, then the browser string.
        Some(extra) => format!("{identity} {extra} {BROWSER_USER_AGENT}"),
        // No suffix configured: identity followed directly by the browser string.
        None => format!("{identity} {BROWSER_USER_AGENT}"),
    }
}
pub static GLOBAL_USER_AGENT: Lazy<String> = Lazy::new(build_user_agent);
/// Record an optional suffix to embed in the Kingfisher user-agent string.
///
/// The suffix is placed after the package name/version and before the browser portion of
/// the user-agent. Surrounding whitespace is stripped first; `None`, an empty string, or a
/// whitespace-only string leaves the configuration untouched.
///
/// Call this once near program start, before [`GLOBAL_USER_AGENT`] is first accessed. The
/// result of storing the value is deliberately discarded, so a failed store (for example
/// when a suffix was already recorded) is silent.
pub fn set_user_agent_suffix<S: Into<String>>(suffix: Option<S>) {
    let cleaned = suffix
        .map(Into::into)
        .map(|raw: String| raw.trim().to_string())
        .filter(|value| !value.is_empty());

    if let Some(value) = cleaned {
        // Best-effort store: the outcome is intentionally ignored.
        let _ = USER_AGENT_SUFFIX.set(value);
    }
}
// Use SkipMap-based cache instead of a mutex-wrapped FxHashMap.
type Cache = Arc<SkipMap<String, CachedResponse>>;

View file

@ -14,7 +14,7 @@ use kingfisher::{
rules::RuleSpecifierArgs,
scan::{ConfidenceLevel, ScanArgs},
},
global::{AdvancedArgs, Mode},
global::Mode,
GlobalArgs,
},
findings_store::FindingsStore,
@ -120,7 +120,7 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
no_update_check: true,
self_update: false,
ignore_certs: false,
advanced: AdvancedArgs { rlimit_nofile: 8192 },
user_agent_suffix: None,
};
let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?;

View file

@ -18,7 +18,7 @@ use kingfisher::{
rules::RuleSpecifierArgs,
scan::{ConfidenceLevel, ScanArgs},
},
global::{AdvancedArgs, Mode},
global::Mode,
GlobalArgs,
},
findings_store::FindingsStore,
@ -136,7 +136,7 @@ rules:
no_update_check: false,
self_update: false,
ignore_certs: false,
advanced: AdvancedArgs { rlimit_nofile: 8192 },
user_agent_suffix: None,
};
// ── load rules once ─────────────────────────────────────────────

View file

@ -15,7 +15,7 @@ use kingfisher::{
rules::RuleSpecifierArgs,
scan::{ConfidenceLevel, ScanArgs},
},
global::{AdvancedArgs, Mode},
global::Mode,
GlobalArgs,
},
findings_store::FindingsStore,
@ -123,7 +123,7 @@ fn test_github_remote_scan() -> Result<()> {
no_update_check: false,
self_update: false,
ignore_certs: false,
advanced: AdvancedArgs { rlimit_nofile: 16384 },
user_agent_suffix: None,
};
// Create in-memory datastore
let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir)));

View file

@ -15,7 +15,7 @@ use kingfisher::{
rules::RuleSpecifierArgs,
scan::{ConfidenceLevel, ScanArgs},
},
global::{AdvancedArgs, Mode},
global::Mode,
GlobalArgs,
},
findings_store::FindingsStore,
@ -121,7 +121,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
no_update_check: false,
self_update: false,
ignore_certs: false,
advanced: AdvancedArgs { rlimit_nofile: 16_384 },
user_agent_suffix: None,
};
let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir)));
@ -233,7 +233,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
no_update_check: false,
self_update: false,
ignore_certs: false,
advanced: AdvancedArgs { rlimit_nofile: 16_384 },
user_agent_suffix: None,
};
let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir)));

View file

@ -15,7 +15,7 @@ use kingfisher::{
rules::RuleSpecifierArgs,
scan::{ConfidenceLevel, ScanArgs},
},
global::{AdvancedArgs, GlobalArgs, Mode},
global::{GlobalArgs, Mode},
},
findings_store::FindingsStore,
rule_loader::RuleLoader,
@ -103,7 +103,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
self_update: false,
progress: Mode::Never,
ignore_certs: false,
advanced: AdvancedArgs { rlimit_nofile: 16384 },
user_agent_suffix: None,
};
let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?;

View file

@ -14,7 +14,7 @@ use kingfisher::{
rules::RuleSpecifierArgs,
scan::{ConfidenceLevel, ScanArgs},
},
global::{AdvancedArgs, Mode},
global::Mode,
GlobalArgs,
},
findings_store::FindingsStore,
@ -211,7 +211,7 @@ async fn test_scan_slack_messages() -> Result<()> {
self_update: false,
progress: Mode::Never,
ignore_certs: false,
advanced: AdvancedArgs { rlimit_nofile: 16384 },
user_agent_suffix: None,
};
let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir)));

View file

@ -18,7 +18,7 @@ use kingfisher::{
rules::RuleSpecifierArgs,
scan::{ConfidenceLevel, ScanArgs},
},
global::{AdvancedArgs, Mode},
global::Mode,
GlobalArgs,
},
findings_store::FindingsStore,
@ -197,7 +197,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
no_update_check: false,
self_update: false,
ignore_certs: false,
advanced: AdvancedArgs { rlimit_nofile: 8192 },
user_agent_suffix: None,
};
run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &rules_db).await?;

View file

@ -16,7 +16,7 @@ use kingfisher::{
rules::RuleSpecifierArgs,
scan::{ConfidenceLevel, ScanArgs},
},
global::{AdvancedArgs, Mode},
global::Mode,
GlobalArgs,
},
findings_store::FindingsStore,
@ -209,7 +209,7 @@ impl TestContext {
self_update: false,
progress: Mode::Never,
ignore_certs: false,
advanced: AdvancedArgs { rlimit_nofile: 16384 },
user_agent_suffix: None,
};
let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir)));