diff --git a/README.md b/README.md index e042806..a6db7c8 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,7 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md)) - [To scan using **only** your own `my_rules.yaml` you could run:](#to-scan-using-only-your-own-my_rulesyaml-you-could-run) - [To add your rules alongside the built‑ins:](#to-add-your-rules-alongside-the-builtins) - [Other Examples](#other-examples) + - [Customize the HTTP User-Agent](#customize-the-http-user-agent) - [Notable Scan Options](#notable-scan-options) - [Understanding `--confidence`](#understanding---confidence) - [Ignore known false positives](#ignore-known-false-positives) @@ -732,6 +733,23 @@ kingfisher github repos list --organization my-org --github-exclude my-org/*-arc ``` +## Customize the HTTP User-Agent + +Kingfisher identifies its HTTP requests with a user-agent that includes the binary name and version followed by a browser-style +string. Some environments require extra context, such as a contact address, a change-ticket number, or a temporary test label. +Use the global `--user-agent-suffix` flag to append this information between the Kingfisher identifier and the browser portion: + +```bash +# Attach a contact email to all outbound validation requests +kingfisher --user-agent-suffix "contact=security@example.com" scan path/ + +# Label a one-off experiment +kingfisher --user-agent-suffix "Sept 2025 testing" github repos list --user my-user +``` + +When omitted, Kingfisher defaults to `kingfisher/<version> Mozilla/5.0 ...`. The suffix is trimmed; passing an empty string +leaves the default unchanged. 
+ ## Notable Scan Options - `--no-dedup`: Report every occurrence of a finding (disable the default de-duplicate behavior) diff --git a/src/cli/global.rs b/src/cli/global.rs index 8f761de..f21535d 100644 --- a/src/cli/global.rs +++ b/src/cli/global.rs @@ -41,6 +41,15 @@ impl CommandLineArgs { args.global_args.progress = Mode::Never; } + if let Some(suffix) = args.global_args.user_agent_suffix.as_mut() { + let trimmed = suffix.trim(); + if trimmed.is_empty() { + args.global_args.user_agent_suffix = None; + } else if trimmed.len() != suffix.len() { + *suffix = trimmed.to_string(); + } + } + args } } @@ -112,6 +121,10 @@ pub struct GlobalArgs { #[arg(global = true, long = "no-update-check", default_value_t = false)] pub no_update_check: bool, + /// Append a custom suffix to the default Kingfisher user-agent string + #[arg(global = true, long = "user-agent-suffix", value_name = "SUFFIX")] + pub user_agent_suffix: Option<String>, + #[command(flatten)] pub advanced: AdvancedArgs, @@ -131,6 +144,7 @@ impl Default for GlobalArgs { ignore_certs: false, self_update: false, no_update_check: false, + user_agent_suffix: None, advanced: AdvancedArgs { rlimit_nofile: 16384 }, color: Mode::Auto, progress: Mode::Auto, diff --git a/src/github.rs b/src/github.rs index 56b7c3e..77105c3 100644 --- a/src/github.rs +++ b/src/github.rs @@ -18,7 +18,7 @@ use serde_json::Value; use tracing::warn; use url::Url; -use crate::{findings_store, git_url::GitUrl}; +use crate::{findings_store, git_url::GitUrl, validation::GLOBAL_USER_AGENT}; use std::str::FromStr; #[derive(Debug)] @@ -356,7 +356,7 @@ pub async fn fetch_repo_items( let url = format!( "https://api.github.com/repos/{owner}/{repo}/issues?state=all&per_page=100&page={page}" ); - let mut req = client.get(&url).header("User-Agent", "kingfisher"); + let mut req = client.get(&url).header("User-Agent", GLOBAL_USER_AGENT.as_str()); if let Ok(token) = env::var("KF_GITHUB_TOKEN") { if !token.is_empty() { req = req.bearer_auth(token); @@ -396,7 +396,7 @@ 
pub async fn fetch_repo_items( page = 1; loop { let url = format!("https://api.github.com/users/{owner}/gists?per_page=100&page={page}"); - let mut req = client.get(&url).header("User-Agent", "kingfisher"); + let mut req = client.get(&url).header("User-Agent", GLOBAL_USER_AGENT.as_str()); if let Ok(token) = env::var("KF_GITHUB_TOKEN") { if !token.is_empty() { req = req.bearer_auth(&token); @@ -415,7 +415,7 @@ pub async fn fetch_repo_items( if seen.insert(id.to_string()) { let mut req_g = client .get(&format!("https://api.github.com/gists/{id}")) - .header("User-Agent", "kingfisher"); + .header("User-Agent", GLOBAL_USER_AGENT.as_str()); if let Ok(token) = env::var("KF_GITHUB_TOKEN") { if !token.is_empty() { req_g = req_g.bearer_auth(&token); @@ -449,7 +449,7 @@ pub async fn fetch_repo_items( let url = format!("https://api.github.com/gists?per_page=100&page={page}"); let resp = client .get(&url) - .header("User-Agent", "kingfisher") + .header("User-Agent", GLOBAL_USER_AGENT.as_str()) .bearer_auth(&token) .send() .await?; @@ -468,7 +468,7 @@ pub async fn fetch_repo_items( if seen.insert(id.to_string()) { let detail: Value = client .get(&format!("https://api.github.com/gists/{id}")) - .header("User-Agent", "kingfisher") + .header("User-Agent", GLOBAL_USER_AGENT.as_str()) .bearer_auth(&token) .send() .await? 
diff --git a/src/main.rs b/src/main.rs index 89d9cf5..9bea262 100644 --- a/src/main.rs +++ b/src/main.rs @@ -56,6 +56,7 @@ use kingfisher::{ rules_database::RulesDatabase, scanner::{load_and_record_rules, run_scan}, update::check_for_update, + validation::set_user_agent_suffix, }; use serde_json::json; use tempfile::TempDir; @@ -75,6 +76,8 @@ fn main() -> anyhow::Result<()> { // Parse command-line arguments let args = CommandLineArgs::parse_args(); + set_user_agent_suffix(args.global_args.user_agent_suffix.clone()); + // Determine the number of jobs, defaulting to the number of CPUs let num_jobs = match args.command { Command::Scan(ref scan_args) => scan_args.num_jobs, diff --git a/src/matcher.rs b/src/matcher.rs index d30b0d8..8bd71d6 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -771,14 +771,10 @@ impl SerializableCaptures { // ------------------------------------------------------------------------------------------------- #[derive(Debug, Clone, Serialize, JsonSchema)] pub struct Match { - /// The blob this match comes from - // pub blob_id: BlobId, - /// The location of the entire matching content pub location: Location, /// The capture groups - // pub groups: Groups, pub groups: SerializableCaptures, // Store serialized captures /// unique identifier of file / blob where this match was found diff --git a/src/validation.rs b/src/validation.rs index a713f73..81f97dd 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -37,16 +37,38 @@ mod utils; const VALIDATION_CACHE_SECONDS: u64 = 1200; // 20 minutes const MAX_VALIDATION_BODY_LEN: usize = 2048; -pub static GLOBAL_USER_AGENT: Lazy<String> = Lazy::new(|| { - format!( - "{}/{} {}", - env!("CARGO_PKG_NAME"), - env!("CARGO_PKG_VERSION"), - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \ +static USER_AGENT_SUFFIX: OnceCell<String> = OnceCell::new(); + +const BROWSER_USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \ AppleWebKit/537.36 (KHTML, like Gecko) \ - Chrome/140.0.0.0 Safari/537.36" - ) -}); + 
Chrome/140.0.0.0 Safari/537.36";
+
+/// Assemble `<name>/<version> [suffix] <browser UA>`; the suffix is included only when set.
+fn build_user_agent() -> String {
+    let base = format!("{}/{}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"));
+    match USER_AGENT_SUFFIX.get() {
+        Some(suffix) => format!("{base} {suffix} {BROWSER_USER_AGENT}"),
+        None => format!("{base} {BROWSER_USER_AGENT}"),
+    }
+}
+
+/// User-agent string for outbound HTTP requests, built by `build_user_agent` on first access.
+pub static GLOBAL_USER_AGENT: Lazy<String> = Lazy::new(build_user_agent);
+
+/// Configure a user-agent suffix that is appended after the Kingfisher package name/version.
+///
+/// The suffix is inserted before the browser portion of the user-agent. Surrounding whitespace
+/// is trimmed; `None`, empty, and whitespace-only values are ignored.
+///
+/// Call this once near program start, before [`GLOBAL_USER_AGENT`] is first accessed, because
+/// the user-agent is built on first access and only the first stored suffix is kept.
+pub fn set_user_agent_suffix<S: Into<String>>(suffix: Option<S>) {
+    let suffix: Option<String> = suffix.map(Into::into);
+    if let Some(trimmed) = suffix.as_deref().map(str::trim).filter(|s| !s.is_empty()) {
+        // The result of `set` is dropped on purpose: an already-stored suffix stays in place.
+        let _ = USER_AGENT_SUFFIX.set(trimmed.to_owned());
+    }
+}
 
 // Use SkipMap-based cache instead of a mutex-wrapped FxHashMap.
 type Cache = Arc>;