diff --git a/crates/kingfisher-rules/data/rules/auth.yml b/crates/kingfisher-rules/data/rules/auth.yml index 41e93f6..66e1217 100644 --- a/crates/kingfisher-rules/data/rules/auth.yml +++ b/crates/kingfisher-rules/data/rules/auth.yml @@ -10,6 +10,8 @@ rules: ) (?:[^A-Za-z0-9+/=]|$) pattern_requirements: + min_digits: 1 + min_lowercase: 1 ignore_if_contains: - "dXNlcjpwYXNz" # user:pass - "dXNlcjpwYXNzd29yZA" # user:password @@ -31,7 +33,7 @@ rules: - "YOUR_" - "$BASIC" min_entropy: 3.5 - confidence: low + confidence: medium examples: - "Authorization: Basic SWxqcmpxZDpld3B3IGdlZmhtZA==" - "Authorization: Basic bXpnaGd6cTpsbmFrdWF6cXNx" @@ -66,7 +68,7 @@ rules: - "1234567890abcdef" - "xxxxxxxxxxxxxxxx" min_entropy: 3.5 - confidence: low + confidence: medium examples: - "Authorization: Bearer d1fc83f09661bcaf3cb3591ec4b93fe93b07087ef020886a4ead09e8efd8c0df" - " - Bearer 7727d2fa5e8f7d37de25ef385ff8766d265ee2e7384efb311c04069d9c892c6b" diff --git a/crates/kingfisher-rules/data/rules/onelogin.yml b/crates/kingfisher-rules/data/rules/onelogin.yml index ddd977e..6f46a8b 100644 --- a/crates/kingfisher-rules/data/rules/onelogin.yml +++ b/crates/kingfisher-rules/data/rules/onelogin.yml @@ -49,7 +49,7 @@ rules: | api[\s_.-]*secret | - secret + [\s_.\-:"'=]secret\b ) (?:.|[\n\r]){0,20}? [=:"'\s_-]* diff --git a/src/access_map/pinecone.rs b/src/access_map/pinecone.rs index 17f975c..c70eef6 100644 --- a/src/access_map/pinecone.rs +++ b/src/access_map/pinecone.rs @@ -1,3 +1,5 @@ +use std::time::Duration; + use anyhow::{Context, Result, anyhow}; use reqwest::{Client, header}; use serde::Deserialize; @@ -101,6 +103,8 @@ pub async fn map_access(args: &AccessMapArgs) -> Result { pub async fn map_access_from_token(token: &str) -> Result { let client = Client::builder() .user_agent(GLOBAL_USER_AGENT.as_str()) + .connect_timeout(Duration::from_secs(10)) + .timeout(Duration::from_secs(30)) .build() .context("Failed to build Pinecone HTTP client")?; diff --git a/src/alerts/discord.rs b/src/alerts/discord.rs index 0304e0f..541f22c 100644 --- a/src/alerts/discord.rs +++ b/src/alerts/discord.rs @@ -113,12 +113,13 @@ pub fn build_payload( embed["url"] = Value::String(url.clone()); // Append a fields entry too — embed `url` only renders if the title // is short enough; the field guarantees the link is visible. - let fields_arr = embed["fields"].as_array_mut().expect("fields is an array"); - fields_arr.push(json!({ - "name": "Full report", - "value": format!("[Open]({})", url), - "inline": false, - })); + if let Some(fields_arr) = embed["fields"].as_array_mut() { + fields_arr.push(json!({ + "name": "Full report", + "value": format!("[Open]({})", url), + "inline": false, + })); + } } json!({ "embeds": [embed] }) diff --git a/src/alerts/mattermost.rs b/src/alerts/mattermost.rs index 8724291..462e585 100644 --- a/src/alerts/mattermost.rs +++ b/src/alerts/mattermost.rs @@ -108,12 +108,13 @@ pub fn build_payload( // Setting both `title_link` and a fallback field makes the link // visible regardless of how a given client/version renders. attachment["title_link"] = Value::String(url.clone()); - let fields_arr = attachment["fields"].as_array_mut().expect("fields is an array"); - fields_arr.push(json!({ - "short": false, - "title": "Full report", - "value": format!("[Open]({})", url), - })); + if let Some(fields_arr) = attachment["fields"].as_array_mut() { + fields_arr.push(json!({ + "short": false, + "title": "Full report", + "value": format!("[Open]({})", url), + })); + } } json!({ diff --git a/src/alerts/mod.rs b/src/alerts/mod.rs index 5566c2c..dccef16 100644 --- a/src/alerts/mod.rs +++ b/src/alerts/mod.rs @@ -87,14 +87,18 @@ impl AlertFormat { pub fn infer_from_url(url: &str) -> Self { let host = url::Url::parse(url).ok().and_then(|u| u.host_str().map(str::to_lowercase)); match host.as_deref() { - Some(h) if h.contains("slack.com") => AlertFormat::Slack, - Some(h) if h.contains("office.com") || h.contains("webhook.office") => { + Some(h) if host_matches(h, "slack.com") => AlertFormat::Slack, + Some(h) + if host_matches(h, "office.com") + || host_matches(h, "webhook.office.com") + || host_matches(h, "webhook.office.net") => + { AlertFormat::Teams } - Some(h) if h.contains("discord.com") || h.contains("discordapp.com") => { + Some(h) if host_matches(h, "discord.com") || host_matches(h, "discordapp.com") => { AlertFormat::Discord } - Some(h) if h.contains("chat.googleapis.com") => AlertFormat::Googlechat, + Some(h) if host_matches(h, "chat.googleapis.com") => AlertFormat::Googlechat, _ => AlertFormat::Generic, } } @@ -188,11 +192,40 @@ impl AlertSummary { fn build_client() -> Result { Client::builder() .timeout(Duration::from_secs(15)) + .connect_timeout(Duration::from_secs(5)) .user_agent(format!("kingfisher/{}", env!("CARGO_PKG_VERSION"))) .build() .context("failed to build webhook reqwest::Client") } +/// Tail-match a hostname against a webhook host so substrings like +/// `not-slack.com.attacker.example` cannot be misclassified. +fn host_matches(host: &str, suffix: &str) -> bool { + host == suffix || host.ends_with(&format!(".{suffix}")) +} + +/// Validate a webhook URL: must parse, must use http(s) scheme, must have a +/// host. Returns the parsed URL on success. +pub fn validate_webhook_url(url: &str) -> Result<()> { + let parsed = url::Url::parse(url) + .with_context(|| format!("invalid webhook URL `{}`", redact_for_log(url)))?; + let scheme = parsed.scheme(); + if scheme != "https" && scheme != "http" { + anyhow::bail!( + "webhook URL `{}` uses unsupported scheme `{scheme}` (only http/https are allowed)", + redact_for_log(url) + ); + } + if parsed.host_str().is_none_or(|h| h.is_empty()) { + anyhow::bail!("webhook URL `{}` has no host", redact_for_log(url)); + } + Ok(()) +} + +fn redact_for_log(url: &str) -> String { + redact_webhook(url) +} + /// Redact the path/query of a webhook URL so we never log the full secret token /// embedded by Slack/Teams/etc. e.g. `https://hooks.slack.com/services/...` → /// `https://hooks.slack.com/`. diff --git a/src/alerts/slack.rs b/src/alerts/slack.rs index 2bcbc31..363f410 100644 --- a/src/alerts/slack.rs +++ b/src/alerts/slack.rs @@ -102,7 +102,7 @@ pub fn build_payload( "type": "section", "text": { "type": "mrkdwn", - "text": format!("<{}|Full report →>", escape_mrkdwn(url)) + "text": format!("<{}|Full report →>", url) } })); } diff --git a/src/cli/config.rs b/src/cli/config.rs index a90a194..a447868 100644 --- a/src/cli/config.rs +++ b/src/cli/config.rs @@ -100,17 +100,43 @@ pub struct FiltersConfig { pub exclude: Vec, } -/// Parse YAML text into a config struct. +/// Cap on `discover_path` upward walks. Avoids unbounded directory traversal +/// on networked filesystems or pathological mount layouts. +const DISCOVER_MAX_DEPTH: usize = 32; + +/// Parse YAML text into a config struct, validating webhook URLs and +/// `skip_regex` patterns at parse time so config errors surface at a sensible +/// location rather than mid-scan. pub fn parse_str(yaml: &str) -> Result { - serde_yaml::from_str(yaml).context("failed to parse kingfisher.yaml") + let cfg: KingfisherConfig = + serde_yaml::from_str(yaml).context("failed to parse kingfisher.yaml")?; + validate(&cfg)?; + Ok(cfg) } -/// Walk upward from `start` looking for a sibling `kingfisher.yaml`. Returns -/// the absolute path when found. Performs *no* file reads — the caller does -/// the read once it has decided which file to use. +fn validate(cfg: &KingfisherConfig) -> Result<()> { + for (idx, w) in cfg.alerts.webhooks.iter().enumerate() { + crate::alerts::validate_webhook_url(&w.url) + .with_context(|| format!("alerts.webhooks[{idx}].url"))?; + if let Some(report_url) = &w.report_url { + url::Url::parse(report_url) + .with_context(|| format!("alerts.webhooks[{idx}].report_url is not a valid URL"))?; + } + } + for (idx, pattern) in cfg.filters.skip_regex.iter().enumerate() { + regex::Regex::new(pattern) + .with_context(|| format!("filters.skip_regex[{idx}] is not a valid regex"))?; + } + Ok(()) +} + +/// Walk upward from `start` looking for `kingfisher.yaml` in each ancestor +/// directory. Returns the absolute path when found. Performs *no* file reads — +/// the caller does the read once it has decided which file to use. Capped at +/// [`DISCOVER_MAX_DEPTH`] levels to bound the walk on networked filesystems. pub fn discover_path(start: &std::path::Path) -> Option { let mut current = start.to_path_buf(); - loop { + for _ in 0..=DISCOVER_MAX_DEPTH { let candidate = current.join(DEFAULT_CONFIG_NAME); if candidate.is_file() { return Some(candidate); @@ -119,6 +145,7 @@ pub fn discover_path(start: &std::path::Path) -> Option { return None; } } + None } #[cfg(test)] @@ -156,11 +183,28 @@ filters: #[test] fn empty_yaml_yields_default() { - let cfg = parse_str("").unwrap_or_default(); + // serde_yaml rejects an empty document, so feed it the canonical empty + // mapping. This both pins the contract (top-level must be a mapping) + // and exercises the "no fields set" path. + let cfg = parse_str("{}").unwrap(); assert!(cfg.alerts.webhooks.is_empty()); assert!(cfg.filters.skip_words.is_empty()); } + #[test] + fn invalid_webhook_url_is_rejected() { + let yaml = "alerts:\n webhooks:\n - url: not-a-url\n"; + let err = parse_str(yaml).unwrap_err(); + assert!(format!("{err:#}").contains("alerts.webhooks[0].url")); + } + + #[test] + fn invalid_skip_regex_is_rejected() { + let yaml = "filters:\n skip_regex: ['(unclosed']\n"; + let err = parse_str(yaml).unwrap_err(); + assert!(format!("{err:#}").contains("filters.skip_regex[0]")); + } + #[test] fn unknown_field_is_rejected() { let yaml = "alerts:\n webhooks: []\nbogus: 42\n"; diff --git a/src/direct_validate.rs b/src/direct_validate.rs index 9e377f9..0e98315 100644 --- a/src/direct_validate.rs +++ b/src/direct_validate.rs @@ -629,8 +629,8 @@ pub async fn run_direct_validation( // leak credentials into stderr when -v is on. debug!("HTTP validation failed"); DirectValidationResult { - rule_id: String::new(), - rule_name: String::new(), + rule_id: rule_id.clone(), + rule_name: rule_name.clone(), is_valid: false, status_code: None, message: "HTTP validation failed".to_string(), @@ -652,8 +652,8 @@ pub async fn run_direct_validation( Err(_e) => { debug!("gRPC validation failed"); DirectValidationResult { - rule_id: String::new(), - rule_name: String::new(), + rule_id: rule_id.clone(), + rule_name: rule_name.clone(), is_valid: false, status_code: None, message: "gRPC validation failed".to_string(), diff --git a/src/main.rs b/src/main.rs index 00ec9ce..d2d6548 100644 --- a/src/main.rs +++ b/src/main.rs @@ -443,7 +443,8 @@ fn describe_scan_target(args: &InputSpecifierArgs) -> Option { fn build_alert_sinks( scan_args: &cli::commands::scan::ScanArgs, ) -> Vec { - let cli_count = scan_args.alert_webhook.len() - scan_args.config_webhook_overrides.len(); + let cli_count = + scan_args.alert_webhook.len().saturating_sub(scan_args.config_webhook_overrides.len()); scan_args .alert_webhook .iter() @@ -652,7 +653,20 @@ async fn async_main(args: CommandLineArgs) -> Result { Ok(records) => { let target = describe_scan_target(&scan_args.input_specifier_args); - let sinks = build_alert_sinks(&scan_args); + let sinks: Vec<_> = build_alert_sinks(&scan_args) + .into_iter() + .filter(|sink| { + match kingfisher::alerts::validate_webhook_url( + &sink.url, + ) { + Ok(()) => true, + Err(e) => { + warn!("alert dispatch: skipping sink: {}", e); + false + } + } + }) + .collect(); kingfisher::alerts::dispatch(&sinks, &records, target).await; } Err(e) => warn!("alert dispatch: failed to build findings: {}", e), diff --git a/src/update.rs b/src/update.rs index 53dde7c..5e5c036 100644 --- a/src/update.rs +++ b/src/update.rs @@ -231,9 +231,22 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Upd if global_args.self_update { match updater.update() { Ok(status) => { - let message = format!("Updated to version {}", status.version()); - let _ = writeln!(std::io::stderr(), "{}", styled_heading(&styles, &message)); - was_self_updated = true; + if status.updated() { + let message = format!("Updated to version {}", status.version()); + let _ = writeln!(std::io::stderr(), "{}", styled_heading(&styles, &message)); + was_self_updated = true; + } else { + // Reported `UpToDate` despite the version-comparison branch — race or + // tag mismatch. Skip re-exec to avoid pointless fork+exec. + let _ = writeln!( + std::io::stderr(), + "{}", + styled_heading( + &styles, + &format!("Already at version {} — no update applied", status.version()), + ) + ); + } } Err(e) => match e { UpdError::Io(ref io_err) => match io_err.kind() {