preparing for v1.99.0

This commit is contained in:
Mick Grove 2026-05-04 14:48:41 -07:00
commit a9cdaea6cd
11 changed files with 148 additions and 36 deletions

View file

@ -10,6 +10,8 @@ rules:
)
(?:[^A-Za-z0-9+/=]|$)
pattern_requirements:
min_digits: 1
min_lowercase: 1
ignore_if_contains:
- "dXNlcjpwYXNz" # user:pass
- "dXNlcjpwYXNzd29yZA" # user:password
@ -31,7 +33,7 @@ rules:
- "YOUR_"
- "$BASIC"
min_entropy: 3.5
confidence: low
confidence: medium
examples:
- "Authorization: Basic SWxqcmpxZDpld3B3IGdlZmhtZA=="
- "Authorization: Basic bXpnaGd6cTpsbmFrdWF6cXNx"
@ -66,7 +68,7 @@ rules:
- "1234567890abcdef"
- "xxxxxxxxxxxxxxxx"
min_entropy: 3.5
confidence: low
confidence: medium
examples:
- "Authorization: Bearer d1fc83f09661bcaf3cb3591ec4b93fe93b07087ef020886a4ead09e8efd8c0df"
- " - Bearer 7727d2fa5e8f7d37de25ef385ff8766d265ee2e7384efb311c04069d9c892c6b"

View file

@ -49,7 +49,7 @@ rules:
|
api[\s_.-]*secret
|
secret
[\s_.\-:"'=]secret\b
)
(?:.|[\n\r]){0,20}?
[=:"'\s_-]*

View file

@ -1,3 +1,5 @@
use std::time::Duration;
use anyhow::{Context, Result, anyhow};
use reqwest::{Client, header};
use serde::Deserialize;
@ -101,6 +103,8 @@ pub async fn map_access(args: &AccessMapArgs) -> Result<AccessMapResult> {
pub async fn map_access_from_token(token: &str) -> Result<AccessMapResult> {
let client = Client::builder()
.user_agent(GLOBAL_USER_AGENT.as_str())
.connect_timeout(Duration::from_secs(10))
.timeout(Duration::from_secs(30))
.build()
.context("Failed to build Pinecone HTTP client")?;

View file

@ -113,12 +113,13 @@ pub fn build_payload(
embed["url"] = Value::String(url.clone());
// Append a fields entry too — embed `url` only renders if the title
// is short enough; the field guarantees the link is visible.
let fields_arr = embed["fields"].as_array_mut().expect("fields is an array");
fields_arr.push(json!({
"name": "Full report",
"value": format!("[Open]({})", url),
"inline": false,
}));
if let Some(fields_arr) = embed["fields"].as_array_mut() {
fields_arr.push(json!({
"name": "Full report",
"value": format!("[Open]({})", url),
"inline": false,
}));
}
}
json!({ "embeds": [embed] })

View file

@ -108,12 +108,13 @@ pub fn build_payload(
// Setting both `title_link` and a fallback field makes the link
// visible regardless of how a given client/version renders.
attachment["title_link"] = Value::String(url.clone());
let fields_arr = attachment["fields"].as_array_mut().expect("fields is an array");
fields_arr.push(json!({
"short": false,
"title": "Full report",
"value": format!("[Open]({})", url),
}));
if let Some(fields_arr) = attachment["fields"].as_array_mut() {
fields_arr.push(json!({
"short": false,
"title": "Full report",
"value": format!("[Open]({})", url),
}));
}
}
json!({

View file

@ -87,14 +87,18 @@ impl AlertFormat {
pub fn infer_from_url(url: &str) -> Self {
let host = url::Url::parse(url).ok().and_then(|u| u.host_str().map(str::to_lowercase));
match host.as_deref() {
Some(h) if h.contains("slack.com") => AlertFormat::Slack,
Some(h) if h.contains("office.com") || h.contains("webhook.office") => {
Some(h) if host_matches(h, "slack.com") => AlertFormat::Slack,
Some(h)
if host_matches(h, "office.com")
|| host_matches(h, "webhook.office.com")
|| host_matches(h, "webhook.office.net") =>
{
AlertFormat::Teams
}
Some(h) if h.contains("discord.com") || h.contains("discordapp.com") => {
Some(h) if host_matches(h, "discord.com") || host_matches(h, "discordapp.com") => {
AlertFormat::Discord
}
Some(h) if h.contains("chat.googleapis.com") => AlertFormat::Googlechat,
Some(h) if host_matches(h, "chat.googleapis.com") => AlertFormat::Googlechat,
_ => AlertFormat::Generic,
}
}
@ -188,11 +192,40 @@ impl AlertSummary {
fn build_client() -> Result<Client> {
Client::builder()
.timeout(Duration::from_secs(15))
.connect_timeout(Duration::from_secs(5))
.user_agent(format!("kingfisher/{}", env!("CARGO_PKG_VERSION")))
.build()
.context("failed to build webhook reqwest::Client")
}
/// Tail-match a hostname against a webhook host so substrings like
/// `not-slack.com.attacker.example` cannot be misclassified.
fn host_matches(host: &str, suffix: &str) -> bool {
host == suffix || host.ends_with(&format!(".{suffix}"))
}
/// Validate a webhook URL: must parse, must use http(s) scheme, must have a
/// host. Returns the parsed URL on success.
pub fn validate_webhook_url(url: &str) -> Result<()> {
let parsed = url::Url::parse(url)
.with_context(|| format!("invalid webhook URL `{}`", redact_for_log(url)))?;
let scheme = parsed.scheme();
if scheme != "https" && scheme != "http" {
anyhow::bail!(
"webhook URL `{}` uses unsupported scheme `{scheme}` (only http/https are allowed)",
redact_for_log(url)
);
}
if parsed.host_str().is_none_or(|h| h.is_empty()) {
anyhow::bail!("webhook URL `{}` has no host", redact_for_log(url));
}
Ok(())
}
fn redact_for_log(url: &str) -> String {
redact_webhook(url)
}
/// Redact the path/query of a webhook URL so we never log the full secret token
/// embedded by Slack/Teams/etc. e.g. `https://hooks.slack.com/services/...` →
/// `https://hooks.slack.com/<redacted>`.

View file

@ -102,7 +102,7 @@ pub fn build_payload(
"type": "section",
"text": {
"type": "mrkdwn",
"text": format!("<{}|Full report →>", escape_mrkdwn(url))
"text": format!("<{}|Full report →>", url)
}
}));
}

View file

@ -100,17 +100,43 @@ pub struct FiltersConfig {
pub exclude: Vec<String>,
}
/// Parse YAML text into a config struct.
/// Cap on `discover_path` upward walks. Avoids unbounded directory traversal
/// on networked filesystems or pathological mount layouts.
const DISCOVER_MAX_DEPTH: usize = 32;
/// Parse YAML text into a config struct, validating webhook URLs and
/// `skip_regex` patterns at parse time so config errors surface at a sensible
/// location rather than mid-scan.
pub fn parse_str(yaml: &str) -> Result<KingfisherConfig> {
serde_yaml::from_str(yaml).context("failed to parse kingfisher.yaml")
let cfg: KingfisherConfig =
serde_yaml::from_str(yaml).context("failed to parse kingfisher.yaml")?;
validate(&cfg)?;
Ok(cfg)
}
/// Walk upward from `start` looking for a sibling `kingfisher.yaml`. Returns
/// the absolute path when found. Performs *no* file reads — the caller does
/// the read once it has decided which file to use.
fn validate(cfg: &KingfisherConfig) -> Result<()> {
for (idx, w) in cfg.alerts.webhooks.iter().enumerate() {
crate::alerts::validate_webhook_url(&w.url)
.with_context(|| format!("alerts.webhooks[{idx}].url"))?;
if let Some(report_url) = &w.report_url {
url::Url::parse(report_url)
.with_context(|| format!("alerts.webhooks[{idx}].report_url is not a valid URL"))?;
}
}
for (idx, pattern) in cfg.filters.skip_regex.iter().enumerate() {
regex::Regex::new(pattern)
.with_context(|| format!("filters.skip_regex[{idx}] is not a valid regex"))?;
}
Ok(())
}
/// Walk upward from `start` looking for `kingfisher.yaml` in each ancestor
/// directory. Returns the absolute path when found. Performs *no* file reads —
/// the caller does the read once it has decided which file to use. Capped at
/// [`DISCOVER_MAX_DEPTH`] levels to bound the walk on networked filesystems.
pub fn discover_path(start: &std::path::Path) -> Option<std::path::PathBuf> {
let mut current = start.to_path_buf();
loop {
for _ in 0..=DISCOVER_MAX_DEPTH {
let candidate = current.join(DEFAULT_CONFIG_NAME);
if candidate.is_file() {
return Some(candidate);
@ -119,6 +145,7 @@ pub fn discover_path(start: &std::path::Path) -> Option<std::path::PathBuf> {
return None;
}
}
None
}
#[cfg(test)]
@ -156,11 +183,28 @@ filters:
#[test]
fn empty_yaml_yields_default() {
let cfg = parse_str("").unwrap_or_default();
// serde_yaml rejects an empty document, so feed it the canonical empty
// mapping. This both pins the contract (top-level must be a mapping)
// and exercises the "no fields set" path.
let cfg = parse_str("{}").unwrap();
assert!(cfg.alerts.webhooks.is_empty());
assert!(cfg.filters.skip_words.is_empty());
}
#[test]
fn invalid_webhook_url_is_rejected() {
let yaml = "alerts:\n webhooks:\n - url: not-a-url\n";
let err = parse_str(yaml).unwrap_err();
assert!(format!("{err:#}").contains("alerts.webhooks[0].url"));
}
#[test]
fn invalid_skip_regex_is_rejected() {
let yaml = "filters:\n skip_regex: ['(unclosed']\n";
let err = parse_str(yaml).unwrap_err();
assert!(format!("{err:#}").contains("filters.skip_regex[0]"));
}
#[test]
fn unknown_field_is_rejected() {
let yaml = "alerts:\n webhooks: []\nbogus: 42\n";

View file

@ -629,8 +629,8 @@ pub async fn run_direct_validation(
// leak credentials into stderr when -v is on.
debug!("HTTP validation failed");
DirectValidationResult {
rule_id: String::new(),
rule_name: String::new(),
rule_id: rule_id.clone(),
rule_name: rule_name.clone(),
is_valid: false,
status_code: None,
message: "HTTP validation failed".to_string(),
@ -652,8 +652,8 @@ pub async fn run_direct_validation(
Err(_e) => {
debug!("gRPC validation failed");
DirectValidationResult {
rule_id: String::new(),
rule_name: String::new(),
rule_id: rule_id.clone(),
rule_name: rule_name.clone(),
is_valid: false,
status_code: None,
message: "gRPC validation failed".to_string(),

View file

@ -443,7 +443,8 @@ fn describe_scan_target(args: &InputSpecifierArgs) -> Option<String> {
fn build_alert_sinks(
scan_args: &cli::commands::scan::ScanArgs,
) -> Vec<kingfisher::alerts::AlertSink> {
let cli_count = scan_args.alert_webhook.len() - scan_args.config_webhook_overrides.len();
let cli_count =
scan_args.alert_webhook.len().saturating_sub(scan_args.config_webhook_overrides.len());
scan_args
.alert_webhook
.iter()
@ -652,7 +653,20 @@ async fn async_main(args: CommandLineArgs) -> Result<AsyncMainOutcome> {
Ok(records) => {
let target =
describe_scan_target(&scan_args.input_specifier_args);
let sinks = build_alert_sinks(&scan_args);
let sinks: Vec<_> = build_alert_sinks(&scan_args)
.into_iter()
.filter(|sink| {
match kingfisher::alerts::validate_webhook_url(
&sink.url,
) {
Ok(()) => true,
Err(e) => {
warn!("alert dispatch: skipping sink: {}", e);
false
}
}
})
.collect();
kingfisher::alerts::dispatch(&sinks, &records, target).await;
}
Err(e) => warn!("alert dispatch: failed to build findings: {}", e),

View file

@ -231,9 +231,22 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Upd
if global_args.self_update {
match updater.update() {
Ok(status) => {
let message = format!("Updated to version {}", status.version());
let _ = writeln!(std::io::stderr(), "{}", styled_heading(&styles, &message));
was_self_updated = true;
if status.updated() {
let message = format!("Updated to version {}", status.version());
let _ = writeln!(std::io::stderr(), "{}", styled_heading(&styles, &message));
was_self_updated = true;
} else {
// Reported `UpToDate` despite the version-comparison branch — race or
// tag mismatch. Skip re-exec to avoid pointless fork+exec.
let _ = writeln!(
std::io::stderr(),
"{}",
styled_heading(
&styles,
&format!("Already at version {} — no update applied", status.version()),
)
);
}
}
Err(e) => match e {
UpdError::Io(ref io_err) => match io_err.kind() {