From 87d2a83e3e7f13f74b36b3c07fb2b3751c6e6d86 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 27 Jun 2025 15:28:34 -0700 Subject: [PATCH] Fix: HTML detection now requires both HTML content-type and html tag, fixing webhook false negatives --- CHANGELOG.md | 5 ++ Cargo.toml | 2 +- Makefile | 7 -- data/rules/intercom.yml | 2 +- data/rules/pem.yml | 4 +- data/rules/privkey.yml | 4 +- data/rules/slack.yml | 11 ++- data/rules/twitter.yml | 2 +- src/validation.rs | 128 +++++++++++++++++++++++++++++++ src/validation/httpvalidation.rs | 60 ++++++++++++--- 10 files changed, 193 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff2c109..a9a31a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes to this project will be documented in this file. +## [1.16.0] +- Fix: HTML detection now requires both HTML content-type and "/dev/null 2>&1; then \ - echo "βœ… cargo-nextest already present"; \ - else \ - echo "πŸ“¦ installing cargo-nextest …"; \ - cargo install --locked cargo-nextest || true; \ - fi @echo "β–Ά running tests …"; \ if command -v cargo-nextest >/dev/null 2>&1; then \ cargo nextest run --workspace --all-targets; \ diff --git a/data/rules/intercom.yml b/data/rules/intercom.yml index 0c75e33..01fdf0d 100644 --- a/data/rules/intercom.yml +++ b/data/rules/intercom.yml @@ -3,7 +3,7 @@ rules: id: kingfisher.intercom.1 pattern: | (?xi) - (?:intercom(?:_access)?|ic) + (?:intercom|ic) (?:.|[\n\r]){0,16}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,16}? diff --git a/data/rules/pem.yml b/data/rules/pem.yml index fb51a06..dc8e558 100644 --- a/data/rules/pem.yml +++ b/data/rules/pem.yml @@ -10,7 +10,7 @@ rules: -----END\ .{0,20}\ ?PRIVATE\ KEY\ ?.{0,20}----- min_entropy: 4.5 confidence: high - prevalidated: false + prevalidated: true examples: - | -----BEGIN RSA PRIVATE KEY----- @@ -62,7 +62,7 @@ rules: (?: [^a-zA-Z0-9+/=] | $ ) min_entropy: 4.5 confidence: high - prevalidated: false + prevalidated: true examples: - 'PRIVATE_KEY_B64=LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb2dJQkFBS0NBUUVBb3kxWFh1VkFRcHFIYlFFMDVta2hyTmcvMTI0Ri8ySzlPYW5pelpUWlVVaEswOFU4CkxhaC9SbVVsWHFRMDEvU255aktGOWZqUDhFcU1OZ1dpamUzYmVwL3RPOVpTMEFUMi9PVlJXeS9TOG52RDQ5WTMKenMxMktSbERhR2lZc0RsYUZrbHJkeDQ4RWhRVmdHN3hmWE1jaC9OejJzc2FEby9kRkNBOW80TkZZQWUzM2UveApWNVo1UHNkWkl6dkNZQVlCNDRoUEtpN3JXRE1IbFdzM1kvVkVtQXMzSzVNK2QvL3QzRHB4WnBEbWJERGdYa2w2CjZUdDh3VXloUVZ3MkZpMStobTF1T2QwYjFkaW9aNko2OXNTT2JOZXpSR3YxYjdZaFltT0JKL1JBbHN5ZHoxTmgKVXpXT1lYV0Z1OGJrOU9JM3lQMEc0TE84QjhtbWRldE1RVVoyelFJREFRQUJBb0lCQUN2ckhUUHVVZ0JiSlE0QwpvQ0ZQdEgrWDZIN3NIdk1ndVR0VzdUTlYxN1BYMkVQdE53ZzI3S0tld0pNYmNSbWF3THBjSk5BU09xMDY4MGZxCjlsaHE1NEsybnB4WFVBeXErV3NSc1hid2hUODhibm5aQTBaRzZJR2hTaEpFN0t1cGxBU2htQ29FV2ppbmJTNFgKTGlvTW5HWSs4VFMzSzNrMTRWUDBaWUtuNXprMERHZnFBMEo0VTRXSmxUeGwrTWZxd0pJOTlrcTdHbFVlZkdncQpuK3Q1d2NrV3BPbTd5TUJjZTlTSXlmTm54bnU3TkZYQm50VTN5RGxSUThWUWZmNEtRMzJCaWNiYlJWemR1TThNCnNxMU5CZWNzL0EzUXRvdG1nWUc4d094ZXpNS3Iyays2QzB2NmlFc0h5T0lmR25GWktSZDJFd0dnWlo3aytURHUKUUYrcjd1VUNnWUVBMkRqNUJoYmpybDFRNTZya3BhTGFvVldRV1Y5YUYzUUJtNlNZM2VQYmlvY2JNR2k1ak1ESQpkSjdJVXlLYUljK3BNV1RQYlBmVUd2WmNENlczZDFBNUNUSnFuWHVuVlY3czRqaWJ6WDZUbjhNM3IrMHZTZnNZCmdPMHBtRFpndlNqaVZTRUNBQTZFOFUxQ1lFZU5KUDFDOW12cGJVNzJRTEpndWp3M3JMb2oyYmNDZ1lFQXdUSXYKOUNSeWNOQXRBbDcvUHdWZGh5eXRvVHBSRnZDSU1HSVk5SjMxZ3lva0ZlaFQvWjQ4WkF6anl6ZTBSUXYzdGUxTQoveVJMQkVETGkwbEtrZFVXckVkaVR3dm1KdkpwMDZ0OEdCbERsK25ycXVLWTFxVThDbTR5cis4QzZtRThkVnZrClNINXBhRXptOERFTE1wSjhGVTZFYnhmZHZjRzZmSGx6dnVnZmc1c0NnWUFFQ1BRa3QvS2h3MTRLSkxkRm5BZG0KY1ZsVFFhTkZ3c1Z3NlI1dExaNWdOR3MrZVFYVmFaZVVEWTZCZHFqWHJxOWltNVgvVzVTYXVEUTVtb2NVOCt0TQpqNk5Mc3c0SldzOGkzWm1TdVNUNkcwT0R4ZkpXK0JlWitGTUpZeUpsQlVsTCsyUzFLWkF6akpTTGhXcE40V2dKCmZ6UUk5U3RGUTg3b1NzMWpMTW9VZXdLQmdGOE9CMlFURHErTTdhaE4vejROc0wvU2JyZDJEdkcvZFBLQlFaQVIKcS90V0g1MGJ5ejlzdkgvcGk2YXdDS1UwUnpPZXh4UjkwZDhNMWxqNHZaVFZDQ3ZKajRnZTdhVlovbEdqL1JHSwpWS1NJOW1nRXgzaE1vaWJybzByR3lXTnlaaUhFRGFUUmRhRll2UU9PemRpYkZDd1RqcnR1UGE2Z2c5VzhtQU5sCkNDUmpBb0dBSTRIbnpyV3kzaU5kR2xqVnh4bW1DN1V0c0MvajJBUEZpcHc0ZHJ0U2NsMDFRZzF5WkowbDNBTk4KOU5lTmVSUUFzN3pFTng2T1B1SzlxYy83T1ROMTJKaHdoUTIzdXZwNjZjV0krdTRjcVpOZTJyZVFVVWVmM3psbQpMcXRmOU50VHp5M3pjMGZQcGoxQnBlRmxHSG9SVDhjVHpBWjFTeGwyZWChazlqS2RVeDQ9Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0t' - ' "privateKey": "LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb2dJQkFBS0NBUUVBbUhKOEJHdTFYZUZ4aENVQXBrNHNSTVI4RnRTdGtyMEx0OWtWTGNSUjRFWitiOWhHCmR0blJpOFhqV3d5MU5zMHliMkJMdHBpVHZKSFVKTUphWXluZ2ZkZnZhcWhocm1yYm5vV0pLQkxmeUxwTXFNS1EKQ3RialFxbnVrQURJUWVQd2ZGeTNpVHkxd1JkRC9zTUs1U0VtV0Fxb0pZQk50eTFZZzA2UzVkYVlPM2xjY3hrYQpQWjRjcm9McWF6Ny9tU3dDVTR5VWRSb3h4WVF4VG1MZXg5M2tqU09TTmdpK0FXc0lCbjV3UHI0VHNuVHFSeWpIClN2aEdMdk9YREpRYWZRdk56WjFSL1FYMzlOQk9xOEVKZW5pWXdaUm9uNVcvNVhMYW94MFFyUGhrY1BES3A5SVUKeHpJakUwWlNmMStUK1FFbTQ3TkFtSnhvZjFhdGRFVzZDTCtheHdJREFRQUJBb0lCQUQ3enI4REhsWnFSK1NWZgpmbGd1bWRzLzVCb3Rjd3ZRWXlGbFZIaVV4RmEvNVlCY0tDVDJKN0QzWTc1NmplNTJaK2hVTkkvUGk5cG53ZG40CkpBa2xCdDRRcUg0NzBES05UK216TFFOT1gvanM3YkVXdnhLcTBDZjhNbFptN0V0QlRGS2VtdS9pRVJBT2duYVcKcGs0ZUZVNXdBQ1dVU1FObWgxR1p4ZEdCZjFXM1VjUnQxcFRvOEtQTDluZm4vSGJiRFNsQkNVL3VIcWd2TSt2cApmTE03bzRIVDZ1K1ZzU00rWGZqeDhpeE5ZRHdoalNuKzQyZm13d1d3ZzJISHUrdUozZ1pUSWQwRUI1VW9hdUNjCjZUTlVtcEJscjU5UGFmVkZRWUY1S3VxaHJXKzVQaWpHcHBZcXg4Ynl6aFpOQzkwZnl5V0NXcXg2eGFZVm5OdzgKNkJmUXM2a0NnWUVBeVlyRVg1NU1RTzJnWDY2TGwxaGJDMzNzWk1OZzloVG1SK1doSTFjNksvbFZ1TFoyL0RPdwpsYTZ6eHdBU204Z0ZyVUFYbUljV2h2b3FwWGVzNWZzOVZKeDlNT0ZVYVBrckRPQllnY1laMUR6VVNVOHc3SSttCnlyV3hRUkRNajhvSGpRbHVpM0s2MzZucm5RajhxOGkvQ2dranVPcHJGZnliMzVEMFlDdjVXZzBDZ1lFQXdhT3cKRWFhN0l1MjFGa08vbmFjdVhjSnBhNkVlUTNqZFNlNlRQaXZ6bVVXU0haeGJuUy9XSnJaRjQwSExzUWxOZHl0ZgpNTTBKZFU0VmMyR0NVc1pMYjdQSmJwdVRqRERSSHJXV1pCMnhiemF0K3A3N2RzNWlOcXFRcTZ6M0syUVh4Y3ZTCis5am5VZXpDU2Y0N1R1OWNTTW96V3hTMW82b1BPSFdHVFRvdHR5TUNnWUFQdWc1Y3o4TnZoWnR3Ry9TMG1LWnkKSFI5bk5YL0pkQlFNSkRVUXh1dTVKcm16c2psU3NNM2t3RDh6RmlSZGw1d3B5c2lNbEc0RGxsM2hqNWNrVXhpVQpFNm9KT0d3WHpPbTVGWUNTajl6UUhQY0x5V3d0NlgvQWJiRXBQS0JaMEJBS3gyT2k2ZzcvQ1FsanRhSFIzZFphCmVDQWJlOTlqVmRUcit5bTJuM2ZUdVFLQmdBMm5TZ25rbEx0Z3dXMEJkK2hZMm1jWUJ6RGttbXF0Z2dUdGdvcFcKdFFWd3AxM1pJWWlTeituSTNtS295QUVDbytpc01Ua1NyQUVPY1dyQ1RGc2p5anZsRkdYdEtGa3hNLzJUVmpoVwo4NlRnMlNHYnhpVlpaZ2x1dTJhdmVub2Z3NkZadnRXdE5KcE5OR0hkUURkUG4xVXVsTEp1WW1SWTRGdmR4WXQ2CmQ3QzdBb0dBRUsvalFiZ0l3OXFLQUNOZ0JySnB1cU5Ham9JajFoQTRlb29DMXp1bFEyZUpnZ2J5OTBpSDg2VzEKM0xyOVZMVFkyc2JKTzlqekZVR0lOL01BOEhYQTE1a2grZHRibkRsdFRFZGNnenBCRzhCQUZRQ3hQWnBGWHhtZgpDUmhXN1l6RW1IeWJ4R0toR3NOK2M3NUhKTHZFSWwrRTh6eitXRk9xT240dkJXU1ZwSnc9Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg==",' \ No newline at end of file diff --git a/data/rules/privkey.yml b/data/rules/privkey.yml index 6a5fe32..b0e25b5 100644 --- a/data/rules/privkey.yml +++ b/data/rules/privkey.yml @@ -24,7 +24,7 @@ rules: ----- min_entropy: 4.5 confidence: high - prevalidated: false + prevalidated: true examples: - |- -----BEGIN RSA PRIVATE KEY----- @@ -77,7 +77,7 @@ rules: ) min_entropy: 4.5 confidence: high - prevalidated: false + prevalidated: true examples: - | -----BEGIN PRIVATE KEY----- diff --git a/data/rules/slack.yml b/data/rules/slack.yml index 9f3db01..9544d10 100644 --- a/data/rules/slack.yml +++ b/data/rules/slack.yml @@ -85,9 +85,9 @@ rules: \b ( https://hooks\.slack\.com/services/ - T[a-z0-9_-]{8,12}/ # Team ID - B[a-z0-9_-]{8,12}/ # Bot ID - [a-z0-9_-]{20,30} # Webhook token + T[a-z0-9_-]{8,12}/ + B[a-z0-9_-]{8,12}/ + [a-z0-9_-]{20,30} ) \b min_entropy: 3.3 @@ -105,10 +105,9 @@ rules: - report_response: true type: WordMatch words: - - ok - invalid_payload - type: WordMatch - words: + words: - "invalid_token" negative: true - url: '{{ TOKEN }}' \ No newline at end of file + url: "{{ TOKEN }}" diff --git a/data/rules/twitter.yml b/data/rules/twitter.yml index 311c0e9..18ff50d 100644 --- a/data/rules/twitter.yml +++ b/data/rules/twitter.yml @@ -16,7 +16,7 @@ rules: min_entropy: 4.0 confidence: medium examples: - - Authorization: Bearer AAAAAAAAAAAAAAAAAAAAAAL6NcQAAAAABkCyZ4E5jEXAMPLE0wuJbhVN8 + - "Authorization: Bearer AAAAAAAAAAAAAAAAAAAAAAL6NcQAAAAABkCyZ4E5jEXAMPLE0wuJbhVN8" - TWITTER_BEARER="AAAAAAAAAAAAAAAAAAAAAAAYk9SClAAAAAAAfkwAAAABAAAAA" validation: type: Http diff --git a/src/validation.rs b/src/validation.rs index 2000a4e..b45ff3b 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -989,4 +989,132 @@ rules: println!("Body: {:?}", owned_blob_match.validation_response_body); Ok(()) } + +// // ──────────────────────────────────────────────────────────────── +// // Slack Webhook – end-to-end validation test +// // ──────────────────────────────────────────────────────────────── +// #[tokio::test] +// async fn test_actual_slack_webhook_validation() -> anyhow::Result<()> { +// use std::sync::Arc; + +// use crossbeam_skiplist::SkipMap; +// use http::StatusCode; +// use rustc_hash::FxHashMap; + +// use crate::{ +// blob::BlobId, +// liquid_filters::register_all, +// location::OffsetSpan, +// matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures}, +// rules::{ +// rule::{Confidence, Rule}, +// Rules, +// }, +// validation::{validate_single_match, Cache}, +// }; + +// // 1️⃣ YAML snippet with the **exact** Slack rule +// let slack_yaml = r#" +// rules: +// - name: Slack Webhook +// id: kingfisher.slack.4 +// pattern: | +// (?xi) +// \b +// ( +// https://hooks\.slack\.com/services/ +// T[a-z0-9_-]{8,12}/ +// B[a-z0-9_-]{8,12}/ +// [a-z0-9_-]{20,30} +// ) +// \b +// min_entropy: 3.3 +// confidence: medium +// examples: +// - https://hooks.slack.com/services/TY40v9sZ9/BxIqhIXIi/NGUyXK6nK7HMAqd0ASzXluoV +// - https://hooks.slack.com/services/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 +// validation: +// type: Http +// content: +// request: +// headers: +// Content-Type: application/json +// method: POST +// response_matcher: +// - report_response: true +// - type: WordMatch +// words: +// - invalid_payload +// - type: WordMatch +// words: +// - "invalid_token" +// negative: true +// url: "{{ TOKEN }}" +// "#; + +// // 2️⃣ Load that YAML into a Rules object +// let data = vec![(std::path::Path::new("slack_test.yaml"), slack_yaml.as_bytes())]; +// let rules = Rules::from_paths_and_contents(data, Confidence::Low)?; + +// // 3️⃣ Pull the rule syntax & wrap into a Rule +// let slack_rule_syntax = rules +// .rules +// .iter() +// .find(|r| r.id == "kingfisher.slack.4") +// .expect("Slack rule not found") +// .clone(); +// let slack_rule = Rule::new(slack_rule_syntax); + +// // 4️⃣ Provide a real-looking webhook URL (use one of the examples) +// let token = "ENTER YOUR SLACK WEBHOOK URL HERE"; + +// // 5️⃣ Build OwnedBlobMatch stub +// let blob_id = BlobId::new(&token.as_bytes()); +// let mut owned_blob_match = OwnedBlobMatch { +// rule: slack_rule.into(), +// blob_id, +// finding_fingerprint: 0, +// matching_input_offset_span: OffsetSpan { start: 0, end: token.len() }, +// captures: SerializableCaptures { +// captures: vec![SerializableCapture { +// name: Some("TOKEN".to_string()), +// match_number: -1, +// start: 0, +// end: token.len(), +// value: token.into(), +// }], +// }, +// validation_response_body: String::new(), +// validation_response_status: StatusCode::OK, +// validation_success: false, +// calculated_entropy: 5.0, +// }; + +// // 6️⃣ Prepare helpers and run validation +// let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; +// let client = reqwest::Client::new(); +// let cache: Cache = Arc::new(SkipMap::new()); +// let dependent_vars = FxHashMap::default(); +// let missing_deps = FxHashMap::default(); + +// validate_single_match( +// &mut owned_blob_match, +// &parser, +// &client, +// &dependent_vars, +// &missing_deps, +// &cache, +// ) +// .await; + +// // 7️⃣ Inspect outcome (true β‡’ credential considered ACTIVE) +// assert!( +// owned_blob_match.validation_success, +// "Slack webhook should be reported ACTIVE; body was {:?}", +// owned_blob_match.validation_response_body +// ); + +// Ok(()) +// } + } diff --git a/src/validation/httpvalidation.rs b/src/validation/httpvalidation.rs index 91b8e36..d2e9290 100644 --- a/src/validation/httpvalidation.rs +++ b/src/validation/httpvalidation.rs @@ -270,23 +270,26 @@ pub async fn retry_request( /// Return `true` when the body is very likely HTML. /// -/// Heuristics (fast): -/// 1. Content-Type header says β€œtext/html” or β€œapplication/xhtml+xml”. -/// 2. First 1 kB starts with β€œ<” **and** contains β€œ bool { // ---- 1. header heuristic --------------------------------------------- - if let Some(ct) = headers.get("content-type").and_then(|v| v.to_str().ok()) { - let ct = ct.to_ascii_lowercase(); - if ct.contains("text/html") || ct.contains("application/xhtml") { - return true; - } - } + let header_says_html = headers + .get("content-type") + .and_then(|v| v.to_str().ok()) + .map(|ct| { + let ct = ct.to_ascii_lowercase(); + ct.contains("text/html") || ct.contains("application/xhtml") + }) + .unwrap_or(false); // ---- 2. early-body scan (<=1024 bytes) -------------------------------- let probe = body[..body.len().min(1024)].to_ascii_lowercase(); - probe.starts_with('<') && probe.contains(" bool { - // Since match_all_types is always true here, we simply require all word and status conditions - // to hold. + // Since match_all_types is always true here, we simply require all word and status conditions to hold. let word_ok = matchers .iter() .filter_map(|m| { @@ -474,4 +476,38 @@ mod tests { // --- assert ----------------------------------------------------------- assert!(result); } + #[test] + fn test_validate_response_slack_webhook() { + // 1️⃣ Build matchers equivalent to rule kingfisher.slack.4 + let matchers = vec![ + ResponseMatcher::WordMatch { + r#type: "word-match".to_string(), + words: vec!["invalid_payload".to_string()], + match_all_words: false, // rule omits this β†’ default is false + negative: false, + }, + ResponseMatcher::WordMatch { + r#type: "word-match".to_string(), + words: vec!["invalid_token".to_string()], + match_all_words: false, + negative: true, // body must *not* contain β€œinvalid_token” + }, + ]; + + // 2️⃣ Simulate the real Slack response you posted + let body = "invalid_payload"; + let status = StatusCode::BAD_REQUEST; // 400 + let mut headers = HeaderMap::new(); + headers.insert( + header::CONTENT_TYPE, + HeaderValue::from_static("text/plain"), + ); + + // 3️⃣ Call validate_response with html_allowed = false + let ok = validate_response(&matchers, body, &status, &headers, false); + + // 4️⃣ It *should* be valid (true) because all matcher conditions hold + assert!(ok, "Slack webhook response should be considered ACTIVE"); + } + }