Fix: HTML detection now requires both HTML content-type and html tag, fixing webhook false negatives

This commit is contained in:
Mick Grove 2025-06-27 15:28:34 -07:00
commit 87d2a83e3e
10 changed files with 193 additions and 32 deletions

View file

@ -2,6 +2,11 @@
All notable changes to this project will be documented in this file.
## [1.16.0]
- Fix: HTML detection now requires both HTML content-type and "<html" tag, fixing webhook false negatives
- Removed cargo-nextest installation during test running
- Added rules for 1password, droneci
## [1.15.0]
- Ensuring temp files are cleaned up
- Applying visual style to the update check output

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.15.0"
version = "1.16.0"
edition.workspace = true
rust-version.workspace = true
license.workspace = true

View file

@ -400,13 +400,6 @@ check-rust:
fi
tests:
@echo "🔍 checking for cargo-nextest …"
@if command -v cargo-nextest >/dev/null 2>&1; then \
echo "✅ cargo-nextest already present"; \
else \
echo "📦 installing cargo-nextest …"; \
cargo install --locked cargo-nextest || true; \
fi
@echo "▶ running tests …"; \
if command -v cargo-nextest >/dev/null 2>&1; then \
cargo nextest run --workspace --all-targets; \

View file

@ -3,7 +3,7 @@ rules:
id: kingfisher.intercom.1
pattern: |
(?xi)
(?:intercom(?:_access)?|ic)
(?:intercom|ic)
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?

View file

@ -10,7 +10,7 @@ rules:
-----END\ .{0,20}\ ?PRIVATE\ KEY\ ?.{0,20}-----
min_entropy: 4.5
confidence: high
prevalidated: false
prevalidated: true
examples:
- |
-----BEGIN RSA PRIVATE KEY-----
@ -62,7 +62,7 @@ rules:
(?: [^a-zA-Z0-9+/=] | $ )
min_entropy: 4.5
confidence: high
prevalidated: false
prevalidated: true
examples:
- 'PRIVATE_KEY_B64=LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb2dJQkFBS0NBUUVBb3kxWFh1VkFRcHFIYlFFMDVta2hyTmcvMTI0Ri8ySzlPYW5pelpUWlVVaEswOFU4CkxhaC9SbVVsWHFRMDEvU255aktGOWZqUDhFcU1OZ1dpamUzYmVwL3RPOVpTMEFUMi9PVlJXeS9TOG52RDQ5WTMKenMxMktSbERhR2lZc0RsYUZrbHJkeDQ4RWhRVmdHN3hmWE1jaC9OejJzc2FEby9kRkNBOW80TkZZQWUzM2UveApWNVo1UHNkWkl6dkNZQVlCNDRoUEtpN3JXRE1IbFdzM1kvVkVtQXMzSzVNK2QvL3QzRHB4WnBEbWJERGdYa2w2CjZUdDh3VXloUVZ3MkZpMStobTF1T2QwYjFkaW9aNko2OXNTT2JOZXpSR3YxYjdZaFltT0JKL1JBbHN5ZHoxTmgKVXpXT1lYV0Z1OGJrOU9JM3lQMEc0TE84QjhtbWRldE1RVVoyelFJREFRQUJBb0lCQUN2ckhUUHVVZ0JiSlE0QwpvQ0ZQdEgrWDZIN3NIdk1ndVR0VzdUTlYxN1BYMkVQdE53ZzI3S0tld0pNYmNSbWF3THBjSk5BU09xMDY4MGZxCjlsaHE1NEsybnB4WFVBeXErV3NSc1hid2hUODhibm5aQTBaRzZJR2hTaEpFN0t1cGxBU2htQ29FV2ppbmJTNFgKTGlvTW5HWSs4VFMzSzNrMTRWUDBaWUtuNXprMERHZnFBMEo0VTRXSmxUeGwrTWZxd0pJOTlrcTdHbFVlZkdncQpuK3Q1d2NrV3BPbTd5TUJjZTlTSXlmTm54bnU3TkZYQm50VTN5RGxSUThWUWZmNEtRMzJCaWNiYlJWemR1TThNCnNxMU5CZWNzL0EzUXRvdG1nWUc4d094ZXpNS3Iyays2QzB2NmlFc0h5T0lmR25GWktSZDJFd0dnWlo3aytURHUKUUYrcjd1VUNnWUVBMkRqNUJoYmpybDFRNTZya3BhTGFvVldRV1Y5YUYzUUJtNlNZM2VQYmlvY2JNR2k1ak1ESQpkSjdJVXlLYUljK3BNV1RQYlBmVUd2WmNENlczZDFBNUNUSnFuWHVuVlY3czRqaWJ6WDZUbjhNM3IrMHZTZnNZCmdPMHBtRFpndlNqaVZTRUNBQTZFOFUxQ1lFZU5KUDFDOW12cGJVNzJRTEpndWp3M3JMb2oyYmNDZ1lFQXdUSXYKOUNSeWNOQXRBbDcvUHdWZGh5eXRvVHBSRnZDSU1HSVk5SjMxZ3lva0ZlaFQvWjQ4WkF6anl6ZTBSUXYzdGUxTQoveVJMQkVETGkwbEtrZFVXckVkaVR3dm1KdkpwMDZ0OEdCbERsK25ycXVLWTFxVThDbTR5cis4QzZtRThkVnZrClNINXBhRXptOERFTE1wSjhGVTZFYnhmZHZjRzZmSGx6dnVnZmc1c0NnWUFFQ1BRa3QvS2h3MTRLSkxkRm5BZG0KY1ZsVFFhTkZ3c1Z3NlI1dExaNWdOR3MrZVFYVmFaZVVEWTZCZHFqWHJxOWltNVgvVzVTYXVEUTVtb2NVOCt0TQpqNk5Mc3c0SldzOGkzWm1TdVNUNkcwT0R4ZkpXK0JlWitGTUpZeUpsQlVsTCsyUzFLWkF6akpTTGhXcE40V2dKCmZ6UUk5U3RGUTg3b1NzMWpMTW9VZXdLQmdGOE9CMlFURHErTTdhaE4vejROc0wvU2JyZDJEdkcvZFBLQlFaQVIKcS90V0g1MGJ5ejlzdkgvcGk2YXdDS1UwUnpPZXh4UjkwZDhNMWxqNHZaVFZDQ3ZKajRnZTdhVlovbEdqL1JHSwpWS1NJOW1nRXgzaE1vaWJybzByR3lXTnlaaUhFRGFUUmRhRll2UU9PemRpYkZDd1RqcnR1UGE2Z2c5VzhtQU5sCkNDUmpBb0dBSTRIbnpyV3kzaU5kR2xqV
nh4bW1DN1V0c0MvajJBUEZpcHc0ZHJ0U2NsMDFRZzF5WkowbDNBTk4KOU5lTmVSUUFzN3pFTng2T1B1SzlxYy83T1ROMTJKaHdoUTIzdXZwNjZjV0krdTRjcVpOZTJyZVFVVWVmM3psbQpMcXRmOU50VHp5M3pjMGZQcGoxQnBlRmxHSG9SVDhjVHpBWjFTeGwyZWChazlqS2RVeDQ9Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0t'
- ' "privateKey": "LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0tLQpNSUlFb2dJQkFBS0NBUUVBbUhKOEJHdTFYZUZ4aENVQXBrNHNSTVI4RnRTdGtyMEx0OWtWTGNSUjRFWitiOWhHCmR0blJpOFhqV3d5MU5zMHliMkJMdHBpVHZKSFVKTUphWXluZ2ZkZnZhcWhocm1yYm5vV0pLQkxmeUxwTXFNS1EKQ3RialFxbnVrQURJUWVQd2ZGeTNpVHkxd1JkRC9zTUs1U0VtV0Fxb0pZQk50eTFZZzA2UzVkYVlPM2xjY3hrYQpQWjRjcm9McWF6Ny9tU3dDVTR5VWRSb3h4WVF4VG1MZXg5M2tqU09TTmdpK0FXc0lCbjV3UHI0VHNuVHFSeWpIClN2aEdMdk9YREpRYWZRdk56WjFSL1FYMzlOQk9xOEVKZW5pWXdaUm9uNVcvNVhMYW94MFFyUGhrY1BES3A5SVUKeHpJakUwWlNmMStUK1FFbTQ3TkFtSnhvZjFhdGRFVzZDTCtheHdJREFRQUJBb0lCQUQ3enI4REhsWnFSK1NWZgpmbGd1bWRzLzVCb3Rjd3ZRWXlGbFZIaVV4RmEvNVlCY0tDVDJKN0QzWTc1NmplNTJaK2hVTkkvUGk5cG53ZG40CkpBa2xCdDRRcUg0NzBES05UK216TFFOT1gvanM3YkVXdnhLcTBDZjhNbFptN0V0QlRGS2VtdS9pRVJBT2duYVcKcGs0ZUZVNXdBQ1dVU1FObWgxR1p4ZEdCZjFXM1VjUnQxcFRvOEtQTDluZm4vSGJiRFNsQkNVL3VIcWd2TSt2cApmTE03bzRIVDZ1K1ZzU00rWGZqeDhpeE5ZRHdoalNuKzQyZm13d1d3ZzJISHUrdUozZ1pUSWQwRUI1VW9hdUNjCjZUTlVtcEJscjU5UGFmVkZRWUY1S3VxaHJXKzVQaWpHcHBZcXg4Ynl6aFpOQzkwZnl5V0NXcXg2eGFZVm5OdzgKNkJmUXM2a0NnWUVBeVlyRVg1NU1RTzJnWDY2TGwxaGJDMzNzWk1OZzloVG1SK1doSTFjNksvbFZ1TFoyL0RPdwpsYTZ6eHdBU204Z0ZyVUFYbUljV2h2b3FwWGVzNWZzOVZKeDlNT0ZVYVBrckRPQllnY1laMUR6VVNVOHc3SSttCnlyV3hRUkRNajhvSGpRbHVpM0s2MzZucm5RajhxOGkvQ2dranVPcHJGZnliMzVEMFlDdjVXZzBDZ1lFQXdhT3cKRWFhN0l1MjFGa08vbmFjdVhjSnBhNkVlUTNqZFNlNlRQaXZ6bVVXU0haeGJuUy9XSnJaRjQwSExzUWxOZHl0ZgpNTTBKZFU0VmMyR0NVc1pMYjdQSmJwdVRqRERSSHJXV1pCMnhiemF0K3A3N2RzNWlOcXFRcTZ6M0syUVh4Y3ZTCis5am5VZXpDU2Y0N1R1OWNTTW96V3hTMW82b1BPSFdHVFRvdHR5TUNnWUFQdWc1Y3o4TnZoWnR3Ry9TMG1LWnkKSFI5bk5YL0pkQlFNSkRVUXh1dTVKcm16c2psU3NNM2t3RDh6RmlSZGw1d3B5c2lNbEc0RGxsM2hqNWNrVXhpVQpFNm9KT0d3WHpPbTVGWUNTajl6UUhQY0x5V3d0NlgvQWJiRXBQS0JaMEJBS3gyT2k2ZzcvQ1FsanRhSFIzZFphCmVDQWJlOTlqVmRUcit5bTJuM2ZUdVFLQmdBMm5TZ25rbEx0Z3dXMEJkK2hZMm1jWUJ6RGttbXF0Z2dUdGdvcFcKdFFWd3AxM1pJWWlTeituSTNtS295QUVDbytpc01Ua1NyQUVPY1dyQ1RGc2p5anZsRkdYdEtGa3hNLzJUVmpoVwo4NlRnMlNHYnhpVlpaZ2x1dTJhdmVub2Z3NkZadnRXdE5KcE5OR0hkUURkUG4xVXVsTEp1WW1SWTRGdmR4WXQ2CmQ3QzdBb0dBRUsvalFiZ0l3OXFLQUNOZ
0JySnB1cU5Ham9JajFoQTRlb29DMXp1bFEyZUpnZ2J5OTBpSDg2VzEKM0xyOVZMVFkyc2JKTzlqekZVR0lOL01BOEhYQTE1a2grZHRibkRsdFRFZGNnenBCRzhCQUZRQ3hQWnBGWHhtZgpDUmhXN1l6RW1IeWJ4R0toR3NOK2M3NUhKTHZFSWwrRTh6eitXRk9xT240dkJXU1ZwSnc9Ci0tLS0tRU5EIFJTQSBQUklWQVRFIEtFWS0tLS0tCg==",'

View file

@ -24,7 +24,7 @@ rules:
-----
min_entropy: 4.5
confidence: high
prevalidated: false
prevalidated: true
examples:
- |-
-----BEGIN RSA PRIVATE KEY-----
@ -77,7 +77,7 @@ rules:
)
min_entropy: 4.5
confidence: high
prevalidated: false
prevalidated: true
examples:
- |
-----BEGIN PRIVATE KEY-----

View file

@ -85,9 +85,9 @@ rules:
\b
(
https://hooks\.slack\.com/services/
T[a-z0-9_-]{8,12}/ # Team ID
B[a-z0-9_-]{8,12}/ # Bot ID
[a-z0-9_-]{20,30} # Webhook token
T[a-z0-9_-]{8,12}/
B[a-z0-9_-]{8,12}/
[a-z0-9_-]{20,30}
)
\b
min_entropy: 3.3
@ -105,10 +105,9 @@ rules:
- report_response: true
type: WordMatch
words:
- ok
- invalid_payload
- type: WordMatch
words:
words:
- "invalid_token"
negative: true
url: '{{ TOKEN }}'
url: "{{ TOKEN }}"

View file

@ -16,7 +16,7 @@ rules:
min_entropy: 4.0
confidence: medium
examples:
- Authorization: Bearer AAAAAAAAAAAAAAAAAAAAAAL6NcQAAAAABkCyZ4E5jEXAMPLE0wuJbhVN8
- "Authorization: Bearer AAAAAAAAAAAAAAAAAAAAAAL6NcQAAAAABkCyZ4E5jEXAMPLE0wuJbhVN8"
- TWITTER_BEARER="AAAAAAAAAAAAAAAAAAAAAAAYk9SClAAAAAAAfkwAAAABAAAAA"
validation:
type: Http

View file

@ -989,4 +989,132 @@ rules:
println!("Body: {:?}", owned_blob_match.validation_response_body);
Ok(())
}
// // ────────────────────────────────────────────────────────────────
// // Slack Webhook end-to-end validation test
// // ────────────────────────────────────────────────────────────────
// #[tokio::test]
// async fn test_actual_slack_webhook_validation() -> anyhow::Result<()> {
// use std::sync::Arc;
// use crossbeam_skiplist::SkipMap;
// use http::StatusCode;
// use rustc_hash::FxHashMap;
// use crate::{
// blob::BlobId,
// liquid_filters::register_all,
// location::OffsetSpan,
// matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures},
// rules::{
// rule::{Confidence, Rule},
// Rules,
// },
// validation::{validate_single_match, Cache},
// };
// // 1⃣ YAML snippet with the **exact** Slack rule
// let slack_yaml = r#"
// rules:
// - name: Slack Webhook
// id: kingfisher.slack.4
// pattern: |
// (?xi)
// \b
// (
// https://hooks\.slack\.com/services/
// T[a-z0-9_-]{8,12}/
// B[a-z0-9_-]{8,12}/
// [a-z0-9_-]{20,30}
// )
// \b
// min_entropy: 3.3
// confidence: medium
// examples:
// - https://hooks.slack.com/services/TY40v9sZ9/BxIqhIXIi/NGUyXK6nK7HMAqd0ASzXluoV
// - https://hooks.slack.com/services/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
// validation:
// type: Http
// content:
// request:
// headers:
// Content-Type: application/json
// method: POST
// response_matcher:
// - report_response: true
// - type: WordMatch
// words:
// - invalid_payload
// - type: WordMatch
// words:
// - "invalid_token"
// negative: true
// url: "{{ TOKEN }}"
// "#;
// // 2⃣ Load that YAML into a Rules object
// let data = vec![(std::path::Path::new("slack_test.yaml"), slack_yaml.as_bytes())];
// let rules = Rules::from_paths_and_contents(data, Confidence::Low)?;
// // 3⃣ Pull the rule syntax & wrap into a Rule
// let slack_rule_syntax = rules
// .rules
// .iter()
// .find(|r| r.id == "kingfisher.slack.4")
// .expect("Slack rule not found")
// .clone();
// let slack_rule = Rule::new(slack_rule_syntax);
// // 4⃣ Provide a real-looking webhook URL (use one of the examples)
// let token = "ENTER YOUR SLACK WEBHOOK URL HERE";
// // 5⃣ Build OwnedBlobMatch stub
// let blob_id = BlobId::new(&token.as_bytes());
// let mut owned_blob_match = OwnedBlobMatch {
// rule: slack_rule.into(),
// blob_id,
// finding_fingerprint: 0,
// matching_input_offset_span: OffsetSpan { start: 0, end: token.len() },
// captures: SerializableCaptures {
// captures: vec![SerializableCapture {
// name: Some("TOKEN".to_string()),
// match_number: -1,
// start: 0,
// end: token.len(),
// value: token.into(),
// }],
// },
// validation_response_body: String::new(),
// validation_response_status: StatusCode::OK,
// validation_success: false,
// calculated_entropy: 5.0,
// };
// // 6⃣ Prepare helpers and run validation
// let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
// let client = reqwest::Client::new();
// let cache: Cache = Arc::new(SkipMap::new());
// let dependent_vars = FxHashMap::default();
// let missing_deps = FxHashMap::default();
// validate_single_match(
// &mut owned_blob_match,
// &parser,
// &client,
// &dependent_vars,
// &missing_deps,
// &cache,
// )
// .await;
// // 7⃣ Inspect outcome (true ⇒ credential considered ACTIVE)
// assert!(
// owned_blob_match.validation_success,
// "Slack webhook should be reported ACTIVE; body was {:?}",
// owned_blob_match.validation_response_body
// );
// Ok(())
// }
}

View file

@ -270,23 +270,26 @@ pub async fn retry_request(
/// Return `true` when the body is very likely HTML.
///
/// Heuristics (fast):
/// 1. Content-Type header says “text/html” or “application/xhtml+xml”.
/// 2. First 1 kB starts with “<” **and** contains “<html”.
fn body_looks_like_html(body: &str, headers: &HeaderMap) -> bool {
// ---- 1. header heuristic ---------------------------------------------
if let Some(ct) = headers.get("content-type").and_then(|v| v.to_str().ok()) {
let ct = ct.to_ascii_lowercase();
if ct.contains("text/html") || ct.contains("application/xhtml") {
return true;
}
}
let header_says_html = headers
.get("content-type")
.and_then(|v| v.to_str().ok())
.map(|ct| {
let ct = ct.to_ascii_lowercase();
ct.contains("text/html") || ct.contains("application/xhtml")
})
.unwrap_or(false);
// ---- 2. early-body scan (<=1024 bytes) --------------------------------
let probe = body[..body.len().min(1024)].to_ascii_lowercase();
probe.starts_with('<') && probe.contains("<html")
let body_looks_htmlish = probe.starts_with('<') && probe.contains("<html");
// ⇒ Only HTML if **both** header and body agree
header_says_html && body_looks_htmlish
}
/// Validate the response by checking word and status matchers.
pub fn validate_response(
matchers: &[ResponseMatcher],
@ -295,8 +298,7 @@ pub fn validate_response(
headers: &HeaderMap,
html_allowed: bool,
) -> bool {
// Since match_all_types is always true here, we simply require all word and status conditions
// to hold.
// Since match_all_types is always true here, we simply require all word and status conditions to hold.
let word_ok = matchers
.iter()
.filter_map(|m| {
@ -474,4 +476,38 @@ mod tests {
// --- assert -----------------------------------------------------------
assert!(result);
}
#[test]
fn test_validate_response_slack_webhook() {
    // Helper producing a single-word matcher shaped like the ones declared by
    // rule kingfisher.slack.4 (`match_all_words` stays at `false`).
    let word_matcher = |word: &str, negative: bool| ResponseMatcher::WordMatch {
        r#type: "word-match".to_string(),
        words: vec![word.to_string()],
        match_all_words: false,
        negative,
    };

    // The body must contain "invalid_payload" and must *not* contain "invalid_token".
    let matchers = vec![
        word_matcher("invalid_payload", false),
        word_matcher("invalid_token", true),
    ];

    // Response under test: HTTP 400, plain-text body "invalid_payload".
    let mut headers = HeaderMap::new();
    headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("text/plain"));
    let status = StatusCode::BAD_REQUEST;
    let body = "invalid_payload";

    // Evaluate with `html_allowed = false`; every matcher condition holds, so
    // the response is expected to validate.
    let ok = validate_response(&matchers, body, &status, &headers, false);
    assert!(ok, "Slack webhook response should be considered ACTIVE");
}
}