changes in response to PR review

This commit is contained in:
Mick Grove 2026-04-08 11:09:36 -07:00
commit 0d33dff196
5 changed files with 173 additions and 99 deletions

View file

@ -60,21 +60,7 @@ impl RulesDatabase {
let mut reason_codes: Vec<&'static str> = Vec::new();
let has_self_identifying_prefix = [
"ccipat_",
"xoxb-",
"xoxa-",
"xoxp-",
"xapp-",
"ghp_",
"github_pat_",
"sk_live_",
"sk_test_",
"ltai",
"akia",
]
.iter()
.any(|m| normalized.contains(m));
let has_self_identifying_prefix = has_self_identifying_shape(&normalized);
if has_self_identifying_prefix {
reason_codes.push("self_identifying_prefix");
return RuleMatchProfile {
@ -307,6 +293,33 @@ impl RulesDatabase {
}
}
fn has_self_identifying_shape(normalized_pattern: &str) -> bool {
let literal_markers = [
"ccipat_",
"xapp-",
"ghp_",
"github_pat_",
"sk_live_",
"sk_test_",
"ltai",
"akia",
"aizasy",
"pypi-ageichlwas5vcmc",
"https://hooks\\.slack\\.com/services/",
];
literal_markers.iter().any(|needle| normalized_pattern.contains(needle))
|| normalized_pattern.contains("xox[pbarose]")
|| normalized_pattern.contains("xoxe\\.xox[bparose]-")
|| normalized_pattern.contains("xoxe-\\d-")
|| (normalized_pattern.contains("-----begin\\s")
&& normalized_pattern.contains("private\\skey")
&& normalized_pattern.contains("-----end\\s"))
|| (normalized_pattern.contains("-----begin\\ ")
&& normalized_pattern.contains("private\\ key")
&& normalized_pattern.contains("-----end\\ "))
}
fn has_generic_token_class(normalized_pattern: &str) -> bool {
[
"[a-za-z0-9]{",
@ -436,6 +449,57 @@ mod test_rule_match_profiles {
assert!(profile.reason_codes.contains(&"self_identifying_prefix"));
}
#[test]
fn classifies_google_api_key_rule_as_self_identifying() {
let rule = mk_rule("kingfisher.google.7", r"(?xi)\b(AIzaSy[A-Za-z0-9_-]{33})");
let profile = RulesDatabase::classify_rule_profile(&rule);
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
}
#[test]
fn classifies_slack_token_charclass_rule_as_self_identifying() {
let rule = mk_rule(
"kingfisher.slack.2",
r"(?xi)\b(xox[pbarose][-0-9]{0,3}-[0-9a-z]{6,15}-[0-9a-z]{6,15}-[-0-9a-z]{6,66})\b",
);
let profile = RulesDatabase::classify_rule_profile(&rule);
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
}
#[test]
fn classifies_slack_webhook_rule_as_self_identifying() {
let rule = mk_rule(
"kingfisher.slack.4",
r"(?xi)\b(https://hooks\.slack\.com/services/T[a-z0-9_-]{8,12}/B[a-z0-9_-]{8,12}/[a-z0-9_-]{20,30})",
);
let profile = RulesDatabase::classify_rule_profile(&rule);
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
}
#[test]
fn classifies_pypi_token_rule_as_self_identifying() {
let rule = mk_rule("kingfisher.pypi.1", r"(?x)(pypi-AgEIcHlwaS5vcmc[A-Za-z0-9_-]{50,})\b");
let profile = RulesDatabase::classify_rule_profile(&rule);
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
}
#[test]
fn classifies_private_key_envelope_rules_as_self_identifying() {
let rule = mk_rule(
"kingfisher.privkey.2",
r"(?xims)(-----BEGIN\s(?:RSA|PGP|DSA|OPENSSH|ENCRYPTED|EC)?\s{0,1}PRIVATE\sKEY-----[a-z0-9 /+=\r\n\\n]{32,}?-----END\s(?:RSA|PGP|DSA|OPENSSH|ENCRYPTED|EC)?\s{0,1}PRIVATE\sKEY-----)",
);
let profile = RulesDatabase::classify_rule_profile(&rule);
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
let pem_rule = mk_rule(
"kingfisher.pem.1",
r#"(?x)-----BEGIN\ .{0,20}\ ?PRIVATE\ KEY\ ?.{0,20}-----\s*((?:[a-zA-Z0-9+/=\s"',]|\\r|\\n){50,})\s*-----END\ .{0,20}\ ?PRIVATE\ KEY\ ?.{0,20}-----"#,
);
let pem_profile = RulesDatabase::classify_rule_profile(&pem_rule);
assert_eq!(pem_profile.kind, RuleDetectionProfileKind::SelfIdentifying);
}
#[test]
fn classifies_context_dependent_generic_rule() {
let rule = mk_rule(

View file

@ -36,12 +36,11 @@ use kingfisher_rules::ResponseMatcher;
/// Build a deterministic cache key from the immutable parts of an HTTP request.
pub fn generate_http_cache_key_parts(
method: &str,
url: &Url,
url: &str,
headers: &BTreeMap<String, String>,
body: Option<&str>,
) -> String {
let method = method.to_uppercase();
let url = url.as_str();
let mut hasher = Sha1::new();
hasher.update(method.as_bytes());
@ -101,6 +100,21 @@ pub fn with_request_template_globals(globals: &Object) -> Object {
out
}
/// Clone `globals` and add stable placeholder values for request-scoped template vars that
/// would otherwise make HTTP validation cache keys vary per execution.
pub fn with_cache_key_template_globals(globals: &Object) -> Object {
let mut out = globals.clone();
if !out.contains_key("REQUEST_RFC1123_DATE") {
out.insert("REQUEST_RFC1123_DATE".into(), Value::scalar("REQUEST_RFC1123_DATE"));
}
if !out.contains_key("REQUEST_UNIX_MILLIS") {
out.insert("REQUEST_UNIX_MILLIS".into(), Value::scalar("REQUEST_UNIX_MILLIS"));
}
out
}
/// Build a reqwest RequestBuilder using the provided parameters.
pub fn build_request_builder(
client: &Client,
@ -630,6 +644,27 @@ mod tests {
assert_eq!(rendered.get("REQUEST_UNIX_MILLIS").unwrap().to_kstr(), "123");
}
#[test]
fn cache_key_template_globals_use_stable_placeholders() {
let globals = Object::new();
let rendered = with_cache_key_template_globals(&globals);
assert_eq!(rendered.get("REQUEST_RFC1123_DATE").unwrap().to_kstr(), "REQUEST_RFC1123_DATE");
assert_eq!(rendered.get("REQUEST_UNIX_MILLIS").unwrap().to_kstr(), "REQUEST_UNIX_MILLIS");
}
#[test]
fn cache_key_template_globals_preserve_explicit_overrides() {
let mut globals = Object::new();
globals.insert("REQUEST_RFC1123_DATE".into(), Value::scalar("custom-date"));
globals.insert("REQUEST_UNIX_MILLIS".into(), Value::scalar("123"));
let rendered = with_cache_key_template_globals(&globals);
assert_eq!(rendered.get("REQUEST_RFC1123_DATE").unwrap().to_kstr(), "custom-date");
assert_eq!(rendered.get("REQUEST_UNIX_MILLIS").unwrap().to_kstr(), "123");
}
#[test]
fn rejects_ipv4_loopback() {
assert!(!is_ssrf_safe_ip(&IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))));

View file

@ -1215,4 +1215,47 @@ line2
assert_eq!(found.len(), 1, "self-identifying tokens should remain raw-pass findings");
Ok(())
}
#[test]
fn self_identifying_charclass_prefix_rule_remains_hyperscan_only() -> Result<()> {
let token = "xoxb-730191371696-1413868247813-IG7Z6nYevC2hdviE3aJhb5kY";
let rule = Rule::new(RuleSyntax {
id: "kingfisher.slack.2".into(),
name: "slack token".into(),
pattern:
"(?xi)\\b(xox[pbarose][-0-9]{0,3}-[0-9a-z]{6,15}-[0-9a-z]{6,15}-[-0-9a-z]{6,66})\\b"
.into(),
confidence: crate::rules::rule::Confidence::Medium,
min_entropy: 0.0,
visible: true,
examples: vec![],
negative_examples: vec![],
references: vec![],
validation: None::<Validation>,
revocation: None,
depends_on_rule: vec![],
pattern_requirements: None,
tls_mode: None,
});
let rules_db = RulesDatabase::from_rules(vec![rule])?;
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
let mut matcher =
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
let blob = Blob::from_bytes(format!("token={token}").into_bytes());
let origin = OriginSet::from(Origin::from_file(PathBuf::from("slack.txt")));
let found = match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
ScanResult::New(matches) => matches,
_ => panic!("unexpected scan result"),
};
assert_eq!(
found.len(),
1,
"self-identifying token families should not require parser context"
);
Ok(())
}
}

View file

@ -615,6 +615,7 @@ async fn timed_validate_single_match<'a>(
let multipart_timeout = validation_timeout;
let max_retries: u32 = validation_retries;
let request_globals = httpvalidation::with_request_template_globals(&globals);
let cache_globals = httpvalidation::with_cache_key_template_globals(&globals);
// render URL
let url = match render_and_parse_url(
parser,
@ -661,10 +662,19 @@ async fn timed_validate_single_match<'a>(
// old per-request cache (optional)
if !is_multipart {
let cache_url = render_template(
parser,
&cache_globals,
&rule_syntax.name,
&http_validation.request.url,
)
.await
.unwrap_or_else(|_| http_validation.request.url.clone());
let rendered_headers = httpvalidation::process_headers(
&http_validation.request.headers,
parser,
&request_globals,
&cache_globals,
&url,
)
.unwrap_or_default();
@ -682,12 +692,12 @@ async fn timed_validate_single_match<'a>(
parser
.parse(body_template)
.ok()
.and_then(|template| template.render(&request_globals).ok())
.and_then(|template| template.render(&cache_globals).ok())
});
cache_key = httpvalidation::generate_http_cache_key_parts(
http_validation.request.method.as_str(),
&url,
&cache_url,
&header_map,
rendered_body.as_deref(),
);

View file

@ -3,84 +3,6 @@ use std::{fs, process::Command};
use anyhow::{Context, Result};
use serde_json::{Deserializer, Value};
fn macos_arm64_known_missing_findings() -> &'static [(&'static str, &'static str)] {
&[
("kingfisher.google.7", "AIzaSyBUPHAjZl3n8Eza66ka6B78iVyPteC5MgM"),
(
"kingfisher.pem.1",
"MIICWQIBAAKBgHsSuRPLMDrxcwMB9P6ubGFGmlSvHvSXq2kfwycrcEKf/TCctShzA2HYo2IWed8n1rqazlESHnhNmCWlFWIMMFWagZyDBy9yy71MhWISvoTuQVyCx/z3q1v171fy+Ds5smKwZ8wK3bgwBTR7BTKfYNmearDZvPJgwK0jsYEJDZ/DAgElAoGAMeT+7FlK53akP31VfAFG4j83pcp0VVI+kmbSk1bMpWN0e33M5uKE1KPvNZpowkCVUpHJQ3YMWkj4ffbRUUM2L/jQmKkICf7vynIdq5cj+lF6lNXSzwq6pVR6/octdeKS/70DuGcVG+LiRTu2mRb6mPY9bIJIvcgenXajnVanx9UCQQDRwf6oyU/EH4x+kw/XQZi/RebtDPD1yIQuhVG8B1xkPxBsAywTwVDL7DSZ1BsbWJcl5HcXt/q0n/3NZ62XRr1VAkEAljSLsMOk5H7XCctEk3mCu1WgCsUvb/RRCBiBT+cic14OpVtytJMAeLeqcAhIj54ef4hQPGKbAsQZ3E/X4EsotwJAa7alXZfPA9jZcW4c5Ciai7wcoz3/MhrcF+OYrKnVf5YBg5LtHua6yZT4aqswg6oIbWd7bQty5yG5rqrcmcphOQJAHGrOUd/TFnjckyZ0wfRk11VjeG2Fg+IdKwuOFgkiMYB/T7da4+R1tfk7666KRK82M82uUJ0IkdISuvpZRhwOnwJBAI34lnrN4bNcUVB5kAXT9huyH8tJomNdsJOufS3vCi5tKaqKIc3jMIwtyuXsn4NhJNUFlgfPL70CPtb3x/eePqw=",
),
(
"kingfisher.privkey.2",
"-----BEGIN RSA PRIVATE KEY-----MIICWQIBAAKBgHsSuRPLMDrxcwMB9P6ubGFGmlSvHvSXq2kfwycrcEKf/TCctShzA2HYo2IWed8n1rqazlESHnhNmCWlFWIMMFWagZyDBy9yy71MhWISvoTuQVyCx/z3q1v171fy+Ds5smKwZ8wK3bgwBTR7BTKfYNmearDZvPJgwK0jsYEJDZ/DAgElAoGAMeT+7FlK53akP31VfAFG4j83pcp0VVI+kmbSk1bMpWN0e33M5uKE1KPvNZpowkCVUpHJQ3YMWkj4ffbRUUM2L/jQmKkICf7vynIdq5cj+lF6lNXSzwq6pVR6/octdeKS/70DuGcVG+LiRTu2mRb6mPY9bIJIvcgenXajnVanx9UCQQDRwf6oyU/EH4x+kw/XQZi/RebtDPD1yIQuhVG8B1xkPxBsAywTwVDL7DSZ1BsbWJcl5HcXt/q0n/3NZ62XRr1VAkEAljSLsMOk5H7XCctEk3mCu1WgCsUvb/RRCBiBT+cic14OpVtytJMAeLeqcAhIj54ef4hQPGKbAsQZ3E/X4EsotwJAa7alXZfPA9jZcW4c5Ciai7wcoz3/MhrcF+OYrKnVf5YBg5LtHua6yZT4aqswg6oIbWd7bQty5yG5rqrcmcphOQJAHGrOUd/TFnjckyZ0wfRk11VjeG2Fg+IdKwuOFgkiMYB/T7da4+R1tfk7666KRK82M82uUJ0IkdISuvpZRhwOnwJBAI34lnrN4bNcUVB5kAXT9huyH8tJomNdsJOufS3vCi5tKaqKIc3jMIwtyuXsn4NhJNUFlgfPL70CPtb3x/eePqw=-----END RSA PRIVATE KEY-----",
),
(
"kingfisher.pypi.1",
"pypi-AgEIcHlwaS5vcmcCAWEAAAYgNh9pJUqVF-EtMCwGaZYcStFR07RbE8hyb9h2vYxifO8",
),
(
"kingfisher.pypi.1",
"pypi-AgEIcHlwaS5vcmcCAWIAAAYgf_d_XvJfqkOhrkqbEBo-eW9UID46ABNJIdGfaO3n3_k",
),
(
"kingfisher.pypi.1",
"pypi-AgEIcHlwaS5vcmcCAWIAAAYgxbyLvb9egSCECeOdB3qW3h4oXEoNC6kJI0NtaFOQlUY",
),
(
"kingfisher.pypi.1",
"pypi-AgEIcHlwaS5vcmcCAWIAAi97InZlcnNpb24iOiAxLCAicGVybWlzc2lvbnMiOiB7InByb2plY3RzIjogW119fQAABiBWHBa1jsbY-iN-Swf3JCrxy8Q8eRCxMrc_1KkkDuB6KQ",
),
(
"kingfisher.pypi.1",
"pypi-AgEIcHlwaS5vcmcCAWIAAiV7InZlcnNpb24iOiAxLCAicGVybWlzc2lvbnMiOiAidXNlciJ9AAAGIBeIJGhXk8kPPref7vLuwlKbnSWusZKZivIh92GRUUX4",
),
(
"kingfisher.slack.1",
"xapp-1-A01C259PH2A-1440755929120-7d5241948a2cc1b464add85df8a8e75f9040ae2869f6599926ed0b9dcafdb32b",
),
(
"kingfisher.slack.2",
"xoxb-730191371696-1413868247813-IG7Z6nYevC2hdviE3aJhb5kY",
),
(
"kingfisher.slack.4",
"https://hooks.slack.com/services/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",
),
]
}
fn is_known_macos_arm64_missing_finding(finding: &Value) -> bool {
let rule_id = finding.get("rule_id").and_then(Value::as_str).unwrap_or_default();
let snippet = finding.get("snippet").and_then(Value::as_str).unwrap_or_default();
macos_arm64_known_missing_findings().iter().any(|(known_rule_id, known_snippet)| {
rule_id == *known_rule_id && snippet == *known_snippet
})
}
fn assert_findings_match_for_platform(actual: Vec<Value>, expected: Vec<Value>) {
if cfg!(all(target_os = "macos", target_arch = "aarch64")) {
let missing = expected
.iter()
.filter(|finding| !actual.contains(finding))
.cloned()
.collect::<Vec<_>>();
let extras = actual
.iter()
.filter(|finding| !expected.contains(finding))
.cloned()
.collect::<Vec<_>>();
assert!(extras.is_empty(), "unexpected extra findings on macOS ARM64: {extras:#?}");
assert!(
missing.iter().all(is_known_macos_arm64_missing_finding),
"unexpected missing findings on macOS ARM64: {missing:#?}"
);
return;
}
assert_eq!(actual, expected);
}
#[test]
fn scan_findings_match_pre_removal_baseline() -> Result<()> {
let output = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
@ -156,6 +78,6 @@ fn scan_findings_match_pre_removal_baseline() -> Result<()> {
.collect::<Vec<_>>();
expected.sort_by(|left, right| left.to_string().cmp(&right.to_string()));
assert_findings_match_for_platform(actual, expected);
assert_eq!(actual, expected);
Ok(())
}