forked from mirrors/kingfisher
changes in response to PR review
This commit is contained in:
parent
eee7697e24
commit
0d33dff196
5 changed files with 173 additions and 99 deletions
|
|
@ -60,21 +60,7 @@ impl RulesDatabase {
|
|||
|
||||
let mut reason_codes: Vec<&'static str> = Vec::new();
|
||||
|
||||
let has_self_identifying_prefix = [
|
||||
"ccipat_",
|
||||
"xoxb-",
|
||||
"xoxa-",
|
||||
"xoxp-",
|
||||
"xapp-",
|
||||
"ghp_",
|
||||
"github_pat_",
|
||||
"sk_live_",
|
||||
"sk_test_",
|
||||
"ltai",
|
||||
"akia",
|
||||
]
|
||||
.iter()
|
||||
.any(|m| normalized.contains(m));
|
||||
let has_self_identifying_prefix = has_self_identifying_shape(&normalized);
|
||||
if has_self_identifying_prefix {
|
||||
reason_codes.push("self_identifying_prefix");
|
||||
return RuleMatchProfile {
|
||||
|
|
@ -307,6 +293,33 @@ impl RulesDatabase {
|
|||
}
|
||||
}
|
||||
|
||||
fn has_self_identifying_shape(normalized_pattern: &str) -> bool {
|
||||
let literal_markers = [
|
||||
"ccipat_",
|
||||
"xapp-",
|
||||
"ghp_",
|
||||
"github_pat_",
|
||||
"sk_live_",
|
||||
"sk_test_",
|
||||
"ltai",
|
||||
"akia",
|
||||
"aizasy",
|
||||
"pypi-ageichlwas5vcmc",
|
||||
"https://hooks\\.slack\\.com/services/",
|
||||
];
|
||||
|
||||
literal_markers.iter().any(|needle| normalized_pattern.contains(needle))
|
||||
|| normalized_pattern.contains("xox[pbarose]")
|
||||
|| normalized_pattern.contains("xoxe\\.xox[bparose]-")
|
||||
|| normalized_pattern.contains("xoxe-\\d-")
|
||||
|| (normalized_pattern.contains("-----begin\\s")
|
||||
&& normalized_pattern.contains("private\\skey")
|
||||
&& normalized_pattern.contains("-----end\\s"))
|
||||
|| (normalized_pattern.contains("-----begin\\ ")
|
||||
&& normalized_pattern.contains("private\\ key")
|
||||
&& normalized_pattern.contains("-----end\\ "))
|
||||
}
|
||||
|
||||
fn has_generic_token_class(normalized_pattern: &str) -> bool {
|
||||
[
|
||||
"[a-za-z0-9]{",
|
||||
|
|
@ -436,6 +449,57 @@ mod test_rule_match_profiles {
|
|||
assert!(profile.reason_codes.contains(&"self_identifying_prefix"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classifies_google_api_key_rule_as_self_identifying() {
|
||||
let rule = mk_rule("kingfisher.google.7", r"(?xi)\b(AIzaSy[A-Za-z0-9_-]{33})");
|
||||
let profile = RulesDatabase::classify_rule_profile(&rule);
|
||||
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classifies_slack_token_charclass_rule_as_self_identifying() {
|
||||
let rule = mk_rule(
|
||||
"kingfisher.slack.2",
|
||||
r"(?xi)\b(xox[pbarose][-0-9]{0,3}-[0-9a-z]{6,15}-[0-9a-z]{6,15}-[-0-9a-z]{6,66})\b",
|
||||
);
|
||||
let profile = RulesDatabase::classify_rule_profile(&rule);
|
||||
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classifies_slack_webhook_rule_as_self_identifying() {
|
||||
let rule = mk_rule(
|
||||
"kingfisher.slack.4",
|
||||
r"(?xi)\b(https://hooks\.slack\.com/services/T[a-z0-9_-]{8,12}/B[a-z0-9_-]{8,12}/[a-z0-9_-]{20,30})",
|
||||
);
|
||||
let profile = RulesDatabase::classify_rule_profile(&rule);
|
||||
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classifies_pypi_token_rule_as_self_identifying() {
|
||||
let rule = mk_rule("kingfisher.pypi.1", r"(?x)(pypi-AgEIcHlwaS5vcmc[A-Za-z0-9_-]{50,})\b");
|
||||
let profile = RulesDatabase::classify_rule_profile(&rule);
|
||||
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classifies_private_key_envelope_rules_as_self_identifying() {
|
||||
let rule = mk_rule(
|
||||
"kingfisher.privkey.2",
|
||||
r"(?xims)(-----BEGIN\s(?:RSA|PGP|DSA|OPENSSH|ENCRYPTED|EC)?\s{0,1}PRIVATE\sKEY-----[a-z0-9 /+=\r\n\\n]{32,}?-----END\s(?:RSA|PGP|DSA|OPENSSH|ENCRYPTED|EC)?\s{0,1}PRIVATE\sKEY-----)",
|
||||
);
|
||||
let profile = RulesDatabase::classify_rule_profile(&rule);
|
||||
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
|
||||
|
||||
let pem_rule = mk_rule(
|
||||
"kingfisher.pem.1",
|
||||
r#"(?x)-----BEGIN\ .{0,20}\ ?PRIVATE\ KEY\ ?.{0,20}-----\s*((?:[a-zA-Z0-9+/=\s"',]|\\r|\\n){50,})\s*-----END\ .{0,20}\ ?PRIVATE\ KEY\ ?.{0,20}-----"#,
|
||||
);
|
||||
let pem_profile = RulesDatabase::classify_rule_profile(&pem_rule);
|
||||
assert_eq!(pem_profile.kind, RuleDetectionProfileKind::SelfIdentifying);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn classifies_context_dependent_generic_rule() {
|
||||
let rule = mk_rule(
|
||||
|
|
|
|||
|
|
@ -36,12 +36,11 @@ use kingfisher_rules::ResponseMatcher;
|
|||
/// Build a deterministic cache key from the immutable parts of an HTTP request.
|
||||
pub fn generate_http_cache_key_parts(
|
||||
method: &str,
|
||||
url: &Url,
|
||||
url: &str,
|
||||
headers: &BTreeMap<String, String>,
|
||||
body: Option<&str>,
|
||||
) -> String {
|
||||
let method = method.to_uppercase();
|
||||
let url = url.as_str();
|
||||
|
||||
let mut hasher = Sha1::new();
|
||||
hasher.update(method.as_bytes());
|
||||
|
|
@ -101,6 +100,21 @@ pub fn with_request_template_globals(globals: &Object) -> Object {
|
|||
out
|
||||
}
|
||||
|
||||
/// Clone `globals` and add stable placeholder values for request-scoped template vars that
|
||||
/// would otherwise make HTTP validation cache keys vary per execution.
|
||||
pub fn with_cache_key_template_globals(globals: &Object) -> Object {
|
||||
let mut out = globals.clone();
|
||||
|
||||
if !out.contains_key("REQUEST_RFC1123_DATE") {
|
||||
out.insert("REQUEST_RFC1123_DATE".into(), Value::scalar("REQUEST_RFC1123_DATE"));
|
||||
}
|
||||
if !out.contains_key("REQUEST_UNIX_MILLIS") {
|
||||
out.insert("REQUEST_UNIX_MILLIS".into(), Value::scalar("REQUEST_UNIX_MILLIS"));
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/// Build a reqwest RequestBuilder using the provided parameters.
|
||||
pub fn build_request_builder(
|
||||
client: &Client,
|
||||
|
|
@ -630,6 +644,27 @@ mod tests {
|
|||
assert_eq!(rendered.get("REQUEST_UNIX_MILLIS").unwrap().to_kstr(), "123");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cache_key_template_globals_use_stable_placeholders() {
|
||||
let globals = Object::new();
|
||||
let rendered = with_cache_key_template_globals(&globals);
|
||||
|
||||
assert_eq!(rendered.get("REQUEST_RFC1123_DATE").unwrap().to_kstr(), "REQUEST_RFC1123_DATE");
|
||||
assert_eq!(rendered.get("REQUEST_UNIX_MILLIS").unwrap().to_kstr(), "REQUEST_UNIX_MILLIS");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cache_key_template_globals_preserve_explicit_overrides() {
|
||||
let mut globals = Object::new();
|
||||
globals.insert("REQUEST_RFC1123_DATE".into(), Value::scalar("custom-date"));
|
||||
globals.insert("REQUEST_UNIX_MILLIS".into(), Value::scalar("123"));
|
||||
|
||||
let rendered = with_cache_key_template_globals(&globals);
|
||||
|
||||
assert_eq!(rendered.get("REQUEST_RFC1123_DATE").unwrap().to_kstr(), "custom-date");
|
||||
assert_eq!(rendered.get("REQUEST_UNIX_MILLIS").unwrap().to_kstr(), "123");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_ipv4_loopback() {
|
||||
assert!(!is_ssrf_safe_ip(&IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))));
|
||||
|
|
|
|||
|
|
@ -1215,4 +1215,47 @@ line2
|
|||
assert_eq!(found.len(), 1, "self-identifying tokens should remain raw-pass findings");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn self_identifying_charclass_prefix_rule_remains_hyperscan_only() -> Result<()> {
|
||||
let token = "xoxb-730191371696-1413868247813-IG7Z6nYevC2hdviE3aJhb5kY";
|
||||
let rule = Rule::new(RuleSyntax {
|
||||
id: "kingfisher.slack.2".into(),
|
||||
name: "slack token".into(),
|
||||
pattern:
|
||||
"(?xi)\\b(xox[pbarose][-0-9]{0,3}-[0-9a-z]{6,15}-[0-9a-z]{6,15}-[-0-9a-z]{6,66})\\b"
|
||||
.into(),
|
||||
confidence: crate::rules::rule::Confidence::Medium,
|
||||
min_entropy: 0.0,
|
||||
visible: true,
|
||||
examples: vec![],
|
||||
negative_examples: vec![],
|
||||
references: vec![],
|
||||
validation: None::<Validation>,
|
||||
revocation: None,
|
||||
depends_on_rule: vec![],
|
||||
pattern_requirements: None,
|
||||
tls_mode: None,
|
||||
});
|
||||
|
||||
let rules_db = RulesDatabase::from_rules(vec![rule])?;
|
||||
let seen = BlobIdMap::new();
|
||||
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
|
||||
let mut matcher =
|
||||
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
|
||||
|
||||
let blob = Blob::from_bytes(format!("token={token}").into_bytes());
|
||||
let origin = OriginSet::from(Origin::from_file(PathBuf::from("slack.txt")));
|
||||
|
||||
let found = match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
|
||||
ScanResult::New(matches) => matches,
|
||||
_ => panic!("unexpected scan result"),
|
||||
};
|
||||
assert_eq!(
|
||||
found.len(),
|
||||
1,
|
||||
"self-identifying token families should not require parser context"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -615,6 +615,7 @@ async fn timed_validate_single_match<'a>(
|
|||
let multipart_timeout = validation_timeout;
|
||||
let max_retries: u32 = validation_retries;
|
||||
let request_globals = httpvalidation::with_request_template_globals(&globals);
|
||||
let cache_globals = httpvalidation::with_cache_key_template_globals(&globals);
|
||||
// render URL
|
||||
let url = match render_and_parse_url(
|
||||
parser,
|
||||
|
|
@ -661,10 +662,19 @@ async fn timed_validate_single_match<'a>(
|
|||
|
||||
// old per-request cache (optional)
|
||||
if !is_multipart {
|
||||
let cache_url = render_template(
|
||||
parser,
|
||||
&cache_globals,
|
||||
&rule_syntax.name,
|
||||
&http_validation.request.url,
|
||||
)
|
||||
.await
|
||||
.unwrap_or_else(|_| http_validation.request.url.clone());
|
||||
|
||||
let rendered_headers = httpvalidation::process_headers(
|
||||
&http_validation.request.headers,
|
||||
parser,
|
||||
&request_globals,
|
||||
&cache_globals,
|
||||
&url,
|
||||
)
|
||||
.unwrap_or_default();
|
||||
|
|
@ -682,12 +692,12 @@ async fn timed_validate_single_match<'a>(
|
|||
parser
|
||||
.parse(body_template)
|
||||
.ok()
|
||||
.and_then(|template| template.render(&request_globals).ok())
|
||||
.and_then(|template| template.render(&cache_globals).ok())
|
||||
});
|
||||
|
||||
cache_key = httpvalidation::generate_http_cache_key_parts(
|
||||
http_validation.request.method.as_str(),
|
||||
&url,
|
||||
&cache_url,
|
||||
&header_map,
|
||||
rendered_body.as_deref(),
|
||||
);
|
||||
|
|
|
|||
|
|
@ -3,84 +3,6 @@ use std::{fs, process::Command};
|
|||
use anyhow::{Context, Result};
|
||||
use serde_json::{Deserializer, Value};
|
||||
|
||||
fn macos_arm64_known_missing_findings() -> &'static [(&'static str, &'static str)] {
|
||||
&[
|
||||
("kingfisher.google.7", "AIzaSyBUPHAjZl3n8Eza66ka6B78iVyPteC5MgM"),
|
||||
(
|
||||
"kingfisher.pem.1",
|
||||
"MIICWQIBAAKBgHsSuRPLMDrxcwMB9P6ubGFGmlSvHvSXq2kfwycrcEKf/TCctShzA2HYo2IWed8n1rqazlESHnhNmCWlFWIMMFWagZyDBy9yy71MhWISvoTuQVyCx/z3q1v171fy+Ds5smKwZ8wK3bgwBTR7BTKfYNmearDZvPJgwK0jsYEJDZ/DAgElAoGAMeT+7FlK53akP31VfAFG4j83pcp0VVI+kmbSk1bMpWN0e33M5uKE1KPvNZpowkCVUpHJQ3YMWkj4ffbRUUM2L/jQmKkICf7vynIdq5cj+lF6lNXSzwq6pVR6/octdeKS/70DuGcVG+LiRTu2mRb6mPY9bIJIvcgenXajnVanx9UCQQDRwf6oyU/EH4x+kw/XQZi/RebtDPD1yIQuhVG8B1xkPxBsAywTwVDL7DSZ1BsbWJcl5HcXt/q0n/3NZ62XRr1VAkEAljSLsMOk5H7XCctEk3mCu1WgCsUvb/RRCBiBT+cic14OpVtytJMAeLeqcAhIj54ef4hQPGKbAsQZ3E/X4EsotwJAa7alXZfPA9jZcW4c5Ciai7wcoz3/MhrcF+OYrKnVf5YBg5LtHua6yZT4aqswg6oIbWd7bQty5yG5rqrcmcphOQJAHGrOUd/TFnjckyZ0wfRk11VjeG2Fg+IdKwuOFgkiMYB/T7da4+R1tfk7666KRK82M82uUJ0IkdISuvpZRhwOnwJBAI34lnrN4bNcUVB5kAXT9huyH8tJomNdsJOufS3vCi5tKaqKIc3jMIwtyuXsn4NhJNUFlgfPL70CPtb3x/eePqw=",
|
||||
),
|
||||
(
|
||||
"kingfisher.privkey.2",
|
||||
"-----BEGIN RSA PRIVATE KEY-----MIICWQIBAAKBgHsSuRPLMDrxcwMB9P6ubGFGmlSvHvSXq2kfwycrcEKf/TCctShzA2HYo2IWed8n1rqazlESHnhNmCWlFWIMMFWagZyDBy9yy71MhWISvoTuQVyCx/z3q1v171fy+Ds5smKwZ8wK3bgwBTR7BTKfYNmearDZvPJgwK0jsYEJDZ/DAgElAoGAMeT+7FlK53akP31VfAFG4j83pcp0VVI+kmbSk1bMpWN0e33M5uKE1KPvNZpowkCVUpHJQ3YMWkj4ffbRUUM2L/jQmKkICf7vynIdq5cj+lF6lNXSzwq6pVR6/octdeKS/70DuGcVG+LiRTu2mRb6mPY9bIJIvcgenXajnVanx9UCQQDRwf6oyU/EH4x+kw/XQZi/RebtDPD1yIQuhVG8B1xkPxBsAywTwVDL7DSZ1BsbWJcl5HcXt/q0n/3NZ62XRr1VAkEAljSLsMOk5H7XCctEk3mCu1WgCsUvb/RRCBiBT+cic14OpVtytJMAeLeqcAhIj54ef4hQPGKbAsQZ3E/X4EsotwJAa7alXZfPA9jZcW4c5Ciai7wcoz3/MhrcF+OYrKnVf5YBg5LtHua6yZT4aqswg6oIbWd7bQty5yG5rqrcmcphOQJAHGrOUd/TFnjckyZ0wfRk11VjeG2Fg+IdKwuOFgkiMYB/T7da4+R1tfk7666KRK82M82uUJ0IkdISuvpZRhwOnwJBAI34lnrN4bNcUVB5kAXT9huyH8tJomNdsJOufS3vCi5tKaqKIc3jMIwtyuXsn4NhJNUFlgfPL70CPtb3x/eePqw=-----END RSA PRIVATE KEY-----",
|
||||
),
|
||||
(
|
||||
"kingfisher.pypi.1",
|
||||
"pypi-AgEIcHlwaS5vcmcCAWEAAAYgNh9pJUqVF-EtMCwGaZYcStFR07RbE8hyb9h2vYxifO8",
|
||||
),
|
||||
(
|
||||
"kingfisher.pypi.1",
|
||||
"pypi-AgEIcHlwaS5vcmcCAWIAAAYgf_d_XvJfqkOhrkqbEBo-eW9UID46ABNJIdGfaO3n3_k",
|
||||
),
|
||||
(
|
||||
"kingfisher.pypi.1",
|
||||
"pypi-AgEIcHlwaS5vcmcCAWIAAAYgxbyLvb9egSCECeOdB3qW3h4oXEoNC6kJI0NtaFOQlUY",
|
||||
),
|
||||
(
|
||||
"kingfisher.pypi.1",
|
||||
"pypi-AgEIcHlwaS5vcmcCAWIAAi97InZlcnNpb24iOiAxLCAicGVybWlzc2lvbnMiOiB7InByb2plY3RzIjogW119fQAABiBWHBa1jsbY-iN-Swf3JCrxy8Q8eRCxMrc_1KkkDuB6KQ",
|
||||
),
|
||||
(
|
||||
"kingfisher.pypi.1",
|
||||
"pypi-AgEIcHlwaS5vcmcCAWIAAiV7InZlcnNpb24iOiAxLCAicGVybWlzc2lvbnMiOiAidXNlciJ9AAAGIBeIJGhXk8kPPref7vLuwlKbnSWusZKZivIh92GRUUX4",
|
||||
),
|
||||
(
|
||||
"kingfisher.slack.1",
|
||||
"xapp-1-A01C259PH2A-1440755929120-7d5241948a2cc1b464add85df8a8e75f9040ae2869f6599926ed0b9dcafdb32b",
|
||||
),
|
||||
(
|
||||
"kingfisher.slack.2",
|
||||
"xoxb-730191371696-1413868247813-IG7Z6nYevC2hdviE3aJhb5kY",
|
||||
),
|
||||
(
|
||||
"kingfisher.slack.4",
|
||||
"https://hooks.slack.com/services/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",
|
||||
),
|
||||
]
|
||||
}
|
||||
|
||||
fn is_known_macos_arm64_missing_finding(finding: &Value) -> bool {
|
||||
let rule_id = finding.get("rule_id").and_then(Value::as_str).unwrap_or_default();
|
||||
let snippet = finding.get("snippet").and_then(Value::as_str).unwrap_or_default();
|
||||
macos_arm64_known_missing_findings().iter().any(|(known_rule_id, known_snippet)| {
|
||||
rule_id == *known_rule_id && snippet == *known_snippet
|
||||
})
|
||||
}
|
||||
|
||||
fn assert_findings_match_for_platform(actual: Vec<Value>, expected: Vec<Value>) {
|
||||
if cfg!(all(target_os = "macos", target_arch = "aarch64")) {
|
||||
let missing = expected
|
||||
.iter()
|
||||
.filter(|finding| !actual.contains(finding))
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
let extras = actual
|
||||
.iter()
|
||||
.filter(|finding| !expected.contains(finding))
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
assert!(extras.is_empty(), "unexpected extra findings on macOS ARM64: {extras:#?}");
|
||||
assert!(
|
||||
missing.iter().all(is_known_macos_arm64_missing_finding),
|
||||
"unexpected missing findings on macOS ARM64: {missing:#?}"
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
assert_eq!(actual, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scan_findings_match_pre_removal_baseline() -> Result<()> {
|
||||
let output = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
|
||||
|
|
@ -156,6 +78,6 @@ fn scan_findings_match_pre_removal_baseline() -> Result<()> {
|
|||
.collect::<Vec<_>>();
|
||||
expected.sort_by(|left, right| left.to_string().cmp(&right.to_string()));
|
||||
|
||||
assert_findings_match_for_platform(actual, expected);
|
||||
assert_eq!(actual, expected);
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue