kingfisher/src/safe_list.rs
2025-11-12 22:25:33 -08:00

217 lines
8.8 KiB
Rust

//! Safe-match filters: identify *benign* placeholder/example/redacted strings
//! so they don't get treated as real secrets. When a rule matches, we log
//! which rule/explanation fired at `debug!` level.
//
// Usage:
// if is_safe_match(bytes) { /* skip finding */ }
//
// If you also want the specific reason:
// if let Some(reason) = is_safe_match_reason(bytes) {
// // reason contains the rule description
// }
use once_cell::sync::Lazy;
use regex::bytes::Regex;
use std::sync::Mutex;
use tracing::debug;
/// A rule that describes *why* a match is considered safe/benign.
#[derive(Debug)]
struct SafeRule {
/// Human-friendly reason that will be logged when this rule fires.
description: &'static str,
/// Compiled regex to detect the benign pattern.
regex: Regex,
}
/// Compile a bytes regex and panic on failure (at init time).
fn compile(pattern: &'static str) -> Regex {
Regex::new(pattern).unwrap_or_else(|e| {
// Compile happens once at startup, so panic is acceptable here.
// We still emit a debug line to aid troubleshooting in non-panic logs.
debug!("Failed to compile safe-list regex: {pattern}\nError: {e}");
panic!("invalid safe-list regex: {pattern}: {e}");
})
}
/// Case-insensitive patterns that indicate a *benign* match (placeholders, examples, redactions, etc.).
/// `is_safe_match()` returns true if any of these are present and logs which rule fired.
/// `is_safe_match_reason()` returns the matching rule's description instead of logging.
static SAFE_LIST_FILTER_RULES: Lazy<Vec<SafeRule>> = Lazy::new(|| {
vec![
SafeRule {
description: "Assignment ending with EXAMPLEKEY (placeholder)",
regex: compile(r"(?i)[:=][^:=]{0,64}EXAMPLEKEY"),
},
SafeRule {
description: "AWS AKIA key explicitly marked as example/fake/test/sample",
regex: compile(r"(?i)\b(AKIA(?:.*?EXAMPLE|.*?FAKE|TEST|.*?SAMPLE))\b"),
},
SafeRule {
description: "Secret-like key followed by redaction marker (&&, ||, or ***** run)",
regex: compile(
r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}\s(&&|\|\||\*{5,50})",
),
},
SafeRule {
description: "Secret-like key + short value followed by another short assignment on same line (example-y)",
regex: compile(
r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}\b\w{4,12}\s{0,6}=\s{0,6}\D{0,3}\w{1,12}",
),
},
SafeRule {
description: "Secret-like key assigned from a shell variable reference (e.g., $FOO), not a literal",
regex: compile(
r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}\$\w{4,30}",
),
},
SafeRule {
description: "Secret-like key set via randomness generator command (openssl rand ...), not a literal",
regex: compile(
r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,16}[=:?][^=:?]{0,8}\bopenssl\s{0,4}rand\b",
),
},
SafeRule {
description: "Secret-like key assigned a value containing 'encrypted' (metadata/marker)",
regex: compile(
r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}encrypted",
),
},
SafeRule {
description: "Secret-like key assigned boolean literal (true/false)",
regex: compile(
r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}\b(?:false|true)\b",
),
},
SafeRule {
description: "Secret-like key assigned to null-ish or self-referential placeholders",
regex: compile(
r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}\b(null|nil|none|password|pass|pwd|passwd|secret|cred|key|auth|authorization).{1,6}$",
),
},
SafeRule {
description: "Classic xkcd fake password 'hunter2'",
regex: compile(
r"(?i)(password|pass|pwd|passwd|secret|cred|key|auth|authorization)[^=:?]{0,8}[=:?][^=:?]{0,8}hunter2",
),
},
SafeRule {
description: "Obvious placeholder sequences (123456789 or abcdefghij)",
regex: compile(r"(?i)123456789|abcdefghij"),
},
SafeRule {
description: "Literal placeholder tag '<secretmanager>'",
regex: compile(r"(?i)<secretmanager>"),
},
SafeRule {
description: "OpenAPI schema references near assignment/query (not a secret)",
regex: compile(r"(?i)[=:?][^=:?]{0,8}#/components/schemas/"),
},
SafeRule {
description: "Example MongoDB URI with placeholder user/pass like user:pass or foo:bar",
regex: compile(
r"(?i)\b(mongodb(?:\+srv)?://(?:user|foo)[^:@]+:(?:pass|bar)[^@]+@[-\w.%+/:]{3,64}(?:/\w+)?)",
),
},
SafeRule {
description: "Classpath URI (configuration reference, not a secret)",
regex: compile(r"(?i)\b(classpath://)"),
},
SafeRule {
description: "Assignment using property placeholder like ${ENV_VAR}",
regex: compile(r"(?i)(\b[^\s\t]{0,16}[=:][^$]*\$\{[a-z_-]{5,30}\})"),
},
SafeRule {
description: "URL with basic auth to host ending in example/test (placeholder)",
regex: compile(r"(?i)\b((?:https?:)?//[^:@]{3,50}:[^:@]{3,50}@[\w.]{0,16}(?:example|test))"),
},
SafeRule {
description: "Assignment ending with SECRETMANAGER (explicit placeholder)",
regex: compile(r"(?i)[:=][^:=]{0,32}\bSECRETMANAGER"),
},
]
});
// User-supplied allow-list patterns (regexes) and skipwords. These are empty by
// default and populated via CLI flags at runtime.
static USER_SAFE_REGEXES: Lazy<Mutex<Vec<Regex>>> = Lazy::new(|| Mutex::new(Vec::new()));
static USER_SAFE_SKIPWORDS: Lazy<Mutex<Vec<String>>> = Lazy::new(|| Mutex::new(Vec::new()));
/// Register an additional allow-list regex provided by the user.
/// If the pattern fails to compile, the error is returned so the caller can
/// surface it.
pub fn add_user_regex(pattern: &str) -> std::result::Result<(), regex::Error> {
let re = Regex::new(pattern)?;
USER_SAFE_REGEXES.lock().unwrap().push(re);
Ok(())
}
/// Register an allow-list skipword provided by the user. Comparisons are
/// case-insensitive.
pub fn add_user_skipword(word: &str) {
USER_SAFE_SKIPWORDS.lock().unwrap().push(word.to_lowercase());
}
/// Returns `true` if the given input matches any user-supplied allow-list
/// patterns (regexes or skipwords).
///
/// `secret` is the primary capture group (typically just the secret value)
/// while `full_match` includes the entire match, allowing regexes to target
/// surrounding context such as variable names.
pub fn is_user_match(secret: &[u8], full_match: &[u8]) -> bool {
{
let regexes = USER_SAFE_REGEXES.lock().unwrap();
if regexes.iter().any(|re| re.is_match(secret) || re.is_match(full_match)) {
debug!("Safe match: user skip-regex");
return true;
}
}
let skipwords = USER_SAFE_SKIPWORDS.lock().unwrap();
if skipwords.is_empty() {
return false;
}
// Check skipwords against both the secret and full match (case-insensitive)
let contains_skipword = |bytes: &[u8]| -> bool {
if let Ok(s) = std::str::from_utf8(bytes) {
let lower = s.to_lowercase();
return skipwords.iter().any(|w| lower.contains(w));
}
false
};
if contains_skipword(secret) || contains_skipword(full_match) {
debug!("Safe match: user skip-word");
return true;
}
false
}
/// Returns `Some(&'static str)` with the rule description if the input likely
/// contains *benign* placeholder/test strings; otherwise `None`.
pub fn is_safe_match_reason(input: &[u8]) -> Option<&'static str> {
SAFE_LIST_FILTER_RULES
.iter()
.find(|rule| rule.regex.is_match(input))
.map(|rule| rule.description)
}
/// Test helper: clear all user-provided allow-list configuration.
#[doc(hidden)]
pub fn clear_user_filters_for_tests() {
USER_SAFE_REGEXES.lock().unwrap().clear();
USER_SAFE_SKIPWORDS.lock().unwrap().clear();
}
/// Returns true if the input likely contains *benign* placeholder/test strings,
/// and logs which rule triggered at `debug!` level.
pub fn is_safe_match(input: &[u8]) -> bool {
if let Some(reason) = is_safe_match_reason(input) {
debug!("Safe match: {reason}");
true
} else {
false
}
}