forked from mirrors/kingfisher
983 lines
33 KiB
Rust
983 lines
33 KiB
Rust
//! This module defines rule syntax and evaluation types for secret detection.
|
|
//!
|
|
//! It provides structures and methods to represent validation configurations,
|
|
//! HTTP requests for validation, rule syntax (with support for comment stripping),
|
|
//! and associated metadata such as confidence and fingerprints.
|
|
|
|
use std::{
|
|
borrow::Cow, cmp::Ordering, collections::BTreeMap, fmt, hash::Hash, path::Path, str::FromStr,
|
|
};
|
|
|
|
use anyhow::{anyhow, Context, Result};
|
|
use lazy_static::lazy_static;
|
|
use liquid::{
|
|
model::{KString, Value},
|
|
object, ParserBuilder,
|
|
};
|
|
use regex::Regex;
|
|
use schemars::{
|
|
gen::SchemaGenerator,
|
|
schema::{Schema, SchemaObject},
|
|
JsonSchema,
|
|
};
|
|
use serde::{Deserialize, Serialize};
|
|
use tracing::debug;
|
|
// use sha1::{Digest, Sha1};
|
|
use xxhash_rust::xxh3::xxh3_64;
|
|
|
|
use crate::liquid_filters;
|
|
|
|
/// Returns false as the default value.
|
|
fn default_false() -> bool {
|
|
false
|
|
}
|
|
|
|
/// Returns true as the default value.
|
|
fn default_true() -> bool {
|
|
true
|
|
}
|
|
|
|
/// Represents various types of validation that a rule can perform.
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
|
#[serde(tag = "type", content = "content")]
|
|
pub enum Validation {
|
|
AWS,
|
|
AzureStorage,
|
|
Coinbase,
|
|
GCP,
|
|
MongoDB,
|
|
Postgres,
|
|
Jdbc,
|
|
JWT,
|
|
Raw(String),
|
|
Http(HttpValidation),
|
|
}
|
|
|
|
/// Specifies that a rule depends on a variable from another rule.
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
|
pub struct DependsOnRule {
|
|
pub rule_id: String,
|
|
pub variable: String,
|
|
}
|
|
|
|
/// Specifies character type requirements for matched secrets.
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
|
pub struct PatternRequirements {
|
|
/// Minimum number of digits required (0-9)
|
|
#[serde(default)]
|
|
pub min_digits: Option<usize>,
|
|
/// Minimum number of uppercase letters required (A-Z)
|
|
#[serde(default)]
|
|
pub min_uppercase: Option<usize>,
|
|
/// Minimum number of lowercase letters required (a-z)
|
|
#[serde(default)]
|
|
pub min_lowercase: Option<usize>,
|
|
/// Minimum number of special characters required
|
|
#[serde(default)]
|
|
pub min_special_chars: Option<usize>,
|
|
/// Custom set of characters to consider as "special" (defaults to common punctuation)
|
|
#[serde(default)]
|
|
pub special_chars: Option<String>,
|
|
/// Words that should cause the match to be excluded when present (case-insensitive)
|
|
#[serde(default)]
|
|
pub ignore_if_contains: Option<Vec<String>>,
|
|
/// Optional checksum validation configuration.
|
|
#[serde(default)]
|
|
pub checksum: Option<ChecksumRequirement>,
|
|
}
|
|
|
|
/// Defines a checksum validation strategy for a matched pattern.
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
|
pub struct ChecksumRequirement {
|
|
/// Template describing how to extract the checksum from the match.
|
|
pub actual: ChecksumActual,
|
|
/// Template describing how to compute the expected checksum.
|
|
pub expected: String,
|
|
/// When true, checksum evaluation is skipped if the required capture is missing.
|
|
#[serde(default)]
|
|
pub skip_if_missing: bool,
|
|
}
|
|
|
|
/// Describes how to extract the checksum value from a match.
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
|
pub struct ChecksumActual {
|
|
/// Liquid template used to compute the checksum from the match.
|
|
pub template: String,
|
|
/// Optional capture group that must be present before evaluating the checksum.
|
|
#[serde(default)]
|
|
pub requires_capture: Option<String>,
|
|
}
|
|
|
|
/// Contextual information available when validating pattern requirements.
|
|
#[derive(Clone, Copy)]
|
|
pub struct PatternRequirementContext<'a> {
|
|
/// Compiled regex associated with the rule.
|
|
pub regex: &'a regex::bytes::Regex,
|
|
/// Captures for the current match.
|
|
pub captures: &'a regex::bytes::Captures<'a>,
|
|
/// Full bytes matched by the rule (capture group 0).
|
|
pub full_match: &'a [u8],
|
|
}
|
|
|
|
impl PatternRequirements {
|
|
/// Default special characters if none are specified
|
|
const DEFAULT_SPECIAL_CHARS: &'static str = "!@#$%^&*()_+-=[]{}|;:'\",.<>?/\\`~";
|
|
|
|
/// Validates whether the given byte slice meets the character requirements.
|
|
/// Returns the validation outcome, including whether the match should be ignored
|
|
/// due to `ignore_if_contains` entries when that behaviour is enabled.
|
|
pub fn validate(
|
|
&self,
|
|
input: &[u8],
|
|
context: Option<PatternRequirementContext<'_>>,
|
|
respect_ignore_if_contains: bool,
|
|
) -> PatternValidationResult {
|
|
// Convert to string (lossy for non-UTF8)
|
|
let s = String::from_utf8_lossy(input);
|
|
|
|
// Check digit requirement
|
|
if let Some(min_digits) = self.min_digits {
|
|
let digit_count = s.chars().filter(|c| c.is_ascii_digit()).count();
|
|
if digit_count < min_digits {
|
|
return PatternValidationResult::Failed;
|
|
}
|
|
}
|
|
|
|
// Check uppercase requirement
|
|
if let Some(min_uppercase) = self.min_uppercase {
|
|
let uppercase_count = s.chars().filter(|c| c.is_ascii_uppercase()).count();
|
|
if uppercase_count < min_uppercase {
|
|
return PatternValidationResult::Failed;
|
|
}
|
|
}
|
|
|
|
// Check lowercase requirement
|
|
if let Some(min_lowercase) = self.min_lowercase {
|
|
let lowercase_count = s.chars().filter(|c| c.is_ascii_lowercase()).count();
|
|
if lowercase_count < min_lowercase {
|
|
return PatternValidationResult::Failed;
|
|
}
|
|
}
|
|
|
|
// Check special character requirement
|
|
if let Some(min_special) = self.min_special_chars {
|
|
let special_chars =
|
|
self.special_chars.as_deref().unwrap_or(Self::DEFAULT_SPECIAL_CHARS);
|
|
let special_count = s.chars().filter(|c| special_chars.contains(*c)).count();
|
|
if special_count < min_special {
|
|
return PatternValidationResult::Failed;
|
|
}
|
|
}
|
|
|
|
// Check ignore-if-contains requirement
|
|
if respect_ignore_if_contains {
|
|
if let Some(ignore_terms) = self.ignore_if_contains.as_ref() {
|
|
let lowercase_input = s.to_lowercase();
|
|
if let Some(matched_term) = ignore_terms
|
|
.iter()
|
|
.filter_map(|term| {
|
|
let trimmed = term.trim();
|
|
if trimmed.is_empty() {
|
|
None
|
|
} else {
|
|
Some((trimmed, trimmed.to_lowercase()))
|
|
}
|
|
})
|
|
.find_map(|(original, lowered)| {
|
|
if lowercase_input.contains(&lowered) {
|
|
Some(original.to_string())
|
|
} else {
|
|
None
|
|
}
|
|
})
|
|
{
|
|
return PatternValidationResult::IgnoredBySubstring { matched_term };
|
|
}
|
|
}
|
|
}
|
|
|
|
if let Some(checksum) = &self.checksum {
|
|
let Some(ctx) = context else {
|
|
return if checksum.skip_if_missing {
|
|
PatternValidationResult::Passed
|
|
} else {
|
|
PatternValidationResult::Failed
|
|
};
|
|
};
|
|
|
|
if let Some(required) = checksum.actual.requires_capture.as_deref() {
|
|
if ctx.captures.name(required).is_none() {
|
|
return if checksum.skip_if_missing {
|
|
PatternValidationResult::Passed
|
|
} else {
|
|
PatternValidationResult::Failed
|
|
};
|
|
}
|
|
}
|
|
|
|
let mut globals = object!({
|
|
"MATCH": s.to_string(),
|
|
"FULL_MATCH": String::from_utf8_lossy(ctx.full_match).to_string(),
|
|
});
|
|
|
|
for name in ctx.regex.capture_names().flatten() {
|
|
if let Some(capture) = ctx.captures.name(name) {
|
|
let value = String::from_utf8_lossy(capture.as_bytes()).to_string();
|
|
globals.insert(KString::from_ref(name), Value::scalar(value.clone()));
|
|
globals.insert(
|
|
KString::from_string(name.to_ascii_uppercase()),
|
|
Value::scalar(value),
|
|
);
|
|
}
|
|
}
|
|
|
|
let actual =
|
|
match render_pattern_requirement_template(&checksum.actual.template, &globals) {
|
|
Ok(rendered) => rendered,
|
|
Err(err) => {
|
|
debug!(
|
|
"Failed to render checksum actual template '{}': {}",
|
|
checksum.actual.template, err
|
|
);
|
|
return PatternValidationResult::Failed;
|
|
}
|
|
};
|
|
let expected = match render_pattern_requirement_template(&checksum.expected, &globals) {
|
|
Ok(rendered) => rendered,
|
|
Err(err) => {
|
|
debug!(
|
|
"Failed to render checksum expected template '{}': {}",
|
|
checksum.expected, err
|
|
);
|
|
return PatternValidationResult::Failed;
|
|
}
|
|
};
|
|
|
|
if actual != expected {
|
|
let actual_len = actual.chars().count();
|
|
let expected_len = expected.chars().count();
|
|
return PatternValidationResult::FailedChecksum { actual_len, expected_len };
|
|
}
|
|
}
|
|
|
|
PatternValidationResult::Passed
|
|
}
|
|
}
|
|
|
|
fn render_pattern_requirement_template(
|
|
template: &str,
|
|
globals: &liquid::Object,
|
|
) -> Result<String, String> {
|
|
PATTERN_REQUIREMENTS_TEMPLATE_PARSER
|
|
.parse(template)
|
|
.map_err(|e| e.to_string())
|
|
.and_then(|parsed| parsed.render(globals).map_err(|e| e.to_string()))
|
|
}
|
|
|
|
/// Result of validating [`PatternRequirements`] against a potential match.
|
|
#[derive(Debug, PartialEq, Eq)]
|
|
pub enum PatternValidationResult {
|
|
/// All requirements are satisfied and the match should be kept.
|
|
Passed,
|
|
/// Requirements were not satisfied.
|
|
Failed,
|
|
/// Checksum requirements were not satisfied; captures basic mismatch details for debugging.
|
|
FailedChecksum { actual_len: usize, expected_len: usize },
|
|
/// The match contains one of the `ignore_if_contains` substrings and should be skipped.
|
|
IgnoredBySubstring { matched_term: String },
|
|
}
|
|
|
|
/// Configuration for HTTP validation. This contains a request configuration
|
|
/// and an optional multipart configuration.
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
|
pub struct HttpValidation {
|
|
pub request: HttpRequest,
|
|
pub multipart: Option<MultipartConfig>,
|
|
}
|
|
|
|
/// Configuration for an HTTP request used for validation.
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
|
pub struct HttpRequest {
|
|
pub method: String,
|
|
pub url: String,
|
|
#[serde(default)]
|
|
pub headers: BTreeMap<String, String>,
|
|
#[serde(default)]
|
|
pub body: Option<String>,
|
|
#[serde(default)]
|
|
pub response_matcher: Option<Vec<ResponseMatcher>>,
|
|
#[serde(default)]
|
|
pub multipart: Option<MultipartConfig>,
|
|
// allow HTML only when explicitly set true
|
|
#[serde(default = "default_false")]
|
|
pub response_is_html: bool,
|
|
}
|
|
|
|
/// Configuration for multipart HTTP requests.
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
|
pub struct MultipartConfig {
|
|
pub parts: Vec<MultipartPart>,
|
|
}
|
|
|
|
/// Configuration for a single multipart part.
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
|
pub struct MultipartPart {
|
|
pub name: String,
|
|
#[serde(rename = "type")]
|
|
pub part_type: String,
|
|
pub content: String,
|
|
#[serde(default)]
|
|
pub content_type: Option<String>,
|
|
}
|
|
|
|
// ── wrapper with deny_unknown_fields ───────────────────────────
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
|
#[serde(deny_unknown_fields)]
|
|
pub struct ReportResponseData {
|
|
#[serde(default = "default_true")]
|
|
report_response: bool,
|
|
}
|
|
|
|
/// Describes how to match HTTP responses.
|
|
/// This is an untagged enum to allow for different matching strategies.
|
|
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
|
#[serde(untagged)]
|
|
pub enum ResponseMatcher {
|
|
WordMatch {
|
|
r#type: String,
|
|
words: Vec<String>,
|
|
#[serde(default = "default_false")]
|
|
match_all_words: bool,
|
|
#[serde(default = "default_false")]
|
|
negative: bool, // true = “fail if the pattern *does* appear”
|
|
},
|
|
StatusMatch {
|
|
r#type: String,
|
|
status: Vec<u16>,
|
|
#[serde(default = "default_false")]
|
|
match_all_status: bool,
|
|
#[serde(default = "default_false")]
|
|
negative: bool, // true = “fail if the status *does* match”
|
|
},
|
|
HeaderMatch {
|
|
r#type: String, // "HeaderMatch"
|
|
header: String, // e.g. "content-type"
|
|
expected: Vec<String>, // one or more acceptable tokens
|
|
#[serde(default = "default_false")]
|
|
match_all_values: bool,
|
|
},
|
|
JsonValid {
|
|
// "JsonValid"
|
|
r#type: String,
|
|
},
|
|
XmlValid {
|
|
// "XmlValid"
|
|
r#type: String,
|
|
},
|
|
ReportResponse(ReportResponseData),
|
|
}
|
|
|
|
/// The confidence level associated with a rule.
|
|
#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)]
|
|
pub enum Confidence {
|
|
Low,
|
|
Medium,
|
|
High,
|
|
}
|
|
|
|
impl Default for Confidence {
|
|
fn default() -> Self {
|
|
Confidence::Medium
|
|
}
|
|
}
|
|
|
|
impl PartialOrd for Confidence {
|
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
|
Some(self.cmp(other))
|
|
}
|
|
}
|
|
|
|
impl Ord for Confidence {
|
|
fn cmp(&self, other: &Self) -> Ordering {
|
|
match (self, other) {
|
|
(Confidence::Low, Confidence::Low) => Ordering::Equal,
|
|
(Confidence::Low, _) => Ordering::Less,
|
|
(Confidence::Medium, Confidence::Low) => Ordering::Greater,
|
|
(Confidence::Medium, Confidence::Medium) => Ordering::Equal,
|
|
(Confidence::Medium, Confidence::High) => Ordering::Less,
|
|
(Confidence::High, Confidence::High) => Ordering::Equal,
|
|
(Confidence::High, _) => Ordering::Greater,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Confidence {
|
|
/// Returns true if the current confidence is at least as high as `other`.
|
|
pub fn is_at_least(&self, other: &Confidence) -> bool {
|
|
match (self, other) {
|
|
(Confidence::High, _) => true,
|
|
(Confidence::Medium, Confidence::Low) | (Confidence::Medium, Confidence::Medium) => {
|
|
true
|
|
}
|
|
(Confidence::Low, Confidence::Low) => true,
|
|
_ => false,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for Confidence {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
let s = match self {
|
|
Confidence::Low => "low",
|
|
Confidence::Medium => "medium",
|
|
Confidence::High => "high",
|
|
};
|
|
write!(f, "{}", s)
|
|
}
|
|
}
|
|
|
|
impl FromStr for Confidence {
|
|
type Err = anyhow::Error;
|
|
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
|
match s.to_lowercase().as_str() {
|
|
"low" => Ok(Confidence::Low),
|
|
"medium" => Ok(Confidence::Medium),
|
|
"high" => Ok(Confidence::High),
|
|
_ => Err(anyhow!("Invalid confidence level: {}", s)),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl JsonSchema for Confidence {
|
|
fn schema_name() -> String {
|
|
"Confidence".to_string()
|
|
}
|
|
|
|
fn json_schema(_gen: &mut SchemaGenerator) -> Schema {
|
|
let mut schema = SchemaObject::default();
|
|
schema.enum_values = Some(vec![
|
|
serde_json::to_value("Low").unwrap(),
|
|
serde_json::to_value("Medium").unwrap(),
|
|
serde_json::to_value("High").unwrap(),
|
|
]);
|
|
Schema::Object(schema)
|
|
}
|
|
}
|
|
|
|
// Custom serialization/deserialization for case-insensitive handling.
|
|
impl Serialize for Confidence {
|
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
|
where
|
|
S: serde::Serializer,
|
|
{
|
|
serializer.serialize_str(&self.to_string())
|
|
}
|
|
}
|
|
|
|
impl<'de> Deserialize<'de> for Confidence {
|
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
|
where
|
|
D: serde::Deserializer<'de>,
|
|
{
|
|
let s = String::deserialize(deserializer)?;
|
|
s.parse().map_err(serde::de::Error::custom)
|
|
}
|
|
}
|
|
|
|
/// The syntactic representation of a rule.
|
|
#[derive(Serialize, Deserialize, Debug, PartialEq, PartialOrd, Clone)]
|
|
pub struct RuleSyntax {
|
|
/// Human-readable name of the rule.
|
|
pub name: String,
|
|
/// Globally unique identifier for the rule.
|
|
pub id: String,
|
|
/// The regex pattern used by the rule.
|
|
pub pattern: String,
|
|
/// Minimum Shannon entropy required.
|
|
#[serde(default)]
|
|
pub min_entropy: f32,
|
|
/// Confidence level of the rule.
|
|
#[serde(default)]
|
|
pub confidence: Confidence,
|
|
/// Whether the rule is visible to end-users.
|
|
#[serde(default = "default_true")]
|
|
pub visible: bool,
|
|
/// Example inputs that should match.
|
|
#[serde(default)]
|
|
pub examples: Vec<String>,
|
|
/// Example inputs that should not match.
|
|
#[serde(default)]
|
|
pub negative_examples: Vec<String>,
|
|
/// References (e.g., URLs) for further context.
|
|
#[serde(default)]
|
|
pub references: Vec<String>,
|
|
/// Optional validation configuration.
|
|
#[serde(default)]
|
|
pub validation: Option<Validation>,
|
|
/// Optional dependencies on other rules.
|
|
#[serde(default)]
|
|
pub depends_on_rule: Vec<Option<DependsOnRule>>,
|
|
/// Optional character type requirements for matched secrets.
|
|
#[serde(default)]
|
|
pub pattern_requirements: Option<PatternRequirements>,
|
|
}
|
|
|
|
lazy_static! {
|
|
/// Regex pattern used to remove vectorscan-style comments from rule patterns.
|
|
pub static ref RULE_COMMENTS_PATTERN: Regex = Regex::new(
|
|
r"(?m)(\(\?#[^)]*\))|(\s\#[\sa-zA-Z]*$)"
|
|
).expect("comment-stripping regex should compile");
|
|
static ref PATTERN_REQUIREMENTS_TEMPLATE_PARSER: liquid::Parser =
|
|
liquid_filters::register_all(ParserBuilder::with_stdlib())
|
|
.build()
|
|
.expect("pattern requirement template parser should compile");
|
|
}
|
|
|
|
impl RuleSyntax {
|
|
/// Maximum allowed regex size.
|
|
const REGEX_SIZE_LIMIT: usize = 16 * 1024 * 1024;
|
|
|
|
/// Returns the rule pattern with comments removed.
|
|
pub fn uncommented_pattern(&self) -> Cow<'_, str> {
|
|
RULE_COMMENTS_PATTERN.replace_all(&self.pattern, "")
|
|
}
|
|
|
|
/// Helper to build a regex from a pattern string.
|
|
fn build_regex(pattern: &str) -> Result<regex::bytes::Regex> {
|
|
regex::bytes::RegexBuilder::new(pattern)
|
|
.unicode(false)
|
|
.size_limit(Self::REGEX_SIZE_LIMIT)
|
|
.build()
|
|
.context("Failed to build regex")
|
|
}
|
|
|
|
/// Compile the rule pattern into a regex.
|
|
pub fn as_regex(&self) -> Result<regex::bytes::Regex> {
|
|
Self::build_regex(&self.uncommented_pattern())
|
|
}
|
|
|
|
/// Compile the rule pattern into an anchored regex (matching end-of-input).
|
|
///
|
|
/// # Example
|
|
///
|
|
/// ```
|
|
/// # use kingfisher_rules::RuleSyntax;
|
|
/// let r = RuleSyntax {
|
|
/// name: "Test rule".to_string(),
|
|
/// id: "test.1".to_string(),
|
|
/// pattern: r"hello\s*world".to_string(),
|
|
/// examples: vec![],
|
|
/// negative_examples: vec![],
|
|
/// references: vec![],
|
|
/// min_entropy: 0.0,
|
|
/// confidence: Default::default(),
|
|
/// visible: true,
|
|
/// validation: None,
|
|
/// depends_on_rule: vec![],
|
|
/// pattern_requirements: None,
|
|
/// };
|
|
/// assert_eq!(r.as_anchored_regex().unwrap().as_str(), r"hello\s*world$");
|
|
/// ```
|
|
pub fn as_anchored_regex(&self) -> Result<regex::bytes::Regex> {
|
|
Self::build_regex(&format!("{}$", self.uncommented_pattern()))
|
|
}
|
|
|
|
/// Computes a content-based fingerprint of the rule's pattern.
|
|
pub fn finding_sha1_fingerprint(&self) -> String {
|
|
let hash = xxh3_64(self.pattern.as_bytes());
|
|
format!("{:x}", hash)
|
|
}
|
|
|
|
/// Serializes the rule syntax to JSON.
|
|
pub fn to_json(&self) -> String {
|
|
serde_json::to_string(self).expect("Serialization of rule syntax should succeed")
|
|
}
|
|
|
|
/// Loads rule syntax definitions from a YAML file.
|
|
pub fn from_yaml_file<P: AsRef<Path>>(path: P) -> Result<Vec<Self>> {
|
|
let path = path.as_ref();
|
|
let contents = std::fs::read_to_string(path)
|
|
.with_context(|| format!("Failed to read file: {}", path.display()))?;
|
|
serde_yaml::from_str(&contents).map_err(|e| {
|
|
let context = e.location().map_or(String::new(), |loc| {
|
|
format!(" at line {} column {}", loc.line(), loc.column())
|
|
});
|
|
anyhow!("Failed to parse YAML from {}{}: {}", path.display(), context, e)
|
|
})
|
|
}
|
|
}
|
|
|
|
/// A rule combines its syntactic definition with computed metadata.
|
|
#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
|
|
pub struct Rule {
|
|
pub syntax: RuleSyntax,
|
|
finding_sha1_fingerprint: String,
|
|
min_entropy: f32,
|
|
visible: bool,
|
|
}
|
|
|
|
impl Rule {
|
|
/// Constructs a new rule from its syntax.
|
|
pub fn new(syntax: RuleSyntax) -> Self {
|
|
Self {
|
|
finding_sha1_fingerprint: syntax.finding_sha1_fingerprint(),
|
|
min_entropy: syntax.min_entropy,
|
|
visible: syntax.visible,
|
|
syntax,
|
|
}
|
|
}
|
|
|
|
/// Returns a reference to the rule's syntax.
|
|
pub fn syntax(&self) -> &RuleSyntax {
|
|
&self.syntax
|
|
}
|
|
|
|
/// Returns the JSON representation of the rule's syntax.
|
|
pub fn json_syntax(&self) -> String {
|
|
self.syntax.to_json()
|
|
}
|
|
|
|
/// Returns the rule's computed fingerprint.
|
|
pub fn finding_sha1_fingerprint(&self) -> &str {
|
|
&self.finding_sha1_fingerprint
|
|
}
|
|
|
|
/// Returns the human-readable name of the rule.
|
|
pub fn name(&self) -> &str {
|
|
&self.syntax.name
|
|
}
|
|
|
|
/// Returns the unique identifier of the rule.
|
|
pub fn id(&self) -> &str {
|
|
&self.syntax.id
|
|
}
|
|
|
|
/// Returns the rule's minimum entropy requirement.
|
|
pub fn min_entropy(&self) -> f32 {
|
|
self.min_entropy
|
|
}
|
|
|
|
/// Updates the rule's minimum entropy value.
|
|
///
|
|
/// Returns an error if the new value is negative.
|
|
pub fn set_entropy(&mut self, new_entropy: f32) -> Result<()> {
|
|
if new_entropy < 0.0 {
|
|
return Err(anyhow!("Entropy value cannot be negative"));
|
|
}
|
|
self.min_entropy = new_entropy;
|
|
self.syntax.min_entropy = new_entropy;
|
|
Ok(())
|
|
}
|
|
|
|
/// Returns whether the rule is visible.
|
|
pub fn visible(&self) -> bool {
|
|
self.visible
|
|
}
|
|
|
|
/// Returns the confidence level of the rule.
|
|
pub fn confidence(&self) -> Confidence {
|
|
self.syntax.confidence
|
|
}
|
|
|
|
/// Returns the character requirements for this rule, if any.
|
|
pub fn pattern_requirements(&self) -> Option<&PatternRequirements> {
|
|
self.syntax.pattern_requirements.as_ref()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use regex::bytes::Regex as BytesRegex;
|
|
|
|
#[test]
|
|
fn test_pattern_requirements_digits() {
|
|
let reqs = PatternRequirements {
|
|
min_digits: Some(2),
|
|
min_uppercase: None,
|
|
min_lowercase: None,
|
|
min_special_chars: None,
|
|
special_chars: None,
|
|
ignore_if_contains: None,
|
|
checksum: None,
|
|
};
|
|
|
|
// Should pass: has 3 digits
|
|
assert!(matches!(reqs.validate(b"abc123def", None, true), PatternValidationResult::Passed));
|
|
|
|
// Should fail: only 1 digit
|
|
assert!(matches!(reqs.validate(b"abc1def", None, true), PatternValidationResult::Failed));
|
|
|
|
// Should fail: no digits
|
|
assert!(matches!(reqs.validate(b"abcdef", None, true), PatternValidationResult::Failed));
|
|
}
|
|
|
|
#[test]
|
|
fn test_pattern_requirements_checksum() {
|
|
let reqs = PatternRequirements {
|
|
min_digits: None,
|
|
min_uppercase: None,
|
|
min_lowercase: None,
|
|
min_special_chars: None,
|
|
special_chars: None,
|
|
ignore_if_contains: None,
|
|
checksum: Some(ChecksumRequirement {
|
|
actual: ChecksumActual {
|
|
template: "{{ MATCH | suffix: 6 }}".to_string(),
|
|
requires_capture: Some("checksum".to_string()),
|
|
},
|
|
expected: "{{ BODY | crc32 | base62: 6 }}".to_string(),
|
|
skip_if_missing: true,
|
|
}),
|
|
};
|
|
|
|
let token = b"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";
|
|
let regex =
|
|
BytesRegex::new(r"(?x) ghp_(?P<body>[A-Za-z0-9]{30})(?P<checksum>[A-Za-z0-9]{6})")
|
|
.unwrap();
|
|
let captures = regex.captures(token).expect("token should match");
|
|
assert!(matches!(
|
|
reqs.validate(
|
|
token,
|
|
Some(PatternRequirementContext {
|
|
regex: ®ex,
|
|
captures: &captures,
|
|
full_match: token
|
|
}),
|
|
true
|
|
),
|
|
PatternValidationResult::Passed
|
|
));
|
|
|
|
let mut invalid = token.to_vec();
|
|
*invalid.last_mut().unwrap() = b'0';
|
|
let captures_invalid =
|
|
regex.captures(&invalid).expect("invalid token should still match pattern");
|
|
assert!(matches!(
|
|
reqs.validate(
|
|
&invalid,
|
|
Some(PatternRequirementContext {
|
|
regex: ®ex,
|
|
captures: &captures_invalid,
|
|
full_match: &invalid,
|
|
}),
|
|
true
|
|
),
|
|
PatternValidationResult::FailedChecksum { .. }
|
|
));
|
|
|
|
let legacy = b"ghp_legacy_token";
|
|
assert!(matches!(reqs.validate(legacy, None, true), PatternValidationResult::Passed));
|
|
}
|
|
|
|
#[test]
|
|
fn test_pattern_requirements_uppercase() {
|
|
let reqs = PatternRequirements {
|
|
min_digits: None,
|
|
min_uppercase: Some(2),
|
|
min_lowercase: None,
|
|
min_special_chars: None,
|
|
special_chars: None,
|
|
ignore_if_contains: None,
|
|
checksum: None,
|
|
};
|
|
|
|
// Should pass: has 3 uppercase
|
|
assert!(matches!(reqs.validate(b"ABCdef", None, true), PatternValidationResult::Passed));
|
|
|
|
// Should fail: only 1 uppercase
|
|
assert!(matches!(reqs.validate(b"Adef", None, true), PatternValidationResult::Failed));
|
|
|
|
// Should fail: no uppercase
|
|
assert!(matches!(reqs.validate(b"abcdef", None, true), PatternValidationResult::Failed));
|
|
}
|
|
|
|
#[test]
|
|
fn test_pattern_requirements_lowercase() {
|
|
let reqs = PatternRequirements {
|
|
min_digits: None,
|
|
min_uppercase: None,
|
|
min_lowercase: Some(2),
|
|
min_special_chars: None,
|
|
special_chars: None,
|
|
ignore_if_contains: None,
|
|
checksum: None,
|
|
};
|
|
|
|
// Should pass: has 3 lowercase
|
|
assert!(matches!(reqs.validate(b"ABCdef", None, true), PatternValidationResult::Passed));
|
|
|
|
// Should fail: only 1 lowercase
|
|
assert!(matches!(reqs.validate(b"ABCd", None, true), PatternValidationResult::Failed));
|
|
|
|
// Should fail: no lowercase
|
|
assert!(matches!(reqs.validate(b"ABC123", None, true), PatternValidationResult::Failed));
|
|
}
|
|
|
|
#[test]
|
|
fn test_pattern_requirements_special_chars() {
|
|
let reqs = PatternRequirements {
|
|
min_digits: None,
|
|
min_uppercase: None,
|
|
min_lowercase: None,
|
|
min_special_chars: Some(2),
|
|
special_chars: None, // uses default
|
|
ignore_if_contains: None,
|
|
checksum: None,
|
|
};
|
|
|
|
// Should pass: has 2 special chars
|
|
assert!(matches!(reqs.validate(b"abc!@def", None, true), PatternValidationResult::Passed));
|
|
|
|
// Should fail: only 1 special char
|
|
assert!(matches!(reqs.validate(b"abc!def", None, true), PatternValidationResult::Failed));
|
|
|
|
// Should fail: no special chars
|
|
assert!(matches!(reqs.validate(b"abcdef", None, true), PatternValidationResult::Failed));
|
|
}
|
|
|
|
#[test]
|
|
fn test_pattern_requirements_custom_special_chars() {
|
|
let reqs = PatternRequirements {
|
|
min_digits: None,
|
|
min_uppercase: None,
|
|
min_lowercase: None,
|
|
min_special_chars: Some(2),
|
|
special_chars: Some("$%^".to_string()),
|
|
ignore_if_contains: None,
|
|
checksum: None,
|
|
};
|
|
|
|
// Should pass: has 2 custom special chars
|
|
assert!(matches!(reqs.validate(b"abc$%def", None, true), PatternValidationResult::Passed));
|
|
|
|
// Should fail: has special chars but not the custom ones
|
|
assert!(matches!(reqs.validate(b"abc!@def", None, true), PatternValidationResult::Failed));
|
|
|
|
// Should fail: only 1 custom special char
|
|
assert!(matches!(reqs.validate(b"abc$def", None, true), PatternValidationResult::Failed));
|
|
}
|
|
|
|
#[test]
|
|
fn test_pattern_requirements_combined() {
|
|
let reqs = PatternRequirements {
|
|
min_digits: Some(1),
|
|
min_uppercase: Some(1),
|
|
min_lowercase: Some(1),
|
|
min_special_chars: Some(1),
|
|
special_chars: None,
|
|
ignore_if_contains: None,
|
|
checksum: None,
|
|
};
|
|
|
|
// Should pass: has all requirements
|
|
assert!(matches!(reqs.validate(b"Abc1!", None, true), PatternValidationResult::Passed));
|
|
|
|
// Should fail: missing digit
|
|
assert!(matches!(reqs.validate(b"Abc!", None, true), PatternValidationResult::Failed));
|
|
|
|
// Should fail: missing uppercase
|
|
assert!(matches!(reqs.validate(b"abc1!", None, true), PatternValidationResult::Failed));
|
|
|
|
// Should fail: missing lowercase
|
|
assert!(matches!(reqs.validate(b"ABC1!", None, true), PatternValidationResult::Failed));
|
|
|
|
// Should fail: missing special
|
|
assert!(matches!(reqs.validate(b"Abc1", None, true), PatternValidationResult::Failed));
|
|
}
|
|
|
|
#[test]
|
|
fn test_pattern_requirements_ignore_if_contains() {
|
|
let reqs = PatternRequirements {
|
|
min_digits: None,
|
|
min_uppercase: None,
|
|
min_lowercase: None,
|
|
min_special_chars: None,
|
|
special_chars: None,
|
|
ignore_if_contains: Some(vec!["test".to_string(), "Demo".to_string()]),
|
|
checksum: None,
|
|
};
|
|
|
|
// Should fail: contains "test" (case-insensitive)
|
|
assert!(matches!(
|
|
reqs.validate(b"MyTestToken", None, true),
|
|
PatternValidationResult::IgnoredBySubstring { .. }
|
|
));
|
|
|
|
// Should fail: contains "demo" (case-insensitive)
|
|
assert!(matches!(
|
|
reqs.validate(b"example-demo-value", None, true),
|
|
PatternValidationResult::IgnoredBySubstring { .. }
|
|
));
|
|
|
|
// Should pass: does not contain excluded words
|
|
assert!(matches!(
|
|
reqs.validate(b"example-value", None, true),
|
|
PatternValidationResult::Passed
|
|
));
|
|
}
|
|
|
|
#[test]
|
|
fn test_pattern_requirements_ignore_if_contains_ignores_empty_entries() {
|
|
let reqs = PatternRequirements {
|
|
min_digits: None,
|
|
min_uppercase: None,
|
|
min_lowercase: None,
|
|
min_special_chars: None,
|
|
special_chars: None,
|
|
ignore_if_contains: Some(vec![" ".to_string(), "".to_string(), "BLOCK".to_string()]),
|
|
checksum: None,
|
|
};
|
|
|
|
// Should fail only when non-empty exclusion matches
|
|
assert!(matches!(
|
|
reqs.validate(b"needs-blocking", None, true),
|
|
PatternValidationResult::IgnoredBySubstring { .. }
|
|
));
|
|
assert!(matches!(reqs.validate(b"allowed", None, true), PatternValidationResult::Passed));
|
|
}
|
|
|
|
#[test]
|
|
fn test_pattern_requirements_ignore_if_contains_can_be_disabled() {
|
|
let reqs = PatternRequirements {
|
|
min_digits: None,
|
|
min_uppercase: None,
|
|
min_lowercase: None,
|
|
min_special_chars: None,
|
|
special_chars: None,
|
|
ignore_if_contains: Some(vec!["ignoreme".to_string()]),
|
|
checksum: None,
|
|
};
|
|
|
|
// With ignoring enabled, the match is skipped
|
|
assert!(matches!(
|
|
reqs.validate(b"value-ignoreme", None, true),
|
|
PatternValidationResult::IgnoredBySubstring { .. }
|
|
));
|
|
|
|
// With ignoring disabled, the same input passes requirements
|
|
assert!(matches!(
|
|
reqs.validate(b"value-ignoreme", None, false),
|
|
PatternValidationResult::Passed
|
|
));
|
|
}
|
|
|
|
#[test]
|
|
fn test_pattern_requirements_none() {
|
|
let reqs = PatternRequirements {
|
|
min_digits: None,
|
|
min_uppercase: None,
|
|
min_lowercase: None,
|
|
min_special_chars: None,
|
|
special_chars: None,
|
|
ignore_if_contains: None,
|
|
checksum: None,
|
|
};
|
|
|
|
// Should pass: no requirements
|
|
assert!(matches!(reqs.validate(b"anything", None, true), PatternValidationResult::Passed));
|
|
assert!(matches!(reqs.validate(b"123", None, true), PatternValidationResult::Passed));
|
|
assert!(matches!(reqs.validate(b"!@#", None, true), PatternValidationResult::Passed));
|
|
}
|
|
}
|