forked from mirrors/kingfisher
Added an optional `exclude_words` list to `PatternRequirements` so that matches containing any of the listed placeholder words (compared case-insensitively) are filtered out, with accompanying tests covering the new behavior.
This commit is contained in:
parent
8c9a0ad771
commit
3b3a4e5030
4 changed files with 149 additions and 6 deletions
|
|
@ -1004,7 +1004,9 @@ mod test {
|
|||
use crate::{
|
||||
blob::{Blob, BlobIdMap},
|
||||
origin::{Origin, OriginSet},
|
||||
rules::rule::{DependsOnRule, HttpRequest, HttpValidation, RuleSyntax, Validation},
|
||||
rules::rule::{
|
||||
DependsOnRule, HttpRequest, HttpValidation, PatternRequirements, RuleSyntax, Validation,
|
||||
},
|
||||
};
|
||||
|
||||
proptest! {
|
||||
|
|
@ -1138,6 +1140,51 @@ mod test {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
fn test_pattern_requirements_exclude_words_filters_matches() -> Result<()> {
    // The only requirement configured is the exclusion list. It is spelled in
    // upper case while the input below uses lower case, so the test also
    // exercises the case-insensitive comparison.
    let requirements = PatternRequirements {
        min_digits: None,
        min_uppercase: None,
        min_lowercase: None,
        min_special_chars: None,
        special_chars: None,
        exclude_words: Some(vec!["TEST".to_string()]),
    };

    // A single rule matching `prefix` followed by one or more ASCII letters.
    let syntax = RuleSyntax {
        id: "test.exclude".to_string(),
        name: "exclude words".to_string(),
        pattern: "(?P<token>prefix[A-Za-z]+)".to_string(),
        confidence: crate::rules::rule::Confidence::Medium,
        min_entropy: 0.0,
        visible: true,
        examples: vec![],
        negative_examples: vec![],
        references: vec![],
        validation: None,
        depends_on_rule: vec![],
        pattern_requirements: Some(requirements),
    };
    let rules_db = RulesDatabase::from_rules(vec![Rule::new(syntax)])?;

    // Two candidate tokens: the second one contains the excluded word.
    let input = b"prefixgood prefixtest";
    let seen_blobs: BlobIdMap<bool> = BlobIdMap::new();
    let vectorscan_db = Arc::new(rules_db.vsdb.clone());
    let scanner_pool = Arc::new(ScannerPool::new(vectorscan_db));
    let mut matcher =
        Matcher::new(&rules_db, scanner_pool, &seen_blobs, None, false, None, &[], false)?;

    matcher.scan_bytes_raw(input, "fname")?;

    // Exactly one raw match must be left after the scan.
    let matches = &matcher.user_data.raw_matches_scratch;
    assert_eq!(matches.len(), 1, "exclude_words should drop filtered matches");

    // The surviving match must span the token without the excluded word.
    let RawMatch { start_idx, end_idx, .. } = matches[0];
    assert_eq!(
        &input[start_idx as usize..end_idx as usize],
        b"prefixgood",
        "remaining match should be the non-excluded token",
    );

    Ok(())
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// additional deterministic unit-tests
|
||||
// ---------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -70,6 +70,9 @@ pub struct PatternRequirements {
|
|||
/// Custom set of characters to consider as "special" (defaults to common punctuation)
|
||||
#[serde(default)]
|
||||
pub special_chars: Option<String>,
|
||||
/// Words that should cause the match to be excluded when present (case-insensitive)
|
||||
#[serde(default)]
|
||||
pub exclude_words: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl PatternRequirements {
|
||||
|
|
@ -108,13 +111,33 @@ impl PatternRequirements {
|
|||
|
||||
// Check special character requirement
|
||||
if let Some(min_special) = self.min_special_chars {
|
||||
let special_chars = self.special_chars.as_deref().unwrap_or(Self::DEFAULT_SPECIAL_CHARS);
|
||||
let special_chars =
|
||||
self.special_chars.as_deref().unwrap_or(Self::DEFAULT_SPECIAL_CHARS);
|
||||
let special_count = s.chars().filter(|c| special_chars.contains(*c)).count();
|
||||
if special_count < min_special {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check exclude words requirement
|
||||
if let Some(exclude_words) = self.exclude_words.as_ref() {
|
||||
let lowercase_input = s.to_lowercase();
|
||||
if exclude_words
|
||||
.iter()
|
||||
.filter_map(|word| {
|
||||
let trimmed = word.trim();
|
||||
if trimmed.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(trimmed.to_lowercase())
|
||||
}
|
||||
})
|
||||
.any(|word| lowercase_input.contains(&word))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
|
|
@ -526,6 +549,7 @@ mod tests {
|
|||
min_lowercase: None,
|
||||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: has 3 digits
|
||||
|
|
@ -546,6 +570,7 @@ mod tests {
|
|||
min_lowercase: None,
|
||||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: has 3 uppercase
|
||||
|
|
@ -566,6 +591,7 @@ mod tests {
|
|||
min_lowercase: Some(2),
|
||||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: has 3 lowercase
|
||||
|
|
@ -586,6 +612,7 @@ mod tests {
|
|||
min_lowercase: None,
|
||||
min_special_chars: Some(2),
|
||||
special_chars: None, // uses default
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: has 2 special chars
|
||||
|
|
@ -606,6 +633,7 @@ mod tests {
|
|||
min_lowercase: None,
|
||||
min_special_chars: Some(2),
|
||||
special_chars: Some("$%^".to_string()),
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: has 2 custom special chars
|
||||
|
|
@ -626,6 +654,7 @@ mod tests {
|
|||
min_lowercase: Some(1),
|
||||
min_special_chars: Some(1),
|
||||
special_chars: None,
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: has all requirements
|
||||
|
|
@ -644,6 +673,43 @@ mod tests {
|
|||
assert!(!reqs.validate(b"Abc1"));
|
||||
}
|
||||
|
||||
#[test]
fn test_pattern_requirements_exclude_words() {
    // Exclusion list with mixed casing; no other requirement is set.
    let blocked: Vec<String> = ["test", "Demo"].iter().map(|word| word.to_string()).collect();
    let reqs = PatternRequirements {
        exclude_words: Some(blocked),
        special_chars: None,
        min_special_chars: None,
        min_lowercase: None,
        min_uppercase: None,
        min_digits: None,
    };

    // Rejected: "Test" inside the input matches the listed word "test".
    assert!(!reqs.validate(b"MyTestToken"));

    // Rejected: "demo" inside the input matches the listed word "Demo".
    assert!(!reqs.validate(b"example-demo-value"));

    // Accepted: neither listed word occurs in the input.
    assert!(reqs.validate(b"example-value"));
}
|
||||
|
||||
#[test]
fn test_pattern_requirements_exclude_words_ignores_empty_entries() {
    // A whitespace-only entry, an empty entry, and one real word.
    let blocked: Vec<String> = [" ", "", "BLOCK"].iter().map(|word| word.to_string()).collect();
    let reqs = PatternRequirements {
        exclude_words: Some(blocked),
        special_chars: None,
        min_special_chars: None,
        min_lowercase: None,
        min_uppercase: None,
        min_digits: None,
    };

    // Rejected: the input contains "block", matching the entry "BLOCK".
    assert!(!reqs.validate(b"needs-blocking"));

    // Accepted: the blank entries must not cause a rejection on their own.
    assert!(reqs.validate(b"allowed"));
}
|
||||
|
||||
#[test]
|
||||
fn test_pattern_requirements_none() {
|
||||
let reqs = PatternRequirements {
|
||||
|
|
@ -652,6 +718,7 @@ mod tests {
|
|||
min_lowercase: None,
|
||||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
exclude_words: None,
|
||||
};
|
||||
|
||||
// Should pass: no requirements
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue