Added checksum comparisons to pattern_requirements, new suffix, crc32, and base62 Liquid filters, and verbose logging so mismatched checksums are skipped with context rather than reported as findings.

This commit is contained in:
Mick Grove 2025-11-07 16:31:24 -08:00
commit ccbbbad5bc
16 changed files with 2355 additions and 122 deletions

View file

@ -147,12 +147,23 @@ impl FindingsStore {
1. Optional duplicate filter (unchanged)
*/
if dedup {
// Prefer the full unnamed match (index 0). Fall back to a named TOKEN capture
// before using whatever capture is available.
let snippet = m
.groups
.captures
.get(1)
.or_else(|| m.groups.captures.get(0))
.map_or("", |c| c.value);
.iter()
.find(|c| c.name.is_none() && c.match_number == 0)
.map(|c| c.value)
.or_else(|| {
m.groups
.captures
.iter()
.find(|c| matches!(c.name.as_deref(), Some("TOKEN")))
.map(|c| c.value)
})
.or_else(|| m.groups.captures.get(0).map(|c| c.value))
.unwrap_or("");
let origin_kind = match origin.first() {
Origin::GitRepo(_) => "git",

View file

@ -1,6 +1,7 @@
//! Collection of small Liquid filters that make HTTP validations & API-signing templates easy
use base64::{engine::general_purpose, Engine};
use crc32fast::Hasher;
use hmac::{Hmac, Mac};
use liquid_core::{
Display_filter, Error as LiquidError, Expression, Filter, FilterParameters, FilterReflection,
@ -223,22 +224,90 @@ impl Filter for HmacSha384Filter {
}
// ── random_string ────────────────────────────────
static_filter!(
/// Random alphanumeric string (default 32 chars).
RandomStringFilter { len: Option<usize> },
"random_string",
|s: &RandomStringFilter, input: &dyn ValueView| -> String {
let n = s.len // explicit argument?
.or_else(|| input.to_kstr().parse().ok()) // else parse input
.unwrap_or(32); // else default
#[derive(Debug, FilterParameters)]
struct RandomStringArgs {
#[parameter(description = "Desired output length", arg_type = "integer")]
len: Option<Expression>,
}
rand::rng()
.sample_iter(&Alphanumeric)
.take(n)
.map(char::from)
.collect()
#[derive(Clone, ParseFilter, FilterReflection, Default)]
#[filter(
name = "random_string",
description = "Random alphanumeric string (default 32 chars).",
parameters(RandomStringArgs),
parsed(RandomString)
)]
pub struct RandomStringFilter;
#[derive(Debug, FromFilterParameters, Display_filter)]
#[name = "random_string"]
struct RandomString {
#[parameters]
args: RandomStringArgs,
}
impl Filter for RandomString {
fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
let args = self.args.evaluate(runtime)?;
let n = args
.len
.and_then(|value| {
let scalar = Value::scalar(value);
value_to_usize(&scalar)
})
.or_else(|| input.to_kstr().parse().ok())
.unwrap_or(32);
let value: String =
rand::rng().sample_iter(&Alphanumeric).take(n).map(char::from).collect();
Ok(Value::scalar(value))
}
);
}
// Arguments accepted by the `suffix` filter.
#[derive(Debug, FilterParameters)]
struct SuffixArgs {
// Optional; when it is omitted, `Suffix::evaluate` falls back to the input's full length,
// so the whole string is returned.
#[parameter(description = "Number of trailing characters to keep", arg_type = "integer")]
len: Option<Expression>,
}
// Filter type for the Liquid filter named `suffix`; `register_all` adds it to the parser.
#[derive(Clone, ParseFilter, FilterReflection, Default)]
#[filter(
name = "suffix",
description = "Return the suffix (last N characters) of the provided string.",
parameters(SuffixArgs),
parsed(Suffix)
)]
pub struct SuffixFilter;
// Parsed form of the filter (named by `parsed(Suffix)` above). It carries the arguments
// that `evaluate` resolves against the runtime for each render.
#[derive(Debug, FromFilterParameters, Display_filter)]
#[name = "suffix"]
struct Suffix {
#[parameters]
args: SuffixArgs,
}
impl Filter for Suffix {
    /// Return the last `len` characters of the input's string rendering.
    ///
    /// Lengths are counted in `char`s. A `len` larger than the input yields the whole
    /// input, and a `len` of zero yields an empty string.
    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
        let args = self.args.evaluate(runtime)?;
        let text = input.to_kstr();

        // With no usable `len`, fall back to the byte length. It is never smaller than
        // the character count, so the clamp below keeps the entire string.
        let explicit_len = args.len.and_then(|raw| value_to_usize(&Value::scalar(raw)));
        let wanted = explicit_len.unwrap_or_else(|| text.len());

        // Skip everything in front of the tail we want to keep.
        let total_chars = text.chars().count();
        let to_skip = total_chars - wanted.min(total_chars);
        let tail: String = text.chars().skip(to_skip).collect();

        Ok(Value::scalar(tail))
    }
}
#[derive(Debug, Clone, Default, FilterReflection, ParseFilter)]
#[filter(
@ -307,6 +376,111 @@ static_filter!(
}
);
static_filter!(
/// Compute the CRC32 of the input and return it as a decimal number.
Crc32Filter,
"crc32",
|input: &dyn ValueView| -> i64 {
// Hash the bytes of the input's string rendering.
let mut hasher = Hasher::new();
hasher.update(input.to_kstr().as_bytes());
// Convert the finalized checksum to `i64` through the infallible `From` conversion,
// so the filter produces an integer rather than a string.
i64::from(hasher.finalize())
}
);
// Arguments accepted by the `base62` filter.
#[derive(Debug, FilterParameters)]
struct Base62Args {
// Optional minimum output width; `Base62::evaluate` left-pads shorter encodings with '0'.
#[parameter(
description = "Pad the encoded value to at least this width",
arg_type = "integer"
)]
width: Option<Expression>,
}
// Filter type for the Liquid filter named `base62`; `register_all` adds it to the parser.
#[derive(Clone, ParseFilter, FilterReflection, Default)]
#[filter(
name = "base62",
description = "Encode the provided integer value using Base62.",
parameters(Base62Args),
parsed(Base62)
)]
pub struct Base62Filter;
// Parsed form of the filter (named by `parsed(Base62)` above). It carries the arguments
// that `evaluate` resolves against the runtime for each render.
#[derive(Debug, FromFilterParameters, Display_filter)]
#[name = "base62"]
struct Base62 {
#[parameters]
args: Base62Args,
}
impl Filter for Base62 {
    /// Encode the input as an unsigned Base62 number, optionally zero-padded.
    ///
    /// The input is coerced to `u64` in this order: integer (negatives clamp to 0),
    /// float (floored, negatives clamp to 0), boolean (0 or 1), then a `u64` parse of the
    /// string rendering. Anything that cannot be coerced is encoded as 0. When `width`
    /// is given and the encoding is shorter, it is left-padded with `'0'`.
    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
        let args = self.args.evaluate(runtime)?;

        // Try the scalar interpretations first, from most to least specific.
        let from_scalar = input.as_scalar().and_then(|scalar| {
            if let Some(int) = scalar.to_integer() {
                return Some(int.max(0) as u64);
            }
            if let Some(float) = scalar.to_float() {
                return Some(if float.is_sign_negative() { 0 } else { float.floor() as u64 });
            }
            if let Some(flag) = scalar.to_bool() {
                return Some(u64::from(flag));
            }
            scalar.to_kstr().to_string().parse::<u64>().ok()
        });

        // Last resort: parse whatever the value renders as; otherwise encode zero.
        let number = match from_scalar {
            Some(n) => n,
            None => input.to_kstr().to_string().parse::<u64>().unwrap_or(0),
        };

        let digits = encode_base62(number);
        let min_width = args.width.and_then(|raw| value_to_usize(&Value::scalar(raw)));

        let encoded = match min_width {
            // Only pad when the encoding is shorter than the requested width.
            Some(width) if digits.len() < width => {
                let mut padded = "0".repeat(width - digits.len());
                padded.push_str(&digits);
                padded
            }
            _ => digits,
        };

        Ok(Value::scalar(encoded))
    }
}
/// Encode `value` as a Base62 string using the alphabet `0-9`, `A-Z`, `a-z`.
///
/// The most significant digit comes first, and zero encodes as `"0"`.
fn encode_base62(mut value: u64) -> String {
    const ALPHABET: &[u8; 62] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

    // Digits are produced least-significant first. The loop body always runs once, so a
    // zero input still emits a single '0'.
    let mut digits: Vec<u8> = Vec::new();
    loop {
        digits.push(ALPHABET[(value % 62) as usize]);
        value /= 62;
        if value == 0 {
            break;
        }
    }

    // Reverse into most-significant-first order while converting bytes to chars.
    digits.into_iter().rev().map(char::from).collect()
}
/// Coerce a Liquid value into a `usize`, returning `None` when no interpretation applies.
///
/// Scalars are tried as integer (negatives clamp to 0), float (floored, negatives clamp
/// to 0), boolean (0 or 1), and finally as a `usize` parse of their string form. If the
/// value is not a scalar, or none of those produced a number, the value's string
/// rendering is parsed instead.
fn value_to_usize(value: &Value) -> Option<usize> {
    let view = value.as_view();

    if let Some(scalar) = view.as_scalar() {
        let converted = if let Some(int) = scalar.to_integer() {
            Some(int.max(0) as usize)
        } else if let Some(float) = scalar.to_float() {
            Some(if float.is_sign_negative() { 0 } else { float.floor() as usize })
        } else if let Some(flag) = scalar.to_bool() {
            Some(usize::from(flag))
        } else {
            scalar.to_kstr().parse::<usize>().ok()
        };
        if converted.is_some() {
            return converted;
        }
    }

    // Non-scalar value, or a scalar that could not be interpreted above.
    view.to_kstr().parse::<usize>().ok()
}
// {{ value | b64url_enc }} URL-safe base64 w/o padding
static_filter!(
/// Base64 URL-safe (no = padding).
@ -415,6 +589,9 @@ pub fn register_all(builder: liquid::ParserBuilder) -> liquid::ParserBuilder {
.filter(B64EncFilter::default())
.filter(B64DecFilter::default())
.filter(RandomStringFilter::default())
.filter(SuffixFilter::default())
.filter(Crc32Filter::default())
.filter(Base62Filter::default())
.filter(HmacSha256::default())
.filter(HmacSha1::default())
.filter(HmacSha384::default())
@ -461,6 +638,20 @@ mod tests {
assert_eq!(render(r#"{{ "hello" | sha256 }}"#), expect);
}
#[test]
fn suffix_filter() {
assert_eq!(render(r#"{{ "abcdef" | suffix: 3 }}"#), "def");
assert_eq!(render(r#"{{ "short" | suffix: 10 }}"#), "short");
assert_eq!(render(r#"{{ "value" | suffix: 0 }}"#), "");
}
#[test]
fn crc32_and_base62_filters() {
assert_eq!(render(r#"{{ "hello" | crc32 }}"#), "907060870");
assert_eq!(render(r#"{{ "hello" | crc32 | base62 }}"#), "zNvy2");
assert_eq!(render(r#"{{ "hello" | crc32 | base62: 6 }}"#), "0zNvy2");
}
#[test]
fn hmac_sha1_filter() {
let key = b"key1";

View file

@ -5,27 +5,27 @@
// * Fallback - system allocator (`system-alloc` feature)
// ────────────────────────────────────────────────────────────
// --- jemalloc (opt-in) ---
#[cfg(feature = "use-jemalloc")]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
// // --- jemalloc (opt-in) ---
// #[cfg(feature = "use-jemalloc")]
// #[global_allocator]
// static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
// --- mimalloc (default) ---
#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))]
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
// --- system allocator (explicit opt-out) ---
#[cfg(feature = "system-alloc")]
use std::alloc::System;
#[cfg(feature = "system-alloc")]
#[global_allocator]
static GLOBAL: System = System;
// // --- mimalloc (default) ---
// #[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))]
// #[global_allocator]
// static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
// // --- system allocator (explicit opt-out) ---
// #[cfg(feature = "system-alloc")]
// use std::alloc::System;
// #[cfg(feature = "system-alloc")]
// #[global_allocator]
// static GLOBAL: System = System;
use std::alloc::System;
#[global_allocator]
static GLOBAL: System = System;
use std::{
io::{IsTerminal, Read},
sync::{Arc, Mutex},

View file

@ -29,7 +29,7 @@ use crate::{
parser,
parser::{Checker, Language},
rule_profiling::{ConcurrentRuleProfiler, RuleStats, RuleTimer},
rules::rule::{PatternValidationResult, Rule},
rules::rule::{PatternRequirementContext, PatternValidationResult, Rule},
rules_database::RulesDatabase,
safe_list::{is_safe_match, is_user_match},
scanner_pool::ScannerPool,
@ -614,7 +614,12 @@ fn filter_match<'b>(
// Check character requirements if specified
if let Some(char_reqs) = rule.pattern_requirements() {
match char_reqs.validate(mi_bytes, respect_ignore_if_contains) {
let context = PatternRequirementContext {
regex: re,
captures: &captures,
full_match: full_bytes,
};
match char_reqs.validate(mi_bytes, Some(context), respect_ignore_if_contains) {
PatternValidationResult::Passed => {}
PatternValidationResult::Failed => {
debug!(
@ -623,6 +628,15 @@ fn filter_match<'b>(
);
continue;
}
PatternValidationResult::FailedChecksum { actual_len, expected_len } => {
debug!(
"Skipping match for rule {} due to checksum mismatch (actual_len={}, expected_len={})",
rule.id(),
actual_len,
expected_len
);
continue;
}
PatternValidationResult::IgnoredBySubstring { matched_term } => {
debug!(
"Skipping match for rule {} because it contains ignored term {matched_term}",
@ -790,40 +804,31 @@ impl SerializableCaptures {
redact: bool,
) -> Self {
let mut serialized_captures: SmallVec<[SerializableCapture; 2]> = SmallVec::new();
// Process named captures
for name in re.capture_names().flatten() {
if let Some(capture) = captures.name(name) {
let value = if redact {
redact_value(&String::from_utf8_lossy(capture.as_bytes()))
} else {
String::from_utf8_lossy(capture.as_bytes()).to_string()
};
serialized_captures.push(SerializableCapture {
name: Some(name.to_string()),
match_number: -1,
start: capture.start(),
end: capture.end(),
value: intern(&value),
});
}
}
// Process unnamed captures (numbered groups)
let capture_names: SmallVec<[Option<String>; 4]> =
re.capture_names().map(|name| name.map(str::to_string)).collect();
for i in 0..captures.len() {
if let Some(capture) = captures.get(i) {
if let Some(cap) = captures.get(i) {
let value = if redact {
redact_value(&String::from_utf8_lossy(capture.as_bytes()))
redact_value(&String::from_utf8_lossy(cap.as_bytes()))
} else {
String::from_utf8_lossy(capture.as_bytes()).to_string()
String::from_utf8_lossy(cap.as_bytes()).to_string()
};
let interned = intern(&value);
let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned();
serialized_captures.push(SerializableCapture {
name: None,
name,
match_number: i32::try_from(i).unwrap_or(0),
start: capture.start(),
end: capture.end(),
value: intern(&value),
start: cap.start(),
end: cap.end(),
value: interned,
});
}
}
SerializableCaptures { captures: serialized_captures }
}
}
@ -1182,6 +1187,7 @@ mod test {
min_special_chars: None,
special_chars: None,
ignore_if_contains: Some(vec!["TEST".to_string()]),
checksum: None,
}),
})];
@ -1244,6 +1250,7 @@ mod test {
min_special_chars: None,
special_chars: None,
ignore_if_contains: Some(vec!["TEST".to_string()]),
checksum: None,
}),
})];
@ -1500,4 +1507,24 @@ line2
Ok(())
}
// Checks that `SerializableCaptures::from_captures` emits one entry per capture group in
// group-index order, with a name attached only to the groups the regex names.
#[test]
fn serializes_captures_in_numeric_order() {
// Group 0 is the whole match, group 1 is the unnamed outer group, and groups 2 and 3 are
// the named `body` and `checksum` groups.
let re =
Regex::new(r"(?xi)\b(ghp_(?P<body>[A-Z0-9]{3})(?P<checksum>[A-Z0-9]{2}))").unwrap();
let caps = re.captures(b"ghp_ABC12").expect("expected captures");
let serialized = SerializableCaptures::from_captures(&caps, b"", &re, false);
// Flatten to (name, match_number, value) tuples so the assertions below stay compact.
let entries: Vec<(Option<&str>, i32, &str)> = serialized
.captures
.iter()
.map(|cap| (cap.name.as_deref(), cap.match_number, cap.value))
.collect();
assert_eq!(entries.len(), 4);
// The whole match and the unnamed outer group cover the same text here.
assert_eq!(entries[0], (None, 0, "ghp_ABC12"));
assert_eq!(entries[1], (None, 1, "ghp_ABC12"));
assert_eq!(entries[2], (Some("body"), 2, "ABC"));
assert_eq!(entries[3], (Some("checksum"), 3, "12"));
}
}

View file

@ -10,6 +10,10 @@ use std::{
use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use liquid::{
model::{KString, Value},
object, Parser, ParserBuilder,
};
use regex::Regex;
use schemars::{
gen::SchemaGenerator,
@ -17,9 +21,12 @@ use schemars::{
JsonSchema,
};
use serde::{Deserialize, Serialize};
use tracing::debug;
// use sha1::{Digest, Sha1};
use xxhash_rust::xxh3::xxh3_64;
use crate::liquid_filters;
/// Returns false as the default value.
fn default_false() -> bool {
false
@ -73,6 +80,42 @@ pub struct PatternRequirements {
/// Words that should cause the match to be excluded when present (case-insensitive)
#[serde(default)]
pub ignore_if_contains: Option<Vec<String>>,
/// Optional checksum validation configuration.
#[serde(default)]
pub checksum: Option<ChecksumRequirement>,
}
/// Defines a checksum validation strategy for a matched pattern.
///
/// During `PatternRequirements::validate`, both `actual.template` and `expected` are rendered
/// as Liquid templates against the same globals: `MATCH`, `FULL_MATCH`, and every named
/// capture under both its original and its ASCII-uppercased name. The two rendered strings
/// must be equal; otherwise validation yields `PatternValidationResult::FailedChecksum`.
/// A template that fails to render yields `PatternValidationResult::Failed`.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
pub struct ChecksumRequirement {
/// How to obtain the checksum value carried by the match itself.
pub actual: ChecksumActual,
/// Liquid template that computes the checksum the match is expected to carry.
pub expected: String,
/// When true, the checksum check passes (instead of failing) if no capture context was
/// supplied, or if the capture named by `actual.requires_capture` is missing from the match.
#[serde(default)]
pub skip_if_missing: bool,
}
/// Describes how to extract the checksum value from a match.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
pub struct ChecksumActual {
/// Liquid template rendered to produce the checksum found in the match
/// (for example `{{ MATCH | suffix: 6 }}`).
pub template: String,
/// Optional name of a capture group that must be present before the checksum is evaluated.
/// It is looked up by name in the regex captures; if it is missing, the outcome is decided
/// by `ChecksumRequirement::skip_if_missing`.
#[serde(default)]
pub requires_capture: Option<String>,
}
/// Contextual information available when validating pattern requirements.
///
/// `PatternRequirements::validate` takes this as an `Option` and only consults it for
/// checksum validation, where it supplies the capture values exposed to the templates.
#[derive(Clone, Copy)]
pub struct PatternRequirementContext<'a> {
/// Compiled regex associated with the rule; used to enumerate capture group names.
pub regex: &'a regex::bytes::Regex,
/// Captures for the current match.
pub captures: &'a regex::bytes::Captures<'a>,
/// Full bytes matched by the rule (capture group 0); exposed to templates as `FULL_MATCH`.
pub full_match: &'a [u8],
}
impl PatternRequirements {
@ -85,6 +128,7 @@ impl PatternRequirements {
pub fn validate(
&self,
input: &[u8],
context: Option<PatternRequirementContext<'_>>,
respect_ignore_if_contains: bool,
) -> PatternValidationResult {
// Convert to string (lossy for non-UTF8)
@ -151,10 +195,84 @@ impl PatternRequirements {
}
}
if let Some(checksum) = &self.checksum {
let Some(ctx) = context else {
return if checksum.skip_if_missing {
PatternValidationResult::Passed
} else {
PatternValidationResult::Failed
};
};
if let Some(required) = checksum.actual.requires_capture.as_deref() {
if ctx.captures.name(required).is_none() {
return if checksum.skip_if_missing {
PatternValidationResult::Passed
} else {
PatternValidationResult::Failed
};
}
}
let mut globals = object!({
"MATCH": s.to_string(),
"FULL_MATCH": String::from_utf8_lossy(ctx.full_match).to_string(),
});
for name in ctx.regex.capture_names().flatten() {
if let Some(capture) = ctx.captures.name(name) {
let value = String::from_utf8_lossy(capture.as_bytes()).to_string();
globals.insert(KString::from_ref(name), Value::scalar(value.clone()));
globals.insert(
KString::from_string(name.to_ascii_uppercase()),
Value::scalar(value),
);
}
}
let actual =
match render_pattern_requirement_template(&checksum.actual.template, &globals) {
Ok(rendered) => rendered,
Err(err) => {
debug!(
"Failed to render checksum actual template '{}': {}",
checksum.actual.template, err
);
return PatternValidationResult::Failed;
}
};
let expected = match render_pattern_requirement_template(&checksum.expected, &globals) {
Ok(rendered) => rendered,
Err(err) => {
debug!(
"Failed to render checksum expected template '{}': {}",
checksum.expected, err
);
return PatternValidationResult::Failed;
}
};
if actual != expected {
let actual_len = actual.chars().count();
let expected_len = expected.chars().count();
return PatternValidationResult::FailedChecksum { actual_len, expected_len };
}
}
PatternValidationResult::Passed
}
}
/// Parse `template` with the shared pattern-requirements Liquid parser and render it
/// against `globals`.
///
/// The template is parsed on every call. Both parse and render failures are returned as
/// the error's string form.
fn render_pattern_requirement_template(
    template: &str,
    globals: &liquid::Object,
) -> Result<String, String> {
    let compiled = match PATTERN_REQUIREMENTS_TEMPLATE_PARSER.parse(template) {
        Ok(compiled) => compiled,
        Err(parse_err) => return Err(parse_err.to_string()),
    };
    match compiled.render(globals) {
        Ok(rendered) => Ok(rendered),
        Err(render_err) => Err(render_err.to_string()),
    }
}
/// Result of validating [`PatternRequirements`] against a potential match.
#[derive(Debug, PartialEq, Eq)]
pub enum PatternValidationResult {
@ -162,6 +280,8 @@ pub enum PatternValidationResult {
Passed,
/// Requirements were not satisfied.
Failed,
/// Checksum requirements were not satisfied; captures basic mismatch details for debugging.
FailedChecksum { actual_len: usize, expected_len: usize },
/// The match contains one of the `ignore_if_contains` substrings and should be skipped.
IgnoredBySubstring { matched_term: String },
}
@ -407,6 +527,10 @@ lazy_static! {
pub static ref RULE_COMMENTS_PATTERN: Regex = Regex::new(
r"(?m)(\(\?#[^)]*\))|(\s\#[\sa-zA-Z]*$)"
).expect("comment-stripping regex should compile");
static ref PATTERN_REQUIREMENTS_TEMPLATE_PARSER: liquid::Parser =
liquid_filters::register_all(ParserBuilder::with_stdlib())
.build()
.expect("pattern requirement template parser should compile");
}
impl RuleSyntax {
@ -564,6 +688,7 @@ impl Rule {
#[cfg(test)]
mod tests {
use super::*;
use regex::bytes::Regex as BytesRegex;
#[test]
fn test_pattern_requirements_digits() {
@ -574,16 +699,75 @@ mod tests {
min_special_chars: None,
special_chars: None,
ignore_if_contains: None,
checksum: None,
};
// Should pass: has 3 digits
assert!(matches!(reqs.validate(b"abc123def", true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"abc123def", None, true), PatternValidationResult::Passed));
// Should fail: only 1 digit
assert!(matches!(reqs.validate(b"abc1def", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"abc1def", None, true), PatternValidationResult::Failed));
// Should fail: no digits
assert!(matches!(reqs.validate(b"abcdef", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"abcdef", None, true), PatternValidationResult::Failed));
}
// Exercises checksum validation end to end: a token whose trailing characters match the
// computed checksum passes, a tampered token reports `FailedChecksum`, and a call without
// capture context passes because `skip_if_missing` is set.
#[test]
fn test_pattern_requirements_checksum() {
let reqs = PatternRequirements {
min_digits: None,
min_uppercase: None,
min_lowercase: None,
min_special_chars: None,
special_chars: None,
ignore_if_contains: None,
checksum: Some(ChecksumRequirement {
actual: ChecksumActual {
// The checksum carried by the token is its last six characters.
template: "{{ MATCH | suffix: 6 }}".to_string(),
requires_capture: Some("checksum".to_string()),
},
// `BODY` is the uppercase alias that `validate` registers for the `body` capture.
expected: "{{ BODY | crc32 | base62: 6 }}".to_string(),
skip_if_missing: true,
}),
};
let token = b"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";
let regex =
BytesRegex::new(r"(?x) ghp_(?P<body>[A-Za-z0-9]{30})(?P<checksum>[A-Za-z0-9]{6})")
.unwrap();
let captures = regex.captures(token).expect("token should match");
// Valid token: the rendered actual and expected checksums agree.
assert!(matches!(
reqs.validate(
token,
Some(PatternRequirementContext {
regex: &regex,
captures: &captures,
full_match: token
}),
true
),
PatternValidationResult::Passed
));
// Changing the final character alters the embedded checksum but not the body, so the
// token still matches the regex while the checksum comparison fails.
let mut invalid = token.to_vec();
*invalid.last_mut().unwrap() = b'0';
let captures_invalid =
regex.captures(&invalid).expect("invalid token should still match pattern");
assert!(matches!(
reqs.validate(
&invalid,
Some(PatternRequirementContext {
regex: &regex,
captures: &captures_invalid,
full_match: &invalid,
}),
true
),
PatternValidationResult::FailedChecksum { .. }
));
// With no context supplied and `skip_if_missing` set, the checksum check is skipped.
let legacy = b"ghp_legacy_token";
assert!(matches!(reqs.validate(legacy, None, true), PatternValidationResult::Passed));
}
#[test]
@ -595,16 +779,17 @@ mod tests {
min_special_chars: None,
special_chars: None,
ignore_if_contains: None,
checksum: None,
};
// Should pass: has 3 uppercase
assert!(matches!(reqs.validate(b"ABCdef", true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"ABCdef", None, true), PatternValidationResult::Passed));
// Should fail: only 1 uppercase
assert!(matches!(reqs.validate(b"Adef", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"Adef", None, true), PatternValidationResult::Failed));
// Should fail: no uppercase
assert!(matches!(reqs.validate(b"abcdef", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"abcdef", None, true), PatternValidationResult::Failed));
}
#[test]
@ -616,16 +801,17 @@ mod tests {
min_special_chars: None,
special_chars: None,
ignore_if_contains: None,
checksum: None,
};
// Should pass: has 3 lowercase
assert!(matches!(reqs.validate(b"ABCdef", true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"ABCdef", None, true), PatternValidationResult::Passed));
// Should fail: only 1 lowercase
assert!(matches!(reqs.validate(b"ABCd", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"ABCd", None, true), PatternValidationResult::Failed));
// Should fail: no lowercase
assert!(matches!(reqs.validate(b"ABC123", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"ABC123", None, true), PatternValidationResult::Failed));
}
#[test]
@ -637,16 +823,17 @@ mod tests {
min_special_chars: Some(2),
special_chars: None, // uses default
ignore_if_contains: None,
checksum: None,
};
// Should pass: has 2 special chars
assert!(matches!(reqs.validate(b"abc!@def", true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"abc!@def", None, true), PatternValidationResult::Passed));
// Should fail: only 1 special char
assert!(matches!(reqs.validate(b"abc!def", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"abc!def", None, true), PatternValidationResult::Failed));
// Should fail: no special chars
assert!(matches!(reqs.validate(b"abcdef", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"abcdef", None, true), PatternValidationResult::Failed));
}
#[test]
@ -658,16 +845,17 @@ mod tests {
min_special_chars: Some(2),
special_chars: Some("$%^".to_string()),
ignore_if_contains: None,
checksum: None,
};
// Should pass: has 2 custom special chars
assert!(matches!(reqs.validate(b"abc$%def", true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"abc$%def", None, true), PatternValidationResult::Passed));
// Should fail: has special chars but not the custom ones
assert!(matches!(reqs.validate(b"abc!@def", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"abc!@def", None, true), PatternValidationResult::Failed));
// Should fail: only 1 custom special char
assert!(matches!(reqs.validate(b"abc$def", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"abc$def", None, true), PatternValidationResult::Failed));
}
#[test]
@ -679,22 +867,23 @@ mod tests {
min_special_chars: Some(1),
special_chars: None,
ignore_if_contains: None,
checksum: None,
};
// Should pass: has all requirements
assert!(matches!(reqs.validate(b"Abc1!", true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"Abc1!", None, true), PatternValidationResult::Passed));
// Should fail: missing digit
assert!(matches!(reqs.validate(b"Abc!", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"Abc!", None, true), PatternValidationResult::Failed));
// Should fail: missing uppercase
assert!(matches!(reqs.validate(b"abc1!", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"abc1!", None, true), PatternValidationResult::Failed));
// Should fail: missing lowercase
assert!(matches!(reqs.validate(b"ABC1!", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"ABC1!", None, true), PatternValidationResult::Failed));
// Should fail: missing special
assert!(matches!(reqs.validate(b"Abc1", true), PatternValidationResult::Failed));
assert!(matches!(reqs.validate(b"Abc1", None, true), PatternValidationResult::Failed));
}
#[test]
@ -706,22 +895,26 @@ mod tests {
min_special_chars: None,
special_chars: None,
ignore_if_contains: Some(vec!["test".to_string(), "Demo".to_string()]),
checksum: None,
};
// Should fail: contains "test" (case-insensitive)
assert!(matches!(
reqs.validate(b"MyTestToken", true),
reqs.validate(b"MyTestToken", None, true),
PatternValidationResult::IgnoredBySubstring { .. }
));
// Should fail: contains "demo" (case-insensitive)
assert!(matches!(
reqs.validate(b"example-demo-value", true),
reqs.validate(b"example-demo-value", None, true),
PatternValidationResult::IgnoredBySubstring { .. }
));
// Should pass: does not contain excluded words
assert!(matches!(reqs.validate(b"example-value", true), PatternValidationResult::Passed));
assert!(matches!(
reqs.validate(b"example-value", None, true),
PatternValidationResult::Passed
));
}
#[test]
@ -733,14 +926,15 @@ mod tests {
min_special_chars: None,
special_chars: None,
ignore_if_contains: Some(vec![" ".to_string(), "".to_string(), "BLOCK".to_string()]),
checksum: None,
};
// Should fail only when non-empty exclusion matches
assert!(matches!(
reqs.validate(b"needs-blocking", true),
reqs.validate(b"needs-blocking", None, true),
PatternValidationResult::IgnoredBySubstring { .. }
));
assert!(matches!(reqs.validate(b"allowed", true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"allowed", None, true), PatternValidationResult::Passed));
}
#[test]
@ -752,16 +946,20 @@ mod tests {
min_special_chars: None,
special_chars: None,
ignore_if_contains: Some(vec!["ignoreme".to_string()]),
checksum: None,
};
// With ignoring enabled, the match is skipped
assert!(matches!(
reqs.validate(b"value-ignoreme", true),
reqs.validate(b"value-ignoreme", None, true),
PatternValidationResult::IgnoredBySubstring { .. }
));
// With ignoring disabled, the same input passes requirements
assert!(matches!(reqs.validate(b"value-ignoreme", false), PatternValidationResult::Passed));
assert!(matches!(
reqs.validate(b"value-ignoreme", None, false),
PatternValidationResult::Passed
));
}
#[test]
@ -773,11 +971,12 @@ mod tests {
min_special_chars: None,
special_chars: None,
ignore_if_contains: None,
checksum: None,
};
// Should pass: no requirements
assert!(matches!(reqs.validate(b"anything", true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"123", true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"!@#", true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"anything", None, true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"123", None, true), PatternValidationResult::Passed));
assert!(matches!(reqs.validate(b"!@#", None, true), PatternValidationResult::Passed));
}
}

View file

@ -332,9 +332,7 @@ async fn timed_validate_single_match<'a>(
}
let mut globals = Object::new();
for (k, v, ..) in &captured_values {
globals.insert(k.to_uppercase().into(), Value::scalar(v.clone()));
}
populate_globals_from_captures(&mut globals, &captured_values);
let rule_syntax = m.rule.syntax();
@ -961,6 +959,59 @@ async fn timed_validate_single_match<'a>(
commit_and_return(m);
}
/// Copy captured values into the Liquid `globals`, keyed by upper-cased capture name.
///
/// Several captures may share the key `TOKEN`. Only the longest of them is stored, and on
/// equal length the one that appears later wins. Every other key is inserted as
/// encountered, so a later duplicate overwrites an earlier one.
fn populate_globals_from_captures(
    globals: &mut Object,
    captured_values: &[(String, String, usize, usize)],
) {
    let mut longest_token: Option<&String> = None;

    for (name, value, ..) in captured_values {
        let key = name.to_uppercase();
        if key != "TOKEN" {
            globals.insert(key.into(), Value::scalar(value.clone()));
            continue;
        }
        // Keep the current candidate only when it is strictly longer than this one.
        match longest_token {
            Some(current) if value.len() < current.len() => {}
            _ => longest_token = Some(value),
        }
    }

    // TOKEN is written last, once the winning candidate is known.
    if let Some(token) = longest_token {
        globals.insert("TOKEN".into(), Value::scalar(token.clone()));
    }
}
// Unit tests for `populate_globals_from_captures`.
#[cfg(test)]
mod tests {
use super::*;
// When several captures map to `TOKEN`, the longest value is the one stored, and other
// keys are unaffected.
#[test]
fn populate_globals_prefers_longest_token() {
let captured_values = vec![
("TOKEN".to_string(), "short".to_string(), 0usize, 5usize),
("BODY".to_string(), "body".to_string(), 0usize, 4usize),
("TOKEN".to_string(), "longervalue".to_string(), 0usize, 11usize),
];
let mut globals = Object::new();
populate_globals_from_captures(&mut globals, &captured_values);
assert_eq!(globals.get("TOKEN").map(|v| v.to_string()), Some("longervalue".to_string()));
assert_eq!(globals.get("BODY").map(|v| v.to_string()), Some("body".to_string()));
}
// Without any `TOKEN` capture, no `TOKEN` global is created.
#[test]
fn populate_globals_handles_missing_token() {
let captured_values = vec![("CHECKSUM".to_string(), "123456".to_string(), 0usize, 6usize)];
let mut globals = Object::new();
populate_globals_from_captures(&mut globals, &captured_values);
assert!(globals.get("TOKEN").is_none());
assert_eq!(globals.get("CHECKSUM").map(|v| v.to_string()), Some("123456".to_string()));
}
}
// #[cfg(test)]
// mod tests {
// use std::sync::Arc;

View file

@ -6,19 +6,11 @@ use crate::validation::SerializableCaptures;
/// Return (NAME, value, start, end) for every capture we care about.
///
/// * If a capture has a name, use that (upper-cased)
/// * If its unnamed, fall back to `"TOKEN"`
/// * Skip the unnamed “whole-match” capture **only when** there are
/// additional captures to return.
/// * If it's unnamed, fall back to `"TOKEN"`
pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> {
let multiple = captures.captures.len() > 1;
captures
.captures
.iter()
// Skip the whole-match capture (match_number == 0) only when there
// are additional captures. All other captures named or unnamed
// should be preserved.
.filter(|cap| !multiple || cap.match_number != 0)
.map(|cap| {
let name =
cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string());
@ -140,7 +132,7 @@ mod tests {
}
#[test]
fn skips_whole_match_when_multiple() {
fn includes_whole_match_when_multiple() {
let captures = SerializableCaptures {
captures: smallvec![
SerializableCapture {
@ -160,11 +152,17 @@ mod tests {
],
};
let result = process_captures(&captures);
assert_eq!(result, vec![("FOO".to_string(), "bcd".to_string(), 1usize, 4usize)]);
assert_eq!(
result,
vec![
("TOKEN".to_string(), "abcde".to_string(), 0usize, 5usize),
("FOO".to_string(), "bcd".to_string(), 1usize, 4usize),
]
);
}
#[test]
fn includes_unnamed_groups_but_skips_whole_match() {
fn includes_whole_match_and_unnamed_groups() {
let captures = SerializableCaptures {
captures: smallvec![
SerializableCapture {
@ -188,6 +186,7 @@ mod tests {
assert_eq!(
result,
vec![
("TOKEN".to_string(), "aabbcc".to_string(), 0usize, 6usize),
("FOO".to_string(), "aa".to_string(), 0usize, 2usize),
("TOKEN".to_string(), "cc".to_string(), 4usize, 6usize),
]