forked from mirrors/kingfisher
Added checksum comparisons to pattern_requirements, new suffix, crc32, and base62 Liquid filters, and verbose logging so mismatched checksums are skipped with context rather than reported as findings.
This commit is contained in:
parent
9c4e459a14
commit
ccbbbad5bc
16 changed files with 2355 additions and 122 deletions
|
|
@ -147,12 +147,23 @@ impl FindingsStore {
|
|||
│ 1. Optional duplicate filter (unchanged) │
|
||||
└───────────────────────────────────────────────────────────────*/
|
||||
if dedup {
|
||||
// Prefer the full unnamed match (index 0). Fall back to a named TOKEN capture
|
||||
// before using whatever capture is available.
|
||||
let snippet = m
|
||||
.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| m.groups.captures.get(0))
|
||||
.map_or("", |c| c.value);
|
||||
.iter()
|
||||
.find(|c| c.name.is_none() && c.match_number == 0)
|
||||
.map(|c| c.value)
|
||||
.or_else(|| {
|
||||
m.groups
|
||||
.captures
|
||||
.iter()
|
||||
.find(|c| matches!(c.name.as_deref(), Some("TOKEN")))
|
||||
.map(|c| c.value)
|
||||
})
|
||||
.or_else(|| m.groups.captures.get(0).map(|c| c.value))
|
||||
.unwrap_or("");
|
||||
|
||||
let origin_kind = match origin.first() {
|
||||
Origin::GitRepo(_) => "git",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
//! Collection of small Liquid filters that make HTTP validations & API-signing templates easy
|
||||
|
||||
use base64::{engine::general_purpose, Engine};
|
||||
use crc32fast::Hasher;
|
||||
use hmac::{Hmac, Mac};
|
||||
use liquid_core::{
|
||||
Display_filter, Error as LiquidError, Expression, Filter, FilterParameters, FilterReflection,
|
||||
|
|
@ -223,22 +224,90 @@ impl Filter for HmacSha384Filter {
|
|||
}
|
||||
|
||||
// ── random_string ────────────────────────────────
|
||||
static_filter!(
|
||||
/// Random alphanumeric string (default 32 chars).
|
||||
RandomStringFilter { len: Option<usize> },
|
||||
"random_string",
|
||||
|s: &RandomStringFilter, input: &dyn ValueView| -> String {
|
||||
let n = s.len // explicit argument?
|
||||
.or_else(|| input.to_kstr().parse().ok()) // else parse input
|
||||
.unwrap_or(32); // else default
|
||||
#[derive(Debug, FilterParameters)]
|
||||
struct RandomStringArgs {
|
||||
#[parameter(description = "Desired output length", arg_type = "integer")]
|
||||
len: Option<Expression>,
|
||||
}
|
||||
|
||||
rand::rng()
|
||||
.sample_iter(&Alphanumeric)
|
||||
.take(n)
|
||||
.map(char::from)
|
||||
.collect()
|
||||
#[derive(Clone, ParseFilter, FilterReflection, Default)]
|
||||
#[filter(
|
||||
name = "random_string",
|
||||
description = "Random alphanumeric string (default 32 chars).",
|
||||
parameters(RandomStringArgs),
|
||||
parsed(RandomString)
|
||||
)]
|
||||
pub struct RandomStringFilter;
|
||||
|
||||
#[derive(Debug, FromFilterParameters, Display_filter)]
|
||||
#[name = "random_string"]
|
||||
struct RandomString {
|
||||
#[parameters]
|
||||
args: RandomStringArgs,
|
||||
}
|
||||
|
||||
impl Filter for RandomString {
|
||||
fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
|
||||
let args = self.args.evaluate(runtime)?;
|
||||
let n = args
|
||||
.len
|
||||
.and_then(|value| {
|
||||
let scalar = Value::scalar(value);
|
||||
value_to_usize(&scalar)
|
||||
})
|
||||
.or_else(|| input.to_kstr().parse().ok())
|
||||
.unwrap_or(32);
|
||||
|
||||
let value: String =
|
||||
rand::rng().sample_iter(&Alphanumeric).take(n).map(char::from).collect();
|
||||
|
||||
Ok(Value::scalar(value))
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[derive(Debug, FilterParameters)]
|
||||
struct SuffixArgs {
|
||||
#[parameter(description = "Number of trailing characters to keep", arg_type = "integer")]
|
||||
len: Option<Expression>,
|
||||
}
|
||||
|
||||
#[derive(Clone, ParseFilter, FilterReflection, Default)]
|
||||
#[filter(
|
||||
name = "suffix",
|
||||
description = "Return the suffix (last N characters) of the provided string.",
|
||||
parameters(SuffixArgs),
|
||||
parsed(Suffix)
|
||||
)]
|
||||
pub struct SuffixFilter;
|
||||
|
||||
#[derive(Debug, FromFilterParameters, Display_filter)]
|
||||
#[name = "suffix"]
|
||||
struct Suffix {
|
||||
#[parameters]
|
||||
args: SuffixArgs,
|
||||
}
|
||||
|
||||
// Liquid `suffix` filter: keeps the trailing characters of the input's string form.
impl Filter for Suffix {
|
||||
// Returns the last `len` characters of `input` rendered as a string.
// - `len` omitted (or not convertible by `value_to_usize`): the whole string is returned.
// - `len == 0`: an empty string is returned.
// - `len` larger than the string: the whole string is returned.
// Errors only if evaluating the filter arguments against `runtime` fails.
fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
|
||||
let args = self.args.evaluate(runtime)?;
|
||||
let text = input.to_kstr();
|
||||
// Default is `text.len()` (presumably the byte length of the string — confirm); it is
// clamped to the character count below, so the default always keeps the whole string.
let requested = args
|
||||
.len
|
||||
.and_then(|value| {
|
||||
let scalar = Value::scalar(value);
|
||||
value_to_usize(&scalar)
|
||||
})
|
||||
.unwrap_or_else(|| text.len());
|
||||
if requested == 0 {
|
||||
return Ok(Value::scalar(String::new()));
|
||||
}
|
||||
|
|
||||
// Work on `char`s rather than bytes so the cut never lands inside a multi-byte character.
let mut chars: Vec<char> = text.chars().collect();
|
||||
let keep = requested.min(chars.len());
|
||||
// Drop everything before the last `keep` characters.
chars.drain(0..chars.len().saturating_sub(keep));
|
||||
Ok(Value::scalar(chars.into_iter().collect::<String>()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, FilterReflection, ParseFilter)]
|
||||
#[filter(
|
||||
|
|
@ -307,6 +376,111 @@ static_filter!(
|
|||
}
|
||||
);
|
||||
|
||||
static_filter!(
|
||||
/// Compute the CRC32 of the input and return it as a decimal number.
|
||||
Crc32Filter,
|
||||
"crc32",
|
||||
|input: &dyn ValueView| -> i64 {
|
||||
let mut hasher = Hasher::new();
|
||||
hasher.update(input.to_kstr().as_bytes());
|
||||
i64::from(hasher.finalize())
|
||||
}
|
||||
);
|
||||
|
||||
#[derive(Debug, FilterParameters)]
|
||||
struct Base62Args {
|
||||
#[parameter(
|
||||
description = "Pad the encoded value to at least this width",
|
||||
arg_type = "integer"
|
||||
)]
|
||||
width: Option<Expression>,
|
||||
}
|
||||
|
||||
#[derive(Clone, ParseFilter, FilterReflection, Default)]
|
||||
#[filter(
|
||||
name = "base62",
|
||||
description = "Encode the provided integer value using Base62.",
|
||||
parameters(Base62Args),
|
||||
parsed(Base62)
|
||||
)]
|
||||
pub struct Base62Filter;
|
||||
|
||||
#[derive(Debug, FromFilterParameters, Display_filter)]
|
||||
#[name = "base62"]
|
||||
struct Base62 {
|
||||
#[parameters]
|
||||
args: Base62Args,
|
||||
}
|
||||
|
||||
// Liquid `base62` filter: encodes a non-negative integer with `encode_base62`, optionally
// left-padding the result with '0' up to a minimum width.
impl Filter for Base62 {
|
||||
// Converts `input` to a `u64`, encodes it, and pads it when a `width` argument is given.
// Input that cannot be interpreted as a number is encoded as 0 rather than raising an error.
// Errors only if evaluating the filter arguments against `runtime` fails.
fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
|
||||
let args = self.args.evaluate(runtime)?;
|
||||
// Conversion order for scalar input: integer, then float, then bool, then the scalar's
// string form parsed as u64. Negative integers and negatively-signed floats become 0.
let value = input
|
||||
.as_scalar()
|
||||
.and_then(|scalar| {
|
||||
if let Some(int) = scalar.to_integer() {
|
||||
Some(if int < 0 { 0 } else { int as u64 })
|
||||
} else if let Some(float) = scalar.to_float() {
|
||||
// Floats are rounded down; the `as u64` cast saturates on out-of-range values.
Some(if float.is_sign_negative() { 0 } else { float.floor() as u64 })
|
||||
} else if let Some(boolean) = scalar.to_bool() {
|
||||
Some(u64::from(boolean))
|
||||
} else {
|
||||
scalar.to_kstr().to_string().parse::<u64>().ok()
|
||||
}
|
||||
|
|
||||
})
|
||||
// Non-scalar input (or a scalar that failed every conversion): try its string form.
.or_else(|| input.to_kstr().to_string().parse::<u64>().ok())
|
||||
// Nothing parsed: fall back to 0.
.unwrap_or(0);
|
||||
|
|
||||
let mut encoded = encode_base62(value);
|
||||
// `width` is a minimum: shorter encodings are left-padded, longer ones are never truncated.
if let Some(width) = args.width.and_then(|value| {
|
||||
let scalar = Value::scalar(value);
|
||||
value_to_usize(&scalar)
|
||||
}) {
|
||||
if encoded.len() < width {
|
||||
let mut padded = String::with_capacity(width);
|
||||
// '0' is the zero digit of the alphabet used by `encode_base62`, so padding does
// not change the encoded value.
for _ in 0..(width - encoded.len()) {
|
||||
padded.push('0');
|
||||
}
|
||||
padded.push_str(&encoded);
|
||||
encoded = padded;
|
||||
}
|
||||
}
|
||||
|
|
||||
Ok(Value::scalar(encoded))
|
||||
}
|
||||
}
|
||||
|
||||
// Encodes `value` in base 62 with the digit order `0-9`, `A-Z`, `a-z`.
// Returns "0" for zero; otherwise the most significant digit comes first and no padding is added.
fn encode_base62(mut value: u64) -> String {
|
||||
// Digit table: index 0-9 -> '0'-'9', 10-35 -> 'A'-'Z', 36-61 -> 'a'-'z'.
const ALPHABET: &[u8; 62] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
|
||||
// Zero would skip the loop below and yield an empty string, so it is handled explicitly.
if value == 0 {
|
||||
return "0".to_string();
|
||||
}
|
||||
let mut buf = Vec::new();
|
||||
// Peel off one base-62 digit per pass, least significant digit first.
while value > 0 {
|
||||
let rem = (value % 62) as usize;
|
||||
buf.push(ALPHABET[rem] as char);
|
||||
value /= 62;
|
||||
}
|
||||
// Digits were collected in reverse order; flip them while building the string.
buf.iter().rev().collect()
|
||||
}
|
||||
|
||||
// Best-effort conversion of a Liquid `Value` to `usize`, used for filter length/width arguments.
// Returns `None` when the value is neither numeric, boolean, nor a string that parses as `usize`.
// Negative integers and negatively-signed floats are clamped to 0 instead of being rejected.
fn value_to_usize(value: &Value) -> Option<usize> {
|
||||
let view = value.as_view();
|
||||
// Conversion order for scalars: integer, then float, then bool, then the string form.
view.as_scalar()
|
||||
.and_then(|scalar| {
|
||||
if let Some(int) = scalar.to_integer() {
|
||||
// NOTE(review): `as usize` can truncate a large integer on targets where usize is
// narrower than the integer type — confirm that is acceptable.
Some(if int < 0 { 0 } else { int as usize })
|
||||
} else if let Some(float) = scalar.to_float() {
|
||||
// Floats are rounded down; the `as usize` cast saturates on out-of-range values.
Some(if float.is_sign_negative() { 0 } else { float.floor() as usize })
|
||||
} else if let Some(boolean) = scalar.to_bool() {
|
||||
Some(if boolean { 1 } else { 0 })
|
||||
} else {
|
||||
scalar.to_kstr().parse::<usize>().ok()
|
||||
}
|
||||
})
|
||||
// Reached for non-scalar values and for scalars that failed every conversion above.
.or_else(|| view.to_kstr().parse::<usize>().ok())
|
||||
}
|
||||
|
||||
// {{ value | b64url_enc }} – URL-safe base64 w/o padding
|
||||
static_filter!(
|
||||
/// Base64 URL-safe (no ‘=’ padding).
|
||||
|
|
@ -415,6 +589,9 @@ pub fn register_all(builder: liquid::ParserBuilder) -> liquid::ParserBuilder {
|
|||
.filter(B64EncFilter::default())
|
||||
.filter(B64DecFilter::default())
|
||||
.filter(RandomStringFilter::default())
|
||||
.filter(SuffixFilter::default())
|
||||
.filter(Crc32Filter::default())
|
||||
.filter(Base62Filter::default())
|
||||
.filter(HmacSha256::default())
|
||||
.filter(HmacSha1::default())
|
||||
.filter(HmacSha384::default())
|
||||
|
|
@ -461,6 +638,20 @@ mod tests {
|
|||
assert_eq!(render(r#"{{ "hello" | sha256 }}"#), expect);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn suffix_filter() {
|
||||
assert_eq!(render(r#"{{ "abcdef" | suffix: 3 }}"#), "def");
|
||||
assert_eq!(render(r#"{{ "short" | suffix: 10 }}"#), "short");
|
||||
assert_eq!(render(r#"{{ "value" | suffix: 0 }}"#), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn crc32_and_base62_filters() {
|
||||
assert_eq!(render(r#"{{ "hello" | crc32 }}"#), "907060870");
|
||||
assert_eq!(render(r#"{{ "hello" | crc32 | base62 }}"#), "zNvy2");
|
||||
assert_eq!(render(r#"{{ "hello" | crc32 | base62: 6 }}"#), "0zNvy2");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hmac_sha1_filter() {
|
||||
let key = b"key1";
|
||||
|
|
|
|||
30
src/main.rs
30
src/main.rs
|
|
@ -5,27 +5,27 @@
|
|||
// * Fallback - system allocator (`system-alloc` feature)
|
||||
// ────────────────────────────────────────────────────────────
|
||||
|
||||
// --- jemalloc (opt-in) ---
|
||||
#[cfg(feature = "use-jemalloc")]
|
||||
#[global_allocator]
|
||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
// // --- jemalloc (opt-in) ---
|
||||
// #[cfg(feature = "use-jemalloc")]
|
||||
// #[global_allocator]
|
||||
// static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
|
||||
// --- mimalloc (default) ---
|
||||
#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))]
|
||||
#[global_allocator]
|
||||
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
// --- system allocator (explicit opt-out) ---
|
||||
#[cfg(feature = "system-alloc")]
|
||||
use std::alloc::System;
|
||||
#[cfg(feature = "system-alloc")]
|
||||
#[global_allocator]
|
||||
static GLOBAL: System = System;
|
||||
// // --- mimalloc (default) ---
|
||||
// #[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))]
|
||||
// #[global_allocator]
|
||||
// static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
// // --- system allocator (explicit opt-out) ---
|
||||
// #[cfg(feature = "system-alloc")]
|
||||
// use std::alloc::System;
|
||||
// #[cfg(feature = "system-alloc")]
|
||||
// #[global_allocator]
|
||||
// static GLOBAL: System = System;
|
||||
|
||||
use std::alloc::System;
|
||||
#[global_allocator]
|
||||
static GLOBAL: System = System;
|
||||
|
||||
use std::{
|
||||
io::{IsTerminal, Read},
|
||||
sync::{Arc, Mutex},
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ use crate::{
|
|||
parser,
|
||||
parser::{Checker, Language},
|
||||
rule_profiling::{ConcurrentRuleProfiler, RuleStats, RuleTimer},
|
||||
rules::rule::{PatternValidationResult, Rule},
|
||||
rules::rule::{PatternRequirementContext, PatternValidationResult, Rule},
|
||||
rules_database::RulesDatabase,
|
||||
safe_list::{is_safe_match, is_user_match},
|
||||
scanner_pool::ScannerPool,
|
||||
|
|
@ -614,7 +614,12 @@ fn filter_match<'b>(
|
|||
|
||||
// Check character requirements if specified
|
||||
if let Some(char_reqs) = rule.pattern_requirements() {
|
||||
match char_reqs.validate(mi_bytes, respect_ignore_if_contains) {
|
||||
let context = PatternRequirementContext {
|
||||
regex: re,
|
||||
captures: &captures,
|
||||
full_match: full_bytes,
|
||||
};
|
||||
match char_reqs.validate(mi_bytes, Some(context), respect_ignore_if_contains) {
|
||||
PatternValidationResult::Passed => {}
|
||||
PatternValidationResult::Failed => {
|
||||
debug!(
|
||||
|
|
@ -623,6 +628,15 @@ fn filter_match<'b>(
|
|||
);
|
||||
continue;
|
||||
}
|
||||
PatternValidationResult::FailedChecksum { actual_len, expected_len } => {
|
||||
debug!(
|
||||
"Skipping match for rule {} due to checksum mismatch (actual_len={}, expected_len={})",
|
||||
rule.id(),
|
||||
actual_len,
|
||||
expected_len
|
||||
);
|
||||
continue;
|
||||
}
|
||||
PatternValidationResult::IgnoredBySubstring { matched_term } => {
|
||||
debug!(
|
||||
"Skipping match for rule {} because it contains ignored term {matched_term}",
|
||||
|
|
@ -790,40 +804,31 @@ impl SerializableCaptures {
|
|||
redact: bool,
|
||||
) -> Self {
|
||||
let mut serialized_captures: SmallVec<[SerializableCapture; 2]> = SmallVec::new();
|
||||
// Process named captures
|
||||
for name in re.capture_names().flatten() {
|
||||
if let Some(capture) = captures.name(name) {
|
||||
let value = if redact {
|
||||
redact_value(&String::from_utf8_lossy(capture.as_bytes()))
|
||||
} else {
|
||||
String::from_utf8_lossy(capture.as_bytes()).to_string()
|
||||
};
|
||||
serialized_captures.push(SerializableCapture {
|
||||
name: Some(name.to_string()),
|
||||
match_number: -1,
|
||||
start: capture.start(),
|
||||
end: capture.end(),
|
||||
value: intern(&value),
|
||||
});
|
||||
}
|
||||
}
|
||||
// Process unnamed captures (numbered groups)
|
||||
|
||||
let capture_names: SmallVec<[Option<String>; 4]> =
|
||||
re.capture_names().map(|name| name.map(str::to_string)).collect();
|
||||
|
||||
for i in 0..captures.len() {
|
||||
if let Some(capture) = captures.get(i) {
|
||||
if let Some(cap) = captures.get(i) {
|
||||
let value = if redact {
|
||||
redact_value(&String::from_utf8_lossy(capture.as_bytes()))
|
||||
redact_value(&String::from_utf8_lossy(cap.as_bytes()))
|
||||
} else {
|
||||
String::from_utf8_lossy(capture.as_bytes()).to_string()
|
||||
String::from_utf8_lossy(cap.as_bytes()).to_string()
|
||||
};
|
||||
let interned = intern(&value);
|
||||
|
||||
let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned();
|
||||
|
||||
serialized_captures.push(SerializableCapture {
|
||||
name: None,
|
||||
name,
|
||||
match_number: i32::try_from(i).unwrap_or(0),
|
||||
start: capture.start(),
|
||||
end: capture.end(),
|
||||
value: intern(&value),
|
||||
start: cap.start(),
|
||||
end: cap.end(),
|
||||
value: interned,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
SerializableCaptures { captures: serialized_captures }
|
||||
}
|
||||
}
|
||||
|
|
@ -1182,6 +1187,7 @@ mod test {
|
|||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
ignore_if_contains: Some(vec!["TEST".to_string()]),
|
||||
checksum: None,
|
||||
}),
|
||||
})];
|
||||
|
||||
|
|
@ -1244,6 +1250,7 @@ mod test {
|
|||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
ignore_if_contains: Some(vec!["TEST".to_string()]),
|
||||
checksum: None,
|
||||
}),
|
||||
})];
|
||||
|
||||
|
|
@ -1500,4 +1507,24 @@ line2
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serializes_captures_in_numeric_order() {
|
||||
let re =
|
||||
Regex::new(r"(?xi)\b(ghp_(?P<body>[A-Z0-9]{3})(?P<checksum>[A-Z0-9]{2}))").unwrap();
|
||||
let caps = re.captures(b"ghp_ABC12").expect("expected captures");
|
||||
|
||||
let serialized = SerializableCaptures::from_captures(&caps, b"", &re, false);
|
||||
let entries: Vec<(Option<&str>, i32, &str)> = serialized
|
||||
.captures
|
||||
.iter()
|
||||
.map(|cap| (cap.name.as_deref(), cap.match_number, cap.value))
|
||||
.collect();
|
||||
|
||||
assert_eq!(entries.len(), 4);
|
||||
assert_eq!(entries[0], (None, 0, "ghp_ABC12"));
|
||||
assert_eq!(entries[1], (None, 1, "ghp_ABC12"));
|
||||
assert_eq!(entries[2], (Some("body"), 2, "ABC"));
|
||||
assert_eq!(entries[3], (Some("checksum"), 3, "12"));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,6 +10,10 @@ use std::{
|
|||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use liquid::{
|
||||
model::{KString, Value},
|
||||
object, Parser, ParserBuilder,
|
||||
};
|
||||
use regex::Regex;
|
||||
use schemars::{
|
||||
gen::SchemaGenerator,
|
||||
|
|
@ -17,9 +21,12 @@ use schemars::{
|
|||
JsonSchema,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::debug;
|
||||
// use sha1::{Digest, Sha1};
|
||||
use xxhash_rust::xxh3::xxh3_64;
|
||||
|
||||
use crate::liquid_filters;
|
||||
|
||||
/// Returns false as the default value.
|
||||
fn default_false() -> bool {
|
||||
false
|
||||
|
|
@ -73,6 +80,42 @@ pub struct PatternRequirements {
|
|||
/// Words that should cause the match to be excluded when present (case-insensitive)
|
||||
#[serde(default)]
|
||||
pub ignore_if_contains: Option<Vec<String>>,
|
||||
/// Optional checksum validation configuration.
|
||||
#[serde(default)]
|
||||
pub checksum: Option<ChecksumRequirement>,
|
||||
}
|
||||
|
||||
/// Defines a checksum validation strategy for a matched pattern.
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
||||
pub struct ChecksumRequirement {
|
||||
/// Template describing how to extract the checksum from the match.
|
||||
pub actual: ChecksumActual,
|
||||
/// Template describing how to compute the expected checksum.
|
||||
pub expected: String,
|
||||
/// When true, checksum evaluation is skipped if the required capture is missing.
|
||||
#[serde(default)]
|
||||
pub skip_if_missing: bool,
|
||||
}
|
||||
|
||||
/// Describes how to extract the checksum value from a match.
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
|
||||
pub struct ChecksumActual {
|
||||
/// Liquid template used to compute the checksum from the match.
|
||||
pub template: String,
|
||||
/// Optional capture group that must be present before evaluating the checksum.
|
||||
#[serde(default)]
|
||||
pub requires_capture: Option<String>,
|
||||
}
|
||||
|
||||
/// Contextual information available when validating pattern requirements.
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct PatternRequirementContext<'a> {
|
||||
/// Compiled regex associated with the rule.
|
||||
pub regex: &'a regex::bytes::Regex,
|
||||
/// Captures for the current match.
|
||||
pub captures: &'a regex::bytes::Captures<'a>,
|
||||
/// Full bytes matched by the rule (capture group 0).
|
||||
pub full_match: &'a [u8],
|
||||
}
|
||||
|
||||
impl PatternRequirements {
|
||||
|
|
@ -85,6 +128,7 @@ impl PatternRequirements {
|
|||
pub fn validate(
|
||||
&self,
|
||||
input: &[u8],
|
||||
context: Option<PatternRequirementContext<'_>>,
|
||||
respect_ignore_if_contains: bool,
|
||||
) -> PatternValidationResult {
|
||||
// Convert to string (lossy for non-UTF8)
|
||||
|
|
@ -151,10 +195,84 @@ impl PatternRequirements {
|
|||
}
|
||||
}
|
||||
|
||||
if let Some(checksum) = &self.checksum {
|
||||
let Some(ctx) = context else {
|
||||
return if checksum.skip_if_missing {
|
||||
PatternValidationResult::Passed
|
||||
} else {
|
||||
PatternValidationResult::Failed
|
||||
};
|
||||
};
|
||||
|
||||
if let Some(required) = checksum.actual.requires_capture.as_deref() {
|
||||
if ctx.captures.name(required).is_none() {
|
||||
return if checksum.skip_if_missing {
|
||||
PatternValidationResult::Passed
|
||||
} else {
|
||||
PatternValidationResult::Failed
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
let mut globals = object!({
|
||||
"MATCH": s.to_string(),
|
||||
"FULL_MATCH": String::from_utf8_lossy(ctx.full_match).to_string(),
|
||||
});
|
||||
|
||||
for name in ctx.regex.capture_names().flatten() {
|
||||
if let Some(capture) = ctx.captures.name(name) {
|
||||
let value = String::from_utf8_lossy(capture.as_bytes()).to_string();
|
||||
globals.insert(KString::from_ref(name), Value::scalar(value.clone()));
|
||||
globals.insert(
|
||||
KString::from_string(name.to_ascii_uppercase()),
|
||||
Value::scalar(value),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let actual =
|
||||
match render_pattern_requirement_template(&checksum.actual.template, &globals) {
|
||||
Ok(rendered) => rendered,
|
||||
Err(err) => {
|
||||
debug!(
|
||||
"Failed to render checksum actual template '{}': {}",
|
||||
checksum.actual.template, err
|
||||
);
|
||||
return PatternValidationResult::Failed;
|
||||
}
|
||||
};
|
||||
let expected = match render_pattern_requirement_template(&checksum.expected, &globals) {
|
||||
Ok(rendered) => rendered,
|
||||
Err(err) => {
|
||||
debug!(
|
||||
"Failed to render checksum expected template '{}': {}",
|
||||
checksum.expected, err
|
||||
);
|
||||
return PatternValidationResult::Failed;
|
||||
}
|
||||
};
|
||||
|
||||
if actual != expected {
|
||||
let actual_len = actual.chars().count();
|
||||
let expected_len = expected.chars().count();
|
||||
return PatternValidationResult::FailedChecksum { actual_len, expected_len };
|
||||
}
|
||||
}
|
||||
|
||||
PatternValidationResult::Passed
|
||||
}
|
||||
}
|
||||
|
||||
// Parses `template` with `PATTERN_REQUIREMENTS_TEMPLATE_PARSER` and renders it against `globals`.
// Returns the rendered text, or the parse/render error converted to its string form.
// The template is parsed on every call; nothing is cached here.
fn render_pattern_requirement_template(
|
||||
template: &str,
|
||||
globals: &liquid::Object,
|
||||
) -> Result<String, String> {
|
||||
PATTERN_REQUIREMENTS_TEMPLATE_PARSER
|
||||
.parse(template)
|
||||
// Errors are flattened to `String` so parse and render failures share one error type.
.map_err(|e| e.to_string())
|
||||
.and_then(|parsed| parsed.render(globals).map_err(|e| e.to_string()))
|
||||
}
|
||||
|
||||
/// Result of validating [`PatternRequirements`] against a potential match.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum PatternValidationResult {
|
||||
|
|
@ -162,6 +280,8 @@ pub enum PatternValidationResult {
|
|||
Passed,
|
||||
/// Requirements were not satisfied.
|
||||
Failed,
|
||||
/// Checksum requirements were not satisfied; captures basic mismatch details for debugging.
|
||||
FailedChecksum { actual_len: usize, expected_len: usize },
|
||||
/// The match contains one of the `ignore_if_contains` substrings and should be skipped.
|
||||
IgnoredBySubstring { matched_term: String },
|
||||
}
|
||||
|
|
@ -407,6 +527,10 @@ lazy_static! {
|
|||
pub static ref RULE_COMMENTS_PATTERN: Regex = Regex::new(
|
||||
r"(?m)(\(\?#[^)]*\))|(\s\#[\sa-zA-Z]*$)"
|
||||
).expect("comment-stripping regex should compile");
|
||||
static ref PATTERN_REQUIREMENTS_TEMPLATE_PARSER: liquid::Parser =
|
||||
liquid_filters::register_all(ParserBuilder::with_stdlib())
|
||||
.build()
|
||||
.expect("pattern requirement template parser should compile");
|
||||
}
|
||||
|
||||
impl RuleSyntax {
|
||||
|
|
@ -564,6 +688,7 @@ impl Rule {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use regex::bytes::Regex as BytesRegex;
|
||||
|
||||
#[test]
|
||||
fn test_pattern_requirements_digits() {
|
||||
|
|
@ -574,16 +699,75 @@ mod tests {
|
|||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
ignore_if_contains: None,
|
||||
checksum: None,
|
||||
};
|
||||
|
||||
// Should pass: has 3 digits
|
||||
assert!(matches!(reqs.validate(b"abc123def", true), PatternValidationResult::Passed));
|
||||
assert!(matches!(reqs.validate(b"abc123def", None, true), PatternValidationResult::Passed));
|
||||
|
||||
// Should fail: only 1 digit
|
||||
assert!(matches!(reqs.validate(b"abc1def", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"abc1def", None, true), PatternValidationResult::Failed));
|
||||
|
||||
// Should fail: no digits
|
||||
assert!(matches!(reqs.validate(b"abcdef", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"abcdef", None, true), PatternValidationResult::Failed));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pattern_requirements_checksum() {
|
||||
let reqs = PatternRequirements {
|
||||
min_digits: None,
|
||||
min_uppercase: None,
|
||||
min_lowercase: None,
|
||||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
ignore_if_contains: None,
|
||||
checksum: Some(ChecksumRequirement {
|
||||
actual: ChecksumActual {
|
||||
template: "{{ MATCH | suffix: 6 }}".to_string(),
|
||||
requires_capture: Some("checksum".to_string()),
|
||||
},
|
||||
expected: "{{ BODY | crc32 | base62: 6 }}".to_string(),
|
||||
skip_if_missing: true,
|
||||
}),
|
||||
};
|
||||
|
||||
let token = b"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";
|
||||
let regex =
|
||||
BytesRegex::new(r"(?x) ghp_(?P<body>[A-Za-z0-9]{30})(?P<checksum>[A-Za-z0-9]{6})")
|
||||
.unwrap();
|
||||
let captures = regex.captures(token).expect("token should match");
|
||||
assert!(matches!(
|
||||
reqs.validate(
|
||||
token,
|
||||
Some(PatternRequirementContext {
|
||||
regex: &regex,
|
||||
captures: &captures,
|
||||
full_match: token
|
||||
}),
|
||||
true
|
||||
),
|
||||
PatternValidationResult::Passed
|
||||
));
|
||||
|
||||
let mut invalid = token.to_vec();
|
||||
*invalid.last_mut().unwrap() = b'0';
|
||||
let captures_invalid =
|
||||
regex.captures(&invalid).expect("invalid token should still match pattern");
|
||||
assert!(matches!(
|
||||
reqs.validate(
|
||||
&invalid,
|
||||
Some(PatternRequirementContext {
|
||||
regex: &regex,
|
||||
captures: &captures_invalid,
|
||||
full_match: &invalid,
|
||||
}),
|
||||
true
|
||||
),
|
||||
PatternValidationResult::FailedChecksum { .. }
|
||||
));
|
||||
|
||||
let legacy = b"ghp_legacy_token";
|
||||
assert!(matches!(reqs.validate(legacy, None, true), PatternValidationResult::Passed));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -595,16 +779,17 @@ mod tests {
|
|||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
ignore_if_contains: None,
|
||||
checksum: None,
|
||||
};
|
||||
|
||||
// Should pass: has 3 uppercase
|
||||
assert!(matches!(reqs.validate(b"ABCdef", true), PatternValidationResult::Passed));
|
||||
assert!(matches!(reqs.validate(b"ABCdef", None, true), PatternValidationResult::Passed));
|
||||
|
||||
// Should fail: only 1 uppercase
|
||||
assert!(matches!(reqs.validate(b"Adef", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"Adef", None, true), PatternValidationResult::Failed));
|
||||
|
||||
// Should fail: no uppercase
|
||||
assert!(matches!(reqs.validate(b"abcdef", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"abcdef", None, true), PatternValidationResult::Failed));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -616,16 +801,17 @@ mod tests {
|
|||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
ignore_if_contains: None,
|
||||
checksum: None,
|
||||
};
|
||||
|
||||
// Should pass: has 3 lowercase
|
||||
assert!(matches!(reqs.validate(b"ABCdef", true), PatternValidationResult::Passed));
|
||||
assert!(matches!(reqs.validate(b"ABCdef", None, true), PatternValidationResult::Passed));
|
||||
|
||||
// Should fail: only 1 lowercase
|
||||
assert!(matches!(reqs.validate(b"ABCd", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"ABCd", None, true), PatternValidationResult::Failed));
|
||||
|
||||
// Should fail: no lowercase
|
||||
assert!(matches!(reqs.validate(b"ABC123", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"ABC123", None, true), PatternValidationResult::Failed));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -637,16 +823,17 @@ mod tests {
|
|||
min_special_chars: Some(2),
|
||||
special_chars: None, // uses default
|
||||
ignore_if_contains: None,
|
||||
checksum: None,
|
||||
};
|
||||
|
||||
// Should pass: has 2 special chars
|
||||
assert!(matches!(reqs.validate(b"abc!@def", true), PatternValidationResult::Passed));
|
||||
assert!(matches!(reqs.validate(b"abc!@def", None, true), PatternValidationResult::Passed));
|
||||
|
||||
// Should fail: only 1 special char
|
||||
assert!(matches!(reqs.validate(b"abc!def", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"abc!def", None, true), PatternValidationResult::Failed));
|
||||
|
||||
// Should fail: no special chars
|
||||
assert!(matches!(reqs.validate(b"abcdef", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"abcdef", None, true), PatternValidationResult::Failed));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -658,16 +845,17 @@ mod tests {
|
|||
min_special_chars: Some(2),
|
||||
special_chars: Some("$%^".to_string()),
|
||||
ignore_if_contains: None,
|
||||
checksum: None,
|
||||
};
|
||||
|
||||
// Should pass: has 2 custom special chars
|
||||
assert!(matches!(reqs.validate(b"abc$%def", true), PatternValidationResult::Passed));
|
||||
assert!(matches!(reqs.validate(b"abc$%def", None, true), PatternValidationResult::Passed));
|
||||
|
||||
// Should fail: has special chars but not the custom ones
|
||||
assert!(matches!(reqs.validate(b"abc!@def", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"abc!@def", None, true), PatternValidationResult::Failed));
|
||||
|
||||
// Should fail: only 1 custom special char
|
||||
assert!(matches!(reqs.validate(b"abc$def", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"abc$def", None, true), PatternValidationResult::Failed));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -679,22 +867,23 @@ mod tests {
|
|||
min_special_chars: Some(1),
|
||||
special_chars: None,
|
||||
ignore_if_contains: None,
|
||||
checksum: None,
|
||||
};
|
||||
|
||||
// Should pass: has all requirements
|
||||
assert!(matches!(reqs.validate(b"Abc1!", true), PatternValidationResult::Passed));
|
||||
assert!(matches!(reqs.validate(b"Abc1!", None, true), PatternValidationResult::Passed));
|
||||
|
||||
// Should fail: missing digit
|
||||
assert!(matches!(reqs.validate(b"Abc!", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"Abc!", None, true), PatternValidationResult::Failed));
|
||||
|
||||
// Should fail: missing uppercase
|
||||
assert!(matches!(reqs.validate(b"abc1!", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"abc1!", None, true), PatternValidationResult::Failed));
|
||||
|
||||
// Should fail: missing lowercase
|
||||
assert!(matches!(reqs.validate(b"ABC1!", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"ABC1!", None, true), PatternValidationResult::Failed));
|
||||
|
||||
// Should fail: missing special
|
||||
assert!(matches!(reqs.validate(b"Abc1", true), PatternValidationResult::Failed));
|
||||
assert!(matches!(reqs.validate(b"Abc1", None, true), PatternValidationResult::Failed));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -706,22 +895,26 @@ mod tests {
|
|||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
ignore_if_contains: Some(vec!["test".to_string(), "Demo".to_string()]),
|
||||
checksum: None,
|
||||
};
|
||||
|
||||
// Should fail: contains "test" (case-insensitive)
|
||||
assert!(matches!(
|
||||
reqs.validate(b"MyTestToken", true),
|
||||
reqs.validate(b"MyTestToken", None, true),
|
||||
PatternValidationResult::IgnoredBySubstring { .. }
|
||||
));
|
||||
|
||||
// Should fail: contains "demo" (case-insensitive)
|
||||
assert!(matches!(
|
||||
reqs.validate(b"example-demo-value", true),
|
||||
reqs.validate(b"example-demo-value", None, true),
|
||||
PatternValidationResult::IgnoredBySubstring { .. }
|
||||
));
|
||||
|
||||
// Should pass: does not contain excluded words
|
||||
assert!(matches!(reqs.validate(b"example-value", true), PatternValidationResult::Passed));
|
||||
assert!(matches!(
|
||||
reqs.validate(b"example-value", None, true),
|
||||
PatternValidationResult::Passed
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -733,14 +926,15 @@ mod tests {
|
|||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
ignore_if_contains: Some(vec![" ".to_string(), "".to_string(), "BLOCK".to_string()]),
|
||||
checksum: None,
|
||||
};
|
||||
|
||||
// Should fail only when non-empty exclusion matches
|
||||
assert!(matches!(
|
||||
reqs.validate(b"needs-blocking", true),
|
||||
reqs.validate(b"needs-blocking", None, true),
|
||||
PatternValidationResult::IgnoredBySubstring { .. }
|
||||
));
|
||||
assert!(matches!(reqs.validate(b"allowed", true), PatternValidationResult::Passed));
|
||||
assert!(matches!(reqs.validate(b"allowed", None, true), PatternValidationResult::Passed));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -752,16 +946,20 @@ mod tests {
|
|||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
ignore_if_contains: Some(vec!["ignoreme".to_string()]),
|
||||
checksum: None,
|
||||
};
|
||||
|
||||
// With ignoring enabled, the match is skipped
|
||||
assert!(matches!(
|
||||
reqs.validate(b"value-ignoreme", true),
|
||||
reqs.validate(b"value-ignoreme", None, true),
|
||||
PatternValidationResult::IgnoredBySubstring { .. }
|
||||
));
|
||||
|
||||
// With ignoring disabled, the same input passes requirements
|
||||
assert!(matches!(reqs.validate(b"value-ignoreme", false), PatternValidationResult::Passed));
|
||||
assert!(matches!(
|
||||
reqs.validate(b"value-ignoreme", None, false),
|
||||
PatternValidationResult::Passed
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -773,11 +971,12 @@ mod tests {
|
|||
min_special_chars: None,
|
||||
special_chars: None,
|
||||
ignore_if_contains: None,
|
||||
checksum: None,
|
||||
};
|
||||
|
||||
// Should pass: no requirements
|
||||
assert!(matches!(reqs.validate(b"anything", true), PatternValidationResult::Passed));
|
||||
assert!(matches!(reqs.validate(b"123", true), PatternValidationResult::Passed));
|
||||
assert!(matches!(reqs.validate(b"!@#", true), PatternValidationResult::Passed));
|
||||
assert!(matches!(reqs.validate(b"anything", None, true), PatternValidationResult::Passed));
|
||||
assert!(matches!(reqs.validate(b"123", None, true), PatternValidationResult::Passed));
|
||||
assert!(matches!(reqs.validate(b"!@#", None, true), PatternValidationResult::Passed));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -332,9 +332,7 @@ async fn timed_validate_single_match<'a>(
|
|||
}
|
||||
|
||||
let mut globals = Object::new();
|
||||
for (k, v, ..) in &captured_values {
|
||||
globals.insert(k.to_uppercase().into(), Value::scalar(v.clone()));
|
||||
}
|
||||
populate_globals_from_captures(&mut globals, &captured_values);
|
||||
|
||||
let rule_syntax = m.rule.syntax();
|
||||
|
||||
|
|
@ -961,6 +959,59 @@ async fn timed_validate_single_match<'a>(
|
|||
commit_and_return(m);
|
||||
}
|
||||
|
||||
fn populate_globals_from_captures(
|
||||
globals: &mut Object,
|
||||
captured_values: &[(String, String, usize, usize)],
|
||||
) {
|
||||
let mut best_token: Option<(usize, String)> = None;
|
||||
|
||||
for (k, v, ..) in captured_values {
|
||||
let key = k.to_uppercase();
|
||||
if key == "TOKEN" {
|
||||
if best_token.as_ref().map_or(true, |(len, _)| v.len() >= *len) {
|
||||
best_token = Some((v.len(), v.clone()));
|
||||
}
|
||||
} else {
|
||||
globals.insert(key.into(), Value::scalar(v.clone()));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((_, token)) = best_token {
|
||||
globals.insert("TOKEN".into(), Value::scalar(token));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `(name, value, start, end)` capture tuple that starts at
    /// offset 0 and ends at the value's length.
    fn capture(name: &str, value: &str) -> (String, String, usize, usize) {
        (name.to_string(), value.to_string(), 0, value.len())
    }

    /// Returns the string rendering of the global stored under `key`, if any.
    fn rendered(globals: &Object, key: &str) -> Option<String> {
        globals.get(key).map(|v| v.to_string())
    }

    /// With two `TOKEN` captures, the longer value is the one exposed, and
    /// other captures are still copied through.
    #[test]
    fn populate_globals_prefers_longest_token() {
        let captured_values: Vec<(String, String, usize, usize)> = vec![
            capture("TOKEN", "short"),
            capture("BODY", "body"),
            capture("TOKEN", "longervalue"),
        ];

        let mut globals = Object::new();
        populate_globals_from_captures(&mut globals, &captured_values);

        assert_eq!(rendered(&globals, "TOKEN"), Some("longervalue".to_string()));
        assert_eq!(rendered(&globals, "BODY"), Some("body".to_string()));
    }

    /// Without any `TOKEN` capture, no `TOKEN` global is created, while the
    /// remaining captures are inserted as usual.
    #[test]
    fn populate_globals_handles_missing_token() {
        let captured_values: Vec<(String, String, usize, usize)> =
            vec![capture("CHECKSUM", "123456")];

        let mut globals = Object::new();
        populate_globals_from_captures(&mut globals, &captured_values);

        assert!(globals.get("TOKEN").is_none());
        assert_eq!(rendered(&globals, "CHECKSUM"), Some("123456".to_string()));
    }
}
|
||||
|
||||
// #[cfg(test)]
|
||||
// mod tests {
|
||||
// use std::sync::Arc;
|
||||
|
|
|
|||
|
|
@ -6,19 +6,11 @@ use crate::validation::SerializableCaptures;
|
|||
/// Return (NAME, value, start, end) for every capture we care about.
|
||||
///
|
||||
/// * If a capture has a name, use that (upper-cased)
|
||||
/// * If it’s unnamed, fall back to `"TOKEN"`
|
||||
/// * Skip the unnamed “whole-match” capture **only when** there are
|
||||
/// additional captures to return.
|
||||
/// * If it’s unnamed, fall back to `"TOKEN"`
|
||||
pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> {
|
||||
let multiple = captures.captures.len() > 1;
|
||||
|
||||
captures
|
||||
.captures
|
||||
.iter()
|
||||
// Skip the whole-match capture (match_number == 0) only when there
|
||||
// are additional captures. All other captures – named or unnamed –
|
||||
// should be preserved.
|
||||
.filter(|cap| !multiple || cap.match_number != 0)
|
||||
.map(|cap| {
|
||||
let name =
|
||||
cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string());
|
||||
|
|
@ -140,7 +132,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn skips_whole_match_when_multiple() {
|
||||
fn includes_whole_match_when_multiple() {
|
||||
let captures = SerializableCaptures {
|
||||
captures: smallvec![
|
||||
SerializableCapture {
|
||||
|
|
@ -160,11 +152,17 @@ mod tests {
|
|||
],
|
||||
};
|
||||
let result = process_captures(&captures);
|
||||
assert_eq!(result, vec![("FOO".to_string(), "bcd".to_string(), 1usize, 4usize)]);
|
||||
assert_eq!(
|
||||
result,
|
||||
vec![
|
||||
("TOKEN".to_string(), "abcde".to_string(), 0usize, 5usize),
|
||||
("FOO".to_string(), "bcd".to_string(), 1usize, 4usize),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn includes_unnamed_groups_but_skips_whole_match() {
|
||||
fn includes_whole_match_and_unnamed_groups() {
|
||||
let captures = SerializableCaptures {
|
||||
captures: smallvec![
|
||||
SerializableCapture {
|
||||
|
|
@ -188,6 +186,7 @@ mod tests {
|
|||
assert_eq!(
|
||||
result,
|
||||
vec![
|
||||
("TOKEN".to_string(), "aabbcc".to_string(), 0usize, 6usize),
|
||||
("FOO".to_string(), "aa".to_string(), 0usize, 2usize),
|
||||
("TOKEN".to_string(), "cc".to_string(), 4usize, 6usize),
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue