- New rules: Telegram bot token, OpenWeatherMap, Apify

- New OpenAI detectors added (@joshlarsen)
- Fixed a bug that broke validation when using unnamed capture groups
This commit is contained in:
Mick Grove 2025-08-01 16:56:04 -07:00
commit 8a74eba160
29 changed files with 241 additions and 64 deletions

View file

@ -4,16 +4,16 @@ use std::time::Duration;
use anyhow::{anyhow, Result};
use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
use chrono::Utc;
use ed25519_dalek::SigningKey as Ed25519Key;
use p256::{
ecdsa::{signature::Signer as _, SigningKey},
pkcs8::DecodePrivateKey,
SecretKey,
};
use ed25519_dalek::{SigningKey as Ed25519Key, Signer as _};
use rand::rngs::OsRng;
use rand::TryRngCore;
use reqwest::{Client, StatusCode, Url};
use sha1::{Digest, Sha1};
use rand::TryRngCore;
use crate::validation::{httpvalidation, Cache, CachedResponse, VALIDATION_CACHE_SECONDS};
@ -68,7 +68,6 @@ pub async fn validate_cdp_api_key(
Ok((ok, msg))
}
// fn build_jwt(
// method: &str,
// host: &str,
@ -85,7 +84,7 @@ pub async fn validate_cdp_api_key(
// let mut rng = OsRng;
// let mut nonce = [0u8; 16];
// let _ = rng.try_fill_bytes(&mut nonce);
// let header = serde_json::json!({
@ -125,12 +124,12 @@ fn build_jwt(
let mut rng = OsRng;
let mut nonce = [0u8; 16];
let _ = rng.try_fill_bytes(&mut nonce);
// Try ECDSA (PEM encoded EC key). Fallback to raw Ed25519 base64 key.
if let Ok(secret_key) = SecretKey::from_sec1_pem(&pem)
.or_else(|_| SecretKey::from_pkcs8_pem(&pem))
if let Ok(secret_key) =
SecretKey::from_sec1_pem(&pem).or_else(|_| SecretKey::from_pkcs8_pem(&pem))
{
let signing_key = SigningKey::from(secret_key);
let header = serde_json::json!({
@ -168,7 +167,8 @@ fn build_jwt(
}
64 => {
let arr: [u8; 64] = key_bytes[..64].try_into().unwrap();
Ed25519Key::from_keypair_bytes(&arr).map_err(|e| anyhow!("invalid Ed25519 key: {e}"))?
Ed25519Key::from_keypair_bytes(&arr)
.map_err(|e| anyhow!("invalid Ed25519 key: {e}"))?
}
_ => return Err(anyhow!("invalid Ed25519 key length")),
};
@ -196,4 +196,4 @@ fn build_jwt(
let sig_b64 = URL_SAFE_NO_PAD.encode(sig.to_bytes());
return Ok(format!("{signing_input}.{sig_b64}"));
}
}
}

View file

@ -162,7 +162,11 @@ pub async fn validate_jwt(token: &str) -> Result<(bool, String)> {
return Ok((
true,
format!("JWT valid (alg: {:?}, iss: {issuer}, aud: {:?})", alg, extract_aud_strings(&claims)),
format!(
"JWT valid (alg: {:?}, iss: {issuer}, aud: {:?})",
alg,
extract_aud_strings(&claims)
),
));
}

View file

@ -9,21 +9,19 @@ use crate::validation::SerializableCaptures;
/// * If it's unnamed, fall back to `"TOKEN"`
/// * Skip the unnamed “whole-match” capture **only when** there are
/// additional captures to return.
pub fn process_captures(
captures: &SerializableCaptures,
) -> Vec<(String, String, usize, usize)> {
pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> {
// True when the match produced more than one capture entry.
let multiple = captures.captures.len() > 1;
captures
.captures
.iter()
.filter(|cap| !multiple || cap.name.is_some())
// Skip the whole-match capture (match_number == 0) only when there
// are additional captures. All other captures, named or unnamed,
// should be preserved.
.filter(|cap| !multiple || cap.match_number != 0)
.map(|cap| {
let name = cap
.name
.as_ref()
.map(|n| n.to_uppercase())
.unwrap_or_else(|| "TOKEN".to_string());
let name =
cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string());
(name, cap.value.clone().into_owned(), cap.start, cap.end)
})
.collect()
@ -68,3 +66,90 @@ pub async fn check_url_resolvable(url: &Url) -> Result<(), Box<dyn std::error::E
let addr = format!("{}:{}", host, port);
lookup_host(addr).await?.next().ok_or_else(|| "Failed to resolve URL".into()).map(|_| ())
}
// -----------------------------------------------------------------------------
// tests
// -----------------------------------------------------------------------------
#[cfg(test)]
mod tests {
    use super::*;
    use crate::matcher::{SerializableCapture, SerializableCaptures};
    use pretty_assertions::assert_eq;

    /// A lone capture with `match_number == 0` and no name is still returned,
    /// labelled with the fallback name `"TOKEN"`.
    #[test]
    fn single_unnamed_capture_is_returned() {
        let only = SerializableCapture {
            value: "abc".into(),
            start: 1,
            end: 4,
            match_number: 0,
            name: None,
        };
        let input = SerializableCaptures { captures: vec![only] };

        let expected: Vec<(String, String, usize, usize)> =
            vec![(String::from("TOKEN"), String::from("abc"), 1, 4)];

        let actual = process_captures(&input);
        assert_eq!(actual, expected);
    }

    /// With a named group present, the `match_number == 0` entry is dropped
    /// and only the named group comes back, with its name upper-cased.
    #[test]
    fn skips_whole_match_when_multiple() {
        let whole = SerializableCapture {
            value: "abcde".into(),
            start: 0,
            end: 5,
            match_number: 0,
            name: None,
        };
        let named = SerializableCapture {
            value: "bcd".into(),
            start: 1,
            end: 4,
            match_number: -1,
            name: Some(String::from("foo")),
        };
        let input = SerializableCaptures { captures: vec![whole, named] };

        let expected: Vec<(String, String, usize, usize)> =
            vec![(String::from("FOO"), String::from("bcd"), 1, 4)];

        let actual = process_captures(&input);
        assert_eq!(actual, expected);
    }

    /// Unnamed groups with a non-zero `match_number` are kept (as `"TOKEN"`)
    /// alongside named groups; only the `match_number == 0` entry is removed.
    #[test]
    fn includes_unnamed_groups_but_skips_whole_match() {
        let whole = SerializableCapture {
            value: "aabbcc".into(),
            start: 0,
            end: 6,
            match_number: 0,
            name: None,
        };
        let named = SerializableCapture {
            value: "aa".into(),
            start: 0,
            end: 2,
            match_number: -1,
            name: Some(String::from("foo")),
        };
        let unnamed = SerializableCapture {
            value: "cc".into(),
            start: 4,
            end: 6,
            match_number: 1,
            name: None,
        };
        let input = SerializableCaptures { captures: vec![whole, named, unnamed] };

        let expected: Vec<(String, String, usize, usize)> = vec![
            (String::from("FOO"), String::from("aa"), 0, 2),
            (String::from("TOKEN"), String::from("cc"), 4, 6),
        ];

        let actual = process_captures(&input);
        assert_eq!(actual, expected);
    }
}