kingfisher/src/validation/utils.rs
2025-09-05 09:31:52 -07:00

150 lines
5.1 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use reqwest::Url;
use tokio::net::lookup_host;
use crate::validation::SerializableCaptures;
/// Return (NAME, value, start, end) for every capture we care about.
///
/// * If a capture has a name, use that (upper-cased)
/// * If its unnamed, fall back to `"TOKEN"`
/// * Skip the unnamed “whole-match” capture **only when** there are
/// additional captures to return.
pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> {
let multiple = captures.captures.len() > 1;
captures
.captures
.iter()
// Skip the whole-match capture (match_number == 0) only when there
// are additional captures. All other captures named or unnamed
// should be preserved.
.filter(|cap| !multiple || cap.match_number != 0)
.map(|cap| {
let name =
cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string());
(name, cap.value.to_string(), cap.start, cap.end)
})
.collect()
}
pub fn find_closest_variable(
captures: &[(String, String, usize, usize)],
target_value: &String,
target_variable_name: &str,
search_variable_name: &str,
) -> Option<String> {
// Find positions of the target variable with the target value
let mut target_positions = Vec::new();
for (name, value, start, end) in captures {
if name == target_variable_name && value == target_value {
target_positions.push((*start, *end));
}
}
if target_positions.is_empty() {
return None;
}
// For each target position, find the closest search variable
let mut closest_distance = usize::MAX;
let mut closest_value: Option<String> = None;
for (_target_start, target_end) in target_positions {
for (name, value, start, _) in captures {
if name == search_variable_name {
let distance = (*start as isize - target_end as isize).abs() as usize;
if distance < closest_distance {
closest_distance = distance;
closest_value = Some(value.clone());
}
}
}
}
closest_value
}
pub async fn check_url_resolvable(url: &Url) -> Result<(), Box<dyn std::error::Error>> {
let host = url.host_str().ok_or("No host in URL")?;
let port = url.port().unwrap_or(if url.scheme() == "https" { 443 } else { 80 });
let addr = format!("{}:{}", host, port);
lookup_host(addr).await?.next().ok_or_else(|| "Failed to resolve URL".into()).map(|_| ())
}
// -----------------------------------------------------------------------------
// tests
// -----------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
use crate::matcher::{SerializableCapture, SerializableCaptures};
use pretty_assertions::assert_eq;
use smallvec::smallvec;
#[test]
fn single_unnamed_capture_is_returned() {
let captures = SerializableCaptures {
captures: smallvec![SerializableCapture {
name: None,
match_number: 0,
start: 1,
end: 4,
value: "abc",
}],
};
let result = process_captures(&captures);
assert_eq!(result, vec![("TOKEN".to_string(), "abc".to_string(), 1usize, 4usize)]);
}
#[test]
fn skips_whole_match_when_multiple() {
let captures = SerializableCaptures {
captures: smallvec![
SerializableCapture {
name: None,
match_number: 0,
start: 0,
end: 5,
value: "abcde",
},
SerializableCapture {
name: Some("foo".to_string()),
match_number: -1,
start: 1,
end: 4,
value: "bcd",
},
],
};
let result = process_captures(&captures);
assert_eq!(result, vec![("FOO".to_string(), "bcd".to_string(), 1usize, 4usize)]);
}
#[test]
fn includes_unnamed_groups_but_skips_whole_match() {
let captures = SerializableCaptures {
captures: smallvec![
SerializableCapture {
name: None,
match_number: 0,
start: 0,
end: 6,
value: "aabbcc",
},
SerializableCapture {
name: Some("foo".to_string()),
match_number: -1,
start: 0,
end: 2,
value: "aa",
},
SerializableCapture { name: None, match_number: 1, start: 4, end: 6, value: "cc" },
],
};
let result = process_captures(&captures);
assert_eq!(
result,
vec![
("FOO".to_string(), "aa".to_string(), 0usize, 2usize),
("TOKEN".to_string(), "cc".to_string(), 4usize, 6usize),
]
);
}
}