forked from mirrors/kingfisher
249 lines
9.2 KiB
Rust
249 lines
9.2 KiB
Rust
use reqwest::Url;
|
||
use tokio::net::lookup_host;
|
||
|
||
use crate::validation::SerializableCaptures;
|
||
|
||
/// Return (NAME, value, start, end) for the captures we care about.
|
||
///
|
||
/// * Named captures keep their (upper-cased) name
|
||
/// * Among unnamed captures, keep **only the first one** and call it "TOKEN"
|
||
pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> {
|
||
let mut saw_unnamed = false;
|
||
|
||
captures
|
||
.captures
|
||
.iter()
|
||
.filter_map(|cap| {
|
||
if let Some(name) = &cap.name {
|
||
Some((name.to_uppercase(), cap.value.to_string(), cap.start, cap.end))
|
||
} else if !saw_unnamed {
|
||
saw_unnamed = true;
|
||
Some(("TOKEN".to_string(), cap.value.to_string(), cap.start, cap.end))
|
||
} else {
|
||
// Ignore any additional unnamed captures (e.g., from unintended groups)
|
||
None
|
||
}
|
||
})
|
||
.collect()
|
||
}
|
||
|
||
pub fn find_closest_variable(
|
||
captures: &[(String, String, usize, usize)],
|
||
target_value: &String,
|
||
target_variable_name: &str,
|
||
search_variable_name: &str,
|
||
) -> Option<String> {
|
||
// Collect the positions of the target variable for the provided value so we can
|
||
// compare relative offsets with candidate variables.
|
||
let mut target_positions = Vec::new();
|
||
for (name, value, start, end) in captures {
|
||
if name == target_variable_name && value == target_value {
|
||
target_positions.push((*start, *end));
|
||
}
|
||
}
|
||
|
||
if target_positions.is_empty() {
|
||
return None;
|
||
}
|
||
|
||
// Prefer candidates that appear before the target value (same logical block), but
|
||
// fall back to overlapping values and then to those that appear after the target
|
||
// value when no better match exists. This avoids pairing with the next block when
|
||
// multiple credentials are close together in the same file.
|
||
let mut best_before: Option<(usize, String)> = None;
|
||
let mut best_overlap: Option<(usize, String)> = None;
|
||
let mut best_after: Option<(usize, String)> = None;
|
||
|
||
for (target_start, target_end) in target_positions.iter().copied() {
|
||
for (name, value, start, end) in captures {
|
||
if name != search_variable_name {
|
||
continue;
|
||
}
|
||
|
||
if *end <= target_start {
|
||
// Candidate is before the target; choose the one closest to the target start.
|
||
let distance = target_start - *end;
|
||
match &mut best_before {
|
||
Some((best_distance, best_value)) if distance < *best_distance => {
|
||
*best_distance = distance;
|
||
*best_value = value.clone();
|
||
}
|
||
None => {
|
||
best_before = Some((distance, value.clone()));
|
||
}
|
||
_ => {}
|
||
}
|
||
} else if *start >= target_end {
|
||
// Candidate is after the target; choose the one closest to the target end.
|
||
let distance = *start - target_end;
|
||
match &mut best_after {
|
||
Some((best_distance, best_value)) if distance < *best_distance => {
|
||
*best_distance = distance;
|
||
*best_value = value.clone();
|
||
}
|
||
None => {
|
||
best_after = Some((distance, value.clone()));
|
||
}
|
||
_ => {}
|
||
}
|
||
} else {
|
||
// Candidate overlaps the target – treat as an exact match.
|
||
let distance = 0usize;
|
||
match &mut best_overlap {
|
||
Some((best_distance, best_value)) if distance < *best_distance => {
|
||
*best_distance = distance;
|
||
*best_value = value.clone();
|
||
}
|
||
None => {
|
||
best_overlap = Some((distance, value.clone()));
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
best_before.or(best_overlap).or(best_after).map(|(_, value)| value)
|
||
}
|
||
|
||
pub async fn check_url_resolvable(url: &Url) -> Result<(), Box<dyn std::error::Error>> {
|
||
let host = url.host_str().ok_or("No host in URL")?;
|
||
let port = url.port().unwrap_or(if url.scheme() == "https" { 443 } else { 80 });
|
||
let addr = format!("{}:{}", host, port);
|
||
lookup_host(addr).await?.next().ok_or_else(|| "Failed to resolve URL".into()).map(|_| ())
|
||
}
|
||
|
||
// -----------------------------------------------------------------------------
|
||
// tests
|
||
// -----------------------------------------------------------------------------
|
||
//
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
use crate::matcher::{SerializableCapture, SerializableCaptures};
|
||
use pretty_assertions::assert_eq;
|
||
use smallvec::smallvec;
|
||
|
||
#[test]
|
||
fn single_unnamed_capture_is_returned() {
|
||
let captures = SerializableCaptures {
|
||
captures: smallvec![SerializableCapture {
|
||
name: None,
|
||
match_number: 0, // This test is for a rule with *no* explicit captures
|
||
start: 1,
|
||
end: 4,
|
||
value: "abc",
|
||
}],
|
||
};
|
||
let result = process_captures(&captures);
|
||
assert_eq!(result, vec![("TOKEN".to_string(), "abc".to_string(), 1usize, 4usize)]);
|
||
}
|
||
#[test]
|
||
fn includes_whole_match_when_multiple() {
|
||
let captures = SerializableCaptures {
|
||
captures: smallvec![
|
||
// --- FIX ---
|
||
// This test simulated a regex like `(abc)de(?P<foo>bcd)`.
|
||
// With our fix, group 0 ("abcde") is NOT serialized.
|
||
// We only get the explicit captures (group 1 and "foo").
|
||
SerializableCapture {
|
||
// This is group 1 (unnamed)
|
||
name: None,
|
||
match_number: 1, // Corrected match_number
|
||
start: 1,
|
||
end: 4,
|
||
value: "bcd",
|
||
},
|
||
SerializableCapture {
|
||
// This is group 2 (named "foo")
|
||
name: Some("foo".to_string()),
|
||
match_number: 2, // Corrected match_number
|
||
start: 1,
|
||
end: 4,
|
||
value: "bcd",
|
||
},
|
||
],
|
||
};
|
||
let result = process_captures(&captures);
|
||
|
||
// --- FIX ---
|
||
// The expected result now only contains the explicit captures.
|
||
// The first unnamed capture ("bcd") becomes "TOKEN".
|
||
assert_eq!(
|
||
result,
|
||
vec![
|
||
("TOKEN".to_string(), "bcd".to_string(), 1usize, 4usize),
|
||
("FOO".to_string(), "bcd".to_string(), 1usize, 4usize),
|
||
]
|
||
);
|
||
// --- END FIX ---
|
||
}
|
||
|
||
#[test]
|
||
fn includes_whole_match_and_unnamed_groups() {
|
||
let captures = SerializableCaptures {
|
||
captures: smallvec![
|
||
// --- FIX ---
|
||
// This test simulated a regex like `(?P<foo>aa)bb(cc)`.
|
||
// With our fix, group 0 ("aabbcc") is NOT serialized.
|
||
// We only get the explicit captures ("foo" and group 2).
|
||
SerializableCapture {
|
||
// This is group 1 (named "foo")
|
||
name: Some("foo".to_string()),
|
||
match_number: 1, // Corrected match_number
|
||
start: 0,
|
||
end: 2,
|
||
value: "aa",
|
||
},
|
||
SerializableCapture {
|
||
// This is group 2 (unnamed)
|
||
name: None,
|
||
match_number: 2, // Corrected match_number
|
||
start: 4,
|
||
end: 6,
|
||
value: "cc"
|
||
},
|
||
],
|
||
};
|
||
let result = process_captures(&captures);
|
||
|
||
// --- FIX ---
|
||
// The expected result no longer contains the full match ("aabbcc").
|
||
// The first (and only) unnamed capture ("cc") is now correctly labeled "TOKEN".
|
||
assert_eq!(
|
||
result,
|
||
vec![
|
||
("FOO".to_string(), "aa".to_string(), 0usize, 2usize), // From named group 1
|
||
("TOKEN".to_string(), "cc".to_string(), 4usize, 6usize), // From unnamed group 2
|
||
]
|
||
);
|
||
// --- END FIX ---
|
||
}
|
||
|
||
#[test]
|
||
fn prefers_closest_preceding_variable() {
|
||
let captures = vec![
|
||
("TOKEN".to_string(), "secret".to_string(), 75usize, 115usize),
|
||
("AKID".to_string(), "preceding".to_string(), 30usize, 50usize),
|
||
("AKID".to_string(), "following".to_string(), 180usize, 200usize),
|
||
];
|
||
|
||
let result =
|
||
find_closest_variable(&captures, &"secret".to_string(), "TOKEN", "AKID").unwrap();
|
||
|
||
assert_eq!(result, "preceding".to_string());
|
||
}
|
||
|
||
#[test]
|
||
fn falls_back_to_following_when_no_preceding() {
|
||
let captures = vec![
|
||
("TOKEN".to_string(), "secret".to_string(), 10usize, 50usize),
|
||
("AKID".to_string(), "after".to_string(), 60usize, 80usize),
|
||
];
|
||
|
||
let result =
|
||
find_closest_variable(&captures, &"secret".to_string(), "TOKEN", "AKID").unwrap();
|
||
|
||
assert_eq!(result, "after".to_string());
|
||
}
|
||
}
|