2026-04-15 17:13:10 -07:00
|
|
|
|
use std::sync::LazyLock;
|
|
|
|
|
|
|
|
|
|
|
|
use tl::{HTMLTag, Node, Parser, ParserOptions};
|
|
|
|
|
|
|
2025-06-24 17:17:16 -07:00
|
|
|
|
use crate::validation::SerializableCaptures;
|
2025-07-31 16:49:46 -07:00
|
|
|
|
|
2026-03-27 15:04:14 -07:00
|
|
|
|
// Re-export from the scanner crate so the rest of this module can use it.
|
2026-03-27 17:22:21 -07:00
|
|
|
|
pub use kingfisher_scanner::validation::{check_url_resolvable, is_ssrf_safe_ip};
|
2026-03-27 15:04:14 -07:00
|
|
|
|
|
2026-04-15 17:13:10 -07:00
|
|
|
|
/// Default `tl` parser options, built on first access and shared by every HTML parse
/// performed in this module.
static HTML_PARSER_OPTIONS: LazyLock<ParserOptions> = LazyLock::new(ParserOptions::default);
|
|
|
|
|
|
|
|
|
|
|
|
/// Normalizes whitespace in `input`.
///
/// Every run of Unicode whitespace becomes a single ASCII space, and leading and trailing
/// whitespace is dropped entirely. An all-whitespace or empty input yields an empty string.
fn collapse_whitespace(input: &str) -> String {
    // `split_whitespace` yields only the non-empty, whitespace-free pieces, so joining them
    // with one space both collapses interior runs and trims the ends.
    let words: Vec<&str> = input.split_whitespace().collect();
    words.join(" ")
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Decodes a small, fixed set of HTML character references in `input`.
///
/// Handled references: `&nbsp;` / `&#160;` (decoded to a plain space), `&amp;`, `&lt;`,
/// `&gt;`, `&quot;` / `&#34;`, and `&#39;` / `&apos;`. Anything else, including a bare `&`
/// or an unknown reference, is copied through unchanged.
///
/// Decoding is a single left-to-right pass: text produced by a replacement is never
/// re-scanned. This matters for escaped entities such as `&amp;lt;`, which must decode to
/// the literal text `&lt;` rather than to `<`.
fn decode_common_html_entities(input: &str) -> String {
    const ENTITY_REPLACEMENTS: [(&str, &str); 9] = [
        ("&nbsp;", " "),
        ("&#160;", " "),
        ("&amp;", "&"),
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&quot;", "\""),
        ("&#34;", "\""),
        ("&#39;", "'"),
        ("&apos;", "'"),
    ];

    let mut decoded = String::with_capacity(input.len());
    let mut rest = input;

    while let Some(amp_at) = rest.find('&') {
        // Everything before the ampersand is plain text.
        decoded.push_str(&rest[..amp_at]);
        let tail = &rest[amp_at..];

        let known = ENTITY_REPLACEMENTS.iter().find(|(entity, _)| tail.starts_with(entity));
        match known {
            Some((entity, replacement)) => {
                decoded.push_str(replacement);
                rest = &tail[entity.len()..];
            }
            None => {
                // Not a reference we know: keep the `&` and continue after it.
                // `&` is one byte, so slicing at 1 is always a char boundary.
                decoded.push('&');
                rest = &tail[1..];
            }
        }
    }

    decoded.push_str(rest);
    decoded
}
|
|
|
|
|
|
|
|
|
|
|
|
fn collect_visible_text_from_tag(tag: &HTMLTag<'_>, parser: &Parser<'_>, out: &mut String) {
|
|
|
|
|
|
for handle in tag.children().top().iter() {
|
|
|
|
|
|
let Some(node) = handle.get(parser) else {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
collect_visible_text(node, parser, out);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn collect_visible_text(node: &Node<'_>, parser: &Parser<'_>, out: &mut String) {
|
|
|
|
|
|
match node {
|
|
|
|
|
|
Node::Raw(raw) => {
|
|
|
|
|
|
let chunk = raw.as_utf8_str();
|
|
|
|
|
|
let chunk = chunk.trim();
|
|
|
|
|
|
if !chunk.is_empty() {
|
|
|
|
|
|
if !out.is_empty() {
|
|
|
|
|
|
out.push(' ');
|
|
|
|
|
|
}
|
|
|
|
|
|
out.push_str(chunk);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
Node::Comment(_) => {}
|
|
|
|
|
|
Node::Tag(tag) => {
|
|
|
|
|
|
let name = tag.name().as_utf8_str();
|
|
|
|
|
|
if name.eq_ignore_ascii_case("script")
|
|
|
|
|
|
|| name.eq_ignore_ascii_case("style")
|
|
|
|
|
|
|| name.eq_ignore_ascii_case("noscript")
|
|
|
|
|
|
|| name.eq_ignore_ascii_case("template")
|
|
|
|
|
|
{
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
collect_visible_text_from_tag(tag, parser, out);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn extract_visible_text_from_html(input: &str) -> Option<String> {
|
|
|
|
|
|
let dom = tl::parse(input, *HTML_PARSER_OPTIONS).ok()?;
|
|
|
|
|
|
let parser = dom.parser();
|
|
|
|
|
|
|
|
|
|
|
|
let mut out = String::new();
|
|
|
|
|
|
for handle in dom.children() {
|
|
|
|
|
|
let Some(node) = handle.get(parser) else {
|
|
|
|
|
|
continue;
|
|
|
|
|
|
};
|
|
|
|
|
|
collect_visible_text(node, parser, &mut out);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Some(collapse_whitespace(&decode_common_html_entities(&out)))
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn strip_html_markup(input: &str) -> String {
|
|
|
|
|
|
extract_visible_text_from_html(input)
|
|
|
|
|
|
.unwrap_or_else(|| collapse_whitespace(&decode_common_html_entities(input)))
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Returns at most `max_len` bytes of `input` without splitting a UTF-8 character.
///
/// A `max_len` of `0` means "no limit": the input is returned whole. When the byte limit
/// falls inside a multi-byte character, the cut moves back to the previous character
/// boundary, so the result may be shorter than `max_len` bytes.
fn truncate_to_char_boundary(input: &str, max_len: usize) -> String {
    let unlimited = max_len == 0;
    if unlimited || input.len() <= max_len {
        return input.to_owned();
    }

    // Walk back from the byte limit to the nearest boundary. Index 0 is always a
    // boundary, so the search cannot come up empty.
    let cut = (0..=max_len).rev().find(|&idx| input.is_char_boundary(idx)).unwrap_or(0);
    input[..cut].to_owned()
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Formats validation response text for report output.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// When `strip_html` is true, HTML markup is stripped and common entities are decoded before
|
|
|
|
|
|
/// optional truncation.
|
|
|
|
|
|
pub fn format_response_body_for_display(body: &str, max_len: usize, strip_html: bool) -> String {
|
|
|
|
|
|
let rendered = if strip_html { strip_html_markup(body) } else { body.to_string() };
|
|
|
|
|
|
truncate_to_char_boundary(&rendered, max_len)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-10 18:47:51 -08:00
|
|
|
|
/// Return (NAME, value, start, end) for the captures we care about.
|
2025-07-31 16:49:46 -07:00
|
|
|
|
///
|
2025-11-10 18:47:51 -08:00
|
|
|
|
/// * Named captures keep their (upper-cased) name
|
|
|
|
|
|
/// * Among unnamed captures, keep **only the first one** and call it "TOKEN"
|
2025-08-01 16:56:04 -07:00
|
|
|
|
pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> {
|
2025-11-10 18:47:51 -08:00
|
|
|
|
let mut saw_unnamed = false;
|
|
|
|
|
|
|
2025-06-24 17:17:16 -07:00
|
|
|
|
captures
|
|
|
|
|
|
.captures
|
|
|
|
|
|
.iter()
|
2025-11-10 18:47:51 -08:00
|
|
|
|
.filter_map(|cap| {
|
|
|
|
|
|
if let Some(name) = &cap.name {
|
2025-11-11 13:24:06 -08:00
|
|
|
|
Some((name.to_uppercase(), cap.raw_value().to_string(), cap.start, cap.end))
|
2025-11-10 18:47:51 -08:00
|
|
|
|
} else if !saw_unnamed {
|
|
|
|
|
|
saw_unnamed = true;
|
2025-11-11 13:24:06 -08:00
|
|
|
|
Some(("TOKEN".to_string(), cap.raw_value().to_string(), cap.start, cap.end))
|
2025-11-10 18:47:51 -08:00
|
|
|
|
} else {
|
|
|
|
|
|
// Ignore any additional unnamed captures (e.g., from unintended groups)
|
|
|
|
|
|
None
|
|
|
|
|
|
}
|
2025-06-24 17:17:16 -07:00
|
|
|
|
})
|
|
|
|
|
|
.collect()
|
|
|
|
|
|
}
|
2025-07-31 16:49:46 -07:00
|
|
|
|
|
2025-06-24 17:17:16 -07:00
|
|
|
|
/// Finds the value of `search_variable_name` located closest to a given target capture.
///
/// The target is every capture named `target_variable_name` whose value equals
/// `target_value`. Returns `None` when no such capture exists, or when no capture named
/// `search_variable_name` exists.
///
/// Candidates are ranked in three tiers, and the first non-empty tier wins:
///
/// 1. candidates ending at or before a target's start (smallest gap wins),
/// 2. candidates overlapping a target (the first one encountered wins),
/// 3. candidates starting at or after a target's end (smallest gap wins).
///
/// Preferring preceding candidates avoids pairing a credential with the next block when
/// several credentials sit close together in the same file. Ties within a tier are resolved
/// in favor of the candidate seen first.
pub fn find_closest_variable(
    captures: &[(String, String, usize, usize)],
    target_value: &str,
    target_variable_name: &str,
    search_variable_name: &str,
) -> Option<String> {
    // Spans of the target variable carrying the requested value.
    let target_spans: Vec<(usize, usize)> = captures
        .iter()
        .filter(|(name, value, _, _)| name == target_variable_name && value == target_value)
        .map(|(_, _, start, end)| (*start, *end))
        .collect();
    if target_spans.is_empty() {
        return None;
    }

    // One (distance, value) slot per tier. Values are borrowed and cloned once at the end.
    let mut nearest_before: Option<(usize, &str)> = None;
    let mut overlapping: Option<(usize, &str)> = None;
    let mut nearest_after: Option<(usize, &str)> = None;

    for &(target_start, target_end) in &target_spans {
        for (name, value, start, end) in captures {
            if name != search_variable_name {
                continue;
            }

            let (slot, distance) = if *end <= target_start {
                (&mut nearest_before, target_start - *end)
            } else if *start >= target_end {
                (&mut nearest_after, *start - target_end)
            } else {
                // Overlap counts as distance zero, so the first overlap is never replaced.
                (&mut overlapping, 0usize)
            };

            let improves = slot.as_ref().map_or(true, |(best, _)| distance < *best);
            if improves {
                *slot = Some((distance, value.as_str()));
            }
        }
    }

    nearest_before.or(overlapping).or(nearest_after).map(|(_, value)| value.to_owned())
}
|
2025-07-31 16:49:46 -07:00
|
|
|
|
|
2025-08-01 16:56:04 -07:00
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
|
|
// tests
|
|
|
|
|
|
// -----------------------------------------------------------------------------
|
2025-11-10 18:47:51 -08:00
|
|
|
|
//
|
2025-08-01 16:56:04 -07:00
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::matcher::{SerializableCapture, SerializableCaptures};
    use pretty_assertions::assert_eq;
    use reqwest::Url;
    use smallvec::smallvec;

    // ---- process_captures tests ----

    /// A lone unnamed capture is reported under the name "TOKEN".
    #[test]
    fn single_unnamed_capture_is_returned() {
        let captures = SerializableCaptures {
            captures: smallvec![SerializableCapture {
                name: None,
                match_number: 0, // This test is for a rule with *no* explicit captures
                start: 1,
                end: 4,
                value: "abc",
            }],
        };
        let result = process_captures(&captures);
        assert_eq!(result, vec![("TOKEN".to_string(), "abc".to_string(), 1usize, 4usize)]);
    }

    /// Fixture: an unnamed group 1 followed by a named group 2 ("foo"); no whole-match
    /// (group 0) entry is present. The unnamed capture becomes "TOKEN" and the named one
    /// keeps its upper-cased name.
    #[test]
    fn includes_whole_match_when_multiple() {
        let captures = SerializableCaptures {
            captures: smallvec![
                SerializableCapture {
                    // Group 1 (unnamed)
                    name: None,
                    match_number: 1,
                    start: 1,
                    end: 4,
                    value: "bcd",
                },
                SerializableCapture {
                    // Group 2 (named "foo")
                    name: Some("foo"),
                    match_number: 2,
                    start: 1,
                    end: 4,
                    value: "bcd",
                },
            ],
        };
        let result = process_captures(&captures);

        assert_eq!(
            result,
            vec![
                ("TOKEN".to_string(), "bcd".to_string(), 1usize, 4usize),
                ("FOO".to_string(), "bcd".to_string(), 1usize, 4usize),
            ]
        );
    }

    /// Fixture: a named group 1 ("foo") followed by an unnamed group 2; no whole-match
    /// (group 0) entry is present. Order is preserved and the unnamed capture is "TOKEN".
    #[test]
    fn includes_whole_match_and_unnamed_groups() {
        let captures = SerializableCaptures {
            captures: smallvec![
                SerializableCapture {
                    // Group 1 (named "foo")
                    name: Some("foo"),
                    match_number: 1,
                    start: 0,
                    end: 2,
                    value: "aa",
                },
                SerializableCapture {
                    // Group 2 (unnamed)
                    name: None,
                    match_number: 2,
                    start: 4,
                    end: 6,
                    value: "cc",
                },
            ],
        };
        let result = process_captures(&captures);

        assert_eq!(
            result,
            vec![
                ("FOO".to_string(), "aa".to_string(), 0usize, 2usize), // From named group 1
                ("TOKEN".to_string(), "cc".to_string(), 4usize, 6usize), // From unnamed group 2
            ]
        );
    }

    // ---- find_closest_variable tests ----

    #[test]
    fn prefers_closest_preceding_variable() {
        let captures = vec![
            ("TOKEN".to_string(), "secret".to_string(), 75usize, 115usize),
            ("AKID".to_string(), "preceding".to_string(), 30usize, 50usize),
            ("AKID".to_string(), "following".to_string(), 180usize, 200usize),
        ];

        let result = find_closest_variable(&captures, "secret", "TOKEN", "AKID").unwrap();

        assert_eq!(result, "preceding".to_string());
    }

    #[test]
    fn falls_back_to_following_when_no_preceding() {
        let captures = vec![
            ("TOKEN".to_string(), "secret".to_string(), 10usize, 50usize),
            ("AKID".to_string(), "after".to_string(), 60usize, 80usize),
        ];

        let result = find_closest_variable(&captures, "secret", "TOKEN", "AKID").unwrap();

        assert_eq!(result, "after".to_string());
    }

    /// No capture carries the requested target value, so there is nothing to anchor on.
    #[test]
    fn returns_none_when_target_value_is_absent() {
        let captures = vec![
            ("TOKEN".to_string(), "secret".to_string(), 10usize, 50usize),
            ("AKID".to_string(), "id".to_string(), 0usize, 5usize),
        ];

        let result = find_closest_variable(&captures, "other", "TOKEN", "AKID");

        assert_eq!(result, None);
    }

    // ---- SSRF IP validation tests ----

    #[test]
    fn ssrf_rejects_loopback() {
        assert!(!is_ssrf_safe_ip(&"127.0.0.1".parse().unwrap()));
        assert!(!is_ssrf_safe_ip(&"::1".parse().unwrap()));
    }

    #[test]
    fn ssrf_rejects_unspecified() {
        assert!(!is_ssrf_safe_ip(&"0.0.0.0".parse().unwrap()));
        assert!(!is_ssrf_safe_ip(&"::".parse().unwrap()));
    }

    #[test]
    fn ssrf_rejects_private_ranges() {
        assert!(!is_ssrf_safe_ip(&"10.0.0.1".parse().unwrap()));
        assert!(!is_ssrf_safe_ip(&"172.16.0.1".parse().unwrap()));
        assert!(!is_ssrf_safe_ip(&"192.168.1.1".parse().unwrap()));
    }

    #[test]
    fn ssrf_rejects_link_local_and_metadata() {
        assert!(!is_ssrf_safe_ip(&"169.254.169.254".parse().unwrap()));
        assert!(!is_ssrf_safe_ip(&"169.254.1.1".parse().unwrap()));
    }

    #[test]
    fn ssrf_accepts_public_ips() {
        assert!(is_ssrf_safe_ip(&"8.8.8.8".parse().unwrap()));
        assert!(is_ssrf_safe_ip(&"1.1.1.1".parse().unwrap()));
        assert!(is_ssrf_safe_ip(&"2606:4700::1111".parse().unwrap()));
    }

    // ---- response body formatting tests ----

    /// Script content is dropped, markup is removed, and the `&amp;` entity in the heading
    /// is decoded to a literal ampersand.
    #[test]
    fn format_response_body_for_display_strips_html() {
        let html = r#"<!doctype html>
<html>
<head>
<script>console.log("ignore");</script>
</head>
<body><h1>Hello &amp; goodbye</h1><p>World</p></body>
</html>"#;

        let rendered = format_response_body_for_display(html, 0, true);

        assert_eq!(rendered, "Hello & goodbye World");
    }

    /// "é" is two bytes in UTF-8, so a 7-byte limit must back off to 6 bytes (three chars).
    #[test]
    fn format_response_body_for_display_truncates_on_utf8_boundary() {
        let body = "é".repeat(10);
        let rendered = format_response_body_for_display(&body, 7, false);
        assert_eq!(rendered, "ééé");
    }

    // ---- URL resolvability tests ----

    #[tokio::test]
    async fn check_url_resolvable_blocks_localhost() {
        let url = Url::parse("https://localhost/path").unwrap();
        let result = check_url_resolvable(&url, false).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("SSRF protection"));
    }

    #[tokio::test]
    async fn check_url_resolvable_allows_localhost_when_opted_in() {
        let url = Url::parse("https://localhost/path").unwrap();
        let result = check_url_resolvable(&url, true).await;
        assert!(result.is_ok());
    }
}
|