//! kingfisher/src/validation/httpvalidation.rs
//!
//! HTTP helpers used during validation: cache-key generation, request building with
//! Liquid-templated headers and bodies, retry with exponential backoff, and response
//! matching.
//!
//! Note: some comments in this file contain non-ASCII characters (for example curly
//! quotes, `⇒`, and box-drawing rules) that can be confused with their ASCII look-alikes.

use std::{collections::BTreeMap, future::Future, str::FromStr, time::Duration};
use anyhow::{anyhow, Error, Result};
use http::StatusCode;
use liquid::Object;
use quick_xml::de::from_str as xml_from_str;
use reqwest::{
header,
header::{HeaderMap, HeaderName, HeaderValue},
Client, Method, RequestBuilder, Response, Url,
};
use serde::de::IgnoredAny;
use sha1::{Digest, Sha1};
use tokio::time::sleep;
use tracing::debug;
use crate::rules::rule::ResponseMatcher;
/// Derive a stable cache key for an HTTP request from its method, URL, and headers.
///
/// * `method`  - HTTP verb; upper-cased before hashing so `"get"` and `"GET"` produce the
///   same key.
/// * `url`     - request URL, hashed via its `as_str()` form (any query string must already
///   be part of it).
/// * `headers` - headers to fold into the key; iterating the `BTreeMap` visits them in sorted
///   key order, so the digest does not depend on insertion order.
///
/// Every component is followed by a `\0` byte before the next one is fed to SHA-1, which
/// keeps adjacent parts from blurring together (e.g. `"GET" + "/foo"` vs `"GE" + "T/foo"`).
///
/// Returns the lowercase hex digest prefixed with `HTTP:` so callers can tell the key came
/// from the HTTP logic.
pub fn generate_http_cache_key_parts(
    method: &str,
    url: &Url,
    headers: &BTreeMap<String, String>,
) -> String {
    const SEP: &[u8] = b"\0";

    let verb = method.to_uppercase();
    let mut digest = Sha1::new();

    // Method first, then URL, each terminated by the separator.
    for part in [verb.as_str(), url.as_str()].iter() {
        digest.update(part.as_bytes());
        digest.update(SEP);
    }

    // Headers are hashed as `name:value\0`.
    headers.iter().for_each(|(name, value)| {
        digest.update(name.as_bytes());
        digest.update(b":");
        digest.update(value.as_bytes());
        digest.update(SEP);
    });

    let hex = format!("{:x}", digest.finalize());
    format!("HTTP:{}", hex)
}
/// Parse an HTTP method from a string.
///
/// Surrounding whitespace is trimmed and the verb is upper-cased (ASCII) before parsing, so
/// `"get"` and `" GET "` both resolve to `Method::GET`. This keeps the parsed method in line
/// with `generate_http_cache_key_parts`, which upper-cases the verb when building cache
/// keys; without the normalisation a lowercase verb is accepted as a distinct, non-standard
/// extension method and sent to the server verbatim.
///
/// # Errors
/// Returns `"Invalid HTTP method: <input>"` (quoting the input exactly as supplied) when the
/// normalised string is not a valid HTTP method token.
pub fn parse_http_method(method_str: &str) -> Result<Method, String> {
    let normalized = method_str.trim().to_ascii_uppercase();
    Method::from_str(&normalized).map_err(|_| format!("Invalid HTTP method: {}", method_str))
}
/// Assemble a `reqwest::RequestBuilder` for a validation request.
///
/// Steps, in order:
/// 1. parse `method_str` (on failure the message is logged at debug level and returned);
/// 2. render the caller's `headers` through `process_headers`;
/// 3. insert the fixed default headers (User-Agent, Accept, Accept-Language,
///    Accept-Encoding, Connection) — these *replace* any caller header of the same name;
/// 4. create the request for `url` with a 10-second timeout and the combined headers;
/// 5. when `body` is `Some`, parse and render it as a Liquid template against `globals`
///    and attach the result as the request body.
///
/// # Errors
/// Returns a `String` describing an invalid method, a header-processing failure, or a body
/// template that fails to parse or render.
pub fn build_request_builder(
    client: &Client,
    method_str: &str,
    url: &Url,
    headers: &BTreeMap<String, String>,
    body: &Option<String>,
    parser: &liquid::Parser,
    globals: &liquid::Object,
) -> Result<RequestBuilder, String> {
    let method = match parse_http_method(method_str) {
        Ok(parsed) => parsed,
        Err(err_msg) => {
            debug!("{}", err_msg);
            return Err(err_msg);
        }
    };

    // Caller-supplied headers go in first; the defaults are layered on top below.
    let mut all_headers = process_headers(headers, parser, globals, url)
        .map_err(|e| format!("Error processing headers: {}", e))?;

    // Identify ourselves as `<crate name>/<crate version>`.
    let user_agent = format!("{}/{}", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"));
    let defaults = [
        (header::USER_AGENT, user_agent.as_str()),
        (
            header::ACCEPT,
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        ),
        (header::ACCEPT_LANGUAGE, "en-US,en;q=0.5"),
        (header::ACCEPT_ENCODING, "gzip, deflate, br"),
        (header::CONNECTION, "keep-alive"),
    ];
    for (name, raw) in defaults.iter() {
        // `insert` overwrites, so a default wins over a caller header with the same name.
        if let Ok(parsed) = HeaderValue::from_str(raw) {
            all_headers.insert(name.clone(), parsed);
        }
    }

    let request = client
        .request(method, url.clone())
        .timeout(Duration::from_secs(10))
        .headers(all_headers);

    match body {
        None => Ok(request),
        Some(body_template) => {
            let rendered_body = parser
                .parse(body_template)
                .map_err(|e| format!("Error parsing body template: {}", e))?
                .render(globals)
                .map_err(|e| format!("Error rendering body template: {}", e))?;
            Ok(request.body(rendered_body))
        }
    }
}
/// Process headers from a BTreeMap, rendering any Liquid templates.
pub fn process_headers(
headers: &BTreeMap<String, String>,
parser: &liquid::Parser,
globals: &Object,
url: &Url,
) -> Result<HeaderMap> {
let mut headers_map = HeaderMap::new();
for (key, value) in headers {
// Render the template
let template = match parser.parse(value) {
Ok(t) => t,
Err(e) => {
debug!("Error parsing Liquid template for '{}': {}", key, e);
continue;
}
};
let header_value = match template.render(globals) {
Ok(s) => s,
Err(e) => {
debug!(
"Failed to render header template. URL = <{}> | Key '{}': {}",
url.as_str(),
key,
e
);
continue;
}
};
// Clean key and value
let cleaned_key = key.trim().replace(&['\n', '\r'][..], "");
let cleaned_value = header_value.trim().replace(&['\n', '\r'][..], "");
// Validate header name
let name = match HeaderName::from_str(&cleaned_key) {
Ok(n) => n,
Err(e) => {
debug!(
"Invalid header name. URL = <{}> | Key '{}': {}",
url.as_str(),
cleaned_key,
e
);
continue;
}
};
// Validate header value
let value = match HeaderValue::from_str(&cleaned_value) {
Ok(v) => v,
Err(e) => {
debug!(
"Invalid header value. URL = <{}> | Value '{}': {}",
url.as_str(),
cleaned_value,
e
);
continue;
}
};
headers_map.insert(name, value);
}
Ok(headers_map)
}
/// Exponentialbackoff retry helper that always returns `Result<T, anyhow::Error>`
async fn retry_with_backoff<F, Fut, T>(
mut operation: F,
is_retryable: impl Fn(&Result<T, Error>, usize) -> bool,
max_retries: usize,
backoff_min: Duration,
backoff_max: Duration,
) -> Result<T, Error>
where
F: FnMut() -> Fut,
Fut: Future<Output = Result<T, Error>>,
{
let mut retries = 0;
while retries <= max_retries {
let result = operation().await;
// If this result is *not* retryable, return it directly (Ok or Err).
if !is_retryable(&result, retries) {
return result;
}
retries += 1;
let backoff = backoff_min.saturating_mul(2u32.pow(retries as u32)).min(backoff_max);
sleep(backoff).await;
}
Err(anyhow!("Max retries reached"))
}
pub async fn retry_multipart_request<F, Fut>(
mut build_request: F,
max_retries: usize,
backoff_min: Duration,
backoff_max: Duration,
) -> Result<Response, Error>
where
F: FnMut() -> Fut,
Fut: Future<Output = RequestBuilder>,
{
retry_with_backoff(
// 1) operation: build + send
move || {
let fut = build_request();
async move {
let rb = fut.await;
rb.send().await.map_err(Error::from)
}
},
// 2) same retry logic
|res: &Result<_, Error>, _attempt| match res {
Ok(resp)
if matches!(
resp.status(),
StatusCode::BAD_GATEWAY
| StatusCode::SERVICE_UNAVAILABLE
| StatusCode::GATEWAY_TIMEOUT
) =>
{
true
}
Err(_) => true,
_ => false,
},
max_retries,
backoff_min,
backoff_max,
)
.await
}
pub async fn retry_request(
request_builder: RequestBuilder,
max_retries: u32,
backoff_min: Duration,
backoff_max: Duration,
) -> Result<Response, Error> {
retry_with_backoff(
// 1) operation: clone + send, yielding Result<Response, Error>
move || {
let rb =
request_builder.try_clone().expect("retry_request: failed to clone RequestBuilder");
async move { rb.send().await.map_err(Error::from) }
},
// 2) is_retryable: transient HTTP status or network error
|res: &Result<_, Error>, _attempt| match res {
Ok(resp)
if matches!(
resp.status(),
StatusCode::BAD_GATEWAY
| StatusCode::SERVICE_UNAVAILABLE
| StatusCode::GATEWAY_TIMEOUT
) =>
{
true
}
Err(_) => true,
_ => false,
},
max_retries as usize,
backoff_min,
backoff_max,
)
.await
}
/// Return `true` when the body is very likely HTML.
///
/// Two signals must agree:
/// 1. the `content-type` header (compared case-insensitively) contains `text/html` or
///    `application/xhtml`;
/// 2. the first 1024 bytes of `body`, lower-cased, start with `<` and contain `<html`.
///
/// The probe window is shortened to the nearest UTF-8 character boundary at or below 1024
/// bytes, so a multi-byte character straddling the cut-off cannot cause a slicing panic.
fn body_looks_like_html(body: &str, headers: &HeaderMap) -> bool {
    const PROBE_LIMIT: usize = 1024;

    // ---- 1. header heuristic ---------------------------------------------
    let header_says_html = headers
        .get("content-type")
        .and_then(|v| v.to_str().ok())
        .map(|ct| {
            let ct = ct.to_ascii_lowercase();
            ct.contains("text/html") || ct.contains("application/xhtml")
        })
        .unwrap_or(false);

    // Both signals are required, so skip the body scan when the header already says no.
    if !header_says_html {
        return false;
    }

    // ---- 2. early-body scan (<= 1024 bytes) -------------------------------
    // Index 0 is always a char boundary, so this loop terminates.
    let mut end = body.len().min(PROBE_LIMIT);
    while !body.is_char_boundary(end) {
        end -= 1;
    }
    let probe = body[..end].to_ascii_lowercase();

    probe.starts_with('<') && probe.contains("<html")
}
/// Decide whether a response satisfies every matcher and the HTML policy.
///
/// Each entry in `matchers` is checked on its own and all of them must hold:
/// * `WordMatch`   - `body` contains all (`match_all_words`) or any of `words`; the outcome
///   is inverted when `negative` is set.
/// * `StatusMatch` - the string form of all (`match_all_status`) or any expected status
///   equals `status.as_str()`; the outcome is inverted when `negative` is set.
/// * `HeaderMatch` - the named response header (empty string when absent or not valid
///   text), lower-cased, contains all (`match_all_values`) or any of the lower-cased
///   `expected` fragments.
/// * `JsonValid`   - `body` parses as JSON.
/// * `XmlValid`    - `body` deserializes as XML.
///
/// Any other matcher kind imposes no constraint here. On top of the matchers, when
/// `html_allowed` is `false` the response is rejected if `body_looks_like_html` reports an
/// HTML page.
pub fn validate_response(
    matchers: &[ResponseMatcher],
    body: &str,
    status: &StatusCode,
    headers: &HeaderMap,
    html_allowed: bool,
) -> bool {
    let matcher_holds = |matcher: &ResponseMatcher| -> bool {
        match matcher {
            ResponseMatcher::WordMatch { words, match_all_words, negative, .. } => {
                let found = if *match_all_words {
                    words.iter().all(|w| body.contains(w))
                } else {
                    words.iter().any(|w| body.contains(w))
                };
                // `negative` flips the outcome.
                found != *negative
            }
            ResponseMatcher::StatusMatch {
                status: expected, match_all_status, negative, ..
            } => {
                let actual = status.as_str();
                let found = if *match_all_status {
                    expected.iter().all(|s| s.to_string() == actual)
                } else {
                    expected.iter().any(|s| s.to_string() == actual)
                };
                found != *negative
            }
            ResponseMatcher::HeaderMatch { header, expected, match_all_values, .. } => {
                // Header names are case-insensitive; values are compared lower-cased.
                let actual = headers
                    .get(header)
                    .and_then(|v| v.to_str().ok())
                    .unwrap_or_default()
                    .to_ascii_lowercase();
                if *match_all_values {
                    expected.iter().all(|e| actual.contains(&e.to_ascii_lowercase()))
                } else {
                    expected.iter().any(|e| actual.contains(&e.to_ascii_lowercase()))
                }
            }
            other => {
                if matches!(other, ResponseMatcher::JsonValid { .. }) {
                    serde_json::from_str::<serde_json::Value>(body).is_ok()
                } else if matches!(other, ResponseMatcher::XmlValid { .. }) {
                    // Succeeds when `body` can be deserialized as XML.
                    xml_from_str::<IgnoredAny>(body).is_ok()
                } else {
                    true
                }
            }
        }
    };

    let matchers_ok = matchers.iter().all(matcher_holds);

    // The HTML probe runs regardless of the matcher outcome.
    let html_detected = body_looks_like_html(body, headers);
    let html_ok = html_allowed || !html_detected;

    matchers_ok && html_ok
}
#[cfg(test)]
mod tests {
    use std::sync::Once;

    use wiremock::{
        matchers::{method, path},
        Mock, MockServer, ResponseTemplate,
    };

    use super::*;

    static INIT: Once = Once::new();

    /// Install the tracing subscriber at most once, however many tests call this.
    fn init() {
        INIT.call_once(|| {
            tracing_subscriber::fmt::try_init().ok();
        });
    }

    /// Client with gzip, deflate and brotli support switched on.
    fn compressed_client() -> Client {
        Client::builder()
            .gzip(true)
            .deflate(true)
            .brotli(true)
            .build()
            .expect("building reqwest client")
    }

    /// A plain GET with one static header and no body builds successfully.
    #[test]
    fn test_build_request_builder() {
        init();
        let client = compressed_client();
        let parser = liquid::ParserBuilder::with_stdlib().build().unwrap();
        let globals = liquid::Object::new();

        let mut headers = BTreeMap::new();
        headers.insert("Content-Type".to_string(), "application/json".to_string());
        let url = Url::from_str("https://example.com").unwrap();

        let built = build_request_builder(&client, "GET", &url, &headers, &None, &parser, &globals);
        assert!(built.is_ok());
    }

    /// A 200 from the mock server is returned as `Ok`.
    #[tokio::test]
    async fn test_retry_request() {
        init();
        let mock_server = MockServer::start().await;
        Mock::given(method("GET"))
            .and(path("/test"))
            .respond_with(ResponseTemplate::new(200))
            .mount(&mock_server)
            .await;

        let endpoint = format!("{}/test", mock_server.uri());
        let request_builder = compressed_client().get(endpoint);

        let min_backoff = Duration::from_millis(50);
        let max_backoff = Duration::from_millis(200);
        let response = retry_request(request_builder, 3, min_backoff, max_backoff).await;

        assert!(response.is_ok());
    }

    /// A single positive word matcher passes when the body contains the word.
    #[test]
    fn test_validate_response() {
        let matchers = vec![ResponseMatcher::WordMatch {
            r#type: "word-match".to_string(),
            words: vec!["test".to_string()],
            match_all_words: true,
            negative: false,
        }];
        let no_headers = HeaderMap::new();

        let accepted =
            validate_response(&matchers, "This is a test", &StatusCode::OK, &no_headers, false);

        assert!(accepted);
    }

    /// Matchers mirroring rule `kingfisher.slack.4`: the body must mention
    /// `invalid_payload` and must not mention `invalid_token`.
    #[test]
    fn test_validate_response_slack_webhook() {
        let must_contain = ResponseMatcher::WordMatch {
            r#type: "word-match".to_string(),
            words: vec!["invalid_payload".to_string()],
            match_all_words: false, // the rule omits this, so the default (false) applies
            negative: false,
        };
        let must_not_contain = ResponseMatcher::WordMatch {
            r#type: "word-match".to_string(),
            words: vec!["invalid_token".to_string()],
            match_all_words: false,
            negative: true,
        };
        let matchers = vec![must_contain, must_not_contain];

        // Simulated Slack reply: HTTP 400, text/plain, body "invalid_payload".
        let mut headers = HeaderMap::new();
        headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("text/plain"));
        let status = StatusCode::BAD_REQUEST;

        // HTML is not allowed here; the reply is plain text, so that check passes too.
        let ok = validate_response(&matchers, "invalid_payload", &status, &headers, false);

        // It should be valid (true) because every matcher condition holds.
        assert!(ok, "Slack webhook response should be considered ACTIVE");
    }
}