diff --git a/README.md b/README.md index 9164b9b..a156dcd 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ kingfisher scan /path/to/scan --access-map --view-report  **Click to view video** -[](https://private-user-images.githubusercontent.com/131722602/531560056-d33ee7a6-c60a-4e42-88e0-ac03cb429a46.mp4?jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NjczODczNzQsIm5iZiI6MTc2NzM4NzA3NCwicGF0aCI6Ii8xMzE3MjI2MDIvNTMxNTYwMDU2LWQzM2VlN2E2LWM2MGEtNGU0Mi04OGUwLWFjMDNjYjQyOWE0Ni5tcDQ_WC1BbXotQWxnb3JpdGhtPUFXUzQtSE1BQy1TSEEyNTYmWC1BbXotQ3JlZGVudGlhbD1BS0lBVkNPRFlMU0E1M1BRSzRaQSUyRjIwMjYwMTAyJTJGdXMtZWFzdC0xJTJGczMlMkZhd3M0X3JlcXVlc3QmWC1BbXotRGF0ZT0yMDI2MDEwMlQyMDUxMTRaJlgtQW16LUV4cGlyZXM9MzAwJlgtQW16LVNpZ25hdHVyZT03MThiMGEwMmJhNWY0OTVlYzRkYThhMWMyYzZkYmNlNjZjMTlhMmQ2ZDhjNjFhMzczYTAyYmRkOTZlYjAzMWQzJlgtQW16LVNpZ25lZEhlYWRlcnM9aG9zdCJ9.OIY-coUIIng2Os8mHrcR_W15AlCVDAk2NPZWJIHSWrQ) +[](https://github.com/user-attachments/assets/d33ee7a6-c60a-4e42-88e0-ac03cb429a46) # Table of Contents diff --git a/data/rules/openai.yml b/data/rules/openai.yml index 3e8d7d8..d77801e 100644 --- a/data/rules/openai.yml +++ b/data/rules/openai.yml @@ -60,3 +60,32 @@ rules: - 200 type: StatusMatch url: https://api.openai.com/v1/models + + - name: OpenAI API Key (Short Prefixed) + id: kingfisher.openai.3 + pattern: | + (?xi) + ( + sk-None-[A-Z0-9]{48} + ) + pattern_requirements: + min_digits: 2 + min_entropy: 3.3 + confidence: medium + examples: + - sk-None-abcdefghij1234567890ABCDEFGHIJ1234567890abcdefgh + references: + - https://help.openai.com/en/articles/9132009-how-can-i-view-the-users-or-organizations-associated-with-an-api-key + validation: + type: Http + content: + request: + headers: + Authorization: 'Bearer {{ TOKEN }}' + method: GET + response_matcher: + - report_response: true + - status: + - 200 + type: StatusMatch + url: https://api.openai.com/v1/me diff --git a/data/rules/scraperapi.yml b/data/rules/scraperapi.yml new file mode 100644 index 0000000..2bca1ac --- /dev/null +++ b/data/rules/scraperapi.yml @@ -0,0 +1,35 @@ +rules: + - name: ScraperAPI Key + id: kingfisher.scraperapi.1 + pattern: | + (?xi) + \b(?:scraper(?:\s|[_-])?api|scraperapi) + (?:.|[\n\r]){0,32}? + (?:key|token|api[_-]?key) + (?:.|[\n\r]){0,16}? + ([a-z0-9]{32}) + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 10 + min_entropy: 3.5 + confidence: medium + examples: + - 'SCRAPERAPI_KEY=a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6' + - 'scraper_api_token: "9f8e7d6c5b4a3029182736455463728a"' + references: + - https://www.scraperapi.com/documentation/ + validation: + type: Http + content: + request: + method: GET + url: "https://api.scraperapi.com?api_key={{ TOKEN }}&url=http://httpbin.org/ip" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + words: + - '"origin"' diff --git a/src/validation/httpvalidation.rs b/src/validation/httpvalidation.rs index 6b31bf8..ace6398 100644 --- a/src/validation/httpvalidation.rs +++ b/src/validation/httpvalidation.rs @@ -298,7 +298,13 @@ fn body_looks_like_html(body: &str, headers: &HeaderMap) -> bool { .unwrap_or(false); // ---- 2. early-body scan (<=1024 bytes) -------------------------------- - let probe = &body[..body.len().min(1024)]; + // Find the last character boundary at or before 1024 bytes to avoid UTF-8 boundary issues + // Walk backward at most 3 bytes (UTF-8 max char size is 4 bytes) to find valid boundary + let mut end = 1024.min(body.len()); + while end > 0 && !body.is_char_boundary(end) { + end -= 1; + } + let probe = &body[..end]; // Trim any leading whitespace so we still catch HTML that starts after newlines/indentation. let trimmed = probe.trim_start_matches(|c: char| c.is_whitespace()); let probe = trimmed.to_ascii_lowercase(); @@ -570,4 +576,30 @@ mod tests { assert!(!ok, "HTML responses should be rejected unless explicitly allowed"); } + + #[test] + fn test_body_looks_like_html_utf8_boundary() { + // Test case for UTF-8 boundary issue: multi-byte character at 1024-byte boundary + // This reproduces the bug where slicing at byte 1024 would panic if it's in the middle + // of a multi-byte character (e.g., Chinese character '业' spans bytes 1023..1026) + let mut headers = HeaderMap::new(); + headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("text/html; charset=utf-8")); + + // HTML at the start, with padding to push a multi-byte char to byte 1024 + // This mirrors the real crash: HTML response from Gitee with Chinese chars + let html_start = "