From bb038df5fffba94f234cf3a8ff4ed0a582d7e9a8 Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Fri, 2 Jan 2026 16:06:03 -0800
Subject: [PATCH 1/4] Update demo link in README.md

Signed-off-by: Mick Grove <mick.grove@mongodb.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 1b6029c..124c411 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,7 @@ kingfisher scan /path/to/scan --access-map --view-report
 ![alt text](docs/kingfisher-usage-access-map-01.gif)
 
 **Click to view video**
-[![Demo](docs/demos/findings-thumbnail.png)](https://private-user-images.githubusercontent.com/131722602/531560056-d33ee7a6-c60a-4e42-88e0-ac03cb429a46.mp4?jwt=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3NjczODczNzQsIm5iZiI6MTc2NzM4NzA3NCwicGF0aCI6Ii8xMzE3MjI2MDIvNTMxNTYwMDU2LWQzM2VlN2E2LWM2MGEtNGU0Mi04OGUwLWFjMDNjYjQyOWE0Ni5tcDQ_WC1BbXotQWxnb3JpdGhtPUFXUzQtSE1BQy1TSEEyNTYmWC1BbXotQ3JlZGVudGlhbD1BS0lBVkNPRFlMU0E1M1BRSzRaQSUyRjIwMjYwMTAyJTJGdXMtZWFzdC0xJTJGczMlMkZhd3M0X3JlcXVlc3QmWC1BbXotRGF0ZT0yMDI2MDEwMlQyMDUxMTRaJlgtQW16LUV4cGlyZXM9MzAwJlgtQW16LVNpZ25hdHVyZT03MThiMGEwMmJhNWY0OTVlYzRkYThhMWMyYzZkYmNlNjZjMTlhMmQ2ZDhjNjFhMzczYTAyYmRkOTZlYjAzMWQzJlgtQW16LVNpZ25lZEhlYWRlcnM9aG9zdCJ9.OIY-coUIIng2Os8mHrcR_W15AlCVDAk2NPZWJIHSWrQ)
+[![Demo](docs/demos/findings-thumbnail.png)](https://github.com/user-attachments/assets/d33ee7a6-c60a-4e42-88e0-ac03cb429a46)
 
 
 # Table of Contents

From 98d009deaebac4b12ee6dc7c070c09c0b9d7561d Mon Sep 17 00:00:00 2001
From: Akshay Jain <akjain_navan@users.noreply.github.com>
Date: Wed, 7 Jan 2026 13:38:53 +0530
Subject: [PATCH 2/4] Add ScraperAPI key detection rule
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a new rule to detect ScraperAPI keys with:
- Pattern matching for 32-character alphanumeric keys
- Live validation against ScraperAPI endpoint
- Medium confidence with entropy check (min 3.5)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 data/rules/scraperapi.yml | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 data/rules/scraperapi.yml

diff --git a/data/rules/scraperapi.yml b/data/rules/scraperapi.yml
new file mode 100644
index 0000000..2bca1ac
--- /dev/null
+++ b/data/rules/scraperapi.yml
@@ -0,0 +1,35 @@
+rules:
+  - name: ScraperAPI Key
+    id: kingfisher.scraperapi.1
+    pattern: |
+      (?xi)
+      \b(?:scraper(?:\s|[_-])?api|scraperapi)
+      (?:.|[\n\r]){0,32}?
+      (?:key|token|api[_-]?key)
+      (?:.|[\n\r]){0,16}?
+      \b([a-z0-9]{32})
+      \b
+    pattern_requirements:
+      min_digits: 2
+      min_lowercase: 4  # keys may be hex-like with few letters; the second example has only 7
+    min_entropy: 3.5
+    confidence: medium
+    examples:
+      - 'SCRAPERAPI_KEY=a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6'
+      - 'scraper_api_token: "9f8e7d6c5b4a3029182736455463728a"'
+    references:
+      - https://www.scraperapi.com/documentation/
+    validation:
+      type: Http
+      content:
+        request:
+          method: GET
+          url: "https://api.scraperapi.com?api_key={{ TOKEN }}&url=http://httpbin.org/ip"
+          response_matcher:
+            - report_response: true
+            - type: StatusMatch
+              status: [200]
+            - type: JsonValid
+            - type: WordMatch
+              words:
+                - '"origin"'

From 6ed438fe68f9a16c254fcf085e2dc1d6c984870b Mon Sep 17 00:00:00 2001
From: Himanshu Kumar Das <1238723+himanshudas@users.noreply.github.com>
Date: Tue, 13 Jan 2026 03:40:06 +0530
Subject: [PATCH 3/4] Fix UTF-8 boundary panic in HTTP response body slicing

The body_looks_like_html() function panicked when byte index 1024 fell
inside a multi-byte UTF-8 character (e.g., Chinese text from Gitee). Use
is_char_boundary() to back up to a valid slice point instead of slicing
at an arbitrary byte index.

Signed-off-by: Himanshu Kumar Das <1238723+himanshudas@users.noreply.github.com>
---
 src/validation/httpvalidation.rs | 34 +++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/validation/httpvalidation.rs b/src/validation/httpvalidation.rs
index 6b31bf8..ace6398 100644
--- a/src/validation/httpvalidation.rs
+++ b/src/validation/httpvalidation.rs
@@ -298,7 +298,13 @@ fn body_looks_like_html(body: &str, headers: &HeaderMap) -> bool {
         .unwrap_or(false);
 
     // ---- 2. early-body scan (<=1024 bytes) --------------------------------
-    let probe = &body[..body.len().min(1024)];
+    // Find the last character boundary at or before 1024 bytes to avoid UTF-8 boundary issues
+    // Walk backward at most 3 bytes (UTF-8 max char size is 4 bytes) to find valid boundary
+    let mut end = 1024.min(body.len());
+    while end > 0 && !body.is_char_boundary(end) {
+        end -= 1;
+    }
+    let probe = &body[..end];
     // Trim any leading whitespace so we still catch HTML that starts after newlines/indentation.
     let trimmed = probe.trim_start_matches(|c: char| c.is_whitespace());
     let probe = trimmed.to_ascii_lowercase();
@@ -570,4 +576,30 @@ mod tests {
 
         assert!(!ok, "HTML responses should be rejected unless explicitly allowed");
     }
+
+    #[test]
+    fn test_body_looks_like_html_utf8_boundary() {
+        // Regression test: a multi-byte character straddles the 1024-byte probe boundary.
+        // Slicing the body at byte 1024 used to panic when that index fell in the middle
+        // of a multi-byte character (here the 3-byte '业' occupies bytes 1023..=1025).
+        let mut headers = HeaderMap::new();
+        headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("text/html; charset=utf-8"));
+
+        // HTML at the start, then ASCII padding so the multi-byte char starts at byte 1023
+        // and byte 1024 lands inside it. Mirrors the real crash: Gitee HTML with Chinese text.
+        let html_start = "<!DOCTYPE html><html lang=\"zh-CN\"><head><title>";
+        let padding_len = 1023 - html_start.len();
+        let body = format!(
+            "{}{}业</title></head><body>Gitee</body></html>",
+            html_start,
+            "x".repeat(padding_len)
+        );
+
+        // Verify our test setup: '业' must start at byte 1023 (0xE4 is its first UTF-8 byte)
+        assert_eq!(body.as_bytes()[1023], 0xE4, "Expected first byte of '业' at position 1023");
+
+        // This should not panic AND should correctly identify HTML
+        let result = body_looks_like_html(&body, &headers);
+        assert!(result, "Should correctly identify HTML even with multi-byte characters at boundary");
+    }
 }

From 69d447dcc95a9c292c9c1c5cfa1dabfe618150ec Mon Sep 17 00:00:00 2001
From: Akshay Jain <akjain_navan@users.noreply.github.com>
Date: Tue, 13 Jan 2026 13:00:55 +0530
Subject: [PATCH 4/4] Add detection for short sk-None- prefixed OpenAI API keys

OpenAI issues keys with sk-None- prefix in both short (56 char) and long
(130+ char) formats. The existing openai.2 rule only matches long keys
with {100,} minimum length. This adds openai.3 to detect the short variant:
sk-None- followed by exactly 48 alphanumeric characters.

Fixes detection gap where trufflehog found valid keys that kingfisher missed.
---
 data/rules/openai.yml | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/data/rules/openai.yml b/data/rules/openai.yml
index 3e8d7d8..d77801e 100644
--- a/data/rules/openai.yml
+++ b/data/rules/openai.yml
@@ -60,3 +60,32 @@ rules:
                 - 200
               type: StatusMatch
           url: https://api.openai.com/v1/models
+
+  - name: OpenAI API Key (Short Prefixed)
+    id: kingfisher.openai.3  # \b anchors below: match a standalone 48-char body, not part of a longer word
+    pattern: |
+      (?xi)
+      \b(
+        sk-None-[A-Z0-9]{48}
+      )\b
+    pattern_requirements:
+      min_digits: 2
+    min_entropy: 3.3
+    confidence: medium
+    examples:
+      - sk-None-abcdefghij1234567890ABCDEFGHIJ1234567890abcdefgh
+    references:
+      - https://help.openai.com/en/articles/9132009-how-can-i-view-the-users-or-organizations-associated-with-an-api-key
+    validation:
+      type: Http
+      content:
+        request:
+          headers:
+            Authorization: 'Bearer {{ TOKEN }}'
+          method: GET
+          response_matcher:
+            - report_response: true
+            - status:
+                - 200
+              type: StatusMatch
+          url: https://api.openai.com/v1/me