updates to new rules

This commit is contained in:
Mick Grove 2026-04-15 17:13:10 -07:00
commit 93a9cb796e
18 changed files with 472 additions and 78 deletions

View file

@ -61,7 +61,7 @@ Kingfisher is a high-performance, open source secret detection tool for source c
- **Python Bytecode (.pyc) Scanning**: Extracts and scans string constants from compiled Python (`.pyc`, `.pyo`) files
- **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md))
- **Checksum-aware detection**: verifies tokens with built-in checksums (e.g., GitHub, Confluent, Zuplo) — no API calls required
- **Built-in Report Viewer**: Visualize and triage findings locally with `kingfisher view ./report-file.json`
- **Built-in Report Viewer**: Visualize and triage findings locally with `kingfisher view ./report-file.json` (supports multiple files and directories)
- **Audit reporting**: Generate compliance-oriented HTML reports with scan metadata and validation ordering
- **Library crates**: Embed Kingfisher's scanning engine in your own Rust applications ([docs/LIBRARY.md](docs/LIBRARY.md))
@ -432,6 +432,12 @@ kingfisher scan /path/to/code --access-map --view-report
# View access-map reports locally
kingfisher view kingfisher.json
# Combine multiple reports (deduplicated by fingerprint)
kingfisher view report1.json report2.jsonl
# Load all reports from a directory (non-recursive, skips non-JSON/JSONL files)
kingfisher view ./reports/
```
> **Use the access map functionality only when you are authorized to inspect the target account, as Kingfisher will issue additional network requests to determine what access the secret grants**

View file

@ -4,6 +4,11 @@ rules:
pattern: |
(?xi)
\b
close
(?:.|[\n\r]){0,32}?
(?:API[_-]?KEY|SECRET|TOKEN|KEY)
(?:.|[\n\r]){0,16}?
\b
(
api_[A-Za-z0-9]{18,26}\.[A-Za-z0-9]{18,26}
)

View file

@ -12,7 +12,7 @@ rules:
confidence: medium
examples:
- 'INNGEST_SIGNING_KEY=signkey-prod-b2ed992186a5cb19f6668aade821f502c1d00970dfd0e35128d51bac4649916c'
- 'INNGEST_SIGNING_KEY="signkey-staging-12345678abcdef0123456789abcdef0123456789abcdef0123456789abcdef"'
- 'INNGEST_SIGNING_KEY="signkey-staging-a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2"'
references:
- https://www.inngest.com/docs/platform/signing-keys
validation:
@ -55,7 +55,7 @@ rules:
min_entropy: 3.0
confidence: medium
examples:
- 'INNGEST_EVENT_KEY=AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-AbCdEfGhIj'
- 'INNGEST EVENT_KEY=AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-AbCdEfGhIj'
- 'inngest event key: "ZyXwVuTsRqPoNmLkJiHgFeDcBa9876543210_-ZyXwVuTsRqPoNmLkJiHgFeDcBa9876543210_-ZyXwVuTsRq"'
references:
- https://www.inngest.com/docs/events/creating-an-event-key

View file

@ -70,23 +70,21 @@ rules:
method: POST
url: >
{%- assign base_url = LIVEKIT_URL | replace: "wss://", "https://" | replace: "ws://", "http://" -%}
{{ base_url }}/twirp/livekit.RoomService/ListRooms
{{ base_url }}/twirp/livekit.RoomService/ListParticipants
headers:
Content-Type: application/json
Accept: application/json
Authorization: '{%- assign header = "HS256" | jwt_header -%}{%- assign now = "" | unix_timestamp -%}{%- assign exp = now | plus: 300 -%}{%- assign nbf = now | minus: 5 -%}{%- assign payload_json = ''{"iss":"'' | append: API_KEY | append: ''","sub":"kingfisher-validation","exp":'' | append: exp | append: '',"nbf":'' | append: nbf | append: '',"video":{"roomList":true}}'' -%}{%- assign payload = payload_json | b64url_enc -%}{%- assign signing_input = header | append: "." | append: payload -%}{%- assign sig_b64 = signing_input | hmac_sha256: TOKEN -%}{%- assign sig = sig_b64 | replace: "+", "-" | replace: "/", "_" | replace: "=", "" -%}Bearer {{ header }}.{{ payload }}.{{ sig }}'
Authorization: '{%- assign header = "HS256" | jwt_header -%}{%- assign now = "" | unix_timestamp -%}{%- assign exp = now | plus: 300 -%}{%- assign nbf = now | minus: 5 -%}{%- assign payload_json = ''{"iss":"'' | append: API_KEY | append: ''","sub":"kingfisher-validation","exp":'' | append: exp | append: '',"nbf":'' | append: nbf | append: '',"video":{"roomAdmin":true,"room":"__kingfisher_validation__"}}'' -%}{%- assign payload = payload_json | b64url_enc -%}{%- assign signing_input = header | append: "." | append: payload -%}{%- assign sig_b64 = signing_input | hmac_sha256: TOKEN -%}{%- assign sig = sig_b64 | replace: "+", "-" | replace: "/", "_" | replace: "=", "" -%}Bearer {{ header }}.{{ payload }}.{{ sig }}'
body: |
{}
{"room":"__kingfisher_validation__"}
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
status: [200, 404]
- type: StatusMatch
status: [401, 403]
negative: true
- type: JsonValid
- type: WordMatch
words: ['"rooms"']
# LiveKit validation needs the URL and API key as well, so standalone API secrets must remain
# detectable even when contextual verification is unavailable.

View file

@ -5,16 +5,18 @@ rules:
(?x)
\b
(
tr_(?P<env>dev|prod|stg)_[A-Za-z0-9]{20,40}
tr_(?:dev|prod|stg)_[A-Za-z0-9]{20}
)
\b
pattern_requirements:
min_digits: 1
min_uppercase: 1
min_lowercase: 1
min_entropy: 3.0
confidence: medium
examples:
- 'TRIGGER_SECRET_KEY=tr_dev_1a2b3c4d5e6f7g8h9i0j'
- 'TRIGGER_SECRET_KEY=tr_prod_xK8m2LpQr5nW0vYz3cJ7'
- 'TRIGGER_SECRET_KEY=tr_dev_AN0MnvS4n4GdfhELPUMU'
- 'TRIGGER_SECRET_KEY=tr_prod_KCqL36ucD5LTPa9kdnMj'
references:
- https://trigger.dev/docs/management/authentication
- https://trigger.dev/docs/management/envvars/list
@ -26,7 +28,7 @@ rules:
content:
request:
method: GET
url: 'https://api.trigger.dev/api/v1/projects/{{ TRIGGER_PROJECT_REF }}/envvars/{{ env | replace: "stg", "staging" }}'
url: 'https://api.trigger.dev/api/v1/projects/{{ TRIGGER_PROJECT_REF }}/envvars/{{ TOKEN | split: "_" | slice: 1, 1 | first | replace: "stg", "staging" }}'
headers:
Authorization: "Bearer {{ TOKEN }}"
Accept: application/json
@ -42,15 +44,17 @@ rules:
(?x)
\b
(
tr_pat_[A-Za-z0-9]{20,40}
tr_pat_[A-Za-z0-9]{20}
)
\b
pattern_requirements:
min_digits: 1
min_uppercase: 1
min_lowercase: 1
min_entropy: 3.0
confidence: medium
examples:
- 'TRIGGER_ACCESS_TOKEN=tr_pat_xK8m2LpQr5nW0vYz3cJ7aB4d'
- 'TRIGGER_ACCESS_TOKEN=tr_pat_G8DwRcZEc0ONFMtkVHt8'
references:
- https://trigger.dev/docs/management/authentication
- https://trigger.dev/docs/management/envvars/list

View file

@ -34,6 +34,7 @@ rules:
content:
request:
method: GET
response_is_html: true
url: '{{ TOKEN }}'
response_matcher:
- report_response: false

View file

@ -121,7 +121,7 @@ kingfisher scan /path/to/repo \
```
**What you'll see**
Findings tied to a skip-listed account report `Validation: Not Attempted` and note in the `Response:` that the entry came from the skip list:
Findings tied to a skip-listed account report `Validation: Canary Token (Skipped)` and note in the `Response:` that the entry came from the skip list:
```bash
AWS SECRET ACCESS KEY => [KINGFISHER.AWS.2]
@ -129,7 +129,7 @@ AWS SECRET ACCESS KEY => [KINGFISHER.AWS.2]
|Fingerprint...: 2141074333616819500
|Confidence....: medium
|Entropy.......: 5.00
|Validation....: Not Attempted
|Validation....: Canary Token (Skipped)
|__Response....: (skip list entry) AWS validation not attempted for account 171436882533.
|Language......: Unknown
|Line Num......: 21
@ -137,7 +137,7 @@ AWS SECRET ACCESS KEY => [KINGFISHER.AWS.2]
```
**Why this matters**
Skipping prevents noisy tripwires in prod telemetry while keeping the status explicit—"Not Attempted" isn't a pass. If needed, verify these credentials out-of-band or with a safe, non-triggering method.
Skipping prevents noisy tripwires in prod telemetry while keeping the status explicit—"Canary Token (Skipped)" signals that the credential likely belongs to an active honeypot but was intentionally not validated. If needed, verify these credentials out-of-band or with a safe, non-triggering method.
#### Common CLI flows

View file

@ -121,6 +121,18 @@ kingfisher view kingfisher.json
The `view` subcommand starts a server (default port `7890`, bind address `127.0.0.1`) that bundles the HTML, CSS, and JavaScript for the access-map viewer directly into the Kingfisher binary. Provide a JSON or JSONL report to load it automatically and Kingfisher will open your browser, or open the page and upload a report in the browser. If port 7890 is already in use, re-run with `--port <PORT>`. To allow access from Docker or other hosts, use `--address 0.0.0.0`.
You can pass multiple files, directories, or a mix of both to combine reports. Findings are deduplicated by fingerprint. Directories are scanned non-recursively, and any file inside them without a `.json` or `.jsonl` extension is silently skipped.
```bash
# Combine multiple report files
kingfisher view report1.json report2.jsonl
# Load all JSON/JSONL reports from a directory
kingfisher view ./reports/
```
The browser-based viewer also supports loading multiple files via drag-and-drop or the file picker, with the same fingerprint-based deduplication.
### Pipe any text directly into Kingfisher by passing `-`
```bash

View file

@ -118,7 +118,7 @@ kingfisher scan /path/to/repo \
```
**What you'll see**
Findings tied to a skip-listed account report `Validation: Not Attempted` and note in the `Response:` that the entry came from the skip list:
Findings tied to a skip-listed account report `Validation: Canary Token (Skipped)` and note in the `Response:` that the entry came from the skip list:
```bash
AWS SECRET ACCESS KEY => [KINGFISHER.AWS.2]
@ -126,7 +126,7 @@ AWS SECRET ACCESS KEY => [KINGFISHER.AWS.2]
|Fingerprint...: 2141074333616819500
|Confidence....: medium
|Entropy.......: 5.00
|Validation....: Not Attempted
|Validation....: Canary Token (Skipped)
|__Response....: (skip list entry) AWS validation not attempted for account 171436882533.
|Language......: Unknown
|Line Num......: 21
@ -134,7 +134,7 @@ AWS SECRET ACCESS KEY => [KINGFISHER.AWS.2]
```
**Why this matters**
Skipping prevents noisy tripwires in prod telemetry while keeping the status explicit—"Not Attempted" isn't a pass. If needed, verify these credentials out-of-band or with a safe, non-triggering method.
Skipping prevents noisy tripwires in prod telemetry while keeping the status explicit—"Canary Token (Skipped)" signals that the credential likely belongs to an active honeypot but was intentionally not validated. If needed, verify these credentials out-of-band or with a safe, non-triggering method.
#### Common CLI flows

View file

@ -116,6 +116,18 @@ kingfisher view kingfisher.json
The `view` subcommand starts a server (default port `7890`, bind address `127.0.0.1`) that bundles the HTML, CSS, and JavaScript for the access-map viewer directly into the Kingfisher binary. Provide a JSON or JSONL report to load it automatically and Kingfisher will open your browser, or open the page and upload a report in the browser. If port 7890 is already in use, re-run with `--port <PORT>`. To allow access from Docker or other hosts, use `--address 0.0.0.0`.
You can pass multiple files, directories, or a mix of both to combine reports. Findings are deduplicated by fingerprint. Directories are scanned non-recursively, and any file inside them without a `.json` or `.jsonl` extension is silently skipped.
```bash
# Combine multiple report files
kingfisher view report1.json report2.jsonl
# Load all JSON/JSONL reports from a directory
kingfisher view ./reports/
```
The browser-based viewer also supports loading multiple files via drag-and-drop or the file picker, with the same fingerprint-based deduplication.
### Pipe any text directly into Kingfisher by passing `-`
```bash

View file

@ -1257,10 +1257,10 @@
<div style="padding:22px;">
<div class="upload-area" id="drop-zone">
<div class="upload-icon">📄</div>
<div class="upload-text">Drag &amp; drop a report here</div>
<div class="upload-sub">…or click to choose a file</div>
<div class="upload-sub">Your file stays in the browser—load JSON or JSONL reports locally.</div>
<input type="file" id="file-input" hidden accept=".json,.jsonl">
<div class="upload-text">Drag &amp; drop reports here</div>
<div class="upload-sub">…or click to choose files</div>
<div class="upload-sub">Your files stay in the browser—load JSON or JSONL reports locally. Multiple files are merged and deduplicated.</div>
<input type="file" id="file-input" hidden accept=".json,.jsonl" multiple>
</div>
<div id="error-msg" class="hidden" style="margin-top:16px; padding:10px 12px; background:#fef2f2; border:1px solid #fecaca; border-radius:6px; color:#b91c1c; font-size:13px;"></div>
</div>
@ -1397,6 +1397,7 @@
<option value="active">Active Credential</option>
<option value="inactive">Inactive Credential</option>
<option value="not_attempted">Not Attempted</option>
<option value="canary">Canary Token (Skipped)</option>
</select>
<span class="rows-label">Rows</span>
<select id="rows-select" class="rows-select">
@ -1611,7 +1612,7 @@
dropZone.addEventListener("click", () => fileInput.click());
fileInput.addEventListener("change", (e) => {
if (e.target.files.length) processFile(e.target.files[0]);
if (e.target.files.length) processFiles(Array.from(e.target.files));
});
dropZone.addEventListener("dragover", (e) => {
e.preventDefault();
@ -1624,7 +1625,7 @@
dropZone.addEventListener("drop", (e) => {
e.preventDefault();
dropZone.classList.remove("active");
if (e.dataTransfer.files.length) processFile(e.dataTransfer.files[0]);
if (e.dataTransfer.files.length) processFiles(Array.from(e.dataTransfer.files));
});
navButtons.forEach((btn) => {
@ -1859,25 +1860,69 @@
return { findings, accessMap, mainReport, statsReport };
}
function processFile(file) {
loaderText.textContent = 'Processing "' + file.name + '"…';
function processFiles(files) {
const validFiles = files.filter((f) => {
const name = f.name.toLowerCase();
return name.endsWith(".json") || name.endsWith(".jsonl");
});
if (validFiles.length === 0) {
errorMsg.textContent = "No JSON or JSONL files found in the selection.";
errorMsg.classList.remove("hidden");
return;
}
const label = validFiles.length === 1
? 'Processing "' + validFiles[0].name + '"…'
: "Processing " + validFiles.length + " files…";
loaderText.textContent = label;
loader.classList.remove("hidden");
errorMsg.classList.add("hidden");
errorMsg.textContent = "";
setTimeout(() => {
const reader = new FileReader();
reader.onload = (e) => {
try {
parseAndRender(e.target.result);
} catch (err) {
console.error(err);
errorMsg.textContent = "Error parsing file: " + err.message;
errorMsg.classList.remove("hidden");
loader.classList.add("hidden");
}
};
reader.readAsText(file);
let completed = 0;
const texts = new Array(validFiles.length);
validFiles.forEach((file, idx) => {
const reader = new FileReader();
reader.onload = (e) => {
texts[idx] = e.target.result;
completed++;
if (completed === validFiles.length) {
try {
parseAndRenderMultiple(texts);
} catch (err) {
console.error(err);
errorMsg.textContent = "Error parsing files: " + err.message;
errorMsg.classList.remove("hidden");
loader.classList.add("hidden");
}
}
};
reader.onerror = () => {
texts[idx] = null;
completed++;
if (completed === validFiles.length) {
const validTexts = texts.filter(Boolean);
if (validTexts.length === 0) {
errorMsg.textContent = "Failed to read any files.";
errorMsg.classList.remove("hidden");
loader.classList.add("hidden");
} else {
try {
parseAndRenderMultiple(validTexts);
} catch (err) {
console.error(err);
errorMsg.textContent = "Error parsing files: " + err.message;
errorMsg.classList.remove("hidden");
loader.classList.add("hidden");
}
}
}
};
reader.readAsText(file);
});
}, 30);
}
@ -1959,12 +2004,49 @@
}
}
// Parses several report texts, merges their findings and access-map entries,
// deduplicates both lists by fingerprint, and renders the combined result.
//
// `texts` is an array of report contents; falsy entries (for example `null`
// placeholders or empty strings) are skipped.
function parseAndRenderMultiple(texts) {
  const t0 = performance.now();

  // Reset the shared viewer state before merging.
  findings = [];
  accessMap = [];
  rawData = null;

  for (const text of texts) {
    if (!text) continue;

    const { f, am, rd } = parseSingleText(text);

    // Append element by element. `push(...bigArray)` passes every element as a
    // separate call argument and throws a RangeError once a report is large
    // enough to exceed the engine's argument limit.
    for (const item of f) findings.push(item);
    for (const item of am) accessMap.push(item);

    // The first report with metadata seeds `rawData`; later ones overwrite
    // keys they share with it.
    if (!rd) continue;
    if (rawData) Object.assign(rawData, rd);
    else rawData = rd;
  }

  // Deduplicate findings by fingerprint
  findings = deduplicateFindings(findings);
  // Deduplicate access map entries by fingerprint
  accessMap = deduplicateAccessMap(accessMap);

  finalizeRender(t0);
}
// Parses a single report text, deduplicates its findings and access-map
// entries by fingerprint, stores them in the shared viewer state, and renders.
function parseAndRender(text) {
  const startedAt = performance.now();

  // Clear the shared state first so it is already reset if parsing throws.
  findings = [];
  accessMap = [];
  rawData = null;

  const parsed = parseSingleText(text);
  findings = deduplicateFindings(parsed.f);
  accessMap = deduplicateAccessMap(parsed.am);
  rawData = parsed.rd;

  finalizeRender(startedAt);
}
function parseSingleText(text) {
let f = [];
let am = [];
let rd = null;
let parsed = parsePossiblyMultiJson(text);
if (parsed === null) {
@ -1983,20 +2065,48 @@
}
const collected = collectReportData(entries);
findings = collected.findings;
accessMap = collected.accessMap;
f = collected.findings;
am = collected.accessMap;
if (collected.mainReport || collected.statsReport) {
rawData = Object.assign({}, collected.mainReport || {}, collected.statsReport || {});
rd = Object.assign({}, collected.mainReport || {}, collected.statsReport || {});
}
} else {
const collected = collectReportData(parsed);
findings = collected.findings;
accessMap = collected.accessMap;
f = collected.findings;
am = collected.accessMap;
if (collected.mainReport || collected.statsReport || parsed) {
rawData = Object.assign({}, collected.mainReport || {}, collected.statsReport || {});
rd = Object.assign({}, collected.mainReport || {}, collected.statsReport || {});
}
}
return { f, am, rd };
}
// Returns a new array holding the records of `list` in their original order,
// keeping only the first record for each `finding.fingerprint`.
// Records without a (truthy) fingerprint are never treated as duplicates.
function deduplicateFindings(list) {
  const seenFingerprints = new Set();
  const unique = [];
  for (const record of list) {
    const fingerprint = (record.finding && record.finding.fingerprint) || "";
    if (fingerprint) {
      if (seenFingerprints.has(fingerprint)) continue;
      seenFingerprints.add(fingerprint);
    }
    unique.push(record);
  }
  return unique;
}
// Returns a new array holding the access-map entries of `list` in their
// original order, keeping only the first entry for each `fingerprint`.
// Entries without a (truthy) fingerprint are always kept.
function deduplicateAccessMap(list) {
  const seenFingerprints = new Set();
  const unique = [];
  for (const item of list) {
    const fingerprint = item.fingerprint || "";
    if (fingerprint) {
      if (seenFingerprints.has(fingerprint)) continue;
      seenFingerprints.add(fingerprint);
    }
    unique.push(item);
  }
  return unique;
}
function finalizeRender(t0) {
currentPage = 1;
currentFilter = "";
validationFilter = "all";
@ -2192,6 +2302,8 @@
return normalizedStatus === "inactive";
} else if (validation === "not_attempted") {
return normalizedStatus === "not_attempted";
} else if (validation === "canary") {
return normalizedStatus === "canary";
}
return true;
@ -2284,11 +2396,15 @@
return "not_attempted";
}
if (normalized === "canary token (skipped)" || normalized === "canary") {
return "canary";
}
return "unknown";
}
function calculateValidationCounts(list = findings) {
const counts = { active: 0, inactive: 0, not_attempted: 0, unknown: 0 };
const counts = { active: 0, inactive: 0, not_attempted: 0, canary: 0, unknown: 0 };
(list || []).forEach((f) => {
const status =
f.finding && f.finding.validation && f.finding.validation.status
@ -2309,6 +2425,7 @@
active: "#22c55e",
inactive: "#f97316",
not_attempted: "#38bdf8",
canary: "#a855f7",
unknown: "#9ca3af",
};
@ -2316,6 +2433,7 @@
{ key: "active", label: "Active", color: palette.active },
{ key: "inactive", label: "Inactive", color: palette.inactive },
{ key: "not_attempted", label: "Not Attempted", color: palette.not_attempted },
{ key: "canary", label: "Canary Token", color: palette.canary },
{ key: "unknown", label: "Unknown", color: palette.unknown },
];
@ -2554,7 +2672,7 @@
return;
}
const statusOrder = { active: 0, inactive: 1, not_attempted: 2, unknown: 3 };
const statusOrder = { active: 0, inactive: 1, canary: 2, not_attempted: 3, unknown: 4 };
let baseFindings =
scope === "filtered" ? getFilteredSortedFindings().slice() : (Array.isArray(findings) ? findings.slice() : []);
if (activeOnly) {
@ -2692,6 +2810,7 @@
<strong>Review Recommended:</strong> This scan found <strong>${baseFindings.length} finding${baseFindings.length !== 1 ? "s" : ""}</strong>.
${counts.inactive > 0 ? `${counts.inactive} credential${counts.inactive !== 1 ? "s were" : " was"} inactive at scan time.` : ""}
${counts.not_attempted > 0 ? `${counts.not_attempted} ${counts.not_attempted !== 1 ? "were" : "was"} not validated.` : ""}
${counts.canary > 0 ? `${counts.canary} ${counts.canary !== 1 ? "were" : "was"} canary token${counts.canary !== 1 ? "s" : ""} (skipped).` : ""}
</div>`;
} else {
execSummary = `<div style="padding:12px 16px;background:#f0fdf4;border:1px solid #bbf7d0;border-radius:6px;font-size:13px;color:#166534;line-height:1.6;margin-bottom:16px;">
@ -2752,6 +2871,7 @@
<span><span class="dot" style="background:#22c55e"></span> Active: ${counts.active || 0}</span>
<span><span class="dot" style="background:#f97316"></span> Inactive: ${counts.inactive || 0}</span>
<span><span class="dot" style="background:#38bdf8"></span> Not Attempted: ${counts.not_attempted || 0}</span>
<span><span class="dot" style="background:#a855f7"></span> Canary Token: ${counts.canary || 0}</span>
<span><span class="dot" style="background:#9ca3af"></span> Unknown: ${counts.unknown || 0}</span>
</div>
</div>
@ -3632,7 +3752,7 @@
}
// Sort: active first, then by rule name
const statusOrder = { active: 0, inactive: 1, not_attempted: 2, unknown: 3 };
const statusOrder = { active: 0, inactive: 1, canary: 2, not_attempted: 3, unknown: 4 };
const sorted = pool.sort((a, b) => {
const fa = a.finding || {};
const fb = b.finding || {};

View file

@ -28,9 +28,11 @@ pub const DEFAULT_ADDRESS: &str = "127.0.0.1";
/// View a Kingfisher access-map report locally.
#[derive(clap::Args, Debug)]
pub struct ViewArgs {
/// Path to a JSON or JSONL access-map report to load automatically
#[arg(value_name = "REPORT", value_hint = clap::ValueHint::FilePath)]
pub report: Option<PathBuf>,
/// Paths to JSON/JSONL reports or directories containing them.
/// Multiple files are merged and deduplicated by fingerprint.
/// Directories are scanned (non-recursively) for .json/.jsonl files.
#[arg(value_name = "REPORT", value_hint = clap::ValueHint::AnyPath)]
pub reports: Vec<PathBuf>,
/// Local port for the embedded viewer (default 7890)
#[arg(long, default_value_t = DEFAULT_PORT)]
@ -70,27 +72,99 @@ pub fn ensure_port_available(port: u16, address: &str, flag_name: &str) -> Resul
Ok(())
}
/// Resolve the user-supplied report paths into a flat list of report files.
///
/// Each input path is tilde-expanded and then handled by kind:
///
/// * **Directory** – its immediate children (no recursion) that are regular
///   files with a `.json` / `.jsonl` extension are added, in sorted order so
///   the result does not depend on the platform's directory iteration order.
///   Other children are silently skipped.
/// * **File** – added when it has a `.json` / `.jsonl` extension; otherwise a
///   warning is logged and the file is skipped.
///
/// # Errors
///
/// Fails when tilde expansion fails, when a supplied path cannot be accessed,
/// or when a directory cannot be opened or iterated.
async fn resolve_report_paths(raw: &[PathBuf]) -> Result<Vec<PathBuf>> {
    let mut paths = Vec::new();

    for raw_path in raw {
        let expanded = expand_tilde(raw_path)?;
        let meta = tokio::fs::metadata(&expanded)
            .await
            .with_context(|| format!("Cannot access path: {}", expanded.display()))?;

        if meta.is_dir() {
            let mut read_dir = tokio::fs::read_dir(&expanded)
                .await
                .with_context(|| format!("Cannot read directory: {}", expanded.display()))?;

            let mut children = Vec::new();
            while let Some(entry) = read_dir
                .next_entry()
                .await
                .with_context(|| format!("Cannot read directory: {}", expanded.display()))?
            {
                let child = entry.path();
                if !is_report_extension(&child) {
                    continue;
                }
                // Use the async metadata call rather than the blocking
                // `Path::is_file`; like `is_file`, it follows symlinks and an
                // error is treated as "not a file".
                let is_file = tokio::fs::metadata(&child)
                    .await
                    .map(|child_meta| child_meta.is_file())
                    .unwrap_or(false);
                if is_file {
                    children.push(child);
                }
            }

            // Directory iteration order is platform dependent; sort so the
            // merge (and first-wins deduplication downstream) is deterministic.
            children.sort();
            paths.extend(children);
        } else if meta.is_file() {
            if !is_report_extension(&expanded) {
                warn!(path = %expanded.display(), "Skipping file with unsupported extension");
                continue;
            }
            paths.push(expanded);
        }
    }

    Ok(paths)
}
/// Returns `true` when `path` has a `json` or `jsonl` extension, compared
/// ASCII case-insensitively.
///
/// Paths with no extension, or with an extension that is not valid UTF-8,
/// yield `false`.
fn is_report_extension(path: &Path) -> bool {
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => ext.eq_ignore_ascii_case("json") || ext.eq_ignore_ascii_case("jsonl"),
        None => false,
    }
}
/// Read every report in `paths` and join their bytes into one buffer, placing
/// a single `\n` between consecutive chunks so the viewer can parse the result
/// as JSONL.
///
/// Files that cannot be read are logged with a warning and skipped.
///
/// # Errors
///
/// Returns an error when `paths` is non-empty and none of the files could be
/// read.
async fn load_and_combine_reports(paths: &[PathBuf]) -> Result<Vec<u8>> {
    let mut combined: Vec<u8> = Vec::new();
    let mut loaded = 0usize;

    for path in paths {
        let bytes = match tokio::fs::read(path).await {
            Ok(bytes) => bytes,
            Err(err) => {
                warn!(path = %path.display(), %err, "Failed to read report file, skipping");
                continue;
            }
        };

        // Only separate from earlier content; nothing is prepended while the
        // buffer is still empty.
        if !combined.is_empty() {
            combined.push(b'\n');
        }
        combined.extend_from_slice(&bytes);
        loaded += 1;
    }

    if loaded > 0 {
        info!(loaded, total = paths.len(), "Loaded report files");
    } else if !paths.is_empty() {
        return Err(anyhow!("Failed to read any of the {} report file(s)", paths.len()));
    }

    Ok(combined)
}
/// Run the `kingfisher view` subcommand.
pub async fn run(args: ViewArgs) -> Result<()> {
let report = if let Some(report_bytes) = args.report_bytes.as_ref() {
Some(report_bytes.clone())
} else if let Some(path) = args.report.as_ref() {
let expanded_path = expand_tilde(path)?;
let ext = path
.extension()
.and_then(|ext| ext.to_str())
.map(|ext| ext.to_ascii_lowercase())
.unwrap_or_default();
if ext != "json" && ext != "jsonl" {
return Err(anyhow!("Report must be a JSON or JSONL file (got extension: {})", ext));
} else if !args.reports.is_empty() {
let paths = resolve_report_paths(&args.reports).await?;
if paths.is_empty() {
warn!("No JSON/JSONL report files found in the provided paths");
None
} else {
let combined = load_and_combine_reports(&paths).await?;
if combined.is_empty() {
None
} else {
Some(combined)
}
}
Some(
tokio::fs::read(&expanded_path)
.await
.with_context(|| format!("Failed to read report at {}", expanded_path.display()))?,
)
} else {
None
};
@ -110,7 +184,7 @@ pub async fn run(args: ViewArgs) -> Result<()> {
info!(%address, "Starting access-map viewer");
eprintln!("Serving access-map viewer at {} (Ctrl+C to stop)", url);
let open_browser = args.open_browser || args.report.is_some() || args.report_bytes.is_some();
let open_browser = args.open_browser || !args.reports.is_empty() || args.report_bytes.is_some();
if open_browser {
let url = url.clone();
tokio::task::spawn_blocking(move || {

View file

@ -330,12 +330,14 @@ async fn execute_http_validation(
let body =
response.text().await.unwrap_or_else(|e| format!("Failed to read response body: {}", e));
// Truncate body for display if too long
let display_body = preview_body_for_display(&body, 500);
// Validate the response
let matchers = http_validation.request.response_matcher.as_deref().unwrap_or(&[]);
let html_allowed = http_validation.request.response_is_html;
let display_body = if html_allowed {
crate::validation::utils::format_response_body_for_display(&body, 500, true)
} else {
preview_body_for_display(&body, 500)
};
let is_valid = validate_response(matchers, &body, &status, &headers, html_allowed);
Ok(DirectValidationResult {

View file

@ -346,7 +346,7 @@ async fn async_main(args: CommandLineArgs) -> Result<()> {
let envelope = reporter.build_report_envelope(&scan_args)?;
let report_bytes = serde_json::to_vec_pretty(&envelope)?;
let view_args = view::ViewArgs {
report: None,
reports: vec![],
port: scan_args.view_report_port,
address: scan_args.view_report_address.clone(),
open_browser: true,

View file

@ -902,6 +902,11 @@ impl DetailsReporter {
let validation_status = if rm.validation_success {
"Active Credential".to_string()
} else if rm.validation_response_status == StatusCode::PRECONDITION_REQUIRED.as_u16()
&& validation_body::as_str(&rm.validation_response_body)
.starts_with("(skip list entry)")
{
"Canary Token (Skipped)".to_string()
} else if matches!(
rm.validation_response_status,
status if status == StatusCode::CONTINUE.as_u16()
@ -1945,7 +1950,7 @@ mod tests {
let scan_args = sample_scan_args();
let record = reporter.build_finding_record(&report_match, &scan_args);
assert_eq!(record.finding.validation.status, "Not Attempted");
assert_eq!(record.finding.validation.status, "Canary Token (Skipped)");
assert_eq!(
record.finding.validation.response,
"(skip list entry) AWS validation not attempted for account 111122223333."

View file

@ -98,10 +98,12 @@ fn validation_rank(status: &str) -> usize {
0
} else if status.eq_ignore_ascii_case("Inactive Credential") {
1
} else if status.eq_ignore_ascii_case("Not Attempted") {
} else if status.eq_ignore_ascii_case("Canary Token (Skipped)") {
2
} else {
} else if status.eq_ignore_ascii_case("Not Attempted") {
3
} else {
4
}
}
@ -136,6 +138,8 @@ fn render_findings_table(findings: &[FindingReporterRecord]) -> String {
"status-active"
} else if record.finding.validation.status == "Inactive Credential" {
"status-inactive"
} else if record.finding.validation.status == "Canary Token (Skipped)" {
"status-canary"
} else {
"status-unknown"
};
@ -249,6 +253,7 @@ fn build_html(envelope: &ReportEnvelope) -> String {
.status {{ padding: 2px 8px; border-radius: 999px; font-weight: 700; }}
.status-active {{ background: #14532d; color: #86efac; }}
.status-inactive {{ background: #7f1d1d; color: #fecaca; }}
.status-canary {{ background: #581c87; color: #e9d5ff; }}
.status-unknown {{ background: #78350f; color: #fde68a; }}
</style>
</head>

View file

@ -829,7 +829,11 @@ async fn timed_validate_single_match<'a>(
return;
}
};
let display_body = truncate_preview(&body, max_body_len);
let display_body = if http_validation.request.response_is_html {
utils::format_response_body_for_display(&body, max_body_len, true)
} else {
truncate_preview(&body, max_body_len)
};
m.validation_response_status = status;
let body_opt = validation_body::from_string(display_body.clone());

View file

@ -1,8 +1,132 @@
use std::sync::LazyLock;
use tl::{HTMLTag, Node, Parser, ParserOptions};
use crate::validation::SerializableCaptures;
// Re-export from the scanner crate so the rest of this module can use it.
pub use kingfisher_scanner::validation::{check_url_resolvable, is_ssrf_safe_ip};
// Default `tl` parser options, built lazily on first use and passed to
// `tl::parse` by `extract_visible_text_from_html`.
static HTML_PARSER_OPTIONS: LazyLock<ParserOptions> = LazyLock::new(ParserOptions::default);
/// Replaces every run of whitespace in `input` with a single ASCII space and
/// removes leading and trailing whitespace.
fn collapse_whitespace(input: &str) -> String {
    input.split_whitespace().collect::<Vec<_>>().join(" ")
}
/// Decodes a small, fixed set of HTML entities (`&nbsp;`, `&#160;`, `&lt;`,
/// `&gt;`, `&quot;`, `&#34;`, `&#39;`, `&amp;`) in `input`.
///
/// Every entity is decoded exactly once: text such as `&amp;lt;` becomes the
/// literal `&lt;`, not `<`. Entities outside this list are left untouched.
fn decode_common_html_entities(input: &str) -> String {
    // `&amp;` must stay LAST. Decoding it earlier would produce new `&...;`
    // sequences (e.g. `&amp;lt;` -> `&lt;`) that a later replacement would then
    // decode a second time.
    const ENTITY_REPLACEMENTS: [(&str, &str); 8] = [
        ("&nbsp;", " "),
        ("&#160;", " "),
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&quot;", "\""),
        ("&#34;", "\""),
        ("&#39;", "'"),
        ("&amp;", "&"),
    ];

    ENTITY_REPLACEMENTS.iter().fold(input.to_string(), |decoded, &(entity, replacement)| {
        // Skip the reallocation when the entity does not occur at all.
        if decoded.contains(entity) {
            decoded.replace(entity, replacement)
        } else {
            decoded
        }
    })
}
/// Appends the visible text of each child returned by `tag.children().top()`
/// to `out`, delegating to [`collect_visible_text`] per node.
fn collect_visible_text_from_tag(tag: &HTMLTag<'_>, parser: &Parser<'_>, out: &mut String) {
    let children = tag.children();
    for child in children.top().iter() {
        // Handles that do not resolve to a node in `parser` are skipped.
        if let Some(node) = child.get(parser) {
            collect_visible_text(node, parser, out);
        }
    }
}
/// Appends the visible text reachable from `node` to `out`.
///
/// * Raw text is trimmed and, when non-empty, appended with a single space
///   separating it from any text already collected.
/// * Comments contribute nothing.
/// * `script`, `style`, `noscript` and `template` tags (matched ASCII
///   case-insensitively) are skipped together with their contents; any other
///   tag is descended into.
fn collect_visible_text(node: &Node<'_>, parser: &Parser<'_>, out: &mut String) {
    const HIDDEN_TAGS: [&str; 4] = ["script", "style", "noscript", "template"];

    match node {
        Node::Comment(_) => {}
        Node::Raw(raw) => {
            let text = raw.as_utf8_str();
            let text = text.trim();
            if text.is_empty() {
                return;
            }
            if !out.is_empty() {
                out.push(' ');
            }
            out.push_str(text);
        }
        Node::Tag(tag) => {
            let name = tag.name().as_utf8_str();
            let hidden = HIDDEN_TAGS.iter().any(|hidden_tag| name.eq_ignore_ascii_case(hidden_tag));
            if !hidden {
                collect_visible_text_from_tag(tag, parser, out);
            }
        }
    }
}
/// Parses `input` as HTML and returns its visible text with common entities
/// decoded and whitespace collapsed.
///
/// Returns `None` when `tl::parse` reports an error.
fn extract_visible_text_from_html(input: &str) -> Option<String> {
    let dom = match tl::parse(input, *HTML_PARSER_OPTIONS) {
        Ok(dom) => dom,
        Err(_) => return None,
    };
    let parser = dom.parser();

    let mut visible = String::new();
    for handle in dom.children() {
        // Handles that do not resolve to a node are skipped.
        if let Some(node) = handle.get(parser) {
            collect_visible_text(node, parser, &mut visible);
        }
    }

    let decoded = decode_common_html_entities(&visible);
    Some(collapse_whitespace(&decoded))
}
/// Reduces `input` to its visible text.
///
/// When the HTML cannot be parsed, falls back to decoding common entities and
/// collapsing whitespace on the raw input (markup is then left in place).
fn strip_html_markup(input: &str) -> String {
    match extract_visible_text_from_html(input) {
        Some(text) => text,
        None => collapse_whitespace(&decode_common_html_entities(input)),
    }
}
/// Returns `input` cut to at most `max_len` bytes without splitting a UTF-8
/// character; the cut backs off to the nearest earlier character boundary.
///
/// A `max_len` of `0` disables truncation, and inputs that already fit are
/// returned unchanged.
fn truncate_to_char_boundary(input: &str, max_len: usize) -> String {
    let needs_cut = max_len != 0 && input.len() > max_len;
    if !needs_cut {
        return input.to_owned();
    }

    // Index 0 is always a character boundary, so the search cannot fail.
    let cut = (0..=max_len).rev().find(|&idx| input.is_char_boundary(idx)).unwrap_or(0);
    input[..cut].to_owned()
}
/// Prepares a validation response body for inclusion in report output.
///
/// With `strip_html` set, the body is first reduced to its visible text (markup
/// removed, common entities decoded). The result is then truncated to at most
/// `max_len` bytes on a character boundary; a `max_len` of `0` leaves it
/// untruncated.
pub fn format_response_body_for_display(body: &str, max_len: usize, strip_html: bool) -> String {
    if strip_html {
        truncate_to_char_boundary(&strip_html_markup(body), max_len)
    } else {
        truncate_to_char_boundary(body, max_len)
    }
}
/// Return (NAME, value, start, end) for the captures we care about.
///
/// * Named captures keep their (upper-cased) name
@ -275,6 +399,28 @@ mod tests {
assert!(is_ssrf_safe_ip(&"2606:4700::1111".parse().unwrap()));
}
// Script contents must be dropped, entities decoded, and the remaining text
// joined with single spaces; `max_len == 0` means no truncation.
#[test]
fn format_response_body_for_display_strips_html() {
    let page = r#"<!doctype html>
<html>
<head>
<script>console.log("ignore");</script>
</head>
<body><h1>Hello &amp; goodbye</h1><p>World</p></body>
</html>"#;

    let visible = format_response_body_for_display(page, 0, true);

    assert_eq!(visible, "Hello & goodbye World");
}
// "é" is two bytes in UTF-8, so a 7-byte limit falls inside the fourth
// character and the cut must back off to three whole characters.
#[test]
fn format_response_body_for_display_truncates_on_utf8_boundary() {
    let accented = "é".repeat(10);

    let shortened = format_response_body_for_display(&accented, 7, false);

    assert_eq!(shortened, "ééé");
}
#[tokio::test]
async fn check_url_resolvable_blocks_localhost() {
let url = Url::parse("https://localhost/path").unwrap();