Kingfisher can now generate an auditor-friendly HTML report

This commit is contained in:
Mick Grove 2026-02-15 14:29:42 -08:00
commit 39a4e217e3
23 changed files with 958 additions and 30 deletions

View file

@ -1,8 +1,9 @@
name: pypi-wheels
on:
release:
types: [published]
workflow_run:
workflows: ["Publish Docker image"]
types: [completed]
workflow_dispatch:
inputs:
tag:
@ -13,21 +14,25 @@ on:
jobs:
build-wheels:
name: Build PyPI wheels
if: >
github.event_name != 'workflow_run' ||
github.event.workflow_run.conclusion == 'success' ||
github.run_attempt > 1
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write
steps:
- uses: actions/checkout@v4
with:
ref: ${{ github.event_name == 'workflow_run' && github.event.workflow_run.head_sha || github.sha }}
- name: Determine version/tag
id: version
shell: bash
run: |
set -euo pipefail
if [[ "${GITHUB_EVENT_NAME}" == "release" ]]; then
TAG="${{ github.event.release.tag_name }}"
elif [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" && -n "${{ github.event.inputs.tag }}" ]]; then
if [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" && -n "${{ github.event.inputs.tag }}" ]]; then
TAG="${{ github.event.inputs.tag }}"
else
VERSION=$(grep -m1 '^version\s*=' Cargo.toml | cut -d '"' -f2)

View file

@ -3,8 +3,9 @@
All notable changes to this project will be documented in this file.
## [v1.83.0]
- Architecture: split `matcher.rs` (1742 lines) into a `src/matcher/` module directory with focused sub-modules (`base64_decode`, `captures`, `conversion`, `dedup`, `filter`, `fingerprint`). Decomposed `filter_match` into smaller validation helpers.
- Architecture: refactored `scanner/runner.rs` god function (~600 lines) into phase-based helpers (`enumerate_all_repos`, `fetch_all_artifacts`, `run_sequential_scan`, `run_parallel_scan`, etc.) with a `ValidationDeps` type alias.
- Kingfisher can now generate an auditor-friendly HTML report: `--format html --output kingfisher-audit.html`
- Architecture: split `matcher.rs` into a `src/matcher/` module directory with focused sub-modules (`base64_decode`, `captures`, `conversion`, `dedup`, `filter`, `fingerprint`). Decomposed `filter_match` into smaller validation helpers.
- Architecture: refactored `scanner/runner.rs` god function into phase-based helpers (`enumerate_all_repos`, `fetch_all_artifacts`, `run_sequential_scan`, `run_parallel_scan`, etc.) with a `ValidationDeps` type alias.
- Architecture: consolidated duplicated matching primitives (base64 detection, dedup, fingerprinting, secret capture selection) into `kingfisher-scanner::primitives` as the single source of truth; both the scanner crate and binary now share one implementation.
- Architecture: introduced `TokenAccessMapper` trait for access map providers, implemented for GitHub, GitLab, Slack, HuggingFace, Gitea, and Bitbucket.
- Architecture: moved `content_type` module to `kingfisher-core` crate where it logically belongs (zero binary-crate dependencies).

View file

@ -8,7 +8,7 @@ license.workspace = true
authors.workspace = true
homepage.workspace = true
repository.workspace = true
publish.workspace = true
publish = false
[dependencies]
# Serialization

View file

@ -0,0 +1,10 @@
# kingfisher-core
Foundational types and utilities for the Kingfisher secret scanning ecosystem.
This crate provides:
- blob and content abstractions
- source location and origin modeling
- shared error and entropy helpers
It is intended as the stable base for `kingfisher-rules` and `kingfisher-scanner`.

View file

@ -410,6 +410,15 @@ impl<V> BlobIdMap<V> {
pub fn is_empty(&self) -> bool {
    // The map is empty exactly when no shard holds an entry; the scan stops
    // at the first shard that reports one.
    !self.maps.iter().any(|shard| !shard.lock().is_empty())
}
/// Empties every shard so that the map holds no entries afterwards.
///
/// Note: shards are locked and cleared one at a time, in order.
pub fn clear(&self) {
    self.maps.iter().for_each(|shard| {
        shard.lock().clear();
    });
}
}
impl<V: Copy> BlobIdMap<V> {

View file

@ -8,7 +8,7 @@ license.workspace = true
authors.workspace = true
homepage.workspace = true
repository.workspace = true
publish.workspace = true
publish = false
[dependencies]
# Internal dependencies

View file

@ -0,0 +1,11 @@
# kingfisher-rules
Rule definitions and compiled rule database support for Kingfisher.
This crate provides:
- rule syntax and rule model types
- YAML loading and parsing for rules
- embedded builtin rules
- `RulesDatabase` compilation for scanning engines
Use this crate with `kingfisher-core` and `kingfisher-scanner` to build reusable scanning workflows.

View file

@ -8,7 +8,7 @@ license.workspace = true
authors.workspace = true
homepage.workspace = true
repository.workspace = true
publish.workspace = true
publish = false
[features]
default = []

View file

@ -0,0 +1,10 @@
# kingfisher-scanner
High-level scanning library for detecting secrets in files and buffers.
This crate provides:
- ergonomic scanner APIs for bytes, files, and blobs
- finding models with location and capture metadata
- optional validation modules behind feature flags
Pair with `kingfisher-rules` to compile rules and `kingfisher-core` for shared primitives.

View file

@ -52,12 +52,23 @@
//! - **validation-all**: Enable all validation features
mod finding;
#[doc(hidden)]
pub mod primitives;
mod scanner;
mod scanner_pool;
// Validation module (feature-gated)
#[cfg(any(feature = "validation", feature = "validation-http", feature = "validation-aws"))]
#[cfg(any(
feature = "validation",
feature = "validation-http",
feature = "validation-aws",
feature = "validation-azure",
feature = "validation-coinbase",
feature = "validation-gcp",
feature = "validation-jwt",
feature = "validation-database",
feature = "validation-all",
))]
pub mod validation;
pub use finding::{intern, Finding, FindingLocation, SerializableCapture, SerializableCaptures};

View file

@ -283,8 +283,7 @@ impl Scanner {
/// Call this to clear the seen blobs cache if you want to rescan
/// previously scanned content.
pub fn reset_dedup(&self) {
// Empties the `seen_blobs` cache in place through its `clear` method.
self.seen_blobs.clear();
}
fn redact(&self, bytes: &[u8]) -> String {

View file

@ -72,6 +72,14 @@ kingfisher scan . --format json | tee kingfisher.json
kingfisher scan /path/to/repo --format sarif --output findings.sarif
```
### Generate an auditor-friendly HTML report
```bash
kingfisher scan /path/to/repo --format html --output kingfisher-audit.html
```
The HTML audit report is a standalone file that embeds scan metadata designed for evidence workflows: scan timestamp, sanitized CLI arguments, version, and finding summary counts.
### Access map outputs and viewer
**Stop Guessing, Start Mapping: Understand Your True Blast Radius**

View file

@ -1759,11 +1759,11 @@
if (!amContainer) return;
if (collapsed) {
amContainer.classList.add("hidden");
amToggle.textContent = "Expand";
if (amToggle) amToggle.textContent = "Expand";
autoCollapsedAccessMap = auto;
} else {
amContainer.classList.remove("hidden");
amToggle.textContent = "Collapse";
if (amToggle) amToggle.textContent = "Collapse";
autoCollapsedAccessMap = false;
}
}
@ -2027,10 +2027,22 @@
// Try to extract metadata from the raw report data
const data = rawData;
scanMetadata.timestamp = data.timestamp || data.scan_timestamp || data.generated_at || new Date().toLocaleString();
const reportMeta = data.metadata && typeof data.metadata === "object" ? data.metadata : {};
scanMetadata.timestamp =
reportMeta.scan_timestamp ||
reportMeta.generated_at ||
data.timestamp ||
data.scan_timestamp ||
data.generated_at ||
new Date().toLocaleString();
// Target info
scanMetadata.target = data.target || data.scan_target || data.repository || data.repo ||
scanMetadata.target =
reportMeta.target ||
data.target ||
data.scan_target ||
data.repository ||
data.repo ||
(data.stats && data.stats.target) ||
(data.summary && data.summary.target) || "";
@ -2043,8 +2055,45 @@
}
// Version
scanMetadata.version = data.version || data.kingfisher_version ||
scanMetadata.version =
reportMeta.kingfisher_version ||
data.version ||
data.kingfisher_version ||
(data.kingfisher && data.kingfisher.version) || "";
scanMetadata.latestVersion =
reportMeta.latest_version_available ||
(data.kingfisher && data.kingfisher.latest_version) || "";
scanMetadata.updateCheckStatus =
reportMeta.update_check_status ||
(data.kingfisher && data.kingfisher.update_check_status) || "";
// Sanitized command-line arguments (when present)
const cliArgs = reportMeta.command_line_args;
if (Array.isArray(cliArgs) && cliArgs.length > 0) {
scanMetadata.commandLineArgs = cliArgs.map((arg) => String(arg));
}
const reportSummary = reportMeta.summary && typeof reportMeta.summary === "object"
? reportMeta.summary
: null;
if (reportSummary) {
scanMetadata.summary = {
findings: Number(reportSummary.findings || 0),
active: Number(reportSummary.active_findings || 0),
inactive: Number(reportSummary.inactive_findings || 0),
unknown: Number(reportSummary.unknown_validation_findings || 0),
identities: Number(reportSummary.access_map_identities || 0),
rulesApplied: Number(reportSummary.rules_applied || 0),
confidenceLevel: reportSummary.confidence_level ? String(reportSummary.confidence_level) : "",
customRulesUsed: Boolean(reportSummary.custom_rules_used),
successfulValidations: Number(reportSummary.successful_validations || 0),
failedValidations: Number(reportSummary.failed_validations || 0),
skippedValidations: Number(reportSummary.skipped_validations || 0),
blobsScanned: Number(reportSummary.blobs_scanned || 0),
bytesScanned: Number(reportSummary.bytes_scanned || 0),
scanDurationSeconds: Number(reportSummary.scan_duration_seconds || 0),
};
}
// Bytes scanned
const bytes = data.bytes_scanned ||
@ -2593,8 +2642,40 @@
if (scanMetadata.target) metaLines.push(`<strong>Target:</strong> ${escapeHtml(scanMetadata.target)}`);
if (scanMetadata.duration) metaLines.push(`<strong>Duration:</strong> ${escapeHtml(scanMetadata.duration)}`);
if (scanMetadata.version) metaLines.push(`<strong>Version:</strong> ${escapeHtml(scanMetadata.version)}`);
if (scanMetadata.latestVersion) metaLines.push(`<strong>Latest:</strong> ${escapeHtml(scanMetadata.latestVersion)}`);
if (scanMetadata.updateCheckStatus) metaLines.push(`<strong>Update Check:</strong> ${escapeHtml(scanMetadata.updateCheckStatus)}`);
if (scanMetadata.summary && scanMetadata.summary.rulesApplied > 0) {
metaLines.push(`<strong>Rules Applied:</strong> ${scanMetadata.summary.rulesApplied}`);
}
if (scanMetadata.summary && scanMetadata.summary.confidenceLevel) {
metaLines.push(`<strong>Confidence:</strong> ${escapeHtml(scanMetadata.summary.confidenceLevel)}`);
}
if (scanMetadata.summary) {
metaLines.push(`<strong>Custom Rules:</strong> ${scanMetadata.summary.customRulesUsed ? "Yes" : "No"}`);
}
if (scanMetadata.summary && scanMetadata.summary.successfulValidations) {
metaLines.push(`<strong>Successful Validations:</strong> ${scanMetadata.summary.successfulValidations}`);
}
if (scanMetadata.summary && scanMetadata.summary.failedValidations) {
metaLines.push(`<strong>Failed Validations:</strong> ${scanMetadata.summary.failedValidations}`);
}
if (scanMetadata.summary && scanMetadata.summary.skippedValidations) {
metaLines.push(`<strong>Skipped Validations:</strong> ${scanMetadata.summary.skippedValidations}`);
}
if (scanMetadata.summary && scanMetadata.summary.blobsScanned) {
metaLines.push(`<strong>Blobs Scanned:</strong> ${scanMetadata.summary.blobsScanned}`);
}
if (scanMetadata.summary && scanMetadata.summary.bytesScanned) {
metaLines.push(`<strong>Bytes Scanned:</strong> ${escapeHtml(formatBytes(scanMetadata.summary.bytesScanned))}`);
}
if (scanMetadata.summary && scanMetadata.summary.scanDurationSeconds) {
metaLines.push(`<strong>Scan Duration:</strong> ${escapeHtml(scanMetadata.summary.scanDurationSeconds.toFixed(3) + "s")}`);
}
const cliArgsHtml = scanMetadata.commandLineArgs && scanMetadata.commandLineArgs.length
? `<div style="margin-bottom:16px;"><div style="font-size:12px;font-weight:700;color:#0f172a;margin-bottom:6px;">Sanitized command-line arguments</div><pre style="margin:0;padding:10px 12px;background:#0f172a;color:#e2e8f0;border-radius:8px;white-space:pre-wrap;word-break:break-word;font-size:11px;line-height:1.5;">${escapeHtml(scanMetadata.commandLineArgs.join(" "))}</pre></div>`
: "";
const metaHtml = metaLines.length
? `<div style="display:flex;flex-wrap:wrap;gap:16px;padding:10px 14px;background:#f0f9ff;border:1px solid #bae6fd;border-radius:6px;font-size:12px;margin-bottom:16px;">${metaLines.join('<span style="color:#cbd5e1;">|</span>')}</div>`
? `<div style="display:flex;flex-wrap:wrap;gap:16px;padding:10px 14px;background:#f0f9ff;border:1px solid #bae6fd;border-radius:6px;font-size:12px;margin-bottom:16px;">${metaLines.join('<span style="color:#cbd5e1;">|</span>')}</div>${cliArgsHtml}`
: "";
// Executive summary

View file

@ -52,6 +52,9 @@ pub enum ReportOutputFormat {
/// SARIF format (experimental)
Sarif,
/// Standalone HTML audit report
Html,
}
// -----------------------------------------------------------------------------

View file

@ -1,5 +1,6 @@
use std::{
net::SocketAddr,
net::TcpListener as StdTcpListener,
path::{Path, PathBuf},
sync::Arc,
};
@ -44,6 +45,17 @@ struct AppState {
report: Option<Vec<u8>>,
}
/// Checks that `port` can currently be bound on `127.0.0.1`.
///
/// A listener is bound as a probe and dropped again before returning.
///
/// # Errors
///
/// Returns a message suggesting `--port <PORT>` when the address is already in
/// use; any other bind failure is converted and returned as-is.
pub fn ensure_port_available(port: u16) -> Result<()> {
    match StdTcpListener::bind(("127.0.0.1", port)) {
        Ok(_probe) => Ok(()),
        Err(bind_err) if bind_err.kind() == std::io::ErrorKind::AddrInUse => Err(anyhow!(
            "Port {} is already in use. Re-run with --port <PORT> to choose a different port.",
            port
        )),
        Err(bind_err) => Err(bind_err.into()),
    }
}
/// Run the `kingfisher view` subcommand.
pub async fn run(args: ViewArgs) -> Result<()> {
let report = if let Some(report_bytes) = args.report_bytes.as_ref() {

View file

@ -29,6 +29,7 @@ static GLOBAL: System = System;
use std::{
io::{IsTerminal, Read, Write},
sync::{Arc, Mutex},
time::Instant,
};
use anyhow::{Context, Result};
@ -51,7 +52,7 @@ use kingfisher::{
direct_revoke, direct_validate, findings_store,
findings_store::FindingsStore,
gitea, github, huggingface,
reporter::{styles::Styles, DetailsReporter},
reporter::{styles::Styles, DetailsReporter, ScanAuditContext},
rule_loader::RuleLoader,
rules_database::RulesDatabase,
scanner::{load_and_record_rules, run_scan},
@ -237,6 +238,11 @@ async fn async_main(args: CommandLineArgs) -> Result<()> {
match command {
Command::Scan(scan_command) => match scan_command.into_operation()? {
ScanOperation::Scan(mut scan_args) => {
if scan_args.view_report {
view::ensure_port_available(view::DEFAULT_PORT)?;
}
let view_scan_started_at = chrono::Local::now();
let view_scan_start_time = Instant::now();
let temp_dir =
TempDir::new().context("Failed to create temporary directory")?;
let temp_dir_path = temp_dir.path().to_path_buf();
@ -287,10 +293,28 @@ async fn async_main(args: CommandLineArgs) -> Result<()> {
let exit_code = determine_exit_code(&datastore);
if scan_args.view_report {
let audit_context = ScanAuditContext {
scan_timestamp: Some(view_scan_started_at.to_rfc3339()),
scan_duration_seconds: Some(
view_scan_start_time.elapsed().as_secs_f64(),
),
rules_applied: Some(rules_db.num_rules()),
successful_validations: None,
failed_validations: None,
skipped_validations: None,
blobs_scanned: None,
bytes_scanned: None,
running_version: Some(update_status.running_version.clone()),
latest_version: update_status.latest_version.clone(),
update_check_status: Some(
update_status.check_status.as_str().to_string(),
),
};
let reporter = DetailsReporter {
datastore: Arc::clone(&datastore),
styles: Styles::new(global_args.use_color(std::io::stdout())),
only_valid: scan_args.only_valid,
audit_context: Some(audit_context),
};
let envelope = reporter.build_report_envelope(&scan_args)?;
let report_bytes = serde_json::to_vec_pretty(&envelope)?;

View file

@ -5,6 +5,7 @@ use std::{
};
use anyhow::Result;
use chrono::{Local, Utc};
use http::StatusCode;
use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
use schemars::JsonSchema;
@ -25,6 +26,7 @@ use crate::{
validation_body::{self, ValidationResponseBody},
};
mod bson_format;
mod html_format;
mod json_format;
mod pretty_format;
mod sarif_format;
@ -138,6 +140,75 @@ fn required_vars_for_validation(validation: &crate::rules::Validation) -> BTreeS
vars
}
/// Marker written in place of every redacted command-line value.
const ARG_REDACTION_MARKER: &str = "***REDACTED***";

/// Reports whether a flag name suggests that its value is a secret.
///
/// Leading dashes are ignored and the comparison is ASCII case-insensitive.
/// A key is sensitive when it equals one of a few short names (`arg`, `var`,
/// `key`, `pass`, `pat`, ...) or merely *contains* a sensitive word such as
/// `token` or `auth`. The substring match is deliberately broad: it also hits
/// names like `author`, trading some over-redaction for safety.
fn is_sensitive_arg_key(key: &str) -> bool {
    let normalized = key.trim_start_matches('-').to_ascii_lowercase();

    let exact_match = matches!(
        normalized.as_str(),
        "arg"
            | "var"
            | "token"
            | "secret"
            | "password"
            | "pass"
            | "key"
            | "api-key"
            | "apikey"
            | "auth"
            | "oauth-token"
            | "pat"
            | "credential"
            | "credentials"
    );
    if exact_match {
        return true;
    }

    ["token", "secret", "password", "apikey", "api-key", "auth", "credential"]
        .iter()
        .any(|needle| normalized.contains(needle))
}

/// Masks the userinfo part (`user[:password]@`) of the first
/// `scheme://authority` span found in `arg`.
///
/// Returns `None` when `arg` has no such span or the authority carries no
/// userinfo, so callers can keep the original string without copying logic.
/// Only the first `://` occurrence is inspected.
fn redact_url_userinfo(arg: &str) -> Option<String> {
    let authority_start = arg.find("://")? + "://".len();
    let rest = &arg[authority_start..];
    // The authority ends at the first path, query, or fragment delimiter.
    let authority_len = rest.find(|ch| matches!(ch, '/' | '?' | '#')).unwrap_or(rest.len());
    // The last '@' inside the authority separates userinfo from the host.
    let userinfo_len = rest[..authority_len].rfind('@').filter(|&at| at > 0)?;

    Some(format!("{}{}{}", &arg[..authority_start], ARG_REDACTION_MARKER, &rest[userinfo_len..]))
}

/// Produces a copy of `args` that is safe to embed in a report.
///
/// Redaction rules, applied per argument:
/// - `--flag=value` with a sensitive flag name becomes `--flag=***REDACTED***`;
/// - a bare sensitive flag is kept and the argument that follows it is replaced
///   by `***REDACTED***`, whatever it looks like;
/// - in every other argument, credentials embedded in a URL
///   (`https://user:token@host/...`) are masked while the rest is preserved.
///
/// The output always has the same length and ordering as the input.
fn sanitize_command_line_args(args: &[String]) -> Vec<String> {
    let mut sanitized = Vec::with_capacity(args.len());
    let mut redact_next = false;

    for arg in args {
        if redact_next {
            sanitized.push(ARG_REDACTION_MARKER.to_string());
            redact_next = false;
            continue;
        }

        // A lone "-" (commonly stdin) is treated as a positional value, not a flag.
        let is_flag = arg.len() > 1 && arg.starts_with('-');
        if is_flag {
            if let Some((key, _value)) = arg.split_once('=') {
                if is_sensitive_arg_key(key) {
                    sanitized.push(format!("{}={}", key, ARG_REDACTION_MARKER));
                    continue;
                }
            } else if is_sensitive_arg_key(arg) {
                sanitized.push(arg.clone());
                redact_next = true;
                continue;
            }
        }

        // Positional values and non-sensitive flags may still carry URL credentials.
        sanitized.push(redact_url_userinfo(arg).unwrap_or_else(|| arg.clone()));
    }

    sanitized
}
fn required_vars_for_revocation(revocation: &Revocation) -> BTreeSet<String> {
let mut vars = BTreeSet::new();
@ -469,6 +540,7 @@ pub fn run(
global_args: &GlobalArgs,
ds: Arc<Mutex<findings_store::FindingsStore>>,
args: &cli::commands::scan::ScanArgs,
audit_context: Option<ScanAuditContext>,
) -> Result<()> {
global_args.use_color(std::io::stdout());
let stdout_is_tty = std::io::stdout().is_terminal();
@ -477,7 +549,8 @@ pub fn run(
let ds_clone = Arc::clone(&ds);
// Initialize the reporter
let reporter = DetailsReporter { datastore: ds_clone, styles, only_valid: args.only_valid };
let reporter =
DetailsReporter { datastore: ds_clone, styles, only_valid: args.only_valid, audit_context };
let writer = args.output_args.get_writer()?;
// Generate and write the report in the specified format
reporter.report(args.output_args.format, writer, args)
@ -486,6 +559,22 @@ pub struct DetailsReporter {
pub datastore: Arc<Mutex<findings_store::FindingsStore>>,
pub styles: Styles,
pub only_valid: bool,
pub audit_context: Option<ScanAuditContext>,
}
/// Run-level facts gathered by the caller and handed to the reporter so they
/// can be embedded in the report metadata.
///
/// Every field is optional; `Default` yields a context with nothing recorded,
/// which lets callers set only what they know via `..Default::default()`.
#[derive(Clone, Debug, Default)]
pub struct ScanAuditContext {
    /// When the scan started. The reporter tries to parse this as RFC 3339 to
    /// pick the timezone offset of the report's generation timestamp.
    pub scan_timestamp: Option<String>,
    /// Wall-clock duration of the scan, in seconds.
    pub scan_duration_seconds: Option<f64>,
    /// Number of rules that were applied during the scan.
    pub rules_applied: Option<usize>,
    /// Count of validations that succeeded.
    pub successful_validations: Option<usize>,
    /// Count of validations that failed.
    pub failed_validations: Option<usize>,
    /// Count of validations that were skipped.
    pub skipped_validations: Option<usize>,
    /// Number of blobs scanned.
    pub blobs_scanned: Option<u64>,
    /// Number of bytes scanned.
    pub bytes_scanned: Option<u64>,
    /// Version of the running binary; the reporter falls back to the crate
    /// version when this is absent.
    pub running_version: Option<String>,
    /// Latest released version, when known.
    pub latest_version: Option<String>,
    /// Outcome of the update check, as text.
    pub update_check_status: Option<String>,
}
impl DetailsReporter {
@ -1028,8 +1117,85 @@ impl DetailsReporter {
) -> Result<ReportEnvelope> {
let findings = self.build_finding_records(args)?;
let access_map = self.build_access_map_records(args);
let metadata = self.build_report_metadata(args, &findings, access_map.as_ref());
Ok(ReportEnvelope { findings, access_map })
Ok(ReportEnvelope { findings, access_map, metadata: Some(metadata) })
}
/// Assembles the audit metadata attached to a report envelope.
///
/// - `args`: scan arguments; supply the target, confidence level and rule options.
/// - `findings`: the records being reported; counted and split by validation status.
/// - `access_map`: optional access-map entries; only their count is recorded.
///
/// Values that only the caller knows (timings, validation counters, version
/// check results) are taken from `self.audit_context` when present.
fn build_report_metadata(
    &self,
    args: &cli::commands::scan::ScanArgs,
    findings: &[FindingReporterRecord],
    access_map: Option<&Vec<AccessMapEntry>>,
) -> ScanReportMetadata {
    let mut active_findings = 0usize;
    let mut inactive_findings = 0usize;
    let mut unknown_validation_findings = 0usize;

    for record in findings {
        let status = record.finding.validation.status.to_ascii_lowercase();
        // "inactive" contains "active", so it has to be tested first.
        if status.contains("inactive") {
            inactive_findings += 1;
        } else if status.contains("active") {
            active_findings += 1;
        } else {
            unknown_validation_findings += 1;
        }
    }

    // `std::env::args()` panics if any argument is not valid Unicode (for
    // example a non-UTF-8 path). Read the raw OS strings and convert lossily so
    // report generation can never abort on the command line itself.
    let command_line_args: Vec<String> =
        std::env::args_os().map(|arg| arg.to_string_lossy().into_owned()).collect();
    let sanitized_command_line_args = sanitize_command_line_args(&command_line_args);

    let ctx = self.audit_context.as_ref();
    let scan_timestamp = ctx.and_then(|c| c.scan_timestamp.clone());
    let generated_at = generated_at_for_scan_timezone(scan_timestamp.as_deref());
    // Without a recorded scan start, the generation time stands in for it.
    let scan_timestamp = scan_timestamp.unwrap_or_else(|| generated_at.clone());

    ScanReportMetadata {
        generated_at,
        scan_timestamp,
        target: derive_scan_target(args),
        command_line_args: sanitized_command_line_args,
        kingfisher_version: ctx
            .and_then(|c| c.running_version.clone())
            .unwrap_or_else(|| env!("CARGO_PKG_VERSION").to_string()),
        latest_version_available: ctx.and_then(|c| c.latest_version.clone()),
        update_check_status: ctx.and_then(|c| c.update_check_status.clone()),
        summary: ScanReportSummary {
            findings: findings.len(),
            active_findings,
            inactive_findings,
            unknown_validation_findings,
            access_map_identities: access_map.map_or(0, Vec::len),
            rules_applied: ctx.and_then(|c| c.rules_applied),
            confidence_level: args.confidence.to_string(),
            custom_rules_used: !args.rules.rules_path.is_empty() || !args.rules.load_builtins,
            successful_validations: ctx.and_then(|c| c.successful_validations),
            failed_validations: ctx.and_then(|c| c.failed_validations),
            skipped_validations: ctx.and_then(|c| c.skipped_validations),
            blobs_scanned: ctx.and_then(|c| c.blobs_scanned),
            bytes_scanned: ctx.and_then(|c| c.bytes_scanned),
            scan_duration_seconds: ctx.and_then(|c| c.scan_duration_seconds),
        },
    }
}
fn build_access_map_records(
@ -1183,10 +1349,58 @@ impl Reportable for DetailsReporter {
ReportOutputFormat::Jsonl => self.jsonl_format(writer, args),
ReportOutputFormat::Bson => self.bson_format(writer, args),
ReportOutputFormat::Sarif => self.sarif_format(writer, args.no_dedup, args),
ReportOutputFormat::Html => self.html_format(writer, args),
}
}
}
/// Returns the current time as RFC 3339 text.
///
/// When `scan_timestamp` is present and parses as RFC 3339, the current time is
/// expressed in that timestamp's UTC offset; otherwise the local timezone is used.
fn generated_at_for_scan_timezone(scan_timestamp: Option<&str>) -> String {
    scan_timestamp
        .and_then(|raw| chrono::DateTime::parse_from_rfc3339(raw).ok())
        .map(|scan_dt| Utc::now().with_timezone(scan_dt.offset()).to_rfc3339())
        .unwrap_or_else(|| Local::now().to_rfc3339())
}
/// Summarizes what was scanned as a single display string.
///
/// Collects paths, git URLs, `s3://`/`gcs://` buckets, `docker://` images and
/// the service names `jira`, `confluence`, `slack` (in that order). Returns
/// `None` when nothing was given, the sole entry when there is exactly one, and
/// `"<count> targets"` otherwise.
fn derive_scan_target(args: &cli::commands::scan::ScanArgs) -> Option<String> {
    let inputs = &args.input_specifier_args;
    let mut collected: Vec<String> = Vec::new();

    collected.extend(inputs.path_inputs.iter().map(|path| path.display().to_string()));
    collected.extend(inputs.git_url.iter().map(|git| git.to_string()));
    collected.extend(inputs.s3_bucket.iter().map(|bucket| format!("s3://{bucket}")));
    collected.extend(inputs.gcs_bucket.iter().map(|bucket| format!("gcs://{bucket}")));
    collected.extend(inputs.docker_image.iter().map(|image| format!("docker://{image}")));

    // Hosted services are reported by name only, never by URL or query.
    let services = [
        (inputs.jira_url.is_some(), "jira"),
        (inputs.confluence_url.is_some(), "confluence"),
        (inputs.slack_query.is_some(), "slack"),
    ];
    collected.extend(
        services.iter().filter(|(present, _)| *present).map(|(_, label)| label.to_string()),
    );

    match collected.len() {
        0 => None,
        1 => collected.pop(),
        count => Some(format!("{} targets", count)),
    }
}
/// A match produced by one of kingfisher's rules.
/// This corresponds to a single location.
#[derive(Serialize, JsonSchema, Clone)]
@ -1256,6 +1470,48 @@ pub struct ReportEnvelope {
pub findings: Vec<FindingReporterRecord>,
#[serde(skip_serializing_if = "Option::is_none")]
pub access_map: Option<Vec<AccessMapEntry>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<ScanReportMetadata>,
}
// Audit metadata serialized alongside the findings in a report envelope.
#[derive(Serialize, JsonSchema, Clone, Debug)]
pub struct ScanReportMetadata {
// Time the report was produced, as RFC 3339 text.
pub generated_at: String,
// Scan start time from the audit context; equals `generated_at` when none was recorded.
pub scan_timestamp: String,
// Display string for the scan target; left out of the output when no target could be derived.
#[serde(skip_serializing_if = "Option::is_none")]
pub target: Option<String>,
// Process command line after `sanitize_command_line_args` redaction.
pub command_line_args: Vec<String>,
// Running version from the audit context, else the crate version at build time.
pub kingfisher_version: String,
// Latest available version, when the audit context provides one.
#[serde(skip_serializing_if = "Option::is_none")]
pub latest_version_available: Option<String>,
// Textual outcome of the update check, when the audit context provides one.
#[serde(skip_serializing_if = "Option::is_none")]
pub update_check_status: Option<String>,
// Counters describing the findings and the scan itself.
pub summary: ScanReportSummary,
}
// Counters embedded in `ScanReportMetadata`. Optional fields are omitted from
// the serialized output when the audit context did not supply them.
#[derive(Serialize, JsonSchema, Clone, Debug)]
pub struct ScanReportSummary {
// Total number of finding records in the report.
pub findings: usize,
// Findings whose validation status contains "active" but not "inactive" (case-insensitive).
pub active_findings: usize,
// Findings whose validation status contains "inactive" (case-insensitive).
pub inactive_findings: usize,
// Findings whose validation status matches neither of the above.
pub unknown_validation_findings: usize,
// Number of access-map entries; 0 when no access map was built.
pub access_map_identities: usize,
#[serde(skip_serializing_if = "Option::is_none")]
pub rules_applied: Option<usize>,
// String form of the confidence setting from the scan arguments.
pub confidence_level: String,
// True when a rules path was given or the builtin rules were not loaded.
pub custom_rules_used: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub successful_validations: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub failed_validations: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub skipped_validations: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub blobs_scanned: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub bytes_scanned: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub scan_duration_seconds: Option<f64>,
}
#[derive(Serialize, JsonSchema, Clone, Debug)]
@ -1580,7 +1836,12 @@ mod tests {
let temp = tempdir().unwrap();
let datastore =
Arc::new(Mutex::new(findings_store::FindingsStore::new(temp.path().to_path_buf())));
let reporter = DetailsReporter { datastore, styles: Styles::new(false), only_valid: false };
let reporter = DetailsReporter {
datastore,
styles: Styles::new(false),
only_valid: false,
audit_context: None,
};
let (report_match, _) = sample_report_match(validation_body, StatusCode::OK.as_u16(), true);
let mut scan_args = sample_scan_args();
@ -1595,7 +1856,12 @@ mod tests {
let temp = tempdir().unwrap();
let datastore =
Arc::new(Mutex::new(findings_store::FindingsStore::new(temp.path().to_path_buf())));
let reporter = DetailsReporter { datastore, styles: Styles::new(false), only_valid: false };
let reporter = DetailsReporter {
datastore,
styles: Styles::new(false),
only_valid: false,
audit_context: None,
};
let (report_match, blob_path) =
sample_report_match("Bad credentials", StatusCode::UNAUTHORIZED.as_u16(), false);
@ -1620,7 +1886,12 @@ mod tests {
let temp = tempdir().unwrap();
let datastore =
Arc::new(Mutex::new(findings_store::FindingsStore::new(temp.path().to_path_buf())));
let reporter = DetailsReporter { datastore, styles: Styles::new(false), only_valid: false };
let reporter = DetailsReporter {
datastore,
styles: Styles::new(false),
only_valid: false,
audit_context: None,
};
let (report_match, _) = sample_report_match(
"(skip list entry) AWS validation not attempted for account 111122223333.",
@ -1661,6 +1932,56 @@ mod tests {
assert!(response.chars().take(512).all(|ch| ch == 'é'));
}
#[test]
fn sanitize_command_line_args_redacts_secret_values() {
    let raw_args: Vec<String> = [
        "kingfisher",
        "scan",
        "--token",
        "abcd",
        "--output=report.html",
        "--arg=TOP_SECRET",
        "--var",
        "TOKEN=inline",
        "--path",
        "./repo",
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();

    let sanitized = sanitize_command_line_args(&raw_args);

    // Expected output for positions 2 through 7 of the sanitized command line.
    let expected_from_index_two = [
        "--token",
        "***REDACTED***",
        "--output=report.html",
        "--arg=***REDACTED***",
        "--var",
        "***REDACTED***",
    ];
    for (offset, want) in expected_from_index_two.iter().enumerate() {
        assert_eq!(sanitized[offset + 2], *want);
    }
}
#[test]
fn report_envelope_contains_audit_metadata() {
    let scratch = tempdir().unwrap();
    let store = findings_store::FindingsStore::new(scratch.path().to_path_buf());
    let reporter = DetailsReporter {
        datastore: Arc::new(Mutex::new(store)),
        styles: Styles::new(false),
        only_valid: false,
        audit_context: None,
    };

    let mut scan_args = sample_scan_args();
    scan_args.input_specifier_args.path_inputs.push(PathBuf::from("/tmp/project"));

    let metadata = reporter
        .build_report_envelope(&scan_args)
        .expect("build envelope")
        .metadata
        .expect("metadata should be present");

    // An empty datastore must yield all-zero counters.
    let summary = &metadata.summary;
    assert_eq!(summary.findings, 0);
    assert_eq!(summary.active_findings, 0);
    assert_eq!(summary.inactive_findings, 0);
    assert_eq!(summary.access_map_identities, 0);
    // With no audit context, target and version come from the args and the crate.
    assert_eq!(metadata.target.as_deref(), Some("/tmp/project"));
    assert_eq!(metadata.kingfisher_version, env!("CARGO_PKG_VERSION"));
}
use super::build_git_urls;
#[test]

329
src/reporter/html_format.rs Normal file
View file

@ -0,0 +1,329 @@
use super::*;
/// Escapes the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so
/// `input` can be placed in element content or a quoted attribute value.
fn escape_html(input: &str) -> String {
    let mut escaped = String::with_capacity(input.len());
    for ch in input.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Renders an RFC 3339 timestamp as `YYYY-MM-DD HH:MM:SS +HH:MM`.
///
/// Input that does not parse as RFC 3339 is returned unchanged.
fn format_timestamp(input: &str) -> String {
    match chrono::DateTime::parse_from_rfc3339(input) {
        Ok(parsed) => parsed.format("%Y-%m-%d %H:%M:%S %:z").to_string(),
        Err(_) => input.to_string(),
    }
}
/// Builds one `summary-line` row: label, dot leader, value.
///
/// Both `label` and `value` are HTML-escaped before being inserted.
fn summary_line(label: &str, value: &str) -> String {
    let safe_label = escape_html(label);
    let safe_value = escape_html(value);
    format!(
        "<div class=\"summary-line\"><span class=\"label\">{safe_label}</span><span class=\"dots\"></span><span class=\"value\">{safe_value}</span></div>"
    )
}
// Renders the "Scan Summary" panel: one summary row per metadata value, followed
// by the sanitized command line in a <pre> block. Rows backed by optional
// values are emitted only when the value is present.
fn render_metadata(metadata: &ScanReportMetadata) -> String {
let mut lines = Vec::new();
lines.push(summary_line("Findings", &metadata.summary.findings.to_string()));
// Labels prefixed with "|__" are rendered as sub-items of the row above them.
if let Some(successful) = metadata.summary.successful_validations {
lines.push(summary_line(" |__Successful Validations", &successful.to_string()));
}
if let Some(failed) = metadata.summary.failed_validations {
lines.push(summary_line(" |__Failed Validations", &failed.to_string()));
}
if let Some(skipped) = metadata.summary.skipped_validations {
lines.push(summary_line(" |__Skipped Validations", &skipped.to_string()));
}
if let Some(rules_applied) = metadata.summary.rules_applied {
lines.push(summary_line("Rules Applied", &rules_applied.to_string()));
}
if let Some(blobs) = metadata.summary.blobs_scanned {
lines.push(summary_line(" |__Blobs Scanned", &blobs.to_string()));
}
// Bytes are shown as the raw count, without unit conversion.
if let Some(bytes) = metadata.summary.bytes_scanned {
lines.push(summary_line("Bytes Scanned", &bytes.to_string()));
}
// Duration is printed with millisecond precision.
if let Some(duration) = metadata.summary.scan_duration_seconds {
lines.push(summary_line("Scan Duration", &format!("{duration:.3}s")));
}
lines.push(summary_line("Scan Date", &format_timestamp(&metadata.scan_timestamp)));
lines.push(summary_line("Report Generated", &format_timestamp(&metadata.generated_at)));
lines.push(summary_line("Kingfisher Version", &metadata.kingfisher_version));
if let Some(latest) = &metadata.latest_version_available {
lines.push(summary_line(" |__Latest Version", latest));
}
if let Some(target) = &metadata.target {
lines.push(summary_line("Target", target));
}
lines.push(summary_line(
"Confidence Level",
&metadata.summary.confidence_level.to_ascii_lowercase(),
));
lines.push(summary_line(
"Custom Rules Used",
if metadata.summary.custom_rules_used { "yes" } else { "no" },
));
lines.push(summary_line(
"Validation Split",
&format!(
"Active {} | Inactive {} | Unknown {}",
metadata.summary.active_findings,
metadata.summary.inactive_findings,
metadata.summary.unknown_validation_findings
),
));
lines.push(summary_line(
"Access Map Identities",
&metadata.summary.access_map_identities.to_string(),
));
// Each argument is escaped on its own, then joined with single spaces; the
// summary rows above were already escaped inside `summary_line`.
let cli_cmdline =
metadata.command_line_args.iter().map(|arg| escape_html(arg)).collect::<Vec<_>>().join(" ");
format!(
"<section class=\"panel\">
<h2>Scan Summary</h2>
<div class=\"meta summary\">{}</div>
<h3>Sanitized command-line arguments</h3>
<pre class=\"cmdline\"><code>{}</code></pre>
</section>",
lines.join(""),
cli_cmdline
)
}
/// Sort rank for a validation status, compared ASCII case-insensitively:
/// `Active Credential` → 0, `Inactive Credential` → 1, `Not Attempted` → 2,
/// anything else → 3.
fn validation_rank(status: &str) -> usize {
    const RANKED_STATUSES: [&str; 3] = ["Active Credential", "Inactive Credential", "Not Attempted"];

    RANKED_STATUSES
        .iter()
        .position(|known| status.eq_ignore_ascii_case(known))
        .unwrap_or(RANKED_STATUSES.len())
}
/// Picks the URL to show for a finding from its git metadata.
///
/// Uses `file.url` when that key exists, otherwise `repository_url`. Returns
/// `None` when there is no git metadata, neither key exists, or the chosen
/// value is not a string.
fn finding_git_url(record: &FindingReporterRecord) -> Option<String> {
    let meta = record.finding.git_metadata.as_ref()?;
    let chosen = match meta.get("file").and_then(|file| file.get("url")) {
        Some(file_url) => file_url,
        None => meta.get("repository_url")?,
    };
    chosen.as_str().map(|url| url.to_string())
}
/// Renders the findings as an HTML `<table>` fragment.
///
/// An empty slice yields a short "No findings detected." paragraph instead of a table.
/// Otherwise rows are ordered by `validation_rank` of the validation status, then by
/// path, then by line. Every text cell is passed through `escape_html`; the Git URL cell
/// is a link when `finding_git_url` returns a URL and is left empty otherwise.
///
/// The status badge class is chosen with the same ASCII case-insensitive comparison that
/// `validation_rank` uses, so a row's colour always agrees with its sort position.
fn render_findings_table(findings: &[FindingReporterRecord]) -> String {
    if findings.is_empty() {
        return "<p>No findings detected.</p>".to_string();
    }

    // Sort borrowed records: reordering does not require cloning every finding.
    let mut sorted: Vec<&FindingReporterRecord> = findings.iter().collect();
    // Stable sort keeps the input order for records that compare equal.
    sorted.sort_by(|a, b| {
        validation_rank(&a.finding.validation.status)
            .cmp(&validation_rank(&b.finding.validation.status))
            .then_with(|| a.finding.path.cmp(&b.finding.path))
            .then_with(|| a.finding.line.cmp(&b.finding.line))
    });

    let mut rows = String::new();
    for record in sorted {
        let status = &record.finding.validation.status;
        let status_class = if status.eq_ignore_ascii_case("Active Credential") {
            "status-active"
        } else if status.eq_ignore_ascii_case("Inactive Credential") {
            "status-inactive"
        } else {
            "status-unknown"
        };

        let git_url_html = finding_git_url(record)
            .map(|url| {
                // Escape once and reuse for both the href attribute and the link text.
                let escaped = escape_html(&url);
                format!(
                    "<a href=\"{}\" target=\"_blank\" rel=\"noopener noreferrer\">{}</a>",
                    escaped, escaped
                )
            })
            .unwrap_or_default();

        // `git_url_html` is already markup and `status_class` is a fixed literal, so
        // neither is escaped again here.
        rows.push_str(&format!(
            "<tr>\
             <td>{}</td>\
             <td>{}</td>\
             <td><code>{}</code></td>\
             <td>{}</td>\
             <td><span class=\"status {}\">{}</span></td>\
             <td>{}</td>\
             <td>{}</td>\
             </tr>",
            escape_html(&record.rule.name),
            escape_html(&record.rule.id),
            escape_html(&record.finding.path),
            git_url_html,
            status_class,
            escape_html(status),
            escape_html(&record.finding.confidence),
            record.finding.line
        ));
    }

    format!(
        "<table>
<thead>
<tr>
<th>Rule</th>
<th>Rule ID</th>
<th>Path</th>
<th>Git URL</th>
<th>Validation</th>
<th>Confidence</th>
<th>Line</th>
</tr>
</thead>
<tbody>{rows}</tbody>
</table>"
    )
}
/// Renders the "Access Map Summary" panel as an HTML fragment.
///
/// Produces an empty string when no access map is supplied or when it has no entries.
/// Each entry becomes one list item showing the account (or the placeholder
/// `(identity)` when the entry has none), the upper-cased provider, and the number of
/// groups. Account and provider text is passed through `escape_html`.
fn render_access_map(access_map: Option<&Vec<AccessMapEntry>>) -> String {
    // Missing and empty maps are treated the same: the panel is omitted.
    let entries = match access_map {
        Some(list) if !list.is_empty() => list,
        _ => return String::new(),
    };

    let items: String = entries
        .iter()
        .map(|entry| {
            let account = entry.account.as_deref().unwrap_or("(identity)");
            format!(
                "<li><strong>{}</strong> <span>{}</span> ({} groups)</li>",
                escape_html(account),
                escape_html(&entry.provider.to_uppercase()),
                entry.groups.len()
            )
        })
        .collect();

    format!(
        "<section class=\"panel\">
<h2>Access Map Summary</h2>
<ul>{items}</ul>
</section>"
    )
}
fn build_html(envelope: &ReportEnvelope) -> String {
let metadata_html = envelope.metadata.as_ref().map(render_metadata).unwrap_or_default();
let findings_html = render_findings_table(&envelope.findings);
let access_map_html = render_access_map(envelope.access_map.as_ref());
format!(
"<!doctype html>
<html lang=\"en\">
<head>
<meta charset=\"utf-8\" />
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />
<title>Kingfisher Audit Report</title>
<style>
body {{ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, \"Liberation Mono\", monospace; margin: 0; padding: 24px; color: #111827; background: #f8fafc; }}
h1 {{ margin: 0 0 6px; color: #0f766e; }}
h2 {{ margin: 0 0 10px; }}
h3 {{ margin: 16px 0 8px; font-size: 14px; }}
.subtitle {{ color: #475569; margin-bottom: 18px; line-height: 1.45; }}
.subtitle a {{ color: #0f766e; text-decoration: none; font-weight: 600; }}
.subtitle a:hover {{ text-decoration: underline; }}
.panel {{ background: #ffffff; border: 1px solid #cbd5e1; border-radius: 10px; padding: 16px; margin-bottom: 16px; }}
.summary {{ display: grid; gap: 4px; }}
.summary-line {{ display: flex; align-items: baseline; gap: 8px; color: #111827; }}
.summary-line .label {{ color: #0f766e; white-space: nowrap; }}
.summary-line .dots {{ flex: 1; border-bottom: 1px dotted #cbd5e1; transform: translateY(-3px); }}
.summary-line .value {{ color: #0f172a; }}
.cmdline {{ margin: 0; padding: 12px; background: #f1f5f9; border-radius: 8px; overflow-x: auto; }}
.cmdline code {{ color: #0f172a; white-space: pre-wrap; word-break: break-word; }}
code {{ font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 12px; }}
table {{ width: 100%; border-collapse: collapse; }}
th, td {{ border: 1px solid #cbd5e1; padding: 8px; font-size: 12px; text-align: left; word-break: break-word; }}
th {{ background: #e2e8f0; color: #0f172a; }}
.status {{ padding: 2px 8px; border-radius: 999px; font-weight: 700; }}
.status-active {{ background: #14532d; color: #86efac; }}
.status-inactive {{ background: #7f1d1d; color: #fecaca; }}
.status-unknown {{ background: #78350f; color: #fde68a; }}
</style>
</head>
<body>
<h1>Kingfisher Audit Report</h1>
<div class=\"subtitle\">Secret scanning report generated by <a href=\"https://github.com/mongodb/kingfisher\" target=\"_blank\" rel=\"noopener noreferrer\">MongoDB Kingfisher</a>.</div>
{metadata_html}
<section class=\"panel\">
<h2>Detailed Findings</h2>
{findings_html}
</section>
{access_map_html}
</body>
</html>"
)
}
impl DetailsReporter {
    /// Writes the scan results to `writer` as a standalone HTML audit report.
    ///
    /// The document produced by `build_html` is followed by a single trailing newline.
    ///
    /// # Errors
    ///
    /// Propagates any failure from building the report envelope or from writing to
    /// `writer`.
    pub fn html_format<W: std::io::Write>(
        &self,
        mut writer: W,
        args: &cli::commands::scan::ScanArgs,
    ) -> Result<()> {
        let document = build_html(&self.build_report_envelope(args)?);
        writeln!(writer, "{document}")?;
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an envelope with no findings, no access map, and fully populated metadata
    /// whose command line contains an already-redacted token.
    fn empty_scan_envelope() -> ReportEnvelope {
        let summary = ScanReportSummary {
            findings: 0,
            active_findings: 0,
            inactive_findings: 0,
            unknown_validation_findings: 0,
            access_map_identities: 0,
            rules_applied: Some(10),
            confidence_level: "medium".to_string(),
            custom_rules_used: false,
            successful_validations: Some(0),
            failed_validations: Some(0),
            skipped_validations: Some(0),
            blobs_scanned: Some(1),
            bytes_scanned: Some(10),
            scan_duration_seconds: Some(0.1),
        };
        let metadata = ScanReportMetadata {
            generated_at: "2026-01-01T00:00:00Z".to_string(),
            scan_timestamp: "2026-01-01T00:00:00Z".to_string(),
            target: Some("/tmp/repo".to_string()),
            command_line_args: ["kingfisher", "scan", "--token", "***REDACTED***"]
                .into_iter()
                .map(String::from)
                .collect(),
            kingfisher_version: "1.2.3".to_string(),
            latest_version_available: Some("1.2.4".to_string()),
            update_check_status: Some("ok".to_string()),
            summary,
        };
        ReportEnvelope { findings: Vec::new(), access_map: None, metadata: Some(metadata) }
    }

    /// The rendered page must carry the report title, the command-line heading, the
    /// redacted argument, and the scan target.
    #[test]
    fn build_html_includes_audit_title_and_cli_args() {
        let html = build_html(&empty_scan_envelope());
        for expected in [
            "Kingfisher Audit Report",
            "Sanitized command-line arguments",
            "***REDACTED***",
            "/tmp/repo",
        ] {
            assert!(html.contains(expected), "report is missing {expected:?}");
        }
    }
}

View file

@ -279,6 +279,7 @@ mod tests {
datastore: Arc::new(Mutex::new(datastore)),
styles: Styles::new(false),
only_valid: false,
audit_context: None,
}
}

View file

@ -163,6 +163,7 @@ mod tests {
datastore: Arc::new(Mutex::new(store)),
styles: Styles::new(false),
only_valid: false,
audit_context: None,
}
}

View file

@ -430,6 +430,7 @@ fn deduplicate_new_matches(
datastore: Arc::clone(store),
styles: Styles::new(global_args.use_color(std::io::stdout())),
only_valid: args.only_valid,
audit_context: None,
};
let all_matches = reporter.get_unfiltered_matches(Some(false))?;
@ -456,6 +457,31 @@ fn deduplicate_new_matches(
Ok(())
}
/// Gathers end-of-scan statistics and version information into a `ScanAuditContext`
/// for the reporter.
///
/// Totals are obtained from `compute_scan_totals`; the scan duration is the time elapsed
/// since `start_time` at the moment this function runs, and the scan timestamp is
/// `scan_started_at` formatted as RFC 3339. Version fields are copied from `update_status`.
fn build_scan_audit_context(
    args: &scan::ScanArgs,
    rules_db: &RulesDatabase,
    matcher_stats: &Arc<Mutex<MatcherStats>>,
    datastore: &Arc<Mutex<FindingsStore>>,
    start_time: Instant,
    scan_started_at: chrono::DateTime<chrono::Local>,
    update_status: &crate::update::UpdateStatus,
) -> crate::reporter::ScanAuditContext {
    let totals = compute_scan_totals(datastore, args, matcher_stats.as_ref());

    let started_at_rfc3339 = scan_started_at.to_rfc3339();
    let elapsed_seconds = start_time.elapsed().as_secs_f64();
    let check_status = update_status.check_status.as_str().to_string();

    crate::reporter::ScanAuditContext {
        // When and how long.
        scan_timestamp: Some(started_at_rfc3339),
        scan_duration_seconds: Some(elapsed_seconds),
        // What was scanned and validated.
        rules_applied: Some(rules_db.num_rules()),
        successful_validations: Some(totals.successful_validations),
        failed_validations: Some(totals.failed_validations),
        skipped_validations: Some(totals.skipped_validations),
        blobs_scanned: Some(totals.blobs_scanned),
        bytes_scanned: Some(totals.bytes_scanned),
        // Which build produced the report.
        running_version: Some(update_status.running_version.clone()),
        latest_version: update_status.latest_version.clone(),
        update_check_status: Some(check_status),
    }
}
/// Applies baseline filtering if configured.
fn apply_baseline_if_configured(
args: &scan::ScanArgs,
@ -566,7 +592,16 @@ async fn run_sequential_scan(
finalize_access_map(datastore, collector, args).await?;
}
crate::reporter::run(global_args, Arc::clone(datastore), args)
let audit_context = build_scan_audit_context(
args,
rules_db,
matcher_stats,
datastore,
start_time,
scan_started_at,
update_status,
);
crate::reporter::run(global_args, Arc::clone(datastore), args, Some(audit_context))
.context("Failed to run report command")?;
print_scan_summary(
start_time,
@ -727,8 +762,13 @@ async fn run_parallel_scan(
}
if !output_to_file {
crate::reporter::run(global_args, Arc::clone(&repo_datastore), &args)
.context("Failed to run report command")?;
crate::reporter::run(
global_args,
Arc::clone(&repo_datastore),
&args,
None,
)
.context("Failed to run report command")?;
}
{
@ -765,7 +805,16 @@ async fn run_parallel_scan(
}
if output_to_file && ran_repo_scan.load(Ordering::Relaxed) {
crate::reporter::run(global_args, Arc::clone(datastore), args)
let audit_context = build_scan_audit_context(
args,
rules_db,
matcher_stats,
datastore,
start_time,
scan_started_at,
update_status,
);
crate::reporter::run(global_args, Arc::clone(datastore), args, Some(audit_context))
.context("Failed to run report command")?;
}
@ -780,7 +829,16 @@ async fn run_parallel_scan(
finalize_access_map(datastore, collector, args).await?;
}
crate::reporter::run(global_args, Arc::clone(datastore), args)
let audit_context = build_scan_audit_context(
args,
rules_db,
matcher_stats,
datastore,
start_time,
scan_started_at,
update_status,
);
crate::reporter::run(global_args, Arc::clone(datastore), args, Some(audit_context))
.context("Failed to run report command")?;
}

View file

@ -1,5 +1,7 @@
use assert_cmd::Command;
use predicates::{prelude::PredicateBooleanExt, str::contains};
use std::fs;
use tempfile::tempdir;
mod test {
@ -29,4 +31,34 @@ mod test {
.success()
.stdout(contains(env!("CARGO_PKG_VERSION")));
}
/// End-to-end check that `scan --format html --output <file>` writes an HTML audit
/// report containing the page title and the section headings the renderer emits.
#[test]
fn cli_scan_generates_html_audit_report() {
    let temp = tempdir().expect("tempdir should be created");
    let input_dir = temp.path().join("repo");
    let output_html = temp.path().join("audit-report.html");

    fs::create_dir_all(&input_dir).expect("input directory should be created");
    fs::write(input_dir.join("README.txt"), "no credentials here")
        .expect("seed file should be written");

    Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
        .args([
            "scan",
            input_dir.to_str().unwrap(),
            "--format",
            "html",
            "--output",
            output_html.to_str().unwrap(),
            "--rule",
            "kingfisher.aws.1",
            "--no-validate",
            "--no-update-check",
        ])
        .assert()
        .success();

    let html = fs::read_to_string(&output_html).expect("html report should be written");
    // These are the headings the HTML renderer actually produces: the page title, the
    // metadata panel ("Scan Summary"), and the findings panel ("Detailed Findings").
    for expected in ["Kingfisher Audit Report", "Scan Summary", "Detailed Findings"] {
        assert!(html.contains(expected), "html report is missing {expected:?}");
    }
}
}

View file

@ -114,6 +114,7 @@ fn reporter_deduplicates_across_git_commits() -> Result<()> {
datastore: Arc::new(Mutex::new(FindingsStore::new(PathBuf::from("/tmp")))),
styles: Styles::new(false),
only_valid: false,
audit_context: None,
};
let matches = vec![
@ -172,6 +173,7 @@ fn dedup_preserves_distinct_rules_with_same_fingerprint() -> Result<()> {
datastore: Arc::new(Mutex::new(FindingsStore::new(PathBuf::from("/tmp")))),
styles: Styles::new(false),
only_valid: false,
audit_context: None,
};
let matches = vec![