diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 8d687a0..6c21d2e 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -1,8 +1,9 @@ name: pypi-wheels on: - release: - types: [published] + workflow_run: + workflows: ["Publish Docker image"] + types: [completed] workflow_dispatch: inputs: tag: @@ -13,21 +14,25 @@ on: jobs: build-wheels: name: Build PyPI wheels + if: > + github.event_name != 'workflow_run' || + github.event.workflow_run.conclusion == 'success' || + github.run_attempt > 1 runs-on: ubuntu-latest permissions: contents: read id-token: write steps: - uses: actions/checkout@v4 + with: + ref: ${{ github.event_name == 'workflow_run' && github.event.workflow_run.head_sha || github.sha }} - name: Determine version/tag id: version shell: bash run: | set -euo pipefail - if [[ "${GITHUB_EVENT_NAME}" == "release" ]]; then - TAG="${{ github.event.release.tag_name }}" - elif [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" && -n "${{ github.event.inputs.tag }}" ]]; then + if [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" && -n "${{ github.event.inputs.tag }}" ]]; then TAG="${{ github.event.inputs.tag }}" else VERSION=$(grep -m1 '^version\s*=' Cargo.toml | cut -d '"' -f2) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dc79ec..3909ae8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,8 +3,9 @@ All notable changes to this project will be documented in this file. ## [v1.83.0] -- Architecture: split `matcher.rs` (1742 lines) into a `src/matcher/` module directory with focused sub-modules (`base64_decode`, `captures`, `conversion`, `dedup`, `filter`, `fingerprint`). Decomposed `filter_match` into smaller validation helpers. -- Architecture: refactored `scanner/runner.rs` god function (~600 lines) into phase-based helpers (`enumerate_all_repos`, `fetch_all_artifacts`, `run_sequential_scan`, `run_parallel_scan`, etc.) with a `ValidationDeps` type alias. +- Kingfisher can now generate an auditor-friendly HTML report: `--format html --output kingfisher-audit.html` +- Architecture: split `matcher.rs` into a `src/matcher/` module directory with focused sub-modules (`base64_decode`, `captures`, `conversion`, `dedup`, `filter`, `fingerprint`). Decomposed `filter_match` into smaller validation helpers. +- Architecture: refactored `scanner/runner.rs` god function into phase-based helpers (`enumerate_all_repos`, `fetch_all_artifacts`, `run_sequential_scan`, `run_parallel_scan`, etc.) with a `ValidationDeps` type alias. - Architecture: consolidated duplicated matching primitives (base64 detection, dedup, fingerprinting, secret capture selection) into `kingfisher-scanner::primitives` as the single source of truth; both the scanner crate and binary now share one implementation. - Architecture: introduced `TokenAccessMapper` trait for access map providers, implemented for GitHub, GitLab, Slack, HuggingFace, Gitea, and Bitbucket. - Architecture: moved `content_type` module to `kingfisher-core` crate where it logically belongs (zero binary-crate dependencies). diff --git a/crates/kingfisher-core/Cargo.toml b/crates/kingfisher-core/Cargo.toml index d8a0902..28f49f6 100644 --- a/crates/kingfisher-core/Cargo.toml +++ b/crates/kingfisher-core/Cargo.toml @@ -8,7 +8,7 @@ license.workspace = true authors.workspace = true homepage.workspace = true repository.workspace = true -publish.workspace = true +publish = false [dependencies] # Serialization diff --git a/crates/kingfisher-core/README.md b/crates/kingfisher-core/README.md new file mode 100644 index 0000000..29b468a --- /dev/null +++ b/crates/kingfisher-core/README.md @@ -0,0 +1,10 @@ +# kingfisher-core + +Foundational types and utilities for the Kingfisher secret scanning ecosystem. + +This crate provides: +- blob and content abstractions +- source location and origin modeling +- shared error and entropy helpers + +It is intended as the stable base for `kingfisher-rules` and `kingfisher-scanner`. diff --git a/crates/kingfisher-core/src/blob.rs b/crates/kingfisher-core/src/blob.rs index a25e60d..b51b457 100644 --- a/crates/kingfisher-core/src/blob.rs +++ b/crates/kingfisher-core/src/blob.rs @@ -410,6 +410,15 @@ impl BlobIdMap { pub fn is_empty(&self) -> bool { self.maps.iter().all(|m| m.lock().is_empty()) } + + /// Removes all entries from the map. + /// + /// Note: This locks each shard in sequence. + pub fn clear(&self) { + for map in &self.maps { + map.lock().clear(); + } + } } impl BlobIdMap { diff --git a/crates/kingfisher-rules/Cargo.toml b/crates/kingfisher-rules/Cargo.toml index 9b417ee..420247a 100644 --- a/crates/kingfisher-rules/Cargo.toml +++ b/crates/kingfisher-rules/Cargo.toml @@ -8,7 +8,7 @@ license.workspace = true authors.workspace = true homepage.workspace = true repository.workspace = true -publish.workspace = true +publish = false [dependencies] # Internal dependencies diff --git a/crates/kingfisher-rules/README.md b/crates/kingfisher-rules/README.md new file mode 100644 index 0000000..6cef24a --- /dev/null +++ b/crates/kingfisher-rules/README.md @@ -0,0 +1,11 @@ +# kingfisher-rules + +Rule definitions and compiled rule database support for Kingfisher. + +This crate provides: +- rule syntax and rule model types +- YAML loading and parsing for rules +- embedded builtin rules +- `RulesDatabase` compilation for scanning engines + +Use this crate with `kingfisher-core` and `kingfisher-scanner` to build reusable scanning workflows. diff --git a/crates/kingfisher-scanner/Cargo.toml b/crates/kingfisher-scanner/Cargo.toml index 875ae06..488a23b 100644 --- a/crates/kingfisher-scanner/Cargo.toml +++ b/crates/kingfisher-scanner/Cargo.toml @@ -8,7 +8,7 @@ license.workspace = true authors.workspace = true homepage.workspace = true repository.workspace = true -publish.workspace = true +publish = false [features] default = [] diff --git a/crates/kingfisher-scanner/README.md b/crates/kingfisher-scanner/README.md new file mode 100644 index 0000000..7369059 --- /dev/null +++ b/crates/kingfisher-scanner/README.md @@ -0,0 +1,10 @@ +# kingfisher-scanner + +High-level scanning library for detecting secrets in files and buffers. + +This crate provides: +- ergonomic scanner APIs for bytes, files, and blobs +- finding models with location and capture metadata +- optional validation modules behind feature flags + +Pair with `kingfisher-rules` to compile rules and `kingfisher-core` for shared primitives. diff --git a/crates/kingfisher-scanner/src/lib.rs b/crates/kingfisher-scanner/src/lib.rs index f6976ea..fc3abe8 100644 --- a/crates/kingfisher-scanner/src/lib.rs +++ b/crates/kingfisher-scanner/src/lib.rs @@ -52,12 +52,23 @@ //! - **validation-all**: Enable all validation features mod finding; +#[doc(hidden)] pub mod primitives; mod scanner; mod scanner_pool; // Validation module (feature-gated) -#[cfg(any(feature = "validation", feature = "validation-http", feature = "validation-aws"))] +#[cfg(any( + feature = "validation", + feature = "validation-http", + feature = "validation-aws", + feature = "validation-azure", + feature = "validation-coinbase", + feature = "validation-gcp", + feature = "validation-jwt", + feature = "validation-database", + feature = "validation-all", +))] pub mod validation; pub use finding::{intern, Finding, FindingLocation, SerializableCapture, SerializableCaptures}; diff --git a/crates/kingfisher-scanner/src/scanner.rs b/crates/kingfisher-scanner/src/scanner.rs index d3d83d6..dc8ef6c 100644 --- a/crates/kingfisher-scanner/src/scanner.rs +++ b/crates/kingfisher-scanner/src/scanner.rs @@ -283,8 +283,7 @@ impl Scanner { /// Call this to clear the seen blobs cache if you want to rescan /// previously scanned content. pub fn reset_dedup(&self) { - // Note: BlobIdMap doesn't have a clear method, so this creates a new scanner - // In a real implementation, you'd want to add a clear method or use a different approach + self.seen_blobs.clear(); } fn redact(&self, bytes: &[u8]) -> String { diff --git a/docs/USAGE.md b/docs/USAGE.md index 9660306..7916539 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -72,6 +72,14 @@ kingfisher scan . --format json | tee kingfisher.json kingfisher scan /path/to/repo --format sarif --output findings.sarif ``` +### Generate an auditor-friendly HTML report + +```bash +kingfisher scan /path/to/repo --format html --output kingfisher-audit.html +``` + +The HTML audit report is standalone and includes scan metadata designed for evidence workflows, including scan timestamp, sanitized CLI arguments, version, and finding summary counts. + ### Access map outputs and viewer **Stop Guessing, Start Mapping: Understand Your True Blast Radius** diff --git a/docs/access-map-viewer/index.html b/docs/access-map-viewer/index.html index f8975b2..76fd3ed 100644 --- a/docs/access-map-viewer/index.html +++ b/docs/access-map-viewer/index.html @@ -1759,11 +1759,11 @@ if (!amContainer) return; if (collapsed) { amContainer.classList.add("hidden"); - amToggle.textContent = "Expand"; + if (amToggle) amToggle.textContent = "Expand"; autoCollapsedAccessMap = auto; } else { amContainer.classList.remove("hidden"); - amToggle.textContent = "Collapse"; + if (amToggle) amToggle.textContent = "Collapse"; autoCollapsedAccessMap = false; } } @@ -2027,10 +2027,22 @@ // Try to extract metadata from the raw report data const data = rawData; - scanMetadata.timestamp = data.timestamp || data.scan_timestamp || data.generated_at || new Date().toLocaleString(); + const reportMeta = data.metadata && typeof data.metadata === "object" ? data.metadata : {}; + scanMetadata.timestamp = + reportMeta.scan_timestamp || + reportMeta.generated_at || + data.timestamp || + data.scan_timestamp || + data.generated_at || + new Date().toLocaleString(); // Target info - scanMetadata.target = data.target || data.scan_target || data.repository || data.repo || + scanMetadata.target = + reportMeta.target || + data.target || + data.scan_target || + data.repository || + data.repo || (data.stats && data.stats.target) || (data.summary && data.summary.target) || ""; @@ -2043,8 +2055,45 @@ } // Version - scanMetadata.version = data.version || data.kingfisher_version || + scanMetadata.version = + reportMeta.kingfisher_version || + data.version || + data.kingfisher_version || (data.kingfisher && data.kingfisher.version) || ""; + scanMetadata.latestVersion = + reportMeta.latest_version_available || + (data.kingfisher && data.kingfisher.latest_version) || ""; + scanMetadata.updateCheckStatus = + reportMeta.update_check_status || + (data.kingfisher && data.kingfisher.update_check_status) || ""; + + // Sanitized command-line arguments (when present) + const cliArgs = reportMeta.command_line_args; + if (Array.isArray(cliArgs) && cliArgs.length > 0) { + scanMetadata.commandLineArgs = cliArgs.map((arg) => String(arg)); + } + + const reportSummary = reportMeta.summary && typeof reportMeta.summary === "object" + ? reportMeta.summary + : null; + if (reportSummary) { + scanMetadata.summary = { + findings: Number(reportSummary.findings || 0), + active: Number(reportSummary.active_findings || 0), + inactive: Number(reportSummary.inactive_findings || 0), + unknown: Number(reportSummary.unknown_validation_findings || 0), + identities: Number(reportSummary.access_map_identities || 0), + rulesApplied: Number(reportSummary.rules_applied || 0), + confidenceLevel: reportSummary.confidence_level ? String(reportSummary.confidence_level) : "", + customRulesUsed: Boolean(reportSummary.custom_rules_used), + successfulValidations: Number(reportSummary.successful_validations || 0), + failedValidations: Number(reportSummary.failed_validations || 0), + skippedValidations: Number(reportSummary.skipped_validations || 0), + blobsScanned: Number(reportSummary.blobs_scanned || 0), + bytesScanned: Number(reportSummary.bytes_scanned || 0), + scanDurationSeconds: Number(reportSummary.scan_duration_seconds || 0), + }; + } // Bytes scanned const bytes = data.bytes_scanned || @@ -2593,8 +2642,40 @@ if (scanMetadata.target) metaLines.push(`Target: ${escapeHtml(scanMetadata.target)}`); if (scanMetadata.duration) metaLines.push(`Duration: ${escapeHtml(scanMetadata.duration)}`); if (scanMetadata.version) metaLines.push(`Version: ${escapeHtml(scanMetadata.version)}`); + if (scanMetadata.latestVersion) metaLines.push(`Latest: ${escapeHtml(scanMetadata.latestVersion)}`); + if (scanMetadata.updateCheckStatus) metaLines.push(`Update Check: ${escapeHtml(scanMetadata.updateCheckStatus)}`); + if (scanMetadata.summary && scanMetadata.summary.rulesApplied > 0) { + metaLines.push(`Rules Applied: ${scanMetadata.summary.rulesApplied}`); + } + if (scanMetadata.summary && scanMetadata.summary.confidenceLevel) { + metaLines.push(`Confidence: ${escapeHtml(scanMetadata.summary.confidenceLevel)}`); + } + if (scanMetadata.summary) { + metaLines.push(`Custom Rules: ${scanMetadata.summary.customRulesUsed ? "Yes" : "No"}`); + } + if (scanMetadata.summary && scanMetadata.summary.successfulValidations) { + metaLines.push(`Successful Validations: ${scanMetadata.summary.successfulValidations}`); + } + if (scanMetadata.summary && scanMetadata.summary.failedValidations) { + metaLines.push(`Failed Validations: ${scanMetadata.summary.failedValidations}`); + } + if (scanMetadata.summary && scanMetadata.summary.skippedValidations) { + metaLines.push(`Skipped Validations: ${scanMetadata.summary.skippedValidations}`); + } + if (scanMetadata.summary && scanMetadata.summary.blobsScanned) { + metaLines.push(`Blobs Scanned: ${scanMetadata.summary.blobsScanned}`); + } + if (scanMetadata.summary && scanMetadata.summary.bytesScanned) { + metaLines.push(`Bytes Scanned: ${escapeHtml(formatBytes(scanMetadata.summary.bytesScanned))}`); + } + if (scanMetadata.summary && scanMetadata.summary.scanDurationSeconds) { + metaLines.push(`Scan Duration: ${escapeHtml(scanMetadata.summary.scanDurationSeconds.toFixed(3) + "s")}`); + } + const cliArgsHtml = scanMetadata.commandLineArgs && scanMetadata.commandLineArgs.length + ? `
Sanitized command-line arguments
${escapeHtml(scanMetadata.commandLineArgs.join(" "))}
` + : ""; const metaHtml = metaLines.length - ? `
${metaLines.join('|')}
` + ? `
${metaLines.join('|')}
${cliArgsHtml}` : ""; // Executive summary diff --git a/src/cli/commands/output.rs b/src/cli/commands/output.rs index a807523..fe38e92 100644 --- a/src/cli/commands/output.rs +++ b/src/cli/commands/output.rs @@ -52,6 +52,9 @@ pub enum ReportOutputFormat { /// SARIF format (experimental) Sarif, + + /// Standalone HTML audit report + Html, } // ----------------------------------------------------------------------------- diff --git a/src/cli/commands/view.rs b/src/cli/commands/view.rs index 337f33e..dd1040b 100644 --- a/src/cli/commands/view.rs +++ b/src/cli/commands/view.rs @@ -1,5 +1,6 @@ use std::{ net::SocketAddr, + net::TcpListener as StdTcpListener, path::{Path, PathBuf}, sync::Arc, }; @@ -44,6 +45,17 @@ struct AppState { report: Option>, } +pub fn ensure_port_available(port: u16) -> Result<()> { + StdTcpListener::bind(("127.0.0.1", port)).map_err(|err| match err.kind() { + std::io::ErrorKind::AddrInUse => anyhow!( + "Port {} is already in use. Re-run with --port to choose a different port.", + port + ), + _ => err.into(), + })?; + Ok(()) +} + /// Run the `kingfisher view` subcommand. pub async fn run(args: ViewArgs) -> Result<()> { let report = if let Some(report_bytes) = args.report_bytes.as_ref() { diff --git a/src/main.rs b/src/main.rs index ecb0a44..a374f30 100644 --- a/src/main.rs +++ b/src/main.rs @@ -29,6 +29,7 @@ static GLOBAL: System = System; use std::{ io::{IsTerminal, Read, Write}, sync::{Arc, Mutex}, + time::Instant, }; use anyhow::{Context, Result}; @@ -51,7 +52,7 @@ use kingfisher::{ direct_revoke, direct_validate, findings_store, findings_store::FindingsStore, gitea, github, huggingface, - reporter::{styles::Styles, DetailsReporter}, + reporter::{styles::Styles, DetailsReporter, ScanAuditContext}, rule_loader::RuleLoader, rules_database::RulesDatabase, scanner::{load_and_record_rules, run_scan}, @@ -237,6 +238,11 @@ async fn async_main(args: CommandLineArgs) -> Result<()> { match command { Command::Scan(scan_command) => match scan_command.into_operation()? { ScanOperation::Scan(mut scan_args) => { + if scan_args.view_report { + view::ensure_port_available(view::DEFAULT_PORT)?; + } + let view_scan_started_at = chrono::Local::now(); + let view_scan_start_time = Instant::now(); let temp_dir = TempDir::new().context("Failed to create temporary directory")?; let temp_dir_path = temp_dir.path().to_path_buf(); @@ -287,10 +293,28 @@ async fn async_main(args: CommandLineArgs) -> Result<()> { let exit_code = determine_exit_code(&datastore); if scan_args.view_report { + let audit_context = ScanAuditContext { + scan_timestamp: Some(view_scan_started_at.to_rfc3339()), + scan_duration_seconds: Some( + view_scan_start_time.elapsed().as_secs_f64(), + ), + rules_applied: Some(rules_db.num_rules()), + successful_validations: None, + failed_validations: None, + skipped_validations: None, + blobs_scanned: None, + bytes_scanned: None, + running_version: Some(update_status.running_version.clone()), + latest_version: update_status.latest_version.clone(), + update_check_status: Some( + update_status.check_status.as_str().to_string(), + ), + }; let reporter = DetailsReporter { datastore: Arc::clone(&datastore), styles: Styles::new(global_args.use_color(std::io::stdout())), only_valid: scan_args.only_valid, + audit_context: Some(audit_context), }; let envelope = reporter.build_report_envelope(&scan_args)?; let report_bytes = serde_json::to_vec_pretty(&envelope)?; diff --git a/src/reporter.rs b/src/reporter.rs index fc545b2..1a31f61 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -5,6 +5,7 @@ use std::{ }; use anyhow::Result; +use chrono::{Local, Utc}; use http::StatusCode; use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; use schemars::JsonSchema; @@ -25,6 +26,7 @@ use crate::{ validation_body::{self, ValidationResponseBody}, }; mod bson_format; +mod html_format; mod json_format; mod pretty_format; mod sarif_format; @@ -138,6 +140,75 @@ fn required_vars_for_validation(validation: &crate::rules::Validation) -> BTreeS vars } +fn is_sensitive_arg_key(key: &str) -> bool { + let normalized = key.trim_start_matches('-').to_ascii_lowercase(); + let exact = [ + "arg", + "var", + "token", + "secret", + "password", + "pass", + "key", + "api-key", + "apikey", + "auth", + "oauth-token", + "pat", + "credential", + "credentials", + ]; + if exact.iter().any(|candidate| *candidate == normalized) { + return true; + } + + let contains = ["token", "secret", "password", "apikey", "api-key", "auth", "credential"]; + contains.iter().any(|candidate| normalized.contains(candidate)) +} + +fn sanitize_command_line_args(args: &[String]) -> Vec { + let mut sanitized = Vec::with_capacity(args.len()); + let mut redact_next = false; + + for arg in args { + if redact_next { + sanitized.push("***REDACTED***".to_string()); + redact_next = false; + continue; + } + + let Some(stripped) = arg.strip_prefix('-') else { + sanitized.push(arg.clone()); + continue; + }; + + if stripped.is_empty() { + sanitized.push(arg.clone()); + continue; + } + + let key_value_split = arg.split_once('='); + if let Some((key, _)) = key_value_split { + if is_sensitive_arg_key(key) { + sanitized.push(format!("{key}=***REDACTED***")); + } else { + sanitized.push(arg.clone()); + } + continue; + } + + if is_sensitive_arg_key(arg) { + sanitized.push(arg.clone()); + redact_next = true; + continue; + } + + sanitized.push(arg.clone()); + } + + sanitized +} + fn required_vars_for_revocation(revocation: &Revocation) -> BTreeSet { let mut vars = BTreeSet::new(); @@ -469,6 +540,7 @@ pub fn run( global_args: &GlobalArgs, ds: Arc>, args: &cli::commands::scan::ScanArgs, + audit_context: Option, ) -> Result<()> { global_args.use_color(std::io::stdout()); let stdout_is_tty = std::io::stdout().is_terminal(); @@ -477,7 +549,8 @@ pub fn run( let ds_clone = Arc::clone(&ds); // Initialize the reporter - let reporter = DetailsReporter { datastore: ds_clone, styles, only_valid: args.only_valid }; + let reporter = + DetailsReporter { datastore: ds_clone, styles, only_valid: args.only_valid, audit_context }; let writer = args.output_args.get_writer()?; // Generate and write the report in the specified format reporter.report(args.output_args.format, writer, args) @@ -486,6 +559,22 @@ pub struct DetailsReporter { pub datastore: Arc>, pub styles: Styles, pub only_valid: bool, + pub audit_context: Option, +} + +#[derive(Clone, Debug)] +pub struct ScanAuditContext { + pub scan_timestamp: Option, + pub scan_duration_seconds: Option, + pub rules_applied: Option, + pub successful_validations: Option, + pub failed_validations: Option, + pub skipped_validations: Option, + pub blobs_scanned: Option, + pub bytes_scanned: Option, + pub running_version: Option, + pub latest_version: Option, + pub update_check_status: Option, } impl DetailsReporter { @@ -1028,8 +1117,85 @@ impl DetailsReporter { ) -> Result { let findings = self.build_finding_records(args)?; let access_map = self.build_access_map_records(args); + let metadata = self.build_report_metadata(args, &findings, access_map.as_ref()); - Ok(ReportEnvelope { findings, access_map }) + Ok(ReportEnvelope { findings, access_map, metadata: Some(metadata) }) + } + + fn build_report_metadata( + &self, + args: &cli::commands::scan::ScanArgs, + findings: &[FindingReporterRecord], + access_map: Option<&Vec>, + ) -> ScanReportMetadata { + let mut active_findings = 0usize; + let mut inactive_findings = 0usize; + let mut unknown_validation_findings = 0usize; + + for record in findings { + let status = record.finding.validation.status.to_ascii_lowercase(); + if status.contains("inactive") { + inactive_findings += 1; + } else if status.contains("active") { + active_findings += 1; + } else { + unknown_validation_findings += 1; + } + } + + let command_line_args: Vec = std::env::args().collect(); + let sanitized_command_line_args = sanitize_command_line_args(&command_line_args); + let scan_timestamp = self.audit_context.as_ref().and_then(|ctx| ctx.scan_timestamp.clone()); + let generated_at = generated_at_for_scan_timezone(scan_timestamp.as_deref()); + let scan_timestamp = scan_timestamp.unwrap_or_else(|| generated_at.clone()); + + ScanReportMetadata { + generated_at: generated_at.clone(), + scan_timestamp, + target: derive_scan_target(args), + command_line_args: sanitized_command_line_args, + kingfisher_version: self + .audit_context + .as_ref() + .and_then(|ctx| ctx.running_version.clone()) + .unwrap_or_else(|| env!("CARGO_PKG_VERSION").to_string()), + latest_version_available: self + .audit_context + .as_ref() + .and_then(|ctx| ctx.latest_version.clone()), + update_check_status: self + .audit_context + .as_ref() + .and_then(|ctx| ctx.update_check_status.clone()), + summary: ScanReportSummary { + findings: findings.len(), + active_findings, + inactive_findings, + unknown_validation_findings, + access_map_identities: access_map.map_or(0, Vec::len), + rules_applied: self.audit_context.as_ref().and_then(|ctx| ctx.rules_applied), + confidence_level: args.confidence.to_string(), + custom_rules_used: !args.rules.rules_path.is_empty() || !args.rules.load_builtins, + successful_validations: self + .audit_context + .as_ref() + .and_then(|ctx| ctx.successful_validations), + failed_validations: self + .audit_context + .as_ref() + .and_then(|ctx| ctx.failed_validations), + skipped_validations: self + .audit_context + .as_ref() + .and_then(|ctx| ctx.skipped_validations), + blobs_scanned: self.audit_context.as_ref().and_then(|ctx| ctx.blobs_scanned), + bytes_scanned: self.audit_context.as_ref().and_then(|ctx| ctx.bytes_scanned), + scan_duration_seconds: self + .audit_context + .as_ref() + .and_then(|ctx| ctx.scan_duration_seconds), + }, + } } fn build_access_map_records( @@ -1183,10 +1349,58 @@ impl Reportable for DetailsReporter { ReportOutputFormat::Jsonl => self.jsonl_format(writer, args), ReportOutputFormat::Bson => self.bson_format(writer, args), ReportOutputFormat::Sarif => self.sarif_format(writer, args.no_dedup, args), + ReportOutputFormat::Html => self.html_format(writer, args), } } } +fn generated_at_for_scan_timezone(scan_timestamp: Option<&str>) -> String { + if let Some(scan_timestamp) = scan_timestamp { + if let Ok(scan_dt) = chrono::DateTime::parse_from_rfc3339(scan_timestamp) { + return Utc::now().with_timezone(scan_dt.offset()).to_rfc3339(); + } + } + Local::now().to_rfc3339() +} + +fn derive_scan_target(args: &cli::commands::scan::ScanArgs) -> Option { + let mut targets = Vec::new(); + let input_args = &args.input_specifier_args; + + for path in &input_args.path_inputs { + targets.push(path.display().to_string()); + } + for git in &input_args.git_url { + targets.push(git.to_string()); + } + if let Some(bucket) = &input_args.s3_bucket { + targets.push(format!("s3://{bucket}")); + } + if let Some(bucket) = &input_args.gcs_bucket { + targets.push(format!("gcs://{bucket}")); + } + for image in &input_args.docker_image { + targets.push(format!("docker://{image}")); + } + if input_args.jira_url.is_some() { + targets.push("jira".to_string()); + } + if input_args.confluence_url.is_some() { + targets.push("confluence".to_string()); + } + if input_args.slack_query.is_some() { + targets.push("slack".to_string()); + } + + if targets.is_empty() { + return None; + } + if targets.len() == 1 { + return targets.pop(); + } + Some(format!("{} targets", targets.len())) +} + /// A match produced by one of kingfisher's rules. /// This corresponds to a single location. #[derive(Serialize, JsonSchema, Clone)] @@ -1256,6 +1470,48 @@ pub struct ReportEnvelope { pub findings: Vec, #[serde(skip_serializing_if = "Option::is_none")] pub access_map: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option, +} + +#[derive(Serialize, JsonSchema, Clone, Debug)] +pub struct ScanReportMetadata { + pub generated_at: String, + pub scan_timestamp: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub target: Option, + pub command_line_args: Vec, + pub kingfisher_version: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub latest_version_available: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub update_check_status: Option, + pub summary: ScanReportSummary, +} + +#[derive(Serialize, JsonSchema, Clone, Debug)] +pub struct ScanReportSummary { + pub findings: usize, + pub active_findings: usize, + pub inactive_findings: usize, + pub unknown_validation_findings: usize, + pub access_map_identities: usize, + #[serde(skip_serializing_if = "Option::is_none")] + pub rules_applied: Option, + pub confidence_level: String, + pub custom_rules_used: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub successful_validations: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub failed_validations: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub skipped_validations: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub blobs_scanned: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub bytes_scanned: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub scan_duration_seconds: Option, } #[derive(Serialize, JsonSchema, Clone, Debug)] @@ -1580,7 +1836,12 @@ mod tests { let temp = tempdir().unwrap(); let datastore = Arc::new(Mutex::new(findings_store::FindingsStore::new(temp.path().to_path_buf()))); - let reporter = DetailsReporter { datastore, styles: Styles::new(false), only_valid: false }; + let reporter = DetailsReporter { + datastore, + styles: Styles::new(false), + only_valid: false, + audit_context: None, + }; let (report_match, _) = sample_report_match(validation_body, StatusCode::OK.as_u16(), true); let mut scan_args = sample_scan_args(); @@ -1595,7 +1856,12 @@ mod tests { let temp = tempdir().unwrap(); let datastore = Arc::new(Mutex::new(findings_store::FindingsStore::new(temp.path().to_path_buf()))); - let reporter = DetailsReporter { datastore, styles: Styles::new(false), only_valid: false }; + let reporter = DetailsReporter { + datastore, + styles: Styles::new(false), + only_valid: false, + audit_context: None, + }; let (report_match, blob_path) = sample_report_match("Bad credentials", StatusCode::UNAUTHORIZED.as_u16(), false); @@ -1620,7 +1886,12 @@ mod tests { let temp = tempdir().unwrap(); let datastore = Arc::new(Mutex::new(findings_store::FindingsStore::new(temp.path().to_path_buf()))); - let reporter = DetailsReporter { datastore, styles: Styles::new(false), only_valid: false }; + let reporter = DetailsReporter { + datastore, + styles: Styles::new(false), + only_valid: false, + audit_context: None, + }; let (report_match, _) = sample_report_match( "(skip list entry) AWS validation not attempted for account 111122223333.", @@ -1661,6 +1932,56 @@ mod tests { assert!(response.chars().take(512).all(|ch| ch == 'é')); } + #[test] + fn sanitize_command_line_args_redacts_secret_values() { + let input = vec![ + "kingfisher".to_string(), + "scan".to_string(), + "--token".to_string(), + "abcd".to_string(), + "--output=report.html".to_string(), + "--arg=TOP_SECRET".to_string(), + "--var".to_string(), + "TOKEN=inline".to_string(), + "--path".to_string(), + "./repo".to_string(), + ]; + let sanitized = sanitize_command_line_args(&input); + + assert_eq!(sanitized[2], "--token"); + assert_eq!(sanitized[3], "***REDACTED***"); + assert_eq!(sanitized[4], "--output=report.html"); + assert_eq!(sanitized[5], "--arg=***REDACTED***"); + assert_eq!(sanitized[6], "--var"); + assert_eq!(sanitized[7], "***REDACTED***"); + } + + #[test] + fn report_envelope_contains_audit_metadata() { + let temp = tempdir().unwrap(); + let datastore = + Arc::new(Mutex::new(findings_store::FindingsStore::new(temp.path().to_path_buf()))); + let reporter = DetailsReporter { + datastore, + styles: Styles::new(false), + only_valid: false, + audit_context: None, + }; + + let mut args = sample_scan_args(); + args.input_specifier_args.path_inputs.push(PathBuf::from("/tmp/project")); + + let envelope = reporter.build_report_envelope(&args).expect("build envelope"); + let metadata = envelope.metadata.expect("metadata should be present"); + + assert_eq!(metadata.summary.findings, 0); + assert_eq!(metadata.summary.active_findings, 0); + assert_eq!(metadata.summary.inactive_findings, 0); + assert_eq!(metadata.summary.access_map_identities, 0); + assert_eq!(metadata.target.as_deref(), Some("/tmp/project")); + assert_eq!(metadata.kingfisher_version, env!("CARGO_PKG_VERSION")); + } + use super::build_git_urls; #[test] diff --git a/src/reporter/html_format.rs b/src/reporter/html_format.rs new file mode 100644 index 0000000..b8b38f2 --- /dev/null +++ b/src/reporter/html_format.rs @@ -0,0 +1,329 @@ +use super::*; + +fn escape_html(input: &str) -> String { + input + .replace('&', "&") + .replace('<', "<") + .replace('>', ">") + .replace('"', """) + .replace('\'', "'") +} + +fn format_timestamp(input: &str) -> String { + chrono::DateTime::parse_from_rfc3339(input) + .map(|dt| dt.format("%Y-%m-%d %H:%M:%S %:z").to_string()) + .unwrap_or_else(|_| input.to_string()) +} + +fn summary_line(label: &str, value: &str) -> String { + format!( + "
{}{}
", + escape_html(label), + escape_html(value) + ) +} + +fn render_metadata(metadata: &ScanReportMetadata) -> String { + let mut lines = Vec::new(); + lines.push(summary_line("Findings", &metadata.summary.findings.to_string())); + if let Some(successful) = metadata.summary.successful_validations { + lines.push(summary_line(" |__Successful Validations", &successful.to_string())); + } + if let Some(failed) = metadata.summary.failed_validations { + lines.push(summary_line(" |__Failed Validations", &failed.to_string())); + } + if let Some(skipped) = metadata.summary.skipped_validations { + lines.push(summary_line(" |__Skipped Validations", &skipped.to_string())); + } + if let Some(rules_applied) = metadata.summary.rules_applied { + lines.push(summary_line("Rules Applied", &rules_applied.to_string())); + } + if let Some(blobs) = metadata.summary.blobs_scanned { + lines.push(summary_line(" |__Blobs Scanned", &blobs.to_string())); + } + if let Some(bytes) = metadata.summary.bytes_scanned { + lines.push(summary_line("Bytes Scanned", &bytes.to_string())); + } + if let Some(duration) = metadata.summary.scan_duration_seconds { + lines.push(summary_line("Scan Duration", &format!("{duration:.3}s"))); + } + lines.push(summary_line("Scan Date", &format_timestamp(&metadata.scan_timestamp))); + lines.push(summary_line("Report Generated", &format_timestamp(&metadata.generated_at))); + lines.push(summary_line("Kingfisher Version", &metadata.kingfisher_version)); + if let Some(latest) = &metadata.latest_version_available { + lines.push(summary_line(" |__Latest Version", latest)); + } + if let Some(target) = &metadata.target { + lines.push(summary_line("Target", target)); + } + lines.push(summary_line( + "Confidence Level", + &metadata.summary.confidence_level.to_ascii_lowercase(), + )); + lines.push(summary_line( + "Custom Rules Used", + if metadata.summary.custom_rules_used { "yes" } else { "no" }, + )); + lines.push(summary_line( + "Validation Split", + &format!( + "Active {} | Inactive {} | Unknown {}", + metadata.summary.active_findings, + metadata.summary.inactive_findings, + metadata.summary.unknown_validation_findings + ), + )); + lines.push(summary_line( + "Access Map Identities", + &metadata.summary.access_map_identities.to_string(), + )); + + let cli_cmdline = + metadata.command_line_args.iter().map(|arg| escape_html(arg)).collect::>().join(" "); + + format!( + "
+

Scan Summary

+
{}
+

Sanitized command-line arguments

+
{}
+
", + lines.join(""), + cli_cmdline + ) +} + +fn validation_rank(status: &str) -> usize { + if status.eq_ignore_ascii_case("Active Credential") { + 0 + } else if status.eq_ignore_ascii_case("Inactive Credential") { + 1 + } else if status.eq_ignore_ascii_case("Not Attempted") { + 2 + } else { + 3 + } +} + +fn finding_git_url(record: &FindingReporterRecord) -> Option { + record + .finding + .git_metadata + .as_ref() + .and_then(|meta| { + meta.get("file").and_then(|file| file.get("url")).or_else(|| meta.get("repository_url")) + }) + .and_then(|url| url.as_str()) + .map(|url| url.to_string()) +} + +fn render_findings_table(findings: &[FindingReporterRecord]) -> String { + if findings.is_empty() { + return "

No findings detected.

".to_string(); + } + + let mut sorted = findings.to_vec(); + sorted.sort_by(|a, b| { + validation_rank(&a.finding.validation.status) + .cmp(&validation_rank(&b.finding.validation.status)) + .then_with(|| a.finding.path.cmp(&b.finding.path)) + .then_with(|| a.finding.line.cmp(&b.finding.line)) + }); + + let mut rows = String::new(); + for record in &sorted { + let status_class = if record.finding.validation.status == "Active Credential" { + "status-active" + } else if record.finding.validation.status == "Inactive Credential" { + "status-inactive" + } else { + "status-unknown" + }; + let git_url_html = finding_git_url(record) + .map(|url| { + format!( + "{}", + escape_html(&url), + escape_html(&url) + ) + }) + .unwrap_or_default(); + + rows.push_str(&format!( + "\ + {}\ + {}\ + {}\ + {}\ + {}\ + {}\ + {}\ + ", + escape_html(&record.rule.name), + escape_html(&record.rule.id), + escape_html(&record.finding.path), + git_url_html, + status_class, + escape_html(&record.finding.validation.status), + escape_html(&record.finding.confidence), + record.finding.line + )); + } + + format!( + " + + + + + + + + + + + + {rows} +
RuleRule IDPathGit URLValidationConfidenceLine
" + ) +} + +fn render_access_map(access_map: Option<&Vec>) -> String { + let Some(entries) = access_map else { + return String::new(); + }; + if entries.is_empty() { + return String::new(); + } + + let mut items = String::new(); + for entry in entries { + let account = entry.account.clone().unwrap_or_else(|| "(identity)".to_string()); + items.push_str(&format!( + "
  • {} {} ({} groups)
  • ", + escape_html(&account), + escape_html(&entry.provider.to_uppercase()), + entry.groups.len() + )); + } + format!( + "
    +

    Access Map Summary

    +
      {items}
    +
    " + ) +} + +fn build_html(envelope: &ReportEnvelope) -> String { + let metadata_html = envelope.metadata.as_ref().map(render_metadata).unwrap_or_default(); + let findings_html = render_findings_table(&envelope.findings); + let access_map_html = render_access_map(envelope.access_map.as_ref()); + + format!( + " + + + + + Kingfisher Audit Report + + + +

    Kingfisher Audit Report

    +
    Secret scanning report generated by MongoDB Kingfisher.
    + {metadata_html} +
    +

    Detailed Findings

    + {findings_html} +
    + {access_map_html} + +" + ) +} + +impl DetailsReporter { + pub fn html_format( + &self, + mut writer: W, + args: &cli::commands::scan::ScanArgs, + ) -> Result<()> { + let envelope = self.build_report_envelope(args)?; + write!(writer, "{}", build_html(&envelope))?; + writeln!(writer)?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn build_html_includes_audit_title_and_cli_args() { + let envelope = ReportEnvelope { + findings: Vec::new(), + access_map: None, + metadata: Some(ScanReportMetadata { + generated_at: "2026-01-01T00:00:00Z".to_string(), + scan_timestamp: "2026-01-01T00:00:00Z".to_string(), + target: Some("/tmp/repo".to_string()), + command_line_args: vec![ + "kingfisher".to_string(), + "scan".to_string(), + "--token".to_string(), + "***REDACTED***".to_string(), + ], + kingfisher_version: "1.2.3".to_string(), + latest_version_available: Some("1.2.4".to_string()), + update_check_status: Some("ok".to_string()), + summary: ScanReportSummary { + findings: 0, + active_findings: 0, + inactive_findings: 0, + unknown_validation_findings: 0, + access_map_identities: 0, + rules_applied: Some(10), + confidence_level: "medium".to_string(), + custom_rules_used: false, + successful_validations: Some(0), + failed_validations: Some(0), + skipped_validations: Some(0), + blobs_scanned: Some(1), + bytes_scanned: Some(10), + scan_duration_seconds: Some(0.1), + }, + }), + }; + + let html = build_html(&envelope); + assert!(html.contains("Kingfisher Audit Report")); + assert!(html.contains("Sanitized command-line arguments")); + assert!(html.contains("***REDACTED***")); + assert!(html.contains("/tmp/repo")); + } +} diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index e78cf6a..b262462 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -279,6 +279,7 @@ mod tests { datastore: Arc::new(Mutex::new(datastore)), styles: Styles::new(false), only_valid: false, + audit_context: None, } } diff --git a/src/reporter/sarif_format.rs b/src/reporter/sarif_format.rs index 068c87d..9dfd6bc 100644 --- a/src/reporter/sarif_format.rs +++ b/src/reporter/sarif_format.rs @@ -163,6 +163,7 @@ mod tests { datastore: Arc::new(Mutex::new(store)), styles: Styles::new(false), only_valid: false, + audit_context: None, } } diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index e07cec7..c6943a4 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -430,6 +430,7 @@ fn deduplicate_new_matches( datastore: Arc::clone(store), styles: Styles::new(global_args.use_color(std::io::stdout())), only_valid: args.only_valid, + audit_context: None, }; let all_matches = reporter.get_unfiltered_matches(Some(false))?; @@ -456,6 +457,31 @@ fn deduplicate_new_matches( Ok(()) } +fn build_scan_audit_context( + args: &scan::ScanArgs, + rules_db: &RulesDatabase, + matcher_stats: &Arc>, + datastore: &Arc>, + start_time: Instant, + scan_started_at: chrono::DateTime, + update_status: &crate::update::UpdateStatus, +) -> crate::reporter::ScanAuditContext { + let totals = compute_scan_totals(datastore, args, matcher_stats.as_ref()); + crate::reporter::ScanAuditContext { + scan_timestamp: Some(scan_started_at.to_rfc3339()), + scan_duration_seconds: Some(start_time.elapsed().as_secs_f64()), + rules_applied: Some(rules_db.num_rules()), + successful_validations: Some(totals.successful_validations), + failed_validations: Some(totals.failed_validations), + skipped_validations: Some(totals.skipped_validations), + blobs_scanned: Some(totals.blobs_scanned), + bytes_scanned: Some(totals.bytes_scanned), + running_version: Some(update_status.running_version.clone()), + latest_version: update_status.latest_version.clone(), + update_check_status: Some(update_status.check_status.as_str().to_string()), + } +} + /// Applies baseline filtering if configured. fn apply_baseline_if_configured( args: &scan::ScanArgs, @@ -566,7 +592,16 @@ async fn run_sequential_scan( finalize_access_map(datastore, collector, args).await?; } - crate::reporter::run(global_args, Arc::clone(datastore), args) + let audit_context = build_scan_audit_context( + args, + rules_db, + matcher_stats, + datastore, + start_time, + scan_started_at, + update_status, + ); + crate::reporter::run(global_args, Arc::clone(datastore), args, Some(audit_context)) .context("Failed to run report command")?; print_scan_summary( start_time, @@ -727,8 +762,13 @@ async fn run_parallel_scan( } if !output_to_file { - crate::reporter::run(global_args, Arc::clone(&repo_datastore), &args) - .context("Failed to run report command")?; + crate::reporter::run( + global_args, + Arc::clone(&repo_datastore), + &args, + None, + ) + .context("Failed to run report command")?; } { @@ -765,7 +805,16 @@ async fn run_parallel_scan( } if output_to_file && ran_repo_scan.load(Ordering::Relaxed) { - crate::reporter::run(global_args, Arc::clone(datastore), args) + let audit_context = build_scan_audit_context( + args, + rules_db, + matcher_stats, + datastore, + start_time, + scan_started_at, + update_status, + ); + crate::reporter::run(global_args, Arc::clone(datastore), args, Some(audit_context)) .context("Failed to run report command")?; } @@ -780,7 +829,16 @@ async fn run_parallel_scan( finalize_access_map(datastore, collector, args).await?; } - crate::reporter::run(global_args, Arc::clone(datastore), args) + let audit_context = build_scan_audit_context( + args, + rules_db, + matcher_stats, + datastore, + start_time, + scan_started_at, + update_status, + ); + crate::reporter::run(global_args, Arc::clone(datastore), args, Some(audit_context)) .context("Failed to run report command")?; } diff --git a/tests/cli.rs b/tests/cli.rs index ee845cd..3f8323e 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -1,5 +1,7 @@ use assert_cmd::Command; use predicates::{prelude::PredicateBooleanExt, str::contains}; +use std::fs; +use tempfile::tempdir; mod test { @@ -29,4 +31,34 @@ mod test { .success() .stdout(contains(env!("CARGO_PKG_VERSION"))); } + + #[test] + fn cli_scan_generates_html_audit_report() { + let temp = tempdir().expect("tempdir should be created"); + let input_dir = temp.path().join("repo"); + let output_html = temp.path().join("audit-report.html"); + fs::create_dir_all(&input_dir).expect("input directory should be created"); + fs::write(input_dir.join("README.txt"), "no credentials here") + .expect("seed file should be written"); + + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args([ + "scan", + input_dir.to_str().unwrap(), + "--format", + "html", + "--output", + output_html.to_str().unwrap(), + "--rule", + "kingfisher.aws.1", + "--no-validate", + "--no-update-check", + ]) + .assert() + .success(); + + let html = fs::read_to_string(&output_html).expect("html report should be written"); + assert!(html.contains("Kingfisher Audit Report")); + assert!(html.contains("Audit Metadata")); + } } diff --git a/tests/fingerprint_dedup.rs b/tests/fingerprint_dedup.rs index 86549a4..ce123cf 100644 --- a/tests/fingerprint_dedup.rs +++ b/tests/fingerprint_dedup.rs @@ -114,6 +114,7 @@ fn reporter_deduplicates_across_git_commits() -> Result<()> { datastore: Arc::new(Mutex::new(FindingsStore::new(PathBuf::from("/tmp")))), styles: Styles::new(false), only_valid: false, + audit_context: None, }; let matches = vec![ @@ -172,6 +173,7 @@ fn dedup_preserves_distinct_rules_with_same_fingerprint() -> Result<()> { datastore: Arc::new(Mutex::new(FindingsStore::new(PathBuf::from("/tmp")))), styles: Styles::new(false), only_valid: false, + audit_context: None, }; let matches = vec![