diff --git a/AGENTS.md b/AGENTS.md index 8c67ca1..30f715c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -11,7 +11,7 @@ Key capabilities: - Live credential validation against provider APIs - Direct secret revocation from CLI - Blast radius mapping (AWS, GCP, Azure, GitHub, GitLab, Slack) -- Output formats: JSON, SARIF, interactive HTML +- Output formats: TOON, JSON, SARIF, interactive HTML - Platform integrations: GitHub, GitLab, Azure Repos, Bitbucket, Gitea, Hugging Face, S3, GCS, Docker, Jira, Confluence, Slack ## Scope @@ -26,7 +26,7 @@ Key capabilities: - `src/matcher/`: pattern matching engine - `src/scanner/`: core scanning logic - `src/parser/`: language-aware parsing (`tree-sitter`) -- `src/reporter/`: JSON/SARIF/HTML report generation +- `src/reporter/`: TOON/JSON/SARIF/HTML report generation - `src/access_map/`: access mapping analysis - `crates/kingfisher-core/`: shared types and core logic - `crates/kingfisher-rules/`: rule loading and rule data @@ -130,6 +130,7 @@ Use this when creating or updating rules in `crates/kingfisher-rules/data/rules/ - Prefer targeted patches. - After changes, run the narrowest relevant tests first, then broader checks when practical. - If validation commands cannot be run, report exactly what was skipped and why. +- Prefer `kingfisher scan --format toon` when invoking Kingfisher from an LLM or agent workflow; keep `pretty` for interactive human CLI use unless the task explicitly calls for a different format. ## Documentation Pointers - `docs/USAGE.md` diff --git a/CHANGELOG.md b/CHANGELOG.md index 13d4059..576b04c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ All notable changes to this project will be documented in this file. ## [v1.89.0] +- Added TOON output for `scan`, `validate`, and `revoke`, optimized for LLM/agent workflows; prefer `--format toon` when calling Kingfisher from an LLM. - Expanded built-in revocation support with new YAML revocation flows for Cloudflare, Confluent, Doppler, Mapbox, Particle.io, Twitch, and additional Vercel token formats. - Added revocation coverage documentation: new `docs/REVOCATION_PROVIDERS.md` matrix and README links highlighting supported revocation providers/rule IDs. - Access Map: added Microsoft Teams provider. Parses Incoming Webhook URLs (legacy and workflow-based) to extract tenant and webhook identity, probes for active status, and reports channel-level blast radius. Supports standalone `access-map microsoftteams` (alias `msteams`) and automatic mapping for validated `kingfisher.msteams.*` and `kingfisher.microsoftteamswebhook.*` findings. diff --git a/Cargo.toml b/Cargo.toml index 0315541..c3fa1d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -112,6 +112,7 @@ hex = "0.4.3" vectorscan-rs = "0.0.5" regex = "1.12.2" serde_json = "1.0.145" +toon-format = { version = "0.4.4", default-features = false } lazy_static = "1.5.0" url = "2.5.7" include_dir = { version = "0.7", features = ["glob"] } diff --git a/docs/USAGE.md b/docs/USAGE.md index 1a61d2c..3514df4 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -67,6 +67,14 @@ kingfisher scan /path/to/repo --only-valid kingfisher scan . --format json | tee kingfisher.json ``` +### Output TOON for LLM and agent workflows + +```bash +kingfisher scan . --format toon +``` + +Use `--format toon` when Kingfisher is being called by an LLM or agent runtime. The TOON report is optimized for token efficiency, keeps the scan summary up front, and flattens each finding into an easier-to-reason-about row. + ### Output SARIF directly to disk ```bash @@ -93,7 +101,7 @@ Kingfisher's `--access-map` feature transforms secret detection from a simple al * Visualize the Blast Radius: See exactly which resources (S3 buckets, EC2 instances, projects, storage containers) are exposed and at risk. -Add `--access-map` to enrich JSON, JSONL, BSON, pretty, and SARIF reports with an `access_map` containing the resources and the permissions that the key can access - for each resource (grouped when identical). +Add `--access-map` to enrich TOON, JSON, JSONL, BSON, pretty, and SARIF reports with an `access_map` containing the resources and the permissions that the key can access - for each resource (grouped when identical). - If you validated cloud credentials without `--access-map`, Kingfisher will remind you on stderr to rerun with the flag so the access map appears in the output. - Run `kingfisher view ./kingfisher.json` to explore a report locally in a local web UI (opens your browser automatically when a report is provided). - Or use `kingfisher scan --view-report ...` to generate a JSON report, start the viewer at `http://127.0.0.1:7890`, and open it in your browser. @@ -154,6 +162,9 @@ kingfisher validate --rule opsgenie "12345678-9abc-def0-1234-56789abcdef0" # Validate from stdin echo "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" | kingfisher validate --rule github - +# TOON output for LLMs and agent tooling +kingfisher validate --rule slack "xoxb-..." --format toon + # JSON output for scripting kingfisher validate --rule slack "xoxb-..." --format json @@ -241,6 +252,9 @@ kingfisher revoke --rule gcp "$(cat service-account.json)" # JSON output for scripting kingfisher revoke --rule slack "xoxb-..." --format json + +# TOON output for LLMs and agent tooling +kingfisher revoke --rule slack "xoxb-..." --format toon ``` **Exit codes:** Returns `0` if any matching rule reports a successful revocation, `1` if all are failures or an error occurred. diff --git a/src/cli/commands/output.rs b/src/cli/commands/output.rs index fe38e92..b5ee1f5 100644 --- a/src/cli/commands/output.rs +++ b/src/cli/commands/output.rs @@ -15,7 +15,7 @@ pub struct OutputArgs { #[arg(global = true, long, short, value_hint = ValueHint::FilePath)] pub output: Option, - /// Output format (defaults to `pretty` if not specified) + /// Output format (defaults to `pretty`; use `toon` for LLM/agent integrations) #[arg(global = true, long, short, default_value = "pretty")] pub format: Format, } @@ -50,6 +50,9 @@ pub enum ReportOutputFormat { /// BSON (binary JSON) format Bson, + /// TOON format optimized for LLMs and agents + Toon, + /// SARIF format (experimental) Sarif, diff --git a/src/cli/commands/revoke.rs b/src/cli/commands/revoke.rs index aa6e33b..5df5574 100644 --- a/src/cli/commands/revoke.rs +++ b/src/cli/commands/revoke.rs @@ -50,7 +50,7 @@ pub struct RevokeArgs { #[arg(long = "no-builtins", default_value_t = false)] pub no_builtins: bool, - /// Output format: text or json - #[arg(long, default_value = "text", value_parser = ["text", "json"])] + /// Output format: text, json, or toon (`toon` is recommended for LLMs) + #[arg(long, default_value = "text", value_parser = ["text", "json", "toon"])] pub format: String, } diff --git a/src/cli/commands/validate.rs b/src/cli/commands/validate.rs index 138e650..d78ebd1 100644 --- a/src/cli/commands/validate.rs +++ b/src/cli/commands/validate.rs @@ -58,7 +58,7 @@ pub struct ValidateArgs { #[arg(long = "no-builtins", default_value_t = false)] pub no_builtins: bool, - /// Output format: text or json - #[arg(long, default_value = "text", value_parser = ["text", "json"])] + /// Output format: text, json, or toon (`toon` is recommended for LLMs) + #[arg(long, default_value = "text", value_parser = ["text", "json", "toon"])] pub format: String, } diff --git a/src/direct_revoke.rs b/src/direct_revoke.rs index db257b1..81db0cc 100644 --- a/src/direct_revoke.rs +++ b/src/direct_revoke.rs @@ -723,6 +723,14 @@ pub fn print_results(results: &[DirectRevocationResult], format: &str, use_color println!("{}", serde_json::to_string_pretty(results).unwrap()); } } + "toon" => { + let value = if results.len() == 1 { + serde_json::to_value(&results[0]).unwrap() + } else { + serde_json::to_value(results).unwrap() + }; + println!("{}", crate::toon::encode_llm_friendly(&value).unwrap()); + } _ => { for (i, result) in results.iter().enumerate() { if i > 0 { diff --git a/src/direct_validate.rs b/src/direct_validate.rs index e8a8128..9a163fa 100644 --- a/src/direct_validate.rs +++ b/src/direct_validate.rs @@ -988,6 +988,14 @@ pub fn print_results(results: &[DirectValidationResult], format: &str, use_color println!("{}", serde_json::to_string_pretty(results).unwrap()); } } + "toon" => { + let value = if results.len() == 1 { + serde_json::to_value(&results[0]).unwrap() + } else { + serde_json::to_value(results).unwrap() + }; + println!("{}", crate::toon::encode_llm_friendly(&value).unwrap()); + } _ => { for (i, result) in results.iter().enumerate() { if i > 0 { diff --git a/src/lib.rs b/src/lib.rs index 438c5b0..fe7facd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,6 +54,7 @@ pub mod slack; pub mod snippet; pub mod sqlite; pub mod teams; +pub mod toon; pub mod update; pub mod util; pub mod validation; diff --git a/src/reporter.rs b/src/reporter.rs index d540848..166a1a9 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -31,6 +31,7 @@ mod json_format; mod pretty_format; mod sarif_format; pub mod styles; +mod toon_format; use std::io::IsTerminal; use styles::{StyledObject, Styles}; @@ -1379,6 +1380,7 @@ impl Reportable for DetailsReporter { ReportOutputFormat::Json => self.json_format(writer, args), ReportOutputFormat::Jsonl => self.jsonl_format(writer, args), ReportOutputFormat::Bson => self.bson_format(writer, args), + ReportOutputFormat::Toon => self.toon_format(writer, args), ReportOutputFormat::Sarif => self.sarif_format(writer, args.no_dedup, args), ReportOutputFormat::Html => self.html_format(writer, args), } diff --git a/src/reporter/toon_format.rs b/src/reporter/toon_format.rs new file mode 100644 index 0000000..42951e7 --- /dev/null +++ b/src/reporter/toon_format.rs @@ -0,0 +1,157 @@ +use serde::Serialize; + +use super::*; + +#[derive(Serialize)] +struct ToonReportEnvelope { + schema: &'static str, + scan: ToonScanMetadata, + findings: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + access_map: Option>, +} + +#[derive(Serialize)] +struct ToonScanMetadata { + generated_at: String, + scan_timestamp: String, + target: Option, + kingfisher_version: String, + latest_version_available: Option, + update_check_status: Option, + summary: ScanReportSummary, +} + +#[derive(Serialize)] +struct ToonFindingRecord { + rule_id: String, + rule_name: String, + validation_status: String, + path: String, + line: u32, + column_start: u32, + column_end: u32, + confidence: String, + entropy: f32, + language: String, + fingerprint: String, + snippet: String, + validation_response: Option, + encoding: Option, + validate_command: Option, + revoke_command: Option, + git_repository_url: Option, + git_commit_id: Option, + git_commit_url: Option, + git_file_url: Option, +} + +impl ToonFindingRecord { + fn from_record(record: &FindingReporterRecord) -> Self { + let git = record.finding.git_metadata.as_ref(); + + Self { + rule_id: record.rule.id.clone(), + rule_name: record.rule.name.clone(), + validation_status: record.finding.validation.status.clone(), + path: record.finding.path.clone(), + line: record.finding.line, + column_start: record.finding.column_start, + column_end: record.finding.column_end, + confidence: record.finding.confidence.clone(), + entropy: record.finding.entropy.parse().unwrap_or_default(), + language: record.finding.language.clone(), + fingerprint: record.finding.fingerprint.clone(), + snippet: record.finding.snippet.clone(), + validation_response: non_empty(record.finding.validation.response.clone()), + encoding: record.finding.encoding.clone(), + validate_command: record.finding.validate_command.clone(), + revoke_command: record.finding.revoke_command.clone(), + git_repository_url: json_string(git, &["repository_url"]), + git_commit_id: json_string(git, &["commit", "id"]), + git_commit_url: json_string(git, &["commit", "url"]), + git_file_url: json_string(git, &["file", "url"]), + } + } +} + +fn json_string(value: Option<&serde_json::Value>, path: &[&str]) -> Option { + let mut current = value?; + for segment in path { + current = current.get(*segment)?; + } + current.as_str().map(str::trim).filter(|value| !value.is_empty()).map(str::to_string) +} + +fn non_empty(value: String) -> Option { + let trimmed = value.trim(); + if trimmed.is_empty() { + None + } else { + Some(trimmed.to_string()) + } +} + +impl DetailsReporter { + pub fn toon_format( + &self, + mut writer: W, + args: &cli::commands::scan::ScanArgs, + ) -> Result<()> { + let envelope = self.build_report_envelope(args)?; + let payload = ToonReportEnvelope { + schema: "kingfisher.toon.v1", + scan: ToonScanMetadata { + generated_at: envelope + .metadata + .as_ref() + .map(|metadata| metadata.generated_at.clone()) + .unwrap_or_default(), + scan_timestamp: envelope + .metadata + .as_ref() + .map(|metadata| metadata.scan_timestamp.clone()) + .unwrap_or_default(), + target: envelope.metadata.as_ref().and_then(|metadata| metadata.target.clone()), + kingfisher_version: envelope + .metadata + .as_ref() + .map(|metadata| metadata.kingfisher_version.clone()) + .unwrap_or_else(|| env!("CARGO_PKG_VERSION").to_string()), + latest_version_available: envelope + .metadata + .as_ref() + .and_then(|metadata| metadata.latest_version_available.clone()), + update_check_status: envelope + .metadata + .as_ref() + .and_then(|metadata| metadata.update_check_status.clone()), + summary: envelope.metadata.map(|metadata| metadata.summary).unwrap_or( + ScanReportSummary { + findings: envelope.findings.len(), + active_findings: 0, + inactive_findings: 0, + unknown_validation_findings: 0, + access_map_identities: envelope.access_map.as_ref().map_or(0, Vec::len), + rules_applied: None, + confidence_level: args.confidence.to_string(), + custom_rules_used: !args.rules.rules_path.is_empty() + || !args.rules.load_builtins, + successful_validations: None, + failed_validations: None, + skipped_validations: None, + blobs_scanned: None, + bytes_scanned: None, + scan_duration_seconds: None, + }, + ), + }, + findings: envelope.findings.iter().map(ToonFindingRecord::from_record).collect(), + access_map: envelope.access_map, + }; + + write!(writer, "{}", crate::toon::encode_llm_friendly(&payload)?)?; + writeln!(writer)?; + Ok(()) + } +} diff --git a/src/toon.rs b/src/toon.rs new file mode 100644 index 0000000..f5ed614 --- /dev/null +++ b/src/toon.rs @@ -0,0 +1,15 @@ +use anyhow::Result; +use serde::Serialize; +use toon_format::{Delimiter, EncodeOptions, Indent}; + +fn llm_encode_options() -> EncodeOptions { + EncodeOptions::new() + .with_delimiter(Delimiter::Pipe) + .with_indent(Indent::Spaces(2)) + .with_key_folding(toon_format::types::KeyFoldingMode::Safe) + .with_flatten_depth(2) +} + +pub fn encode_llm_friendly(value: &T) -> Result { + Ok(toon_format::encode(value, &llm_encode_options())?) +} diff --git a/tests/cli.rs b/tests/cli.rs index 5e9e86f..cf63b21 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -1,5 +1,6 @@ use assert_cmd::Command; use predicates::{prelude::PredicateBooleanExt, str::contains}; +use serde_json::Value; use std::fs; use tempfile::tempdir; @@ -61,4 +62,52 @@ mod test { assert!(html.contains("Kingfisher Audit Report")); assert!(html.contains("Scan Summary")); } + + #[test] + fn cli_scan_generates_toon_report_for_llms() { + let temp = tempdir().expect("tempdir should be created"); + let rules_dir = temp.path().join("rules"); + let input_dir = temp.path().join("repo"); + let output_toon = temp.path().join("findings.toon"); + + fs::create_dir_all(&rules_dir).expect("rules directory should be created"); + fs::create_dir_all(&input_dir).expect("input directory should be created"); + fs::write( + rules_dir.join("demo.yml"), + r#" +rules: + - id: kingfisher.demo.1 + name: Demo secret + pattern: '(demo_secret_[0-9]{4})' + confidence: medium +"#, + ) + .expect("rule should be written"); + fs::write(input_dir.join("README.txt"), "demo_secret_1234") + .expect("seed file should be written"); + + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args([ + "scan", + input_dir.to_str().unwrap(), + "--format", + "toon", + "--output", + output_toon.to_str().unwrap(), + "--rules-path", + rules_dir.to_str().unwrap(), + "--load-builtins=false", + "--no-validate", + "--no-update-check", + ]) + .assert() + .code(200); + + let toon = fs::read_to_string(&output_toon).expect("toon report should be written"); + let decoded: Value = toon_format::decode_default(&toon).expect("toon should decode"); + assert_eq!(decoded["schema"], "kingfisher.toon.v1"); + assert_eq!(decoded["scan"]["summary"]["findings"], 1); + assert_eq!(decoded["findings"][0]["rule_id"], "kingfisher.demo.1"); + assert_eq!(decoded["findings"][0]["validation_status"], "Not Attempted"); + } } diff --git a/tests/cli_validate_revoke.rs b/tests/cli_validate_revoke.rs index 6db654e..ed018d2 100644 --- a/tests/cli_validate_revoke.rs +++ b/tests/cli_validate_revoke.rs @@ -6,6 +6,7 @@ use assert_cmd::Command; use predicates::{prelude::PredicateBooleanExt, str::contains}; +use serde_json::Value; use std::fs; use tempfile::TempDir; @@ -140,6 +141,29 @@ mod validate { ); } + #[test] + fn validate_toon_output() { + let assert = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args([ + "validate", + "--rule", + "kingfisher.opsgenie.1", + "fake-api-key-12345", + "--format", + "toon", + "--no-update-check", + ]) + .assert(); + + let output = assert.get_output(); + assert!(output.status.code().is_some_and(|code| code == 0 || code == 1)); + let toon = String::from_utf8(output.stdout.clone()).expect("stdout should be UTF-8"); + let decoded: Value = toon_format::decode_default(&toon).expect("toon should decode"); + assert!(decoded.get("rule_id").is_some()); + assert!(decoded.get("rule_name").is_some()); + assert!(decoded.get("message").is_some()); + } + #[test] fn validate_text_output() { Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) @@ -586,6 +610,29 @@ mod revoke { ); } + #[test] + fn revoke_toon_output() { + let assert = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args([ + "revoke", + "--rule", + "kingfisher.slack.1", + "xoxb-fake-token", + "--format", + "toon", + "--no-update-check", + ]) + .assert(); + + let output = assert.get_output(); + assert!(output.status.code().is_some_and(|code| code == 0 || code == 1)); + let toon = String::from_utf8(output.stdout.clone()).expect("stdout should be UTF-8"); + let decoded: Value = toon_format::decode_default(&toon).expect("toon should decode"); + assert!(decoded.get("rule_id").is_some()); + assert!(decoded.get("rule_name").is_some()); + assert!(decoded.get("message").is_some()); + } + #[test] fn revoke_text_output() { Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) diff --git a/tests/int_quiet.rs b/tests/int_quiet.rs index 169f0e7..31c3520 100644 --- a/tests/int_quiet.rs +++ b/tests/int_quiet.rs @@ -3,7 +3,7 @@ use predicates::prelude::*; use std::{fs, time::Duration}; use tempfile::TempDir; -const FORMATS: [&str; 4] = ["pretty", "json", "jsonl", "bson"]; +const FORMATS: [&str; 5] = ["pretty", "json", "jsonl", "bson", "toon"]; fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool { haystack.windows(needle.len()).any(|window| window == needle)