Added TOON output support to optimize use of Kingfisher in LLM/agent workflows

This commit is contained in:
Mick Grove 2026-03-15 15:00:59 -07:00
commit 349b8165aa
16 changed files with 316 additions and 9 deletions

View file

@ -11,7 +11,7 @@ Key capabilities:
- Live credential validation against provider APIs
- Direct secret revocation from CLI
- Blast radius mapping (AWS, GCP, Azure, GitHub, GitLab, Slack)
- Output formats: JSON, SARIF, interactive HTML
- Output formats: TOON, JSON, SARIF, interactive HTML
- Platform integrations: GitHub, GitLab, Azure Repos, Bitbucket, Gitea, Hugging Face, S3, GCS, Docker, Jira, Confluence, Slack
## Scope
@ -26,7 +26,7 @@ Key capabilities:
- `src/matcher/`: pattern matching engine
- `src/scanner/`: core scanning logic
- `src/parser/`: language-aware parsing (`tree-sitter`)
- `src/reporter/`: JSON/SARIF/HTML report generation
- `src/reporter/`: TOON/JSON/SARIF/HTML report generation
- `src/access_map/`: access mapping analysis
- `crates/kingfisher-core/`: shared types and core logic
- `crates/kingfisher-rules/`: rule loading and rule data
@ -130,6 +130,7 @@ Use this when creating or updating rules in `crates/kingfisher-rules/data/rules/
- Prefer targeted patches.
- After changes, run the narrowest relevant tests first, then broader checks when practical.
- If validation commands cannot be run, report exactly what was skipped and why.
- Prefer `kingfisher scan --format toon` when invoking Kingfisher from an LLM or agent workflow; keep `pretty` for interactive human CLI use unless the task explicitly calls for a different format.
## Documentation Pointers
- `docs/USAGE.md`

View file

@ -3,6 +3,7 @@
All notable changes to this project will be documented in this file.
## [v1.89.0]
- Added TOON output for `scan`, `validate`, and `revoke`, optimized for LLM/agent workflows; prefer `--format toon` when calling Kingfisher from an LLM.
- Expanded built-in revocation support with new YAML revocation flows for Cloudflare, Confluent, Doppler, Mapbox, Particle.io, Twitch, and additional Vercel token formats.
- Added revocation coverage documentation: new `docs/REVOCATION_PROVIDERS.md` matrix and README links highlighting supported revocation providers/rule IDs.
- Access Map: added Microsoft Teams provider. Parses Incoming Webhook URLs (legacy and workflow-based) to extract tenant and webhook identity, probes for active status, and reports channel-level blast radius. Supports standalone `access-map microsoftteams` (alias `msteams`) and automatic mapping for validated `kingfisher.msteams.*` and `kingfisher.microsoftteamswebhook.*` findings.

View file

@ -112,6 +112,7 @@ hex = "0.4.3"
vectorscan-rs = "0.0.5"
regex = "1.12.2"
serde_json = "1.0.145"
toon-format = { version = "0.4.4", default-features = false }
lazy_static = "1.5.0"
url = "2.5.7"
include_dir = { version = "0.7", features = ["glob"] }

View file

@ -67,6 +67,14 @@ kingfisher scan /path/to/repo --only-valid
kingfisher scan . --format json | tee kingfisher.json
```
### Output TOON for LLM and agent workflows
```bash
kingfisher scan . --format toon
```
Use `--format toon` when Kingfisher is being called by an LLM or agent runtime. The TOON report is optimized for token efficiency, keeps the scan summary up front, and flattens each finding into an easier-to-reason-about row.
### Output SARIF directly to disk
```bash
@ -93,7 +101,7 @@ Kingfisher's `--access-map` feature transforms secret detection from a simple al
* Visualize the Blast Radius: See exactly which resources (S3 buckets, EC2 instances, projects, storage containers) are exposed and at risk.
Add `--access-map` to enrich JSON, JSONL, BSON, pretty, and SARIF reports with an `access_map` containing the resources and the permissions that the key can access - for each resource (grouped when identical).
Add `--access-map` to enrich TOON, JSON, JSONL, BSON, pretty, and SARIF reports with an `access_map` containing the resources and the permissions that the key can access - for each resource (grouped when identical).
- If you validated cloud credentials without `--access-map`, Kingfisher will remind you on stderr to rerun with the flag so the access map appears in the output.
- Run `kingfisher view ./kingfisher.json` to explore a report locally in a local web UI (opens your browser automatically when a report is provided).
- Or use `kingfisher scan --view-report ...` to generate a JSON report, start the viewer at `http://127.0.0.1:7890`, and open it in your browser.
@ -154,6 +162,9 @@ kingfisher validate --rule opsgenie "12345678-9abc-def0-1234-56789abcdef0"
# Validate from stdin
echo "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" | kingfisher validate --rule github -
# TOON output for LLMs and agent tooling
kingfisher validate --rule slack "xoxb-..." --format toon
# JSON output for scripting
kingfisher validate --rule slack "xoxb-..." --format json
@ -241,6 +252,9 @@ kingfisher revoke --rule gcp "$(cat service-account.json)"
# JSON output for scripting
kingfisher revoke --rule slack "xoxb-..." --format json
# TOON output for LLMs and agent tooling
kingfisher revoke --rule slack "xoxb-..." --format toon
```
**Exit codes:** Returns `0` if any matching rule reports a successful revocation, `1` if all are failures or an error occurred.

View file

@ -15,7 +15,7 @@ pub struct OutputArgs<Format: ValueEnum + Send + Sync + 'static> {
#[arg(global = true, long, short, value_hint = ValueHint::FilePath)]
pub output: Option<PathBuf>,
/// Output format (defaults to `pretty` if not specified)
/// Output format (defaults to `pretty`; use `toon` for LLM/agent integrations)
#[arg(global = true, long, short, default_value = "pretty")]
pub format: Format,
}
@ -50,6 +50,9 @@ pub enum ReportOutputFormat {
/// BSON (binary JSON) format
Bson,
/// TOON format optimized for LLMs and agents
Toon,
/// SARIF format (experimental)
Sarif,

View file

@ -50,7 +50,7 @@ pub struct RevokeArgs {
#[arg(long = "no-builtins", default_value_t = false)]
pub no_builtins: bool,
/// Output format: text or json
#[arg(long, default_value = "text", value_parser = ["text", "json"])]
/// Output format: text, json, or toon (`toon` is recommended for LLMs)
#[arg(long, default_value = "text", value_parser = ["text", "json", "toon"])]
pub format: String,
}

View file

@ -58,7 +58,7 @@ pub struct ValidateArgs {
#[arg(long = "no-builtins", default_value_t = false)]
pub no_builtins: bool,
/// Output format: text or json
#[arg(long, default_value = "text", value_parser = ["text", "json"])]
/// Output format: text, json, or toon (`toon` is recommended for LLMs)
#[arg(long, default_value = "text", value_parser = ["text", "json", "toon"])]
pub format: String,
}

View file

@ -723,6 +723,14 @@ pub fn print_results(results: &[DirectRevocationResult], format: &str, use_color
println!("{}", serde_json::to_string_pretty(results).unwrap());
}
}
"toon" => {
let value = if results.len() == 1 {
serde_json::to_value(&results[0]).unwrap()
} else {
serde_json::to_value(results).unwrap()
};
println!("{}", crate::toon::encode_llm_friendly(&value).unwrap());
}
_ => {
for (i, result) in results.iter().enumerate() {
if i > 0 {

View file

@ -988,6 +988,14 @@ pub fn print_results(results: &[DirectValidationResult], format: &str, use_color
println!("{}", serde_json::to_string_pretty(results).unwrap());
}
}
"toon" => {
let value = if results.len() == 1 {
serde_json::to_value(&results[0]).unwrap()
} else {
serde_json::to_value(results).unwrap()
};
println!("{}", crate::toon::encode_llm_friendly(&value).unwrap());
}
_ => {
for (i, result) in results.iter().enumerate() {
if i > 0 {

View file

@ -54,6 +54,7 @@ pub mod slack;
pub mod snippet;
pub mod sqlite;
pub mod teams;
pub mod toon;
pub mod update;
pub mod util;
pub mod validation;

View file

@ -31,6 +31,7 @@ mod json_format;
mod pretty_format;
mod sarif_format;
pub mod styles;
mod toon_format;
use std::io::IsTerminal;
use styles::{StyledObject, Styles};
@ -1379,6 +1380,7 @@ impl Reportable for DetailsReporter {
ReportOutputFormat::Json => self.json_format(writer, args),
ReportOutputFormat::Jsonl => self.jsonl_format(writer, args),
ReportOutputFormat::Bson => self.bson_format(writer, args),
ReportOutputFormat::Toon => self.toon_format(writer, args),
ReportOutputFormat::Sarif => self.sarif_format(writer, args.no_dedup, args),
ReportOutputFormat::Html => self.html_format(writer, args),
}

157
src/reporter/toon_format.rs Normal file
View file

@ -0,0 +1,157 @@
use serde::Serialize;
use super::*;
/// Top-level payload that `toon_format` serializes for a scan report.
///
/// serde visits the fields in declaration order, so `schema` and `scan` are
/// emitted ahead of the findings.
#[derive(Serialize)]
struct ToonReportEnvelope {
/// Identifier of this payload layout (`toon_format` sets it to `"kingfisher.toon.v1"`).
schema: &'static str,
/// Scan-level metadata and summary counters.
scan: ToonScanMetadata,
/// One flattened record per reported finding.
findings: Vec<ToonFindingRecord>,
/// Access-map entries; the key is omitted entirely when this is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
access_map: Option<Vec<AccessMapEntry>>,
}
/// Scan-level header of the TOON report.
///
/// `toon_format` fills these fields from the report envelope's metadata when
/// it is present and falls back to defaults otherwise.
#[derive(Serialize)]
struct ToonScanMetadata {
/// Copied from the envelope metadata; empty string when metadata is absent.
generated_at: String,
/// Copied from the envelope metadata; empty string when metadata is absent.
scan_timestamp: String,
/// Scan target as recorded in the envelope metadata, if any.
target: Option<String>,
/// Version from the metadata, or this crate's `CARGO_PKG_VERSION` as a fallback.
kingfisher_version: String,
/// Passed through from the envelope metadata when available.
latest_version_available: Option<String>,
/// Passed through from the envelope metadata when available.
update_check_status: Option<String>,
/// Aggregate counters for the scan.
summary: ScanReportSummary,
}
/// Flat, single-level view of one finding, built by `ToonFindingRecord::from_record`.
///
/// The `Option` fields carry no `skip_serializing_if`, so `None` values are still
/// serialized. NOTE(review): presumably this keeps every row's key set uniform
/// for the TOON encoder — confirm before adding skip attributes.
#[derive(Serialize)]
struct ToonFindingRecord {
// Rule identity.
rule_id: String,
rule_name: String,
// Validation status string copied from the finding.
validation_status: String,
// Location of the match.
path: String,
line: u32,
column_start: u32,
column_end: u32,
confidence: String,
/// Parsed from the finding's textual entropy value; `0.0` when it does not parse.
entropy: f32,
language: String,
fingerprint: String,
snippet: String,
/// Trimmed validation response; `None` when it is blank.
validation_response: Option<String>,
encoding: Option<String>,
validate_command: Option<String>,
revoke_command: Option<String>,
// Git details lifted out of the finding's JSON git metadata; each is `None`
// when the key is missing, not a string, or blank.
git_repository_url: Option<String>,
git_commit_id: Option<String>,
git_commit_url: Option<String>,
git_file_url: Option<String>,
}
impl ToonFindingRecord {
    /// Flattens a reporter record into a single-level row.
    ///
    /// String fields are cloned from the record. The textual entropy is parsed
    /// as `f32` and falls back to `0.0` when it does not parse. The validation
    /// response is trimmed and dropped when blank. Git details are looked up by
    /// key in the finding's JSON git metadata and are `None` when absent.
    fn from_record(record: &FindingReporterRecord) -> Self {
        let rule = &record.rule;
        let finding = &record.finding;
        let validation = &finding.validation;
        let git = finding.git_metadata.as_ref();

        // An unparseable entropy string silently degrades to the default (0.0).
        let entropy: f32 = finding.entropy.parse().unwrap_or_default();

        Self {
            rule_id: rule.id.clone(),
            rule_name: rule.name.clone(),
            validation_status: validation.status.clone(),
            path: finding.path.clone(),
            line: finding.line,
            column_start: finding.column_start,
            column_end: finding.column_end,
            confidence: finding.confidence.clone(),
            entropy,
            language: finding.language.clone(),
            fingerprint: finding.fingerprint.clone(),
            snippet: finding.snippet.clone(),
            validation_response: non_empty(validation.response.clone()),
            encoding: finding.encoding.clone(),
            validate_command: finding.validate_command.clone(),
            revoke_command: finding.revoke_command.clone(),
            git_repository_url: json_string(git, &["repository_url"]),
            git_commit_id: json_string(git, &["commit", "id"]),
            git_commit_url: json_string(git, &["commit", "url"]),
            git_file_url: json_string(git, &["file", "url"]),
        }
    }
}
/// Follows `path` key by key through an optional JSON value and returns the
/// string found at the end, trimmed.
///
/// Returns `None` when `value` is `None`, when any key along the path is
/// missing, when the leaf is not a JSON string, or when the trimmed string is
/// empty.
fn json_string(value: Option<&serde_json::Value>, path: &[&str]) -> Option<String> {
    // Descend one key at a time; the first missing key short-circuits to `None`.
    let leaf = path.iter().try_fold(value?, |node, key| node.get(*key))?;
    let text = leaf.as_str()?.trim();
    if text.is_empty() {
        None
    } else {
        Some(text.to_string())
    }
}
/// Trims surrounding whitespace from `value` and returns the result, or `None`
/// when nothing but whitespace was supplied.
fn non_empty(value: String) -> Option<String> {
    Some(value.trim()).filter(|text| !text.is_empty()).map(str::to_owned)
}
impl DetailsReporter {
pub fn toon_format<W: std::io::Write>(
&self,
mut writer: W,
args: &cli::commands::scan::ScanArgs,
) -> Result<()> {
let envelope = self.build_report_envelope(args)?;
let payload = ToonReportEnvelope {
schema: "kingfisher.toon.v1",
scan: ToonScanMetadata {
generated_at: envelope
.metadata
.as_ref()
.map(|metadata| metadata.generated_at.clone())
.unwrap_or_default(),
scan_timestamp: envelope
.metadata
.as_ref()
.map(|metadata| metadata.scan_timestamp.clone())
.unwrap_or_default(),
target: envelope.metadata.as_ref().and_then(|metadata| metadata.target.clone()),
kingfisher_version: envelope
.metadata
.as_ref()
.map(|metadata| metadata.kingfisher_version.clone())
.unwrap_or_else(|| env!("CARGO_PKG_VERSION").to_string()),
latest_version_available: envelope
.metadata
.as_ref()
.and_then(|metadata| metadata.latest_version_available.clone()),
update_check_status: envelope
.metadata
.as_ref()
.and_then(|metadata| metadata.update_check_status.clone()),
summary: envelope.metadata.map(|metadata| metadata.summary).unwrap_or(
ScanReportSummary {
findings: envelope.findings.len(),
active_findings: 0,
inactive_findings: 0,
unknown_validation_findings: 0,
access_map_identities: envelope.access_map.as_ref().map_or(0, Vec::len),
rules_applied: None,
confidence_level: args.confidence.to_string(),
custom_rules_used: !args.rules.rules_path.is_empty()
|| !args.rules.load_builtins,
successful_validations: None,
failed_validations: None,
skipped_validations: None,
blobs_scanned: None,
bytes_scanned: None,
scan_duration_seconds: None,
},
),
},
findings: envelope.findings.iter().map(ToonFindingRecord::from_record).collect(),
access_map: envelope.access_map,
};
write!(writer, "{}", crate::toon::encode_llm_friendly(&payload)?)?;
writeln!(writer)?;
Ok(())
}
}

15
src/toon.rs Normal file
View file

@ -0,0 +1,15 @@
use anyhow::Result;
use serde::Serialize;
use toon_format::{Delimiter, EncodeOptions, Indent};
/// Encoder settings shared by every TOON output path in the crate: pipe
/// delimiter, two-space indentation, `Safe` key folding, and a flatten depth
/// of 2.
// NOTE(review): the exact effect of key folding and flatten depth is defined
// by the `toon-format` crate — check its documentation before changing them.
fn llm_encode_options() -> EncodeOptions {
    let options = EncodeOptions::new();
    let options = options.with_delimiter(Delimiter::Pipe);
    let options = options.with_indent(Indent::Spaces(2));
    let options = options.with_key_folding(toon_format::types::KeyFoldingMode::Safe);
    options.with_flatten_depth(2)
}
/// Serializes `value` to TOON text using the crate's LLM-oriented encoder
/// options.
///
/// # Errors
///
/// Returns the encoder's error, converted into `anyhow::Error`, when `value`
/// cannot be encoded.
pub fn encode_llm_friendly<T: Serialize>(value: &T) -> Result<String> {
    let options = llm_encode_options();
    let encoded = toon_format::encode(value, &options)?;
    Ok(encoded)
}

View file

@ -1,5 +1,6 @@
use assert_cmd::Command;
use predicates::{prelude::PredicateBooleanExt, str::contains};
use serde_json::Value;
use std::fs;
use tempfile::tempdir;
@ -61,4 +62,52 @@ mod test {
assert!(html.contains("Kingfisher Audit Report"));
assert!(html.contains("Scan Summary"));
}
// End-to-end check of `scan --format toon`: scans a one-file directory with a
// single custom rule, writes the report to disk, decodes it, and inspects the
// schema id, the summary count, and the first finding.
#[test]
fn cli_scan_generates_toon_report_for_llms() {
// Everything lives under one temp dir: the rule file, the input, and the report.
let temp = tempdir().expect("tempdir should be created");
let rules_dir = temp.path().join("rules");
let input_dir = temp.path().join("repo");
let output_toon = temp.path().join("findings.toon");
fs::create_dir_all(&rules_dir).expect("rules directory should be created");
fs::create_dir_all(&input_dir).expect("input directory should be created");
// Custom rule matching `demo_secret_` followed by four digits.
fs::write(
rules_dir.join("demo.yml"),
r#"
rules:
- id: kingfisher.demo.1
name: Demo secret
pattern: '(demo_secret_[0-9]{4})'
confidence: medium
"#,
)
.expect("rule should be written");
// Seed exactly one string the rule matches.
fs::write(input_dir.join("README.txt"), "demo_secret_1234")
.expect("seed file should be written");
// Built-in rules, validation, and the update check are disabled so only the
// demo rule runs.
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
.args([
"scan",
input_dir.to_str().unwrap(),
"--format",
"toon",
"--output",
output_toon.to_str().unwrap(),
"--rules-path",
rules_dir.to_str().unwrap(),
"--load-builtins=false",
"--no-validate",
"--no-update-check",
])
.assert()
// NOTE(review): 200 is presumably the exit code Kingfisher uses when findings
// are reported — confirm against the CLI's exit-code documentation.
.code(200);
// The report must decode as TOON and expose the expected schema and finding.
let toon = fs::read_to_string(&output_toon).expect("toon report should be written");
let decoded: Value = toon_format::decode_default(&toon).expect("toon should decode");
assert_eq!(decoded["schema"], "kingfisher.toon.v1");
assert_eq!(decoded["scan"]["summary"]["findings"], 1);
assert_eq!(decoded["findings"][0]["rule_id"], "kingfisher.demo.1");
assert_eq!(decoded["findings"][0]["validation_status"], "Not Attempted");
}
}

View file

@ -6,6 +6,7 @@
use assert_cmd::Command;
use predicates::{prelude::PredicateBooleanExt, str::contains};
use serde_json::Value;
use std::fs;
use tempfile::TempDir;
@ -140,6 +141,29 @@ mod validate {
);
}
// `validate --format toon` must print a decodable TOON object carrying the
// rule id, rule name, and message, whether validation succeeds (exit 0) or
// fails (exit 1).
#[test]
fn validate_toon_output() {
    let cli_args = [
        "validate",
        "--rule",
        "kingfisher.opsgenie.1",
        "fake-api-key-12345",
        "--format",
        "toon",
        "--no-update-check",
    ];
    let mut command = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"));
    command.args(cli_args);
    let assertion = command.assert();
    let output = assertion.get_output();

    // Either outcome is acceptable; a signal-terminated process (no code) is not.
    assert!(matches!(output.status.code(), Some(0 | 1)));

    let toon = String::from_utf8(output.stdout.clone()).expect("stdout should be UTF-8");
    let decoded: Value = toon_format::decode_default(&toon).expect("toon should decode");
    for key in ["rule_id", "rule_name", "message"] {
        assert!(decoded.get(key).is_some());
    }
}
#[test]
fn validate_text_output() {
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
@ -586,6 +610,29 @@ mod revoke {
);
}
// `revoke --format toon` must print a decodable TOON object carrying the rule
// id, rule name, and message, whether revocation succeeds (exit 0) or fails
// (exit 1).
#[test]
fn revoke_toon_output() {
    let cli_args = [
        "revoke",
        "--rule",
        "kingfisher.slack.1",
        "xoxb-fake-token",
        "--format",
        "toon",
        "--no-update-check",
    ];
    let mut command = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"));
    command.args(cli_args);
    let assertion = command.assert();
    let output = assertion.get_output();

    // Either outcome is acceptable; a signal-terminated process (no code) is not.
    assert!(matches!(output.status.code(), Some(0 | 1)));

    let toon = String::from_utf8(output.stdout.clone()).expect("stdout should be UTF-8");
    let decoded: Value = toon_format::decode_default(&toon).expect("toon should decode");
    for key in ["rule_id", "rule_name", "message"] {
        assert!(decoded.get(key).is_some());
    }
}
#[test]
fn revoke_text_output() {
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))

View file

@ -3,7 +3,7 @@ use predicates::prelude::*;
use std::{fs, time::Duration};
use tempfile::TempDir;
const FORMATS: [&str; 4] = ["pretty", "json", "jsonl", "bson"];
const FORMATS: [&str; 5] = ["pretty", "json", "jsonl", "bson", "toon"];
fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool {
haystack.windows(needle.len()).any(|window| window == needle)