preparing for v1.78.0

This commit is contained in:
Mick Grove 2026-02-02 18:39:24 -08:00
commit 63f1d515ae
11 changed files with 308 additions and 51 deletions

View file

@ -2,6 +2,12 @@
All notable changes to this project will be documented in this file.
## [v1.78.0]
- Improved error messages for `kingfisher validate` command when rules require dependent variables from `depends_on` sections. Now clearly explains which variables are needed and from which dependent rules they are normally captured.
- Fixed `validate_command` and `revoke_command` generation in scan output to include all required `--var` arguments for rules with `depends_on` sections (e.g., PubNub, Azure Storage). Commands now include dependent variables like `--var SUBSCRIPTIONTOKEN=<value>` or `--var AZURENAME=<value>`.
- Updated Azure Storage validation to use `AZURENAME` variable (matching the `depends_on_rule` configuration) with `STORAGE_ACCOUNT` maintained as a backward-compatible alias.
- Added internal `dependent_captures` field to match records to preserve variables from dependent rules through the validation pipeline for accurate command generation.
## [v1.77.0]
- Added `kingfisher revoke` subcommand for revoking leaked credentials directly with the provider.
- Added optional `revocation` section to rules to support credential revocation (currently supporting AWS, GCP, GitHub, GitLab, Slack, and Buildkite).
@ -11,7 +17,7 @@ All notable changes to this project will be documented in this file.
- Refactored project into multiple crates for better modularity and maintainability.
- Ensured more CLI arguments are global and available across all subcommands.
- Added `kingfisher-auto` pre-commit hook that automatically downloads and caches the appropriate binary for your platform (no Docker or manual installation required).
- Added Husky integration support with `install-husky.sh` helper script and documentation for Node.js projects.
- Added Husky integration support with `install-husky.sh` helper script and documentation fclearor Node.js projects.
- Added `kingfisher-pre-commit-auto.sh` and `kingfisher-pre-commit-auto.ps1` scripts for automatic binary download in Git hooks (Linux, macOS, Windows support).
## [v1.76.0]

View file

@ -19,7 +19,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.77.0"
version = "1.78.0"
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true

View file

@ -122,7 +122,7 @@ kingfisher scan /path/to/code
kingfisher scan /path/to/code --view-report
```
### 4: Show only verified (live) secrets
### 4: Show only validated (live) secrets
```bash
kingfisher scan /path/to/code --only-valid

View file

@ -415,66 +415,194 @@ let all_findings: Vec<_> = files.par_iter()
## Complete Example
Here's a complete example that scans a directory for secrets:
Here's a complete CLI tool that scans files and directories for secrets with configurable options:
```rust
use std::sync::Arc;
use std::path::Path;
use walkdir::WalkDir;
use kingfisher_core::Blob;
use clap::Parser;
use kingfisher_rules::{get_builtin_rules, RulesDatabase, Rule, Confidence};
use kingfisher_scanner::{Scanner, ScannerConfig};
#[derive(Parser)]
#[command(name = "secret-scanner")]
#[command(about = "Scan files and directories for secrets using Kingfisher", long_about = None)]
struct Cli {
/// Path to scan (file or directory)
#[arg(value_name = "PATH")]
path: String,
/// Minimum confidence level (low, medium, high)
#[arg(short, long, default_value = "medium")]
confidence: String,
/// Enable base64 decoding
#[arg(short, long, default_value_t = true)]
base64: bool,
/// Redact secrets in output
#[arg(short, long, default_value_t = false)]
redact: bool,
}
fn main() -> anyhow::Result<()> {
// Load high-confidence rules only
let rules = get_builtin_rules(Some(Confidence::High))?;
println!("Loaded {} high-confidence rules", rules.num_rules());
// Compile rules
let rule_vec: Vec<Rule> = rules.iter_rules()
let cli = Cli::parse();
// Parse confidence level
let confidence = match cli.confidence.to_lowercase().as_str() {
"low" => Confidence::Low,
"medium" => Confidence::Medium,
"high" => Confidence::High,
_ => {
eprintln!("Invalid confidence level. Use: low, medium, or high");
std::process::exit(1);
}
};
// Load builtin rules
println!("Loading {} confidence rules...", cli.confidence);
let rules = get_builtin_rules(Some(confidence))?;
println!("Loaded {} rules", rules.num_rules());
// Convert to Rule objects and compile into a database
let rule_vec: Vec<Rule> = rules
.iter_rules()
.map(|syntax| Rule::new(syntax.clone()))
.collect();
let rules_db = Arc::new(RulesDatabase::from_rules(rule_vec)?);
// Configure scanner
let config = ScannerConfig {
enable_base64_decoding: true,
enable_base64_decoding: cli.base64,
enable_dedup: true,
redact_secrets: true, // Redact secrets in output
redact_secrets: cli.redact,
..Default::default()
};
let scanner = Scanner::with_config(rules_db, config);
// Scan the path
let path = Path::new(&cli.path);
// Scan directory
let dir = Path::new("./src");
if !path.exists() {
eprintln!("Error: Path '{}' does not exist", cli.path);
std::process::exit(1);
}
let mut total_findings = 0;
for entry in WalkDir::new(dir)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
{
let path = entry.path();
let mut files_scanned = 0;
if path.is_file() {
// Scan single file
files_scanned = 1;
println!("\nScanning file: {}", path.display());
match scanner.scan_file(path) {
Ok(findings) if !findings.is_empty() => {
println!("\n{}", path.display());
for finding in &findings {
println!(" [{}] {} at line {}",
finding.rule_id,
finding.rule_name,
finding.location.start_line);
}
Ok(findings) => {
print_findings(path, &findings);
total_findings += findings.len();
}
Err(e) => eprintln!("Error scanning {}: {}", path.display(), e),
_ => {}
Err(e) => eprintln!("Error scanning file: {}", e),
}
} else if path.is_dir() {
// Scan directory recursively
println!("\nScanning directory: {}\n", path.display());
for entry in WalkDir::new(path)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
{
let file_path = entry.path();
files_scanned += 1;
match scanner.scan_file(file_path) {
Ok(findings) if !findings.is_empty() => {
print_findings(file_path, &findings);
total_findings += findings.len();
}
Err(e) => {
// Silently skip files that can't be scanned (binary, etc.)
if std::env::var("DEBUG").is_ok() {
eprintln!("Error scanning {}: {}", file_path.display(), e);
}
}
_ => {}
}
}
}
// Print summary
println!("\n{}", "=".repeat(60));
println!("Scan complete!");
println!("Files scanned: {}", files_scanned);
println!("Total findings: {}", total_findings);
println!("\nTotal findings: {}", total_findings);
if total_findings > 0 {
println!("\n⚠ WARNING: Secrets detected! Please review the findings above.");
std::process::exit(1);
} else {
println!("✓ No secrets found.");
}
Ok(())
}
fn print_findings(path: &Path, findings: &[kingfisher_scanner::Finding]) {
println!("\n📁 {}", path.display());
println!("{}", "-".repeat(60));
for finding in findings {
println!(" 🔍 {} ({})", finding.rule_name, finding.rule_id);
println!(" Location: line {}:{} - {}:{}",
finding.location.line,
finding.location.column,
finding.location.end_line,
finding.location.end_column);
println!(" Secret: {}", finding.secret);
println!(" Entropy: {:.2}", finding.entropy);
println!(" Confidence: {:?}", finding.confidence);
println!(" Fingerprint: {}", finding.fingerprint);
if !finding.captures.is_empty() {
println!(" Captures:");
for (name, value) in &finding.captures {
println!(" {}: {}", name, value);
}
}
println!();
}
}
```
Add these dependencies to your `Cargo.toml`:
```toml
[package]
name = "secret-scanner"
version = "0.1.0"
edition = "2021"
[dependencies]
kingfisher-core = { git = "https://github.com/mongodb/kingfisher" }
kingfisher-rules = { git = "https://github.com/mongodb/kingfisher" }
kingfisher-scanner = { git = "https://github.com/mongodb/kingfisher" }
anyhow = "1.0"
walkdir = "2.5"
clap = { version = "4.5", features = ["derive"] }
```
Try it out:
```bash
# Scan a directory with medium confidence rules
cargo run -- -c medium ~/tmp
# Scan with high confidence only and redact secrets
cargo run -- -c high --redact ~/projects
# Scan a single file
cargo run -- config.yml
```
---

View file

@ -177,6 +177,7 @@ mod tests {
calculated_entropy: 0.0,
visible: true,
is_base64: false,
dependent_captures: std::collections::BTreeMap::new(),
};
let origin = OriginSet::from(Origin::from_file(file_path.to_path_buf()));

View file

@ -160,7 +160,9 @@ fn extract_validation_vars(validation: &Validation) -> BTreeSet<String> {
}
Validation::AzureStorage => {
vars.insert("TOKEN".to_string());
vars.insert("STORAGE_ACCOUNT".to_string());
// AZURENAME matches the depends_on_rule variable in azurestorage.yml
// STORAGE_ACCOUNT is kept for backward compatibility
vars.insert("AZURENAME".to_string());
}
Validation::Coinbase => {
vars.insert("TOKEN".to_string());
@ -418,6 +420,42 @@ pub async fn run_direct_validation(
);
}
// Check for missing variables and provide helpful error messages
let missing_vars: Vec<&String> =
template_vars.iter().filter(|var| globals.get(var.as_str()).is_none()).collect();
if !missing_vars.is_empty() {
// Build a map from variable name to the rule it depends on
let depends_on_map: BTreeMap<String, &str> = rule
.syntax()
.depends_on_rule
.iter()
.flatten()
.map(|dep| (dep.variable.to_uppercase(), dep.rule_id.as_str()))
.collect();
let mut error_parts = Vec::new();
let mut var_hints = Vec::new();
for var in &missing_vars {
if let Some(source_rule) = depends_on_map.get(*var) {
error_parts
.push(format!(" {} (normally captured from rule '{}')", var, source_rule));
} else {
error_parts.push(format!(" {}", var));
}
var_hints.push(format!("--var {}=<value>", var));
}
bail!(
"Rule '{}' requires the following variable(s):\n{}\n\nProvide them using: kingfisher validate --rule {} {} <secret>",
rule_id,
error_parts.join("\n"),
rule_id,
var_hints.join(" ")
);
}
// Execute validation based on type
let mut result = match validation {
Validation::Http(http_validation) => {
@ -603,15 +641,18 @@ pub async fn run_direct_validation(
Validation::AzureStorage => {
// Azure Storage expects JSON with storage_account and storage_key
// Or use --var STORAGE_ACCOUNT=xxx and pass the storage key as the secret
// Or use --var AZURENAME=xxx (or STORAGE_ACCOUNT for backward compat) and pass the storage key as the secret
let azure_json = if secret.starts_with('{') {
// Secret is already JSON
secret.clone()
} else {
// Build JSON from variables
let storage_account = get_global_var(&globals, "STORAGE_ACCOUNT")
// AZURENAME matches the depends_on_rule variable in azurestorage.yml
// STORAGE_ACCOUNT is kept for backward compatibility
let storage_account = get_global_var(&globals, "AZURENAME")
.or_else(|| get_global_var(&globals, "STORAGE_ACCOUNT"))
.ok_or_else(|| anyhow!(
"Azure Storage validation requires either JSON input or --var STORAGE_ACCOUNT=<account_name> <storage_key>"
"Azure Storage validation requires either JSON input or --var AZURENAME=<account_name> <storage_key>"
))?;
serde_json::json!({
"storage_account": storage_account,

View file

@ -71,6 +71,9 @@ pub struct OwnedBlobMatch {
pub validation_success: bool,
pub calculated_entropy: f32,
pub is_base64: bool,
/// Variables captured from dependent rules (from depends_on_rule).
/// Maps variable name (uppercase) to captured value.
pub dependent_captures: std::collections::BTreeMap<String, String>,
}
impl<'a> Matcher<'a> {
pub fn get_profiling_report(&self) -> Option<Vec<RuleStats>> {
@ -92,6 +95,7 @@ impl OwnedBlobMatch {
validation_success: m.validation_success,
calculated_entropy: m.calculated_entropy,
is_base64: m.is_base64,
dependent_captures: m.dependent_captures.clone(),
}
}
@ -124,6 +128,7 @@ impl OwnedBlobMatch {
calculated_entropy: blob_match.calculated_entropy,
finding_fingerprint: 0, //default
is_base64: blob_match.is_base64,
dependent_captures: std::collections::BTreeMap::new(),
};
// Convert matching_finding to a &str (using lossy conversion if needed)
@ -1020,6 +1025,11 @@ pub struct Match {
pub visible: bool,
#[serde(default)]
pub is_base64: bool,
/// Variables captured from dependent rules (from depends_on_rule).
/// Maps variable name (uppercase) to captured value.
#[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
pub dependent_captures: std::collections::BTreeMap<String, String>,
}
impl Match {
#[inline]
@ -1071,6 +1081,7 @@ impl Match {
validation_success: owned_blob_match.validation_success,
calculated_entropy: owned_blob_match.calculated_entropy,
is_base64: owned_blob_match.is_base64,
dependent_captures: owned_blob_match.dependent_captures.clone(),
}
}

View file

@ -44,6 +44,33 @@ fn escape_for_shell(s: &str) -> String {
format!("'{}'", s.replace('\'', "'\\''"))
}
/// Build the --var arguments string from dependent captures.
fn build_var_args(
dependent_captures: &std::collections::BTreeMap<String, String>,
akid_from_captures: Option<&str>,
akid_from_validation_body: Option<&str>,
) -> String {
let mut var_args = Vec::new();
// Add AKID if available (for AWS)
if let Some(akid) = akid_from_captures.or(akid_from_validation_body) {
if !akid.is_empty() && !dependent_captures.contains_key("AKID") {
var_args.push(format!("--var AKID={}", escape_for_shell(akid)));
}
}
// Add all dependent captures as --var arguments
for (name, value) in dependent_captures {
var_args.push(format!("--var {}={}", name, escape_for_shell(value)));
}
if var_args.is_empty() {
String::new()
} else {
format!("{} ", var_args.join(" "))
}
}
/// Generate a kingfisher revoke command for an active credential if the rule supports revocation.
///
/// Returns `None` if:
@ -54,9 +81,13 @@ fn build_revoke_command(
rule_id: &str,
revocation: &Revocation,
snippet: &str,
dependent_captures: &std::collections::BTreeMap<String, String>,
akid_from_captures: Option<&str>,
akid_from_validation_body: Option<&str>,
) -> Option<String> {
let var_args =
build_var_args(dependent_captures, akid_from_captures, akid_from_validation_body);
match revocation {
Revocation::AWS => {
// AWS needs the access key ID (AKID) in addition to the secret
@ -66,19 +97,29 @@ fn build_revoke_command(
return None;
}
Some(format!(
"kingfisher revoke --rule {} --var AKID={} {}",
"kingfisher revoke --rule {} {}{}",
rule_id,
akid,
var_args,
escape_for_shell(snippet)
))
}
Revocation::GCP => {
// GCP revocation uses the service account JSON key (which is the snippet)
Some(format!("kingfisher revoke --rule {} {}", rule_id, escape_for_shell(snippet)))
Some(format!(
"kingfisher revoke --rule {} {}{}",
rule_id,
var_args,
escape_for_shell(snippet)
))
}
Revocation::Http(_) => {
// HTTP-based revocation just needs the token
Some(format!("kingfisher revoke --rule {} {}", rule_id, escape_for_shell(snippet)))
// HTTP-based revocation with dependent variables
Some(format!(
"kingfisher revoke --rule {} {}{}",
rule_id,
var_args,
escape_for_shell(snippet)
))
}
}
}
@ -90,11 +131,15 @@ fn build_validate_command(
rule_id: &str,
validation: &crate::rules::Validation,
snippet: &str,
dependent_captures: &std::collections::BTreeMap<String, String>,
akid_from_captures: Option<&str>,
akid_from_validation_body: Option<&str>,
) -> Option<String> {
use crate::rules::Validation;
let var_args =
build_var_args(dependent_captures, akid_from_captures, akid_from_validation_body);
match validation {
Validation::AWS => {
// AWS needs the access key ID (AKID) in addition to the secret
@ -103,19 +148,29 @@ fn build_validate_command(
return None;
}
Some(format!(
"kingfisher validate --rule {} --var AKID={} {}",
"kingfisher validate --rule {} {}{}",
rule_id,
akid,
var_args,
escape_for_shell(snippet)
))
}
Validation::GCP => {
// GCP validation uses the service account JSON key
Some(format!("kingfisher validate --rule {} {}", rule_id, escape_for_shell(snippet)))
Some(format!(
"kingfisher validate --rule {} {}{}",
rule_id,
var_args,
escape_for_shell(snippet)
))
}
Validation::Http(_) => {
// HTTP-based validation just needs the token
Some(format!("kingfisher validate --rule {} {}", rule_id, escape_for_shell(snippet)))
// HTTP-based validation with dependent variables
Some(format!(
"kingfisher validate --rule {} {}{}",
rule_id,
var_args,
escape_for_shell(snippet)
))
}
Validation::MongoDB
| Validation::MySQL
@ -125,8 +180,13 @@ fn build_validate_command(
| Validation::AzureStorage
| Validation::Coinbase
| Validation::Raw(_) => {
// These validators just need the token/connection string
Some(format!("kingfisher validate --rule {} {}", rule_id, escape_for_shell(snippet)))
// These validators with dependent variables
Some(format!(
"kingfisher validate --rule {} {}{}",
rule_id,
var_args,
escape_for_shell(snippet)
))
}
}
}
@ -648,6 +708,7 @@ impl DetailsReporter {
rm.m.rule.id(),
validation,
&raw_snippet,
&rm.m.dependent_captures,
akid_from_captures.as_deref(),
akid_from_body.as_deref(),
)
@ -662,6 +723,7 @@ impl DetailsReporter {
rm.m.rule.id(),
revocation,
&raw_snippet,
&rm.m.dependent_captures,
akid_from_captures.as_deref(),
akid_from_body.as_deref(),
)
@ -1233,6 +1295,7 @@ mod tests {
calculated_entropy: 5.29,
visible: true,
is_base64: false,
dependent_captures: std::collections::BTreeMap::new(),
},
comment: None,
match_confidence: Confidence::Medium,

View file

@ -243,6 +243,7 @@ mod tests {
calculated_entropy: 4.5,
visible: true,
is_base64: false,
dependent_captures: std::collections::BTreeMap::new(),
}
}

View file

@ -424,6 +424,9 @@ pub async fn run_secret_validation(
validation_success: om.validation_success,
validation_response_body: om.validation_response_body.clone(),
validation_response_status: om.validation_response_status.as_u16(),
// Copy dependent_captures from validated OwnedBlobMatch
// so they're available for building validate/revoke commands
dependent_captures: om.dependent_captures.clone(),
..orig.2.clone()
},
)));

View file

@ -403,6 +403,9 @@ async fn timed_validate_single_match<'a>(
span.start,
span.end,
));
// Store the dependent capture for later use in reporting
// (e.g., generating validate/revoke commands)
m.dependent_captures.insert(dep.variable.to_uppercase(), val.clone());
}
}
}