diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b24d6d..cb2ccf3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to this project will be documented in this file. +## [v1.78.0] +- Improved error messages for `kingfisher validate` command when rules require dependent variables from `depends_on` sections. Now clearly explains which variables are needed and from which dependent rules they are normally captured. +- Fixed `validate_command` and `revoke_command` generation in scan output to include all required `--var` arguments for rules with `depends_on` sections (e.g., PubNub, Azure Storage). Commands now include dependent variables like `--var SUBSCRIPTIONTOKEN=` or `--var AZURENAME=`. +- Updated Azure Storage validation to use `AZURENAME` variable (matching the `depends_on_rule` configuration) with `STORAGE_ACCOUNT` maintained as a backward-compatible alias. +- Added internal `dependent_captures` field to match records to preserve variables from dependent rules through the validation pipeline for accurate command generation. + ## [v1.77.0] - Added `kingfisher revoke` subcommand for revoking leaked credentials directly with the provider. - Added optional `revocation` section to rules to support credential revocation (currently supporting AWS, GCP, GitHub, GitLab, Slack, and Buildkite). @@ -11,7 +17,7 @@ All notable changes to this project will be documented in this file. - Refactored project into multiple crates for better modularity and maintainability. - Ensured more CLI arguments are global and available across all subcommands. - Added `kingfisher-auto` pre-commit hook that automatically downloads and caches the appropriate binary for your platform (no Docker or manual installation required). -- Added Husky integration support with `install-husky.sh` helper script and documentation for Node.js projects. +- Added Husky integration support with `install-husky.sh` helper script and documentation fclearor Node.js projects. - Added `kingfisher-pre-commit-auto.sh` and `kingfisher-pre-commit-auto.ps1` scripts for automatic binary download in Git hooks (Linux, macOS, Windows support). ## [v1.76.0] diff --git a/Cargo.toml b/Cargo.toml index 5914d90..a7e79f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ publish = false [package] name = "kingfisher" -version = "1.77.0" +version = "1.78.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/README.md b/README.md index 03ec285..4f9b168 100644 --- a/README.md +++ b/README.md @@ -122,7 +122,7 @@ kingfisher scan /path/to/code kingfisher scan /path/to/code --view-report ``` -### 4: Show only verified (live) secrets +### 4: Show only validated (live) secrets ```bash kingfisher scan /path/to/code --only-valid diff --git a/docs/LIBRARY.md b/docs/LIBRARY.md index b7562bc..54fc423 100644 --- a/docs/LIBRARY.md +++ b/docs/LIBRARY.md @@ -415,66 +415,194 @@ let all_findings: Vec<_> = files.par_iter() ## Complete Example -Here's a complete example that scans a directory for secrets: +Here's a complete CLI tool that scans files and directories for secrets with configurable options: ```rust use std::sync::Arc; use std::path::Path; use walkdir::WalkDir; -use kingfisher_core::Blob; +use clap::Parser; + use kingfisher_rules::{get_builtin_rules, RulesDatabase, Rule, Confidence}; use kingfisher_scanner::{Scanner, ScannerConfig}; +#[derive(Parser)] +#[command(name = "secret-scanner")] +#[command(about = "Scan files and directories for secrets using Kingfisher", long_about = None)] +struct Cli { + /// Path to scan (file or directory) + #[arg(value_name = "PATH")] + path: String, + + /// Minimum confidence level (low, medium, high) + #[arg(short, long, default_value = "medium")] + confidence: String, + + /// Enable base64 decoding + #[arg(short, long, default_value_t = true)] + base64: bool, + + /// Redact secrets in output + #[arg(short, long, default_value_t = false)] + redact: bool, +} + fn main() -> anyhow::Result<()> { - // Load high-confidence rules only - let rules = get_builtin_rules(Some(Confidence::High))?; - println!("Loaded {} high-confidence rules", rules.num_rules()); - - // Compile rules - let rule_vec: Vec = rules.iter_rules() + let cli = Cli::parse(); + + // Parse confidence level + let confidence = match cli.confidence.to_lowercase().as_str() { + "low" => Confidence::Low, + "medium" => Confidence::Medium, + "high" => Confidence::High, + _ => { + eprintln!("Invalid confidence level. Use: low, medium, or high"); + std::process::exit(1); + } + }; + + // Load builtin rules + println!("Loading {} confidence rules...", cli.confidence); + let rules = get_builtin_rules(Some(confidence))?; + println!("Loaded {} rules", rules.num_rules()); + + // Convert to Rule objects and compile into a database + let rule_vec: Vec = rules + .iter_rules() .map(|syntax| Rule::new(syntax.clone())) .collect(); let rules_db = Arc::new(RulesDatabase::from_rules(rule_vec)?); - + // Configure scanner let config = ScannerConfig { - enable_base64_decoding: true, + enable_base64_decoding: cli.base64, enable_dedup: true, - redact_secrets: true, // Redact secrets in output + redact_secrets: cli.redact, ..Default::default() }; let scanner = Scanner::with_config(rules_db, config); + + // Scan the path + let path = Path::new(&cli.path); - // Scan directory - let dir = Path::new("./src"); + if !path.exists() { + eprintln!("Error: Path '{}' does not exist", cli.path); + std::process::exit(1); + } + let mut total_findings = 0; - - for entry in WalkDir::new(dir) - .into_iter() - .filter_map(|e| e.ok()) - .filter(|e| e.file_type().is_file()) - { - let path = entry.path(); - + let mut files_scanned = 0; + + if path.is_file() { + // Scan single file + files_scanned = 1; + println!("\nScanning file: {}", path.display()); match scanner.scan_file(path) { - Ok(findings) if !findings.is_empty() => { - println!("\n{}", path.display()); - for finding in &findings { - println!(" [{}] {} at line {}", - finding.rule_id, - finding.rule_name, - finding.location.start_line); - } + Ok(findings) => { + print_findings(path, &findings); total_findings += findings.len(); } - Err(e) => eprintln!("Error scanning {}: {}", path.display(), e), - _ => {} + Err(e) => eprintln!("Error scanning file: {}", e), + } + } else if path.is_dir() { + // Scan directory recursively + println!("\nScanning directory: {}\n", path.display()); + + for entry in WalkDir::new(path) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.file_type().is_file()) + { + let file_path = entry.path(); + files_scanned += 1; + + match scanner.scan_file(file_path) { + Ok(findings) if !findings.is_empty() => { + print_findings(file_path, &findings); + total_findings += findings.len(); + } + Err(e) => { + // Silently skip files that can't be scanned (binary, etc.) + if std::env::var("DEBUG").is_ok() { + eprintln!("Error scanning {}: {}", file_path.display(), e); + } + } + _ => {} + } } } + + // Print summary + println!("\n{}", "=".repeat(60)); + println!("Scan complete!"); + println!("Files scanned: {}", files_scanned); + println!("Total findings: {}", total_findings); - println!("\nTotal findings: {}", total_findings); + if total_findings > 0 { + println!("\nāš ļø WARNING: Secrets detected! Please review the findings above."); + std::process::exit(1); + } else { + println!("āœ“ No secrets found."); + } + Ok(()) } + +fn print_findings(path: &Path, findings: &[kingfisher_scanner::Finding]) { + println!("\nšŸ“ {}", path.display()); + println!("{}", "-".repeat(60)); + + for finding in findings { + println!(" šŸ” {} ({})", finding.rule_name, finding.rule_id); + println!(" Location: line {}:{} - {}:{}", + finding.location.line, + finding.location.column, + finding.location.end_line, + finding.location.end_column); + println!(" Secret: {}", finding.secret); + println!(" Entropy: {:.2}", finding.entropy); + println!(" Confidence: {:?}", finding.confidence); + println!(" Fingerprint: {}", finding.fingerprint); + + if !finding.captures.is_empty() { + println!(" Captures:"); + for (name, value) in &finding.captures { + println!(" {}: {}", name, value); + } + } + println!(); + } +} +``` + +Add these dependencies to your `Cargo.toml`: + +```toml +[package] +name = "secret-scanner" +version = "0.1.0" +edition = "2021" + +[dependencies] +kingfisher-core = { git = "https://github.com/mongodb/kingfisher" } +kingfisher-rules = { git = "https://github.com/mongodb/kingfisher" } +kingfisher-scanner = { git = "https://github.com/mongodb/kingfisher" } +anyhow = "1.0" +walkdir = "2.5" +clap = { version = "4.5", features = ["derive"] } +``` + +Try it out: + +```bash +# Scan a directory with medium confidence rules +cargo run -- -c medium ~/tmp + +# Scan with high confidence only and redact secrets +cargo run -- -c high --redact ~/projects + +# Scan a single file +cargo run -- config.yml ``` --- diff --git a/src/baseline.rs b/src/baseline.rs index 6d91e67..4144830 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -177,6 +177,7 @@ mod tests { calculated_entropy: 0.0, visible: true, is_base64: false, + dependent_captures: std::collections::BTreeMap::new(), }; let origin = OriginSet::from(Origin::from_file(file_path.to_path_buf())); diff --git a/src/direct_validate.rs b/src/direct_validate.rs index c49609f..7fbb9fb 100644 --- a/src/direct_validate.rs +++ b/src/direct_validate.rs @@ -160,7 +160,9 @@ fn extract_validation_vars(validation: &Validation) -> BTreeSet { } Validation::AzureStorage => { vars.insert("TOKEN".to_string()); - vars.insert("STORAGE_ACCOUNT".to_string()); + // AZURENAME matches the depends_on_rule variable in azurestorage.yml + // STORAGE_ACCOUNT is kept for backward compatibility + vars.insert("AZURENAME".to_string()); } Validation::Coinbase => { vars.insert("TOKEN".to_string()); @@ -418,6 +420,42 @@ pub async fn run_direct_validation( ); } + // Check for missing variables and provide helpful error messages + let missing_vars: Vec<&String> = + template_vars.iter().filter(|var| globals.get(var.as_str()).is_none()).collect(); + + if !missing_vars.is_empty() { + // Build a map from variable name to the rule it depends on + let depends_on_map: BTreeMap = rule + .syntax() + .depends_on_rule + .iter() + .flatten() + .map(|dep| (dep.variable.to_uppercase(), dep.rule_id.as_str())) + .collect(); + + let mut error_parts = Vec::new(); + let mut var_hints = Vec::new(); + + for var in &missing_vars { + if let Some(source_rule) = depends_on_map.get(*var) { + error_parts + .push(format!(" {} (normally captured from rule '{}')", var, source_rule)); + } else { + error_parts.push(format!(" {}", var)); + } + var_hints.push(format!("--var {}=", var)); + } + + bail!( + "Rule '{}' requires the following variable(s):\n{}\n\nProvide them using: kingfisher validate --rule {} {} ", + rule_id, + error_parts.join("\n"), + rule_id, + var_hints.join(" ") + ); + } + // Execute validation based on type let mut result = match validation { Validation::Http(http_validation) => { @@ -603,15 +641,18 @@ pub async fn run_direct_validation( Validation::AzureStorage => { // Azure Storage expects JSON with storage_account and storage_key - // Or use --var STORAGE_ACCOUNT=xxx and pass the storage key as the secret + // Or use --var AZURENAME=xxx (or STORAGE_ACCOUNT for backward compat) and pass the storage key as the secret let azure_json = if secret.starts_with('{') { // Secret is already JSON secret.clone() } else { // Build JSON from variables - let storage_account = get_global_var(&globals, "STORAGE_ACCOUNT") + // AZURENAME matches the depends_on_rule variable in azurestorage.yml + // STORAGE_ACCOUNT is kept for backward compatibility + let storage_account = get_global_var(&globals, "AZURENAME") + .or_else(|| get_global_var(&globals, "STORAGE_ACCOUNT")) .ok_or_else(|| anyhow!( - "Azure Storage validation requires either JSON input or --var STORAGE_ACCOUNT= " + "Azure Storage validation requires either JSON input or --var AZURENAME= " ))?; serde_json::json!({ "storage_account": storage_account, diff --git a/src/matcher.rs b/src/matcher.rs index adc8d57..82a45ff 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -71,6 +71,9 @@ pub struct OwnedBlobMatch { pub validation_success: bool, pub calculated_entropy: f32, pub is_base64: bool, + /// Variables captured from dependent rules (from depends_on_rule). + /// Maps variable name (uppercase) to captured value. + pub dependent_captures: std::collections::BTreeMap, } impl<'a> Matcher<'a> { pub fn get_profiling_report(&self) -> Option> { @@ -92,6 +95,7 @@ impl OwnedBlobMatch { validation_success: m.validation_success, calculated_entropy: m.calculated_entropy, is_base64: m.is_base64, + dependent_captures: m.dependent_captures.clone(), } } @@ -124,6 +128,7 @@ impl OwnedBlobMatch { calculated_entropy: blob_match.calculated_entropy, finding_fingerprint: 0, //default is_base64: blob_match.is_base64, + dependent_captures: std::collections::BTreeMap::new(), }; // Convert matching_finding to a &str (using lossy conversion if needed) @@ -1020,6 +1025,11 @@ pub struct Match { pub visible: bool, #[serde(default)] pub is_base64: bool, + + /// Variables captured from dependent rules (from depends_on_rule). + /// Maps variable name (uppercase) to captured value. + #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")] + pub dependent_captures: std::collections::BTreeMap, } impl Match { #[inline] @@ -1071,6 +1081,7 @@ impl Match { validation_success: owned_blob_match.validation_success, calculated_entropy: owned_blob_match.calculated_entropy, is_base64: owned_blob_match.is_base64, + dependent_captures: owned_blob_match.dependent_captures.clone(), } } diff --git a/src/reporter.rs b/src/reporter.rs index 58c9550..3d8ac79 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -44,6 +44,33 @@ fn escape_for_shell(s: &str) -> String { format!("'{}'", s.replace('\'', "'\\''")) } +/// Build the --var arguments string from dependent captures. +fn build_var_args( + dependent_captures: &std::collections::BTreeMap, + akid_from_captures: Option<&str>, + akid_from_validation_body: Option<&str>, +) -> String { + let mut var_args = Vec::new(); + + // Add AKID if available (for AWS) + if let Some(akid) = akid_from_captures.or(akid_from_validation_body) { + if !akid.is_empty() && !dependent_captures.contains_key("AKID") { + var_args.push(format!("--var AKID={}", escape_for_shell(akid))); + } + } + + // Add all dependent captures as --var arguments + for (name, value) in dependent_captures { + var_args.push(format!("--var {}={}", name, escape_for_shell(value))); + } + + if var_args.is_empty() { + String::new() + } else { + format!("{} ", var_args.join(" ")) + } +} + /// Generate a kingfisher revoke command for an active credential if the rule supports revocation. /// /// Returns `None` if: @@ -54,9 +81,13 @@ fn build_revoke_command( rule_id: &str, revocation: &Revocation, snippet: &str, + dependent_captures: &std::collections::BTreeMap, akid_from_captures: Option<&str>, akid_from_validation_body: Option<&str>, ) -> Option { + let var_args = + build_var_args(dependent_captures, akid_from_captures, akid_from_validation_body); + match revocation { Revocation::AWS => { // AWS needs the access key ID (AKID) in addition to the secret @@ -66,19 +97,29 @@ fn build_revoke_command( return None; } Some(format!( - "kingfisher revoke --rule {} --var AKID={} {}", + "kingfisher revoke --rule {} {}{}", rule_id, - akid, + var_args, escape_for_shell(snippet) )) } Revocation::GCP => { // GCP revocation uses the service account JSON key (which is the snippet) - Some(format!("kingfisher revoke --rule {} {}", rule_id, escape_for_shell(snippet))) + Some(format!( + "kingfisher revoke --rule {} {}{}", + rule_id, + var_args, + escape_for_shell(snippet) + )) } Revocation::Http(_) => { - // HTTP-based revocation just needs the token - Some(format!("kingfisher revoke --rule {} {}", rule_id, escape_for_shell(snippet))) + // HTTP-based revocation with dependent variables + Some(format!( + "kingfisher revoke --rule {} {}{}", + rule_id, + var_args, + escape_for_shell(snippet) + )) } } } @@ -90,11 +131,15 @@ fn build_validate_command( rule_id: &str, validation: &crate::rules::Validation, snippet: &str, + dependent_captures: &std::collections::BTreeMap, akid_from_captures: Option<&str>, akid_from_validation_body: Option<&str>, ) -> Option { use crate::rules::Validation; + let var_args = + build_var_args(dependent_captures, akid_from_captures, akid_from_validation_body); + match validation { Validation::AWS => { // AWS needs the access key ID (AKID) in addition to the secret @@ -103,19 +148,29 @@ fn build_validate_command( return None; } Some(format!( - "kingfisher validate --rule {} --var AKID={} {}", + "kingfisher validate --rule {} {}{}", rule_id, - akid, + var_args, escape_for_shell(snippet) )) } Validation::GCP => { // GCP validation uses the service account JSON key - Some(format!("kingfisher validate --rule {} {}", rule_id, escape_for_shell(snippet))) + Some(format!( + "kingfisher validate --rule {} {}{}", + rule_id, + var_args, + escape_for_shell(snippet) + )) } Validation::Http(_) => { - // HTTP-based validation just needs the token - Some(format!("kingfisher validate --rule {} {}", rule_id, escape_for_shell(snippet))) + // HTTP-based validation with dependent variables + Some(format!( + "kingfisher validate --rule {} {}{}", + rule_id, + var_args, + escape_for_shell(snippet) + )) } Validation::MongoDB | Validation::MySQL @@ -125,8 +180,13 @@ fn build_validate_command( | Validation::AzureStorage | Validation::Coinbase | Validation::Raw(_) => { - // These validators just need the token/connection string - Some(format!("kingfisher validate --rule {} {}", rule_id, escape_for_shell(snippet))) + // These validators with dependent variables + Some(format!( + "kingfisher validate --rule {} {}{}", + rule_id, + var_args, + escape_for_shell(snippet) + )) } } } @@ -648,6 +708,7 @@ impl DetailsReporter { rm.m.rule.id(), validation, &raw_snippet, + &rm.m.dependent_captures, akid_from_captures.as_deref(), akid_from_body.as_deref(), ) @@ -662,6 +723,7 @@ impl DetailsReporter { rm.m.rule.id(), revocation, &raw_snippet, + &rm.m.dependent_captures, akid_from_captures.as_deref(), akid_from_body.as_deref(), ) @@ -1233,6 +1295,7 @@ mod tests { calculated_entropy: 5.29, visible: true, is_base64: false, + dependent_captures: std::collections::BTreeMap::new(), }, comment: None, match_confidence: Confidence::Medium, diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 117c2fe..3171dcd 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -243,6 +243,7 @@ mod tests { calculated_entropy: 4.5, visible: true, is_base64: false, + dependent_captures: std::collections::BTreeMap::new(), } } diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index 24c77e9..c88f362 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -424,6 +424,9 @@ pub async fn run_secret_validation( validation_success: om.validation_success, validation_response_body: om.validation_response_body.clone(), validation_response_status: om.validation_response_status.as_u16(), + // Copy dependent_captures from validated OwnedBlobMatch + // so they're available for building validate/revoke commands + dependent_captures: om.dependent_captures.clone(), ..orig.2.clone() }, ))); diff --git a/src/validation.rs b/src/validation.rs index 2e431ff..a0eda14 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -403,6 +403,9 @@ async fn timed_validate_single_match<'a>( span.start, span.end, )); + // Store the dependent capture for later use in reporting + // (e.g., generating validate/revoke commands) + m.dependent_captures.insert(dep.variable.to_uppercase(), val.clone()); } } }