From 706723e3841adabaf8e597eb76623d5dc01aa857 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 10 Aug 2025 17:25:32 -0700 Subject: [PATCH 01/10] removed unused cli argument, snippet-length --- src/cli/commands/scan.rs | 4 - src/cli/commands/scan.rs.orig | 132 ++++++++ src/main.rs | 1 - src/main.rs.orig | 528 +++++++++++++++++++++++++++++ src/reporter/json_format.rs | 1 - src/reporter/json_format.rs.orig | 270 +++++++++++++++ tests/int_dedup.rs | 1 - tests/int_dedup.rs.orig | 171 ++++++++++ tests/int_github.rs | 1 - tests/int_github.rs.orig | 149 ++++++++ tests/int_gitlab.rs | 2 - tests/int_gitlab.rs.orig | 243 +++++++++++++ tests/int_redact.rs | 1 - tests/int_redact.rs.orig | 115 +++++++ tests/int_slack.rs | 2 - tests/int_slack.rs.orig | 207 +++++++++++ tests/int_validation_cache.rs | 1 - tests/int_validation_cache.rs.orig | 211 ++++++++++++ tests/int_vulnerable_files.rs | 2 - tests/int_vulnerable_files.rs.orig | 257 ++++++++++++++ 20 files changed, 2283 insertions(+), 16 deletions(-) create mode 100644 src/cli/commands/scan.rs.orig create mode 100644 src/main.rs.orig create mode 100644 src/reporter/json_format.rs.orig create mode 100644 tests/int_dedup.rs.orig create mode 100644 tests/int_github.rs.orig create mode 100644 tests/int_gitlab.rs.orig create mode 100644 tests/int_redact.rs.orig create mode 100644 tests/int_slack.rs.orig create mode 100644 tests/int_validation_cache.rs.orig create mode 100644 tests/int_vulnerable_files.rs.orig diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs index fe4d5d6..ae2b4f0 100644 --- a/src/cli/commands/scan.rs +++ b/src/cli/commands/scan.rs @@ -99,10 +99,6 @@ pub struct ScanArgs { #[command(flatten)] pub output_args: OutputArgs, - /// Bytes of context before and after each match - #[arg(long, default_value_t = 256, value_name = "BYTES")] - pub snippet_length: usize, - /// Baseline file to filter known secrets #[arg(long, value_name = "FILE")] pub baseline_file: Option, diff --git a/src/cli/commands/scan.rs.orig b/src/cli/commands/scan.rs.orig new file mode 100644 index 0000000..fe4d5d6 --- /dev/null +++ b/src/cli/commands/scan.rs.orig @@ -0,0 +1,132 @@ +use clap::{Args, ValueEnum}; +use strum::Display; +use tracing::debug; + +use crate::{ + cli::{ + commands::{ + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + }, + global::RAM_GB, + }, + rules::rule::Confidence, +}; + +/// Determine the default number of parallel scan jobs. +/// +/// * Target = `num_cpus * 2`. +/// * Cap by RAM at ≈ 1 GiB per job (so 16 GiB ⇒ max 16 jobs). +/// * Always ≥ 1. +/// * When `-v/--verbose` is passed, the computed value is logged at DEBUG. +fn default_scan_jobs() -> usize { + // How many logical CPUs do we see? (Falls back to 1 on error.) + let cpu_count = std::thread::available_parallelism().map(usize::from).unwrap_or(1); + + // Desired parallelism is CPU * 2. + let desired = cpu_count * 2; + + match *RAM_GB { + // If we know how much RAM we have, cap by a 1 GiB-per-job heuristic. + Some(ram_gb) => { + let max_by_ram = ram_gb.ceil() as usize; // 1 GiB per job + let jobs = desired.min(max_by_ram).max(1); + + debug!( + "Using {jobs} parallel scan jobs \ + (cpus = {cpu_count}, desired = {desired}, \ + ram = {ram_gb:.1} GiB, cap_by_ram = {max_by_ram})" + ); + jobs + } + // If RAM is unknown, just use the desired value. + None => { + debug!("Using {desired} parallel scan jobs (cpus = {cpu_count}, ram unknown)"); + desired + } + } +} + +/// `kingfisher scan` command and flags +#[derive(Args, Debug, Clone)] +pub struct ScanArgs { + /// Number of parallel scanning threads + #[arg(long = "jobs", short = 'j', default_value_t = default_scan_jobs())] + pub num_jobs: usize, + + #[command(flatten)] + pub rules: RuleSpecifierArgs, + + #[command(flatten)] + pub input_specifier_args: InputSpecifierArgs, + + #[command(flatten)] + pub content_filtering_args: ContentFilteringArgs, + + /// Minimum confidence level for reporting findings + #[arg(long, short = 'c', default_value = "medium")] + pub confidence: ConfidenceLevel, + + /// Disable secret validation + #[arg(long, short = 'n', default_value_t = false)] + pub no_validate: bool, + + /// Display only validated findings + #[arg(long, default_value_t = false)] + pub only_valid: bool, + + /// Override the default minimum entropy threshold + #[arg(long, short = 'e')] + pub min_entropy: Option, + + /// Show performance statistics for each rule + #[arg(long, default_value_t = false)] + pub rule_stats: bool, + + /// Display every occurrence of a finding + #[arg(long, default_value_t = false)] + pub no_dedup: bool, + + /// Redact findings values using a secure hash + #[arg(long, short = 'r', default_value_t = false)] + pub redact: bool, + + /// Timeout for Git repository scanning in seconds + #[arg(long, default_value_t = 1800, value_name = "SECONDS")] + pub git_repo_timeout: u64, + + #[command(flatten)] + pub output_args: OutputArgs, + + /// Bytes of context before and after each match + #[arg(long, default_value_t = 256, value_name = "BYTES")] + pub snippet_length: usize, + + /// Baseline file to filter known secrets + #[arg(long, value_name = "FILE")] + pub baseline_file: Option, + + /// Create or update the baseline file with current findings + #[arg(long, default_value_t = false)] + pub manage_baseline: bool, +} + +/// Confidence levels for findings +#[derive(Copy, Clone, Debug, Display, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] +#[strum(serialize_all = "kebab-case")] +pub enum ConfidenceLevel { + Low, + Medium, + High, +} + +impl From for Confidence { + fn from(level: ConfidenceLevel) -> Self { + match level { + ConfidenceLevel::Low => Confidence::Low, + ConfidenceLevel::Medium => Confidence::Medium, + ConfidenceLevel::High => Confidence::High, + } + } +} diff --git a/src/main.rs b/src/main.rs index 58145e6..ac78ef7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -319,7 +319,6 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { redact: false, git_repo_timeout: 1800, no_dedup: false, - snippet_length: 256, baseline_file: None, manage_baseline: false, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, diff --git a/src/main.rs.orig b/src/main.rs.orig new file mode 100644 index 0000000..58145e6 --- /dev/null +++ b/src/main.rs.orig @@ -0,0 +1,528 @@ +// ──────────────────────────────────────────────────────────── +// Global allocator setup +// * Default - mimalloc (no feature flags) +// * Debug - jemalloc (`use-jemalloc` feature) +// * Fallback - system allocator (`system-alloc` feature) +// ──────────────────────────────────────────────────────────── + +// --- jemalloc (opt-in) --- +#[cfg(feature = "use-jemalloc")] +#[global_allocator] +static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; + +// --- mimalloc (default) --- +#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] +#[global_allocator] +static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; + +// --- system allocator (explicit opt-out) --- +#[cfg(feature = "system-alloc")] +use std::alloc::System; +#[cfg(feature = "system-alloc")] +#[global_allocator] +static GLOBAL: System = System; + +// use std::alloc::System; +// #[global_allocator] +// static GLOBAL: System = System; + +use std::{ + io::Read, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + self, + commands::{ + github::{ + GitCloneMode, GitHistoryMode, GitHubCommand, GitHubRepoType, GitHubReposCommand, + }, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::{ + RuleSpecifierArgs, RulesCheckArgs, RulesCommand, RulesListArgs, + RulesListOutputFormat, + }, + }, + global::Command, + CommandLineArgs, GlobalArgs, + }, + findings_store, + findings_store::FindingsStore, + github, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::{load_and_record_rules, run_scan}, + update::check_for_update, +}; +use serde_json::json; +use tempfile::TempDir; +use term_size; +use tokio::runtime::Builder; +use tracing::{error, info, warn}; +use tracing_core::metadata::LevelFilter; +use tracing_subscriber::{ + self, fmt, prelude::__tracing_subscriber_SubscriberExt, registry, util::SubscriberInitExt, +}; +use url::Url; + +use crate::cli::commands::gitlab::{GitLabCommand, GitLabRepoType, GitLabReposCommand}; + +fn main() -> anyhow::Result<()> { + color_backtrace::install(); + // Parse command-line arguments + let args = CommandLineArgs::parse_args(); + + // Determine the number of jobs, defaulting to the number of CPUs + let num_jobs = match args.command { + Command::Scan(ref scan_args) => scan_args.num_jobs, + Command::GitHub(_) => num_cpus::get(), // Default for GitHub commands + Command::GitLab(_) => num_cpus::get(), // Default for GitLab commands + Command::Rules(_) => num_cpus::get(), // Default for Rules commands + }; + + // Set up the Tokio runtime with the specified number of threads + let runtime = Builder::new_multi_thread() + .worker_threads(num_jobs) + .enable_all() + .build() + .context("Failed to create Tokio runtime")?; + runtime.block_on(async_main(args)) +} + +fn setup_logging(global_args: &GlobalArgs) { + // Determine log level based on global verbosity + let (level, all_targets) = if global_args.quiet { + (LevelFilter::ERROR, false) + } else { + let level = match global_args.verbose { + 0 => LevelFilter::INFO, // Default level if no `-v` is provided + 1 => LevelFilter::DEBUG, // `-v` + 2 => LevelFilter::TRACE, // `-vv` + _ => LevelFilter::TRACE, // `-vvv` or more + }; + let all_targets = global_args.verbose > 2; // Enable all targets for `-vvv` or more + (level, all_targets) + }; + // Create a filter for logging + let filter = if all_targets { + // Enable TRACE for all modules + tracing_subscriber::filter::Targets::new().with_default(LevelFilter::TRACE) + } else { + // Per-target filtering, only TRACE for `kingfisher` + tracing_subscriber::filter::Targets::new() + .with_default(LevelFilter::ERROR) // Default for all modules + .with_target("kingfisher", level) // Replace `kingfisher` with your + // crate's name + }; + // Configure the formatter layer + let fmt_layer = fmt::layer() + .with_writer(std::io::stderr) // Write logs to stderr + .with_target(true) // Enable target filtering + .with_ansi(false) // Disable colors + .without_time(); // Remove timestamps + // Build and initialize the registry + registry() + .with(fmt_layer) // Attach the formatter layer + .with(filter) // Attach the filter + .init(); +} + +pub fn determine_exit_code(datastore: &Arc>) -> i32 { + // exit with code 200 if _any_ findings are discovered + // exit with code 205 if VALIDATED findings are discovered + // exit with code 0 if there are NO findings discovered + let ds = datastore.lock().unwrap(); + // Get all matches + // let all_matches = ds.get_matches(); + + // Only consider visible matches when determining the exit code + let all_matches = ds + .get_matches() + .iter() + .filter(|msg| { + let (_, _, match_item) = &***msg; + match_item.visible + }) + .collect::>(); + + if all_matches.is_empty() { + // No findings discovered + 0 + } else { + // Check if there are any validated findings + let validated_matches = all_matches + .iter() + .filter(|msg| { + let (_, _, match_item) = &****msg; + match_item.validation_success + }) + .count(); + if validated_matches > 0 { + // Validated findings discovered + 205 + } else { + // Findings discovered, but not validated + 200 + } + } +} + +async fn async_main(args: CommandLineArgs) -> Result<()> { + // Create a temporary directory + let temp_dir = TempDir::new().context("Failed to create temporary directory")?; + let clone_dir = temp_dir.path().to_path_buf(); + + // Create the in-memory datastore + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + setup_logging(&args.global_args); + let update_msg = check_for_update(&args.global_args, None); + match args.command { + Command::Scan(mut scan_args) => { + // ————————————————————————————————————————— + // If no paths or a single "-", slurp stdin into a temp file + // ————————————————————————————————————————— + info!( + "Launching with {} concurrent scan jobs. Use --num-jobs to override.", + &scan_args.num_jobs + ); + let paths = &scan_args.input_specifier_args.path_inputs; + let is_dash = paths.iter().any(|p| p.as_os_str() == "-"); + if (paths.is_empty() || is_dash) && !atty::is(atty::Stream::Stdin) { + // read all stdin + let mut buf = Vec::new(); + std::io::stdin().read_to_end(&mut buf)?; + // write into temp_dir + let stdin_file = temp_dir.path().join("stdin_input"); + std::fs::write(&stdin_file, buf)?; + // replace inputs + scan_args.input_specifier_args.path_inputs = vec![stdin_file.into()]; + } + + // now proceed exactly as before + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + run_scan(&args.global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await?; + let exit_code = determine_exit_code(&datastore); + + if let Err(e) = temp_dir.close() { + eprintln!("Failed to close temporary directory: {}", e); + } + std::process::exit(exit_code); + } + Command::Rules(ref rule_args) => match &rule_args.command { + RulesCommand::Check(check_args) => { + run_rules_check(&check_args)?; + } + RulesCommand::List(list_args) => { + run_rules_list(&list_args)?; + } + }, + Command::GitHub(github_args) => match github_args.command { + GitHubCommand::Repos(repos_command) => match repos_command { + GitHubReposCommand::List(list_args) => { + github::list_repositories( + github_args.github_api_url, + args.global_args.ignore_certs, + args.global_args.use_progress(), + &list_args.repo_specifiers.user, + &list_args.repo_specifiers.organization, + list_args.repo_specifiers.all_organizations, + list_args.repo_specifiers.repo_type.into(), + ) + .await?; + } + }, + }, + Command::GitLab(gitlab_args) => match gitlab_args.command { + GitLabCommand::Repos(repos_command) => match repos_command { + GitLabReposCommand::List(list_args) => { + kingfisher::gitlab::list_repositories( + gitlab_args.gitlab_api_url, + args.global_args.ignore_certs, + args.global_args.use_progress(), + &list_args.repo_specifiers.user, + &list_args.repo_specifiers.group, + list_args.repo_specifiers.all_groups, + list_args.repo_specifiers.repo_type.into(), + ) + .await?; + } + }, + }, + } + if let Some(msg) = update_msg { + info!("{msg}"); + } + Ok(()) +} + +/// Create a default ScanArgs instance for rule loading +fn create_default_scan_args() -> cli::commands::scan::ScanArgs { + use cli::commands::scan::*; + ScanArgs { + num_jobs: 1, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: url::Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::All, + + jira_url: None, + jql: None, + max_results: 100, + + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Slack query + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + + // Docker image scanning + docker_image: Vec::new(), + + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: true, + extraction_depth: 2, + exclude: Vec::new(), // Exclude patterns + no_binary: true, + }, + confidence: ConfidenceLevel::Medium, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, + no_dedup: false, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + } +} +/// Run the rules check command +pub fn run_rules_check(args: &RulesCheckArgs) -> Result<()> { + let mut num_errors = 0; + let mut num_warnings = 0; + // Load and check rules + let loader = RuleLoader::from_rule_specifiers(&args.rules); + let loaded = loader.load(&create_default_scan_args())?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; + + // Check each rule + for (rule_index, rule) in rules_db.rules().iter().enumerate() { + let rule_syntax = rule.syntax(); + // Basic rule validation checks + if rule.name().len() < 3 { + warn!("Rule '{}' has a very short name", rule.name()); + num_warnings += 1; + } + if rule.syntax().pattern.len() < 5 { + warn!("Rule '{}' has a very short pattern", rule.name()); + num_warnings += 1; + } + if rule.syntax().examples.is_empty() { + warn!("Rule '{}' has no examples", rule.name()); + num_warnings += 1; + continue; + } + // Check regex compilation + if let Err(e) = rule.syntax().as_regex() { + error!("Rule '{}' has invalid regex: {}", rule.name(), e); + num_errors += 1; + continue; + } + // Test each example against both vectorscan and regex + for (example_index, example) in rule_syntax.examples.iter().enumerate() { + // Create a test blob from the example + // let blob = Blob::new(BlobId::new(example.as_bytes()), + // example.as_bytes().to_vec()); let origin = OriginSet::new( + // Origin::from_file(PathBuf::from("test_example")), + // Vec::new(), + // ); + // // Check vectorscan match + // let vectorscan_matched = match matcher.scan_blob(&blob, &origin, None)? { + // ScanResult::New(matches) => !matches.is_empty(), + // _ => false, + // }; + // Check regex match + // Get the regex using the public method + let re = + rules_db.get_regex_by_rule_id(rule.id()).expect("Failed to get regex for rule"); + let regex_matched = re.is_match(example.as_bytes()); + if !regex_matched { + // ||!vectorscan_matched { + println!("\nTesting rule {} - {}", rule_index + 1, rule_syntax.name); + println!(" Processing example {}", example_index + 1); + println!(" [!] Mismatch detected for example: {}", example); + // if !vectorscan_matched { + // println!(" Vectorscan match: {}", vectorscan_matched); + // num_errors += 1; + // } + if !regex_matched { + println!(" Regex match: {}", regex_matched); + num_errors += 1; + } + } + + // // Report any mismatches + // if !vectorscan_matched || !regex_matched { + // error!("Rule '{}' example {} failed validation:", + // rule.name(), example_index + 1); println!(" + // Example text: {}", example); + + // if !vectorscan_matched { + // error!(" - Vectorscan pattern did not match example"); + // num_errors += 1; + // } + + // if !regex_matched { + // error!(" - Regex pattern did not match example"); + // num_errors += 1; + // } + // } + } + } + // Print summary + if num_errors > 0 || num_warnings > 0 { + println!("\nCheck Summary:"); + println!(" Errors: {}", num_errors); + println!(" Warnings: {}", num_warnings); + println!("\nError types include:"); + println!(" - Invalid regex patterns"); + println!(" - Examples that don't match their patterns"); + println!("\nWarning types include:"); + println!(" - Rules with very short names"); + println!(" - Rules with very short patterns"); + println!(" - Rules without examples"); + } else { + println!("\nAll rules passed validation successfully!"); + } + // Exit with error if there are errors or if warnings are treated as errors + if num_errors > 0 || (args.warnings_as_errors && num_warnings > 0) { + std::process::exit(1); + } + Ok(()) +} +/// Run the rules list command +pub fn run_rules_list(args: &RulesListArgs) -> Result<()> { + // Load rules + let loader = RuleLoader::from_rule_specifiers(&args.rules); + let loaded = loader.load(&create_default_scan_args())?; + let resolved = loaded.resolve_enabled_rules()?; + let mut writer = args.output_args.get_writer()?; + match args.output_args.format { + RulesListOutputFormat::Pretty => { + // Determine terminal width if possible, otherwise use default + let term_width = term_size::dimensions().map(|(w, _)| w).unwrap_or(120); + // First pass: calculate column widths + let max_name_width = resolved.iter().map(|r| r.name().len()).max().unwrap_or(0).max(4); // "Rule" header + let max_id_width = resolved.iter().map(|r| r.id().len()).max().unwrap_or(0).max(2); // "ID" header + let max_conf_width = resolved + .iter() + .map(|r| format!("{:?}", r.confidence()).len()) + .max() + .unwrap_or(0) + .max(10); // "Confidence" header + // Calculate pattern width based on terminal width + let reserved_width = max_name_width + max_id_width + max_conf_width + 10; + let pattern_width = term_width.saturating_sub(reserved_width); + // Format pattern on a single line + let format_pattern = |pattern: &str| { + let single_line = pattern + .replace('\n', " ") + .replace('\r', " ") + .split_whitespace() + .collect::>() + .join(" "); + if single_line.len() > pattern_width { + format!("{}...", &single_line[..pattern_width.saturating_sub(3)]) + } else { + single_line + } + }; + // Print header + writeln!( + writer, + "\n{:name_width$} │ {:id_width$} │ {:conf_width$} │ Pattern", + "Rule", + "ID", + "Confidence", + name_width = max_name_width, + id_width = max_id_width, + conf_width = max_conf_width + )?; + // Print separator + writeln!( + writer, + "{0:─ { + // Create JSON format + let rules_json: Vec<_> = resolved + .iter() + .map(|rule| { + json!({ + "name": rule.name(), + "id": rule.id(), + "pattern": rule.syntax().pattern, + "confidence": rule.confidence(), + "examples": rule.syntax().examples, + "visible": rule.visible(), + }) + }) + .collect(); + serde_json::to_writer_pretty(&mut writer, &rules_json)?; + writeln!(writer)?; + } + } + Ok(()) +} diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index aae16fc..d9eda56 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -119,7 +119,6 @@ mod tests { redact: false, git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - snippet_length: 256, baseline_file: None, manage_baseline: false, } diff --git a/src/reporter/json_format.rs.orig b/src/reporter/json_format.rs.orig new file mode 100644 index 0000000..aae16fc --- /dev/null +++ b/src/reporter/json_format.rs.orig @@ -0,0 +1,270 @@ +use super::*; + +impl DetailsReporter { + pub fn json_format( + &self, + mut writer: W, + args: &cli::commands::scan::ScanArgs, + ) -> Result<()> { + let records = self.build_finding_records(args)?; + if !records.is_empty() { + serde_json::to_writer_pretty(&mut writer, &records)?; + writeln!(writer)?; + } + Ok(()) + } + + pub fn jsonl_format( + &self, + mut writer: W, + args: &cli::commands::scan::ScanArgs, + ) -> Result<()> { + let records = self.build_finding_records(args)?; + for record in records { + serde_json::to_writer(&mut writer, &record)?; + writeln!(writer)?; + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::cli::commands::github::GitCloneMode; + use crate::cli::commands::github::GitHistoryMode; + use crate::cli::commands::rules::RuleSpecifierArgs; + use crate::matcher::{SerializableCapture, SerializableCaptures}; + use crate::util::intern; + use crate::{ + blob::BlobId, + cli::commands::github::GitHubRepoType, + cli::commands::inputs::ContentFilteringArgs, + cli::commands::inputs::InputSpecifierArgs, + cli::commands::output::{OutputArgs, ReportOutputFormat}, + cli::commands::scan::ConfidenceLevel, + findings_store::FindingsStore, + location::{Location, OffsetSpan, SourcePoint, SourceSpan}, + matcher::Match, + origin::Origin, + reporter::styles::Styles, + }; + use std::{ + io::Cursor, + path::PathBuf, + sync::{Arc, Mutex}, + }; + use url::Url; + fn create_default_args() -> cli::commands::scan::ScanArgs { + use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope + + cli::commands::scan::ScanArgs { + num_jobs: 1, + no_dedup: false, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + // local path / git URL inputs + path_inputs: Vec::new(), + git_url: Vec::new(), + + // GitHub + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + + // GitLab + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::All, + // Jira options + jira_url: None, + jql: None, + max_results: 100, + // Slack options + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + + docker_image: Vec::new(), + // clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + exclude: Vec::new(), // Exclude patterns + no_binary: true, + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + } + } + + fn create_mock_match( + rule_name: &str, + rule_text_id: &str, + rule_finding_fingerprint: &str, + validation_success: bool, + ) -> Match { + Match { + location: Location { + offset_span: OffsetSpan { start: 10, end: 20 }, + source_span: SourceSpan { + start: SourcePoint { line: 5, column: 10 }, + end: SourcePoint { line: 5, column: 20 }, + }, + }, + groups: SerializableCaptures { + captures: vec![SerializableCapture { + name: Some("token".to_string()), + match_number: 1, + start: 10, + end: 20, + value: "mock_token".into(), + }], + }, + blob_id: BlobId::new(b"mock_blob"), + finding_fingerprint: 0123, + rule_finding_fingerprint: intern(rule_finding_fingerprint), + rule_text_id: intern(rule_text_id), + rule_name: intern(rule_name), + rule_confidence: Confidence::Medium, + validation_response_body: "validation response".to_string(), + validation_response_status: 200, + validation_success, + calculated_entropy: 4.5, + visible: true, + } + } + + fn setup_mock_reporter(matches: Vec) -> DetailsReporter { + let mut datastore = FindingsStore::new(PathBuf::from("/tmp")); + if !matches.is_empty() { + let blob_metadata = BlobMetadata { + id: BlobId::new(b"mock_blob"), + num_bytes: 1024, + mime_essence: Some("text/plain".to_string()), + charset: Some("UTF-8".to_string()), + language: Some("Rust".to_string()), + }; + let dedup = true; + for m in matches.clone() { + datastore.record( + vec![( + Arc::new(OriginSet::new( + Origin::from_file(PathBuf::from("/mock/path/file.rs")), + vec![], + )), + Arc::new(blob_metadata.clone()), + m.m.clone(), + )], + dedup, + ); + } + } + DetailsReporter { + datastore: Arc::new(Mutex::new(datastore)), + styles: Styles::new(false), + only_valid: false, + } + } + + #[test] + fn test_json_format() -> Result<()> { + let mock_match = + create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true); + let matches = vec![ReportMatch { + origin: OriginSet::new(Origin::from_file(PathBuf::from("/mock/path/file.rs")), vec![]), + blob_metadata: BlobMetadata { + id: BlobId::new(b"mock_blob"), + num_bytes: 1024, + mime_essence: Some("text/plain".to_string()), + charset: Some("UTF-8".to_string()), + language: Some("Rust".to_string()), + }, + m: mock_match, + comment: None, + match_confidence: Confidence::Medium, + visible: true, + validation_response_body: "validation response".to_string(), + validation_response_status: 200, + validation_success: true, + }]; + let reporter = setup_mock_reporter(matches); + let mut output = Cursor::new(Vec::new()); + reporter.json_format(&mut output, &create_default_args())?; + let json_output: Vec = serde_json::from_slice(&output.into_inner())?; + assert!(!json_output.is_empty(), "JSON output should not be empty"); + let first = &json_output[0]; + assert_eq!(first["rule"]["name"], "MockRule"); + assert_eq!(first["finding"]["language"], "Rust"); + Ok(()) + } + + #[test] + fn test_validation_status_in_json() -> Result<()> { + let test_cases = vec![(true, "Active Credential"), (false, "Inactive Credential")]; + for (validation_success, expected_status) in test_cases { + let mock_match = create_mock_match( + "MockRule", + "mock_rule_1", + "mock_finding_fingerprint", + validation_success, + ); + let matches = vec![ReportMatch { + origin: OriginSet::new( + Origin::from_file(PathBuf::from("/mock/path/file.rs")), + vec![], + ), + blob_metadata: BlobMetadata { + id: BlobId::new(b"mock_blob"), + num_bytes: 1024, + mime_essence: Some("text/plain".to_string()), + charset: Some("UTF-8".to_string()), + language: Some("Rust".to_string()), + }, + m: mock_match, + comment: None, + match_confidence: Confidence::Medium, + visible: true, + validation_response_body: "validation response".to_string(), + validation_response_status: 200, + validation_success, + }]; + let reporter = setup_mock_reporter(matches); + let mut output = Cursor::new(Vec::new()); + reporter.json_format(&mut output, &create_default_args())?; + let json_output: Vec = serde_json::from_slice(&output.into_inner())?; + assert!(!json_output.is_empty(), "JSON output should not be empty"); + let first = &json_output[0]; + let validation_status = first["finding"]["validation"]["status"].as_str().unwrap(); + assert_eq!(validation_status, expected_status); + } + Ok(()) + } +} diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 0c93023..4c3be19 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -113,7 +113,6 @@ rules: git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup, - snippet_length: 64, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_dedup.rs.orig b/tests/int_dedup.rs.orig new file mode 100644 index 0000000..0c93023 --- /dev/null +++ b/tests/int_dedup.rs.orig @@ -0,0 +1,171 @@ +//! Proves that run_async_scan collapses identical findings when +//! ── no_dedup == false ── +//! while keeping them separate when no_dedup == true. + +use std::{ + fs, + sync::{Arc, Mutex}, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use tokio::runtime::Runtime; +use url::Url; + +/// Helper: run a scan with the supplied `no_dedup` flag and return how many +/// findings the `FindingsStore` ends up containing. +fn run_scan(count_rt: &Runtime, no_dedup: bool) -> Result { + // ── temp workspace ────────────────────────────────────────────── + let work = TempDir::new()?; + let rules_dir = work.path().join("rules"); + fs::create_dir_all(&rules_dir)?; + let inputs_dir = work.path().join("in"); + fs::create_dir_all(&inputs_dir)?; + + // 1. Tiny custom rule that matches `secret_1234` + fs::write( + rules_dir.join("demo.yml"), + r#" +rules: + - id: demo.secret + name: Demo secret + pattern: "secret_[0-9]{4}" + confidence: low +"#, + )?; + + // 2. Two different blobs that both contain the SAME secret + fs::write(inputs_dir.join("a.txt"), "secret_1234\n")?; + fs::write(inputs_dir.join("b.txt"), "secret_1234\n")?; + + // ── build ScanArgs ────────────────────────────────────────────── + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: vec![rules_dir.clone()], + rule: vec!["all".into()], + load_builtins: false, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: vec![inputs_dir.join("a.txt"), inputs_dir.join("b.txt")], + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 5.0, + extraction_depth: 1, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup, + snippet_length: 64, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Never, + progress: Mode::Never, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 8192 }, + }; + + // ── load rules once ───────────────────────────────────────────── + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = Arc::new(RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?); + + // Fresh FindingsStore for this run + let store_path = work.path().join("store"); + fs::create_dir_all(&store_path)?; + let datastore = Arc::new(Mutex::new(FindingsStore::new(store_path))); + + // run_async_scan is async – use the supplied Tokio runtime + count_rt.block_on(run_async_scan( + &global_args, + &scan_args, + Arc::clone(&datastore), + &rules_db, + ))?; + + let x = Ok(datastore.lock().unwrap().get_matches().len()); + x +} + +#[test] +fn test_dedup_branch() -> Result<()> { + // A *single* runtime reused for both scans keeps the test fast + let rt = Runtime::new().unwrap(); + + let findings_with_dups = run_scan(&rt, true)?; // keep duplicates + let findings_deduped = run_scan(&rt, false)?; // collapse duplicates + + assert!( + findings_with_dups > findings_deduped, + "expected deduplication to reduce finding count ({} -- {})", + findings_with_dups, + findings_deduped + ); + assert_eq!(findings_deduped, 1, "exactly one unique finding should remain after dedup"); + + Ok(()) +} diff --git a/tests/int_github.rs b/tests/int_github.rs index 2892b91..8edc022 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -100,7 +100,6 @@ fn test_github_remote_scan() -> Result<()> { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 256, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_github.rs.orig b/tests/int_github.rs.orig new file mode 100644 index 0000000..2892b91 --- /dev/null +++ b/tests/int_github.rs.orig @@ -0,0 +1,149 @@ +// tests/int_github.rs +use std::{ + str::FromStr, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + git_url::GitUrl, + scanner::{load_and_record_rules, run_scan}, +}; +use tempfile::TempDir; +use tokio::runtime::Runtime; +use url::Url; +/// Helper function to determine exit code based on findings +fn determine_exit_code(total_findings: usize, validated_findings: usize) -> i32 { + if total_findings == 0 { + 0 // No findings discovered + } else if validated_findings > 0 { + 205 // Validated findings discovered + } else { + 200 // Findings discovered but none validated + } +} +#[test] +fn test_github_remote_scan() -> Result<()> { + // Create a temporary directory for the scan + let temp_dir = TempDir::new().context("Failed to create temporary directory")?; + let clone_dir = temp_dir.path().to_path_buf(); + // Create test repository URL + let test_repo_url = "https://github.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("Failed to parse Git URL"); + // Create scan arguments + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + // Create global arguments + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + // Create in-memory datastore + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + // Create the runtime first + let runtime = Runtime::new().expect("Failed to create Tokio runtime"); + // Load rules + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + // Run the scan using runtime.block_on + runtime.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + // Get scan results + let ds = datastore.lock().unwrap(); + let matches = ds.get_matches(); + let total_findings = matches.len(); + let validated_findings = matches.iter().filter(|arc| arc.as_ref().2.validation_success).count(); + + // Print validation statistics + println!("Total findings: {}, Validated findings: {}", total_findings, validated_findings); + // Check total number of findings + assert!(total_findings >= 10, "Expected at least 10 findings, but got {}", total_findings); + // Determine exit code + let exit_code = determine_exit_code(total_findings, validated_findings); + // Test passes if we found some kind of findings (exit code >= 200) + assert!( + exit_code >= 200, + "Test failed: Expected to find vulnerabilities (exit code >= 200), got exit code {}", + exit_code + ); + // Drop the runtime explicitly here, outside of async context + drop(runtime); + Ok(()) +} diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index fa3e169..4668439 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -98,7 +98,6 @@ fn test_gitlab_remote_scan() -> Result<()> { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 256, baseline_file: None, manage_baseline: false, }; @@ -200,7 +199,6 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { git_repo_timeout: 1800, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 256, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_gitlab.rs.orig b/tests/int_gitlab.rs.orig new file mode 100644 index 0000000..fa3e169 --- /dev/null +++ b/tests/int_gitlab.rs.orig @@ -0,0 +1,243 @@ +// tests/int_gitlab.rs +use std::{ + str::FromStr, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + git_url::GitUrl, + scanner::{load_and_record_rules, run_scan}, +}; +use tempfile::TempDir; +use tokio::runtime::Runtime; +use url::Url; + +/// Derive process exit-codes from findings +fn determine_exit_code(total: usize, validated: usize) -> i32 { + match (total, validated) { + (0, _) => 0, + (_, v) if v > 0 => 205, + _ => 200, + } +} + +#[test] +fn test_gitlab_remote_scan() -> Result<()> { + let temp_dir = TempDir::new().context("tmp dir")?; + let clone_dir = temp_dir.path().to_path_buf(); + + // Public GitLab repo seeded with test secrets + let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/")?, + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/")?, + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16_384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + let rt = Runtime::new()?; + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + + rt.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + + let ds = datastore.lock().unwrap(); + let findings = ds.get_matches(); + let total = findings.len(); + let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); + + assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); + + let exit_code = determine_exit_code(total, validated); + assert!( + exit_code >= 200, + "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" + ); + + drop(rt); + Ok(()) +} + +#[test] +fn test_gitlab_remote_scan_no_history() -> Result<()> { + let temp_dir = TempDir::new().context("tmp dir")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/")?, + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/")?, + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::None, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16_384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + let rt = Runtime::new()?; + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + + rt.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + + let ds = datastore.lock().unwrap(); + let findings = ds.get_matches(); + let total = findings.len(); + let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); + + assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); + + let exit_code = determine_exit_code(total, validated); + assert!( + exit_code >= 200, + "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" + ); + + drop(rt); + Ok(()) +} diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 796d019..45f3767 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -80,7 +80,6 @@ async fn test_redact_hashes_finding_values() -> Result<()> { git_repo_timeout: 1800, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 256, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_redact.rs.orig b/tests/int_redact.rs.orig new file mode 100644 index 0000000..796d019 --- /dev/null +++ b/tests/int_redact.rs.orig @@ -0,0 +1,115 @@ +// Integration test to ensure --redact replaces secret values with hashes +use std::{ + path::PathBuf, + sync::{Arc, Mutex}, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, GlobalArgs, Mode}, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; + +#[tokio::test] +async fn test_redact_hashes_finding_values() -> Result<()> { + let temp_dir = TempDir::new()?; + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: vec![PathBuf::from("testdata/generic_secrets.py")], + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: true, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Never, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(temp_dir.path().to_path_buf()))); + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &rules_db).await?; + + let ds = datastore.lock().unwrap(); + let matches = ds.get_matches(); + assert!(!matches.is_empty()); + for m_arc in matches { + let m = &m_arc.2; + assert!(m.groups.captures.iter().any(|cap| cap.value.starts_with("[REDACTED:"))); + } + + Ok(()) +} diff --git a/tests/int_slack.rs b/tests/int_slack.rs index d22b8f0..7284e1d 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -86,7 +86,6 @@ impl TestContext { git_repo_timeout: 1800, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 128, baseline_file: None, manage_baseline: false, }; @@ -178,7 +177,6 @@ async fn test_scan_slack_messages() -> Result<()> { git_repo_timeout: 1800, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 128, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_slack.rs.orig b/tests/int_slack.rs.orig new file mode 100644 index 0000000..d22b8f0 --- /dev/null +++ b/tests/int_slack.rs.orig @@ -0,0 +1,207 @@ +use std::{ + env, + sync::{Arc, Mutex}, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; +use wiremock::{ + matchers::{method, path}, + Mock, MockServer, ResponseTemplate, +}; + +struct TestContext { + rules_db: Arc, +} + +impl TestContext { + fn new() -> Result { + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + max_results: 10, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 128, + baseline_file: None, + manage_baseline: false, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; + Ok(Self { rules_db: Arc::new(rules_db) }) + } +} + +#[tokio::test] +async fn test_scan_slack_messages() -> Result<()> { + let ctx = TestContext::new()?; + + let server = MockServer::start().await; + let response = serde_json::json!({ + "ok": true, + "messages": { + "matches": [{ + "permalink": "https://example.slack.com/archives/C123/p1234", + "text": "This contains a github token ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa", + "ts": "1234.56", + "channel": {"id": "C123", "name": "general"} + }], + "pagination": {"page": 1, "page_count": 1} + } + }); + Mock::given(method("GET")) + .and(path("/search.messages")) + .respond_with(ResponseTemplate::new(200).set_body_json(response)) + .mount(&server) + .await; + + env::set_var("KF_SLACK_TOKEN", "xoxp-test"); + + let temp_dir = TempDir::new()?; + let clone_dir = temp_dir.path().to_path_buf(); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + slack_query: Some("test".into()), + slack_api_url: Url::parse(&format!("{}/", server.uri()))?, + max_results: 10, + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 128, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &ctx.rules_db).await?; + + let findings = { + let ds = datastore.lock().unwrap(); + ds.get_matches().len() + }; + assert!(findings > 0); + Ok(()) +} diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index a7ab9ea..46d4521 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -156,7 +156,6 @@ async fn test_validation_cache_and_depvars() -> Result<()> { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, // keep duplicates so the cache is stressed - snippet_length: 128, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_validation_cache.rs.orig b/tests/int_validation_cache.rs.orig new file mode 100644 index 0000000..a7ab9ea --- /dev/null +++ b/tests/int_validation_cache.rs.orig @@ -0,0 +1,211 @@ +// tests/int_validation_cache.rs +use std::{ + fs, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, Mutex, + }, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; +use wiremock::{ + matchers::{method, path}, + Mock, MockServer, Request, ResponseTemplate, +}; + +#[tokio::test] +async fn test_validation_cache_and_depvars() -> Result<()> { + /* --------------------------------------------------------- * + * 1. Spin-up Wiremock and count incoming validation calls * + * --------------------------------------------------------- */ + let server = MockServer::start().await; + let hit_counter = Arc::new(AtomicUsize::new(0)); + let counter_clone = Arc::clone(&hit_counter); + + Mock::given(method("GET")) + .and(path("/validate")) + .respond_with(move |_req: &Request| { + counter_clone.fetch_add(1, Ordering::SeqCst); + ResponseTemplate::new(200).set_body_string("ok") + }) + .mount(&server) + .await; + + /* --------------------------------------------------------- * + * 2. Synthetic rules exercising depends_on_rule + HTTP val * + * --------------------------------------------------------- */ + let rules_yaml = format!( + r#" + rules: + - name: Demo API Key + id: demo.key.1 + pattern: '(demokey_[a-z0-9]{{8}})' + confidence: low + min_entropy: 0.0 + + - name: Demo API Key Validation + id: demo.key.validation.1 + depends_on_rule: + - rule_id: demo.key.1 + variable: TOKEN + pattern: '(demokey_[a-z0-9]{{8}})' + confidence: low + validation: + type: Http + content: + request: + method: GET + url: '{base}/validate?token={{ {{ TOKEN }} }}' + response_matcher: + - report_response: true + - type: WordMatch + words: + - '"error_code":"403003"' + negative: true + "#, + base = server.uri() + ); + + /* --------------------------------------------------------- * + * 3. Temp workspace: rules file + input with 2 duplicates * + * --------------------------------------------------------- */ + let work_dir = TempDir::new()?; + let rules_file = work_dir.path().join("demo.yml"); + fs::write(&rules_file, rules_yaml)?; + + let secret_file = work_dir.path().join("secrets.txt"); + fs::write(&secret_file, "demokey_abcdefgh\ndemokey_abcdefgh")?; + + /* --------------------------------------------------------- * + * 4. Build Scan / Global args (no_dedup=true to keep dups) * + * --------------------------------------------------------- */ + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: vec![work_dir.path().to_path_buf()], + rule: vec!["all".into()], + load_builtins: false, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: vec![secret_file.clone()], + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, // keep duplicates so the cache is stressed + snippet_length: 128, + baseline_file: None, + manage_baseline: false, + }; + + /* --------------------------------------------------------- * + * 5. Load rules, run scan * + * --------------------------------------------------------- */ + // --------------------------------------------------------- + // 5. Load rules, record them, run scan + // --------------------------------------------------------- + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = Arc::new(RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?); + + let datastore = Arc::new(Mutex::new(FindingsStore::new(work_dir.path().to_path_buf()))); + + // NEW: make the datastore aware of every rule + { + let mut ds = datastore.lock().unwrap(); + ds.record_rules(rules_db.rules()); // <-- **add this line** + } + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + progress: Mode::Never, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 8192 }, + }; + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &rules_db).await?; + + /* --------------------------------------------------------- * + * 6. Assertions * + * --------------------------------------------------------- */ + // There are two matches for demo.key.validation.1, but the validator + // should have been called only once thanks to SkipMap caching. + assert_eq!( + hit_counter.load(Ordering::SeqCst), + 1, + "validator endpoint should be hit exactly once" + ); + + let ds = datastore.lock().unwrap(); + let total_matches = ds.get_matches().len(); + assert_eq!(total_matches, 4, "expected 2 matches per rule (dup secrets)"); // 2 for each rule + + Ok(()) +} diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index abeb6f1..c53adae 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -99,7 +99,6 @@ impl TestContext { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 256, baseline_file: None, manage_baseline: false, }; @@ -176,7 +175,6 @@ impl TestContext { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, - snippet_length: 256, baseline_file: None, manage_baseline: false, }; diff --git a/tests/int_vulnerable_files.rs.orig b/tests/int_vulnerable_files.rs.orig new file mode 100644 index 0000000..abeb6f1 --- /dev/null +++ b/tests/int_vulnerable_files.rs.orig @@ -0,0 +1,257 @@ +// tests/integration_scan.rs + +use std::{ + path::{Path, PathBuf}, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; + +#[derive(Debug)] +struct TestCase { + file_name: &'static str, + min_expected_findings: usize, +} + +struct TestContext { + rules_db: Arc, +} + +fn root_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) +} + +impl TestContext { + fn new() -> Result { + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules) + .load(&scan_args) + .context("Failed to load rules")?; + + let resolved = loaded.resolve_enabled_rules().context("Failed to resolve rules")?; + + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect()) + .context("Failed to compile rules")?; + + Ok(Self { rules_db: Arc::new(rules_db) }) + } + + async fn scan_file(&self, file_path: &Path) -> Result { + let temp_dir = TempDir::new().context("Failed to create temporary directory")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: vec![file_path.to_path_buf()], + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + snippet_length: 256, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &self.rules_db).await?; + + let findings = { + let ds = datastore.lock().unwrap(); + ds.get_matches().len() + }; + + Ok(findings) + } +} + +#[tokio::test] +async fn test_scan_vulnerable_files() -> Result<()> { + let test_context = TestContext::new()?; + + let test_cases = vec![ + TestCase { file_name: "testdata/c_vulnerable.c", min_expected_findings: 3 }, + TestCase { file_name: "testdata/cpp_vulnerable.cpp", min_expected_findings: 3 }, + TestCase { file_name: "testdata/csharp_vulnerable.cs", min_expected_findings: 4 }, + TestCase { file_name: "testdata/elixir_vulnerable.exs", min_expected_findings: 1 }, + TestCase { file_name: "testdata/generic_secrets.py", min_expected_findings: 9 }, + TestCase { file_name: "testdata/go_vulnerable.go", min_expected_findings: 4 }, + TestCase { file_name: "testdata/java_vulnerable.java", min_expected_findings: 4 }, + TestCase { file_name: "testdata/javascript_vulnerable.js", min_expected_findings: 4 }, + TestCase { file_name: "testdata/json_vulnerable.json", min_expected_findings: 4 }, + TestCase { file_name: "testdata/kotlin_vulnerable.kt", min_expected_findings: 7 }, + TestCase { file_name: "testdata/objc_vulnerable.m", min_expected_findings: 4 }, + TestCase { file_name: "testdata/php_vulnerable.php", min_expected_findings: 5 }, + TestCase { file_name: "testdata/python_vulnerable.py", min_expected_findings: 10 }, + TestCase { file_name: "testdata/python2_vulnerable.py", min_expected_findings: 4 }, + TestCase { file_name: "testdata/ruby_vulnerable.rb", min_expected_findings: 6 }, + TestCase { file_name: "testdata/rust_vulnerable.rs", min_expected_findings: 3 }, + TestCase { file_name: "testdata/scala_vulnerable.scala", min_expected_findings: 3 }, + TestCase { file_name: "testdata/shell_vulnerable.sh", min_expected_findings: 2 }, + TestCase { file_name: "testdata/slack_tokens.properties", min_expected_findings: 17 }, + TestCase { file_name: "testdata/swift_vulnerable.swift", min_expected_findings: 2 }, + TestCase { file_name: "testdata/toml_vulnerable.toml", min_expected_findings: 4 }, + TestCase { file_name: "testdata/tsx_vulnerable.tsx", min_expected_findings: 1 }, + TestCase { file_name: "testdata/typescript_vulnerable.ts", min_expected_findings: 1 }, + TestCase { file_name: "testdata/yaml_vulnerable.yaml", min_expected_findings: 4 }, + ]; + + let root = root_dir(); + + for test_case in test_cases { + let test_file = root.join(test_case.file_name); + println!("Testing file: {}", test_case.file_name); + + let findings = test_context.scan_file(&test_file).await?; + + assert!( + findings >= test_case.min_expected_findings, + "File: {} - Expected >= {} findings, got {}", + test_case.file_name, + test_case.min_expected_findings, + findings + ); + } + + Ok(()) +} From f4a1e85b2653f6e5efe7a6ac28cb46e8d3cb88b5 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 10 Aug 2025 17:27:36 -0700 Subject: [PATCH 02/10] removed unused cli argument, snippet-length --- src/cli/commands/scan.rs.orig | 132 -------- src/main.rs.orig | 528 ----------------------------- src/reporter/json_format.rs.orig | 270 --------------- tests/int_dedup.rs.orig | 171 ---------- tests/int_github.rs.orig | 149 -------- tests/int_gitlab.rs.orig | 243 ------------- tests/int_redact.rs.orig | 115 ------- tests/int_slack.rs.orig | 207 ----------- tests/int_validation_cache.rs.orig | 211 ------------ tests/int_vulnerable_files.rs.orig | 257 -------------- 10 files changed, 2283 deletions(-) delete mode 100644 src/cli/commands/scan.rs.orig delete mode 100644 src/main.rs.orig delete mode 100644 src/reporter/json_format.rs.orig delete mode 100644 tests/int_dedup.rs.orig delete mode 100644 tests/int_github.rs.orig delete mode 100644 tests/int_gitlab.rs.orig delete mode 100644 tests/int_redact.rs.orig delete mode 100644 tests/int_slack.rs.orig delete mode 100644 tests/int_validation_cache.rs.orig delete mode 100644 tests/int_vulnerable_files.rs.orig diff --git a/src/cli/commands/scan.rs.orig b/src/cli/commands/scan.rs.orig deleted file mode 100644 index fe4d5d6..0000000 --- a/src/cli/commands/scan.rs.orig +++ /dev/null @@ -1,132 +0,0 @@ -use clap::{Args, ValueEnum}; -use strum::Display; -use tracing::debug; - -use crate::{ - cli::{ - commands::{ - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - }, - global::RAM_GB, - }, - rules::rule::Confidence, -}; - -/// Determine the default number of parallel scan jobs. -/// -/// * Target = `num_cpus * 2`. -/// * Cap by RAM at ≈ 1 GiB per job (so 16 GiB ⇒ max 16 jobs). -/// * Always ≥ 1. -/// * When `-v/--verbose` is passed, the computed value is logged at DEBUG. -fn default_scan_jobs() -> usize { - // How many logical CPUs do we see? (Falls back to 1 on error.) - let cpu_count = std::thread::available_parallelism().map(usize::from).unwrap_or(1); - - // Desired parallelism is CPU * 2. - let desired = cpu_count * 2; - - match *RAM_GB { - // If we know how much RAM we have, cap by a 1 GiB-per-job heuristic. - Some(ram_gb) => { - let max_by_ram = ram_gb.ceil() as usize; // 1 GiB per job - let jobs = desired.min(max_by_ram).max(1); - - debug!( - "Using {jobs} parallel scan jobs \ - (cpus = {cpu_count}, desired = {desired}, \ - ram = {ram_gb:.1} GiB, cap_by_ram = {max_by_ram})" - ); - jobs - } - // If RAM is unknown, just use the desired value. - None => { - debug!("Using {desired} parallel scan jobs (cpus = {cpu_count}, ram unknown)"); - desired - } - } -} - -/// `kingfisher scan` command and flags -#[derive(Args, Debug, Clone)] -pub struct ScanArgs { - /// Number of parallel scanning threads - #[arg(long = "jobs", short = 'j', default_value_t = default_scan_jobs())] - pub num_jobs: usize, - - #[command(flatten)] - pub rules: RuleSpecifierArgs, - - #[command(flatten)] - pub input_specifier_args: InputSpecifierArgs, - - #[command(flatten)] - pub content_filtering_args: ContentFilteringArgs, - - /// Minimum confidence level for reporting findings - #[arg(long, short = 'c', default_value = "medium")] - pub confidence: ConfidenceLevel, - - /// Disable secret validation - #[arg(long, short = 'n', default_value_t = false)] - pub no_validate: bool, - - /// Display only validated findings - #[arg(long, default_value_t = false)] - pub only_valid: bool, - - /// Override the default minimum entropy threshold - #[arg(long, short = 'e')] - pub min_entropy: Option, - - /// Show performance statistics for each rule - #[arg(long, default_value_t = false)] - pub rule_stats: bool, - - /// Display every occurrence of a finding - #[arg(long, default_value_t = false)] - pub no_dedup: bool, - - /// Redact findings values using a secure hash - #[arg(long, short = 'r', default_value_t = false)] - pub redact: bool, - - /// Timeout for Git repository scanning in seconds - #[arg(long, default_value_t = 1800, value_name = "SECONDS")] - pub git_repo_timeout: u64, - - #[command(flatten)] - pub output_args: OutputArgs, - - /// Bytes of context before and after each match - #[arg(long, default_value_t = 256, value_name = "BYTES")] - pub snippet_length: usize, - - /// Baseline file to filter known secrets - #[arg(long, value_name = "FILE")] - pub baseline_file: Option, - - /// Create or update the baseline file with current findings - #[arg(long, default_value_t = false)] - pub manage_baseline: bool, -} - -/// Confidence levels for findings -#[derive(Copy, Clone, Debug, Display, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] -#[strum(serialize_all = "kebab-case")] -pub enum ConfidenceLevel { - Low, - Medium, - High, -} - -impl From for Confidence { - fn from(level: ConfidenceLevel) -> Self { - match level { - ConfidenceLevel::Low => Confidence::Low, - ConfidenceLevel::Medium => Confidence::Medium, - ConfidenceLevel::High => Confidence::High, - } - } -} diff --git a/src/main.rs.orig b/src/main.rs.orig deleted file mode 100644 index 58145e6..0000000 --- a/src/main.rs.orig +++ /dev/null @@ -1,528 +0,0 @@ -// ──────────────────────────────────────────────────────────── -// Global allocator setup -// * Default - mimalloc (no feature flags) -// * Debug - jemalloc (`use-jemalloc` feature) -// * Fallback - system allocator (`system-alloc` feature) -// ──────────────────────────────────────────────────────────── - -// --- jemalloc (opt-in) --- -#[cfg(feature = "use-jemalloc")] -#[global_allocator] -static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; - -// --- mimalloc (default) --- -#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))] -#[global_allocator] -static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; - -// --- system allocator (explicit opt-out) --- -#[cfg(feature = "system-alloc")] -use std::alloc::System; -#[cfg(feature = "system-alloc")] -#[global_allocator] -static GLOBAL: System = System; - -// use std::alloc::System; -// #[global_allocator] -// static GLOBAL: System = System; - -use std::{ - io::Read, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use kingfisher::{ - cli::{ - self, - commands::{ - github::{ - GitCloneMode, GitHistoryMode, GitHubCommand, GitHubRepoType, GitHubReposCommand, - }, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::{ - RuleSpecifierArgs, RulesCheckArgs, RulesCommand, RulesListArgs, - RulesListOutputFormat, - }, - }, - global::Command, - CommandLineArgs, GlobalArgs, - }, - findings_store, - findings_store::FindingsStore, - github, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::{load_and_record_rules, run_scan}, - update::check_for_update, -}; -use serde_json::json; -use tempfile::TempDir; -use term_size; -use tokio::runtime::Builder; -use tracing::{error, info, warn}; -use tracing_core::metadata::LevelFilter; -use tracing_subscriber::{ - self, fmt, prelude::__tracing_subscriber_SubscriberExt, registry, util::SubscriberInitExt, -}; -use url::Url; - -use crate::cli::commands::gitlab::{GitLabCommand, GitLabRepoType, GitLabReposCommand}; - -fn main() -> anyhow::Result<()> { - color_backtrace::install(); - // Parse command-line arguments - let args = CommandLineArgs::parse_args(); - - // Determine the number of jobs, defaulting to the number of CPUs - let num_jobs = match args.command { - Command::Scan(ref scan_args) => scan_args.num_jobs, - Command::GitHub(_) => num_cpus::get(), // Default for GitHub commands - Command::GitLab(_) => num_cpus::get(), // Default for GitLab commands - Command::Rules(_) => num_cpus::get(), // Default for Rules commands - }; - - // Set up the Tokio runtime with the specified number of threads - let runtime = Builder::new_multi_thread() - .worker_threads(num_jobs) - .enable_all() - .build() - .context("Failed to create Tokio runtime")?; - runtime.block_on(async_main(args)) -} - -fn setup_logging(global_args: &GlobalArgs) { - // Determine log level based on global verbosity - let (level, all_targets) = if global_args.quiet { - (LevelFilter::ERROR, false) - } else { - let level = match global_args.verbose { - 0 => LevelFilter::INFO, // Default level if no `-v` is provided - 1 => LevelFilter::DEBUG, // `-v` - 2 => LevelFilter::TRACE, // `-vv` - _ => LevelFilter::TRACE, // `-vvv` or more - }; - let all_targets = global_args.verbose > 2; // Enable all targets for `-vvv` or more - (level, all_targets) - }; - // Create a filter for logging - let filter = if all_targets { - // Enable TRACE for all modules - tracing_subscriber::filter::Targets::new().with_default(LevelFilter::TRACE) - } else { - // Per-target filtering, only TRACE for `kingfisher` - tracing_subscriber::filter::Targets::new() - .with_default(LevelFilter::ERROR) // Default for all modules - .with_target("kingfisher", level) // Replace `kingfisher` with your - // crate's name - }; - // Configure the formatter layer - let fmt_layer = fmt::layer() - .with_writer(std::io::stderr) // Write logs to stderr - .with_target(true) // Enable target filtering - .with_ansi(false) // Disable colors - .without_time(); // Remove timestamps - // Build and initialize the registry - registry() - .with(fmt_layer) // Attach the formatter layer - .with(filter) // Attach the filter - .init(); -} - -pub fn determine_exit_code(datastore: &Arc>) -> i32 { - // exit with code 200 if _any_ findings are discovered - // exit with code 205 if VALIDATED findings are discovered - // exit with code 0 if there are NO findings discovered - let ds = datastore.lock().unwrap(); - // Get all matches - // let all_matches = ds.get_matches(); - - // Only consider visible matches when determining the exit code - let all_matches = ds - .get_matches() - .iter() - .filter(|msg| { - let (_, _, match_item) = &***msg; - match_item.visible - }) - .collect::>(); - - if all_matches.is_empty() { - // No findings discovered - 0 - } else { - // Check if there are any validated findings - let validated_matches = all_matches - .iter() - .filter(|msg| { - let (_, _, match_item) = &****msg; - match_item.validation_success - }) - .count(); - if validated_matches > 0 { - // Validated findings discovered - 205 - } else { - // Findings discovered, but not validated - 200 - } - } -} - -async fn async_main(args: CommandLineArgs) -> Result<()> { - // Create a temporary directory - let temp_dir = TempDir::new().context("Failed to create temporary directory")?; - let clone_dir = temp_dir.path().to_path_buf(); - - // Create the in-memory datastore - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - setup_logging(&args.global_args); - let update_msg = check_for_update(&args.global_args, None); - match args.command { - Command::Scan(mut scan_args) => { - // ————————————————————————————————————————— - // If no paths or a single "-", slurp stdin into a temp file - // ————————————————————————————————————————— - info!( - "Launching with {} concurrent scan jobs. Use --num-jobs to override.", - &scan_args.num_jobs - ); - let paths = &scan_args.input_specifier_args.path_inputs; - let is_dash = paths.iter().any(|p| p.as_os_str() == "-"); - if (paths.is_empty() || is_dash) && !atty::is(atty::Stream::Stdin) { - // read all stdin - let mut buf = Vec::new(); - std::io::stdin().read_to_end(&mut buf)?; - // write into temp_dir - let stdin_file = temp_dir.path().join("stdin_input"); - std::fs::write(&stdin_file, buf)?; - // replace inputs - scan_args.input_specifier_args.path_inputs = vec![stdin_file.into()]; - } - - // now proceed exactly as before - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - run_scan(&args.global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await?; - let exit_code = determine_exit_code(&datastore); - - if let Err(e) = temp_dir.close() { - eprintln!("Failed to close temporary directory: {}", e); - } - std::process::exit(exit_code); - } - Command::Rules(ref rule_args) => match &rule_args.command { - RulesCommand::Check(check_args) => { - run_rules_check(&check_args)?; - } - RulesCommand::List(list_args) => { - run_rules_list(&list_args)?; - } - }, - Command::GitHub(github_args) => match github_args.command { - GitHubCommand::Repos(repos_command) => match repos_command { - GitHubReposCommand::List(list_args) => { - github::list_repositories( - github_args.github_api_url, - args.global_args.ignore_certs, - args.global_args.use_progress(), - &list_args.repo_specifiers.user, - &list_args.repo_specifiers.organization, - list_args.repo_specifiers.all_organizations, - list_args.repo_specifiers.repo_type.into(), - ) - .await?; - } - }, - }, - Command::GitLab(gitlab_args) => match gitlab_args.command { - GitLabCommand::Repos(repos_command) => match repos_command { - GitLabReposCommand::List(list_args) => { - kingfisher::gitlab::list_repositories( - gitlab_args.gitlab_api_url, - args.global_args.ignore_certs, - args.global_args.use_progress(), - &list_args.repo_specifiers.user, - &list_args.repo_specifiers.group, - list_args.repo_specifiers.all_groups, - list_args.repo_specifiers.repo_type.into(), - ) - .await?; - } - }, - }, - } - if let Some(msg) = update_msg { - info!("{msg}"); - } - Ok(()) -} - -/// Create a default ScanArgs instance for rule loading -fn create_default_scan_args() -> cli::commands::scan::ScanArgs { - use cli::commands::scan::*; - ScanArgs { - num_jobs: 1, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: url::Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::All, - - jira_url: None, - jql: None, - max_results: 100, - - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Slack query - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - - // Docker image scanning - docker_image: Vec::new(), - - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: true, - extraction_depth: 2, - exclude: Vec::new(), // Exclude patterns - no_binary: true, - }, - confidence: ConfidenceLevel::Medium, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, - no_dedup: false, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - } -} -/// Run the rules check command -pub fn run_rules_check(args: &RulesCheckArgs) -> Result<()> { - let mut num_errors = 0; - let mut num_warnings = 0; - // Load and check rules - let loader = RuleLoader::from_rule_specifiers(&args.rules); - let loaded = loader.load(&create_default_scan_args())?; - let resolved = loaded.resolve_enabled_rules()?; - let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; - - // Check each rule - for (rule_index, rule) in rules_db.rules().iter().enumerate() { - let rule_syntax = rule.syntax(); - // Basic rule validation checks - if rule.name().len() < 3 { - warn!("Rule '{}' has a very short name", rule.name()); - num_warnings += 1; - } - if rule.syntax().pattern.len() < 5 { - warn!("Rule '{}' has a very short pattern", rule.name()); - num_warnings += 1; - } - if rule.syntax().examples.is_empty() { - warn!("Rule '{}' has no examples", rule.name()); - num_warnings += 1; - continue; - } - // Check regex compilation - if let Err(e) = rule.syntax().as_regex() { - error!("Rule '{}' has invalid regex: {}", rule.name(), e); - num_errors += 1; - continue; - } - // Test each example against both vectorscan and regex - for (example_index, example) in rule_syntax.examples.iter().enumerate() { - // Create a test blob from the example - // let blob = Blob::new(BlobId::new(example.as_bytes()), - // example.as_bytes().to_vec()); let origin = OriginSet::new( - // Origin::from_file(PathBuf::from("test_example")), - // Vec::new(), - // ); - // // Check vectorscan match - // let vectorscan_matched = match matcher.scan_blob(&blob, &origin, None)? { - // ScanResult::New(matches) => !matches.is_empty(), - // _ => false, - // }; - // Check regex match - // Get the regex using the public method - let re = - rules_db.get_regex_by_rule_id(rule.id()).expect("Failed to get regex for rule"); - let regex_matched = re.is_match(example.as_bytes()); - if !regex_matched { - // ||!vectorscan_matched { - println!("\nTesting rule {} - {}", rule_index + 1, rule_syntax.name); - println!(" Processing example {}", example_index + 1); - println!(" [!] Mismatch detected for example: {}", example); - // if !vectorscan_matched { - // println!(" Vectorscan match: {}", vectorscan_matched); - // num_errors += 1; - // } - if !regex_matched { - println!(" Regex match: {}", regex_matched); - num_errors += 1; - } - } - - // // Report any mismatches - // if !vectorscan_matched || !regex_matched { - // error!("Rule '{}' example {} failed validation:", - // rule.name(), example_index + 1); println!(" - // Example text: {}", example); - - // if !vectorscan_matched { - // error!(" - Vectorscan pattern did not match example"); - // num_errors += 1; - // } - - // if !regex_matched { - // error!(" - Regex pattern did not match example"); - // num_errors += 1; - // } - // } - } - } - // Print summary - if num_errors > 0 || num_warnings > 0 { - println!("\nCheck Summary:"); - println!(" Errors: {}", num_errors); - println!(" Warnings: {}", num_warnings); - println!("\nError types include:"); - println!(" - Invalid regex patterns"); - println!(" - Examples that don't match their patterns"); - println!("\nWarning types include:"); - println!(" - Rules with very short names"); - println!(" - Rules with very short patterns"); - println!(" - Rules without examples"); - } else { - println!("\nAll rules passed validation successfully!"); - } - // Exit with error if there are errors or if warnings are treated as errors - if num_errors > 0 || (args.warnings_as_errors && num_warnings > 0) { - std::process::exit(1); - } - Ok(()) -} -/// Run the rules list command -pub fn run_rules_list(args: &RulesListArgs) -> Result<()> { - // Load rules - let loader = RuleLoader::from_rule_specifiers(&args.rules); - let loaded = loader.load(&create_default_scan_args())?; - let resolved = loaded.resolve_enabled_rules()?; - let mut writer = args.output_args.get_writer()?; - match args.output_args.format { - RulesListOutputFormat::Pretty => { - // Determine terminal width if possible, otherwise use default - let term_width = term_size::dimensions().map(|(w, _)| w).unwrap_or(120); - // First pass: calculate column widths - let max_name_width = resolved.iter().map(|r| r.name().len()).max().unwrap_or(0).max(4); // "Rule" header - let max_id_width = resolved.iter().map(|r| r.id().len()).max().unwrap_or(0).max(2); // "ID" header - let max_conf_width = resolved - .iter() - .map(|r| format!("{:?}", r.confidence()).len()) - .max() - .unwrap_or(0) - .max(10); // "Confidence" header - // Calculate pattern width based on terminal width - let reserved_width = max_name_width + max_id_width + max_conf_width + 10; - let pattern_width = term_width.saturating_sub(reserved_width); - // Format pattern on a single line - let format_pattern = |pattern: &str| { - let single_line = pattern - .replace('\n', " ") - .replace('\r', " ") - .split_whitespace() - .collect::>() - .join(" "); - if single_line.len() > pattern_width { - format!("{}...", &single_line[..pattern_width.saturating_sub(3)]) - } else { - single_line - } - }; - // Print header - writeln!( - writer, - "\n{:name_width$} │ {:id_width$} │ {:conf_width$} │ Pattern", - "Rule", - "ID", - "Confidence", - name_width = max_name_width, - id_width = max_id_width, - conf_width = max_conf_width - )?; - // Print separator - writeln!( - writer, - "{0:─ { - // Create JSON format - let rules_json: Vec<_> = resolved - .iter() - .map(|rule| { - json!({ - "name": rule.name(), - "id": rule.id(), - "pattern": rule.syntax().pattern, - "confidence": rule.confidence(), - "examples": rule.syntax().examples, - "visible": rule.visible(), - }) - }) - .collect(); - serde_json::to_writer_pretty(&mut writer, &rules_json)?; - writeln!(writer)?; - } - } - Ok(()) -} diff --git a/src/reporter/json_format.rs.orig b/src/reporter/json_format.rs.orig deleted file mode 100644 index aae16fc..0000000 --- a/src/reporter/json_format.rs.orig +++ /dev/null @@ -1,270 +0,0 @@ -use super::*; - -impl DetailsReporter { - pub fn json_format( - &self, - mut writer: W, - args: &cli::commands::scan::ScanArgs, - ) -> Result<()> { - let records = self.build_finding_records(args)?; - if !records.is_empty() { - serde_json::to_writer_pretty(&mut writer, &records)?; - writeln!(writer)?; - } - Ok(()) - } - - pub fn jsonl_format( - &self, - mut writer: W, - args: &cli::commands::scan::ScanArgs, - ) -> Result<()> { - let records = self.build_finding_records(args)?; - for record in records { - serde_json::to_writer(&mut writer, &record)?; - writeln!(writer)?; - } - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::cli::commands::github::GitCloneMode; - use crate::cli::commands::github::GitHistoryMode; - use crate::cli::commands::rules::RuleSpecifierArgs; - use crate::matcher::{SerializableCapture, SerializableCaptures}; - use crate::util::intern; - use crate::{ - blob::BlobId, - cli::commands::github::GitHubRepoType, - cli::commands::inputs::ContentFilteringArgs, - cli::commands::inputs::InputSpecifierArgs, - cli::commands::output::{OutputArgs, ReportOutputFormat}, - cli::commands::scan::ConfidenceLevel, - findings_store::FindingsStore, - location::{Location, OffsetSpan, SourcePoint, SourceSpan}, - matcher::Match, - origin::Origin, - reporter::styles::Styles, - }; - use std::{ - io::Cursor, - path::PathBuf, - sync::{Arc, Mutex}, - }; - use url::Url; - fn create_default_args() -> cli::commands::scan::ScanArgs { - use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope - - cli::commands::scan::ScanArgs { - num_jobs: 1, - no_dedup: false, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - // local path / git URL inputs - path_inputs: Vec::new(), - git_url: Vec::new(), - - // GitHub - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - - // GitLab - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::All, - // Jira options - jira_url: None, - jql: None, - max_results: 100, - // Slack options - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - - docker_image: Vec::new(), - // clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - exclude: Vec::new(), // Exclude patterns - no_binary: true, - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - } - } - - fn create_mock_match( - rule_name: &str, - rule_text_id: &str, - rule_finding_fingerprint: &str, - validation_success: bool, - ) -> Match { - Match { - location: Location { - offset_span: OffsetSpan { start: 10, end: 20 }, - source_span: SourceSpan { - start: SourcePoint { line: 5, column: 10 }, - end: SourcePoint { line: 5, column: 20 }, - }, - }, - groups: SerializableCaptures { - captures: vec![SerializableCapture { - name: Some("token".to_string()), - match_number: 1, - start: 10, - end: 20, - value: "mock_token".into(), - }], - }, - blob_id: BlobId::new(b"mock_blob"), - finding_fingerprint: 0123, - rule_finding_fingerprint: intern(rule_finding_fingerprint), - rule_text_id: intern(rule_text_id), - rule_name: intern(rule_name), - rule_confidence: Confidence::Medium, - validation_response_body: "validation response".to_string(), - validation_response_status: 200, - validation_success, - calculated_entropy: 4.5, - visible: true, - } - } - - fn setup_mock_reporter(matches: Vec) -> DetailsReporter { - let mut datastore = FindingsStore::new(PathBuf::from("/tmp")); - if !matches.is_empty() { - let blob_metadata = BlobMetadata { - id: BlobId::new(b"mock_blob"), - num_bytes: 1024, - mime_essence: Some("text/plain".to_string()), - charset: Some("UTF-8".to_string()), - language: Some("Rust".to_string()), - }; - let dedup = true; - for m in matches.clone() { - datastore.record( - vec![( - Arc::new(OriginSet::new( - Origin::from_file(PathBuf::from("/mock/path/file.rs")), - vec![], - )), - Arc::new(blob_metadata.clone()), - m.m.clone(), - )], - dedup, - ); - } - } - DetailsReporter { - datastore: Arc::new(Mutex::new(datastore)), - styles: Styles::new(false), - only_valid: false, - } - } - - #[test] - fn test_json_format() -> Result<()> { - let mock_match = - create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true); - let matches = vec![ReportMatch { - origin: OriginSet::new(Origin::from_file(PathBuf::from("/mock/path/file.rs")), vec![]), - blob_metadata: BlobMetadata { - id: BlobId::new(b"mock_blob"), - num_bytes: 1024, - mime_essence: Some("text/plain".to_string()), - charset: Some("UTF-8".to_string()), - language: Some("Rust".to_string()), - }, - m: mock_match, - comment: None, - match_confidence: Confidence::Medium, - visible: true, - validation_response_body: "validation response".to_string(), - validation_response_status: 200, - validation_success: true, - }]; - let reporter = setup_mock_reporter(matches); - let mut output = Cursor::new(Vec::new()); - reporter.json_format(&mut output, &create_default_args())?; - let json_output: Vec = serde_json::from_slice(&output.into_inner())?; - assert!(!json_output.is_empty(), "JSON output should not be empty"); - let first = &json_output[0]; - assert_eq!(first["rule"]["name"], "MockRule"); - assert_eq!(first["finding"]["language"], "Rust"); - Ok(()) - } - - #[test] - fn test_validation_status_in_json() -> Result<()> { - let test_cases = vec![(true, "Active Credential"), (false, "Inactive Credential")]; - for (validation_success, expected_status) in test_cases { - let mock_match = create_mock_match( - "MockRule", - "mock_rule_1", - "mock_finding_fingerprint", - validation_success, - ); - let matches = vec![ReportMatch { - origin: OriginSet::new( - Origin::from_file(PathBuf::from("/mock/path/file.rs")), - vec![], - ), - blob_metadata: BlobMetadata { - id: BlobId::new(b"mock_blob"), - num_bytes: 1024, - mime_essence: Some("text/plain".to_string()), - charset: Some("UTF-8".to_string()), - language: Some("Rust".to_string()), - }, - m: mock_match, - comment: None, - match_confidence: Confidence::Medium, - visible: true, - validation_response_body: "validation response".to_string(), - validation_response_status: 200, - validation_success, - }]; - let reporter = setup_mock_reporter(matches); - let mut output = Cursor::new(Vec::new()); - reporter.json_format(&mut output, &create_default_args())?; - let json_output: Vec = serde_json::from_slice(&output.into_inner())?; - assert!(!json_output.is_empty(), "JSON output should not be empty"); - let first = &json_output[0]; - let validation_status = first["finding"]["validation"]["status"].as_str().unwrap(); - assert_eq!(validation_status, expected_status); - } - Ok(()) - } -} diff --git a/tests/int_dedup.rs.orig b/tests/int_dedup.rs.orig deleted file mode 100644 index 0c93023..0000000 --- a/tests/int_dedup.rs.orig +++ /dev/null @@ -1,171 +0,0 @@ -//! Proves that run_async_scan collapses identical findings when -//! ── no_dedup == false ── -//! while keeping them separate when no_dedup == true. - -use std::{ - fs, - sync::{Arc, Mutex}, -}; - -use anyhow::Result; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use tokio::runtime::Runtime; -use url::Url; - -/// Helper: run a scan with the supplied `no_dedup` flag and return how many -/// findings the `FindingsStore` ends up containing. -fn run_scan(count_rt: &Runtime, no_dedup: bool) -> Result { - // ── temp workspace ────────────────────────────────────────────── - let work = TempDir::new()?; - let rules_dir = work.path().join("rules"); - fs::create_dir_all(&rules_dir)?; - let inputs_dir = work.path().join("in"); - fs::create_dir_all(&inputs_dir)?; - - // 1. Tiny custom rule that matches `secret_1234` - fs::write( - rules_dir.join("demo.yml"), - r#" -rules: - - id: demo.secret - name: Demo secret - pattern: "secret_[0-9]{4}" - confidence: low -"#, - )?; - - // 2. Two different blobs that both contain the SAME secret - fs::write(inputs_dir.join("a.txt"), "secret_1234\n")?; - fs::write(inputs_dir.join("b.txt"), "secret_1234\n")?; - - // ── build ScanArgs ────────────────────────────────────────────── - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: vec![rules_dir.clone()], - rule: vec!["all".into()], - load_builtins: false, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: vec![inputs_dir.join("a.txt"), inputs_dir.join("b.txt")], - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 5.0, - extraction_depth: 1, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup, - snippet_length: 64, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Never, - progress: Mode::Never, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 8192 }, - }; - - // ── load rules once ───────────────────────────────────────────── - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; - let resolved = loaded.resolve_enabled_rules()?; - let rules_db = Arc::new(RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?); - - // Fresh FindingsStore for this run - let store_path = work.path().join("store"); - fs::create_dir_all(&store_path)?; - let datastore = Arc::new(Mutex::new(FindingsStore::new(store_path))); - - // run_async_scan is async – use the supplied Tokio runtime - count_rt.block_on(run_async_scan( - &global_args, - &scan_args, - Arc::clone(&datastore), - &rules_db, - ))?; - - let x = Ok(datastore.lock().unwrap().get_matches().len()); - x -} - -#[test] -fn test_dedup_branch() -> Result<()> { - // A *single* runtime reused for both scans keeps the test fast - let rt = Runtime::new().unwrap(); - - let findings_with_dups = run_scan(&rt, true)?; // keep duplicates - let findings_deduped = run_scan(&rt, false)?; // collapse duplicates - - assert!( - findings_with_dups > findings_deduped, - "expected deduplication to reduce finding count ({} -- {})", - findings_with_dups, - findings_deduped - ); - assert_eq!(findings_deduped, 1, "exactly one unique finding should remain after dedup"); - - Ok(()) -} diff --git a/tests/int_github.rs.orig b/tests/int_github.rs.orig deleted file mode 100644 index 2892b91..0000000 --- a/tests/int_github.rs.orig +++ /dev/null @@ -1,149 +0,0 @@ -// tests/int_github.rs -use std::{ - str::FromStr, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - git_url::GitUrl, - scanner::{load_and_record_rules, run_scan}, -}; -use tempfile::TempDir; -use tokio::runtime::Runtime; -use url::Url; -/// Helper function to determine exit code based on findings -fn determine_exit_code(total_findings: usize, validated_findings: usize) -> i32 { - if total_findings == 0 { - 0 // No findings discovered - } else if validated_findings > 0 { - 205 // Validated findings discovered - } else { - 200 // Findings discovered but none validated - } -} -#[test] -fn test_github_remote_scan() -> Result<()> { - // Create a temporary directory for the scan - let temp_dir = TempDir::new().context("Failed to create temporary directory")?; - let clone_dir = temp_dir.path().to_path_buf(); - // Create test repository URL - let test_repo_url = "https://github.com/micksmix/SecretsTest.git"; - let git_url = GitUrl::from_str(test_repo_url).expect("Failed to parse Git URL"); - // Create scan arguments - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: vec![git_url], - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - no_binary: true, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - // Create global arguments - let global_args = GlobalArgs { - verbose: 0, - quiet: false, - color: Mode::Auto, - progress: Mode::Auto, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16384 }, - }; - // Create in-memory datastore - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - // Create the runtime first - let runtime = Runtime::new().expect("Failed to create Tokio runtime"); - // Load rules - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - // Run the scan using runtime.block_on - runtime.block_on(async { - run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await - })?; - // Get scan results - let ds = datastore.lock().unwrap(); - let matches = ds.get_matches(); - let total_findings = matches.len(); - let validated_findings = matches.iter().filter(|arc| arc.as_ref().2.validation_success).count(); - - // Print validation statistics - println!("Total findings: {}, Validated findings: {}", total_findings, validated_findings); - // Check total number of findings - assert!(total_findings >= 10, "Expected at least 10 findings, but got {}", total_findings); - // Determine exit code - let exit_code = determine_exit_code(total_findings, validated_findings); - // Test passes if we found some kind of findings (exit code >= 200) - assert!( - exit_code >= 200, - "Test failed: Expected to find vulnerabilities (exit code >= 200), got exit code {}", - exit_code - ); - // Drop the runtime explicitly here, outside of async context - drop(runtime); - Ok(()) -} diff --git a/tests/int_gitlab.rs.orig b/tests/int_gitlab.rs.orig deleted file mode 100644 index fa3e169..0000000 --- a/tests/int_gitlab.rs.orig +++ /dev/null @@ -1,243 +0,0 @@ -// tests/int_gitlab.rs -use std::{ - str::FromStr, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - git_url::GitUrl, - scanner::{load_and_record_rules, run_scan}, -}; -use tempfile::TempDir; -use tokio::runtime::Runtime; -use url::Url; - -/// Derive process exit-codes from findings -fn determine_exit_code(total: usize, validated: usize) -> i32 { - match (total, validated) { - (0, _) => 0, - (_, v) if v > 0 => 205, - _ => 200, - } -} - -#[test] -fn test_gitlab_remote_scan() -> Result<()> { - let temp_dir = TempDir::new().context("tmp dir")?; - let clone_dir = temp_dir.path().to_path_buf(); - - // Public GitLab repo seeded with test secrets - let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; - let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: vec![git_url], - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/")?, - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/")?, - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - no_binary: true, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: false, - color: Mode::Auto, - progress: Mode::Auto, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16_384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - let rt = Runtime::new()?; - - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - - rt.block_on(async { - run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await - })?; - - let ds = datastore.lock().unwrap(); - let findings = ds.get_matches(); - let total = findings.len(); - let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); - - assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); - - let exit_code = determine_exit_code(total, validated); - assert!( - exit_code >= 200, - "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" - ); - - drop(rt); - Ok(()) -} - -#[test] -fn test_gitlab_remote_scan_no_history() -> Result<()> { - let temp_dir = TempDir::new().context("tmp dir")?; - let clone_dir = temp_dir.path().to_path_buf(); - - let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; - let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: vec![git_url], - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/")?, - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/")?, - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::None, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - no_binary: true, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: false, - color: Mode::Auto, - progress: Mode::Auto, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16_384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - let rt = Runtime::new()?; - - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - - rt.block_on(async { - run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await - })?; - - let ds = datastore.lock().unwrap(); - let findings = ds.get_matches(); - let total = findings.len(); - let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); - - assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); - - let exit_code = determine_exit_code(total, validated); - assert!( - exit_code >= 200, - "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" - ); - - drop(rt); - Ok(()) -} diff --git a/tests/int_redact.rs.orig b/tests/int_redact.rs.orig deleted file mode 100644 index 796d019..0000000 --- a/tests/int_redact.rs.orig +++ /dev/null @@ -1,115 +0,0 @@ -// Integration test to ensure --redact replaces secret values with hashes -use std::{ - path::PathBuf, - sync::{Arc, Mutex}, -}; - -use anyhow::Result; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, GlobalArgs, Mode}, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use url::Url; - -#[tokio::test] -async fn test_redact_hashes_finding_values() -> Result<()> { - let temp_dir = TempDir::new()?; - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: vec![PathBuf::from("testdata/generic_secrets.py")], - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: true, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Never, - no_update_check: false, - self_update: false, - progress: Mode::Never, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16384 }, - }; - - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; - let resolved = loaded.resolve_enabled_rules()?; - let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(temp_dir.path().to_path_buf()))); - run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &rules_db).await?; - - let ds = datastore.lock().unwrap(); - let matches = ds.get_matches(); - assert!(!matches.is_empty()); - for m_arc in matches { - let m = &m_arc.2; - assert!(m.groups.captures.iter().any(|cap| cap.value.starts_with("[REDACTED:"))); - } - - Ok(()) -} diff --git a/tests/int_slack.rs.orig b/tests/int_slack.rs.orig deleted file mode 100644 index d22b8f0..0000000 --- a/tests/int_slack.rs.orig +++ /dev/null @@ -1,207 +0,0 @@ -use std::{ - env, - sync::{Arc, Mutex}, -}; - -use anyhow::Result; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use url::Url; -use wiremock::{ - matchers::{method, path}, - Mock, MockServer, ResponseTemplate, -}; - -struct TestContext { - rules_db: Arc, -} - -impl TestContext { - fn new() -> Result { - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - jira_url: None, - jql: None, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - max_results: 10, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 128, - baseline_file: None, - manage_baseline: false, - }; - - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; - let resolved = loaded.resolve_enabled_rules()?; - let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; - Ok(Self { rules_db: Arc::new(rules_db) }) - } -} - -#[tokio::test] -async fn test_scan_slack_messages() -> Result<()> { - let ctx = TestContext::new()?; - - let server = MockServer::start().await; - let response = serde_json::json!({ - "ok": true, - "messages": { - "matches": [{ - "permalink": "https://example.slack.com/archives/C123/p1234", - "text": "This contains a github token ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa", - "ts": "1234.56", - "channel": {"id": "C123", "name": "general"} - }], - "pagination": {"page": 1, "page_count": 1} - } - }); - Mock::given(method("GET")) - .and(path("/search.messages")) - .respond_with(ResponseTemplate::new(200).set_body_json(response)) - .mount(&server) - .await; - - env::set_var("KF_SLACK_TOKEN", "xoxp-test"); - - let temp_dir = TempDir::new()?; - let clone_dir = temp_dir.path().to_path_buf(); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - jira_url: None, - jql: None, - slack_query: Some("test".into()), - slack_api_url: Url::parse(&format!("{}/", server.uri()))?, - max_results: 10, - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 128, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Auto, - no_update_check: false, - self_update: false, - progress: Mode::Never, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - - run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &ctx.rules_db).await?; - - let findings = { - let ds = datastore.lock().unwrap(); - ds.get_matches().len() - }; - assert!(findings > 0); - Ok(()) -} diff --git a/tests/int_validation_cache.rs.orig b/tests/int_validation_cache.rs.orig deleted file mode 100644 index a7ab9ea..0000000 --- a/tests/int_validation_cache.rs.orig +++ /dev/null @@ -1,211 +0,0 @@ -// tests/int_validation_cache.rs -use std::{ - fs, - sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, Mutex, - }, -}; - -use anyhow::Result; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use url::Url; -use wiremock::{ - matchers::{method, path}, - Mock, MockServer, Request, ResponseTemplate, -}; - -#[tokio::test] -async fn test_validation_cache_and_depvars() -> Result<()> { - /* --------------------------------------------------------- * - * 1. Spin-up Wiremock and count incoming validation calls * - * --------------------------------------------------------- */ - let server = MockServer::start().await; - let hit_counter = Arc::new(AtomicUsize::new(0)); - let counter_clone = Arc::clone(&hit_counter); - - Mock::given(method("GET")) - .and(path("/validate")) - .respond_with(move |_req: &Request| { - counter_clone.fetch_add(1, Ordering::SeqCst); - ResponseTemplate::new(200).set_body_string("ok") - }) - .mount(&server) - .await; - - /* --------------------------------------------------------- * - * 2. Synthetic rules exercising depends_on_rule + HTTP val * - * --------------------------------------------------------- */ - let rules_yaml = format!( - r#" - rules: - - name: Demo API Key - id: demo.key.1 - pattern: '(demokey_[a-z0-9]{{8}})' - confidence: low - min_entropy: 0.0 - - - name: Demo API Key Validation - id: demo.key.validation.1 - depends_on_rule: - - rule_id: demo.key.1 - variable: TOKEN - pattern: '(demokey_[a-z0-9]{{8}})' - confidence: low - validation: - type: Http - content: - request: - method: GET - url: '{base}/validate?token={{ {{ TOKEN }} }}' - response_matcher: - - report_response: true - - type: WordMatch - words: - - '"error_code":"403003"' - negative: true - "#, - base = server.uri() - ); - - /* --------------------------------------------------------- * - * 3. Temp workspace: rules file + input with 2 duplicates * - * --------------------------------------------------------- */ - let work_dir = TempDir::new()?; - let rules_file = work_dir.path().join("demo.yml"); - fs::write(&rules_file, rules_yaml)?; - - let secret_file = work_dir.path().join("secrets.txt"); - fs::write(&secret_file, "demokey_abcdefgh\ndemokey_abcdefgh")?; - - /* --------------------------------------------------------- * - * 4. Build Scan / Global args (no_dedup=true to keep dups) * - * --------------------------------------------------------- */ - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: vec![work_dir.path().to_path_buf()], - rule: vec!["all".into()], - load_builtins: false, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: vec![secret_file.clone()], - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Low, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, // keep duplicates so the cache is stressed - snippet_length: 128, - baseline_file: None, - manage_baseline: false, - }; - - /* --------------------------------------------------------- * - * 5. Load rules, run scan * - * --------------------------------------------------------- */ - // --------------------------------------------------------- - // 5. Load rules, record them, run scan - // --------------------------------------------------------- - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; - let resolved = loaded.resolve_enabled_rules()?; - let rules_db = Arc::new(RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?); - - let datastore = Arc::new(Mutex::new(FindingsStore::new(work_dir.path().to_path_buf()))); - - // NEW: make the datastore aware of every rule - { - let mut ds = datastore.lock().unwrap(); - ds.record_rules(rules_db.rules()); // <-- **add this line** - } - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Auto, - progress: Mode::Never, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 8192 }, - }; - - run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &rules_db).await?; - - /* --------------------------------------------------------- * - * 6. Assertions * - * --------------------------------------------------------- */ - // There are two matches for demo.key.validation.1, but the validator - // should have been called only once thanks to SkipMap caching. - assert_eq!( - hit_counter.load(Ordering::SeqCst), - 1, - "validator endpoint should be hit exactly once" - ); - - let ds = datastore.lock().unwrap(); - let total_matches = ds.get_matches().len(); - assert_eq!(total_matches, 4, "expected 2 matches per rule (dup secrets)"); // 2 for each rule - - Ok(()) -} diff --git a/tests/int_vulnerable_files.rs.orig b/tests/int_vulnerable_files.rs.orig deleted file mode 100644 index abeb6f1..0000000 --- a/tests/int_vulnerable_files.rs.orig +++ /dev/null @@ -1,257 +0,0 @@ -// tests/integration_scan.rs - -use std::{ - path::{Path, PathBuf}, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use url::Url; - -#[derive(Debug)] -struct TestCase { - file_name: &'static str, - min_expected_findings: usize, -} - -struct TestContext { - rules_db: Arc, -} - -fn root_dir() -> PathBuf { - PathBuf::from(env!("CARGO_MANIFEST_DIR")) -} - -impl TestContext { - fn new() -> Result { - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules) - .load(&scan_args) - .context("Failed to load rules")?; - - let resolved = loaded.resolve_enabled_rules().context("Failed to resolve rules")?; - - let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect()) - .context("Failed to compile rules")?; - - Ok(Self { rules_db: Arc::new(rules_db) }) - } - - async fn scan_file(&self, file_path: &Path) -> Result { - let temp_dir = TempDir::new().context("Failed to create temporary directory")?; - let clone_dir = temp_dir.path().to_path_buf(); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: vec![file_path.to_path_buf()], - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - snippet_length: 256, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Auto, - no_update_check: false, - self_update: false, - progress: Mode::Never, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - - run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &self.rules_db).await?; - - let findings = { - let ds = datastore.lock().unwrap(); - ds.get_matches().len() - }; - - Ok(findings) - } -} - -#[tokio::test] -async fn test_scan_vulnerable_files() -> Result<()> { - let test_context = TestContext::new()?; - - let test_cases = vec![ - TestCase { file_name: "testdata/c_vulnerable.c", min_expected_findings: 3 }, - TestCase { file_name: "testdata/cpp_vulnerable.cpp", min_expected_findings: 3 }, - TestCase { file_name: "testdata/csharp_vulnerable.cs", min_expected_findings: 4 }, - TestCase { file_name: "testdata/elixir_vulnerable.exs", min_expected_findings: 1 }, - TestCase { file_name: "testdata/generic_secrets.py", min_expected_findings: 9 }, - TestCase { file_name: "testdata/go_vulnerable.go", min_expected_findings: 4 }, - TestCase { file_name: "testdata/java_vulnerable.java", min_expected_findings: 4 }, - TestCase { file_name: "testdata/javascript_vulnerable.js", min_expected_findings: 4 }, - TestCase { file_name: "testdata/json_vulnerable.json", min_expected_findings: 4 }, - TestCase { file_name: "testdata/kotlin_vulnerable.kt", min_expected_findings: 7 }, - TestCase { file_name: "testdata/objc_vulnerable.m", min_expected_findings: 4 }, - TestCase { file_name: "testdata/php_vulnerable.php", min_expected_findings: 5 }, - TestCase { file_name: "testdata/python_vulnerable.py", min_expected_findings: 10 }, - TestCase { file_name: "testdata/python2_vulnerable.py", min_expected_findings: 4 }, - TestCase { file_name: "testdata/ruby_vulnerable.rb", min_expected_findings: 6 }, - TestCase { file_name: "testdata/rust_vulnerable.rs", min_expected_findings: 3 }, - TestCase { file_name: "testdata/scala_vulnerable.scala", min_expected_findings: 3 }, - TestCase { file_name: "testdata/shell_vulnerable.sh", min_expected_findings: 2 }, - TestCase { file_name: "testdata/slack_tokens.properties", min_expected_findings: 17 }, - TestCase { file_name: "testdata/swift_vulnerable.swift", min_expected_findings: 2 }, - TestCase { file_name: "testdata/toml_vulnerable.toml", min_expected_findings: 4 }, - TestCase { file_name: "testdata/tsx_vulnerable.tsx", min_expected_findings: 1 }, - TestCase { file_name: "testdata/typescript_vulnerable.ts", min_expected_findings: 1 }, - TestCase { file_name: "testdata/yaml_vulnerable.yaml", min_expected_findings: 4 }, - ]; - - let root = root_dir(); - - for test_case in test_cases { - let test_file = root.join(test_case.file_name); - println!("Testing file: {}", test_case.file_name); - - let findings = test_context.scan_file(&test_file).await?; - - assert!( - findings >= test_case.min_expected_findings, - "File: {} - Expected >= {} findings, got {}", - test_case.file_name, - test_case.min_expected_findings, - findings - ); - } - - Ok(()) -} From 22c5594b535f9b247ce0c8080b28e58956fc2c04 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 10 Aug 2025 21:51:31 -0700 Subject: [PATCH 03/10] Added support for scanning Confluence pages --- CHANGELOG.md | 3 + README.md | 46 +++++- src/cli/commands/inputs.rs | 11 +- src/confluence.rs | 142 ++++++++++++++++ src/findings_store.rs | 10 ++ src/gitlab.rs | 2 +- src/lib.rs | 1 + src/main.rs | 2 + src/reporter.rs | 9 + src/reporter/json_format.rs | 3 + src/scanner/repos.rs | 36 +++- src/scanner/runner.rs | 7 +- tests/int_dedup.rs | 2 + tests/int_github.rs | 2 + tests/int_gitlab.rs | 4 + tests/int_gitlab.rs.orig | 241 +++++++++++++++++++++++++++ tests/int_quiet.rs | 2 +- tests/int_redact.rs | 2 + tests/int_slack.rs | 4 + tests/int_slack.rs.orig | 205 +++++++++++++++++++++++ tests/int_validation_cache.rs | 2 + tests/int_vulnerable_files.rs | 4 + tests/int_vulnerable_files.rs.orig | 255 +++++++++++++++++++++++++++++ 23 files changed, 985 insertions(+), 10 deletions(-) create mode 100644 src/confluence.rs create mode 100644 tests/int_gitlab.rs.orig create mode 100644 tests/int_slack.rs.orig create mode 100644 tests/int_vulnerable_files.rs.orig diff --git a/CHANGELOG.md b/CHANGELOG.md index b8d7720..a4e50d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.39.0] +- Added support for scanning Confluence pages via `--confluence-url` and `--cql` + ## [1.38.0] - `--quiet` now suppresses scan summaries and rule statistics unless `--rule-stats` is explicitly provided - Added X Consumer key detection and validation diff --git a/README.md b/README.md index 1fba1fb..18b940c 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co ## What Kingfisher Adds - **Live validation** via cloud-provider APIs -- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages +- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, Confluence pages, and Slack messages - **Compressed Files**: Supports extracting and scanning compressed files for secrets - **Baseline mode**: ignore known secrets, flag only new ones - **Language-aware detection** (source-code parsing) for ~20 languages @@ -24,9 +24,10 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co - **Extensible rules**: hundreds of built-in detectors plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) - **Multiple targets**: - **Git history**: local repos or GitHub/GitLab orgs/users - - **Docker images**: public or private via `--docker-image` - - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` - - **Slack messages**: query‑based scans with `--slack-query` + - **Docker images**: public or private via `--docker-image` + - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` + - **Confluence pages**: CQL‑driven scans with `--confluence-url` and `--cql` + - **Slack messages**: query‑based scans with `--slack-query` - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous - **Compressed Files**: Supports extracting and scanning compressed files for secrets - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) @@ -421,7 +422,36 @@ KF_JIRA_TOKEN="token" kingfisher scan \ --jql 'ORDER BY created DESC' \ --max-results 1000 ``` ---- + +## Scanning Confluence + +### Scan Confluence pages matching a CQL query + +```bash +# Bearer token +KF_CONFLUENCE_TOKEN="token" kingfisher scan \ + --confluence-url https://confluence.company.com \ + --cql "label = secret" \ + --max-results 500 + +# Basic auth with username and token +KF_CONFLUENCE_USER="user@example.com" KF_CONFLUENCE_TOKEN="token" kingfisher scan \ + --confluence-url https://confluence.company.com \ + --cql "label = secret" \ + --max-results 500 +``` + +Use the base URL of your Confluence site for `--confluence-url`. Kingfisher +automatically adds `/rest/api` to the end, so `https://example.com/wiki` and +`https://example.com` both work depending on your server configuration. + +Generate a [personal access token](https://confluence.atlassian.com/doc/personal-access-tokens-938022290.html) and +set it in the `KF_CONFLUENCE_TOKEN` environment variable. By default, Kingfisher +sends the token as a bearer token in the `Authorization` header. To use basic +authentication instead, also set `KF_CONFLUENCE_USER` to your Confluence email +address; Kingfisher will then send the username and `KF_CONFLUENCE_TOKEN` as a +Basic auth header. If the server responds with a redirect to a login page, the +credentials are invalid or lack the required permissions. ## Scanning Slack ### Scan Slack messages matching a search query @@ -444,6 +474,7 @@ KF_SLACK_TOKEN="xoxp-1234..." kingfisher scan \ | `KF_GITHUB_TOKEN` | GitHub Personal Access Token | | `KF_GITLAB_TOKEN` | GitLab Personal Access Token | | `KF_JIRA_TOKEN` | Jira API token | +| `KF_CONFLUENCE_TOKEN` | Confluence API token | | `KF_SLACK_TOKEN` | Slack API token | | `KF_DOCKER_TOKEN` | Docker registry token (`user:pass` or bearer token). If unset, credentials from the Docker keychain are used | | `KF_AWS_KEY` and `KF_AWS_SECRET` | AWS Credentials to use with S3 bucket scanning | @@ -465,6 +496,11 @@ To authenticate Jira requests: export KF_JIRA_TOKEN="token" ``` +To authenticate Confluence requests: +```bash +export KF_CONFLUENCE_TOKEN="token" +``` + _If no token is provided Kingfisher still works for public repositories._ --- diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 2249640..e6f9168 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -27,6 +27,7 @@ pub struct InputSpecifierArgs { "all_github_organizations", "all_gitlab_groups", "jira_url", + "confluence_url", "docker_image", "slack_query", "s3_bucket" @@ -96,6 +97,14 @@ pub struct InputSpecifierArgs { #[arg(long, requires = "jira_url")] pub jql: Option, + /// Confluence base URL (e.g. https://confluence.example.com) + #[arg(long, value_hint = ValueHint::Url, requires = "cql")] + pub confluence_url: Option, + + /// CQL query to select Confluence pages + #[arg(long, requires = "confluence_url")] + pub cql: Option, + /// Slack search query #[arg(long)] pub slack_query: Option, @@ -104,7 +113,7 @@ pub struct InputSpecifierArgs { #[arg(long, default_value = "https://slack.com/api/", value_hint = ValueHint::Url)] pub slack_api_url: Url, - /// Maximum number of Slack or Jira results to fetch + /// Maximum number of Slack, Jira, or Confluence results to fetch #[arg(long, default_value_t = 100)] pub max_results: usize, diff --git a/src/confluence.rs b/src/confluence.rs new file mode 100644 index 0000000..50e525c --- /dev/null +++ b/src/confluence.rs @@ -0,0 +1,142 @@ +use anyhow::{bail, Context, Result}; +use reqwest::{header, Client}; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use url::Url; + +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfluencePage { + pub id: String, + pub title: String, + #[serde(default)] + pub body: Option, + #[serde(rename = "_links")] + pub links: ConfluenceLinks, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfluenceBody { + #[serde(default)] + pub storage: Option, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfluenceStorage { + #[serde(default)] + pub value: Option, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfluenceLinks { + pub webui: String, +} + +#[derive(Debug, Deserialize)] +struct ConfluenceSearchResponse { + results: Vec, + #[serde(rename = "_links")] + links: ConfluenceResultLinks, +} + +#[derive(Debug, Deserialize)] +struct ConfluenceResultLinks { + next: Option, +} + +pub async fn search_pages( + confluence_url: Url, + cql: &str, + max_results: usize, + ignore_certs: bool, +) -> Result> { + let token = std::env::var("KF_CONFLUENCE_TOKEN") + .context("KF_CONFLUENCE_TOKEN environment variable must be set")?; + let user = std::env::var("KF_CONFLUENCE_USER").ok(); + if let Some(ref u) = user { + if !u.contains('@') { + bail!("KF_CONFLUENCE_USER must be an email address"); + } + } + + let client = Client::builder() + .redirect(reqwest::redirect::Policy::none()) + .danger_accept_invalid_certs(ignore_certs) + .build() + .context("Failed to build HTTP client")?; + + let base = confluence_url.as_str().trim_end_matches('/'); + let api_base = format!("{}/rest/api/content/search", base); + + let mut pages = Vec::new(); + let mut start = 0usize; + while pages.len() < max_results { + let limit = std::cmp::min(100, max_results - pages.len()); + let url = Url::parse(&api_base)?; + let req = client.get(url).query(&[ + ("cql", cql), + ("limit", &limit.to_string()), + ("start", &start.to_string()), + ("expand", "body.storage"), + ]); + let req = if let Some(user) = &user { + req.basic_auth(user, Some(token.clone())) + } else { + req.bearer_auth(&token) + }; + let resp = req.send().await.context("Failed to send Confluence request")?; + + let status = resp.status(); + if !status.is_success() { + let location = resp + .headers() + .get(header::LOCATION) + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + let body = resp.text().await.unwrap_or_default(); + if let Some(loc) = location { + bail!( + "Confluence API request returned {} redirect to {}. Check KF_CONFLUENCE_TOKEN and KF_CONFLUENCE_USER", + status, + loc + ); + } else { + bail!("Confluence API request failed with status {}: {}", status, body); + } + } + + let body: ConfluenceSearchResponse = + resp.json().await.context("Failed to parse Confluence response")?; + for p in body.results { + pages.push(p); + if pages.len() >= max_results { + break; + } + } + if pages.len() >= max_results || body.links.next.is_none() { + break; + } + start += limit; + } + Ok(pages) +} + +pub async fn download_pages_to_dir( + confluence_url: Url, + cql: &str, + max_results: usize, + ignore_certs: bool, + output_dir: &PathBuf, +) -> Result> { + std::fs::create_dir_all(output_dir)?; + let pages = search_pages(confluence_url.clone(), cql, max_results, ignore_certs).await?; + let mut paths = Vec::new(); + let base = confluence_url.as_str().trim_end_matches('/'); + let web_base = base.to_string(); + for page in pages { + let file = output_dir.join(format!("{}.json", page.id)); + std::fs::write(&file, serde_json::to_vec(&page)?)?; + let link = format!("{}{}", web_base, page.links.webui); + paths.push((file, link)); + } + Ok(paths) +} \ No newline at end of file diff --git a/src/findings_store.rs b/src/findings_store.rs index a1c94d4..8b8e0a4 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -54,6 +54,7 @@ pub struct FindingsStore { origin_meta: FxHashMap>, docker_images: FxHashMap, slack_links: FxHashMap, + confluence_links: FxHashMap, s3_buckets: FxHashMap, } impl FindingsStore { @@ -74,6 +75,7 @@ impl FindingsStore { bloom_items: 0, docker_images: FxHashMap::default(), slack_links: FxHashMap::default(), + confluence_links: FxHashMap::default(), s3_buckets: FxHashMap::default(), } } @@ -308,6 +310,14 @@ impl FindingsStore { &self.slack_links } + pub fn register_confluence_page(&mut self, path: PathBuf, link: String) { + self.confluence_links.insert(path, link); + } + + pub fn confluence_links(&self) -> &FxHashMap { + &self.confluence_links + } + pub fn register_s3_bucket(&mut self, dir: PathBuf, bucket: String) { self.s3_buckets.insert(dir, bucket); } diff --git a/src/gitlab.rs b/src/gitlab.rs index be9b4a5..e7df15e 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -187,4 +187,4 @@ pub async fn list_repositories( } Ok(()) -} \ No newline at end of file +} diff --git a/src/lib.rs b/src/lib.rs index 04f7303..a531c4a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ pub mod blob; pub mod bstring_escape; pub mod bstring_table; pub mod cli; +pub mod confluence; pub mod content_type; pub mod decompress; pub mod defaults; diff --git a/src/main.rs b/src/main.rs index ac78ef7..3943b1e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -285,6 +285,8 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, s3_bucket: None, diff --git a/src/reporter.rs b/src/reporter.rs index ef4c763..13ca84c 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -134,6 +134,13 @@ impl DetailsReporter { } } + /// If the given file path corresponds to a Confluence page downloaded to disk, + /// return the URL for that page. + fn confluence_page_url(&self, path: &std::path::Path) -> Option { + let ds = self.datastore.lock().ok()?; + ds.confluence_links().get(path).cloned() + } + /// If the given file path corresponds to a Slack message downloaded to disk, /// return the permalink for that message. fn slack_message_url(&self, path: &std::path::Path) -> Option { @@ -333,6 +340,8 @@ impl DetailsReporter { Origin::File(e) => { if let Some(url) = self.jira_issue_url(&e.path, args) { Some(url) + } else if let Some(url) = self.confluence_page_url(&e.path) { + Some(url) } else if let Some(url) = self.slack_message_url(&e.path) { Some(url) } else if let Some(mapped) = self.s3_display_path(&e.path) { diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index d9eda56..10d7aee 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -87,6 +87,9 @@ mod tests { // Jira options jira_url: None, jql: None, + // Confluence options + confluence_url: None, + cql: None, max_results: 100, // Slack options slack_query: None, diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 7bd6b8d..a249f02 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -15,7 +15,7 @@ use crate::{ commands::{github::GitCloneMode, github::GitHistoryMode, scan}, global, }, - findings_store, + confluence, findings_store, git_binary::{CloneMode, Git}, git_url::GitUrl, github, gitlab, @@ -263,6 +263,40 @@ pub async fn fetch_jira_issues( Ok(vec![output_dir]) } +pub async fn fetch_confluence_pages( + args: &scan::ScanArgs, + global_args: &global::GlobalArgs, + datastore: &Arc>, +) -> Result> { + let Some(confluence_url) = args.input_specifier_args.confluence_url.clone() else { + return Ok(Vec::new()); + }; + let Some(cql) = args.input_specifier_args.cql.as_deref() else { + return Ok(Vec::new()); + }; + let max_results = args.input_specifier_args.max_results; + let output_root = { + let ds = datastore.lock().unwrap(); + ds.clone_root() + }; + let output_dir = output_root.join("confluence_pages"); + let paths = confluence::download_pages_to_dir( + confluence_url, + cql, + max_results, + global_args.ignore_certs, + &output_dir, + ) + .await?; + { + let mut ds = datastore.lock().unwrap(); + for (path, link) in &paths { + ds.register_confluence_page(path.clone(), link.clone()); + } + } + Ok(vec![output_dir]) +} + pub async fn fetch_slack_messages( args: &scan::ScanArgs, global_args: &global::GlobalArgs, diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 10b6e51..e389543 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -19,7 +19,8 @@ use crate::{ scanner::{ clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos, repos::{ - enumerate_gitlab_repos, fetch_jira_issues, fetch_s3_objects, fetch_slack_messages, + enumerate_gitlab_repos, fetch_confluence_pages, fetch_jira_issues, fetch_s3_objects, + fetch_slack_messages, }, run_secret_validation, save_docker_images, summary::print_scan_summary, @@ -70,6 +71,10 @@ pub async fn run_async_scan( let jira_dirs = fetch_jira_issues(args, global_args, &datastore).await?; input_roots.extend(jira_dirs); + // Fetch Confluence pages if requested + let confluence_dirs = fetch_confluence_pages(args, global_args, &datastore).await?; + input_roots.extend(confluence_dirs); + // Fetch Slack messages if requested let slack_dirs = fetch_slack_messages(args, global_args, &datastore).await?; input_roots.extend(slack_dirs); diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 4c3be19..68b9663 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -81,6 +81,8 @@ rules: jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_github.rs b/tests/int_github.rs index 8edc022..0bae089 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -68,6 +68,8 @@ fn test_github_remote_scan() -> Result<()> { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 4668439..7e48f60 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -67,6 +67,8 @@ fn test_gitlab_remote_scan() -> Result<()> { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), @@ -170,6 +172,8 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_gitlab.rs.orig b/tests/int_gitlab.rs.orig new file mode 100644 index 0000000..4668439 --- /dev/null +++ b/tests/int_gitlab.rs.orig @@ -0,0 +1,241 @@ +// tests/int_gitlab.rs +use std::{ + str::FromStr, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + git_url::GitUrl, + scanner::{load_and_record_rules, run_scan}, +}; +use tempfile::TempDir; +use tokio::runtime::Runtime; +use url::Url; + +/// Derive process exit-codes from findings +fn determine_exit_code(total: usize, validated: usize) -> i32 { + match (total, validated) { + (0, _) => 0, + (_, v) if v > 0 => 205, + _ => 200, + } +} + +#[test] +fn test_gitlab_remote_scan() -> Result<()> { + let temp_dir = TempDir::new().context("tmp dir")?; + let clone_dir = temp_dir.path().to_path_buf(); + + // Public GitLab repo seeded with test secrets + let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/")?, + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/")?, + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16_384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + let rt = Runtime::new()?; + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + + rt.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + + let ds = datastore.lock().unwrap(); + let findings = ds.get_matches(); + let total = findings.len(); + let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); + + assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); + + let exit_code = determine_exit_code(total, validated); + assert!( + exit_code >= 200, + "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" + ); + + drop(rt); + Ok(()) +} + +#[test] +fn test_gitlab_remote_scan_no_history() -> Result<()> { + let temp_dir = TempDir::new().context("tmp dir")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/")?, + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/")?, + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::None, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16_384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + let rt = Runtime::new()?; + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + + rt.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + + let ds = datastore.lock().unwrap(); + let findings = ds.get_matches(); + let total = findings.len(); + let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); + + assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); + + let exit_code = determine_exit_code(total, validated); + assert!( + exit_code >= 200, + "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" + ); + + drop(rt); + Ok(()) +} diff --git a/tests/int_quiet.rs b/tests/int_quiet.rs index 598f103..1968972 100644 --- a/tests/int_quiet.rs +++ b/tests/int_quiet.rs @@ -56,4 +56,4 @@ fn scan_quiet_with_rule_stats_prints_rule_stats() { contains_bytes(out, b"Rule Performance Stats") })); } -} \ No newline at end of file +} diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 45f3767..69a1061 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -51,6 +51,8 @@ async fn test_redact_hashes_finding_values() -> Result<()> { gitlab_repo_type: GitLabRepoType::Owner, jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_slack.rs b/tests/int_slack.rs index 7284e1d..e94607f 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -57,6 +57,8 @@ impl TestContext { gitlab_repo_type: GitLabRepoType::Owner, jira_url: None, jql: None, + confluence_url: None, + cql: None, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), s3_bucket: None, @@ -147,6 +149,8 @@ async fn test_scan_slack_messages() -> Result<()> { gitlab_repo_type: GitLabRepoType::Owner, jira_url: None, jql: None, + confluence_url: None, + cql: None, slack_query: Some("test".into()), slack_api_url: Url::parse(&format!("{}/", server.uri()))?, max_results: 10, diff --git a/tests/int_slack.rs.orig b/tests/int_slack.rs.orig new file mode 100644 index 0000000..7284e1d --- /dev/null +++ b/tests/int_slack.rs.orig @@ -0,0 +1,205 @@ +use std::{ + env, + sync::{Arc, Mutex}, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; +use wiremock::{ + matchers::{method, path}, + Mock, MockServer, ResponseTemplate, +}; + +struct TestContext { + rules_db: Arc, +} + +impl TestContext { + fn new() -> Result { + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + max_results: 10, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; + Ok(Self { rules_db: Arc::new(rules_db) }) + } +} + +#[tokio::test] +async fn test_scan_slack_messages() -> Result<()> { + let ctx = TestContext::new()?; + + let server = MockServer::start().await; + let response = serde_json::json!({ + "ok": true, + "messages": { + "matches": [{ + "permalink": "https://example.slack.com/archives/C123/p1234", + "text": "This contains a github token ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa", + "ts": "1234.56", + "channel": {"id": "C123", "name": "general"} + }], + "pagination": {"page": 1, "page_count": 1} + } + }); + Mock::given(method("GET")) + .and(path("/search.messages")) + .respond_with(ResponseTemplate::new(200).set_body_json(response)) + .mount(&server) + .await; + + env::set_var("KF_SLACK_TOKEN", "xoxp-test"); + + let temp_dir = TempDir::new()?; + let clone_dir = temp_dir.path().to_path_buf(); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + slack_query: Some("test".into()), + slack_api_url: Url::parse(&format!("{}/", server.uri()))?, + max_results: 10, + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &ctx.rules_db).await?; + + let findings = { + let ds = datastore.lock().unwrap(); + ds.get_matches().len() + }; + assert!(findings > 0); + Ok(()) +} diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 46d4521..2caa10a 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -124,6 +124,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index c53adae..2478170 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -67,6 +67,8 @@ impl TestContext { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), @@ -143,6 +145,8 @@ impl TestContext { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_vulnerable_files.rs.orig b/tests/int_vulnerable_files.rs.orig new file mode 100644 index 0000000..c53adae --- /dev/null +++ b/tests/int_vulnerable_files.rs.orig @@ -0,0 +1,255 @@ +// tests/integration_scan.rs + +use std::{ + path::{Path, PathBuf}, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; + +#[derive(Debug)] +struct TestCase { + file_name: &'static str, + min_expected_findings: usize, +} + +struct TestContext { + rules_db: Arc, +} + +fn root_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) +} + +impl TestContext { + fn new() -> Result { + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules) + .load(&scan_args) + .context("Failed to load rules")?; + + let resolved = loaded.resolve_enabled_rules().context("Failed to resolve rules")?; + + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect()) + .context("Failed to compile rules")?; + + Ok(Self { rules_db: Arc::new(rules_db) }) + } + + async fn scan_file(&self, file_path: &Path) -> Result { + let temp_dir = TempDir::new().context("Failed to create temporary directory")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: vec![file_path.to_path_buf()], + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &self.rules_db).await?; + + let findings = { + let ds = datastore.lock().unwrap(); + ds.get_matches().len() + }; + + Ok(findings) + } +} + +#[tokio::test] +async fn test_scan_vulnerable_files() -> Result<()> { + let test_context = TestContext::new()?; + + let test_cases = vec![ + TestCase { file_name: "testdata/c_vulnerable.c", min_expected_findings: 3 }, + TestCase { file_name: "testdata/cpp_vulnerable.cpp", min_expected_findings: 3 }, + TestCase { file_name: "testdata/csharp_vulnerable.cs", min_expected_findings: 4 }, + TestCase { file_name: "testdata/elixir_vulnerable.exs", min_expected_findings: 1 }, + TestCase { file_name: "testdata/generic_secrets.py", min_expected_findings: 9 }, + TestCase { file_name: "testdata/go_vulnerable.go", min_expected_findings: 4 }, + TestCase { file_name: "testdata/java_vulnerable.java", min_expected_findings: 4 }, + TestCase { file_name: "testdata/javascript_vulnerable.js", min_expected_findings: 4 }, + TestCase { file_name: "testdata/json_vulnerable.json", min_expected_findings: 4 }, + TestCase { file_name: "testdata/kotlin_vulnerable.kt", min_expected_findings: 7 }, + TestCase { file_name: "testdata/objc_vulnerable.m", min_expected_findings: 4 }, + TestCase { file_name: "testdata/php_vulnerable.php", min_expected_findings: 5 }, + TestCase { file_name: "testdata/python_vulnerable.py", min_expected_findings: 10 }, + TestCase { file_name: "testdata/python2_vulnerable.py", min_expected_findings: 4 }, + TestCase { file_name: "testdata/ruby_vulnerable.rb", min_expected_findings: 6 }, + TestCase { file_name: "testdata/rust_vulnerable.rs", min_expected_findings: 3 }, + TestCase { file_name: "testdata/scala_vulnerable.scala", min_expected_findings: 3 }, + TestCase { file_name: "testdata/shell_vulnerable.sh", min_expected_findings: 2 }, + TestCase { file_name: "testdata/slack_tokens.properties", min_expected_findings: 17 }, + TestCase { file_name: "testdata/swift_vulnerable.swift", min_expected_findings: 2 }, + TestCase { file_name: "testdata/toml_vulnerable.toml", min_expected_findings: 4 }, + TestCase { file_name: "testdata/tsx_vulnerable.tsx", min_expected_findings: 1 }, + TestCase { file_name: "testdata/typescript_vulnerable.ts", min_expected_findings: 1 }, + TestCase { file_name: "testdata/yaml_vulnerable.yaml", min_expected_findings: 4 }, + ]; + + let root = root_dir(); + + for test_case in test_cases { + let test_file = root.join(test_case.file_name); + println!("Testing file: {}", test_case.file_name); + + let findings = test_context.scan_file(&test_file).await?; + + assert!( + findings >= test_case.min_expected_findings, + "File: {} - Expected >= {} findings, got {}", + test_case.file_name, + test_case.min_expected_findings, + findings + ); + } + + Ok(()) +} From 053d1a322452369035c5900ff98a0aabf189b1d3 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 10 Aug 2025 21:54:26 -0700 Subject: [PATCH 04/10] Added support for scanning Confluence pages --- tests/int_gitlab.rs.orig | 241 --------------------------- tests/int_slack.rs.orig | 205 ----------------------- tests/int_vulnerable_files.rs.orig | 255 ----------------------------- 3 files changed, 701 deletions(-) delete mode 100644 tests/int_gitlab.rs.orig delete mode 100644 tests/int_slack.rs.orig delete mode 100644 tests/int_vulnerable_files.rs.orig diff --git a/tests/int_gitlab.rs.orig b/tests/int_gitlab.rs.orig deleted file mode 100644 index 4668439..0000000 --- a/tests/int_gitlab.rs.orig +++ /dev/null @@ -1,241 +0,0 @@ -// tests/int_gitlab.rs -use std::{ - str::FromStr, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - git_url::GitUrl, - scanner::{load_and_record_rules, run_scan}, -}; -use tempfile::TempDir; -use tokio::runtime::Runtime; -use url::Url; - -/// Derive process exit-codes from findings -fn determine_exit_code(total: usize, validated: usize) -> i32 { - match (total, validated) { - (0, _) => 0, - (_, v) if v > 0 => 205, - _ => 200, - } -} - -#[test] -fn test_gitlab_remote_scan() -> Result<()> { - let temp_dir = TempDir::new().context("tmp dir")?; - let clone_dir = temp_dir.path().to_path_buf(); - - // Public GitLab repo seeded with test secrets - let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; - let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: vec![git_url], - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/")?, - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/")?, - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - no_binary: true, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: false, - color: Mode::Auto, - progress: Mode::Auto, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16_384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - let rt = Runtime::new()?; - - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - - rt.block_on(async { - run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await - })?; - - let ds = datastore.lock().unwrap(); - let findings = ds.get_matches(); - let total = findings.len(); - let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); - - assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); - - let exit_code = determine_exit_code(total, validated); - assert!( - exit_code >= 200, - "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" - ); - - drop(rt); - Ok(()) -} - -#[test] -fn test_gitlab_remote_scan_no_history() -> Result<()> { - let temp_dir = TempDir::new().context("tmp dir")?; - let clone_dir = temp_dir.path().to_path_buf(); - - let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; - let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: vec![git_url], - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/")?, - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/")?, - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::None, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - no_extract_archives: false, - extraction_depth: 2, - no_binary: true, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Medium, - no_validate: false, - rule_stats: false, - only_valid: false, - min_entropy: None, - redact: false, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: false, - color: Mode::Auto, - progress: Mode::Auto, - no_update_check: false, - self_update: false, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16_384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - let rt = Runtime::new()?; - - let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); - - rt.block_on(async { - run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await - })?; - - let ds = datastore.lock().unwrap(); - let findings = ds.get_matches(); - let total = findings.len(); - let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); - - assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); - - let exit_code = determine_exit_code(total, validated); - assert!( - exit_code >= 200, - "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" - ); - - drop(rt); - Ok(()) -} diff --git a/tests/int_slack.rs.orig b/tests/int_slack.rs.orig deleted file mode 100644 index 7284e1d..0000000 --- a/tests/int_slack.rs.orig +++ /dev/null @@ -1,205 +0,0 @@ -use std::{ - env, - sync::{Arc, Mutex}, -}; - -use anyhow::Result; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use url::Url; -use wiremock::{ - matchers::{method, path}, - Mock, MockServer, ResponseTemplate, -}; - -struct TestContext { - rules_db: Arc, -} - -impl TestContext { - fn new() -> Result { - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - jira_url: None, - jql: None, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - max_results: 10, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - baseline_file: None, - manage_baseline: false, - }; - - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; - let resolved = loaded.resolve_enabled_rules()?; - let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; - Ok(Self { rules_db: Arc::new(rules_db) }) - } -} - -#[tokio::test] -async fn test_scan_slack_messages() -> Result<()> { - let ctx = TestContext::new()?; - - let server = MockServer::start().await; - let response = serde_json::json!({ - "ok": true, - "messages": { - "matches": [{ - "permalink": "https://example.slack.com/archives/C123/p1234", - "text": "This contains a github token ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa", - "ts": "1234.56", - "channel": {"id": "C123", "name": "general"} - }], - "pagination": {"page": 1, "page_count": 1} - } - }); - Mock::given(method("GET")) - .and(path("/search.messages")) - .respond_with(ResponseTemplate::new(200).set_body_json(response)) - .mount(&server) - .await; - - env::set_var("KF_SLACK_TOKEN", "xoxp-test"); - - let temp_dir = TempDir::new()?; - let clone_dir = temp_dir.path().to_path_buf(); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - jira_url: None, - jql: None, - slack_query: Some("test".into()), - slack_api_url: Url::parse(&format!("{}/", server.uri()))?, - max_results: 10, - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - docker_image: Vec::new(), - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Auto, - no_update_check: false, - self_update: false, - progress: Mode::Never, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - - run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &ctx.rules_db).await?; - - let findings = { - let ds = datastore.lock().unwrap(); - ds.get_matches().len() - }; - assert!(findings > 0); - Ok(()) -} diff --git a/tests/int_vulnerable_files.rs.orig b/tests/int_vulnerable_files.rs.orig deleted file mode 100644 index c53adae..0000000 --- a/tests/int_vulnerable_files.rs.orig +++ /dev/null @@ -1,255 +0,0 @@ -// tests/integration_scan.rs - -use std::{ - path::{Path, PathBuf}, - sync::{Arc, Mutex}, -}; - -use anyhow::{Context, Result}; -use kingfisher::{ - cli::{ - commands::{ - github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, - gitlab::GitLabRepoType, - inputs::{ContentFilteringArgs, InputSpecifierArgs}, - output::{OutputArgs, ReportOutputFormat}, - rules::RuleSpecifierArgs, - scan::{ConfidenceLevel, ScanArgs}, - }, - global::{AdvancedArgs, Mode}, - GlobalArgs, - }, - findings_store::FindingsStore, - rule_loader::RuleLoader, - rules_database::RulesDatabase, - scanner::run_async_scan, -}; -use tempfile::TempDir; -use url::Url; - -#[derive(Debug)] -struct TestCase { - file_name: &'static str, - min_expected_findings: usize, -} - -struct TestContext { - rules_db: Arc, -} - -fn root_dir() -> PathBuf { - PathBuf::from(env!("CARGO_MANIFEST_DIR")) -} - -impl TestContext { - fn new() -> Result { - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: Vec::new(), - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - baseline_file: None, - manage_baseline: false, - }; - - let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules) - .load(&scan_args) - .context("Failed to load rules")?; - - let resolved = loaded.resolve_enabled_rules().context("Failed to resolve rules")?; - - let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect()) - .context("Failed to compile rules")?; - - Ok(Self { rules_db: Arc::new(rules_db) }) - } - - async fn scan_file(&self, file_path: &Path) -> Result { - let temp_dir = TempDir::new().context("Failed to create temporary directory")?; - let clone_dir = temp_dir.path().to_path_buf(); - - let scan_args = ScanArgs { - num_jobs: 2, - rules: RuleSpecifierArgs { - rules_path: Vec::new(), - rule: vec!["all".into()], - load_builtins: true, - }, - input_specifier_args: InputSpecifierArgs { - path_inputs: vec![file_path.to_path_buf()], - git_url: Vec::new(), - github_user: Vec::new(), - github_organization: Vec::new(), - all_github_organizations: false, - github_api_url: Url::parse("https://api.github.com/").unwrap(), - github_repo_type: GitHubRepoType::Source, - // new GitLab defaults - gitlab_user: Vec::new(), - gitlab_group: Vec::new(), - all_gitlab_groups: false, - gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), - gitlab_repo_type: GitLabRepoType::Owner, - - jira_url: None, - jql: None, - max_results: 100, - slack_query: None, - slack_api_url: Url::parse("https://slack.com/api/").unwrap(), - // s3 - s3_bucket: None, - s3_prefix: None, - role_arn: None, - aws_local_profile: None, - // Docker image scanning - docker_image: Vec::new(), - // git clone / history options - git_clone: GitCloneMode::Bare, - git_history: GitHistoryMode::Full, - scan_nested_repos: true, - commit_metadata: true, - }, - content_filtering_args: ContentFilteringArgs { - max_file_size_mb: 25.0, - extraction_depth: 2, - no_binary: true, - no_extract_archives: false, - exclude: Vec::new(), // Exclude patterns - }, - confidence: ConfidenceLevel::Low, - no_validate: true, - rule_stats: false, - only_valid: false, - min_entropy: Some(0.0), - redact: false, - git_repo_timeout: 1800, // 30 minutes - output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, - no_dedup: true, - baseline_file: None, - manage_baseline: false, - }; - - let global_args = GlobalArgs { - verbose: 0, - quiet: true, - color: Mode::Auto, - no_update_check: false, - self_update: false, - progress: Mode::Never, - ignore_certs: false, - advanced: AdvancedArgs { rlimit_nofile: 16384 }, - }; - - let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - - run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &self.rules_db).await?; - - let findings = { - let ds = datastore.lock().unwrap(); - ds.get_matches().len() - }; - - Ok(findings) - } -} - -#[tokio::test] -async fn test_scan_vulnerable_files() -> Result<()> { - let test_context = TestContext::new()?; - - let test_cases = vec![ - TestCase { file_name: "testdata/c_vulnerable.c", min_expected_findings: 3 }, - TestCase { file_name: "testdata/cpp_vulnerable.cpp", min_expected_findings: 3 }, - TestCase { file_name: "testdata/csharp_vulnerable.cs", min_expected_findings: 4 }, - TestCase { file_name: "testdata/elixir_vulnerable.exs", min_expected_findings: 1 }, - TestCase { file_name: "testdata/generic_secrets.py", min_expected_findings: 9 }, - TestCase { file_name: "testdata/go_vulnerable.go", min_expected_findings: 4 }, - TestCase { file_name: "testdata/java_vulnerable.java", min_expected_findings: 4 }, - TestCase { file_name: "testdata/javascript_vulnerable.js", min_expected_findings: 4 }, - TestCase { file_name: "testdata/json_vulnerable.json", min_expected_findings: 4 }, - TestCase { file_name: "testdata/kotlin_vulnerable.kt", min_expected_findings: 7 }, - TestCase { file_name: "testdata/objc_vulnerable.m", min_expected_findings: 4 }, - TestCase { file_name: "testdata/php_vulnerable.php", min_expected_findings: 5 }, - TestCase { file_name: "testdata/python_vulnerable.py", min_expected_findings: 10 }, - TestCase { file_name: "testdata/python2_vulnerable.py", min_expected_findings: 4 }, - TestCase { file_name: "testdata/ruby_vulnerable.rb", min_expected_findings: 6 }, - TestCase { file_name: "testdata/rust_vulnerable.rs", min_expected_findings: 3 }, - TestCase { file_name: "testdata/scala_vulnerable.scala", min_expected_findings: 3 }, - TestCase { file_name: "testdata/shell_vulnerable.sh", min_expected_findings: 2 }, - TestCase { file_name: "testdata/slack_tokens.properties", min_expected_findings: 17 }, - TestCase { file_name: "testdata/swift_vulnerable.swift", min_expected_findings: 2 }, - TestCase { file_name: "testdata/toml_vulnerable.toml", min_expected_findings: 4 }, - TestCase { file_name: "testdata/tsx_vulnerable.tsx", min_expected_findings: 1 }, - TestCase { file_name: "testdata/typescript_vulnerable.ts", min_expected_findings: 1 }, - TestCase { file_name: "testdata/yaml_vulnerable.yaml", min_expected_findings: 4 }, - ]; - - let root = root_dir(); - - for test_case in test_cases { - let test_file = root.join(test_case.file_name); - println!("Testing file: {}", test_case.file_name); - - let findings = test_context.scan_file(&test_file).await?; - - assert!( - findings >= test_case.min_expected_findings, - "File: {} - Expected >= {} findings, got {}", - test_case.file_name, - test_case.min_expected_findings, - findings - ); - } - - Ok(()) -} From 4489b576f88fe9e545f251a9e852d123d40ec339 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 10 Aug 2025 21:55:45 -0700 Subject: [PATCH 05/10] Added support for scanning Confluence pages --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 18b940c..06a0a70 100644 --- a/README.md +++ b/README.md @@ -437,7 +437,7 @@ KF_CONFLUENCE_TOKEN="token" kingfisher scan \ # Basic auth with username and token KF_CONFLUENCE_USER="user@example.com" KF_CONFLUENCE_TOKEN="token" kingfisher scan \ --confluence-url https://confluence.company.com \ - --cql "label = secret" \ + --cql "text ~ 'password'" \ --max-results 500 ``` From 0e7a0b57dd3426640f73ff92c11de009f3820bd3 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 10 Aug 2025 21:57:35 -0700 Subject: [PATCH 06/10] Added support for scanning Confluence pages --- README.md | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 06a0a70..d51f0b3 100644 --- a/README.md +++ b/README.md @@ -445,13 +445,10 @@ Use the base URL of your Confluence site for `--confluence-url`. Kingfisher automatically adds `/rest/api` to the end, so `https://example.com/wiki` and `https://example.com` both work depending on your server configuration. -Generate a [personal access token](https://confluence.atlassian.com/doc/personal-access-tokens-938022290.html) and -set it in the `KF_CONFLUENCE_TOKEN` environment variable. By default, Kingfisher -sends the token as a bearer token in the `Authorization` header. To use basic -authentication instead, also set `KF_CONFLUENCE_USER` to your Confluence email -address; Kingfisher will then send the username and `KF_CONFLUENCE_TOKEN` as a -Basic auth header. If the server responds with a redirect to a login page, the -credentials are invalid or lack the required permissions. +Generate a personal access token and set it in the `KF_CONFLUENCE_TOKEN` environment variable. By default, Kingfisher sends the token as a bearer token in the `Authorization` header. + +To use basic authentication instead, also set `KF_CONFLUENCE_USER` to your Confluence email address; Kingfisher will then send the username and `KF_CONFLUENCE_TOKEN` as a Basic auth header. If the server responds with a redirect to a login page, the credentials are invalid or lack the required permissions. + ## Scanning Slack ### Scan Slack messages matching a search query From 38371b538eb969418a74aa443c73e815b5ef8aca Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 11 Aug 2025 08:04:52 -0700 Subject: [PATCH 07/10] Added support for scanning Confluence pages --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 5eb52fc..90ac4a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.38.0" +version = "1.39.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true From 39588cc0988f58aaf61c6a2e86c92fe4add67587 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 11 Aug 2025 08:25:24 -0700 Subject: [PATCH 08/10] Added support for scanning Confluence pages --- src/confluence.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/confluence.rs b/src/confluence.rs index 50e525c..7150e90 100644 --- a/src/confluence.rs +++ b/src/confluence.rs @@ -67,11 +67,13 @@ pub async fn search_pages( let base = confluence_url.as_str().trim_end_matches('/'); let api_base = format!("{}/rest/api/content/search", base); + let api_url = Url::parse(&api_base)?; let mut pages = Vec::new(); let mut start = 0usize; + while pages.len() < max_results { let limit = std::cmp::min(100, max_results - pages.len()); - let url = Url::parse(&api_base)?; + let url = api_url.clone(); let req = client.get(url).query(&[ ("cql", cql), ("limit", &limit.to_string()), From 35e4b9011def97eb6bb9df0118fb62184792b13e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 11 Aug 2025 08:26:49 -0700 Subject: [PATCH 09/10] Added support for scanning Confluence pages --- src/confluence.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/confluence.rs b/src/confluence.rs index 7150e90..aecf637 100644 --- a/src/confluence.rs +++ b/src/confluence.rs @@ -94,7 +94,8 @@ pub async fn search_pages( .get(header::LOCATION) .and_then(|v| v.to_str().ok()) .map(|s| s.to_string()); - let body = resp.text().await.unwrap_or_default(); + let body = resp.text().await.unwrap_or_else(|e| format!("Failed to read response: {}", e)); + if let Some(loc) = location { bail!( "Confluence API request returned {} redirect to {}. Check KF_CONFLUENCE_TOKEN and KF_CONFLUENCE_USER", From 98ce4b9296ce9dd900c72de40b2a938c555be719 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 11 Aug 2025 09:03:58 -0700 Subject: [PATCH 10/10] Added support for scanning Confluence pages --- src/confluence.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/confluence.rs b/src/confluence.rs index aecf637..2a1bddb 100644 --- a/src/confluence.rs +++ b/src/confluence.rs @@ -81,7 +81,7 @@ pub async fn search_pages( ("expand", "body.storage"), ]); let req = if let Some(user) = &user { - req.basic_auth(user, Some(token.clone())) + req.basic_auth(user, Some(&token)) } else { req.bearer_auth(&token) }; @@ -95,7 +95,7 @@ pub async fn search_pages( .and_then(|v| v.to_str().ok()) .map(|s| s.to_string()); let body = resp.text().await.unwrap_or_else(|e| format!("Failed to read response: {}", e)); - + if let Some(loc) = location { bail!( "Confluence API request returned {} redirect to {}. Check KF_CONFLUENCE_TOKEN and KF_CONFLUENCE_USER",