diff --git a/CHANGELOG.md b/CHANGELOG.md index b8d7720..a4e50d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.39.0] +- Added support for scanning Confluence pages via `--confluence-url` and `--cql` + ## [1.38.0] - `--quiet` now suppresses scan summaries and rule statistics unless `--rule-stats` is explicitly provided - Added X Consumer key detection and validation diff --git a/README.md b/README.md index 1fba1fb..18b940c 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co ## What Kingfisher Adds - **Live validation** via cloud-provider APIs -- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages +- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, Confluence pages, and Slack messages - **Compressed Files**: Supports extracting and scanning compressed files for secrets - **Baseline mode**: ignore known secrets, flag only new ones - **Language-aware detection** (source-code parsing) for ~20 languages @@ -24,9 +24,10 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co - **Extensible rules**: hundreds of built-in detectors plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) - **Multiple targets**: - **Git history**: local repos or GitHub/GitLab orgs/users - - **Docker images**: public or private via `--docker-image` - - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` - - **Slack messages**: query‑based scans with `--slack-query` + - **Docker images**: public or private via `--docker-image` + - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` + - **Confluence pages**: CQL‑driven scans with `--confluence-url` and `--cql` + - **Slack messages**: query‑based scans with `--slack-query` - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from 
`KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous - **Compressed Files**: Supports extracting and scanning compressed files for secrets - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) @@ -421,7 +422,36 @@ KF_JIRA_TOKEN="token" kingfisher scan \ --jql 'ORDER BY created DESC' \ --max-results 1000 ``` ---- + +## Scanning Confluence + +### Scan Confluence pages matching a CQL query + +```bash +# Bearer token +KF_CONFLUENCE_TOKEN="token" kingfisher scan \ + --confluence-url https://confluence.company.com \ + --cql "label = secret" \ + --max-results 500 + +# Basic auth with username and token +KF_CONFLUENCE_USER="user@example.com" KF_CONFLUENCE_TOKEN="token" kingfisher scan \ + --confluence-url https://confluence.company.com \ + --cql "label = secret" \ + --max-results 500 +``` + +Use the base URL of your Confluence site for `--confluence-url`. Kingfisher +automatically adds `/rest/api` to the end, so `https://example.com/wiki` and +`https://example.com` both work depending on your server configuration. + +Generate a [personal access token](https://confluence.atlassian.com/doc/personal-access-tokens-938022290.html) and +set it in the `KF_CONFLUENCE_TOKEN` environment variable. By default, Kingfisher +sends the token as a bearer token in the `Authorization` header. To use basic +authentication instead, also set `KF_CONFLUENCE_USER` to your Confluence email +address; Kingfisher will then send the username and `KF_CONFLUENCE_TOKEN` as a +Basic auth header. If the server responds with a redirect to a login page, the +credentials are invalid or lack the required permissions. ## Scanning Slack ### Scan Slack messages matching a search query @@ -444,6 +474,7 @@ KF_SLACK_TOKEN="xoxp-1234..." 
kingfisher scan \ | `KF_GITHUB_TOKEN` | GitHub Personal Access Token | | `KF_GITLAB_TOKEN` | GitLab Personal Access Token | | `KF_JIRA_TOKEN` | Jira API token | +| `KF_CONFLUENCE_TOKEN` | Confluence API token | | `KF_SLACK_TOKEN` | Slack API token | | `KF_DOCKER_TOKEN` | Docker registry token (`user:pass` or bearer token). If unset, credentials from the Docker keychain are used | | `KF_AWS_KEY` and `KF_AWS_SECRET` | AWS Credentials to use with S3 bucket scanning | @@ -465,6 +496,11 @@ To authenticate Jira requests: export KF_JIRA_TOKEN="token" ``` +To authenticate Confluence requests: +```bash +export KF_CONFLUENCE_TOKEN="token" +``` + _If no token is provided Kingfisher still works for public repositories._ --- diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 2249640..e6f9168 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -27,6 +27,7 @@ pub struct InputSpecifierArgs { "all_github_organizations", "all_gitlab_groups", "jira_url", + "confluence_url", "docker_image", "slack_query", "s3_bucket" @@ -96,6 +97,14 @@ pub struct InputSpecifierArgs { #[arg(long, requires = "jira_url")] pub jql: Option, + /// Confluence base URL (e.g. 
https://confluence.example.com) + #[arg(long, value_hint = ValueHint::Url, requires = "cql")] + pub confluence_url: Option, + + /// CQL query to select Confluence pages + #[arg(long, requires = "confluence_url")] + pub cql: Option, + /// Slack search query #[arg(long)] pub slack_query: Option, @@ -104,7 +113,7 @@ pub struct InputSpecifierArgs { #[arg(long, default_value = "https://slack.com/api/", value_hint = ValueHint::Url)] pub slack_api_url: Url, - /// Maximum number of Slack or Jira results to fetch + /// Maximum number of Slack, Jira, or Confluence results to fetch #[arg(long, default_value_t = 100)] pub max_results: usize, diff --git a/src/confluence.rs b/src/confluence.rs new file mode 100644 index 0000000..50e525c --- /dev/null +++ b/src/confluence.rs @@ -0,0 +1,142 @@ +use anyhow::{bail, Context, Result}; +use reqwest::{header, Client}; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; +use url::Url; + +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfluencePage { + pub id: String, + pub title: String, + #[serde(default)] + pub body: Option, + #[serde(rename = "_links")] + pub links: ConfluenceLinks, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfluenceBody { + #[serde(default)] + pub storage: Option, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfluenceStorage { + #[serde(default)] + pub value: Option, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfluenceLinks { + pub webui: String, +} + +#[derive(Debug, Deserialize)] +struct ConfluenceSearchResponse { + results: Vec, + #[serde(rename = "_links")] + links: ConfluenceResultLinks, +} + +#[derive(Debug, Deserialize)] +struct ConfluenceResultLinks { + next: Option, +} + +pub async fn search_pages( + confluence_url: Url, + cql: &str, + max_results: usize, + ignore_certs: bool, +) -> Result> { + let token = std::env::var("KF_CONFLUENCE_TOKEN") + .context("KF_CONFLUENCE_TOKEN environment variable must be set")?; + let user = 
std::env::var("KF_CONFLUENCE_USER").ok(); + if let Some(ref u) = user { + if !u.contains('@') { + bail!("KF_CONFLUENCE_USER must be an email address"); + } + } + + let client = Client::builder() + .redirect(reqwest::redirect::Policy::none()) + .danger_accept_invalid_certs(ignore_certs) + .build() + .context("Failed to build HTTP client")?; + + let base = confluence_url.as_str().trim_end_matches('/'); + let api_base = format!("{}/rest/api/content/search", base); + + let mut pages = Vec::new(); + let mut start = 0usize; + while pages.len() < max_results { + let limit = std::cmp::min(100, max_results - pages.len()); + let url = Url::parse(&api_base)?; + let req = client.get(url).query(&[ + ("cql", cql), + ("limit", &limit.to_string()), + ("start", &start.to_string()), + ("expand", "body.storage"), + ]); + let req = if let Some(user) = &user { + req.basic_auth(user, Some(token.clone())) + } else { + req.bearer_auth(&token) + }; + let resp = req.send().await.context("Failed to send Confluence request")?; + + let status = resp.status(); + if !status.is_success() { + let location = resp + .headers() + .get(header::LOCATION) + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()); + let body = resp.text().await.unwrap_or_default(); + if let Some(loc) = location { + bail!( + "Confluence API request returned {} redirect to {}. 
Check KF_CONFLUENCE_TOKEN and KF_CONFLUENCE_USER", + status, + loc + ); + } else { + bail!("Confluence API request failed with status {}: {}", status, body); + } + } + + let body: ConfluenceSearchResponse = + resp.json().await.context("Failed to parse Confluence response")?; + for p in body.results { + pages.push(p); + if pages.len() >= max_results { + break; + } + } + if pages.len() >= max_results || pages.len() == start || body.links.next.is_none() { + break; + } + start = pages.len(); // resume after the pages actually received (the server may return fewer than limit); an empty batch already ended the loop above + } + Ok(pages) +} + +pub async fn download_pages_to_dir( + confluence_url: Url, + cql: &str, + max_results: usize, + ignore_certs: bool, + output_dir: &PathBuf, +) -> Result> { + std::fs::create_dir_all(output_dir)?; + let pages = search_pages(confluence_url.clone(), cql, max_results, ignore_certs).await?; + let mut paths = Vec::new(); + let base = confluence_url.as_str().trim_end_matches('/'); + let web_base = base.to_string(); + for page in pages { + let file = output_dir.join(format!("{}.json", page.id)); + std::fs::write(&file, serde_json::to_vec(&page)?)?; + let link = format!("{}{}", web_base, page.links.webui); + paths.push((file, link)); + } + Ok(paths) +} \ No newline at end of file diff --git a/src/findings_store.rs b/src/findings_store.rs index a1c94d4..8b8e0a4 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -54,6 +54,7 @@ pub struct FindingsStore { origin_meta: FxHashMap>, docker_images: FxHashMap, slack_links: FxHashMap, + confluence_links: FxHashMap, s3_buckets: FxHashMap, } impl FindingsStore { @@ -74,6 +75,7 @@ impl FindingsStore { bloom_items: 0, docker_images: FxHashMap::default(), slack_links: FxHashMap::default(), + confluence_links: FxHashMap::default(), s3_buckets: FxHashMap::default(), } } @@ -308,6 +310,14 @@ impl FindingsStore { &self.slack_links } + pub fn register_confluence_page(&mut self, path: PathBuf, link: String) { + self.confluence_links.insert(path, link); + } + + pub fn confluence_links(&self) -> &FxHashMap { + &self.confluence_links + } + pub fn 
register_s3_bucket(&mut self, dir: PathBuf, bucket: String) { self.s3_buckets.insert(dir, bucket); } diff --git a/src/gitlab.rs b/src/gitlab.rs index be9b4a5..e7df15e 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -187,4 +187,4 @@ pub async fn list_repositories( } Ok(()) -} \ No newline at end of file +} diff --git a/src/lib.rs b/src/lib.rs index 04f7303..a531c4a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ pub mod blob; pub mod bstring_escape; pub mod bstring_table; pub mod cli; +pub mod confluence; pub mod content_type; pub mod decompress; pub mod defaults; diff --git a/src/main.rs b/src/main.rs index ac78ef7..3943b1e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -285,6 +285,8 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, s3_bucket: None, diff --git a/src/reporter.rs b/src/reporter.rs index ef4c763..13ca84c 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -134,6 +134,13 @@ impl DetailsReporter { } } + /// If the given file path corresponds to a Confluence page downloaded to disk, + /// return the URL for that page. + fn confluence_page_url(&self, path: &std::path::Path) -> Option { + let ds = self.datastore.lock().ok()?; + ds.confluence_links().get(path).cloned() + } + /// If the given file path corresponds to a Slack message downloaded to disk, /// return the permalink for that message. 
fn slack_message_url(&self, path: &std::path::Path) -> Option { @@ -333,6 +340,8 @@ impl DetailsReporter { Origin::File(e) => { if let Some(url) = self.jira_issue_url(&e.path, args) { Some(url) + } else if let Some(url) = self.confluence_page_url(&e.path) { + Some(url) } else if let Some(url) = self.slack_message_url(&e.path) { Some(url) } else if let Some(mapped) = self.s3_display_path(&e.path) { diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index d9eda56..10d7aee 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -87,6 +87,9 @@ mod tests { // Jira options jira_url: None, jql: None, + // Confluence options + confluence_url: None, + cql: None, max_results: 100, // Slack options slack_query: None, diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 7bd6b8d..a249f02 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -15,7 +15,7 @@ use crate::{ commands::{github::GitCloneMode, github::GitHistoryMode, scan}, global, }, - findings_store, + confluence, findings_store, git_binary::{CloneMode, Git}, git_url::GitUrl, github, gitlab, @@ -263,6 +263,40 @@ pub async fn fetch_jira_issues( Ok(vec![output_dir]) } +pub async fn fetch_confluence_pages( + args: &scan::ScanArgs, + global_args: &global::GlobalArgs, + datastore: &Arc>, +) -> Result> { + let Some(confluence_url) = args.input_specifier_args.confluence_url.clone() else { + return Ok(Vec::new()); + }; + let Some(cql) = args.input_specifier_args.cql.as_deref() else { + return Ok(Vec::new()); + }; + let max_results = args.input_specifier_args.max_results; + let output_root = { + let ds = datastore.lock().unwrap(); + ds.clone_root() + }; + let output_dir = output_root.join("confluence_pages"); + let paths = confluence::download_pages_to_dir( + confluence_url, + cql, + max_results, + global_args.ignore_certs, + &output_dir, + ) + .await?; + { + let mut ds = datastore.lock().unwrap(); + for (path, link) in &paths { + 
ds.register_confluence_page(path.clone(), link.clone()); + } + } + Ok(vec![output_dir]) +} + pub async fn fetch_slack_messages( args: &scan::ScanArgs, global_args: &global::GlobalArgs, diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 10b6e51..e389543 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -19,7 +19,8 @@ use crate::{ scanner::{ clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos, repos::{ - enumerate_gitlab_repos, fetch_jira_issues, fetch_s3_objects, fetch_slack_messages, + enumerate_gitlab_repos, fetch_confluence_pages, fetch_jira_issues, fetch_s3_objects, + fetch_slack_messages, }, run_secret_validation, save_docker_images, summary::print_scan_summary, @@ -70,6 +71,10 @@ pub async fn run_async_scan( let jira_dirs = fetch_jira_issues(args, global_args, &datastore).await?; input_roots.extend(jira_dirs); + // Fetch Confluence pages if requested + let confluence_dirs = fetch_confluence_pages(args, global_args, &datastore).await?; + input_roots.extend(confluence_dirs); + // Fetch Slack messages if requested let slack_dirs = fetch_slack_messages(args, global_args, &datastore).await?; input_roots.extend(slack_dirs); diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 4c3be19..68b9663 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -81,6 +81,8 @@ rules: jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_github.rs b/tests/int_github.rs index 8edc022..0bae089 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -68,6 +68,8 @@ fn test_github_remote_scan() -> Result<()> { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 4668439..7e48f60 100644 --- a/tests/int_gitlab.rs +++ 
b/tests/int_gitlab.rs @@ -67,6 +67,8 @@ fn test_gitlab_remote_scan() -> Result<()> { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), @@ -170,6 +172,8 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_gitlab.rs.orig b/tests/int_gitlab.rs.orig new file mode 100644 index 0000000..4668439 --- /dev/null +++ b/tests/int_gitlab.rs.orig @@ -0,0 +1,241 @@ +// tests/int_gitlab.rs +use std::{ + str::FromStr, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + git_url::GitUrl, + scanner::{load_and_record_rules, run_scan}, +}; +use tempfile::TempDir; +use tokio::runtime::Runtime; +use url::Url; + +/// Derive process exit-codes from findings +fn determine_exit_code(total: usize, validated: usize) -> i32 { + match (total, validated) { + (0, _) => 0, + (_, v) if v > 0 => 205, + _ => 200, + } +} + +#[test] +fn test_gitlab_remote_scan() -> Result<()> { + let temp_dir = TempDir::new().context("tmp dir")?; + let clone_dir = temp_dir.path().to_path_buf(); + + // Public GitLab repo seeded with test secrets + let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, 
+ input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/")?, + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/")?, + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16_384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + let rt = Runtime::new()?; + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + + rt.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + + let ds = datastore.lock().unwrap(); + let findings = 
ds.get_matches(); + let total = findings.len(); + let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); + + assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); + + let exit_code = determine_exit_code(total, validated); + assert!( + exit_code >= 200, + "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" + ); + + drop(rt); + Ok(()) +} + +#[test] +fn test_gitlab_remote_scan_no_history() -> Result<()> { + let temp_dir = TempDir::new().context("tmp dir")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let test_repo_url = "https://gitlab.com/micksmix/SecretsTest.git"; + let git_url = GitUrl::from_str(test_repo_url).expect("parse GitLab URL"); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: vec![git_url], + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/")?, + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/")?, + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::None, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + no_extract_archives: false, + extraction_depth: 2, + no_binary: true, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + 
rule_stats: false, + only_valid: false, + min_entropy: None, + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: false, + color: Mode::Auto, + progress: Mode::Auto, + no_update_check: false, + self_update: false, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16_384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + let rt = Runtime::new()?; + + let rules_db = Arc::new(load_and_record_rules(&scan_args, &datastore)?); + + rt.block_on(async { + run_scan(&global_args, &scan_args, &rules_db, Arc::clone(&datastore)).await + })?; + + let ds = datastore.lock().unwrap(); + let findings = ds.get_matches(); + let total = findings.len(); + let validated = findings.iter().filter(|m| m.as_ref().2.validation_success).count(); + + assert!(total >= 10, "expected at least 10 findings from GitLab repo, got {total}"); + + let exit_code = determine_exit_code(total, validated); + assert!( + exit_code >= 200, + "expected kingfisher to report findings (exit_code >= 200), got {exit_code}" + ); + + drop(rt); + Ok(()) +} diff --git a/tests/int_quiet.rs b/tests/int_quiet.rs index 598f103..1968972 100644 --- a/tests/int_quiet.rs +++ b/tests/int_quiet.rs @@ -56,4 +56,4 @@ fn scan_quiet_with_rule_stats_prints_rule_stats() { contains_bytes(out, b"Rule Performance Stats") })); } -} \ No newline at end of file +} diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 45f3767..69a1061 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -51,6 +51,8 @@ async fn test_redact_hashes_finding_values() -> Result<()> { gitlab_repo_type: GitLabRepoType::Owner, jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git 
a/tests/int_slack.rs b/tests/int_slack.rs index 7284e1d..e94607f 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -57,6 +57,8 @@ impl TestContext { gitlab_repo_type: GitLabRepoType::Owner, jira_url: None, jql: None, + confluence_url: None, + cql: None, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), s3_bucket: None, @@ -147,6 +149,8 @@ async fn test_scan_slack_messages() -> Result<()> { gitlab_repo_type: GitLabRepoType::Owner, jira_url: None, jql: None, + confluence_url: None, + cql: None, slack_query: Some("test".into()), slack_api_url: Url::parse(&format!("{}/", server.uri()))?, max_results: 10, diff --git a/tests/int_slack.rs.orig b/tests/int_slack.rs.orig new file mode 100644 index 0000000..7284e1d --- /dev/null +++ b/tests/int_slack.rs.orig @@ -0,0 +1,205 @@ +use std::{ + env, + sync::{Arc, Mutex}, +}; + +use anyhow::Result; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, + rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; +use wiremock::{ + matchers::{method, path}, + Mock, MockServer, ResponseTemplate, +}; + +struct TestContext { + rules_db: Arc, +} + +impl TestContext { + fn new() -> Result { + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + 
github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + max_results: 10, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; + let resolved = loaded.resolve_enabled_rules()?; + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect())?; + Ok(Self { rules_db: Arc::new(rules_db) }) + } +} + +#[tokio::test] +async fn test_scan_slack_messages() -> Result<()> { + let ctx = TestContext::new()?; + + let server = MockServer::start().await; + let response = serde_json::json!({ + "ok": true, + "messages": { + "matches": [{ + "permalink": "https://example.slack.com/archives/C123/p1234", + "text": "This contains a github token ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa", + "ts": "1234.56", + "channel": {"id": "C123", "name": "general"} + }], + "pagination": {"page": 1, "page_count": 1} + } + }); + Mock::given(method("GET")) + .and(path("/search.messages")) + 
.respond_with(ResponseTemplate::new(200).set_body_json(response)) + .mount(&server) + .await; + + env::set_var("KF_SLACK_TOKEN", "xoxp-test"); + + let temp_dir = TempDir::new()?; + let clone_dir = temp_dir.path().to_path_buf(); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + jira_url: None, + jql: None, + slack_query: Some("test".into()), + slack_api_url: Url::parse(&format!("{}/", server.uri()))?, + max_results: 10, + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; 
+ + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &ctx.rules_db).await?; + + let findings = { + let ds = datastore.lock().unwrap(); + ds.get_matches().len() + }; + assert!(findings > 0); + Ok(()) +} diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 46d4521..2caa10a 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -124,6 +124,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index c53adae..2478170 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -67,6 +67,8 @@ impl TestContext { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), @@ -143,6 +145,8 @@ impl TestContext { jira_url: None, jql: None, + confluence_url: None, + cql: None, max_results: 100, slack_query: None, slack_api_url: Url::parse("https://slack.com/api/").unwrap(), diff --git a/tests/int_vulnerable_files.rs.orig b/tests/int_vulnerable_files.rs.orig new file mode 100644 index 0000000..c53adae --- /dev/null +++ b/tests/int_vulnerable_files.rs.orig @@ -0,0 +1,255 @@ +// tests/integration_scan.rs + +use std::{ + path::{Path, PathBuf}, + sync::{Arc, Mutex}, +}; + +use anyhow::{Context, Result}; +use kingfisher::{ + cli::{ + commands::{ + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + inputs::{ContentFilteringArgs, InputSpecifierArgs}, + output::{OutputArgs, ReportOutputFormat}, + rules::RuleSpecifierArgs, + scan::{ConfidenceLevel, ScanArgs}, + }, + global::{AdvancedArgs, Mode}, + GlobalArgs, + }, + findings_store::FindingsStore, 
+ rule_loader::RuleLoader, + rules_database::RulesDatabase, + scanner::run_async_scan, +}; +use tempfile::TempDir; +use url::Url; + +#[derive(Debug)] +struct TestCase { + file_name: &'static str, + min_expected_findings: usize, +} + +struct TestContext { + rules_db: Arc, +} + +fn root_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) +} + +impl TestContext { + fn new() -> Result { + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + 
baseline_file: None, + manage_baseline: false, + }; + + let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules) + .load(&scan_args) + .context("Failed to load rules")?; + + let resolved = loaded.resolve_enabled_rules().context("Failed to resolve rules")?; + + let rules_db = RulesDatabase::from_rules(resolved.into_iter().cloned().collect()) + .context("Failed to compile rules")?; + + Ok(Self { rules_db: Arc::new(rules_db) }) + } + + async fn scan_file(&self, file_path: &Path) -> Result { + let temp_dir = TempDir::new().context("Failed to create temporary directory")?; + let clone_dir = temp_dir.path().to_path_buf(); + + let scan_args = ScanArgs { + num_jobs: 2, + rules: RuleSpecifierArgs { + rules_path: Vec::new(), + rule: vec!["all".into()], + load_builtins: true, + }, + input_specifier_args: InputSpecifierArgs { + path_inputs: vec![file_path.to_path_buf()], + git_url: Vec::new(), + github_user: Vec::new(), + github_organization: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + // new GitLab defaults + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::Owner, + + jira_url: None, + jql: None, + max_results: 100, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + // s3 + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + // Docker image scanning + docker_image: Vec::new(), + // git clone / history options + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + scan_nested_repos: true, + commit_metadata: true, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 25.0, + extraction_depth: 2, + no_binary: true, + no_extract_archives: false, + exclude: Vec::new(), // Exclude patterns + }, + confidence: ConfidenceLevel::Low, + 
no_validate: true, + rule_stats: false, + only_valid: false, + min_entropy: Some(0.0), + redact: false, + git_repo_timeout: 1800, // 30 minutes + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + no_dedup: true, + baseline_file: None, + manage_baseline: false, + }; + + let global_args = GlobalArgs { + verbose: 0, + quiet: true, + color: Mode::Auto, + no_update_check: false, + self_update: false, + progress: Mode::Never, + ignore_certs: false, + advanced: AdvancedArgs { rlimit_nofile: 16384 }, + }; + + let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); + + run_async_scan(&global_args, &scan_args, Arc::clone(&datastore), &self.rules_db).await?; + + let findings = { + let ds = datastore.lock().unwrap(); + ds.get_matches().len() + }; + + Ok(findings) + } +} + +#[tokio::test] +async fn test_scan_vulnerable_files() -> Result<()> { + let test_context = TestContext::new()?; + + let test_cases = vec![ + TestCase { file_name: "testdata/c_vulnerable.c", min_expected_findings: 3 }, + TestCase { file_name: "testdata/cpp_vulnerable.cpp", min_expected_findings: 3 }, + TestCase { file_name: "testdata/csharp_vulnerable.cs", min_expected_findings: 4 }, + TestCase { file_name: "testdata/elixir_vulnerable.exs", min_expected_findings: 1 }, + TestCase { file_name: "testdata/generic_secrets.py", min_expected_findings: 9 }, + TestCase { file_name: "testdata/go_vulnerable.go", min_expected_findings: 4 }, + TestCase { file_name: "testdata/java_vulnerable.java", min_expected_findings: 4 }, + TestCase { file_name: "testdata/javascript_vulnerable.js", min_expected_findings: 4 }, + TestCase { file_name: "testdata/json_vulnerable.json", min_expected_findings: 4 }, + TestCase { file_name: "testdata/kotlin_vulnerable.kt", min_expected_findings: 7 }, + TestCase { file_name: "testdata/objc_vulnerable.m", min_expected_findings: 4 }, + TestCase { file_name: "testdata/php_vulnerable.php", min_expected_findings: 5 }, + TestCase { file_name: 
"testdata/python_vulnerable.py", min_expected_findings: 10 }, + TestCase { file_name: "testdata/python2_vulnerable.py", min_expected_findings: 4 }, + TestCase { file_name: "testdata/ruby_vulnerable.rb", min_expected_findings: 6 }, + TestCase { file_name: "testdata/rust_vulnerable.rs", min_expected_findings: 3 }, + TestCase { file_name: "testdata/scala_vulnerable.scala", min_expected_findings: 3 }, + TestCase { file_name: "testdata/shell_vulnerable.sh", min_expected_findings: 2 }, + TestCase { file_name: "testdata/slack_tokens.properties", min_expected_findings: 17 }, + TestCase { file_name: "testdata/swift_vulnerable.swift", min_expected_findings: 2 }, + TestCase { file_name: "testdata/toml_vulnerable.toml", min_expected_findings: 4 }, + TestCase { file_name: "testdata/tsx_vulnerable.tsx", min_expected_findings: 1 }, + TestCase { file_name: "testdata/typescript_vulnerable.ts", min_expected_findings: 1 }, + TestCase { file_name: "testdata/yaml_vulnerable.yaml", min_expected_findings: 4 }, + ]; + + let root = root_dir(); + + for test_case in test_cases { + let test_file = root.join(test_case.file_name); + println!("Testing file: {}", test_case.file_name); + + let findings = test_context.scan_file(&test_file).await?; + + assert!( + findings >= test_case.min_expected_findings, + "File: {} - Expected >= {} findings, got {}", + test_case.file_name, + test_case.min_expected_findings, + findings + ); + } + + Ok(()) +}