Merge pull request #86 from mongodb/development

v1.39.0
This commit is contained in:
Mick Grove 2025-08-11 10:03:33 -07:00 committed by GitHub
commit cedcd6a875
22 changed files with 285 additions and 27 deletions

View file

@ -2,6 +2,9 @@
All notable changes to this project will be documented in this file.
## [1.39.0]
- Added support for scanning Confluence pages via `--confluence-url` and `--cql`
## [1.38.0]
- `--quiet` now suppresses scan summaries and rule statistics unless `--rule-stats` is explicitly provided
- Added X Consumer key detection and validation

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.38.0"
version = "1.39.0"
description = "MongoDB's blazingly fast secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true

View file

@ -12,7 +12,7 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co
## What Kingfisher Adds
- **Live validation** via cloud-provider APIs
- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, and Slack messages
- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, Confluence pages, and Slack messages
- **Compressed Files**: Supports extracting and scanning compressed files for secrets
- **Baseline mode**: ignore known secrets, flag only new ones
- **Language-aware detection** (source-code parsing) for ~20 languages
@ -24,9 +24,10 @@ Kingfisher originated as a fork of Praetorian's [Nosey Parker](https://github.co
- **Extensible rules**: hundreds of built-in detectors plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md))
- **Multiple targets**:
- **Git history**: local repos or GitHub/GitLab orgs/users
- **Docker images**: public or private via `--docker-image`
- **Jira issues**: JQL-driven scans with `--jira-url` and `--jql`
- **Slack messages**: query-based scans with `--slack-query`
- **Docker images**: public or private via `--docker-image`
- **Jira issues**: JQL-driven scans with `--jira-url` and `--jql`
- **Confluence pages**: CQL-driven scans with `--confluence-url` and `--cql`
- **Slack messages**: query-based scans with `--slack-query`
- **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous
- **Compressed Files**: Supports extracting and scanning compressed files for secrets
- **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md))
@ -421,7 +422,33 @@ KF_JIRA_TOKEN="token" kingfisher scan \
--jql 'ORDER BY created DESC' \
--max-results 1000
```
---
## Scanning Confluence
### Scan Confluence pages matching a CQL query
```bash
# Bearer token
KF_CONFLUENCE_TOKEN="token" kingfisher scan \
--confluence-url https://confluence.company.com \
--cql "label = secret" \
--max-results 500
# Basic auth with username and token
KF_CONFLUENCE_USER="user@example.com" KF_CONFLUENCE_TOKEN="token" kingfisher scan \
--confluence-url https://confluence.company.com \
--cql "text ~ 'password'" \
--max-results 500
```
Use the base URL of your Confluence site for `--confluence-url`. Kingfisher
appends `/rest/api/content/search` to it, so pass either `https://example.com/wiki` or
`https://example.com`, whichever prefix your server exposes the REST API under.
Generate a personal access token and set it in the `KF_CONFLUENCE_TOKEN` environment variable. By default, Kingfisher sends the token as a bearer token in the `Authorization` header.
To use basic authentication instead, also set `KF_CONFLUENCE_USER` to your Confluence email address; Kingfisher will then send the username and `KF_CONFLUENCE_TOKEN` as a Basic auth header. If the server responds with a redirect to a login page, the credentials are invalid or lack the required permissions.
## Scanning Slack
### Scan Slack messages matching a search query
@ -444,6 +471,7 @@ KF_SLACK_TOKEN="xoxp-1234..." kingfisher scan \
| `KF_GITHUB_TOKEN` | GitHub Personal Access Token |
| `KF_GITLAB_TOKEN` | GitLab Personal Access Token |
| `KF_JIRA_TOKEN` | Jira API token |
| `KF_CONFLUENCE_TOKEN` | Confluence API token (sent as a bearer token, or as the Basic auth password when `KF_CONFLUENCE_USER` is set) |
| `KF_SLACK_TOKEN` | Slack API token |
| `KF_DOCKER_TOKEN` | Docker registry token (`user:pass` or bearer token). If unset, credentials from the Docker keychain are used |
| `KF_AWS_KEY` and `KF_AWS_SECRET` | AWS Credentials to use with S3 bucket scanning |
@ -465,6 +493,11 @@ To authenticate Jira requests:
export KF_JIRA_TOKEN="token"
```
To authenticate Confluence requests:
```bash
export KF_CONFLUENCE_TOKEN="token"
```
_If no token is provided, Kingfisher still works for public repositories._
---

View file

@ -27,6 +27,7 @@ pub struct InputSpecifierArgs {
"all_github_organizations",
"all_gitlab_groups",
"jira_url",
"confluence_url",
"docker_image",
"slack_query",
"s3_bucket"
@ -96,6 +97,14 @@ pub struct InputSpecifierArgs {
#[arg(long, requires = "jira_url")]
pub jql: Option<String>,
/// Confluence base URL (e.g. https://confluence.example.com)
#[arg(long, value_hint = ValueHint::Url, requires = "cql")]
pub confluence_url: Option<Url>,
/// CQL query to select Confluence pages
#[arg(long, requires = "confluence_url")]
pub cql: Option<String>,
/// Slack search query
#[arg(long)]
pub slack_query: Option<String>,
@ -104,7 +113,7 @@ pub struct InputSpecifierArgs {
#[arg(long, default_value = "https://slack.com/api/", value_hint = ValueHint::Url)]
pub slack_api_url: Url,
/// Maximum number of Slack or Jira results to fetch
/// Maximum number of Slack, Jira, or Confluence results to fetch
#[arg(long, default_value_t = 100)]
pub max_results: usize,

View file

@ -99,10 +99,6 @@ pub struct ScanArgs {
#[command(flatten)]
pub output_args: OutputArgs<ReportOutputFormat>,
/// Bytes of context before and after each match
#[arg(long, default_value_t = 256, value_name = "BYTES")]
pub snippet_length: usize,
/// Baseline file to filter known secrets
#[arg(long, value_name = "FILE")]
pub baseline_file: Option<std::path::PathBuf>,

145
src/confluence.rs Normal file
View file

@ -0,0 +1,145 @@
use anyhow::{bail, Context, Result};
use reqwest::{header, Client};
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use url::Url;
/// A single page taken from the `results` array of a Confluence content search response.
///
/// Derives `Serialize` as well as `Deserialize` because `download_pages_to_dir`
/// writes each page back to disk as JSON.
#[derive(Debug, Deserialize, Serialize)]
pub struct ConfluencePage {
/// Page identifier; `download_pages_to_dir` uses it to name the on-disk file (`<id>.json`).
pub id: String,
/// Page title as reported by the server.
pub title: String,
/// Page body; `None` when the response carries no `body` key (`serde(default)`).
/// `search_pages` asks for it with `expand=body.storage`.
#[serde(default)]
pub body: Option<ConfluenceBody>,
/// Link metadata, read from the `_links` JSON key.
#[serde(rename = "_links")]
pub links: ConfluenceLinks,
}
/// The `body` object of a Confluence page; only the `storage` representation is modelled.
#[derive(Debug, Deserialize, Serialize)]
pub struct ConfluenceBody {
/// Storage representation of the body; `None` when the key is absent (`serde(default)`).
#[serde(default)]
pub storage: Option<ConfluenceStorage>,
}
/// The `body.storage` object of a Confluence page.
#[derive(Debug, Deserialize, Serialize)]
pub struct ConfluenceStorage {
/// Content string of the storage representation; `None` when the key is absent (`serde(default)`).
#[serde(default)]
pub value: Option<String>,
}
/// The `_links` object attached to a Confluence page.
#[derive(Debug, Deserialize, Serialize)]
pub struct ConfluenceLinks {
/// Web UI location of the page; `download_pages_to_dir` appends it to the
/// Confluence base URL to build the link reported for findings.
pub webui: String,
}
/// One batch of a content search response, as deserialized by `search_pages`.
#[derive(Debug, Deserialize)]
struct ConfluenceSearchResponse {
/// Pages contained in this batch.
results: Vec<ConfluencePage>,
/// Pagination links, read from the `_links` JSON key.
#[serde(rename = "_links")]
links: ConfluenceResultLinks,
}
/// Pagination links of a content search response.
#[derive(Debug, Deserialize)]
struct ConfluenceResultLinks {
/// Link to the following batch; `search_pages` stops paginating once this is `None`.
next: Option<String>,
}
/// Runs a CQL search against a Confluence instance and collects the matching pages.
///
/// The search endpoint is `<confluence_url>/rest/api/content/search` (a trailing
/// slash on `confluence_url` is ignored). Results are requested in batches of at
/// most 100 with `expand=body.storage` until `max_results` pages have been
/// collected or the server stops advertising a `next` link.
///
/// Authentication comes from the environment: `KF_CONFLUENCE_TOKEN` is required
/// and is sent as a bearer token, unless `KF_CONFLUENCE_USER` is also set, in
/// which case both are sent as Basic auth credentials.
///
/// # Errors
///
/// Fails when `KF_CONFLUENCE_TOKEN` is unset, when `KF_CONFLUENCE_USER` is set
/// but contains no `@`, when the HTTP client or request fails, when the server
/// answers with a non-success status (including redirects, which are not
/// followed), or when the response body cannot be parsed.
pub async fn search_pages(
    confluence_url: Url,
    cql: &str,
    max_results: usize,
    ignore_certs: bool,
) -> Result<Vec<ConfluencePage>> {
    let token = std::env::var("KF_CONFLUENCE_TOKEN")
        .context("KF_CONFLUENCE_TOKEN environment variable must be set")?;
    let user = std::env::var("KF_CONFLUENCE_USER").ok();
    if let Some(u) = user.as_deref() {
        if !u.contains('@') {
            bail!("KF_CONFLUENCE_USER must be an email address");
        }
    }

    // Redirects are deliberately not followed: a redirect is reported below as
    // a credentials problem instead of being chased.
    let client = Client::builder()
        .redirect(reqwest::redirect::Policy::none())
        .danger_accept_invalid_certs(ignore_certs)
        .build()
        .context("Failed to build HTTP client")?;

    let base = confluence_url.as_str().trim_end_matches('/');
    let api_url = Url::parse(&format!("{}/rest/api/content/search", base))
        .context("Failed to build Confluence search URL")?;

    let mut pages = Vec::new();
    let mut start = 0usize;
    while pages.len() < max_results {
        let remaining = max_results - pages.len();
        let limit = std::cmp::min(100, remaining);
        let limit_param = limit.to_string();
        let start_param = start.to_string();

        let req = client.get(api_url.clone()).query(&[
            ("cql", cql),
            ("limit", limit_param.as_str()),
            ("start", start_param.as_str()),
            ("expand", "body.storage"),
        ]);
        let req = if let Some(user) = &user {
            req.basic_auth(user, Some(&token))
        } else {
            req.bearer_auth(&token)
        };

        let resp = req.send().await.context("Failed to send Confluence request")?;
        let status = resp.status();
        if !status.is_success() {
            let location = resp
                .headers()
                .get(header::LOCATION)
                .and_then(|v| v.to_str().ok())
                .map(str::to_owned);
            if let Some(loc) = location {
                bail!(
                    "Confluence API request returned {} redirect to {}. Check KF_CONFLUENCE_TOKEN and KF_CONFLUENCE_USER",
                    status,
                    loc
                );
            }
            // Only read the body when it is actually going to be reported.
            let body =
                resp.text().await.unwrap_or_else(|e| format!("Failed to read response: {}", e));
            bail!("Confluence API request failed with status {}: {}", status, body);
        }

        let body: ConfluenceSearchResponse =
            resp.json().await.context("Failed to parse Confluence response")?;
        let received = body.results.len();
        let has_next = body.links.next.is_some();
        pages.extend(body.results.into_iter().take(remaining));

        // An empty batch means no further progress is possible, even if the
        // server still advertises a `next` link.
        if received == 0 || !has_next {
            break;
        }
        // Advance by what was actually returned: the server may hand back fewer
        // results than `limit`, and stepping by `limit` would then skip pages.
        start += received;
    }
    Ok(pages)
}
pub async fn download_pages_to_dir(
confluence_url: Url,
cql: &str,
max_results: usize,
ignore_certs: bool,
output_dir: &PathBuf,
) -> Result<Vec<(PathBuf, String)>> {
std::fs::create_dir_all(output_dir)?;
let pages = search_pages(confluence_url.clone(), cql, max_results, ignore_certs).await?;
let mut paths = Vec::new();
let base = confluence_url.as_str().trim_end_matches('/');
let web_base = base.to_string();
for page in pages {
let file = output_dir.join(format!("{}.json", page.id));
std::fs::write(&file, serde_json::to_vec(&page)?)?;
let link = format!("{}{}", web_base, page.links.webui);
paths.push((file, link));
}
Ok(paths)
}

View file

@ -54,6 +54,7 @@ pub struct FindingsStore {
origin_meta: FxHashMap<u64, Arc<OriginSet>>,
docker_images: FxHashMap<PathBuf, String>,
slack_links: FxHashMap<PathBuf, String>,
confluence_links: FxHashMap<PathBuf, String>,
s3_buckets: FxHashMap<PathBuf, String>,
}
impl FindingsStore {
@ -74,6 +75,7 @@ impl FindingsStore {
bloom_items: 0,
docker_images: FxHashMap::default(),
slack_links: FxHashMap::default(),
confluence_links: FxHashMap::default(),
s3_buckets: FxHashMap::default(),
}
}
@ -308,6 +310,14 @@ impl FindingsStore {
&self.slack_links
}
/// Records that the file at `path` holds the Confluence page reachable at `link`.
///
/// A later registration for the same `path` replaces the earlier link.
pub fn register_confluence_page(&mut self, path: PathBuf, link: String) {
    let links = &mut self.confluence_links;
    links.insert(path, link);
}
pub fn confluence_links(&self) -> &FxHashMap<PathBuf, String> {
&self.confluence_links
}
/// Records that the directory `dir` is associated with the S3 bucket `bucket`.
///
/// A later registration for the same `dir` replaces the earlier bucket name.
pub fn register_s3_bucket(&mut self, dir: PathBuf, bucket: String) {
    let buckets = &mut self.s3_buckets;
    buckets.insert(dir, bucket);
}

View file

@ -187,4 +187,4 @@ pub async fn list_repositories(
}
Ok(())
}
}

View file

@ -4,6 +4,7 @@ pub mod blob;
pub mod bstring_escape;
pub mod bstring_table;
pub mod cli;
pub mod confluence;
pub mod content_type;
pub mod decompress;
pub mod defaults;

View file

@ -285,6 +285,8 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
jira_url: None,
jql: None,
confluence_url: None,
cql: None,
max_results: 100,
s3_bucket: None,
@ -319,7 +321,6 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
redact: false,
git_repo_timeout: 1800,
no_dedup: false,
snippet_length: 256,
baseline_file: None,
manage_baseline: false,
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },

View file

@ -134,6 +134,13 @@ impl DetailsReporter {
}
}
/// Looks up the Confluence page URL registered for a downloaded file.
///
/// Returns `None` when no link is registered for `path`, or when the
/// datastore lock cannot be acquired.
fn confluence_page_url(&self, path: &std::path::Path) -> Option<String> {
    match self.datastore.lock() {
        Ok(store) => store.confluence_links().get(path).cloned(),
        Err(_) => None,
    }
}
/// If the given file path corresponds to a Slack message downloaded to disk,
/// return the permalink for that message.
fn slack_message_url(&self, path: &std::path::Path) -> Option<String> {
@ -333,6 +340,8 @@ impl DetailsReporter {
Origin::File(e) => {
if let Some(url) = self.jira_issue_url(&e.path, args) {
Some(url)
} else if let Some(url) = self.confluence_page_url(&e.path) {
Some(url)
} else if let Some(url) = self.slack_message_url(&e.path) {
Some(url)
} else if let Some(mapped) = self.s3_display_path(&e.path) {

View file

@ -87,6 +87,9 @@ mod tests {
// Jira options
jira_url: None,
jql: None,
// Confluence options
confluence_url: None,
cql: None,
max_results: 100,
// Slack options
slack_query: None,
@ -119,7 +122,6 @@ mod tests {
redact: false,
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
snippet_length: 256,
baseline_file: None,
manage_baseline: false,
}

View file

@ -15,7 +15,7 @@ use crate::{
commands::{github::GitCloneMode, github::GitHistoryMode, scan},
global,
},
findings_store,
confluence, findings_store,
git_binary::{CloneMode, Git},
git_url::GitUrl,
github, gitlab,
@ -263,6 +263,40 @@ pub async fn fetch_jira_issues(
Ok(vec![output_dir])
}
/// Downloads the Confluence pages selected on the command line and registers
/// their links in the datastore.
///
/// Returns an empty list when either `confluence_url` or `cql` is unset.
/// Otherwise the pages are written to `<clone root>/confluence_pages`, each
/// written file is registered together with its page link, and that directory
/// is returned as the single input root to scan.
pub async fn fetch_confluence_pages(
    args: &scan::ScanArgs,
    global_args: &global::GlobalArgs,
    datastore: &Arc<Mutex<findings_store::FindingsStore>>,
) -> Result<Vec<PathBuf>> {
    let inputs = &args.input_specifier_args;
    let (Some(base_url), Some(query)) = (inputs.confluence_url.clone(), inputs.cql.as_deref())
    else {
        return Ok(Vec::new());
    };

    // The guard is a statement temporary, so the lock is released before the
    // download is awaited.
    let page_dir = datastore.lock().unwrap().clone_root().join("confluence_pages");

    let downloaded = confluence::download_pages_to_dir(
        base_url,
        query,
        inputs.max_results,
        global_args.ignore_certs,
        &page_dir,
    )
    .await?;

    {
        let mut store = datastore.lock().unwrap();
        for (file, link) in downloaded {
            store.register_confluence_page(file, link);
        }
    }

    Ok(vec![page_dir])
}
pub async fn fetch_slack_messages(
args: &scan::ScanArgs,
global_args: &global::GlobalArgs,

View file

@ -19,7 +19,8 @@ use crate::{
scanner::{
clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos,
repos::{
enumerate_gitlab_repos, fetch_jira_issues, fetch_s3_objects, fetch_slack_messages,
enumerate_gitlab_repos, fetch_confluence_pages, fetch_jira_issues, fetch_s3_objects,
fetch_slack_messages,
},
run_secret_validation, save_docker_images,
summary::print_scan_summary,
@ -70,6 +71,10 @@ pub async fn run_async_scan(
let jira_dirs = fetch_jira_issues(args, global_args, &datastore).await?;
input_roots.extend(jira_dirs);
// Fetch Confluence pages if requested
let confluence_dirs = fetch_confluence_pages(args, global_args, &datastore).await?;
input_roots.extend(confluence_dirs);
// Fetch Slack messages if requested
let slack_dirs = fetch_slack_messages(args, global_args, &datastore).await?;
input_roots.extend(slack_dirs);

View file

@ -81,6 +81,8 @@ rules:
jira_url: None,
jql: None,
confluence_url: None,
cql: None,
max_results: 100,
slack_query: None,
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
@ -113,7 +115,6 @@ rules:
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup,
snippet_length: 64,
baseline_file: None,
manage_baseline: false,
};

View file

@ -68,6 +68,8 @@ fn test_github_remote_scan() -> Result<()> {
jira_url: None,
jql: None,
confluence_url: None,
cql: None,
max_results: 100,
slack_query: None,
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
@ -100,7 +102,6 @@ fn test_github_remote_scan() -> Result<()> {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
snippet_length: 256,
baseline_file: None,
manage_baseline: false,
};

View file

@ -67,6 +67,8 @@ fn test_gitlab_remote_scan() -> Result<()> {
jira_url: None,
jql: None,
confluence_url: None,
cql: None,
max_results: 100,
slack_query: None,
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
@ -98,7 +100,6 @@ fn test_gitlab_remote_scan() -> Result<()> {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
snippet_length: 256,
baseline_file: None,
manage_baseline: false,
};
@ -171,6 +172,8 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
jira_url: None,
jql: None,
confluence_url: None,
cql: None,
max_results: 100,
slack_query: None,
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
@ -200,7 +203,6 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
git_repo_timeout: 1800,
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
snippet_length: 256,
baseline_file: None,
manage_baseline: false,
};

View file

@ -56,4 +56,4 @@ fn scan_quiet_with_rule_stats_prints_rule_stats() {
contains_bytes(out, b"Rule Performance Stats")
}));
}
}
}

View file

@ -51,6 +51,8 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
gitlab_repo_type: GitLabRepoType::Owner,
jira_url: None,
jql: None,
confluence_url: None,
cql: None,
max_results: 100,
slack_query: None,
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
@ -80,7 +82,6 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
git_repo_timeout: 1800,
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
snippet_length: 256,
baseline_file: None,
manage_baseline: false,
};

View file

@ -57,6 +57,8 @@ impl TestContext {
gitlab_repo_type: GitLabRepoType::Owner,
jira_url: None,
jql: None,
confluence_url: None,
cql: None,
slack_query: None,
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
s3_bucket: None,
@ -86,7 +88,6 @@ impl TestContext {
git_repo_timeout: 1800,
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
snippet_length: 128,
baseline_file: None,
manage_baseline: false,
};
@ -148,6 +149,8 @@ async fn test_scan_slack_messages() -> Result<()> {
gitlab_repo_type: GitLabRepoType::Owner,
jira_url: None,
jql: None,
confluence_url: None,
cql: None,
slack_query: Some("test".into()),
slack_api_url: Url::parse(&format!("{}/", server.uri()))?,
max_results: 10,
@ -178,7 +181,6 @@ async fn test_scan_slack_messages() -> Result<()> {
git_repo_timeout: 1800,
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
snippet_length: 128,
baseline_file: None,
manage_baseline: false,
};

View file

@ -124,6 +124,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
jira_url: None,
jql: None,
confluence_url: None,
cql: None,
max_results: 100,
slack_query: None,
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
@ -156,7 +158,6 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true, // keep duplicates so the cache is stressed
snippet_length: 128,
baseline_file: None,
manage_baseline: false,
};

View file

@ -67,6 +67,8 @@ impl TestContext {
jira_url: None,
jql: None,
confluence_url: None,
cql: None,
max_results: 100,
slack_query: None,
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
@ -99,7 +101,6 @@ impl TestContext {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
snippet_length: 256,
baseline_file: None,
manage_baseline: false,
};
@ -144,6 +145,8 @@ impl TestContext {
jira_url: None,
jql: None,
confluence_url: None,
cql: None,
max_results: 100,
slack_query: None,
slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
@ -176,7 +179,6 @@ impl TestContext {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
snippet_length: 256,
baseline_file: None,
manage_baseline: false,
};