From 6e4c94ddc3366e4d04afb8ff92f0e30828099764 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 20 Aug 2025 20:41:11 -0700 Subject: [PATCH 1/6] - Added '--repo-artifacts' flag to scan repository issues, gists/snippets, and wikis when cloning via '--git-url' --- CHANGELOG.md | 3 + README.md | 25 ++++- src/cli/commands/inputs.rs | 4 + src/findings_store.rs | 10 ++ src/github.rs | 199 +++++++++++++++++++++++++++++++++- src/gitlab.rs | 127 +++++++++++++++++++++- src/main.rs | 3 +- src/reporter.rs | 9 +- src/reporter/json_format.rs | 3 +- src/scanner/repos.rs | 48 +++++++- src/scanner/runner.rs | 28 ++++- tests/int_allowlist.rs | 3 +- tests/int_dedup.rs | 3 +- tests/int_github.rs | 3 +- tests/int_gitlab.rs | 6 +- tests/int_redact.rs | 3 +- tests/int_slack.rs | 6 +- tests/int_validation_cache.rs | 3 +- tests/int_vulnerable_files.rs | 6 +- 19 files changed, 470 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47af7f1..414ed97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.45.0] +- Added `--repo-artifacts` flag to scan repository issues, gists/snippets, and wikis when cloning via `--git-url` + ## [1.44.0] - Fixed issue with self-update on Linux - Reverted the change to json and jsonl outputs by rule diff --git a/README.md b/README.md index 054dfea..66c4555 100644 --- a/README.md +++ b/README.md @@ -369,12 +369,21 @@ kingfisher scan --github-organization my-org ### Scan remote GitHub repository +`--git-url` clones the repository and scans its files and history. To also inspect +related server-side data, supply `--repo-artifacts`. This flag pulls down the +repository's issues (including pull requests), wiki, and any public gists owned by +the repository owner and scans them for secrets. Fetching these extras counts +against API rate limits and private artifacts require a `KF_GITHUB_TOKEN`. + ```bash +# Scan the repository only kingfisher scan --git-url https://github.com/org/repo.git -# Optionally provide a GitHub Token -KF_GITHUB_TOKEN="ghp_…" kingfisher scan --git-url https://github.com/org/private_repo.git +# Include issues, wiki, and owner gists +kingfisher scan --git-url https://github.com/org/repo.git --repo-artifacts +# Private repositories or artifacts +KF_GITHUB_TOKEN="ghp_…" kingfisher scan --git-url https://github.com/org/private_repo.git --repo-artifacts ``` --- @@ -397,8 +406,20 @@ kingfisher scan --gitlab-user johndoe ### Scan remote GitLab repository by URL +`--git-url` by itself clones the project repository. To include server-side +artifacts owned by the project, add `--repo-artifacts`. Kingfisher will retrieve +the project's issues, wiki, and snippets and scan them for secrets. These extra +requests may take longer and require a `KF_GITLAB_TOKEN` for private projects. + ```bash +# Scan the repository only kingfisher scan --git-url https://gitlab.com/group/project.git + +# Include issues, wiki, and snippets +kingfisher scan --git-url https://gitlab.com/group/project.git --repo-artifacts + +# Private projects or artifacts +KF_GITLAB_TOKEN="glpat-…" kingfisher scan --git-url https://gitlab.com/group/private_project.git --repo-artifacts ``` ### List GitLab repositories diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index 7836d79..f0b8fa8 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -154,6 +154,10 @@ pub struct InputSpecifierArgs { #[arg(long, default_value_t = true, action = clap::ArgAction::Set, help_heading = "Git Options")] pub commit_metadata: bool, + /// Also scan repository host artifacts like issues, wikis, and gists/snippets + #[arg(long, help_heading = "Git Options")] + pub repo_artifacts: bool, + /// Enable or disable scanning nested git repositories #[arg(long, default_value_t = true)] pub scan_nested_repos: bool, diff --git a/src/findings_store.rs b/src/findings_store.rs index 8b8e0a4..fc610f2 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -56,6 +56,7 @@ pub struct FindingsStore { slack_links: FxHashMap, confluence_links: FxHashMap, s3_buckets: FxHashMap, + repo_links: FxHashMap, } impl FindingsStore { pub fn new(clone_dir: PathBuf) -> Self { @@ -77,6 +78,7 @@ impl FindingsStore { slack_links: FxHashMap::default(), confluence_links: FxHashMap::default(), s3_buckets: FxHashMap::default(), + repo_links: FxHashMap::default(), } } @@ -318,6 +320,14 @@ impl FindingsStore { &self.confluence_links } + pub fn register_repo_link(&mut self, path: PathBuf, link: String) { + self.repo_links.insert(path, link); + } + + pub fn repo_links(&self) -> &FxHashMap { + &self.repo_links + } + pub fn register_s3_bucket(&mut self, dir: PathBuf, bucket: String) { self.s3_buckets.insert(dir, bucket); } diff --git a/src/github.rs b/src/github.rs index 54c6a84..599db85 100644 --- a/src/github.rs +++ b/src/github.rs @@ -1,4 +1,10 @@ -use std::{env, sync::Arc, time::Duration}; +use std::{ + collections::HashSet, + env, fs, + path::{Path, PathBuf}, + sync::{Arc, Mutex}, + time::Duration, +}; use anyhow::{Context, Result}; use indicatif::{ProgressBar, ProgressStyle}; @@ -7,8 +13,12 @@ use octorust::{ types::{Order, ReposListOrgSort, ReposListOrgType, ReposListUserType}, Client, }; +use serde_json::Value; use url::Url; +use crate::{findings_store, git_url::GitUrl}; +use std::str::FromStr; + #[derive(Debug)] pub struct RepoSpecifiers { pub user: Vec, @@ -161,3 +171,190 @@ pub async fn list_repositories( } Ok(()) } + +fn parse_repo(repo_url: &GitUrl) -> Option<(String, String, String)> { + let url = Url::parse(repo_url.as_str()).ok()?; + let host = url.host_str()?.to_string(); + let mut segments = url.path_segments()?; + let owner = segments.next()?.to_string(); + let mut repo = segments.next()?.to_string(); + if let Some(stripped) = repo.strip_suffix(".git") { + repo = stripped.to_string(); + } + Some((host, owner, repo)) +} + +pub fn wiki_url(repo_url: &GitUrl) -> Option { + let (host, owner, repo) = parse_repo(repo_url)?; + let wiki = format!("https://{host}/{owner}/{repo}.wiki.git"); + GitUrl::from_str(&wiki).ok() +} + +pub async fn fetch_repo_items( + repo_url: &GitUrl, + ignore_certs: bool, + output_root: &Path, + datastore: &Arc>, +) -> Result> { + let (_, owner, repo) = parse_repo(repo_url).context("invalid GitHub repo URL")?; + let client = reqwest::Client::builder().danger_accept_invalid_certs(ignore_certs).build()?; + + let mut dirs = Vec::new(); + + // Issues + let issues_dir = output_root.join("github_issues").join(&owner).join(&repo); + fs::create_dir_all(&issues_dir)?; + let mut page = 1; + loop { + let url = format!( + "https://api.github.com/repos/{owner}/{repo}/issues?state=all&per_page=100&page={page}" + ); + let mut req = client.get(&url).header("User-Agent", "kingfisher"); + if let Ok(token) = env::var("KF_GITHUB_TOKEN") { + if !token.is_empty() { + req = req.bearer_auth(token); + } + } + let resp = req.send().await?; + if !resp.status().is_success() { + break; + } + let issues: Vec = resp.json().await?; + if issues.is_empty() { + break; + } + for issue in issues { + let number = issue.get("number").and_then(|v| v.as_u64()).unwrap_or(0); + let title = issue.get("title").and_then(|v| v.as_str()).unwrap_or(""); + let body = issue.get("body").and_then(|v| v.as_str()).unwrap_or(""); + let content = format!("# {title}\n\n{body}"); + let file_path = issues_dir.join(format!("issue_{number}.md")); + fs::write(&file_path, content)?; + let url = format!("https://github.com/{owner}/{repo}/issues/{number}"); + let mut ds = datastore.lock().unwrap(); + ds.register_repo_link(file_path, url); + } + page += 1; + } + if issues_dir.read_dir().ok().and_then(|mut d| d.next()).is_some() { + dirs.push(issues_dir); + } + + // Gists + let gists_dir = output_root.join("github_gists").join(&owner); + fs::create_dir_all(&gists_dir)?; + let mut seen = HashSet::new(); + + // Public gists for the owner + page = 1; + loop { + let url = format!("https://api.github.com/users/{owner}/gists?per_page=100&page={page}"); + let mut req = client.get(&url).header("User-Agent", "kingfisher"); + if let Ok(token) = env::var("KF_GITHUB_TOKEN") { + if !token.is_empty() { + req = req.bearer_auth(&token); + } + } + let resp = req.send().await?; + if !resp.status().is_success() { + break; + } + let gists: Vec = resp.json().await?; + if gists.is_empty() { + break; + } + for gist in gists { + if let Some(id) = gist.get("id").and_then(|v| v.as_str()) { + if seen.insert(id.to_string()) { + let mut req_g = client + .get(&format!("https://api.github.com/gists/{id}")) + .header("User-Agent", "kingfisher"); + if let Ok(token) = env::var("KF_GITHUB_TOKEN") { + if !token.is_empty() { + req_g = req_g.bearer_auth(&token); + } + } + let detail: Value = req_g.send().await?.json().await?; + if let Some(files) = detail.get("files").and_then(|v| v.as_object()) { + let gist_dir = gists_dir.join(id); + fs::create_dir_all(&gist_dir)?; + for (fname, fobj) in files { + if let Some(content) = fobj.get("content").and_then(|v| v.as_str()) { + let file_path = gist_dir.join(fname); + fs::write(&file_path, content)?; + let url = format!("https://gist.github.com/{id}"); + let mut ds = datastore.lock().unwrap(); + ds.register_repo_link(file_path, url); + } + } + } + } + } + } + page += 1; + } + + // Private gists for authenticated user if they own the repo + if let Ok(token) = env::var("KF_GITHUB_TOKEN") { + if !token.is_empty() { + page = 1; + loop { + let url = format!("https://api.github.com/gists?per_page=100&page={page}"); + let resp = client + .get(&url) + .header("User-Agent", "kingfisher") + .bearer_auth(&token) + .send() + .await?; + if !resp.status().is_success() { + break; + } + let gists: Vec = resp.json().await?; + if gists.is_empty() { + break; + } + for gist in gists { + let owner_login = + gist.get("owner").and_then(|o| o.get("login")).and_then(|v| v.as_str()); + if owner_login == Some(owner.as_str()) { + if let Some(id) = gist.get("id").and_then(|v| v.as_str()) { + if seen.insert(id.to_string()) { + let detail: Value = client + .get(&format!("https://api.github.com/gists/{id}")) + .header("User-Agent", "kingfisher") + .bearer_auth(&token) + .send() + .await? + .json() + .await?; + if let Some(files) = detail.get("files").and_then(|v| v.as_object()) + { + let gist_dir = gists_dir.join(id); + fs::create_dir_all(&gist_dir)?; + for (fname, fobj) in files { + if let Some(content) = + fobj.get("content").and_then(|v| v.as_str()) + { + let file_path = gist_dir.join(fname); + fs::write(&file_path, content)?; + let url = format!("https://gist.github.com/{id}"); + let mut ds = datastore.lock().unwrap(); + ds.register_repo_link(file_path, url); + } + } + } + } + } + } + } + page += 1; + } + } + } + + if gists_dir.read_dir().ok().and_then(|mut d| d.next()).is_some() { + dirs.push(gists_dir); + } + + Ok(dirs) +} diff --git a/src/gitlab.rs b/src/gitlab.rs index 42e8a39..e311c5b 100644 --- a/src/gitlab.rs +++ b/src/gitlab.rs @@ -1,4 +1,9 @@ -use std::{env, time::Duration}; +use std::{ + env, fs, + path::{Path, PathBuf}, + sync::{Arc, Mutex}, + time::Duration, +}; use anyhow::{Context, Result}; use gitlab::{ @@ -12,7 +17,11 @@ use gitlab::{ }; use indicatif::{ProgressBar, ProgressStyle}; use serde::Deserialize; -use url::Url; +use serde_json::Value; +use url::{form_urlencoded, Url}; + +use crate::{findings_store, git_url::GitUrl}; +use std::str::FromStr; #[derive(Deserialize)] struct SimpleUser { @@ -197,3 +206,117 @@ pub async fn list_repositories( Ok(()) } + +fn parse_repo(repo_url: &GitUrl) -> Option<(String, String)> { + let url = Url::parse(repo_url.as_str()).ok()?; + let host = url.host_str()?.to_string(); + let mut path = url.path().trim_start_matches('/').to_string(); + if let Some(stripped) = path.strip_suffix(".git") { + path = stripped.to_string(); + } + Some((host, path)) +} + +pub fn wiki_url(repo_url: &GitUrl) -> Option { + let (host, path) = parse_repo(repo_url)?; + let wiki = format!("https://{host}/{path}.wiki.git"); + GitUrl::from_str(&wiki).ok() +} + +pub async fn fetch_repo_items( + repo_url: &GitUrl, + ignore_certs: bool, + output_root: &Path, + datastore: &Arc>, +) -> Result> { + let (host, path) = parse_repo(repo_url).context("invalid GitLab repo URL")?; + let encoded = form_urlencoded::byte_serialize(path.as_bytes()).collect::(); + let client = reqwest::Client::builder().danger_accept_invalid_certs(ignore_certs).build()?; + + let mut dirs = Vec::new(); + + // Issues + let issues_dir = output_root.join("gitlab_issues").join(path.replace('/', "_")); + fs::create_dir_all(&issues_dir)?; + let mut page = 1; + loop { + let url = format!( + "https://{host}/api/v4/projects/{encoded}/issues?scope=all&state=all&per_page=100&page={page}" + ); + let mut req = client.get(&url); + if let Ok(token) = env::var("KF_GITLAB_TOKEN") { + if !token.is_empty() { + req = req.header("PRIVATE-TOKEN", token); + } + } + let resp = req.send().await?; + if !resp.status().is_success() { + break; + } + let issues: Vec = resp.json().await?; + if issues.is_empty() { + break; + } + for issue in issues { + let number = issue.get("iid").and_then(|v| v.as_u64()).unwrap_or(0); + let title = issue.get("title").and_then(|v| v.as_str()).unwrap_or(""); + let body = issue.get("description").and_then(|v| v.as_str()).unwrap_or(""); + let content = format!("# {title}\n\n{body}"); + let file_path = issues_dir.join(format!("issue_{number}.md")); + fs::write(&file_path, content)?; + let url = format!("https://{host}/{path}/-/issues/{number}"); + let mut ds = datastore.lock().unwrap(); + ds.register_repo_link(file_path, url); + } + page += 1; + } + if issues_dir.read_dir().ok().and_then(|mut d| d.next()).is_some() { + dirs.push(issues_dir); + } + + // Snippets + let snippets_dir = output_root.join("gitlab_snippets").join(path.replace('/', "_")); + fs::create_dir_all(&snippets_dir)?; + page = 1; + loop { + let url = + format!("https://{host}/api/v4/projects/{encoded}/snippets?per_page=100&page={page}"); + let mut req = client.get(&url); + if let Ok(token) = env::var("KF_GITLAB_TOKEN") { + if !token.is_empty() { + req = req.header("PRIVATE-TOKEN", token); + } + } + let resp = req.send().await?; + if !resp.status().is_success() { + break; + } + let snippets: Vec = resp.json().await?; + if snippets.is_empty() { + break; + } + for snip in snippets { + if let Some(id) = snip.get("id").and_then(|v| v.as_u64()) { + let raw_url = format!("https://{host}/api/v4/projects/{encoded}/snippets/{id}/raw"); + let mut req_s = client.get(&raw_url); + if let Ok(token) = env::var("KF_GITLAB_TOKEN") { + if !token.is_empty() { + req_s = req_s.header("PRIVATE-TOKEN", token); + } + } + let raw = req_s.send().await?.text().await?; + let file_path = snippets_dir.join(format!("snippet_{id}")); + fs::write(&file_path, raw)?; + let url = format!("https://{host}/{path}/-/snippets/{id}"); + let mut ds = datastore.lock().unwrap(); + ds.register_repo_link(file_path, url); + } + } + page += 1; + } + if snippets_dir.read_dir().ok().and_then(|mut d| d.next()).is_some() { + dirs.push(snippets_dir); + } + + Ok(dirs) +} diff --git a/src/main.rs b/src/main.rs index eff2a57..38c0a88 100644 --- a/src/main.rs +++ b/src/main.rs @@ -305,8 +305,9 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, - scan_nested_repos: true, commit_metadata: true, + repo_artifacts: false, + scan_nested_repos: true, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, diff --git a/src/reporter.rs b/src/reporter.rs index a42e682..9a08cc2 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -148,6 +148,11 @@ impl DetailsReporter { ds.slack_links().get(path).cloned() } + fn repo_artifact_url(&self, path: &std::path::Path) -> Option { + let ds = self.datastore.lock().ok()?; + ds.repo_links().get(path).cloned() + } + fn s3_display_path(&self, path: &std::path::Path) -> Option { let ds = self.datastore.lock().ok()?; for (dir, bucket) in ds.s3_buckets().iter() { @@ -338,7 +343,9 @@ impl DetailsReporter { .iter() .find_map(|origin| match origin { Origin::File(e) => { - if let Some(url) = self.jira_issue_url(&e.path, args) { + if let Some(url) = self.repo_artifact_url(&e.path) { + Some(url) + } else if let Some(url) = self.jira_issue_url(&e.path, args) { Some(url) } else if let Some(url) = self.confluence_page_url(&e.path) { Some(url) diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index d1e78f9..7bf68af 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -105,8 +105,9 @@ mod tests { // clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, - scan_nested_repos: true, commit_metadata: true, + repo_artifacts: false, + scan_nested_repos: true, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index e3bb3e9..68f6f67 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -7,6 +7,7 @@ use anyhow::{Context, Result}; use indicatif::{HumanCount, ProgressBar, ProgressStyle}; use tokio::time::Duration; use tracing::{debug, error, info}; +use url::Url; use crate::blob::BlobIdMap; use crate::{ @@ -102,7 +103,12 @@ pub fn clone_or_update_git_repos( progress.suspend(|| info!("Cloning {repo_url}...")); if let Err(e) = git.create_fresh_clone(repo_url, &output_dir, clone_mode) { progress.suspend(|| { - error!("Failed to clone {repo_url} to {}: {e}", output_dir.display()); + if repo_url.as_str().ends_with(".wiki.git") { + info!("Wiki repository not found for {repo_url}, skipping"); + debug!("Failed to clone {repo_url} to {}: {e}", output_dir.display()); + } else { + error!("Failed to clone {repo_url} to {}: {e}", output_dir.display()); + } debug!("Skipping scan of {repo_url}"); }); progress.inc(1); @@ -328,6 +334,46 @@ pub async fn fetch_slack_messages( Ok(vec![output_dir]) } +pub async fn fetch_git_host_artifacts( + repo_urls: &[GitUrl], + global_args: &global::GlobalArgs, + datastore: &Arc>, +) -> Result> { + let output_root = { + let ds = datastore.lock().unwrap(); + ds.clone_root() + }; + let mut dirs = Vec::new(); + for repo_url in repo_urls { + let host = Url::parse(repo_url.as_str()) + .ok() + .and_then(|u| u.host_str().map(|s| s.to_string())) + .unwrap_or_default(); + if host.contains("github") { + dirs.extend( + github::fetch_repo_items( + repo_url, + global_args.ignore_certs, + &output_root, + datastore, + ) + .await?, + ); + } else if host.contains("gitlab") { + dirs.extend( + gitlab::fetch_repo_items( + repo_url, + global_args.ignore_certs, + &output_root, + datastore, + ) + .await?, + ); + } + } + Ok(dirs) +} + pub async fn fetch_s3_objects( args: &scan::ScanArgs, datastore: &Arc>, diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 08bac87..1fbd9c2 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -10,6 +10,7 @@ use crate::{ cli::{commands::scan, global}, findings_store, findings_store::{FindingsStore, FindingsStoreMessage}, + github, gitlab, liquid_filters::register_all, matcher::MatcherStats, reporter::styles::Styles, @@ -20,8 +21,8 @@ use crate::{ scanner::{ clone_or_update_git_repos, enumerate_filesystem_inputs, enumerate_github_repos, repos::{ - enumerate_gitlab_repos, fetch_confluence_pages, fetch_jira_issues, fetch_s3_objects, - fetch_slack_messages, + enumerate_gitlab_repos, fetch_confluence_pages, fetch_git_host_artifacts, + fetch_jira_issues, fetch_s3_objects, fetch_slack_messages, }, run_secret_validation, save_docker_images, summary::print_scan_summary, @@ -76,7 +77,30 @@ pub async fn run_async_scan( repo_urls.sort(); repo_urls.dedup(); + // Add wiki repositories for each URL when requested + if args.input_specifier_args.repo_artifacts { + let mut wiki_urls = Vec::new(); + for url in &repo_urls { + if let Some(w) = github::wiki_url(url) { + wiki_urls.push(w); + } + if let Some(w) = gitlab::wiki_url(url) { + wiki_urls.push(w); + } + } + repo_urls.extend(wiki_urls); + repo_urls.sort(); + repo_urls.dedup(); + } + let mut input_roots = clone_or_update_git_repos(args, global_args, &repo_urls, &datastore)?; + + // Fetch issues, gists, and wikis if enabled + if args.input_specifier_args.repo_artifacts { + let repo_artifact_dirs = + fetch_git_host_artifacts(&repo_urls, global_args, &datastore).await?; + input_roots.extend(repo_artifact_dirs); + } // Fetch Jira issues if requested let jira_dirs = fetch_jira_issues(args, global_args, &datastore).await?; input_roots.extend(jira_dirs); diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs index 2dfa70b..cc65f6f 100644 --- a/tests/int_allowlist.rs +++ b/tests/int_allowlist.rs @@ -81,8 +81,9 @@ fn run_skiplist(skip_regex: Vec, skip_skipword: Vec) -> Result Result<()> { // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, - scan_nested_repos: true, commit_metadata: true, + repo_artifacts: false, + scan_nested_repos: true, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 601262f..a7e75d3 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -82,8 +82,9 @@ fn test_gitlab_remote_scan() -> Result<()> { docker_image: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, - scan_nested_repos: true, commit_metadata: true, + repo_artifacts: false, + scan_nested_repos: true, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, @@ -188,8 +189,9 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { docker_image: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::None, - scan_nested_repos: true, commit_metadata: true, + repo_artifacts: false, + scan_nested_repos: true, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, diff --git a/tests/int_redact.rs b/tests/int_redact.rs index b309a54..03e8afb 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -64,8 +64,9 @@ async fn test_redact_hashes_finding_values() -> Result<()> { docker_image: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, - scan_nested_repos: true, commit_metadata: true, + repo_artifacts: false, + scan_nested_repos: true, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, diff --git a/tests/int_slack.rs b/tests/int_slack.rs index 8726231..abbf3ba 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -70,8 +70,9 @@ impl TestContext { docker_image: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, - scan_nested_repos: true, commit_metadata: true, + repo_artifacts: false, + scan_nested_repos: true, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, @@ -166,8 +167,9 @@ async fn test_scan_slack_messages() -> Result<()> { docker_image: Vec::new(), git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, - scan_nested_repos: true, commit_metadata: true, + repo_artifacts: false, + scan_nested_repos: true, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index a220f88..24148ec 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -140,8 +140,9 @@ async fn test_validation_cache_and_depvars() -> Result<()> { // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, - scan_nested_repos: true, commit_metadata: true, + repo_artifacts: false, + scan_nested_repos: true, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index e301143..34fe709 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -83,8 +83,9 @@ impl TestContext { // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, - scan_nested_repos: true, commit_metadata: true, + repo_artifacts: false, + scan_nested_repos: true, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, @@ -164,8 +165,9 @@ impl TestContext { // git clone / history options git_clone: GitCloneMode::Bare, git_history: GitHistoryMode::Full, - scan_nested_repos: true, commit_metadata: true, + repo_artifacts: false, + scan_nested_repos: true, }, content_filtering_args: ContentFilteringArgs { max_file_size_mb: 25.0, From f820aaad6e869db18ec57a26aa4000676e864c9f Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 20 Aug 2025 20:41:19 -0700 Subject: [PATCH 2/6] - Added '--repo-artifacts' flag to scan repository issues, gists/snippets, and wikis when cloning via '--git-url' --- src/baseline.rs | 5 +---- src/content_type.rs | 1 - src/update.rs | 2 +- tests/int_allowlist.rs | 5 +++-- tests/smoke_baseline.rs | 2 +- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/baseline.rs b/src/baseline.rs index adfab88..7616dd5 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -46,10 +46,7 @@ fn normalize_path(p: &Path, roots: &[PathBuf]) -> String { for root in roots { if let Ok(stripped) = p.strip_prefix(root) { if let Some(name) = root.file_name() { - return PathBuf::from(name) - .join(stripped) - .to_string_lossy() - .replace('\\', "/"); + return PathBuf::from(name).join(stripped).to_string_lossy().replace('\\', "/"); } } } diff --git a/src/content_type.rs b/src/content_type.rs index 3d9a25d..197258d 100644 --- a/src/content_type.rs +++ b/src/content_type.rs @@ -2,7 +2,6 @@ use once_cell::sync::Lazy; use std::path::Path; use tokei::LanguageType; - // Precompute all (shebang_prefix_bytes, language) pairs once. // Sort longest-first so more specific shebangs win. static SHEBANG_PREFIXES: Lazy> = Lazy::new(|| { diff --git a/src/update.rs b/src/update.rs index ea3e221..8b9b138 100644 --- a/src/update.rs +++ b/src/update.rs @@ -105,7 +105,7 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt // Linux releases also ship as .deb and .rpm packages; select the .tgz asset for self‑updates #[cfg(not(target_os = "windows"))] builder.identifier("tgz"); - + // Build the updater. let Ok(updater) = builder.build() else { warn!("Failed to configure update checker"); diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs index cc65f6f..d287616 100644 --- a/tests/int_allowlist.rs +++ b/tests/int_allowlist.rs @@ -126,7 +126,8 @@ fn run_skiplist(skip_regex: Vec, skip_skipword: Vec) -> Result Result<()> { let count = run_skiplist(Vec::new(), vec!["test".into()])?; assert_eq!(count, 1); Ok(()) -} \ No newline at end of file +} diff --git a/tests/smoke_baseline.rs b/tests/smoke_baseline.rs index db3e250..1c53a0f 100644 --- a/tests/smoke_baseline.rs +++ b/tests/smoke_baseline.rs @@ -108,4 +108,4 @@ fn baseline_exclude_prunes_entries() -> anyhow::Result<()> { assert!(!content.contains(".git/secret.txt")); Ok(()) -} \ No newline at end of file +} From 81d2f47c67d3088ca5e693b54fbbb07b6ee9e648 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 21 Aug 2025 15:39:04 -0700 Subject: [PATCH 3/6] - Added '--repo-artifacts' flag to scan repository issues, gists/snippets, and wikis when cloning via '--git-url' - Added rules for sendbird, mattermost, langchain, notion - JWT validation hardened to reject alg:none by default (only allowed if explicitly configured), require iss for OIDC/JWKS verification, ensuring Active Credential means cryptographically verified and time-valid, not just unexpired - Updated the Git cloning logic to include all refs and minimize clone output, allowing Kingfisher to analyze pull request and deleted branch history --- CHANGELOG.md | 3 + Cargo.toml | 2 +- README.md | 3 +- data/rules/langchain.yml | 52 ++++++++ data/rules/mattermost.yml | 66 ++++++++++ data/rules/notion.yml | 83 ++++++++++++ data/rules/sendbird.yml | 57 ++++++++ data/rules/stackhawk.yml | 8 +- src/git_binary.rs | 3 + src/update.rs | 6 +- src/validation/jwt.rs | 271 +++++++++++++++++++++++--------------- 11 files changed, 441 insertions(+), 113 deletions(-) create mode 100644 data/rules/langchain.yml create mode 100644 data/rules/mattermost.yml create mode 100644 data/rules/notion.yml create mode 100644 data/rules/sendbird.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 414ed97..db2e20b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ All notable changes to this project will be documented in this file. ## [1.45.0] - Added `--repo-artifacts` flag to scan repository issues, gists/snippets, and wikis when cloning via `--git-url` +- Added rules for sendbird, mattermost, langchain, notion +- JWT validation hardened to reject alg:none by default (only allowed if explicitly configured), require iss for OIDC/JWKS verification, ensuring "Active Credential" means cryptographically verified and time-valid, not just unexpired +- Updated the Git cloning logic to include all refs and minimize clone output, allowing Kingfisher to analyze pull request and deleted branch history ## [1.44.0] - Fixed issue with self-update on Linux diff --git a/Cargo.toml b/Cargo.toml index ba14c38..9ebd262 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.44.0" +version = "1.45.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/README.md b/README.md index 66c4555..56e5f18 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,8 @@ Kingfisher originated as a fork of Praetorian's Nosey Parker, and is built atop - **Performance**: multithreaded, Hyperscan‑powered scanning built for huge codebases - **Extensible rules**: hundreds of built-in detectors plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) - **Multiple targets**: - - **Git history**: local repos or GitHub/GitLab orgs/users + - **Git history**: local repos or GitHub/GitLab orgs/users + - **Repository artifacts**: with `--repo-artifacts`, scan GitHub/GitLab repository artifacts such as issues, pull/merge requests, wikis, snippets, and owner gists in addition to code - **Docker images**: public or private via `--docker-image` - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` - **Confluence pages**: CQL‑driven scans with `--confluence-url` and `--cql` diff --git a/data/rules/langchain.yml b/data/rules/langchain.yml new file mode 100644 index 0000000..cd853e2 --- /dev/null +++ b/data/rules/langchain.yml @@ -0,0 +1,52 @@ +rules: + - name: LangSmith Personal Access Token + id: kingfisher.langchain.1 + pattern: | + (?xi) + \b + ( + lsv2_(?:pt)_[0-9a-f]{32}_[0-9a-f]{10} + ) + \b + min_entropy: 4.0 + examples: + - "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" + validation: + type: Http + content: + request: + method: GET + url: "https://api.smith.langchain.com/api/v1/api-key/current" + headers: + X-API-Key: "{{ TOKEN }}" + Accept: "application/json" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - name: LangSmith Service Key + id: kingfisher.langchain.2 + pattern: | + (?xi) + \b + ( + lsv2_sk_[0-9a-f]{32}_[0-9a-f]{10} + ) + \b + min_entropy: 4.0 + examples: + - "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" + validation: + type: Http + content: + request: + method: GET + url: "https://api.smith.langchain.com/api/v1/orgs/current" + headers: + X-API-Key: "{{ TOKEN }}" + Accept: "application/json" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] \ No newline at end of file diff --git a/data/rules/mattermost.yml b/data/rules/mattermost.yml new file mode 100644 index 0000000..564adb6 --- /dev/null +++ b/data/rules/mattermost.yml @@ -0,0 +1,66 @@ +rules: + - name: Mattermost URL + id: kingfisher.mattermost.1 + pattern: | + (?xi) + \b + mattermost + (?:.|[\n\r]){0,32}? + ( + https?:\/\/[a-z0-9.-]+ + (?::\d{2,5})? + (?:\/[A-Za-z0-9._~\-\/]*)? + ) + \b + confidence: medium + visible: false + min_entropy: 2.0 + examples: + - mattermost_url = "https://community.mattermost.com" + - mattermost_url='http://localhost:8065' + - 'mattermost_url: https://intra.example.com/mattermost' + + - name: Mattermost Access Token + id: kingfisher.mattermost.2 + pattern: | + (?xi) + \b + mattermost + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? + \b + ( + [A-Z0-9]{26} + ) + \b + confidence: medium + min_entropy: 4.0 + examples: + - "mattermost_token: abcde12345fghij67890klmno1" + validation: + type: Http + content: + request: + method: GET + # Normalize any captured base that already includes /api/v4 + url: > + {%- assign base = MATTERMOST_URL | replace: "/api/v4/", "/" | replace: "/api/v4", "" -%} + {{ base }}/api/v4/users/me + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + words: ['"id"', '"username"'] + match_all_words: true + depends_on_rule: + - rule_id: "kingfisher.mattermost.1" + variable: MATTERMOST_URL + references: + - https://developers.mattermost.com/api-documentation/ + - https://developers.mattermost.com/integrate/faq/ diff --git a/data/rules/notion.yml b/data/rules/notion.yml new file mode 100644 index 0000000..b4a0b44 --- /dev/null +++ b/data/rules/notion.yml @@ -0,0 +1,83 @@ +rules: + - name: Notion Legacy Token + id: kingfisher.notion.1 + pattern: | + (?xi) + notion + (?:.|[\\n\r]){0,32}? + \b + ( + secret_[A-Z0-9]{43} + ) + \b + min_entropy: 4.0 + confidence: medium + examples: + - "notion secret_efky1RtWsI0CB1Sn4TRRBLpemW1V11XwPRX3lzUKc5Q" + validation: + type: Http + content: + request: + headers: + Notion-Version: "2022-06-28" + Authorization: "Bearer {{ TOKEN }}" + method: GET + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"object":"user"' + - '"type":"bot"' + match_all_words: true + url: https://api.notion.com/v1/users/me + - name: Notion Token + id: kingfisher.notion.2 + pattern: | + (?xi) + notion + (?:.|[\\n\r]){0,32}? + \b + ( + ntn_[A-Z0-9]{40,55} + ) + \b + min_entropy: 4.0 + confidence: medium + references: + - https://developers.notion.com/page/changelog#september-11-2024 + examples: + - "notion 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" + validation: + type: Http + content: + request: + headers: + Notion-Version: "2022-06-28" + Authorization: "Bearer {{ TOKEN }}" + method: GET + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"object":"user"' + - '"type":"bot"' + match_all_words: true + url: https://api.notion.com/v1/users/me + + - name: Notion OAuth Refresh Token + id: kingfisher.notion.2 + pattern: | + (?xi) + \b + ( + nrt_[A-Z0-9_]{40,55} + ) + \b + min_entropy: 3.5 + confidence: medium + examples: + - "nrt_4Y29zY29vbF9leGFtcGxlX3JlZnJlc2hfdG9rZW4xMjM0NQ" \ No newline at end of file diff --git a/data/rules/sendbird.yml b/data/rules/sendbird.yml new file mode 100644 index 0000000..9c36a46 --- /dev/null +++ b/data/rules/sendbird.yml @@ -0,0 +1,57 @@ +rules: + - name: Sendbird Application ID + id: kingfisher.sendbird.1 + pattern: | + (?xi) + sendbird + (?:.|[\\n\r]){0,32}? + (?:APPLICATION|APP_ID|APP|CLIENT|ID) + (?:.|[\n\r]){0,32}? + \b + ( + [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} + ) + \b + confidence: medium + visible: false + min_entropy: 3.0 + examples: + - "sendbird_app_id: 12345678-1234-1234-1234-1234567890ab" + + - name: Sendbird API Token + id: kingfisher.sendbird.2 + pattern: | + (?xi) + sendbird + (?:.|[\\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? + \b + ( + [a-f0-9]{40} + ) + \b + confidence: medium + min_entropy: 4.0 + examples: + - "sendbird_api_token: 1234567890abcdef1234567890abcdef12345678" + validation: + type: Http + content: + request: + method: GET + url: "https://api-{{SENDBIRD_APP_ID}}.sendbird.com/v3/users" + headers: + "Api-Token": "{{TOKEN}}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"users":' + depends_on_rule: + - rule_id: "kingfisher.sendbird.1" + variable: SENDBIRD_APP_ID + references: + - https://sendbird.com/docs/chat/platform-api/v3/prepare-to-use-api#2-authentication \ No newline at end of file diff --git a/data/rules/stackhawk.yml b/data/rules/stackhawk.yml index fc62628..b8fec69 100644 --- a/data/rules/stackhawk.yml +++ b/data/rules/stackhawk.yml @@ -1,7 +1,13 @@ rules: - name: StackHawk API Key id: kingfisher.stackhawk.1 - pattern: '\b(hawk\.[0-9A-Za-z_-]{20}\.[0-9A-Za-z_-]{20})\b' + pattern: | + (?xi) + \b + ( + hawk\.[0-9A-Z_-]{20}\.[0-9A-Z_-]{20} + ) + \b confidence: medium min_entropy: 3.0 examples: diff --git a/src/git_binary.rs b/src/git_binary.rs index 6e9bd8d..bf32f96 100644 --- a/src/git_binary.rs +++ b/src/git_binary.rs @@ -137,6 +137,9 @@ impl Git { if let Some(arg) = clone_mode.arg() { cmd.arg(arg); } + cmd.arg("--quiet"); + cmd.arg("-c"); + cmd.arg("remote.origin.fetch=+refs/*:refs/remotes/origin/*"); cmd.arg(repo_url.as_str()); cmd.arg(output_dir); debug!("{cmd:#?}"); diff --git a/src/update.rs b/src/update.rs index 8b9b138..8f66c59 100644 --- a/src/update.rs +++ b/src/update.rs @@ -56,7 +56,7 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt info!( "{}", styles.style_finding_active_heading.apply_to( - "Homebrew install detected – will notify about updates but not self‑update" + "Homebrew install detected - will notify about updates but not self-update" ) ); } @@ -158,9 +158,9 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt warn!( "{}", styles.style_finding_active_heading.apply_to( - "Cannot replace the current binary – permission denied.\n\ + "Cannot replace the current binary - permission denied.\n\ If you installed via a package manager, run its upgrade command.\n\ - Otherwise reinstall to a user‑writable directory or re‑run with sudo." + Otherwise reinstall to a user-writable directory or re-run with sudo." ) ); } diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs index bf4fc5f..3fc35fd 100644 --- a/src/validation/jwt.rs +++ b/src/validation/jwt.rs @@ -46,8 +46,33 @@ struct Claims { aud: Option, } +/// Runtime options for JWT validation policy. +#[derive(Clone, Default)] +pub struct ValidateOptions { + /// If true, accept unsigned tokens (`alg: "none"`) as long as temporal checks pass. + /// Default is **false** (more secure). + pub allow_alg_none: bool, + + /// If provided and `iss` is absent, use this key to cryptographically verify the token. + /// Useful for non-OIDC flows where you already know the verification key. + pub fallback_decoding_key: Option, +} + +/// Backwards-compatible entry point with secure defaults: +/// - `alg: none` is **rejected** +/// - `iss` is **required** unless `fallback_decoding_key` is supplied (not supplied here) pub async fn validate_jwt(token: &str) -> Result<(bool, String)> { - // --- insecure payload decode ------------------------------------------------- + validate_jwt_with( + token, + &ValidateOptions { allow_alg_none: false, fallback_decoding_key: None }, + ) + .await +} + +/// Strict validator with policy control. +/// Returns (is_active_credential, explanation). +pub async fn validate_jwt_with(token: &str, opts: &ValidateOptions) -> Result<(bool, String)> { + // --- insecure payload decode to read claims -------------------------------- let claims: Claims = { let payload_b64 = token.split('.').nth(1).ok_or_else(|| anyhow!("invalid JWT format"))?; let payload_json = URL_SAFE_NO_PAD @@ -69,132 +94,146 @@ pub async fn validate_jwt(token: &str) -> Result<(bool, String)> { } } + // parse header (for alg, kid) let header_b64 = token.split('.').next().ok_or_else(|| anyhow!("invalid JWT format"))?; let header_json = URL_SAFE_NO_PAD.decode(header_b64).map_err(|e| anyhow!("invalid base64 in header: {e}"))?; let header_val: serde_json::Value = serde_json::from_slice(&header_json).map_err(|e| anyhow!("invalid header json: {e}"))?; let alg_str = header_val.get("alg").and_then(|v| v.as_str()).unwrap_or(""); + let header = decode_header(token).map_err(|e| anyhow!("decode header: {e}"))?; + let alg = header.alg; - // If alg is "none", skip signature/JWKS entirely + // --- Policy: reject `alg: none` unless explicitly allowed ------------------ if alg_str.eq_ignore_ascii_case("none") { - // still enforce your time/claims checks that already ran - return Ok(( - true, - format!( - "JWT valid (alg: none, iss: {}, aud: {:?})", - claims.iss.clone().unwrap_or_default(), - extract_aud_strings(&claims), - ), - )); + if opts.allow_alg_none { + // time-valid is enough if explicitly allowed + return Ok(( + true, + format!( + "JWT valid (alg: none, iss: {}, aud: {:?})", + claims.iss.clone().unwrap_or_default(), + extract_aud_strings(&claims), + ), + )); + } else { + return Ok((false, "unsigned JWT (alg: none) not allowed".into())); + } } - // --------------------------------------------------------------------------- let issuer = claims.iss.clone().unwrap_or_default(); let aud_strings = extract_aud_strings(&claims); - if issuer.trim().is_empty() && aud_strings.iter().all(|s| s.trim().is_empty()) { - return Ok((false, "JWT missing issuer and audience".into())); - } - if let Some(iss) = claims.iss.clone() { - // parse header now (kid, alg) - let header = decode_header(token).map_err(|e| anyhow!("decode header: {e}"))?; - let alg = header.alg; + // --- New rule: require `iss` OR use fallback key for crypto verification --- + if issuer.trim().is_empty() { + // No issuer — we may still accept if we can cryptographically verify with a fallback key + if let Some(decoding_key) = opts.fallback_decoding_key.as_ref() { + // Verify signature (aud checked if present) + let mut validation = JwtValidation::new(alg); + if !aud_strings.is_empty() { + validation.set_audience(&aud_strings); + } + // We already did exp/nbf manually. + validation.validate_exp = false; + validation.validate_nbf = false; - // build discovery URL and fetch it (redirects disabled) - let config_url = format!("{}/.well-known/openid-configuration", iss.trim_end_matches('/')); - let cfg_resp = NO_REDIRECT_CLIENT - .get(&config_url) - .send() - .await - .map_err(|e| anyhow!("issuer discovery failed: {e}"))?; + decode::(token, decoding_key, &validation) + .map_err(|e| anyhow!("signature verification (fallback key) failed: {e}"))?; - if !cfg_resp.status().is_success() { - return Ok((false, format!("issuer discovery failed: {}", cfg_resp.status()))); - } - - let cfg_json: serde_json::Value = - cfg_resp.json().await.map_err(|e| anyhow!("invalid discovery JSON: {e}"))?; - - // extract jwks_uri - let jwks_uri = cfg_json - .get("jwks_uri") - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow!("jwks_uri missing"))?; - - // must be HTTPS - let url = Url::parse(jwks_uri).map_err(|e| anyhow!("invalid jwks_uri: {e}"))?; - if url.scheme() != "https" { - return Ok((false, "jwks_uri must use https".to_string())); - } - - // host must match issuer host  —  prevents open redirects / SSRF-on-other-host - let iss_host = Url::parse(&iss) - .map_err(|e| anyhow!("invalid iss: {e}"))? - .host_str() - .unwrap_or_default() - .to_ascii_lowercase(); - let jwks_host = url.host_str().unwrap_or_default().to_ascii_lowercase(); - if jwks_host != iss_host { + return Ok(( + true, + format!("JWT valid via fallback key (alg: {:?}, aud: {:?})", alg, aud_strings), + )); + } else { return Ok(( false, - format!("jwks_uri host ({jwks_host}) must match issuer host ({iss_host})"), + "issuer (iss) required or a fallback verification key must be provided".into(), )); } + } - // ----------------------------------------------------------------------- - // DNS resolution + private-range block - for addr in lookup_host((jwks_host.as_str(), 443)).await? { - if is_blocked_ip(addr.ip()) { - return Ok((false, "jwks_uri resolves to private or link-local IP".to_string())); - } - } + // --- With `iss`: OIDC discovery + JWKS verification path ------------------- + // build discovery URL and fetch it (redirects disabled) + let config_url = format!("{}/.well-known/openid-configuration", issuer.trim_end_matches('/')); + let cfg_resp = NO_REDIRECT_CLIENT + .get(&config_url) + .send() + .await + .map_err(|e| anyhow!("issuer discovery failed: {e}"))?; - // reachability check (existing helper) - check_url_resolvable(&url).await.map_err(|e| anyhow!("jwks uri unresolvable: {e}"))?; + if !cfg_resp.status().is_success() { + return Ok((false, format!("issuer discovery failed: {}", cfg_resp.status()))); + } - // fetch JWKS with redirect-free client - let jwks_resp = NO_REDIRECT_CLIENT - .get(url) - .send() - .await - .map_err(|e| anyhow!("jwks fetch failed: {e}"))?; - if !jwks_resp.status().is_success() { - return Ok((false, format!("jwks fetch failed: {}", jwks_resp.status()))); - } + let cfg_json: serde_json::Value = + cfg_resp.json().await.map_err(|e| anyhow!("invalid discovery JSON: {e}"))?; - let jwk_set: JwkSet = - jwks_resp.json().await.map_err(|e| anyhow!("invalid jwks json: {e}"))?; + // extract jwks_uri + let jwks_uri = cfg_json + .get("jwks_uri") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow!("jwks_uri missing"))?; - // select key by kid - let kid = header.kid.ok_or_else(|| anyhow!("no kid in header"))?; - let jwk = jwk_set - .keys - .iter() - .find(|k| k.common.key_id.as_deref() == Some(&kid)) - .ok_or_else(|| anyhow!("kid not found in jwks"))?; - - // verify signature - let decoding_key = DecodingKey::from_jwk(jwk).map_err(|e| anyhow!("invalid jwk: {e}"))?; - let mut validation = JwtValidation::new(header.alg); - validation.set_audience(&extract_aud_strings(&claims)); - validation.validate_exp = false; - validation.validate_nbf = false; - - decode::(token, &decoding_key, &validation) - .map_err(|e| anyhow!("signature verification failed: {e}"))?; + // must be HTTPS + let url = Url::parse(jwks_uri).map_err(|e| anyhow!("invalid jwks_uri: {e}"))?; + if url.scheme() != "https" { + return Ok((false, "jwks_uri must use https".to_string())); + } + // host must match issuer host + let iss_host = Url::parse(&issuer) + .map_err(|e| anyhow!("invalid iss: {e}"))? + .host_str() + .unwrap_or_default() + .to_ascii_lowercase(); + let jwks_host = url.host_str().unwrap_or_default().to_ascii_lowercase(); + if jwks_host != iss_host { return Ok(( - true, - format!( - "JWT valid (alg: {:?}, iss: {issuer}, aud: {:?})", - alg, - extract_aud_strings(&claims) - ), + false, + format!("jwks_uri host ({jwks_host}) must match issuer host ({iss_host})"), )); } - Ok((true, format!("JWT not expired (iss: {issuer}, aud: {:?})", extract_aud_strings(&claims)))) + // DNS resolution + private-range block + for addr in lookup_host((jwks_host.as_str(), 443)).await? { + if is_blocked_ip(addr.ip()) { + return Ok((false, "jwks_uri resolves to private or link-local IP".to_string())); + } + } + + // reachability check (existing helper) + check_url_resolvable(&url).await.map_err(|e| anyhow!("jwks uri unresolvable: {e}"))?; + + // fetch JWKS with redirect-free client + let jwks_resp = + NO_REDIRECT_CLIENT.get(url).send().await.map_err(|e| anyhow!("jwks fetch failed: {e}"))?; + if !jwks_resp.status().is_success() { + return Ok((false, format!("jwks fetch failed: {}", jwks_resp.status()))); + } + + let jwk_set: JwkSet = jwks_resp.json().await.map_err(|e| anyhow!("invalid jwks json: {e}"))?; + + // select key by kid + let kid = header.kid.ok_or_else(|| anyhow!("no kid in header"))?; + let jwk = jwk_set + .keys + .iter() + .find(|k| k.common.key_id.as_deref() == Some(&kid)) + .ok_or_else(|| anyhow!("kid not found in jwks"))?; + + // verify signature + let decoding_key = DecodingKey::from_jwk(jwk).map_err(|e| anyhow!("invalid jwk: {e}"))?; + let mut validation = JwtValidation::new(header.alg); + if !aud_strings.is_empty() { + validation.set_audience(&aud_strings); + } + validation.validate_exp = false; + validation.validate_nbf = false; + + decode::(token, &decoding_key, &validation) + .map_err(|e| anyhow!("signature verification failed: {e}"))?; + + Ok((true, format!("JWT valid (alg: {:?}, iss: {issuer}, aud: {:?})", alg, aud_strings))) } /// Helper: normalize aud into a flat Vec @@ -212,12 +251,11 @@ fn is_blocked_ip(ip: std::net::IpAddr) -> bool { #[cfg(test)] mod tests { + use super::{validate_jwt, validate_jwt_with, ValidateOptions}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; use chrono::{Duration as ChronoDuration, Utc}; - use super::validate_jwt; - - fn build_token(exp_offset: i64) -> String { + fn build_unsigned_token(exp_offset: i64) -> String { let header = URL_SAFE_NO_PAD.encode(r#"{"alg":"none"}"#); let exp = (Utc::now() + ChronoDuration::seconds(exp_offset)).timestamp(); let payload = URL_SAFE_NO_PAD.encode(format!( @@ -231,16 +269,35 @@ mod tests { } #[tokio::test] - async fn valid_token() { - let token = build_token(60); + async fn unsigned_token_rejected_by_default() { + let token = build_unsigned_token(60); let res = validate_jwt(&token).await.unwrap(); - assert!(res.0); + assert!(!res.0); + assert!(res.1.contains("unsigned JWT (alg: none) not allowed")); } #[tokio::test] - async fn expired_token() { - let token = build_token(-60); - let res = validate_jwt(&token).await.unwrap(); + async fn valid_token_allows_alg_none_when_opted_in() { + let token = build_unsigned_token(60); + let res = validate_jwt_with( + &token, + &ValidateOptions { allow_alg_none: true, fallback_decoding_key: None }, + ) + .await + .unwrap(); + assert!(res.0, "expected success when alg none is explicitly allowed"); + } + + #[tokio::test] + async fn expired_token_still_rejected() { + let token = build_unsigned_token(-60); + let res = validate_jwt_with( + &token, + &ValidateOptions { allow_alg_none: true, fallback_decoding_key: None }, + ) + .await + .unwrap(); assert!(!res.0); + assert!(res.1.contains("expired")); } } From d841b72e6cde99169e7c53f69b5ad48f0027482c Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 21 Aug 2025 16:10:52 -0700 Subject: [PATCH 4/6] fixed failing tests --- src/validation/jwt.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs index 3fc35fd..1f9b2e7 100644 --- a/src/validation/jwt.rs +++ b/src/validation/jwt.rs @@ -94,15 +94,14 @@ pub async fn validate_jwt_with(token: &str, opts: &ValidateOptions) -> Result<(b } } - // parse header (for alg, kid) + // parse header enough to read "alg" without jsonwebtoken's enum (which rejects "none") let header_b64 = token.split('.').next().ok_or_else(|| anyhow!("invalid JWT format"))?; let header_json = URL_SAFE_NO_PAD.decode(header_b64).map_err(|e| anyhow!("invalid base64 in header: {e}"))?; let header_val: serde_json::Value = serde_json::from_slice(&header_json).map_err(|e| anyhow!("invalid header json: {e}"))?; let alg_str = header_val.get("alg").and_then(|v| v.as_str()).unwrap_or(""); - let header = decode_header(token).map_err(|e| anyhow!("decode header: {e}"))?; - let alg = header.alg; + // --- Policy: reject `alg: none` unless explicitly allowed ------------------ if alg_str.eq_ignore_ascii_case("none") { @@ -120,6 +119,10 @@ pub async fn validate_jwt_with(token: &str, opts: &ValidateOptions) -> Result<(b return Ok((false, "unsigned JWT (alg: none) not allowed".into())); } } + + // Safe to decode full header now that we know alg != none + let header = decode_header(token).map_err(|e| anyhow!("decode header: {e}"))?; + let alg = header.alg; let issuer = claims.iss.clone().unwrap_or_default(); let aud_strings = extract_aud_strings(&claims); From 36d9ba54a17bf284236abb27c79e418381bc70d3 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 21 Aug 2025 16:11:34 -0700 Subject: [PATCH 5/6] fixed failing tests --- data/rules/notion.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/rules/notion.yml b/data/rules/notion.yml index b4a0b44..52b1073 100644 --- a/data/rules/notion.yml +++ b/data/rules/notion.yml @@ -69,7 +69,7 @@ rules: url: https://api.notion.com/v1/users/me - name: Notion OAuth Refresh Token - id: kingfisher.notion.2 + id: kingfisher.notion.3 pattern: | (?xi) \b From 71d0e02fc47c7b9e9cb6a2561261f90101415bcc Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 21 Aug 2025 16:13:03 -0700 Subject: [PATCH 6/6] fixed failing tests --- data/rules/notion.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/data/rules/notion.yml b/data/rules/notion.yml index 52b1073..642128e 100644 --- a/data/rules/notion.yml +++ b/data/rules/notion.yml @@ -72,6 +72,8 @@ rules: id: kingfisher.notion.3 pattern: | (?xi) + notion + (?:.|[\\n\r]){0,32}? \b ( nrt_[A-Z0-9_]{40,55} @@ -80,4 +82,4 @@ rules: min_entropy: 3.5 confidence: medium examples: - - "nrt_4Y29zY29vbF9leGFtcGxlX3JlZnJlc2hfdG9rZW4xMjM0NQ" \ No newline at end of file + - "notion refresh token = nrt_4Y29zY29vbF9leGFtcGxlX3JlZnJlc2hfdG9rZW4xMjM0NQ" \ No newline at end of file