From 6a974907ee606aade1d5939bf6a4e61e36a07d7a Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 23 Sep 2025 13:07:45 -0700 Subject: [PATCH 01/12] Added support for Gitea --- CHANGELOG.md | 3 + Cargo.toml | 2 +- README.md | 67 +++++- src/cli/commands/gitea.rs | 96 ++++++++ src/cli/commands/inputs.rs | 33 +++ src/cli/commands/mod.rs | 1 + src/cli/global.rs | 8 +- src/git_binary.rs | 20 +- src/git_url.rs | 4 +- src/gitea.rs | 440 ++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/main.rs | 28 ++- src/reporter/json_format.rs | 10 + src/scanner/repos.rs | 64 ++++- src/scanner/runner.rs | 11 +- tests/int_allowlist.rs | 7 + tests/int_bitbucket.rs | 8 + tests/int_dedup.rs | 8 + tests/int_github.rs | 8 + tests/int_gitlab.rs | 15 ++ tests/int_redact.rs | 7 + tests/int_slack.rs | 15 ++ tests/int_validation_cache.rs | 9 + tests/int_vulnerable_files.rs | 15 ++ 24 files changed, 865 insertions(+), 15 deletions(-) create mode 100644 src/cli/commands/gitea.rs create mode 100644 src/gitea.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index d67e87f..488a7c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [v1.54.0] +- Added first-class Gitea support, including CLI commands, environment-based authentication, documentation, and integration with scans and repository enumeration. + ## [v1.53.0] - Added first-class Bitbucket support, including CLI commands, authentication helpers, documentation, and integration testing. 
diff --git a/Cargo.toml b/Cargo.toml index 6b75952..b743646 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.53.0" +version = "1.54.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/README.md b/README.md index 2d13eaa..596dc4c 100644 --- a/README.md +++ b/README.md @@ -8,15 +8,15 @@ Kingfisher is a blazingly fast secret‑scanning and live validation tool built in Rust. It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware parsing via Tree‑Sitter, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production

-Originally forked from Praetorian’s Nosey Parker, Kingfisher adds live cloud-API validation; many more targets (GitLab, S3, Docker, Jira, Confluence, Slack); compressed-file extraction and scanning; baseline and allowlist controls; language-aware detection (~20 languages); and a native Windows binary. See [Origins and Divergence](#origins-and-divergence) for details. +Originally forked from Praetorian’s Nosey Parker, Kingfisher **adds** live cloud-API validation; many more targets (GitLab, Bitbucket, Gitea, S3, Docker, Jira, Confluence, Slack); compressed-file extraction and scanning; baseline and allowlist controls; language-aware detection (~20 languages); and a native Windows binary. See [Origins and Divergence](#origins-and-divergence) for details. ## Key Features - **Performance**: multithreaded, Hyperscan‑powered scanning built for huge codebases - **Extensible rules**: hundreds of built-in detectors plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) - **Broad AI SaaS coverage**: finds and validates tokens for OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), Ollama, Langchain, Perplexity, Weights & Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, Together.ai, Zhipu, and many more - **Multiple targets**: - - **Git history**: local repos or GitHub/GitLab/Bitbucket orgs, users, and workspaces - - **Repository artifacts**: with `--repo-artifacts`, scan GitHub/GitLab/Bitbucket repository artifacts such as issues, pull/merge requests, wikis, snippets, and owner gists in addition to code + - **Git history**: local repos or GitHub/GitLab/Gitea/Bitbucket orgs, users, and workspaces + - **Repository artifacts**: with `--repo-artifacts`, scan GitHub/GitLab/Bitbucket repository artifacts such as issues, pull/merge requests, wikis, snippets, and owner gists in addition to code (Gitea wikis are also cloned when available) - **Docker images**: public or private via `--docker-image` - **Jira issues**: JQL‑driven 
scans with `--jira-url` and `--jql` - **Confluence pages**: CQL‑driven scans with `--confluence-url` and `--cql` @@ -71,6 +71,12 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md)) - [Skip specific GitLab projects during enumeration](#skip-specific-gitlab-projects-during-enumeration) - [Scan remote GitLab repository by URL](#scan-remote-gitlab-repository-by-url) - [List GitLab repositories](#list-gitlab-repositories) + - [Scanning Gitea](#scanning-gitea) + - [Scan Gitea organization (requires `KF_GITEA_TOKEN`)](#scan-gitea-organization-requires-kf_gitea_token) + - [Scan Gitea user](#scan-gitea-user) + - [Skip specific Gitea repositories during enumeration](#skip-specific-gitea-repositories-during-enumeration) + - [Scan remote Gitea repository by URL](#scan-remote-gitea-repository-by-url) + - [List Gitea repositories](#list-gitea-repositories) - [Scanning Bitbucket](#scanning-bitbucket) - [Scan Bitbucket workspace](#scan-bitbucket-workspace) - [Scan Bitbucket user](#scan-bitbucket-user) @@ -560,6 +566,59 @@ kingfisher gitlab repos list --group my-group --include-subgroups kingfisher gitlab repos list --group my-group --gitlab-exclude my-group/**/legacy-* ``` +## Scanning Gitea + +### Scan Gitea organization (requires `KF_GITEA_TOKEN`) + +```bash +kingfisher scan --gitea-organization my-org +# self-hosted example +KF_GITEA_TOKEN="gtoken" kingfisher scan --gitea-organization platform --gitea-api-url https://gitea.internal.example/api/v1/ +``` + +### Scan Gitea user + +```bash +kingfisher scan --gitea-user johndoe +``` + +### Skip specific Gitea repositories during enumeration + +Repeat `--gitea-exclude` for each repository you want to ignore when scanning users +or organizations. Accepts `owner/repo` identifiers or gitignore-style glob patterns +like `team/**/archive-*`. 
+ +```bash +kingfisher scan --gitea-organization my-org \ + --gitea-exclude my-org/legacy-repo \ + --gitea-exclude my-org/**/archive-* +``` + +### Scan remote Gitea repository by URL + +`--git-url` clones the repository and scans its history. Adding `--repo-artifacts` +also clones the repository wiki if one exists. Private repositories and wikis +require `KF_GITEA_TOKEN` (and `KF_GITEA_USERNAME` when cloning via HTTPS). + +```bash +# Scan the repository only +kingfisher scan --git-url https://gitea.com/org/repo.git + +# Include the repository wiki (if present) +KF_GITEA_TOKEN="gtoken" KF_GITEA_USERNAME="org" \ + kingfisher scan --git-url https://gitea.com/org/repo.git --repo-artifacts +``` + +### List Gitea repositories + +```bash +kingfisher gitea repos list --gitea-organization my-org +# enumerate every organization visible to the authenticated user +KF_GITEA_TOKEN="gtoken" kingfisher gitea repos list --all-gitea-organizations +# self-hosted example +KF_GITEA_TOKEN="gtoken" kingfisher gitea repos list --user johndoe --gitea-api-url https://gitea.internal.example/api/v1/ +``` + ## Scanning Bitbucket ### Scan Bitbucket workspace @@ -700,6 +759,8 @@ KF_SLACK_TOKEN="xoxp-1234..." 
kingfisher scan \ | ----------------- | ---------------------------- | | `KF_GITHUB_TOKEN` | GitHub Personal Access Token | | `KF_GITLAB_TOKEN` | GitLab Personal Access Token | +| `KF_GITEA_TOKEN` | Gitea Personal Access Token | +| `KF_GITEA_USERNAME` | Username for private Gitea clones (used with `KF_GITEA_TOKEN`) | | `KF_BITBUCKET_USERNAME` | Bitbucket username for basic authentication | | `KF_BITBUCKET_APP_PASSWORD` / `KF_BITBUCKET_TOKEN` | Bitbucket app password or server token | | `KF_BITBUCKET_OAUTH_TOKEN` | Bitbucket OAuth or PAT token | diff --git a/src/cli/commands/gitea.rs b/src/cli/commands/gitea.rs new file mode 100644 index 0000000..6bdb393 --- /dev/null +++ b/src/cli/commands/gitea.rs @@ -0,0 +1,96 @@ +use clap::{Args, Subcommand, ValueEnum, ValueHint}; +use strum_macros::Display; +use url::Url; + +use crate::cli::commands::output::OutputArgs; + +use super::github::GitHubOutputFormat; + +/// Top-level Gitea command group +#[derive(Args, Debug)] +pub struct GiteaArgs { + #[command(subcommand)] + pub command: GiteaCommand, + + /// Override Gitea API URL (e.g. 
self-hosted) + #[arg(global = true, long, default_value = "https://gitea.com/api/v1/", value_hint = ValueHint::Url)] + pub gitea_api_url: Url, +} + +#[derive(Subcommand, Debug)] +pub enum GiteaCommand { + /// Interact with Gitea repositories + #[command(subcommand)] + Repos(GiteaReposCommand), +} + +#[derive(Subcommand, Debug)] +pub enum GiteaReposCommand { + /// List repositories for a user or organization + List(GiteaReposListArgs), +} + +/// `kingfisher gitea repos` +#[derive(Args, Debug, Clone)] +pub struct GiteaReposListArgs { + #[command(flatten)] + pub repo_specifiers: GiteaRepoSpecifiers, + + #[command(flatten)] + pub output_args: OutputArgs, +} + +/// Options for selecting Gitea repos +#[derive(Args, Debug, Clone)] +pub struct GiteaRepoSpecifiers { + /// Repositories belonging to these users + #[arg(long, alias = "gitea-user")] + pub user: Vec, + + /// Repositories belonging to these organizations + #[arg(long, alias = "org", alias = "gitea-organization", alias = "gitea-org")] + pub organization: Vec, + + /// Skip repositories when enumerating Gitea users or organizations (format: owner/repo) + #[arg(long = "gitea-exclude", alias = "gitea-exclude-repo", value_name = "OWNER/REPO")] + pub exclude_repos: Vec, + + /// Repositories for all organizations accessible to the authenticated user + #[arg(long, alias = "all-gitea-organizations", alias = "all-gitea-orgs")] + pub all_organizations: bool, + + /// Filter by repository type + #[arg(long, default_value_t = GiteaRepoType::Source, alias = "gitea-repo-type")] + pub repo_type: GiteaRepoType, +} + +impl GiteaRepoSpecifiers { + pub fn is_empty(&self) -> bool { + self.user.is_empty() && self.organization.is_empty() && !self.all_organizations + } +} + +/// Gitea repository type filter +#[derive(Copy, Clone, Debug, Display, PartialEq, Eq, PartialOrd, Ord, ValueEnum)] +#[strum(serialize_all = "kebab-case")] +pub enum GiteaRepoType { + /// Only source repositories (not forks) + Source, + /// Only fork repositories + 
#[value(alias = "forks")] + Fork, + /// Include all repositories + All, +} + +pub type GiteaOutputFormat = GitHubOutputFormat; + +impl From for crate::gitea::RepoType { + fn from(val: GiteaRepoType) -> Self { + match val { + GiteaRepoType::Source => crate::gitea::RepoType::Source, + GiteaRepoType::Fork => crate::gitea::RepoType::Fork, + GiteaRepoType::All => crate::gitea::RepoType::All, + } + } +} diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs index a3fcac6..6c6f81b 100644 --- a/src/cli/commands/inputs.rs +++ b/src/cli/commands/inputs.rs @@ -6,6 +6,7 @@ use url::Url; use crate::{ cli::commands::{ bitbucket::{BitbucketAuthArgs, BitbucketRepoType}, + gitea::GiteaRepoType, github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, gitlab::GitLabRepoType, }, @@ -24,12 +25,15 @@ pub struct InputSpecifierArgs { "github_organization", "gitlab_user", "gitlab_group", + "gitea_user", + "gitea_organization", "bitbucket_user", "bitbucket_workspace", "bitbucket_project", "git_url", "all_github_organizations", "all_gitlab_groups", + "all_gitea_organizations", "all_bitbucket_workspaces", "jira_url", "confluence_url", @@ -112,6 +116,35 @@ pub struct InputSpecifierArgs { #[arg(long, alias = "include-subgroups")] pub gitlab_include_subgroups: bool, + // Gitea Options + /// Scan repositories belonging to the specified Gitea user + #[arg(long)] + pub gitea_user: Vec, + + /// Scan repositories belonging to the specified Gitea organization + #[arg(long, alias = "gitea-org")] + pub gitea_organization: Vec, + + /// Skip repositories when enumerating Gitea users or organizations (format: owner/repo) + #[arg(long = "gitea-exclude", alias = "gitea-exclude-repo", value_name = "OWNER/REPO")] + pub gitea_exclude: Vec, + + /// Scan repositories from all accessible Gitea organizations (requires KF_GITEA_TOKEN) + #[arg(long, alias = "all-gitea-orgs")] + pub all_gitea_organizations: bool, + + /// Use the specified URL for Gitea API access (e.g. 
for self-hosted instances) + #[arg( + long, + alias="gitea-api-url", + default_value = "https://gitea.com/api/v1/", + value_hint = ValueHint::Url + )] + pub gitea_api_url: Url, + + #[arg(long, default_value_t = GiteaRepoType::Source)] + pub gitea_repo_type: GiteaRepoType, + // Bitbucket Options /// Scan repositories belonging to the specified Bitbucket users #[arg(long)] diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs index 243ab1b..b7717bd 100644 --- a/src/cli/commands/mod.rs +++ b/src/cli/commands/mod.rs @@ -1,4 +1,5 @@ pub mod bitbucket; +pub mod gitea; pub mod github; pub mod gitlab; pub mod inputs; diff --git a/src/cli/global.rs b/src/cli/global.rs index c19d10d..edd79dc 100644 --- a/src/cli/global.rs +++ b/src/cli/global.rs @@ -7,8 +7,8 @@ use sysinfo::{MemoryRefreshKind, RefreshKind, System}; use tracing::Level; use crate::cli::commands::{ - bitbucket::BitbucketArgs, github::GitHubArgs, gitlab::GitLabArgs, rules::RulesArgs, - scan::ScanArgs, + bitbucket::BitbucketArgs, gitea::GiteaArgs, github::GitHubArgs, gitlab::GitLabArgs, + rules::RulesArgs, scan::ScanArgs, }; #[deny(missing_docs)] @@ -69,6 +69,10 @@ pub enum Command { #[command(name = "gitlab")] GitLab(GitLabArgs), + /// Interact with the Gitea API + #[command(name = "gitea")] + Gitea(GiteaArgs), + /// Interact with the Bitbucket API #[command(name = "bitbucket")] Bitbucket(BitbucketArgs), diff --git a/src/git_binary.rs b/src/git_binary.rs index 4f62564..09f6658 100644 --- a/src/git_binary.rs +++ b/src/git_binary.rs @@ -23,6 +23,14 @@ const BITBUCKET_CREDENTIAL_HELPER: &str = r#"credential.helper=!_bbcreds() { fi }; _bbcreds"#; +const GITEA_CREDENTIAL_HELPER: &str = r#"credential.helper=!_gteacreds() { + if [ -n "$KF_GITEA_TOKEN" ]; then + user="${KF_GITEA_USERNAME:-gitea}"; + echo username="$user"; + echo password="$KF_GITEA_TOKEN"; + fi +}; _gteacreds"#; + /// Represents errors that can occur when interacting with the `git` CLI. 
#[derive(Debug, thiserror::Error)] pub enum GitError { @@ -40,7 +48,7 @@ pub enum GitError { /// A helper struct for running `git` commands. /// -/// It supports optional GitHub, GitLab, and Bitbucket credentials passed via +/// It supports optional GitHub, GitLab, Gitea, and Bitbucket credentials passed via /// environment variables and optionally ignores TLS certificate validation if /// requested. pub struct Git { @@ -59,6 +67,8 @@ impl Git { matches!(std::env::var("KF_GITHUB_TOKEN"), Ok(token) if !token.is_empty()); let has_gitlab_token = matches!(std::env::var("KF_GITLAB_TOKEN"), Ok(token) if !token.is_empty()); + let has_gitea_token = + matches!(std::env::var("KF_GITEA_TOKEN"), Ok(token) if !token.is_empty()); let has_bitbucket_username = matches!(std::env::var("KF_BITBUCKET_USERNAME"), Ok(value) if !value.is_empty()); let has_bitbucket_password = @@ -71,7 +81,7 @@ impl Git { has_bitbucket_oauth_token || (has_bitbucket_username && has_bitbucket_password); // If credentials are provided via environment variables, clear existing helpers first. - if has_github_token || has_gitlab_token || has_bitbucket_credentials { + if has_github_token || has_gitlab_token || has_gitea_token || has_bitbucket_credentials { credentials.push("-c".into()); credentials.push(r#"credential.helper="#.into()); } @@ -92,6 +102,12 @@ impl Git { ); } + // Inject Gitea token helper + if has_gitea_token { + credentials.push("-c".into()); + credentials.push(GITEA_CREDENTIAL_HELPER.into()); + } + // Inject Bitbucket credential helper for OAuth tokens or basic auth. 
if has_bitbucket_credentials { credentials.push("-c".into()); diff --git a/src/git_url.rs b/src/git_url.rs index 1cc9827..7458bcc 100644 --- a/src/git_url.rs +++ b/src/git_url.rs @@ -64,8 +64,8 @@ impl TryFrom for GitUrl { type Error = &'static str; fn try_from(url: Url) -> Result { - if url.scheme() != "https" - || url.host().is_none() + // if url.scheme() != "https" + if url.host().is_none() || !url.username().is_empty() || url.password().is_some() || url.query().is_some() diff --git a/src/gitea.rs b/src/gitea.rs new file mode 100644 index 0000000..a5a5def --- /dev/null +++ b/src/gitea.rs @@ -0,0 +1,440 @@ +use std::{collections::HashSet, env, str::FromStr, time::Duration}; + +use anyhow::{anyhow, Result}; +use globset::{Glob, GlobSet, GlobSetBuilder}; +use indicatif::{ProgressBar, ProgressStyle}; +use reqwest::StatusCode; +use serde::Deserialize; +use tracing::warn; +use url::Url; + +use crate::{git_url::GitUrl, validation::GLOBAL_USER_AGENT}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RepoType { + All, + Source, + Fork, +} + +impl RepoType { + fn allows(self, is_fork: bool) -> bool { + match self { + RepoType::All => true, + RepoType::Source => !is_fork, + RepoType::Fork => is_fork, + } + } +} + +#[derive(Debug, Clone)] +pub struct RepoSpecifiers { + pub user: Vec, + pub organization: Vec, + pub all_organizations: bool, + pub repo_filter: RepoType, + pub exclude_repos: Vec, +} + +impl RepoSpecifiers { + pub fn is_empty(&self) -> bool { + self.user.is_empty() && self.organization.is_empty() && !self.all_organizations + } +} + +#[derive(Debug, Deserialize)] +struct GiteaRepository { + full_name: String, + clone_url: String, + #[serde(default)] + fork: bool, +} + +#[derive(Debug, Deserialize)] +struct GiteaOrganization { + username: String, +} + +struct ExcludeMatcher { + exact: HashSet, + globs: Option, +} + +impl ExcludeMatcher { + fn matches(&self, name: &str) -> bool { + if self.exact.contains(name) { + return true; + } + if let Some(globs) = 
&self.globs { + return globs.is_match(name); + } + false + } + + fn is_empty(&self) -> bool { + self.exact.is_empty() && self.globs.is_none() + } +} + +fn looks_like_glob(pattern: &str) -> bool { + pattern.contains('*') || pattern.contains('?') || pattern.contains('[') +} + +fn normalize_repo_identifier(raw: &str) -> Option { + let trimmed = raw.trim().trim_matches('/'); + if trimmed.is_empty() { + return None; + } + let without_git = trimmed.strip_suffix(".git").unwrap_or(trimmed); + let mut parts = without_git.split('/').filter(|segment| !segment.is_empty()); + let owner = parts.next()?; + let repo = parts.next()?; + Some(format!("{}/{}", owner.to_lowercase(), repo.to_lowercase())) +} + +fn parse_excluded_repo(raw: &str) -> Option { + let trimmed = raw.trim(); + if trimmed.is_empty() { + return None; + } + + if let Ok(url) = Url::parse(trimmed) { + if let Some(name) = normalize_repo_identifier(url.path()) { + return Some(name); + } + } + + if let Some(idx) = trimmed.rfind(':') { + if let Some(name) = normalize_repo_identifier(&trimmed[idx + 1..]) { + return Some(name); + } + } + + normalize_repo_identifier(trimmed) +} + +fn build_exclude_matcher(excludes: &[String]) -> ExcludeMatcher { + let mut exact = HashSet::new(); + let mut glob_builder = GlobSetBuilder::new(); + let mut has_glob = false; + + for raw in excludes { + match parse_excluded_repo(raw) { + Some(name) => { + if looks_like_glob(&name) { + match Glob::new(&name) { + Ok(glob) => { + glob_builder.add(glob); + has_glob = true; + } + Err(err) => { + warn!("Ignoring invalid Gitea exclusion pattern '{raw}': {err}"); + exact.insert(name); + } + } + } else { + exact.insert(name); + } + } + None => { + warn!("Ignoring invalid Gitea exclusion '{raw}' (expected owner/repo)"); + } + } + } + + let globs = if has_glob { + match glob_builder.build() { + Ok(set) => Some(set), + Err(err) => { + warn!("Failed to build Gitea exclusion patterns: {err}"); + None + } + } + } else { + None + }; + + ExcludeMatcher { exact, 
globs } +} + +fn should_exclude_repo(repo: &GiteaRepository, excludes: &ExcludeMatcher) -> bool { + if excludes.is_empty() { + return false; + } + excludes.matches(&repo.full_name.to_lowercase()) +} + +async fn fetch_paginated_repos( + client: &reqwest::Client, + token: Option<&str>, + mut url: Url, + repo_filter: RepoType, + excludes: &ExcludeMatcher, + progress: Option<&ProgressBar>, +) -> Result> { + let mut page = 1u32; + let mut repos = Vec::new(); + loop { + url.query_pairs_mut() + .clear() + .append_pair("page", &page.to_string()) + .append_pair("limit", "50"); + if let Some(pb) = progress { + pb.set_message(format!("Fetching Gitea repositories (page {page})")); + } + let mut req = client.get(url.clone()).header("User-Agent", GLOBAL_USER_AGENT.as_str()); + if let Some(token) = token { + req = req.header("Authorization", format!("token {token}")); + } + let resp = req.send().await?; + match resp.status() { + StatusCode::OK => {} + StatusCode::NOT_FOUND => { + warn!("Gitea endpoint {} returned 404", url); + break; + } + status => { + return Err(anyhow!("Failed to fetch repositories from {} (status {status})", url)); + } + } + let page_repos: Vec = resp.json().await?; + if page_repos.is_empty() { + break; + } + for repo in page_repos { + if !repo_filter.allows(repo.fork) { + continue; + } + if should_exclude_repo(&repo, excludes) { + continue; + } + repos.push(repo.clone_url); + } + page += 1; + } + Ok(repos) +} + +async fn fetch_user_repos( + client: &reqwest::Client, + token: Option<&str>, + api_url: &Url, + username: &str, + repo_filter: RepoType, + excludes: &ExcludeMatcher, + progress: Option<&ProgressBar>, +) -> Result> { + let endpoint = format!("users/{}/repos", username); + let url = api_url.join(&endpoint)?; + fetch_paginated_repos(client, token, url, repo_filter, excludes, progress).await +} + +async fn fetch_org_repos( + client: &reqwest::Client, + token: Option<&str>, + api_url: &Url, + org: &str, + repo_filter: RepoType, + excludes: 
&ExcludeMatcher, + progress: Option<&ProgressBar>, +) -> Result> { + let endpoint = format!("orgs/{}/repos", org); + let url = api_url.join(&endpoint)?; + fetch_paginated_repos(client, token, url, repo_filter, excludes, progress).await +} + +async fn fetch_authenticated_orgs( + client: &reqwest::Client, + token: Option<&str>, + api_url: &Url, +) -> Result> { + let Some(token) = token else { + return Err(anyhow!("KF_GITEA_TOKEN must be set to enumerate all organizations")); + }; + let url = api_url.join("user/orgs")?; + let resp = client + .get(url.clone()) + .header("User-Agent", GLOBAL_USER_AGENT.as_str()) + .header("Authorization", format!("token {token}")) + .send() + .await?; + match resp.status() { + StatusCode::OK => {} + StatusCode::NOT_FOUND => { + warn!("Gitea endpoint {} returned 404", url); + return Ok(Vec::new()); + } + status => { + return Err(anyhow!( + "Failed to enumerate organizations from {} (status {status})", + url + )); + } + } + let orgs: Vec = resp.json().await?; + Ok(orgs.into_iter().map(|org| org.username).collect()) +} + +pub async fn enumerate_repo_urls( + specifiers: &RepoSpecifiers, + api_url: Url, + ignore_certs: bool, + mut progress: Option<&mut ProgressBar>, +) -> Result> { + let excludes = build_exclude_matcher(&specifiers.exclude_repos); + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(30)) + .danger_accept_invalid_certs(ignore_certs) + .build()?; + let token = env::var("KF_GITEA_TOKEN").ok().filter(|t| !t.is_empty()); + + let mut repos = Vec::new(); + let mut seen = HashSet::new(); + + for user in &specifiers.user { + if let Some(pb) = progress.as_mut() { + pb.set_message(format!("Enumerating Gitea user {user}")); + } + match fetch_user_repos( + &client, + token.as_deref(), + &api_url, + user, + specifiers.repo_filter, + &excludes, + progress.as_deref(), + ) + .await + { + Ok(mut urls) => { + for url in urls.drain(..) 
{ + if seen.insert(url.clone()) { + repos.push(url); + } + } + } + Err(err) => { + warn!("Failed to enumerate Gitea repositories for user {user}: {err}"); + } + } + } + + let mut organizations = specifiers.organization.clone(); + if specifiers.all_organizations { + match fetch_authenticated_orgs(&client, token.as_deref(), &api_url).await { + Ok(mut orgs) => organizations.append(&mut orgs), + Err(err) => warn!("Failed to enumerate Gitea organizations: {err}"), + } + } + organizations.sort(); + organizations.dedup(); + + for org in organizations { + if let Some(pb) = progress.as_mut() { + pb.set_message(format!("Enumerating Gitea organization {org}")); + } + match fetch_org_repos( + &client, + token.as_deref(), + &api_url, + &org, + specifiers.repo_filter, + &excludes, + progress.as_deref(), + ) + .await + { + Ok(mut urls) => { + for url in urls.drain(..) { + if seen.insert(url.clone()) { + repos.push(url); + } + } + } + Err(err) => { + warn!("Failed to enumerate Gitea repositories for organization {org}: {err}"); + } + } + } + + repos.sort(); + repos.dedup(); + Ok(repos) +} + +pub async fn list_repositories( + api_url: Url, + ignore_certs: bool, + progress_enabled: bool, + users: &[String], + orgs: &[String], + all_orgs: bool, + exclude_repos: &[String], + repo_filter: RepoType, +) -> Result<()> { + let mut progress = if progress_enabled { + let style = ProgressStyle::with_template("{spinner} {msg} [{elapsed_precise}]") + .expect("progress bar style template should compile"); + let pb = ProgressBar::new_spinner().with_style(style).with_message("Fetching repositories"); + pb.enable_steady_tick(Duration::from_millis(500)); + pb + } else { + ProgressBar::hidden() + }; + + let specifiers = RepoSpecifiers { + user: users.to_vec(), + organization: orgs.to_vec(), + all_organizations: all_orgs, + repo_filter, + exclude_repos: exclude_repos.to_vec(), + }; + + let urls = enumerate_repo_urls(&specifiers, api_url, ignore_certs, Some(&mut progress)).await?; + for url in urls { + 
println!("{}", url); + } + progress.finish_and_clear(); + Ok(()) +} + +fn parse_repo(repo_url: &GitUrl) -> Option<(String, String, String)> { + let url = Url::parse(repo_url.as_str()).ok()?; + let host = url.host_str()?.to_string(); + let mut segments = url.path_segments()?; + let owner = segments.next()?.to_string(); + let mut repo = segments.next()?.to_string(); + if let Some(stripped) = repo.strip_suffix(".git") { + repo = stripped.to_string(); + } + Some((host, owner, repo)) +} + +pub fn wiki_url(repo_url: &GitUrl) -> Option { + let (host, owner, repo) = parse_repo(repo_url)?; + let url = format!("https://{host}/{owner}/{repo}.wiki.git"); + GitUrl::from_str(&url).ok() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_excluded_repo_variants() { + assert_eq!(parse_excluded_repo("Owner/Repo").as_deref(), Some("owner/repo")); + assert_eq!( + parse_excluded_repo("https://gitea.example.com/Owner/Repo.git").as_deref(), + Some("owner/repo") + ); + assert_eq!( + parse_excluded_repo("ssh://git@example.com:3000/Owner/Repo.git").as_deref(), + Some("owner/repo") + ); + } + + #[test] + fn normalize_repo_identifier_handles_git_suffix() { + assert_eq!(normalize_repo_identifier("owner/repo.git"), Some("owner/repo".into())); + } +} diff --git a/src/lib.rs b/src/lib.rs index 920ae3a..598c278 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,6 +17,7 @@ pub mod git_commit_metadata; pub mod git_metadata_graph; mod git_repo_enumerator; pub mod git_url; +pub mod gitea; pub mod github; pub mod gitlab; pub mod jira; diff --git a/src/main.rs b/src/main.rs index 670b5c6..d73bcc1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -52,7 +52,7 @@ use kingfisher::{ }, findings_store, findings_store::FindingsStore, - github, + gitea, github, rule_loader::RuleLoader, rules_database::RulesDatabase, scanner::{load_and_record_rules, run_scan}, @@ -72,6 +72,7 @@ use url::Url; use crate::cli::commands::{ bitbucket::{BitbucketAuthArgs, BitbucketCommand, BitbucketRepoType, 
BitbucketReposCommand}, + gitea::{GiteaCommand, GiteaRepoType, GiteaReposCommand}, gitlab::{GitLabCommand, GitLabRepoType, GitLabReposCommand}, }; @@ -89,6 +90,7 @@ fn main() -> anyhow::Result<()> { Command::GitHub(_) => num_cpus::get(), // Default for GitHub commands Command::GitLab(_) => num_cpus::get(), // Default for GitLab commands Command::Bitbucket(_) => num_cpus::get(), // Default for Bitbucket commands + Command::Gitea(_) => num_cpus::get(), // Default for Gitea commands Command::Rules(_) => num_cpus::get(), // Default for Rules commands }; @@ -265,6 +267,23 @@ async fn async_main(args: CommandLineArgs) -> Result<()> { } }, }, + Command::Gitea(gitea_args) => match gitea_args.command { + GiteaCommand::Repos(repos_command) => match repos_command { + GiteaReposCommand::List(list_args) => { + gitea::list_repositories( + gitea_args.gitea_api_url, + global_args.ignore_certs, + global_args.use_progress(), + &list_args.repo_specifiers.user, + &list_args.repo_specifiers.organization, + list_args.repo_specifiers.all_organizations, + &list_args.repo_specifiers.exclude_repos, + list_args.repo_specifiers.repo_type.into(), + ) + .await?; + } + }, + }, Command::Bitbucket(bitbucket_args) => match bitbucket_args.command { BitbucketCommand::Repos(repos_command) => match repos_command { BitbucketReposCommand::List(list_args) => { @@ -329,6 +348,13 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { gitlab_repo_type: GitLabRepoType::All, gitlab_include_subgroups: false, + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(), + gitea_repo_type: GiteaRepoType::Source, + bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index b369c62..4149469 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ 
-40,6 +40,7 @@ mod tests { use crate::{ blob::BlobId, cli::commands::bitbucket::{BitbucketAuthArgs, BitbucketRepoType}, + cli::commands::gitea::GiteaRepoType, cli::commands::github::GitHubRepoType, cli::commands::inputs::ContentFilteringArgs, cli::commands::inputs::InputSpecifierArgs, @@ -90,6 +91,15 @@ mod tests { gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::All, gitlab_include_subgroups: false, + + // Gitea + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(), + gitea_repo_type: GiteaRepoType::Source, + // Bitbucket bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 833d6f8..95144a7 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -20,7 +20,7 @@ use crate::{ confluence, findings_store, git_binary::{CloneMode, Git}, git_url::GitUrl, - github, gitlab, jira, + gitea, github, gitlab, jira, matcher::{Match, Matcher, MatcherStats}, origin::{Origin, OriginSet}, rules_database::RulesDatabase, @@ -243,6 +243,68 @@ pub async fn enumerate_gitlab_repos( Ok(repo_urls) } +pub async fn enumerate_gitea_repos( + args: &scan::ScanArgs, + global_args: &global::GlobalArgs, +) -> Result> { + let repo_specifiers = gitea::RepoSpecifiers { + user: args.input_specifier_args.gitea_user.clone(), + organization: args.input_specifier_args.gitea_organization.clone(), + all_organizations: args.input_specifier_args.all_gitea_organizations, + repo_filter: args.input_specifier_args.gitea_repo_type.into(), + exclude_repos: args.input_specifier_args.gitea_exclude.clone(), + }; + + let mut repo_urls = args.input_specifier_args.git_url.clone(); + if !repo_specifiers.is_empty() { + let mut progress = if global_args.use_progress() { + let style = + ProgressStyle::with_template("{spinner} {msg} {human_len} [{elapsed_precise}]") + 
.expect("progress bar style template should compile"); + let pb = ProgressBar::new_spinner() + .with_style(style) + .with_message("Enumerating Gitea repositories..."); + pb.enable_steady_tick(Duration::from_millis(500)); + pb + } else { + ProgressBar::hidden() + }; + + let mut num_found: u64 = 0; + let api_url = args.input_specifier_args.gitea_api_url.clone(); + let repo_strings = gitea::enumerate_repo_urls( + &repo_specifiers, + api_url, + global_args.ignore_certs, + Some(&mut progress), + ) + .await + .context("Failed to enumerate Gitea repositories")?; + + for repo_string in repo_strings { + match GitUrl::from_str(&repo_string) { + Ok(repo_url) => { + repo_urls.push(repo_url); + num_found += 1; + } + Err(e) => { + progress.suspend(|| { + error!("Failed to parse repo URL from {repo_string}: {e}"); + }); + } + } + } + + progress.finish_with_message(format!( + "Found {} repositories from Gitea", + HumanCount(num_found) + )); + } + repo_urls.sort(); + repo_urls.dedup(); + Ok(repo_urls) +} + pub async fn enumerate_bitbucket_repos( args: &scan::ScanArgs, global_args: &global::GlobalArgs, diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index a4a35b4..9d394dc 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -11,7 +11,7 @@ use crate::{ cli::{commands::scan, global}, findings_store, findings_store::{FindingsStore, FindingsStoreMessage}, - github, gitlab, + gitea, github, gitlab, liquid_filters::register_all, matcher::MatcherStats, reporter::styles::Styles, @@ -23,8 +23,8 @@ use crate::{ clone_or_update_git_repos, enumerate_bitbucket_repos, enumerate_filesystem_inputs, enumerate_github_repos, repos::{ - enumerate_gitlab_repos, fetch_confluence_pages, fetch_git_host_artifacts, - fetch_jira_issues, fetch_s3_objects, fetch_slack_messages, + enumerate_gitea_repos, enumerate_gitlab_repos, fetch_confluence_pages, + fetch_git_host_artifacts, fetch_jira_issues, fetch_s3_objects, fetch_slack_messages, }, run_secret_validation, save_docker_images, 
summary::print_scan_summary, @@ -73,10 +73,12 @@ pub async fn run_async_scan( let mut repo_urls = enumerate_github_repos(args, global_args).await?; let gitlab_repo_urls = enumerate_gitlab_repos(args, global_args).await?; + let gitea_repo_urls = enumerate_gitea_repos(args, global_args).await?; let bitbucket_repo_urls = enumerate_bitbucket_repos(args, global_args).await?; // Combine repository URLs repo_urls.extend(gitlab_repo_urls); + repo_urls.extend(gitea_repo_urls); repo_urls.extend(bitbucket_repo_urls); repo_urls.sort(); repo_urls.dedup(); @@ -91,6 +93,9 @@ pub async fn run_async_scan( if let Some(w) = gitlab::wiki_url(url) { wiki_urls.push(w); } + if let Some(w) = gitea::wiki_url(url) { + wiki_urls.push(w); + } if let Some(w) = bitbucket::wiki_url(url) { wiki_urls.push(w); } diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs index e775766..5e119f3 100644 --- a/tests/int_allowlist.rs +++ b/tests/int_allowlist.rs @@ -8,6 +8,7 @@ use kingfisher::{ cli::{ commands::{ bitbucket::{BitbucketAuthArgs, BitbucketRepoType}, + gitea::GiteaRepoType, github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, gitlab::GitLabRepoType, inputs::{ContentFilteringArgs, InputSpecifierArgs}, @@ -70,6 +71,12 @@ fn run_skiplist(skip_regex: Vec, skip_skipword: Vec) -> Result Result<()> { gitlab_repo_type: GitLabRepoType::Owner, gitlab_include_subgroups: false, + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/")?, + gitea_repo_type: GiteaRepoType::Source, + bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index b7719c6..0e243f8 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -12,6 +12,7 @@ use kingfisher::{ cli::{ commands::{ bitbucket::{BitbucketAuthArgs, BitbucketRepoType}, + gitea::GiteaRepoType, github::{GitCloneMode, GitHistoryMode, 
GitHubRepoType}, gitlab::GitLabRepoType, inputs::{ContentFilteringArgs, InputSpecifierArgs}, @@ -83,6 +84,13 @@ rules: gitlab_repo_type: GitLabRepoType::Owner, gitlab_include_subgroups: false, + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(), + gitea_repo_type: GiteaRepoType::Source, + bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), diff --git a/tests/int_github.rs b/tests/int_github.rs index d5eb0ce..180a441 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -9,6 +9,7 @@ use kingfisher::{ cli::{ commands::{ bitbucket::{BitbucketAuthArgs, BitbucketRepoType}, + gitea::GiteaRepoType, github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, gitlab::GitLabRepoType, inputs::{ContentFilteringArgs, InputSpecifierArgs}, @@ -70,6 +71,13 @@ fn test_github_remote_scan() -> Result<()> { gitlab_repo_type: GitLabRepoType::Owner, gitlab_include_subgroups: false, + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(), + gitea_repo_type: GiteaRepoType::Source, + bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index cecdb60..d295660 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -9,6 +9,7 @@ use kingfisher::{ cli::{ commands::{ bitbucket::{BitbucketAuthArgs, BitbucketRepoType}, + gitea::GiteaRepoType, github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, gitlab::GitLabRepoType, inputs::{ContentFilteringArgs, InputSpecifierArgs}, @@ -69,6 +70,13 @@ fn test_gitlab_remote_scan() -> Result<()> { gitlab_repo_type: GitLabRepoType::Owner, gitlab_include_subgroups: false, + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: 
Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/")?, + gitea_repo_type: GiteaRepoType::Source, + bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), @@ -192,6 +200,13 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { gitlab_repo_type: GitLabRepoType::Owner, gitlab_include_subgroups: false, + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/")?, + gitea_repo_type: GiteaRepoType::Source, + bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 86dc0db..1e7f9b5 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -9,6 +9,7 @@ use kingfisher::{ cli::{ commands::{ bitbucket::{BitbucketAuthArgs, BitbucketRepoType}, + gitea::GiteaRepoType, github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, gitlab::GitLabRepoType, inputs::{ContentFilteringArgs, InputSpecifierArgs}, @@ -53,6 +54,12 @@ async fn test_redact_hashes_finding_values() -> Result<()> { gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, gitlab_include_subgroups: false, + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(), + gitea_repo_type: GiteaRepoType::Source, bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), diff --git a/tests/int_slack.rs b/tests/int_slack.rs index e9e3b74..d7b3118 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -8,6 +8,7 @@ use kingfisher::{ cli::{ commands::{ bitbucket::{BitbucketAuthArgs, BitbucketRepoType}, + gitea::GiteaRepoType, github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, gitlab::GitLabRepoType, 
inputs::{ContentFilteringArgs, InputSpecifierArgs}, @@ -59,6 +60,13 @@ impl TestContext { gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, gitlab_include_subgroups: false, + + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(), + gitea_repo_type: GiteaRepoType::Source, bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), @@ -168,6 +176,13 @@ async fn test_scan_slack_messages() -> Result<()> { gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, gitlab_include_subgroups: false, + + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(), + gitea_repo_type: GiteaRepoType::Source, bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 3ff5ec1..28c7bda 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -12,6 +12,7 @@ use kingfisher::{ cli::{ commands::{ bitbucket::{BitbucketAuthArgs, BitbucketRepoType}, + gitea::GiteaRepoType, github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, gitlab::GitLabRepoType, inputs::{ContentFilteringArgs, InputSpecifierArgs}, @@ -125,6 +126,14 @@ async fn test_validation_cache_and_depvars() -> Result<()> { gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::Owner, gitlab_include_subgroups: false, + + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(), + gitea_repo_type: GiteaRepoType::Source, + bitbucket_user: 
Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 3fe9aff..6141037 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -10,6 +10,7 @@ use kingfisher::{ cli::{ commands::{ bitbucket::{BitbucketAuthArgs, BitbucketRepoType}, + gitea::GiteaRepoType, github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, gitlab::GitLabRepoType, inputs::{ContentFilteringArgs, InputSpecifierArgs}, @@ -69,6 +70,13 @@ impl TestContext { gitlab_repo_type: GitLabRepoType::Owner, gitlab_include_subgroups: false, + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(), + gitea_repo_type: GiteaRepoType::Source, + bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), @@ -165,6 +173,13 @@ impl TestContext { gitlab_repo_type: GitLabRepoType::Owner, gitlab_include_subgroups: false, + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(), + gitea_repo_type: GiteaRepoType::Source, + bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), From 42cb233122953942e5ec62d22aa4f39f8cb7d16a Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 23 Sep 2025 16:18:44 -0700 Subject: [PATCH 02/12] Updated README --- README.md | 22 ++++++++++++++-------- docs/icons.sh | 17 +++++++++++++++++ 2 files changed, 31 insertions(+), 8 deletions(-) create mode 100755 docs/icons.sh diff --git a/README.md b/README.md index 596dc4c..874c595 100644 --- a/README.md +++ b/README.md @@ -11,17 +11,23 @@ Kingfisher is a blazingly fast secret‑scanning and live validation tool built Originally forked from Praetorian’s Nosey Parker, Kingfisher **adds** live cloud-API 
validation; many more targets (GitLab, BitBucket, Gitea, S3, Docker, Jira, Confluence, Slack); compressed-file extraction and scanning; baseline and allowlist controls; language-aware detection (~20 languages); and a native Windows binary. See [Origins and Divergence](#origins-and-divergence) for details. ## Key Features +- **Multiple Scan Targets**:

+ Files + Local Git + GitHub + GitLab + Bitbucket + Gitea + Docker + Jira + Confluence + Slack + AWS S3 +

+ - **Performance**: multithreaded, Hyperscan‑powered scanning built for huge codebases - **Extensible rules**: hundreds of built-in detectors plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) - **Broad AI SaaS coverage**: finds and validates tokens for OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), Ollama, Langchain, Perplexity, Weights & Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, Together.ai, Zhipu, and many more -- **Multiple targets**: - - **Git history**: local repos or GitHub/GitLab/Gitea/Bitbucket orgs, users, and workspaces - - **Repository artifacts**: with `--repo-artifacts`, scan GitHub/GitLab/Bitbucket repository artifacts such as issues, pull/merge requests, wikis, snippets, and owner gists in addition to code (Gitea wikis are also cloned when available) - - **Docker images**: public or private via `--docker-image` - - **Jira issues**: JQL‑driven scans with `--jira-url` and `--jql` - - **Confluence pages**: CQL‑driven scans with `--confluence-url` and `--cql` - - **Slack messages**: query‑based scans with `--slack-query` - - **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous - **Compressed Files**: Supports extracting and scanning compressed files for secrets - Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance. 
This has a small performance impact and can be disabled with `--no-base64` - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) diff --git a/docs/icons.sh b/docs/icons.sh new file mode 100755 index 0000000..31d287c --- /dev/null +++ b/docs/icons.sh @@ -0,0 +1,17 @@ +# Create a local icon dir +mkdir -p icons + +# Simple Icons (CDN source is stable and permissively licensed) +curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/github.svg -o icons/github.svg +curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/gitlab.svg -o icons/gitlab.svg +curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/bitbucket.svg -o icons/bitbucket.svg +curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/gitea.svg -o icons/gitea.svg +curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/slack.svg -o icons/slack.svg +curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/jirasoftware.svg -o icons/jira.svg +curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/confluence.svg -o icons/confluence.svg +curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/amazonaws.svg -o icons/aws.svg +curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/docker.svg -o icons/docker.svg + +# A neutral folder + git icon from Octicons for files/dirs + local git repos +curl -fsSL https://raw.githubusercontent.com/primer/octicons/main/icons/file-directory-24.svg -o icons/folder.svg +curl -fsSL https://raw.githubusercontent.com/primer/octicons/main/icons/git-branch-24.svg -o icons/git.svg From b533a4207f477dc95f005ad9d4a349067661b675 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 23 Sep 2025 16:19:06 -0700 Subject: [PATCH 03/12] Updated README --- docs/icons.sh | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100755 docs/icons.sh diff --git a/docs/icons.sh b/docs/icons.sh deleted file mode 100755 index 31d287c..0000000 --- a/docs/icons.sh 
+++ /dev/null @@ -1,17 +0,0 @@ -# Create a local icon dir -mkdir -p icons - -# Simple Icons (CDN source is stable and permissively licensed) -curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/github.svg -o icons/github.svg -curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/gitlab.svg -o icons/gitlab.svg -curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/bitbucket.svg -o icons/bitbucket.svg -curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/gitea.svg -o icons/gitea.svg -curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/slack.svg -o icons/slack.svg -curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/jirasoftware.svg -o icons/jira.svg -curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/confluence.svg -o icons/confluence.svg -curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/amazonaws.svg -o icons/aws.svg -curl -fsSL https://cdn.jsdelivr.net/npm/simple-icons@v11/icons/docker.svg -o icons/docker.svg - -# A neutral folder + git icon from Octicons for files/dirs + local git repos -curl -fsSL https://raw.githubusercontent.com/primer/octicons/main/icons/file-directory-24.svg -o icons/folder.svg -curl -fsSL https://raw.githubusercontent.com/primer/octicons/main/icons/git-branch-24.svg -o icons/git.svg From 71ae0f89b0f52deebfd87e353639f82f4b82ab57 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 23 Sep 2025 16:21:17 -0700 Subject: [PATCH 04/12] Updated README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 874c595..8419991 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Kingfisher is a blazingly fast secret‑scanning and live validation tool built Originally forked from Praetorian’s Nosey Parker, Kingfisher **adds** live cloud-API validation; many more targets (GitLab, BitBucket, Gitea, S3, Docker, Jira, Confluence, Slack); compressed-file extraction and scanning; baseline and allowlist controls; 
language-aware detection (~20 languages); and a native Windows binary. See [Origins and Divergence](#origins-and-divergence) for details. ## Key Features -- **Multiple Scan Targets**:

+- **Multiple Scan Targets**:

Files Local Git GitHub From 74b7626f4ded290794ece07d3c96a5fb3b611acd Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 23 Sep 2025 16:23:12 -0700 Subject: [PATCH 05/12] Updated README --- README.md | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 8419991..df73c9b 100644 --- a/README.md +++ b/README.md @@ -11,19 +11,22 @@ Kingfisher is a blazingly fast secret‑scanning and live validation tool built Originally forked from Praetorian’s Nosey Parker, Kingfisher **adds** live cloud-API validation; many more targets (GitLab, BitBucket, Gitea, S3, Docker, Jira, Confluence, Slack); compressed-file extraction and scanning; baseline and allowlist controls; language-aware detection (~20 languages); and a native Windows binary. See [Origins and Divergence](#origins-and-divergence) for details. ## Key Features -- **Multiple Scan Targets**:

- Files - Local Git - GitHub - GitLab - Bitbucket - Gitea - Docker - Jira - Confluence - Slack - AWS S3 -

+- **Multiple Scan Targets**: +

+ Files & Dirs + Local Git + GitHub + GitLab + Bitbucket + Gitea +
+ Docker + Jira + Confluence + Slack + AWS S3 +

+ - **Performance**: multithreaded, Hyperscan‑powered scanning built for huge codebases - **Extensible rules**: hundreds of built-in detectors plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) From dbf921937d7a670bf5a873df746bcec0267f3434 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 23 Sep 2025 16:27:53 -0700 Subject: [PATCH 06/12] Updated README --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index df73c9b..831d582 100644 --- a/README.md +++ b/README.md @@ -30,9 +30,8 @@ Originally forked from Praetorian’s Nosey Parker, Kingfisher **adds** live clo - **Performance**: multithreaded, Hyperscan‑powered scanning built for huge codebases - **Extensible rules**: hundreds of built-in detectors plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) - - **Broad AI SaaS coverage**: finds and validates tokens for OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), Ollama, Langchain, Perplexity, Weights & Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, Together.ai, Zhipu, and many more +- **Broad AI SaaS coverage**: finds and validates tokens for OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), Ollama, Langchain, Perplexity, Weights & Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, Together.ai, Zhipu, and many more - **Compressed Files**: Supports extracting and scanning compressed files for secrets -- Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance. 
This has a small performance impact and can be disabled with `--no-base64` - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) @@ -908,6 +907,7 @@ leaves the default unchanged. ## Notable Scan Options - `--no-dedup`: Report every occurrence of a finding (disable the default de-duplicate behavior) +- `--no-base64`: By default, Kingfisher finds and decodes base64 blobs and scans them for secrets. This adds a slight performance overhead; use this flag to disable - `--confidence `: (low|medium|high) - `--min-entropy `: Override default threshold - `--no-binary`: Skip binary files @@ -919,7 +919,6 @@ leaves the default unchanged. - `--manage-baseline`: Create or update the baseline file with current findings - `--skip-regex `: Ignore findings whose text matches this regex (repeatable) - `--skip-word `: Ignore findings containing this case-insensitive word (repeatable) - ## Understanding `--confidence` The `--confidence` flag sets a minimum confidence threshold, not an exact match. 
From f4505b94ab94fbc6b77b7cf40f52391e7b397309 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 23 Sep 2025 16:29:13 -0700 Subject: [PATCH 07/12] Updated README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 831d582..ec89f63 100644 --- a/README.md +++ b/README.md @@ -975,7 +975,7 @@ Since that initial fork, it has diverged heavily from Nosey Parker: - Collapsed the workflow into a single scan-and-report phase with direct JSON/BSON/SARIF outputs - Added Tree-Sitter parsing on top of Hyperscan for deeper language-aware detection - Removed datastore-driven reporting/annotations in favor of live validation, baselines, allowlists, and compressed-file extraction -- Expanded support for new targets (GitLab, Jira, Confluence, Slack, S3, Docker, etc.) +- Expanded support for new targets (GitLab, BitBucket, Gitea, Jira, Confluence, Slack, S3, Docker, etc.) - Delivered cross-platform builds, including native Windows From e82f9ace8470d62ce4b05691e727ad5ea0139fb9 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 23 Sep 2025 16:39:47 -0700 Subject: [PATCH 08/12] Updated README --- src/reporter/styles.rs | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/reporter/styles.rs b/src/reporter/styles.rs index 9f96c9c..56b32b1 100644 --- a/src/reporter/styles.rs +++ b/src/reporter/styles.rs @@ -11,16 +11,14 @@ pub struct Styles { pub style_active_creds: Style, pub style_match: Style, pub style_metadata: Style, - is_term: bool, } impl Styles { pub fn new(use_color: bool) -> Self { - let stdout_is_tty = std::io::stdout().is_terminal(); - let is_term = Term::stdout().is_term(); + // Trust the `use_color` decision from the caller. + let styles_enabled = use_color; - // Enable color only when explicitly requested and stdout is a terminal. 
- let styles_enabled = use_color && stdout_is_tty && is_term; let style_finding_heading = Style::new().bright().white().force_styling(styles_enabled); + let style_finding_active_heading = Style::new().bold().bright().cyan().force_styling(styles_enabled); let style_rule = Style::new().bright().bold().blue().force_styling(styles_enabled); @@ -36,14 +34,6 @@ impl Styles { style_match, style_metadata, style_active_creds, - is_term, } } - // pub fn apply>(&self, text: T, style: &Style) -> String { - // if self.is_term { - // style.apply_to(text.as_ref()).to_string() - // } else { - // text.as_ref().to_string() - // } - // } } From ea24d9a0d5a0144e9843adfa729c80f25d67648e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 23 Sep 2025 16:41:04 -0700 Subject: [PATCH 09/12] Updated README --- src/reporter/styles.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/reporter/styles.rs b/src/reporter/styles.rs index 56b32b1..0a16ac4 100644 --- a/src/reporter/styles.rs +++ b/src/reporter/styles.rs @@ -1,5 +1,3 @@ -use std::io::IsTerminal; - pub use console::{Style, StyledObject, Term}; #[allow(dead_code)] From 08b87eadf462999ae093c4cb373f08828b454d95 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 23 Sep 2025 17:24:11 -0700 Subject: [PATCH 10/12] Populate the finding path from git blob metadata so history-derived secrets display their file location instead of an empty path --- CHANGELOG.md | 1 + src/reporter.rs | 188 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 187 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 488a7c2..2fb02ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [v1.54.0] - Added first-class Gitea support, including CLI commands, environment-based authentication, documentation, and integration with scans and repository enumeration. 
+- Populate the finding path from git blob metadata so history-derived secrets display their file location instead of an empty path ## [v1.53.0] - Added first-class Bitbucket support, including CLI commands, authentication helpers, documentation, and integration testing. diff --git a/src/reporter.rs b/src/reporter.rs index bc4bd86..eccdcf1 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -449,10 +449,15 @@ impl DetailsReporter { Some(e.path.display().to_string()) } } + Origin::GitRepo(e) => e.first_commit.as_ref().map(|c| c.blob_path.clone()), Origin::Extended(e) => e.path().map(|p| p.display().to_string()), - _ => None, }) - .unwrap_or_default(); + .unwrap_or_else(|| { + rm.origin + .iter() + .find_map(|origin| origin.blob_path().map(|p| p.display().to_string())) + .unwrap_or_default() + }); FindingReporterRecord { rule: RuleMetadata { @@ -617,6 +622,185 @@ pub struct FindingRecordData { pub git_metadata: Option, } +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + blob::{BlobId, BlobMetadata}, + cli::commands::inputs::{ContentFilteringArgs, InputSpecifierArgs}, + cli::commands::output::OutputArgs, + cli::commands::scan::{ConfidenceLevel, ScanArgs}, + cli::commands::{ + bitbucket::{BitbucketAuthArgs, BitbucketRepoType}, + github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, + gitlab::GitLabRepoType, + rules::RuleSpecifierArgs, + }, + location::{Location, OffsetSpan, SourcePoint, SourceSpan}, + matcher::{SerializableCapture, SerializableCaptures}, + origin::OriginSet, + rules::rule::{Confidence, Rule, RuleSyntax}, + }; + use gix::{date::Time, ObjectId}; + use smallvec::SmallVec; + use std::path::PathBuf; + use tempfile::tempdir; + + #[test] + fn build_finding_record_uses_git_blob_path() { + let temp = tempdir().unwrap(); + let datastore = + Arc::new(Mutex::new(findings_store::FindingsStore::new(temp.path().to_path_buf()))); + let reporter = DetailsReporter { datastore, styles: Styles::new(false), only_valid: false }; + + let repo_path = 
Arc::new(PathBuf::from("/tmp/repo")); + let commit_metadata = Arc::new(CommitMetadata { + commit_id: ObjectId::from_hex(b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").unwrap(), + committer_name: "Alice".into(), + committer_email: "alice@example.com".into(), + committer_timestamp: Time::new(0, 0), + }); + let blob_path = "path/in/history.txt".to_string(); + let origin = OriginSet::new( + Origin::from_git_repo_with_first_commit(repo_path, commit_metadata, blob_path.clone()), + vec![], + ); + + let rule = Arc::new(Rule::new(RuleSyntax { + name: "Test Rule".into(), + id: "test.rule".into(), + pattern: ".*".into(), + min_entropy: 0.0, + confidence: Confidence::Medium, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None, + depends_on_rule: vec![], + })); + + let blob_id = BlobId::new(b"blob-data"); + let report_match = ReportMatch { + origin, + blob_metadata: BlobMetadata { + id: blob_id, + num_bytes: 42, + mime_essence: None, + language: Some("Unknown".into()), + }, + m: Match { + location: Location { + offset_span: OffsetSpan { start: 0, end: 10 }, + source_span: SourceSpan { + start: SourcePoint { line: 19, column: 0 }, + end: SourcePoint { line: 19, column: 10 }, + }, + }, + groups: SerializableCaptures { + captures: SmallVec::<[SerializableCapture; 2]>::new(), + }, + blob_id, + finding_fingerprint: 123, + rule: Arc::clone(&rule), + validation_response_body: "Bad credentials".into(), + validation_response_status: 401, + validation_success: false, + calculated_entropy: 5.29, + visible: true, + is_base64: false, + }, + comment: None, + match_confidence: Confidence::Medium, + visible: true, + validation_response_body: "Bad credentials".into(), + validation_response_status: 401, + validation_success: false, + }; + + let scan_args = ScanArgs { + num_jobs: 1, + rules: RuleSpecifierArgs::default(), + input_specifier_args: InputSpecifierArgs { + path_inputs: Vec::new(), + git_url: Vec::new(), + github_user: Vec::new(), + 
github_organization: Vec::new(), + github_exclude: Vec::new(), + all_github_organizations: false, + github_api_url: Url::parse("https://api.github.com/").unwrap(), + github_repo_type: GitHubRepoType::Source, + gitlab_user: Vec::new(), + gitlab_group: Vec::new(), + gitlab_exclude: Vec::new(), + all_gitlab_groups: false, + gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), + gitlab_repo_type: GitLabRepoType::All, + gitlab_include_subgroups: false, + bitbucket_user: Vec::new(), + bitbucket_workspace: Vec::new(), + bitbucket_project: Vec::new(), + bitbucket_exclude: Vec::new(), + all_bitbucket_workspaces: false, + bitbucket_api_url: Url::parse("https://api.bitbucket.org/2.0/").unwrap(), + bitbucket_repo_type: BitbucketRepoType::Source, + bitbucket_auth: BitbucketAuthArgs::default(), + jira_url: None, + jql: None, + confluence_url: None, + cql: None, + slack_query: None, + slack_api_url: Url::parse("https://slack.com/api/").unwrap(), + max_results: 100, + s3_bucket: None, + s3_prefix: None, + role_arn: None, + aws_local_profile: None, + docker_image: Vec::new(), + git_clone: GitCloneMode::Bare, + git_history: GitHistoryMode::Full, + commit_metadata: true, + repo_artifacts: false, + scan_nested_repos: true, + since_commit: None, + branch: None, + }, + content_filtering_args: ContentFilteringArgs { + max_file_size_mb: 256.0, + exclude: Vec::new(), + no_extract_archives: false, + extraction_depth: 2, + no_binary: false, + }, + confidence: ConfidenceLevel::Medium, + no_validate: false, + only_valid: false, + min_entropy: None, + rule_stats: false, + no_dedup: false, + redact: false, + git_repo_timeout: 1_800, + output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, + baseline_file: None, + manage_baseline: false, + skip_regex: Vec::new(), + skip_word: Vec::new(), + }; + + let record = reporter.build_finding_record(&report_match, &scan_args); + assert_eq!(record.finding.path, blob_path); + let git_file_path = record + .finding + .git_metadata 
+ .as_ref() + .and_then(|git| git.get("file")) + .and_then(|file| file.get("path")) + .and_then(|path| path.as_str()) + .unwrap(); + assert_eq!(git_file_path, "path/in/history.txt"); + } +} + impl From for ReportMatch { fn from(e: finding_data::FindingDataEntry) -> Self { ReportMatch { From 645bfa2e01a1a41673ee941d7079ad9bfe317d0e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 24 Sep 2025 10:06:47 -0700 Subject: [PATCH 11/12] Populate the finding path from git blob metadata so history-derived secrets display their file location instead of an empty path --- buildwin.bat | 1 + data/rules/openweather.yml | 37 ------------------------------------ data/rules/travisci.yml | 2 +- src/git_url.rs | 9 ++------- src/reporter.rs | 39 ++++++++++++++++++++++++++++++++++++-- src/update.rs | 2 +- 6 files changed, 42 insertions(+), 48 deletions(-) delete mode 100644 data/rules/openweather.yml diff --git a/buildwin.bat b/buildwin.bat index 55ca71f..deed257 100644 --- a/buildwin.bat +++ b/buildwin.bat @@ -20,6 +20,7 @@ if "%VCINSTALLDIR%"=="" ( echo VCINSTALLDIR not set - attempting auto-detection… for %%P in ( "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC" + "C:\Program Files\Microsoft Visual Studio\2022\Community\VC" "C:\Program Files\Microsoft Visual Studio\2022\Professional\VC" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC" "C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC" diff --git a/data/rules/openweather.yml b/data/rules/openweather.yml deleted file mode 100644 index 2153e64..0000000 --- a/data/rules/openweather.yml +++ /dev/null @@ -1,37 +0,0 @@ -rules: - - name: OpenWeather Map API Key - id: kingfisher.openweather.1 - pattern: | - (?xi) - (?:pyowm|openweather|\bowm\b) - (?:.|[\n\r]){0,64}? 
- \b - ( - (?: - [a-z0-9]{32} - ) - \b - |APPID= - (?: - [a-z0-9]{32} - ) - ) - \b - min_entropy: 3.5 - examples: - - pyowm = '3k144a5af729351d0fc58bdrj9a21mkr' - - owm = '3k144a5af729351d0fc58bdrj9a21mkr' - - openweatherapikey=cd2b1d12d01ae2deffecfebafcc3c31d - - apikey=openweather:cd2b1d12d01ae2deffecfebafcc3c31d - validation: - type: Http - content: - request: - method: GET - response_matcher: - - report_response: true - - match_all_status: true - status: - - 200 - type: StatusMatch - url: https://api.openweathermap.org/geo/1.0/reverse?lat=0&lon=0&limit=1&appid={{ TOKEN }} \ No newline at end of file diff --git a/data/rules/travisci.yml b/data/rules/travisci.yml index 5a61c0a..73e75c4 100644 --- a/data/rules/travisci.yml +++ b/data/rules/travisci.yml @@ -32,7 +32,7 @@ rules: - type: StatusMatch status: [200] - name: Travis CI Encrypted Variable - id: kingfisher.travisci.1 + id: kingfisher.travisci.2 pattern: | (?xis) \b diff --git a/src/git_url.rs b/src/git_url.rs index 7458bcc..67e6e90 100644 --- a/src/git_url.rs +++ b/src/git_url.rs @@ -64,8 +64,8 @@ impl TryFrom for GitUrl { type Error = &'static str; fn try_from(url: Url) -> Result { - // if url.scheme() != "https" - if url.host().is_none() + if (url.scheme() != "https" && url.scheme() != "http") + || url.host().is_none() || !url.username().is_empty() || url.password().is_some() || url.query().is_some() @@ -104,11 +104,6 @@ mod test { assert!(GitUrl::from_str("ssh://example.com/repo.git").is_err()); } - #[test] - fn bad_scheme_04() { - assert!(GitUrl::from_str("http://example.com/repo.git").is_err()); - } - #[test] fn bad_query_params() { assert!(GitUrl::from_str("https://example.com/repo.git?admin=1").is_err()); diff --git a/src/reporter.rs b/src/reporter.rs index eccdcf1..5e9d49b 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -428,10 +428,10 @@ impl DetailsReporter { }) .next(); - let file_path = rm + let mut file_path = rm .origin .iter() - .find_map(|origin| match origin { + .filter_map(|origin| 
match origin { Origin::File(e) => { if let Some(url) = self.repo_artifact_url(&e.path) { Some(url) @@ -452,6 +452,7 @@ impl DetailsReporter { Origin::GitRepo(e) => e.first_commit.as_ref().map(|c| c.blob_path.clone()), Origin::Extended(e) => e.path().map(|p| p.display().to_string()), }) + .find(|path| !path.trim().is_empty()) .unwrap_or_else(|| { rm.origin .iter() @@ -459,6 +460,31 @@ impl DetailsReporter { .unwrap_or_default() }); + // If the file path is still empty, and we have git blob metadata, + // try to reconstruct the path from the git object ID. + if file_path.is_empty() { + let blob_hex = rm.blob_metadata.id.hex(); + if let Some(repo_origin) = rm.origin.iter().find_map(|origin| match origin { + Origin::GitRepo(e) => Some(e), + _ => None, + }) { + let (prefix, suffix) = blob_hex.split_at(2); + let repo_path = repo_origin.repo_path.as_ref(); + let git_dir_objects = repo_path.join(".git").join("objects"); + let objects_dir = if git_dir_objects.is_dir() { + git_dir_objects + } else { + repo_path.join("objects") + }; + let fallback_path = objects_dir.join(prefix).join(suffix); + file_path = fallback_path.display().to_string(); + } + + if file_path.is_empty() { + file_path = format!("blob:{blob_hex}"); + } + } + FindingReporterRecord { rule: RuleMetadata { name: rm.m.rule.name().to_string(), @@ -632,10 +658,12 @@ mod tests { cli::commands::scan::{ConfidenceLevel, ScanArgs}, cli::commands::{ bitbucket::{BitbucketAuthArgs, BitbucketRepoType}, + gitea::GiteaRepoType, github::{GitCloneMode, GitHistoryMode, GitHubRepoType}, gitlab::GitLabRepoType, rules::RuleSpecifierArgs, }, + git_commit_metadata::CommitMetadata, location::{Location, OffsetSpan, SourcePoint, SourceSpan}, matcher::{SerializableCapture, SerializableCaptures}, origin::OriginSet, @@ -737,6 +765,12 @@ mod tests { gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(), gitlab_repo_type: GitLabRepoType::All, gitlab_include_subgroups: false, + gitea_user: Vec::new(), + gitea_organization: Vec::new(), + 
gitea_exclude: Vec::new(), + all_gitea_organizations: false, + gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(), + gitea_repo_type: GiteaRepoType::Source, bitbucket_user: Vec::new(), bitbucket_workspace: Vec::new(), bitbucket_project: Vec::new(), @@ -779,6 +813,7 @@ mod tests { rule_stats: false, no_dedup: false, redact: false, + no_base64: false, git_repo_timeout: 1_800, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, baseline_file: None, diff --git a/src/update.rs b/src/update.rs index 878182d..b765768 100644 --- a/src/update.rs +++ b/src/update.rs @@ -102,7 +102,7 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt // ───────────── Case 1: running == latest ───────────── if release.version == running_v { let plain = format!("Kingfisher {running_v} is up to date"); - info!("{}", styled_heading(&styles, plain.as_str())); + info!("{}", plain.as_str()); return Some(plain); } From 0c022b4ed5a4a5a655dfc666fd3e6fd1f693d5aa Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 24 Sep 2025 10:43:51 -0700 Subject: [PATCH 12/12] Changes in response to code review --- src/reporter.rs | 116 ++++++++++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 53 deletions(-) diff --git a/src/reporter.rs b/src/reporter.rs index 5e9d49b..caa6aa8 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -428,62 +428,20 @@ impl DetailsReporter { }) .next(); - let mut file_path = rm + let file_path = rm .origin .iter() - .filter_map(|origin| match origin { - Origin::File(e) => { - if let Some(url) = self.repo_artifact_url(&e.path) { - Some(url) - } else if let Some(url) = self.jira_issue_url(&e.path, args) { - Some(url) - } else if let Some(url) = self.confluence_page_url(&e.path) { - Some(url) - } else if let Some(url) = self.slack_message_url(&e.path) { - Some(url) - } else if let Some(mapped) = self.s3_display_path(&e.path) { - Some(mapped) - } else if let Some(mapped) = 
self.docker_display_path(&e.path) { - Some(mapped) - } else { - Some(e.path.display().to_string()) - } - } - Origin::GitRepo(e) => e.first_commit.as_ref().map(|c| c.blob_path.clone()), - Origin::Extended(e) => e.path().map(|p| p.display().to_string()), + .find_map(|origin| self.origin_display_path(origin, args)) + .or_else(|| { + rm.origin.iter().find_map(|origin| { + origin + .blob_path() + .map(|p| p.display().to_string()) + .and_then(Self::non_empty_string) + }) }) - .find(|path| !path.trim().is_empty()) - .unwrap_or_else(|| { - rm.origin - .iter() - .find_map(|origin| origin.blob_path().map(|p| p.display().to_string())) - .unwrap_or_default() - }); - - // If the file path is still empty, and we have git blob metadata, - // try to reconstruct the path from the git object ID. - if file_path.is_empty() { - let blob_hex = rm.blob_metadata.id.hex(); - if let Some(repo_origin) = rm.origin.iter().find_map(|origin| match origin { - Origin::GitRepo(e) => Some(e), - _ => None, - }) { - let (prefix, suffix) = blob_hex.split_at(2); - let repo_path = repo_origin.repo_path.as_ref(); - let git_dir_objects = repo_path.join(".git").join("objects"); - let objects_dir = if git_dir_objects.is_dir() { - git_dir_objects - } else { - repo_path.join("objects") - }; - let fallback_path = objects_dir.join(prefix).join(suffix); - file_path = fallback_path.display().to_string(); - } - - if file_path.is_empty() { - file_path = format!("blob:{blob_hex}"); - } - } + .or_else(|| self.git_object_fallback_path(rm)) + .unwrap_or_else(|| format!("blob:{}", rm.blob_metadata.id.hex())); FindingReporterRecord { rule: RuleMetadata { @@ -511,6 +469,58 @@ impl DetailsReporter { } } + fn origin_display_path( + &self, + origin: &Origin, + args: &cli::commands::scan::ScanArgs, + ) -> Option { + match origin { + Origin::File(e) => self + .repo_artifact_url(&e.path) + .and_then(Self::non_empty_string) + .or_else(|| self.jira_issue_url(&e.path, args).and_then(Self::non_empty_string)) + .or_else(|| 
self.confluence_page_url(&e.path).and_then(Self::non_empty_string)) + .or_else(|| self.slack_message_url(&e.path).and_then(Self::non_empty_string)) + .or_else(|| self.s3_display_path(&e.path).and_then(Self::non_empty_string)) + .or_else(|| self.docker_display_path(&e.path).and_then(Self::non_empty_string)) + .or_else(|| Self::non_empty_string(e.path.display().to_string())), + Origin::GitRepo(e) => { + e.first_commit.as_ref().and_then(|c| Self::non_empty_string(c.blob_path.clone())) + } + Origin::Extended(e) => { + e.path().map(|p| p.display().to_string()).and_then(Self::non_empty_string) + } + } + } + + fn git_object_fallback_path(&self, rm: &ReportMatch) -> Option { + let blob_hex = rm.blob_metadata.id.hex(); + rm.origin.iter().find_map(|origin| { + if let Origin::GitRepo(repo_origin) = origin { + let (prefix, suffix) = blob_hex.split_at(2); + let repo_path = repo_origin.repo_path.as_ref(); + let git_dir_objects = repo_path.join(".git").join("objects"); + let objects_dir = if git_dir_objects.is_dir() { + git_dir_objects + } else { + repo_path.join("objects") + }; + let fallback_path = objects_dir.join(prefix).join(suffix); + Self::non_empty_string(fallback_path.display().to_string()) + } else { + None + } + }) + } + + fn non_empty_string(value: String) -> Option { + if value.trim().is_empty() { + None + } else { + Some(value) + } + } + pub fn build_finding_records( &self, args: &cli::commands::scan::ScanArgs,