From 20e08105cfb64404018d1cefcfc0c5176e6bc42e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 30 Apr 2026 16:40:43 -0700 Subject: [PATCH] improved github organization scanning --- CHANGELOG.md | 3 +++ Cargo.lock | 2 +- Cargo.toml | 2 +- docs-site/docs/changelog.md | 5 ++++- docs-site/docs/usage/basic-scanning.md | 2 +- docs-site/docs/usage/integrations.md | 10 ++++++---- docs/INTEGRATIONS.md | 10 ++++++---- docs/USAGE.md | 2 +- src/cli/commands/github.rs | 3 +++ src/github.rs | 10 ++++++++++ src/scanner/repos.rs | 1 + 11 files changed, 37 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f154e7..eeb5951 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [v1.99.0] +- `--include-contributors` now respects `--github-repo-type` when enumerating contributor-owned repositories: by default contributor forks are excluded (matching the existing `Source` default); previously they were always included regardless of the flag. Added a new `--github-repo-type all` option to opt into the prior behavior of scanning both source and fork repos for contributors, organizations, and users. + ## [v1.98.0] - Bounded disk usage for large multi-repo scans (e.g. `--include-contributors --repo-artifacts` against orgs with thousands of repos): cloning, artifact fetching, and scanning now run concurrently through bounded channels, and each cloned repo is removed from the temp directory as soon as its scan completes. On-disk footprint stays roughly `O(num_jobs)` regardless of total repo count instead of growing without bound. `--keep-clones` and `--git-clone-dir` opt out of the per-repo cleanup as before. - Parallelized `--repo-artifacts` fetching with `buffer_unordered(num_jobs)` so issue/PR/wiki API calls run concurrently and stream into the scan loop, replacing the previous per-repo serial loop that delayed the start of scanning by hours on large fan-outs. 
diff --git a/Cargo.lock b/Cargo.lock index 8024fe1..8275480 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4973,7 +4973,7 @@ dependencies = [ [[package]] name = "kingfisher" -version = "1.98.0" +version = "1.99.0" dependencies = [ "anyhow", "asar", diff --git a/Cargo.toml b/Cargo.toml index 7244cae..71e199d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,7 +48,7 @@ http = "1.4" [package] name = "kingfisher" -version = "1.98.0" +version = "1.99.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index b1812ad..a8e5452 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -7,7 +7,10 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im All notable changes to this project will be documented in this file. -## [unreleased v1.98.0] +## [unreleased v1.99.0] +- `--include-contributors` now respects `--github-repo-type` when enumerating contributor-owned repositories: by default contributor forks are excluded (matching the existing `Source` default); previously they were always included regardless of the flag. Added a new `--github-repo-type all` option to opt into the prior behavior of scanning both source and fork repos for contributors, organizations, and users. + +## [v1.98.0] - Added first-class **Postman** scanning target: new `kingfisher scan postman` subcommand (and equivalent `--postman-*` flags) fetches workspaces, collections, and environments via the Postman API and scans them for hard-coded credentials in request `auth` blocks, pre-request/test scripts, saved example responses, and — notably — `secret`-typed environment variables, which the API returns in plaintext despite the UI mask. Selectors: `--workspace`, `--collection`, `--environment`, `--all`, with optional `--include-mocks-monitors` and `--api-url` for self-hosted endpoints. 
Authenticates via `KF_POSTMAN_TOKEN` (or `POSTMAN_API_KEY`) sent as `X-Api-Key`; honors `X-RateLimit-RetryAfter` on 429s. Findings link back to `https://go.postman.co/...` URLs in reports. - Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs__`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. - Added provider endpoint overrides for validation and revocation via global `--endpoint PROVIDER=URL` and `--endpoint-config FILE`, with built-in support for self-hosted GitHub, GitLab, Gitea, Jira, Confluence, and Artifactory instances. diff --git a/docs-site/docs/usage/basic-scanning.md b/docs-site/docs/usage/basic-scanning.md index b64d837..3172424 100644 --- a/docs-site/docs/usage/basic-scanning.md +++ b/docs-site/docs/usage/basic-scanning.md @@ -655,7 +655,7 @@ kingfisher scan github --organization my-org \ ### Scan remote GitHub repository -Pass a repository URL as a positional scan target to clone and scan its files and history. (The legacy `--git-url` flag still works but is deprecated.) When the URL targets GitHub and you pass `--include-contributors`, Kingfisher enumerates repository contributors and attempts to clone **all public repos owned by those contributors**—a common offensive and blue-team pivot when developers leak secrets in personal or side projects. Use `--repo-clone-limit` to cap how many repositories are cloned during this enumeration. +Pass a repository URL as a positional scan target to clone and scan its files and history. (The legacy `--git-url` flag still works but is deprecated.) 
When the URL targets GitHub and you pass `--include-contributors`, Kingfisher enumerates repository contributors and attempts to clone the public repos owned by those contributors—a common offensive and blue-team pivot when developers leak secrets in personal or side projects. By default Kingfisher excludes forks; pass `--github-repo-type all` to include them or `--github-repo-type fork` for forks only. Use `--repo-clone-limit` to cap how many repositories are cloned during this enumeration. **NOTE**: This may cause you to be temporarily rate-limited by GitHub. Providing a token (`KF_GITHUB_TOKEN`) will provide a higher rate limit. diff --git a/docs-site/docs/usage/integrations.md b/docs-site/docs/usage/integrations.md index 0d77442..b6891b7 100644 --- a/docs-site/docs/usage/integrations.md +++ b/docs-site/docs/usage/integrations.md @@ -164,10 +164,12 @@ kingfisher scan github --organization my-org \ Pass a repository URL as a positional scan target to clone and scan its files and history. (The legacy `--git-url` flag still works but is deprecated.) When the URL targets GitHub and you pass `--include-contributors`, Kingfisher enumerates -repository contributors and attempts to clone **all public repos owned by those -contributors**—a common offensive and blue-team pivot when developers leak -secrets in personal or side projects. Use `--repo-clone-limit` to cap how many -repositories are cloned during this enumeration. +repository contributors and attempts to clone the public repos owned by those +contributors—a common offensive and blue-team pivot when developers leak +secrets in personal or side projects. By default Kingfisher excludes forks; +pass `--github-repo-type all` to include them or `--github-repo-type fork` for +forks only. Use `--repo-clone-limit` to cap how many repositories are cloned +during this enumeration. **NOTE**: This may cause you to be temporarily rate-limited by GitHub. Providing a token (`KF_GITHUB_TOKEN`) will provide a higher rate limit. 
diff --git a/docs/INTEGRATIONS.md b/docs/INTEGRATIONS.md index 593f810..d924257 100644 --- a/docs/INTEGRATIONS.md +++ b/docs/INTEGRATIONS.md @@ -161,10 +161,12 @@ kingfisher scan github --organization my-org \ Pass a repository URL as a positional scan target to clone and scan its files and history. (The legacy `--git-url` flag still works but is deprecated.) When the URL targets GitHub and you pass `--include-contributors`, Kingfisher enumerates -repository contributors and attempts to clone **all public repos owned by those -contributors**—a common offensive and blue-team pivot when developers leak -secrets in personal or side projects. Use `--repo-clone-limit` to cap how many -repositories are cloned during this enumeration. +repository contributors and attempts to clone the public repos owned by those +contributors—a common offensive and blue-team pivot when developers leak +secrets in personal or side projects. By default Kingfisher excludes forks; +pass `--github-repo-type all` to include them or `--github-repo-type fork` for +forks only. Use `--repo-clone-limit` to cap how many repositories are cloned +during this enumeration. **NOTE**: This may cause you to be temporarily rate-limited by GitHub. Providing a token (`KF_GITHUB_TOKEN`) will provide a higher rate limit. diff --git a/docs/USAGE.md b/docs/USAGE.md index b4888e0..529ef81 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -650,7 +650,7 @@ kingfisher scan github --organization my-org \ ### Scan remote GitHub repository -Pass a repository URL as a positional scan target to clone and scan its files and history. (The legacy `--git-url` flag still works but is deprecated.) When the URL targets GitHub and you pass `--include-contributors`, Kingfisher enumerates repository contributors and attempts to clone **all public repos owned by those contributors**—a common offensive and blue-team pivot when developers leak secrets in personal or side projects. 
Use `--repo-clone-limit` to cap how many repositories are cloned during this enumeration. +Pass a repository URL as a positional scan target to clone and scan its files and history. (The legacy `--git-url` flag still works but is deprecated.) When the URL targets GitHub and you pass `--include-contributors`, Kingfisher enumerates repository contributors and attempts to clone the public repos owned by those contributors—a common offensive and blue-team pivot when developers leak secrets in personal or side projects. By default Kingfisher excludes forks; pass `--github-repo-type all` to include them or `--github-repo-type fork` for forks only. Use `--repo-clone-limit` to cap how many repositories are cloned during this enumeration. **NOTE**: This may cause you to be temporarily rate-limited by GitHub. Providing a token (`KF_GITHUB_TOKEN`) will provide a higher rate limit. diff --git a/src/cli/commands/github.rs b/src/cli/commands/github.rs index b9bd3e0..857eef5 100644 --- a/src/cli/commands/github.rs +++ b/src/cli/commands/github.rs @@ -84,6 +84,8 @@ pub enum GitHubRepoType { /// Only fork repositories #[value(alias = "forks")] Fork, + /// All repositories (source and forks) + All, } impl From for crate::github::RepoType { @@ -91,6 +93,7 @@ impl From for crate::github::RepoType { match val { GitHubRepoType::Source => crate::github::RepoType::Source, GitHubRepoType::Fork => crate::github::RepoType::Fork, + GitHubRepoType::All => crate::github::RepoType::All, } } } diff --git a/src/github.rs b/src/github.rs index 1b25e9b..29c6f0c 100644 --- a/src/github.rs +++ b/src/github.rs @@ -30,6 +30,7 @@ struct GitHubContributor { #[derive(Deserialize)] struct GitHubRepo { clone_url: String, + fork: bool, } #[derive(Debug)] @@ -173,6 +174,7 @@ pub async fn enumerate_contributor_repo_urls( exclude_repos: &[String], repo_clone_limit: Option, progress_enabled: bool, + repo_filter: RepoType, ) -> Result> { let (_, owner, repo) = parse_repo(repo_url).context("invalid GitHub repo 
URL")?; let exclude_set = build_exclude_matcher(exclude_repos); @@ -273,6 +275,14 @@ pub async fn enumerate_contributor_repo_urls( break; } } + let excluded_by_repo_type = match repo_filter { + RepoType::Source => repo.fork, + RepoType::Fork => !repo.fork, + RepoType::All => false, + }; + if excluded_by_repo_type { + continue; + } if should_exclude_repo(&repo.clone_url, &exclude_set) { continue; } diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 319cbb7..1bfe623 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -227,6 +227,7 @@ pub async fn enumerate_github_repos( &args.input_specifier_args.github_exclude, args.input_specifier_args.repo_clone_limit, global_args.use_progress(), + args.input_specifier_args.github_repo_type.into(), ) .await {