From 5465d903cfb983c313a9daaad542f84bdcb0ac45 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 26 Apr 2026 16:56:44 -0700 Subject: [PATCH 1/5] added kingfisher.github.9 to detect the new ~520-character stateless GitHub App installation token format (ghs_<digits>_<JWT>). The legacy 36-character ghs_ rule --- CHANGELOG.md | 3 + CONTRIBUTING.md | 153 +++++++++++++ Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 5 +- crates/kingfisher-rules/data/rules/github.yml | 45 ++++ docs-site/docs/blog/index.md | 11 + ...26-beyond-detection-validate-map-revoke.md | 201 ++++++++++++++++++ .../2026-04-26-scan-github-org-for-secrets.md | 178 ++++++++++++++++ docs-site/docs/changelog.md | 3 + docs-site/docs/index.md | 2 +- docs-site/docs/rules/builtin-rules.md | 56 ++++- docs-site/mkdocs.yml | 20 +- docs-site/overrides/home.html | 8 +- docs-site/overrides/main.html | 2 +- docs-site/requirements.txt | 1 + docs-site/scripts/generate-rules-page.py | 4 +- 17 files changed, 676 insertions(+), 20 deletions(-) create mode 100644 CONTRIBUTING.md create mode 100644 docs-site/docs/blog/index.md create mode 100644 docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md create mode 100644 docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md diff --git a/CHANGELOG.md b/CHANGELOG.md index f789f5c..c9797e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [v1.98.0] +- Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs_<digits>_<JWT>`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. 
+ ## [v1.97.0] - **Report viewer cross-tool triage:** when a Kingfisher report is loaded alongside a Gitleaks or TruffleHog report, matching imported findings are enriched with Kingfisher's validation verdict, validation response, validate command, and revoke command. Matching is keyed on `commit + file + line` with a `file + line` fallback, and enriched rows show an "Enriched by Kingfisher" callout in the detail panel plus an "Enriched" chip in the findings table. Added a **Source** column to the findings table; a new **Duplicates Removed by Tool** dashboard panel showing per-tool cards for Kingfisher / TruffleHog / Gitleaks; and an upload-time **Deduplicate findings** toggle (on by default) so users can inspect the raw rows before fingerprint dedup when needed. - Fixed the HTML report viewer dark mode so charts redraw correctly on theme changes and follow the system color scheme until manually overridden. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..9b66ce1 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,153 @@ +# Contributing to Kingfisher + +Thank you for your interest in contributing to Kingfisher. + +Kingfisher is an open-source project owned by MongoDB and licensed under the +[Apache License 2.0](LICENSE). We welcome bug reports, feature requests, +documentation improvements, rule additions, validation improvements, and code +contributions. + +## Before You Start + +- Be respectful and collaborative. Participation in this project is covered by + the [MongoDB Community Code of Conduct](https://www.mongodb.com/community-code-of-conduct). +- If you plan to submit a pull request, sign the + [MongoDB Contributor Agreement](https://www.mongodb.com/legal/contributor-agreement) + first. +- For security vulnerabilities, do not open a public issue. Follow + [SECURITY.md](SECURITY.md) instead. + +## Ways to Contribute + +- Report bugs with clear reproduction steps, environment details, and logs when + possible. 
+- Propose features or usability improvements through GitHub issues. +- Improve documentation in `README.md`, `docs/`, or `docs-site/`. +- Add or refine detection rules under + `crates/kingfisher-rules/data/rules/`. +- Improve validation, revocation, scanning performance, output formats, or + integrations. + +## Reporting Bugs and Requesting Features + +Before opening a new issue: + +- Check whether an existing issue already covers the problem or request. +- Confirm the issue still reproduces on a recent `main` checkout or current + release when practical. +- Include the smallest reproducible example you can provide. + +Use the repository issue templates when they fit your case. + +## Development Setup + +Kingfisher is a Rust workspace. The workspace minimum Rust version is `1.94`, +and CI currently uses Rust `1.94.1`. + +Helpful commands: + +```bash +cargo build +make tests +cargo test --workspace --all-targets +cargo fmt --all +cargo clippy --workspace --all-targets -- -D warnings +``` + +For repository layout and project-specific guidance, see: + +- [README.md](README.md) +- [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) +- [docs/USAGE.md](docs/USAGE.md) +- [docs/RULES.md](docs/RULES.md) + +## Contribution Guidelines + +### Keep changes focused + +- Prefer small, reviewable pull requests over large mixed changes. +- Avoid unrelated refactors in the same PR unless they are necessary for the + fix. +- Update tests and docs when behavior changes. + +### Do not commit real secrets + +Kingfisher is a secret scanner. Never add live credentials, customer data, or +real tokens anywhere in the repository, including: + +- tests +- fixtures +- examples +- docs +- screenshots +- benchmark artifacts + +Use clearly fake placeholders or provider-documented example values only. + +### Rule contributions + +If you are adding or updating a rule: + +- Follow the schema and authoring guidance in [docs/RULES.md](docs/RULES.md). 
+- Prefer YAML-defined validation and revocation when the provider API supports + it. +- Keep patterns specific and efficient. +- Add realistic examples and relevant tests. +- Set rule confidence to `medium`. + +Useful validation commands: + +```bash +cargo test -p kingfisher-rules +cargo test --workspace --all-targets +kingfisher scan ./testdata --rule <RULE_ID> --rule-stats +kingfisher validate --rule <RULE_ID> <SECRET> +``` + +## Testing Expectations + +Run the narrowest relevant checks for your change before opening a PR, then run +broader checks when practical. + +Examples: + +- Rule-only changes: `cargo test -p kingfisher-rules` +- General Rust changes: `make tests` +- Formatting: `cargo fmt --all` +- Linting: `cargo clippy --workspace --all-targets -- -D warnings` + +If you cannot run a relevant check locally, say so in the pull request and +explain why. + +## Documentation Changes + +- Keep examples consistent with current CLI behavior. +- Update related docs when flags, outputs, or workflows change. +- After changing `docs-site/` sources, rebuild the site when practical: + +```bash +docs-site/.venv/bin/mkdocs build -f docs-site/mkdocs.yml +``` + +## Pull Request Checklist + +Before opening a PR, make sure you have: + +- signed the MongoDB Contributor Agreement +- kept the change focused +- added or updated tests where needed +- updated docs where needed +- run the relevant local checks +- avoided adding any real secrets or sensitive data + +In the PR description, include: + +- what changed +- why it changed +- how you tested it +- any follow-up work or known limitations + +## Questions + +If you are unsure whether a change is in scope, open an issue first so the +approach can be discussed before you spend time on implementation. 
diff --git a/Cargo.lock b/Cargo.lock index 00a1459..4a2f986 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5030,7 +5030,7 @@ dependencies = [ [[package]] name = "kingfisher" -version = "1.97.0" +version = "1.98.0" dependencies = [ "anyhow", "asar", diff --git a/Cargo.toml b/Cargo.toml index b46dde4..9691c5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,7 +48,7 @@ http = "1.4" [package] name = "kingfisher" -version = "1.97.0" +version = "1.98.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/README.md b/README.md index d9f2d04..da0c81a 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,9 @@ ghcr downloads + + GitHub Downloads +
Kingfisher is an open source secret scanner and **live secret validation** tool built in Rust. @@ -392,7 +395,7 @@ Kingfisher ships with [942 built-in rules](crates/kingfisher-rules/data/rules/) ## Write Custom Rules -Kingfisher ships with 484 built-in rules that include HTTP and service-specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential. +Of Kingfisher's 942 built-in rules, 484 include HTTP and service-specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential. However, you may want to add your own custom rules, or modify a detection to better suit your needs / environment. diff --git a/crates/kingfisher-rules/data/rules/github.yml b/crates/kingfisher-rules/data/rules/github.yml index 9d48d6b..657b14d 100644 --- a/crates/kingfisher-rules/data/rules/github.yml +++ b/crates/kingfisher-rules/data/rules/github.yml @@ -362,3 +362,48 @@ rules: - | GITHUB_CLIENT_ID=ac58d6da7d7a84c039b7 GITHUB_SECRET=37d02377a3e9d849e18704c3ec883f9c5787d857 + + - name: GitHub App Server-to-Server Token (stateless JWT format) + id: kingfisher.github.9 + pattern: | + (?x) + ( + ghs_[0-9]+_ + [A-Za-z0-9_-]+ \. [A-Za-z0-9_-]+ \. 
[A-Za-z0-9_-]+ + ) + \b + min_entropy: 3.5 + examples: + - 'ghs_12345_eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOjE3NDU1NjgwMDAsImV4cCI6MTc0NTU2ODM2MCwiaXNzIjoiMTIzNDUiLCJzdWIiOiJnaXRodWJ8MTIzNDUifQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c' + references: + - https://docs.github.com/en/apps/creating-github-apps/authenticating-with-a-github-app/about-authentication-with-a-github-app + - https://github.com/mongodb/kingfisher/issues/359 + validation: + type: Http + content: + request: + method: GET + url: https://api.github.com/user + headers: + Authorization: token {{ TOKEN }} + Accept: application/vnd.github+json + response_matcher: + - report_response: true + - match_all_words: true + type: WordMatch + words: + - '"login"' + - '"id"' + revocation: + type: Http + content: + request: + method: DELETE + url: https://api.github.com/installation/token + headers: + Authorization: token {{ TOKEN }} + Accept: application/vnd.github+json + response_matcher: + - report_response: true + - type: StatusMatch + status: [204] \ No newline at end of file diff --git a/docs-site/docs/blog/index.md b/docs-site/docs/blog/index.md new file mode 100644 index 0000000..737ff10 --- /dev/null +++ b/docs-site/docs/blog/index.md @@ -0,0 +1,11 @@ +--- +title: Kingfisher Blog +description: > + News, tutorials, and deep-dives on secret detection, credential validation, + and supply-chain security from the Kingfisher team at MongoDB. +--- + +# Kingfisher Blog + +Tutorials, release notes, and deep-dives on secret scanning, credential +validation, and supply-chain security. 
diff --git a/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md b/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md new file mode 100644 index 0000000..c5507ee --- /dev/null +++ b/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md @@ -0,0 +1,201 @@ +--- +date: 2026-04-26 +title: "Beyond Detection: Live Validation, Blast Radius, and One-Command Revocation" +description: > + Detection alone is noise. Kingfisher answers the three questions that + actually matter when a secret leaks — is it live, what does it reach, + and can we revoke it now — across AWS, GCP, GitHub, GitLab, Slack, + and dozens of other providers. +categories: + - Features +tags: + - validation + - blast-radius + - revocation + - secret-scanning +--- + +# Beyond Detection: Live Validation, Blast Radius, and One-Command Revocation + +A regex match on `AKIA[0-9A-Z]{16}` is the easy part. Every secret scanner +finds those. The hard part — and the part that decides whether your Tuesday +afternoon turns into an incident — is what happens **after** the match. + +Kingfisher answers the three questions that actually matter: + +1. **Is this credential alive right now?** +2. **What can it reach?** +3. **Can we kill it from here?** + +<!-- more --> + +## 1. Live validation, not just pattern matching + +Out of Kingfisher's 821 standalone detectors, **485 include live validation +logic**. Every one of those calls the provider's own API and reports the +credential as `Active`, `Inactive`, or `NotAttempted` — so a 4,000-finding +scan collapses to the dozen findings that are actually live. 
+ +Validation runs automatically when you scan: + +```bash +kingfisher scan github --organization my-org +``` + +Or you can run it standalone when you've already pulled a suspicious value +out of a paste, a log, or a customer ticket: + +```bash +# Hit GitHub's user API to confirm the token works +kingfisher validate --rule github "$GITHUB_TOKEN" + +# AWS needs both halves of the keypair +kingfisher validate --rule aws \ + --arg "$AWS_ACCESS_KEY_ID" \ + "$AWS_SECRET_ACCESS_KEY" + +# A GCP service account JSON, straight from the file +kingfisher validate --rule gcp "$(cat service-account.json)" + +# A Postgres connection URI — does it actually authenticate? +kingfisher validate --rule postgres "$POSTGRES_URI" +``` + +Validation logic lives in the rule YAML, not in compiled Rust, which is +why coverage is high and growing — every new detector ships with a +validation block whenever the provider exposes a safe check call. + +## 2. Blast radius mapping — what does this token actually reach? + +A leaked AWS key bound to a single read-only S3 bucket and a leaked AWS key +bound to organization-wide `AdministratorAccess` are not the same incident. +The first is a Friday afternoon ticket. The second is a war room. + +Add `--access-map` to a scan and Kingfisher authenticates each live +credential, enumerates what it can do, and writes the result alongside +the finding: + +```bash +kingfisher scan github --organization my-org \ + --access-map \ + --format json \ + --output findings.json +``` + +Each cloud finding gets an `access_map` block with the identity, the +permissions, and the concrete resources reachable. Today this is supported +for **AWS, GCP, Azure Storage, Azure DevOps, GitHub, GitLab, Slack, and +Microsoft Teams.** + +You can also run it standalone — useful when triaging a single credential +you've fished out of a paste or a customer report: + +```bash +# What does this AWS keypair actually own? 
+kingfisher access-map aws ./aws.json --json-out aws.access-map.json + +# Same for a GitHub token +kingfisher access-map github ./github.token --json-out github.access-map.json + +# Or a GCP service account +kingfisher access-map gcp ./service-account.json --json-out gcp.access-map.json +``` + +The HTML report viewer (`--format html`) renders the access map as a +clickable tree — identity at the root, then services, then individual +resources and permissions. It's the fastest way to get a non-engineer +stakeholder to grasp severity in five seconds rather than five minutes. + +## 3. Revocation — kill the token from where you found it + +Validation tells you a credential is live. Blast radius tells you why it's +urgent. Revocation tells you it's done. + +For every rule whose provider exposes a safe revocation API, Kingfisher +ships the revocation call as part of the rule definition. One command, +no console: + +```bash +# Revoke a GitHub PAT +kingfisher revoke --rule github "$GITHUB_TOKEN" + +# Revoke a GitLab token +kingfisher revoke --rule gitlab "$GITLAB_TOKEN" + +# Revoke a Slack bot token +kingfisher revoke --rule slack "$SLACK_TOKEN" + +# Deactivate an AWS access key +kingfisher revoke --rule aws \ + --arg "$AWS_ACCESS_KEY_ID" \ + "$AWS_SECRET_ACCESS_KEY" + +# Disable a GCP service account key +kingfisher revoke --rule gcp "$(cat service-account.json)" +``` + +The same Liquid templating that powers the validation request handles +revocation — including multi-step flows for providers that need a separate +key-id lookup before disabling. (See +[`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md#multi-step-revocation) +for the schema.) + +This matters in two scenarios: + +- **Mass revocation after a leak.** A laptop or a CI runner gets popped and + you have a list of fingerprints. `kingfisher revoke` walks the list, no + human pivoting between five provider consoles. 
+- **Automated response.** Wire `kingfisher revoke` into the same job that + scanned and validated, gated by an allow-list of rule IDs you've decided + are safe to auto-revoke (typically: short-lived CI tokens, dev-environment + secrets). The credential is dead before the on-call gets paged. + +## The combined workflow + +In practice these three primitives chain into a single pipeline: + +```bash +# 1. Scan + validate + map blast radius in one call +kingfisher scan github --organization my-org \ + --access-map \ + --format json \ + --output findings.json + +# 2. Pull just the live, high-blast-radius findings +jq '[.[] | select(.validation.status == "Active") + | select(.access_map.permissions + | any(. == "*" or contains("Admin")))]' \ + findings.json > urgent.json + +# 3. Triage in the HTML viewer (or revoke programmatically) +kingfisher view findings.json +``` + +Three commands, full incident workflow — find, prioritize, kill. + +## Why this is the right shape + +Most scanners stop at step one because going further is expensive: every +provider has its own auth flow, its own permission model, its own +revocation API. Kingfisher gets to a high-coverage version of all three by +keeping the logic in YAML rule files (the same place the detection regex +lives), reusing typed validators for the common families (AWS, GCP, JWT, +Postgres, MongoDB, MySQL, JDBC, Azure Storage, Coinbase), and letting rule +authors drop down to a `Raw` validator only for genuinely odd providers. + +The upshot for users: when a new detector lands, you almost always get +validation, blast radius, and revocation along with it — not three +separate roadmaps. + +## Next up + +- **Catching secrets in pull requests with GitHub Actions** — pre-merge + scanning so leaked credentials never reach `main`. +- **Top leaked credential types we see in the wild** — what validation + telemetry says about the credential leak landscape. 
+- **Docker image scanning** — pulling and scanning every layer for + embedded secrets. + +Got a provider you'd love to see validation or revocation support for? +Open an issue at +[mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues). diff --git a/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md b/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md new file mode 100644 index 0000000..5bad86c --- /dev/null +++ b/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md @@ -0,0 +1,178 @@ +--- +date: 2026-04-26 +title: "Scanning an Entire GitHub Organization for Leaked Secrets" +description: > + Step-by-step guide to scanning every repository in a GitHub organization + for leaked credentials with Kingfisher — including history, issues, wikis, + and gists — and validating which secrets are still live. +categories: + - Tutorials +tags: + - github + - secret-scanning + - validation + - tutorial +--- + +# Scanning an Entire GitHub Organization for Leaked Secrets + +Most organizations have hundreds of repositories — some abandoned, some active, +plenty inherited from acquisitions. A leaked AWS key in a five-year-old archived +repo is just as dangerous as one in `main` today. Kingfisher can enumerate every +repo in a GitHub organization, scan the full git history, and then **validate +which credentials are still live** so you know what to rotate first. + +<!-- more --> + +## What you need + +- Kingfisher installed (`brew install mongodb/brew/kingfisher`, or grab a + release from [GitHub](https://github.com/mongodb/kingfisher/releases)). +- A GitHub personal access token exported as `KF_GITHUB_TOKEN`. A classic + token with `repo` and `read:org` scopes is enough for private repos; for + public-only scans, even an unscoped token raises your rate limit and + is strongly recommended. +- About 5 GB of free disk for clones (varies by org size — use + `--git-clone-dir /path/to/big/disk` if your home volume is small). 
+ +## The one-liner + +```bash +export KF_GITHUB_TOKEN=ghp_yourTokenHere +kingfisher scan github --organization my-org +``` + +That's it — Kingfisher enumerates every repo, clones each one, scans the full +commit history, runs all 943 detection rules, and validates findings against +provider APIs. + +## Tuning for real-world orgs + +Real orgs have huge monorepos, archived junk, and forks you don't care about. +Three flags do most of the work: + +```bash +kingfisher scan github --organization my-org \ + --repo-clone-limit 500 \ + --github-exclude 'my-org/*-archive' \ + --github-exclude 'my-org/legacy-monorepo' \ + --git-clone-dir /var/tmp/kf-clones \ + --format sarif \ + --output kf-findings.sarif +``` + +- **`--repo-clone-limit`** caps the number of clones per scan. Useful for + staged rollouts ("first 500 repos by stars") or to stay under disk budget. +- **`--github-exclude`** accepts exact `OWNER/REPO` strings or gitignore-style + globs (`my-org/*-archive`). Repeat the flag for each pattern. Matching is + case-insensitive. +- **`--git-clone-dir`** moves clones off your home volume. Combine with + `--keep-clones` if you want to re-scan later without re-cloning. + +## Pulling in issues, wikis, and gists + +Secrets don't only live in code. Issues and pull request descriptions are a +common leak source — someone pastes a stack trace with a JWT, or an +"oncall handoff" issue with a temporary token that never got rotated. Add +`--repo-artifacts` to fetch these: + +```bash +kingfisher scan github --organization my-org --repo-artifacts +``` + +This pulls each repo's issues (including PRs), wiki, and any **public** gists +owned by the repo owner, and scans them all. It does cost API calls, so plan +accordingly if you're near a rate limit. + +## Following the people, not just the org + +This is the trick that catches what every other scanner misses. Developers +leak secrets in *personal* repositories — side projects, dotfiles, throwaway +forks. 
If a contributor to your org has a public personal repo with an active +token that grants access to org infrastructure, that's a real incident. + +Pass a single repo URL with `--include-contributors` and Kingfisher will +enumerate the contributors, then clone and scan **every public repo they own**: + +```bash +kingfisher scan https://github.com/my-org/critical-service \ + --include-contributors \ + --repo-clone-limit 200 +``` + +This is a noisy operation — start with one or two critical repos rather than +the whole org. GitHub will rate-limit aggressive enumeration, so a token +(`KF_GITHUB_TOKEN`) is required in practice. + +## Reading the output + +The default `pretty` output is human-friendly for terminals. For automation, +pick the format that matches your downstream tool: + +```bash +# JSON for custom tooling +kingfisher scan github --organization my-org --format json --output findings.json + +# SARIF for GitHub code scanning, GitLab, or any SARIF-aware UI +kingfisher scan github --organization my-org --format sarif --output findings.sarif + +# TOON for piping to an LLM or agent +kingfisher scan github --organization my-org --format toon +``` + +The interactive HTML report is often the fastest way to triage a large scan — +filter by rule, by validation status, or by repository, and click through to +the exact commit and line: + +```bash +kingfisher scan github --organization my-org --format html --output kf-report.html +``` + +## Triage by validation status + +The single most important column in the output is **validation**. A live +credential is a fire — a never-was-valid one is noise. Filter to live findings +first: + +```bash +jq '.[] | select(.validation.status == "Active")' findings.json +``` + +Then walk those credentials in order of blast radius. For AWS, GCP, GitHub, +GitLab, and Slack tokens, Kingfisher already maps what each one can access — +look at the `access_map` field in the JSON output, or the **Blast Radius** +panel in the HTML report. 
+ +## Revoke from the CLI + +For supported providers, you don't need to log into a console — Kingfisher can +revoke directly: + +```bash +kingfisher revoke --rule kingfisher.aws.access_key.1 AKIAEXAMPLE... +``` + +Each rule that supports revocation declares the API call in its YAML. Today +this works for AWS, GitHub, GitLab, Slack, and a growing list of SaaS +providers — see [`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md) +for the current list and how to add revocation to a custom rule. + +## Wiring it into a recurring job + +A first scan is the one-shot baseline. The real value is recurring scans +catching new leaks within hours, not months. The simplest pattern is a nightly +GitHub Action or scheduled CI job that runs the org scan, diffs against +yesterday's findings, and pages on net-new live credentials. We'll cover that +end-to-end in the next post. + +## What's next + +- **Catching secrets in pull requests with GitHub Actions** — pre-merge + scanning so leaks never reach `main`. +- **The most common credential types we see leaked in the wild** — what + Kingfisher's validation telemetry says about the credential leak landscape. +- **Docker image scanning** — pulling images directly and scanning every + layer for embedded secrets. + +If there's a workflow you'd like us to cover, open an issue at +[mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues). diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index 2f68500..317a695 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -7,6 +7,9 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im All notable changes to this project will be documented in this file. +## [v1.98.0] +- Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs_<digits>_<JWT>`). 
The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. Bundled ruleset is now **943 rules** (821 standalone detectors + 122 dependent rules), with **485 standalone detectors** offering live validation. + ## [v1.97.0] - **Report viewer cross-tool triage:** when a Kingfisher report is loaded alongside a Gitleaks or TruffleHog report, matching imported findings are enriched with Kingfisher's validation verdict, validation response, validate command, and revoke command. Matching is keyed on `commit + file + line` with a `file + line` fallback, and enriched rows show an "Enriched by Kingfisher" callout in the detail panel plus an "Enriched" chip in the findings table. Added a **Source** column to the findings table; a new **Duplicates Removed by Tool** dashboard panel showing per-tool cards for Kingfisher / TruffleHog / Gitleaks; and an upload-time **Deduplicate findings** toggle (on by default) so users can inspect the raw rows before fingerprint dedup when needed. - Fixed the HTML report viewer dark mode so charts redraw correctly on theme changes and follow the system color scheme until manually overridden. diff --git a/docs-site/docs/index.md b/docs-site/docs/index.md index a840985..56a1c6a 100644 --- a/docs-site/docs/index.md +++ b/docs-site/docs/index.md @@ -2,7 +2,7 @@ title: Kingfisher — Open Source Secret Scanner with Live Validation description: >- Kingfisher is an open source secret scanner with live validation, blast radius - mapping, and credential revocation. 942 detection rules (484 with live validation), + mapping, and credential revocation. 943 detection rules (485 with live validation), plus a browser-based report viewer that also triages Gitleaks and TruffleHog output. Built in Rust by MongoDB. 
template: home.html diff --git a/docs-site/docs/rules/builtin-rules.md b/docs-site/docs/rules/builtin-rules.md index 4a07706..e86cf5c 100644 --- a/docs-site/docs/rules/builtin-rules.md +++ b/docs-site/docs/rules/builtin-rules.md @@ -1,13 +1,13 @@ --- title: "Built-in Rules List" -description: "Complete list of all 942 built-in secret detection rules in Kingfisher. Searchable and filterable by provider, confidence level, and validation support." +description: "Complete list of all 943 built-in secret detection rules in Kingfisher. Searchable and filterable by provider, confidence level, and validation support." --- # Built-in Rules -Kingfisher ships with **942 detection rules** across **580 providers** -(820 detectors + 122 dependent rules). -Of these, **605** include live validation and **57** support direct revocation. +Kingfisher ships with **943 detection rules** across **581 providers** +(821 detectors + 122 dependent rules). +Of these, **485** include live validation and **50** support direct revocation. !!! tip "Search" Use the search box below to filter rules by provider name, rule ID, or confidence level. @@ -127,7 +127,7 @@ Of these, **605** include live validation and **57** support direct revocation. Agora Agora App ID kingfisher.agora.1 -Low +Medium @@ -1572,6 +1572,22 @@ Of these, **605** include live validation and **57** support direct revocation. +Confluence +Confluence Data Center Personal Access Token +kingfisher.confluence.1 +Medium +Yes + + + +Confluence +Confluence Data Center Domain +kingfisher.confluence.2 +Medium + + + + Confluent Confluent Client ID kingfisher.confluent.1 @@ -2096,7 +2112,7 @@ Of these, **605** include live validation and **57** support direct revocation. DocuSign API Secret Key kingfisher.docusign.1 Medium -Yes + @@ -2940,6 +2956,14 @@ Of these, **605** include live validation and **57** support direct revocation. 
+Github +GitHub App Server-to-Server Token (stateless JWT format) +kingfisher.github.9 +Medium +Yes +Yes + + Gitlab GitLab Private Token kingfisher.gitlab.1 @@ -3463,7 +3487,7 @@ Of these, **605** include live validation and **57** support direct revocation. Huawei Huawei Open Platform Client ID kingfisher.huawei.1 -Low +Medium @@ -3700,6 +3724,22 @@ Of these, **605** include live validation and **57** support direct revocation. +Jira +Jira Data Center Personal Access Token +kingfisher.jira.3 +Medium +Yes +Yes + + +Jira +Jira Data Center Domain +kingfisher.jira.4 +Medium + + + + Jotform Jotform API Key kingfisher.jotform.1 @@ -7215,7 +7255,7 @@ Of these, **605** include live validation and **57** support direct revocation. Webex Webex Integration Client ID kingfisher.webex.1 -Low +Medium diff --git a/docs-site/mkdocs.yml b/docs-site/mkdocs.yml index 18ff245..49c42b7 100644 --- a/docs-site/mkdocs.yml +++ b/docs-site/mkdocs.yml @@ -1,7 +1,7 @@ site_name: Kingfisher site_url: https://mongodb.github.io/kingfisher site_description: >- - Open source secret scanner with live validation. 942 detection rules, + Open source secret scanner with live validation. 943 detection rules, blast radius mapping, credential revocation, and a browser-based report viewer that also imports Gitleaks and TruffleHog output. Built in Rust by MongoDB. 
@@ -46,6 +46,22 @@ theme: plugins: - search + - blog: + blog_dir: blog + post_date_format: long + post_url_format: "{date}/{slug}" + post_excerpt: required + archive: true + categories: true + pagination_per_page: 10 + authors: false + - rss: + match_path: blog/posts/.* + date_from_meta: + as_creation: date + categories: + - categories + - tags - minify: minify_html: true @@ -99,6 +115,8 @@ nav: - Python Bindings: reference/python-bindings.md - Benchmarks & Comparison: reference/comparison.md - Report Viewer: https://mongodb.github.io/kingfisher/viewer/ + - Blog: + - blog/index.md - Changelog: changelog.md extra: diff --git a/docs-site/overrides/home.html b/docs-site/overrides/home.html index 897eaac..8d98d0a 100644 --- a/docs-site/overrides/home.html +++ b/docs-site/overrides/home.html @@ -36,7 +36,7 @@
- 942 + 943 Detection Rules
@@ -48,8 +48,8 @@ Scan Targets
- 34 - Revocation Providers + 50 + Rules with Revocation
@@ -90,7 +90,7 @@

Direct Revocation

- Revoke compromised credentials directly from the CLI for 34 provider families + Revoke compromised credentials directly from the CLI for 29 provider families including GitHub, GitLab, Slack, AWS, GCP, Heroku, and Cloudflare.

diff --git a/docs-site/overrides/main.html b/docs-site/overrides/main.html index 1753e8c..4e08c9e 100644 --- a/docs-site/overrides/main.html +++ b/docs-site/overrides/main.html @@ -7,7 +7,7 @@ "@context": "https://schema.org", "@type": "SoftwareApplication", "name": "Kingfisher", - "description": "Open source secret scanner with live validation. 942 detection rules, blast radius mapping, and credential revocation.", + "description": "Open source secret scanner with live validation. 943 detection rules, blast radius mapping, and credential revocation.", "applicationCategory": "DeveloperApplication", "operatingSystem": "Linux, macOS, Windows", "license": "https://opensource.org/licenses/Apache-2.0", diff --git a/docs-site/requirements.txt b/docs-site/requirements.txt index 67e1b48..3171225 100644 --- a/docs-site/requirements.txt +++ b/docs-site/requirements.txt @@ -1,5 +1,6 @@ mkdocs-material>=9.5 mkdocs-minify-plugin>=0.8 +mkdocs-rss-plugin>=1.6 pillow>=10.0 cairosvg>=2.7 pyyaml>=6.0 diff --git a/docs-site/scripts/generate-rules-page.py b/docs-site/scripts/generate-rules-page.py index c15a067..1c5f1e1 100644 --- a/docs-site/scripts/generate-rules-page.py +++ b/docs-site/scripts/generate-rules-page.py @@ -69,8 +69,8 @@ def generate_markdown(rules): total = len(rules) detectors = sum(1 for r in rules if not r["dependent"]) dependent = total - detectors - validated = sum(1 for r in rules if r["validates"]) - revocable = sum(1 for r in rules if r["revokes"]) + validated = sum(1 for r in rules if r["validates"] and not r["dependent"]) + revocable = sum(1 for r in rules if r["revokes"] and not r["dependent"]) providers = len(set(r["provider"] for r in rules)) lines = [ From 19dafa42eaae2146193d369504fc0d08db044a7a Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 27 Apr 2026 13:20:16 -0700 Subject: [PATCH 2/5] Added provider endpoint overrides for validation and revocation via global --endpoint PROVIDER=URL and --endpoint-config FILE, with built-in support for 
self-hosted GitHub, GitLab, Gitea, Jira, Confluence, and Artifactory instances. --- .../data/rules/artifactory.yml | 4 +- .../data/rules/confluence.yml | 2 +- crates/kingfisher-rules/data/rules/gitea.yml | 2 +- crates/kingfisher-rules/data/rules/github.yml | 32 +- crates/kingfisher-rules/data/rules/gitlab.yml | 20 +- crates/kingfisher-rules/data/rules/jira.yml | 10 +- ...26-beyond-detection-validate-map-revoke.md | 76 ++-- .../2026-04-26-scan-github-org-for-secrets.md | 100 +++-- docs-site/docs/changelog.md | 3 +- docs-site/docs/usage/basic-scanning.md | 58 ++- docs/USAGE.md | 58 ++- src/cli/global.rs | 13 + src/direct_revoke.rs | 70 ++- src/direct_validate.rs | 22 +- src/lib.rs | 1 + src/provider_endpoints.rs | 409 ++++++++++++++++++ src/scanner/runner.rs | 27 +- src/scanner/validation.rs | 9 + src/validation.rs | 15 + 19 files changed, 790 insertions(+), 141 deletions(-) create mode 100644 src/provider_endpoints.rs diff --git a/crates/kingfisher-rules/data/rules/artifactory.yml b/crates/kingfisher-rules/data/rules/artifactory.yml index 8116f0a..32d6200 100644 --- a/crates/kingfisher-rules/data/rules/artifactory.yml +++ b/crates/kingfisher-rules/data/rules/artifactory.yml @@ -32,7 +32,7 @@ rules: - 200 type: StatusMatch - type: JsonValid - url: https://{{ JFROGURL }}/artifactory/api/repositories + url: '{{ ARTIFACTORY_BASE_URL }}/artifactory/api/repositories' references: - https://jfrog.com/help/r/jfrog-platform-administration-documentation/access-tokens - https://jfrog.com/help/r/jfrog-rest-apis/authentication @@ -93,7 +93,7 @@ rules: - status: - 200 type: StatusMatch - url: https://{{ JFROGURL }}/artifactory/api/repositories + url: '{{ ARTIFACTORY_BASE_URL }}/artifactory/api/repositories' references: - https://jfrog.com/help/r/jfrog-platform-administration-documentation/access-tokens - https://jfrog.com/help/r/jfrog-rest-apis/authentication diff --git a/crates/kingfisher-rules/data/rules/confluence.yml b/crates/kingfisher-rules/data/rules/confluence.yml index 
8ba3815..841aca7 100644 --- a/crates/kingfisher-rules/data/rules/confluence.yml +++ b/crates/kingfisher-rules/data/rules/confluence.yml @@ -40,7 +40,7 @@ rules: - type: WordMatch words: - '"type":"known"' - url: https://{{ CONFLUENCEDCDOMAIN }}/rest/api/user/current + url: '{{ CONFLUENCE_BASE_URL }}/rest/api/user/current' depends_on_rule: - rule_id: kingfisher.confluence.2 variable: CONFLUENCEDCDOMAIN diff --git a/crates/kingfisher-rules/data/rules/gitea.yml b/crates/kingfisher-rules/data/rules/gitea.yml index 8381e1f..6b404df 100644 --- a/crates/kingfisher-rules/data/rules/gitea.yml +++ b/crates/kingfisher-rules/data/rules/gitea.yml @@ -34,7 +34,7 @@ rules: content: request: method: GET - url: https://gitea.com/api/v1/user + url: '{{ GITEA_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/json diff --git a/crates/kingfisher-rules/data/rules/github.yml b/crates/kingfisher-rules/data/rules/github.yml index 657b14d..d59a574 100644 --- a/crates/kingfisher-rules/data/rules/github.yml +++ b/crates/kingfisher-rules/data/rules/github.yml @@ -28,7 +28,7 @@ rules: content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -44,7 +44,7 @@ rules: content: request: method: POST - url: https://api.github.com/credentials/revoke + url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke' headers: Accept: application/vnd.github+json X-GitHub-Api-Version: 2026-03-10 @@ -84,7 +84,7 @@ rules: content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -100,7 +100,7 @@ rules: content: request: method: POST - url: https://api.github.com/credentials/revoke + url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke' headers: Accept: application/vnd.github+json X-GitHub-Api-Version: 2026-03-10 @@ -137,7 +137,7 @@ rules: 
content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -153,7 +153,7 @@ rules: content: request: method: POST - url: https://api.github.com/credentials/revoke + url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke' headers: Accept: application/vnd.github+json X-GitHub-Api-Version: 2026-03-10 @@ -189,7 +189,7 @@ rules: content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -205,7 +205,7 @@ rules: content: request: method: POST - url: https://api.github.com/credentials/revoke + url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke' headers: Accept: application/vnd.github+json X-GitHub-Api-Version: 2026-03-10 @@ -234,7 +234,7 @@ rules: content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -250,7 +250,7 @@ rules: content: request: method: DELETE - url: https://api.github.com/installation/token + url: '{{ GITHUB_API_BASE_URL }}/installation/token' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -281,7 +281,7 @@ rules: content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -297,7 +297,7 @@ rules: content: request: method: POST - url: https://api.github.com/credentials/revoke + url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke' headers: Accept: application/vnd.github+json X-GitHub-Api-Version: 2026-03-10 @@ -346,7 +346,7 @@ rules: content: request: method: POST - url: "https://github.com/login/oauth/access_token" + url: '{{ GITHUB_WEB_BASE_URL }}/login/oauth/access_token' headers: Accept: "application/json" Content-Type: 
"application/json" @@ -383,7 +383,7 @@ rules: content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -399,11 +399,11 @@ rules: content: request: method: DELETE - url: https://api.github.com/installation/token + url: '{{ GITHUB_API_BASE_URL }}/installation/token' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json response_matcher: - report_response: true - type: StatusMatch - status: [204] \ No newline at end of file + status: [204] diff --git a/crates/kingfisher-rules/data/rules/gitlab.yml b/crates/kingfisher-rules/data/rules/gitlab.yml index 995bac0..632c4f3 100644 --- a/crates/kingfisher-rules/data/rules/gitlab.yml +++ b/crates/kingfisher-rules/data/rules/gitlab.yml @@ -34,7 +34,7 @@ rules: - type: WordMatch words: - '"id"' - url: https://gitlab.com/api/v4/personal_access_tokens/self + url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self' revocation: type: Http content: @@ -46,7 +46,7 @@ rules: - report_response: true - type: StatusMatch status: [204] - url: https://gitlab.com/api/v4/personal_access_tokens/self + url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self' - name: GitLab Runner Registration Token id: kingfisher.gitlab.2 @@ -92,7 +92,7 @@ rules: - '"token is missing"' - '"403 Forbidden"' negative: true - url: https://gitlab.com/api/v4/runners/verify + url: '{{ GITLAB_API_BASE_URL }}/runners/verify' - name: GitLab Pipeline Trigger Token id: kingfisher.gitlab.3 @@ -131,7 +131,7 @@ rules: - '"token is missing"' - '"403 Forbidden"' negative: true - url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }} + url: '{{ GITLAB_API_BASE_URL }}/ci/pipeline_triggers/{{ TOKEN }}' - name: GitLab Private Token - Routable Format id: kingfisher.gitlab.4 pattern: | @@ -178,7 +178,7 @@ rules: - type: WordMatch words: - '"id"' - url: https://gitlab.com/api/v4/personal_access_tokens/self + url: '{{ 
GITLAB_API_BASE_URL }}/personal_access_tokens/self' revocation: type: Http content: @@ -190,7 +190,7 @@ rules: - report_response: true - type: StatusMatch status: [204] - url: https://gitlab.com/api/v4/personal_access_tokens/self + url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self' - name: GitLab CI/CD Job Token id: kingfisher.gitlab.5 @@ -219,7 +219,7 @@ rules: content: request: method: GET - url: https://gitlab.com/api/v4/job + url: '{{ GITLAB_API_BASE_URL }}/job' headers: JOB-TOKEN: '{{ TOKEN }}' response_matcher: @@ -393,7 +393,7 @@ rules: - '"token is missing"' - '"403 Forbidden"' negative: true - url: https://gitlab.com/api/v4/runners/verify + url: '{{ GITLAB_API_BASE_URL }}/runners/verify' - name: GitLab Runner Authentication Token - Routable Format id: kingfisher.gitlab.13 @@ -436,7 +436,7 @@ rules: - '"token is missing"' - '"403 Forbidden"' negative: true - url: https://gitlab.com/api/v4/runners/verify + url: '{{ GITLAB_API_BASE_URL }}/runners/verify' - name: GitLab SCIM Token id: kingfisher.gitlab.14 @@ -481,7 +481,7 @@ rules: content: request: method: GET - url: https://gitlab.com/api/v4/user + url: '{{ GITLAB_API_BASE_URL }}/user' headers: Cookie: "_gitlab_session={{ TOKEN }}" response_matcher: diff --git a/crates/kingfisher-rules/data/rules/jira.yml b/crates/kingfisher-rules/data/rules/jira.yml index 23ef153..1ddb4f6 100644 --- a/crates/kingfisher-rules/data/rules/jira.yml +++ b/crates/kingfisher-rules/data/rules/jira.yml @@ -53,7 +53,7 @@ rules: - status: - 200 type: StatusMatch - url: https://{{ DOMAIN }}/rest/api/3/dashboard + url: '{{ JIRA_CLOUD_BASE_URL }}/rest/api/3/dashboard' references: - https://developer.atlassian.com/cloud/jira/platform/basic-auth-for-rest-apis/ depends_on_rule: @@ -97,7 +97,7 @@ rules: - 200 type: StatusMatch - type: JsonValid - url: https://{{ JIRADCDOMAIN }}/rest/api/latest/myself + url: '{{ JIRA_BASE_URL }}/rest/api/latest/myself' revocation: type: HttpMultiStep content: @@ -105,7 +105,7 @@ rules: - name: 
lookup_token_id request: method: GET - url: https://{{ JIRADCDOMAIN }}/rest/pat/latest/tokens + url: '{{ JIRA_BASE_URL }}/rest/pat/latest/tokens' headers: Accept: application/json Authorization: Bearer {{ TOKEN }} @@ -120,7 +120,7 @@ rules: - name: revoke_token request: method: DELETE - url: https://{{ JIRADCDOMAIN }}/rest/pat/latest/tokens/{{ JIRA_TOKEN_ID }} + url: '{{ JIRA_BASE_URL }}/rest/pat/latest/tokens/{{ JIRA_TOKEN_ID }}' headers: Authorization: Bearer {{ TOKEN }} response_matcher: @@ -152,4 +152,4 @@ rules: - jira-staging.corp.mongodb.com - https://jira.corp.internal:8443 references: - - https://confluence.atlassian.com/adminjiraserver/jira-applications-base-url-938846869.html \ No newline at end of file + - https://confluence.atlassian.com/adminjiraserver/jira-applications-base-url-938846869.html diff --git a/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md b/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md index c5507ee..54f2608 100644 --- a/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md +++ b/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md @@ -17,9 +17,10 @@ tags: # Beyond Detection: Live Validation, Blast Radius, and One-Command Revocation -A regex match on `AKIA[0-9A-Z]{16}` is the easy part. Every secret scanner -finds those. The hard part — and the part that decides whether your Tuesday -afternoon turns into an incident — is what happens **after** the match. +A regex hit is the easy part. Any scanner can tell you that a string looks +like an AWS access key or a GitHub token. The harder question is what to do +next, and that is usually what turns a scan result into either a routine +cleanup task or a real incident. Kingfisher answers the three questions that actually matter: @@ -32,9 +33,12 @@ Kingfisher answers the three questions that actually matter: ## 1. 
Live validation, not just pattern matching Out of Kingfisher's 820 standalone detectors, **484 include live validation -logic**. Every one of those calls the provider's own API and reports the -credential as `Active`, `Inactive`, or `NotAttempted` — so a 4,000-finding -scan collapses to the dozen findings that are actually live. +logic**. When a provider exposes a safe check call, Kingfisher uses that +provider's own API to report each credential as `Active`, `Inactive`, or +`NotAttempted`. + +That changes the output from "thousands of regex matches" to a much shorter +list of findings that actually authenticate today. Validation runs automatically when you scan: @@ -61,15 +65,15 @@ kingfisher validate --rule gcp "$(cat service-account.json)" kingfisher validate --rule postgres "$POSTGRES_URI" ``` -Validation logic lives in the rule YAML, not in compiled Rust, which is -why coverage is high and growing — every new detector ships with a -validation block whenever the provider exposes a safe check call. +Most validation logic lives in the rule YAML rather than bespoke compiled +code. That makes it practical to grow coverage rule-by-rule instead of +treating validation as a separate engineering project. ## 2. Blast radius mapping — what does this token actually reach? A leaked AWS key bound to a single read-only S3 bucket and a leaked AWS key bound to organization-wide `AdministratorAccess` are not the same incident. -The first is a Friday afternoon ticket. The second is a war room. +The first is a ticket. The second is a war room. Add `--access-map` to a scan and Kingfisher authenticates each live credential, enumerates what it can do, and writes the result alongside @@ -82,10 +86,10 @@ kingfisher scan github --organization my-org \ --output findings.json ``` -Each cloud finding gets an `access_map` block with the identity, the -permissions, and the concrete resources reachable. 
Today this is supported -for **AWS, GCP, Azure Storage, Azure DevOps, GitHub, GitLab, Slack, and -Microsoft Teams.** +Each supported finding gets an `access_map` block with the identity, +permissions, and concrete resources reachable. Today that includes +**AWS, GCP, Azure Storage, Azure DevOps, GitHub, GitLab, Slack, and +Microsoft Teams**. You can also run it standalone — useful when triaging a single credential you've fished out of a paste or a customer report: @@ -102,18 +106,17 @@ kingfisher access-map gcp ./service-account.json --json-out gcp.access-map.json ``` The HTML report viewer (`--format html`) renders the access map as a -clickable tree — identity at the root, then services, then individual -resources and permissions. It's the fastest way to get a non-engineer -stakeholder to grasp severity in five seconds rather than five minutes. +clickable tree: identity at the root, then services, then individual +resources and permissions. It is a much faster way to explain severity to +an incident commander or manager than pasting IAM JSON into chat. ## 3. Revocation — kill the token from where you found it Validation tells you a credential is live. Blast radius tells you why it's -urgent. Revocation tells you it's done. +urgent. Revocation closes the loop. For every rule whose provider exposes a safe revocation API, Kingfisher -ships the revocation call as part of the rule definition. One command, -no console: +ships the revocation call as part of the rule definition: ```bash # Revoke a GitHub PAT @@ -134,17 +137,17 @@ kingfisher revoke --rule aws \ kingfisher revoke --rule gcp "$(cat service-account.json)" ``` -The same Liquid templating that powers the validation request handles -revocation — including multi-step flows for providers that need a separate -key-id lookup before disabling. 
(See +The same Liquid templating that powers validation also powers revocation, +including multi-step flows for providers that require a lookup before +disabling the credential. See [`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md#multi-step-revocation) -for the schema.) +for the schema. This matters in two scenarios: - **Mass revocation after a leak.** A laptop or a CI runner gets popped and - you have a list of fingerprints. `kingfisher revoke` walks the list, no - human pivoting between five provider consoles. + you have a list of live credentials. `kingfisher revoke` walks that list + without forcing a human to pivot between provider consoles. - **Automated response.** Wire `kingfisher revoke` into the same job that scanned and validated, gated by an allow-list of rule IDs you've decided are safe to auto-revoke (typically: short-lived CI tokens, dev-environment @@ -152,7 +155,7 @@ This matters in two scenarios: ## The combined workflow -In practice these three primitives chain into a single pipeline: +In practice, these three capabilities collapse into one response workflow: ```bash # 1. Scan + validate + map blast radius in one call @@ -162,16 +165,16 @@ kingfisher scan github --organization my-org \ --output findings.json # 2. Pull just the live, high-blast-radius findings -jq '[.[] | select(.validation.status == "Active") - | select(.access_map.permissions - | any(. == "*" or contains("Admin")))]' \ +jq '.findings + | map(select(.validation.status == "Active")) + | map(select(.access_map != null))' \ findings.json > urgent.json # 3. Triage in the HTML viewer (or revoke programmatically) kingfisher view findings.json ``` -Three commands, full incident workflow — find, prioritize, kill. +That is the full incident loop in three steps: find, prioritize, revoke. 
## Why this is the right shape @@ -183,9 +186,9 @@ lives), reusing typed validators for the common families (AWS, GCP, JWT, Postgres, MongoDB, MySQL, JDBC, Azure Storage, Coinbase), and letting rule authors drop down to a `Raw` validator only for genuinely odd providers. -The upshot for users: when a new detector lands, you almost always get -validation, blast radius, and revocation along with it — not three -separate roadmaps. +The practical result is that new rules can ship with detection plus +post-detection response logic, instead of detection today and validation or +revocation on some later roadmap. ## Next up @@ -196,6 +199,5 @@ separate roadmaps. - **Docker image scanning** — pulling and scanning every layer for embedded secrets. -Got a provider you'd love to see validation or revocation support for? -Open an issue at -[mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues). +If there is a provider you want validation or revocation support for, open +an issue at [mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues). diff --git a/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md b/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md index 5bad86c..488c9fb 100644 --- a/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md +++ b/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md @@ -16,11 +16,14 @@ tags: # Scanning an Entire GitHub Organization for Leaked Secrets -Most organizations have hundreds of repositories — some abandoned, some active, -plenty inherited from acquisitions. A leaked AWS key in a five-year-old archived -repo is just as dangerous as one in `main` today. Kingfisher can enumerate every -repo in a GitHub organization, scan the full git history, and then **validate -which credentials are still live** so you know what to rotate first. 
+Most organizations have more GitHub surface area than they think: active +services, abandoned repositories, internal tooling, forks, experiments, and +projects inherited through acquisitions. A credential leaked in a five-year-old +archived repo can still be live today. + +Kingfisher can enumerate every repository in a GitHub organization, scan the +full git history, and then **validate which credentials are still live** so +you can focus on what needs rotation first. @@ -42,14 +45,15 @@ export KF_GITHUB_TOKEN=ghp_yourTokenHere kingfisher scan github --organization my-org ``` -That's it — Kingfisher enumerates every repo, clones each one, scans the full -commit history, runs all 942 detection rules, and validates findings against +That single command enumerates the org, clones each repository, scans working +tree content plus git history, and validates supported findings against provider APIs. ## Tuning for real-world orgs -Real orgs have huge monorepos, archived junk, and forks you don't care about. -Three flags do most of the work: +Real organizations have huge monorepos, archived junk, mirrored forks, and +repositories you already know are out of scope. Three flags handle most of +the tuning: ```bash kingfisher scan github --organization my-org \ @@ -61,8 +65,8 @@ kingfisher scan github --organization my-org \ --output kf-findings.sarif ``` -- **`--repo-clone-limit`** caps the number of clones per scan. Useful for - staged rollouts ("first 500 repos by stars") or to stay under disk budget. +- **`--repo-clone-limit`** caps the number of clones per scan. It is useful + for staged rollouts or staying under a disk budget. - **`--github-exclude`** accepts exact `OWNER/REPO` strings or gitignore-style globs (`my-org/*-archive`). Repeat the flag for each pattern. Matching is case-insensitive. @@ -72,24 +76,25 @@ kingfisher scan github --organization my-org \ ## Pulling in issues, wikis, and gists Secrets don't only live in code. 
Issues and pull request descriptions are a -common leak source — someone pastes a stack trace with a JWT, or an -"oncall handoff" issue with a temporary token that never got rotated. Add +common leak source: someone pastes a stack trace with a JWT, or an +"on-call handoff" issue with a temporary token that never gets rotated. Add `--repo-artifacts` to fetch these: ```bash kingfisher scan github --organization my-org --repo-artifacts ``` -This pulls each repo's issues (including PRs), wiki, and any **public** gists -owned by the repo owner, and scans them all. It does cost API calls, so plan -accordingly if you're near a rate limit. +This pulls each repo's issues, pull requests, wiki, and any **public** gists +owned by the repo owner, then scans that material as well. It does consume API +calls, so budget for that if the org is large or your token is already near a +rate limit. ## Following the people, not just the org -This is the trick that catches what every other scanner misses. Developers -leak secrets in *personal* repositories — side projects, dotfiles, throwaway -forks. If a contributor to your org has a public personal repo with an active -token that grants access to org infrastructure, that's a real incident. +Developers also leak secrets in *personal* repositories: side projects, +dotfiles, and throwaway forks. If a contributor to your org has a public repo +containing a still-live credential that reaches company infrastructure, that is +still your incident. Pass a single repo URL with `--include-contributors` and Kingfisher will enumerate the contributors, then clone and scan **every public repo they own**: @@ -100,14 +105,14 @@ kingfisher scan https://github.com/my-org/critical-service \ --repo-clone-limit 200 ``` -This is a noisy operation — start with one or two critical repos rather than -the whole org. GitHub will rate-limit aggressive enumeration, so a token -(`KF_GITHUB_TOKEN`) is required in practice. +This is a noisy operation. 
Start with one or two critical repositories rather +than the entire organization. GitHub will also rate-limit aggressive +enumeration, so `KF_GITHUB_TOKEN` is effectively required. ## Reading the output -The default `pretty` output is human-friendly for terminals. For automation, -pick the format that matches your downstream tool: +The default `pretty` output is fine for interactive terminal use. For +automation, pick a format that matches your downstream consumer: ```bash # JSON for custom tooling @@ -120,9 +125,9 @@ kingfisher scan github --organization my-org --format sarif --output findings.sa kingfisher scan github --organization my-org --format toon ``` -The interactive HTML report is often the fastest way to triage a large scan — -filter by rule, by validation status, or by repository, and click through to -the exact commit and line: +The interactive HTML report is often the fastest way to triage a large scan. +You can filter by rule, validation status, or repository, then click through +to the exact commit and line: ```bash kingfisher scan github --organization my-org --format html --output kf-report.html @@ -130,40 +135,39 @@ kingfisher scan github --organization my-org --format html --output kf-report.ht ## Triage by validation status -The single most important column in the output is **validation**. A live -credential is a fire — a never-was-valid one is noise. Filter to live findings -first: +The single most important field in the output is **validation**. A live +credential should be triaged immediately; a value that never authenticated is +usually just cleanup work. Filter to live findings first: ```bash -jq '.[] | select(.validation.status == "Active")' findings.json +jq '.findings[] | select(.validation.status == "Active")' findings.json ``` -Then walk those credentials in order of blast radius. 
For AWS, GCP, GitHub, -GitLab, and Slack tokens, Kingfisher already maps what each one can access — -look at the `access_map` field in the JSON output, or the **Blast Radius** -panel in the HTML report. +Then prioritize by blast radius. For AWS, GCP, GitHub, GitLab, and Slack +tokens, Kingfisher can already map what each credential can access. Look at +the `access_map` field in JSON output, or the **Blast Radius** panel in the +HTML report. ## Revoke from the CLI -For supported providers, you don't need to log into a console — Kingfisher can -revoke directly: +For supported providers, you do not need to pivot into the provider console. +Kingfisher can revoke directly: ```bash kingfisher revoke --rule kingfisher.aws.access_key.1 AKIAEXAMPLE... ``` -Each rule that supports revocation declares the API call in its YAML. Today -this works for AWS, GitHub, GitLab, Slack, and a growing list of SaaS -providers — see [`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md) -for the current list and how to add revocation to a custom rule. +Each rule that supports revocation declares the API call in its YAML. See +[`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md) +for the schema and the current approach. ## Wiring it into a recurring job -A first scan is the one-shot baseline. The real value is recurring scans -catching new leaks within hours, not months. The simplest pattern is a nightly -GitHub Action or scheduled CI job that runs the org scan, diffs against -yesterday's findings, and pages on net-new live credentials. We'll cover that -end-to-end in the next post. +The first scan gives you a baseline. The real value comes from running the +same workflow continuously so new leaks are caught within hours instead of +months. A simple starting point is a nightly GitHub Action or scheduled CI +job that runs the org scan, diffs against yesterday's findings, and alerts on +net-new live credentials. 
## What's next @@ -174,5 +178,5 @@ end-to-end in the next post. - **Docker image scanning** — pulling images directly and scanning every layer for embedded secrets. -If there's a workflow you'd like us to cover, open an issue at +If there is a workflow you want us to cover, open an issue at [mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues). diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index 317a695..4e4c84d 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -8,7 +8,8 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im All notable changes to this project will be documented in this file. ## [v1.98.0] -- Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs__`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. Bundled ruleset is now **943 rules** (821 standalone detectors + 122 dependent rules), with **485 standalone detectors** offering live validation. +- Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs__`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. +- Added provider endpoint overrides for validation and revocation via global `--endpoint PROVIDER=URL` and `--endpoint-config FILE`, with built-in support for self-hosted GitHub, GitLab, Gitea, Jira, Confluence, and Artifactory instances. 
## [v1.97.0] - **Report viewer cross-tool triage:** when a Kingfisher report is loaded alongside a Gitleaks or TruffleHog report, matching imported findings are enriched with Kingfisher's validation verdict, validation response, validate command, and revoke command. Matching is keyed on `commit + file + line` with a `file + line` fallback, and enriched rows show an "Enriched by Kingfisher" callout in the detail panel plus an "Enriched" chip in the findings table. Added a **Source** column to the findings table; a new **Duplicates Removed by Tool** dashboard panel showing per-tool cards for Kingfisher / TruffleHog / Gitleaks; and an upload-time **Deduplicate findings** toggle (on by default) so users can inspect the raw rows before fingerprint dedup when needed. diff --git a/docs-site/docs/usage/basic-scanning.md b/docs-site/docs/usage/basic-scanning.md index cf66382..218b36d 100644 --- a/docs-site/docs/usage/basic-scanning.md +++ b/docs-site/docs/usage/basic-scanning.md @@ -274,6 +274,57 @@ kingfisher validate --rule aws --arg AKIAEXAMPLE "secret_key" kingfisher validate --rule aws --var AKID=AKIAEXAMPLE "secret_key" ``` +**Provider endpoint overrides (`--endpoint` and `--endpoint-config`):** + +Rules for providers that can run outside the public SaaS control plane can be pointed at a different instance without editing rule YAML. + +- `--endpoint PROVIDER=URL` sets an endpoint for the current command. Repeat it for multiple providers. +- `--endpoint-config FILE` loads a YAML file with reusable endpoint overrides. +- For self-hosted instances on private IPs or `localhost`, combine endpoint overrides with `--allow-internal-ips`. 
+ +Supported provider keys for endpoint overrides are: + +- `github` +- `gitlab` +- `gitea` +- `jira` (Jira Data Center / self-managed) +- `jira-cloud` +- `confluence` +- `artifactory` + +```bash +# Validate a GitHub Enterprise token against a self-hosted instance +kingfisher validate --rule github \ + --endpoint github=https://ghe.corp.example.com \ + "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + +# Revoke a self-managed GitLab PAT +kingfisher revoke --rule gitlab \ + --endpoint gitlab=https://gitlab.corp.example.com \ + "glpat-xxxxxxxxxxxxxxxxxxxx" + +# Scan with an internal Artifactory validator target +kingfisher scan ./repo \ + --endpoint artifactory=http://localhost:8071 \ + --allow-internal-ips +``` + +Example endpoint config file: + +```yaml +endpoints: + github: https://ghe.corp.example.com + gitlab: https://gitlab.corp.example.com + gitea: https://gitea.corp.example.com + jira: https://jira.corp.example.com + confluence: https://wiki.corp.example.com + artifactory: http://localhost:8071 +``` + +```bash +kingfisher scan ./repo --endpoint-config ./kingfisher-endpoints.yml --allow-internal-ips +``` + **Rule prefix matching:** Use partial rule IDs like `opsgenie` instead of the full `kingfisher.opsgenie.1`. If the prefix matches multiple rules, **all matching rules with compatible variables are tried**: ```bash @@ -1082,8 +1133,11 @@ If you are scanning infrastructure that uses internal endpoints for credential v # Scan with SSRF protection disabled (allows requests to internal IPs) kingfisher scan --allow-internal-ips ./repo -# Also works with the validate command -kingfisher validate --allow-internal-ips --rule kingfisher.artifactory.1 +# Also works with direct validation against a self-hosted endpoint +kingfisher validate --allow-internal-ips \ + --endpoint artifactory=http://localhost:8071 \ + --rule kingfisher.artifactory.1 \ + "AKCp..." ``` > **Warning:** Only use `--allow-internal-ips` when you trust the content being scanned. 
Malicious content could cause Kingfisher to make requests to internal services. diff --git a/docs/USAGE.md b/docs/USAGE.md index 570b7fd..b4cd2cc 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -269,6 +269,57 @@ kingfisher validate --rule aws --arg AKIAEXAMPLE "secret_key" kingfisher validate --rule aws --var AKID=AKIAEXAMPLE "secret_key" ``` +**Provider endpoint overrides (`--endpoint` and `--endpoint-config`):** + +Rules for providers that can run outside the public SaaS control plane can be pointed at a different instance without editing rule YAML. + +- `--endpoint PROVIDER=URL` sets an endpoint for the current command. Repeat it for multiple providers. +- `--endpoint-config FILE` loads a YAML file with reusable endpoint overrides. +- For self-hosted instances on private IPs or `localhost`, combine endpoint overrides with `--allow-internal-ips`. + +Supported provider keys for endpoint overrides are: + +- `github` +- `gitlab` +- `gitea` +- `jira` (Jira Data Center / self-managed) +- `jira-cloud` +- `confluence` +- `artifactory` + +```bash +# Validate a GitHub Enterprise token against a self-hosted instance +kingfisher validate --rule github \ + --endpoint github=https://ghe.corp.example.com \ + "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + +# Revoke a self-managed GitLab PAT +kingfisher revoke --rule gitlab \ + --endpoint gitlab=https://gitlab.corp.example.com \ + "glpat-xxxxxxxxxxxxxxxxxxxx" + +# Scan with an internal Artifactory validator target +kingfisher scan ./repo \ + --endpoint artifactory=http://localhost:8071 \ + --allow-internal-ips +``` + +Example endpoint config file: + +```yaml +endpoints: + github: https://ghe.corp.example.com + gitlab: https://gitlab.corp.example.com + gitea: https://gitea.corp.example.com + jira: https://jira.corp.example.com + confluence: https://wiki.corp.example.com + artifactory: http://localhost:8071 +``` + +```bash +kingfisher scan ./repo --endpoint-config ./kingfisher-endpoints.yml --allow-internal-ips +``` + **Rule 
prefix matching:** Use partial rule IDs like `opsgenie` instead of the full `kingfisher.opsgenie.1`. If the prefix matches multiple rules, **all matching rules with compatible variables are tried**: ```bash @@ -1077,8 +1128,11 @@ If you are scanning infrastructure that uses internal endpoints for credential v # Scan with SSRF protection disabled (allows requests to internal IPs) kingfisher scan --allow-internal-ips ./repo -# Also works with the validate command -kingfisher validate --allow-internal-ips --rule kingfisher.artifactory.1 +# Also works with direct validation against a self-hosted endpoint +kingfisher validate --allow-internal-ips \ + --endpoint artifactory=http://localhost:8071 \ + --rule kingfisher.artifactory.1 \ + "AKCp..." ``` > **Warning:** Only use `--allow-internal-ips` when you trust the content being scanned. Malicious content could cause Kingfisher to make requests to internal services. diff --git a/src/cli/global.rs b/src/cli/global.rs index d912392..79f1bd4 100644 --- a/src/cli/global.rs +++ b/src/cli/global.rs @@ -1,4 +1,5 @@ use std::io::IsTerminal; +use std::path::PathBuf; use std::sync::LazyLock; @@ -144,6 +145,16 @@ pub struct GlobalArgs { #[arg(global = true, long = "user-agent-suffix", value_name = "SUFFIX")] pub user_agent_suffix: Option, + /// Override provider API endpoints for validation/revocation (PROVIDER=URL), repeatable. + /// + /// Supported providers: github, gitlab, gitea, jira, jira-cloud, confluence, artifactory. + #[arg(global = true, long = "endpoint", value_name = "PROVIDER=URL")] + pub endpoint: Vec, + + /// YAML file containing provider endpoint overrides. 
+ #[arg(global = true, long = "endpoint-config", value_name = "FILE")] + pub endpoint_config: Option, + // Internal fields (not CLI arguments) #[clap(skip)] pub color: Mode, @@ -163,6 +174,8 @@ impl Default for GlobalArgs { self_update: false, no_update_check: false, user_agent_suffix: None, + endpoint: Vec::new(), + endpoint_config: None, color: Mode::Auto, progress: Mode::Auto, } diff --git a/src/direct_revoke.rs b/src/direct_revoke.rs index e2ae76f..f8847dc 100644 --- a/src/direct_revoke.rs +++ b/src/direct_revoke.rs @@ -20,6 +20,7 @@ use tracing::debug; use crate::{ cli::{commands::revoke::RevokeArgs, global::GlobalArgs}, liquid_filters::register_all, + provider_endpoints::{ProviderEndpointOverrides, hydrate_endpoint_globals_for_rule}, rule_loader::RuleLoader, template_vars::extract_template_vars, validation::GLOBAL_USER_AGENT, @@ -138,15 +139,22 @@ fn get_global_var(globals: &Object, name: &str) -> Option { /// Build the globals object for Liquid template rendering. fn build_globals( + rule_id: &str, secret: &str, args: &[String], variables: &[String], template_vars: &BTreeSet, + endpoint_overrides: &ProviderEndpointOverrides, ) -> Result { let mut globals = Object::new(); globals.insert("TOKEN".into(), Value::scalar(secret.to_string())); - let auto_assign_vars: Vec<&String> = template_vars.iter().filter(|v| *v != "TOKEN").collect(); + endpoint_overrides.apply_defaults(&mut globals); + + let auto_assign_vars: Vec<&String> = template_vars + .iter() + .filter(|v| *v != "TOKEN" && !globals.contains_key(v.as_str())) + .collect(); for (i, arg_value) in args.iter().enumerate() { if i < auto_assign_vars.len() { @@ -171,6 +179,8 @@ fn build_globals( globals.insert(name.into(), Value::scalar(value)); } + hydrate_endpoint_globals_for_rule(rule_id, &mut globals); + Ok(globals) } @@ -553,6 +563,7 @@ pub async fn run_direct_revocation( let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; let timeout = Duration::from_secs(args.timeout); + let 
endpoint_overrides = ProviderEndpointOverrides::from_global_args(global_args)?; let mut results = Vec::new(); @@ -597,7 +608,14 @@ pub async fn run_direct_revocation( } } - let globals = build_globals(&secret, &args.args, &args.variables, &template_vars)?; + let globals = build_globals( + &rule_id, + &secret, + &args.args, + &args.variables, + &template_vars, + &endpoint_overrides, + )?; if !non_token_vars.is_empty() && !args.args.is_empty() { debug!( @@ -1028,7 +1046,15 @@ mod tests { #[test] fn build_globals_sets_token() { let template_vars = BTreeSet::from(["TOKEN".to_string()]); - let globals = build_globals("my-secret", &[], &[], &template_vars).unwrap(); + let globals = build_globals( + "kingfisher.test.1", + "my-secret", + &[], + &[], + &template_vars, + &ProviderEndpointOverrides::default(), + ) + .unwrap(); assert_eq!(globals.get("TOKEN"), Some(Value::scalar("my-secret".to_string())).as_ref()); } @@ -1037,7 +1063,15 @@ mod tests { let template_vars = BTreeSet::from(["TOKEN".to_string(), "AKID".to_string(), "REGION".to_string()]); let args = vec!["my-akid".to_string(), "us-east-1".to_string()]; - let globals = build_globals("secret", &args, &[], &template_vars).unwrap(); + let globals = build_globals( + "kingfisher.test.1", + "secret", + &args, + &[], + &template_vars, + &ProviderEndpointOverrides::default(), + ) + .unwrap(); assert_eq!(globals.get("TOKEN"), Some(Value::scalar("secret".to_string())).as_ref()); assert_eq!(globals.get("AKID"), Some(Value::scalar("my-akid".to_string())).as_ref()); @@ -1048,7 +1082,15 @@ mod tests { fn build_globals_explicit_variables() { let template_vars = BTreeSet::from(["TOKEN".to_string(), "AKID".to_string()]); let vars = vec!["AKID=explicit-value".to_string()]; - let globals = build_globals("secret", &[], &vars, &template_vars).unwrap(); + let globals = build_globals( + "kingfisher.test.1", + "secret", + &[], + &vars, + &template_vars, + &ProviderEndpointOverrides::default(), + ) + .unwrap(); 
assert_eq!(globals.get("AKID"), Some(Value::scalar("explicit-value".to_string())).as_ref()); } @@ -1057,7 +1099,14 @@ mod tests { fn build_globals_invalid_var_format() { let template_vars = BTreeSet::new(); let vars = vec!["NO_EQUALS_SIGN".to_string()]; - let result = build_globals("secret", &[], &vars, &template_vars); + let result = build_globals( + "kingfisher.test.1", + "secret", + &[], + &vars, + &template_vars, + &ProviderEndpointOverrides::default(), + ); assert!(result.is_err()); assert!(result.unwrap_err().to_string().contains("Expected NAME=VALUE")); } @@ -1066,7 +1115,14 @@ mod tests { fn build_globals_empty_var_name() { let template_vars = BTreeSet::new(); let vars = vec!["=value".to_string()]; - let result = build_globals("secret", &[], &vars, &template_vars); + let result = build_globals( + "kingfisher.test.1", + "secret", + &[], + &vars, + &template_vars, + &ProviderEndpointOverrides::default(), + ); assert!(result.is_err()); assert!(result.unwrap_err().to_string().contains("cannot be empty")); } diff --git a/src/direct_validate.rs b/src/direct_validate.rs index f68caff..489189c 100644 --- a/src/direct_validate.rs +++ b/src/direct_validate.rs @@ -21,6 +21,7 @@ use tracing::debug; use crate::{ cli::{commands::validate::ValidateArgs, global::GlobalArgs}, liquid_filters::register_all, + provider_endpoints::{ProviderEndpointOverrides, hydrate_endpoint_globals_for_rule}, rule_loader::RuleLoader, rules::{HttpValidation, Validation, rule::Rule}, template_vars::extract_template_vars, @@ -210,18 +211,25 @@ fn extract_validation_vars(validation: &Validation) -> BTreeSet { /// - `variables`: Named variables in NAME=VALUE format (explicit overrides) /// - `template_vars`: Set of variable names used in the validation template fn build_globals( + rule_id: &str, secret: &str, args: &[String], variables: &[String], template_vars: &BTreeSet, + endpoint_overrides: &ProviderEndpointOverrides, ) -> Result { let mut globals = Object::new(); // Set TOKEN to the provided 
secret globals.insert("TOKEN".into(), Value::scalar(secret.to_string())); + endpoint_overrides.apply_defaults(&mut globals); + // Get non-TOKEN variables in alphabetical order for auto-assignment - let auto_assign_vars: Vec<&String> = template_vars.iter().filter(|v| *v != "TOKEN").collect(); + let auto_assign_vars: Vec<&String> = template_vars + .iter() + .filter(|v| *v != "TOKEN" && !globals.contains_key(v.as_str())) + .collect(); // Auto-assign --arg values to template variables for (i, arg_value) in args.iter().enumerate() { @@ -248,6 +256,8 @@ fn build_globals( globals.insert(name.into(), Value::scalar(value)); } + hydrate_endpoint_globals_for_rule(rule_id, &mut globals); + Ok(globals) } @@ -469,6 +479,7 @@ pub async fn run_direct_validation( // Build Liquid parser let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; + let endpoint_overrides = ProviderEndpointOverrides::from_global_args(global_args)?; let timeout = Duration::from_secs(args.timeout); let rate_limiter = @@ -525,7 +536,14 @@ pub async fn run_direct_validation( } } - let globals = build_globals(&secret, &args.args, &args.variables, &template_vars)?; + let globals = build_globals( + &rule_id, + &secret, + &args.args, + &args.variables, + &template_vars, + &endpoint_overrides, + )?; // Log auto-assignment info for debugging if !non_token_vars.is_empty() && !args.args.is_empty() { diff --git a/src/lib.rs b/src/lib.rs index 14452d6..33d6490 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,6 +40,7 @@ pub mod location; pub mod matcher; pub mod origin; pub mod parser; +pub mod provider_endpoints; pub mod pyc; pub mod reporter; pub mod rule_loader; diff --git a/src/provider_endpoints.rs b/src/provider_endpoints.rs new file mode 100644 index 0000000..6a2f3ca --- /dev/null +++ b/src/provider_endpoints.rs @@ -0,0 +1,409 @@ +use std::{collections::BTreeMap, fs, path::Path}; + +use anyhow::{Context, Result, anyhow, bail}; +use liquid::Object; +use liquid_core::{Value, ValueView}; +use 
serde::Deserialize; +use url::Url; + +use crate::cli::global::GlobalArgs; + +const GITHUB_API_BASE_URL: &str = "GITHUB_API_BASE_URL"; +const GITHUB_WEB_BASE_URL: &str = "GITHUB_WEB_BASE_URL"; +const GITLAB_API_BASE_URL: &str = "GITLAB_API_BASE_URL"; +const GITEA_API_BASE_URL: &str = "GITEA_API_BASE_URL"; +const JIRA_BASE_URL: &str = "JIRA_BASE_URL"; +const JIRA_CLOUD_BASE_URL: &str = "JIRA_CLOUD_BASE_URL"; +const CONFLUENCE_BASE_URL: &str = "CONFLUENCE_BASE_URL"; +const ARTIFACTORY_BASE_URL: &str = "ARTIFACTORY_BASE_URL"; + +#[derive(Debug, Clone, Default)] +pub struct ProviderEndpointOverrides { + config: EndpointVars, + cli: EndpointVars, +} + +#[derive(Debug, Clone, Default)] +struct EndpointVars { + values: BTreeMap<String, String>, +} + +#[derive(Debug, Deserialize, Default)] +struct EndpointConfigFile { + #[serde(default)] + endpoints: BTreeMap<String, String>, + #[serde(default)] + provider_endpoints: BTreeMap<String, String>, + #[serde(default)] + providers: BTreeMap<String, String>, +} + +impl ProviderEndpointOverrides { + pub fn from_global_args(global_args: &GlobalArgs) -> Result<Self> { + let config = match &global_args.endpoint_config { + Some(path) => EndpointVars::from_config_path(path)?, + None => EndpointVars::default(), + }; + let cli = EndpointVars::from_pairs(&global_args.endpoint)?; + Ok(Self { config, cli }) + } + + pub fn apply_defaults(&self, globals: &mut Object) { + self.config.apply(globals, false); + apply_builtin_defaults(globals); + self.cli.apply(globals, true); + } + + pub fn apply_scan_overrides(&self, globals: &mut Object) { + self.config.apply(globals, false); + apply_builtin_defaults(globals); + self.cli.apply(globals, true); + } +} + +impl EndpointVars { + fn from_config_path(path: &Path) -> Result<Self> { + let raw = fs::read_to_string(path) + .with_context(|| format!("Failed to read endpoint config from {}", path.display()))?; + let parsed: EndpointConfigFile = serde_yaml::from_str(&raw) + .with_context(|| format!("Failed to parse endpoint config {}", path.display()))?; + + let mut merged =
parsed.endpoints; + merged.extend(parsed.provider_endpoints); + merged.extend(parsed.providers); + Self::from_map(merged) + } + + fn from_pairs(pairs: &[String]) -> Result<Self> { + let mut map = BTreeMap::new(); + for pair in pairs { + let (provider, endpoint) = parse_assignment(pair)?; + map.insert(provider, endpoint); + } + Self::from_map(map) + } + + fn from_map(map: BTreeMap<String, String>) -> Result<Self> { + let mut values = BTreeMap::new(); + for (provider, endpoint) in map { + let normalized = normalize_endpoint_key(&provider); + match normalized.as_str() { + "github" => { + let github = normalize_github_endpoint(&endpoint)?; + values.insert(GITHUB_API_BASE_URL.to_string(), github.api_base_url); + values.insert(GITHUB_WEB_BASE_URL.to_string(), github.web_base_url); + } + "gitlab" => { + values.insert( + GITLAB_API_BASE_URL.to_string(), + normalize_api_base_url(&endpoint, "/api/v4")?, + ); + } + "gitea" => { + values.insert( + GITEA_API_BASE_URL.to_string(), + normalize_api_base_url(&endpoint, "/api/v1")?, + ); + } + "jira" | "jira-dc" => { + values.insert(JIRA_BASE_URL.to_string(), normalize_base_url(&endpoint)?); + } + "jira-cloud" => { + values.insert(JIRA_CLOUD_BASE_URL.to_string(), normalize_base_url(&endpoint)?); + } + "confluence" | "confluence-dc" => { + values.insert(CONFLUENCE_BASE_URL.to_string(), normalize_base_url(&endpoint)?); + } + "artifactory" | "jfrog" => { + values.insert( + ARTIFACTORY_BASE_URL.to_string(), + normalize_artifactory_base_url(&endpoint)?, + ); + } + _ => bail!( + "Unsupported endpoint provider '{}'.
Supported values: github, gitlab, gitea, jira, jira-cloud, confluence, artifactory", + provider + ), + } + } + Ok(Self { values }) + } + + fn apply(&self, globals: &mut Object, overwrite_existing: bool) { + for (name, value) in &self.values { + if overwrite_existing || !globals.contains_key(name.as_str()) { + globals.insert(name.clone().into(), Value::scalar(value.clone())); + } + } + } +} + +#[derive(Debug)] +struct GitHubEndpoint { + api_base_url: String, + web_base_url: String, +} + +pub fn hydrate_endpoint_globals_for_rule(rule_id: &str, globals: &mut Object) { + hydrate_github_globals(globals); + hydrate_artifactory_globals(globals); + hydrate_confluence_globals(globals); + hydrate_jira_dc_globals(globals); + if rule_id == "kingfisher.jira.2" { + hydrate_jira_cloud_globals(globals); + } +} + +pub fn endpoint_var_names() -> &'static [&'static str] { + &[ + GITHUB_API_BASE_URL, + GITHUB_WEB_BASE_URL, + GITLAB_API_BASE_URL, + GITEA_API_BASE_URL, + JIRA_BASE_URL, + JIRA_CLOUD_BASE_URL, + CONFLUENCE_BASE_URL, + ARTIFACTORY_BASE_URL, + ] +} + +fn hydrate_github_globals(globals: &mut Object) { + match (string_var(globals, GITHUB_API_BASE_URL), string_var(globals, GITHUB_WEB_BASE_URL)) { + (Some(api), None) => { + if let Ok(normalized) = normalize_github_endpoint(&api) { + globals.insert(GITHUB_API_BASE_URL.into(), Value::scalar(normalized.api_base_url)); + globals.insert(GITHUB_WEB_BASE_URL.into(), Value::scalar(normalized.web_base_url)); + } + } + (None, Some(web)) => { + if let Ok(normalized) = normalize_github_endpoint(&web) { + globals.insert(GITHUB_API_BASE_URL.into(), Value::scalar(normalized.api_base_url)); + globals.insert(GITHUB_WEB_BASE_URL.into(), Value::scalar(normalized.web_base_url)); + } + } + _ => {} + } +} + +fn hydrate_artifactory_globals(globals: &mut Object) { + if globals.contains_key(ARTIFACTORY_BASE_URL) { + return; + } + if let Some(jfrog_url) = string_var(globals, "JFROGURL") + && let Ok(base_url) = normalize_artifactory_base_url(&jfrog_url) 
+ { + globals.insert(ARTIFACTORY_BASE_URL.into(), Value::scalar(base_url)); + } +} + +fn hydrate_confluence_globals(globals: &mut Object) { + if globals.contains_key(CONFLUENCE_BASE_URL) { + return; + } + if let Some(domain) = string_var(globals, "CONFLUENCEDCDOMAIN") + && let Ok(base_url) = normalize_base_url(&domain) + { + globals.insert(CONFLUENCE_BASE_URL.into(), Value::scalar(base_url)); + } +} + +fn hydrate_jira_dc_globals(globals: &mut Object) { + if globals.contains_key(JIRA_BASE_URL) { + return; + } + if let Some(domain) = string_var(globals, "JIRADCDOMAIN") + && let Ok(base_url) = normalize_base_url(&domain) + { + globals.insert(JIRA_BASE_URL.into(), Value::scalar(base_url)); + } +} + +fn hydrate_jira_cloud_globals(globals: &mut Object) { + if globals.contains_key(JIRA_CLOUD_BASE_URL) { + return; + } + if let Some(domain) = string_var(globals, "DOMAIN") + && let Ok(base_url) = normalize_base_url(&domain) + { + globals.insert(JIRA_CLOUD_BASE_URL.into(), Value::scalar(base_url)); + } +} + +fn string_var(globals: &Object, name: &str) -> Option<String> { + globals.get(name).map(|value| value.to_kstr().to_string()).filter(|s| !s.is_empty()) +} + +fn apply_builtin_defaults(globals: &mut Object) { + for (name, value) in [ + (GITHUB_API_BASE_URL, "https://api.github.com"), + (GITHUB_WEB_BASE_URL, "https://github.com"), + (GITLAB_API_BASE_URL, "https://gitlab.com/api/v4"), + (GITEA_API_BASE_URL, "https://gitea.com/api/v1"), + ] { + if !globals.contains_key(name) { + globals.insert(name.into(), Value::scalar(value.to_string())); + } + } +} + +fn parse_assignment(raw: &str) -> Result<(String, String)> { + let (provider, endpoint) = raw + .split_once('=') + .ok_or_else(|| anyhow!("Invalid endpoint '{}'. Expected PROVIDER=URL", raw))?; + let provider = provider.trim(); + let endpoint = endpoint.trim(); + if provider.is_empty() { + bail!("Invalid endpoint '{}'. Provider name cannot be empty", raw); + } + if endpoint.is_empty() { + bail!("Invalid endpoint '{}'.
URL cannot be empty", raw); + } + Ok((provider.to_string(), endpoint.to_string())) +} + +fn normalize_endpoint_key(key: &str) -> String { + key.trim().to_ascii_lowercase().replace('_', "-") +} + +fn normalize_base_url(raw: &str) -> Result<String> { + let url = parse_url_or_assume_https(raw)?; + Ok(url_with_path(&url, url.path().trim_end_matches('/'))) +} + +fn normalize_api_base_url(raw: &str, api_suffix: &str) -> Result<String> { + let url = parse_url_or_assume_https(raw)?; + let path = url.path().trim_end_matches('/'); + let full_path = if path.is_empty() { + api_suffix.to_string() + } else if path.ends_with(api_suffix) { + path.to_string() + } else { + format!("{path}{api_suffix}") + }; + Ok(url_with_path(&url, &full_path)) +} + +fn normalize_artifactory_base_url(raw: &str) -> Result<String> { + let url = parse_url_or_assume_https(raw)?; + let mut path = url.path().trim_end_matches('/').to_string(); + if let Some(prefix) = path.strip_suffix("/artifactory") { + path = prefix.to_string(); + } + Ok(url_with_path(&url, &path)) +} + +fn normalize_github_endpoint(raw: &str) -> Result<GitHubEndpoint> { + let url = parse_url_or_assume_https(raw)?; + let host = url + .host_str() + .ok_or_else(|| anyhow!("Endpoint '{}' is missing a host", raw))?
+ .to_ascii_lowercase(); + let path = url.path().trim_end_matches('/'); + + if host == "api.github.com" { + return Ok(GitHubEndpoint { + api_base_url: "https://api.github.com".to_string(), + web_base_url: "https://github.com".to_string(), + }); + } + if host == "github.com" && path.is_empty() { + return Ok(GitHubEndpoint { + api_base_url: "https://api.github.com".to_string(), + web_base_url: "https://github.com".to_string(), + }); + } + + let (web_path, api_path) = if path.is_empty() { + ("".to_string(), "/api/v3".to_string()) + } else if let Some(prefix) = path.strip_suffix("/api/v3") { + (prefix.to_string(), path.to_string()) + } else { + (path.to_string(), format!("{path}/api/v3")) + }; + + Ok(GitHubEndpoint { + api_base_url: url_with_path(&url, &api_path), + web_base_url: url_with_path(&url, &web_path), + }) +} + +fn parse_url_or_assume_https(raw: &str) -> Result<Url> { + match Url::parse(raw.trim()) { + Ok(url) => Ok(url), + Err(url::ParseError::RelativeUrlWithoutBase) => { + Url::parse(&format!("https://{}", raw.trim())).with_context(|| { + format!("Invalid endpoint URL '{}'.
Use a full URL or hostname", raw) + }) + } + Err(err) => Err(anyhow!("Invalid endpoint URL '{}': {}", raw, err)), + } +} + +fn url_with_path(url: &Url, path: &str) -> String { + let mut out = url.clone(); + out.set_query(None); + out.set_fragment(None); + if path.is_empty() { + out.set_path(""); + } else { + out.set_path(path); + } + out.to_string().trim_end_matches('/').to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn github_endpoint_normalizes_host_only() { + let normalized = normalize_github_endpoint("ghe.corp.example.com").unwrap(); + assert_eq!(normalized.api_base_url, "https://ghe.corp.example.com/api/v3"); + assert_eq!(normalized.web_base_url, "https://ghe.corp.example.com"); + } + + #[test] + fn github_endpoint_normalizes_api_path() { + let normalized = normalize_github_endpoint("https://ghe.corp.example.com/api/v3").unwrap(); + assert_eq!(normalized.api_base_url, "https://ghe.corp.example.com/api/v3"); + assert_eq!(normalized.web_base_url, "https://ghe.corp.example.com"); + } + + #[test] + fn gitlab_endpoint_appends_api_path() { + assert_eq!( + normalize_api_base_url("gitlab.example.com/gitlab", "/api/v4").unwrap(), + "https://gitlab.example.com/gitlab/api/v4" + ); + } + + #[test] + fn artifactory_endpoint_strips_artifactory_suffix() { + assert_eq!( + normalize_artifactory_base_url("http://localhost:8071/artifactory").unwrap(), + "http://localhost:8071" + ); + } + + #[test] + fn jira_cloud_hydrates_from_legacy_domain() { + let mut globals = Object::new(); + globals.insert("DOMAIN".into(), Value::scalar("example.atlassian.net")); + hydrate_endpoint_globals_for_rule("kingfisher.jira.2", &mut globals); + assert_eq!( + string_var(&globals, JIRA_CLOUD_BASE_URL).as_deref(), + Some("https://example.atlassian.net") + ); + } + + #[test] + fn artifactory_hydrates_from_legacy_host() { + let mut globals = Object::new(); + globals.insert("JFROGURL".into(), Value::scalar("repo.example.com")); + 
hydrate_endpoint_globals_for_rule("kingfisher.artifactory.1", &mut globals); + assert_eq!( + string_var(&globals, ARTIFACTORY_BASE_URL).as_deref(), + Some("https://repo.example.com") + ); + } +} diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 4fde808..bbc25c5 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -23,6 +23,7 @@ use crate::{ gitea, github, gitlab, liquid_filters::register_all, matcher::MatcherStats, + provider_endpoints::ProviderEndpointOverrides, reporter::styles::Styles, rule_loader::RuleLoader, rule_profiling::ConcurrentRuleProfiler, @@ -46,12 +47,14 @@ use crate::{ validation_rate_limit::ValidationRateLimiter, }; -/// Shared validation dependencies: (liquid parser, HTTP clients, validation cache, rate limiter). +/// Shared validation dependencies: +/// (liquid parser, HTTP clients, validation cache, rate limiter, provider endpoint overrides). type ValidationDeps = Arc<( liquid::Parser, crate::validation::ValidationClients, Arc>, Option>, + Arc, )>; pub async fn run_scan( @@ -159,6 +162,7 @@ pub async fn run_async_scan( let validation_rate_limiter = ValidationRateLimiter::from_cli(args.validation_rps, &args.validation_rps_rule)? 
.map(Arc::new); + let provider_endpoints = Arc::new(ProviderEndpointOverrides::from_global_args(global_args)?); let validation_deps: Option = if !args.no_validate { info!("Starting secret validation phase..."); @@ -170,6 +174,7 @@ pub async fn run_async_scan( )?, Arc::new(SkipMap::new()), validation_rate_limiter.clone(), + Arc::clone(&provider_endpoints), ))) } else { None @@ -517,8 +522,8 @@ async fn run_validation_phase( access_map_collector: Option, ) -> Result<()> { if let Some(validation) = validation_deps { - let (parser, clients, cache, rate_limiter) = - (&validation.0, &validation.1, &validation.2, &validation.3); + let (parser, clients, cache, rate_limiter, provider_endpoints) = + (&validation.0, &validation.1, &validation.2, &validation.3, &validation.4); run_secret_validation( Arc::clone(datastore), parser, @@ -528,6 +533,7 @@ async fn run_validation_phase( match_range, access_map_collector, rate_limiter.clone(), + provider_endpoints.clone(), Duration::from_secs(args.validation_timeout), args.validation_retries, effective_max_validation_body_len(args), @@ -661,8 +667,8 @@ async fn run_parallel_scan( // Validate initial (non-repo) matches if let Some(validation) = validation_deps { - let (parser, clients, cache, rate_limiter) = - (&validation.0, &validation.1, &validation.2, &validation.3); + let (parser, clients, cache, rate_limiter, provider_endpoints) = + (&validation.0, &validation.1, &validation.2, &validation.3, &validation.4); let initial_match_count = { datastore.lock().unwrap().get_matches().len() }; if initial_match_count > 0 { run_secret_validation( @@ -674,6 +680,7 @@ async fn run_parallel_scan( Some(0..initial_match_count), access_map_collector.clone(), rate_limiter.clone(), + provider_endpoints.clone(), Duration::from_secs(args.validation_timeout), args.validation_retries, effective_max_validation_body_len(args), @@ -749,8 +756,13 @@ async fn run_parallel_scan( } if let Some(validation) = validation_deps.clone() { - let (parser, clients, 
cache, rate_limiter) = - (&validation.0, &validation.1, &validation.2, &validation.3); + let (parser, clients, cache, rate_limiter, provider_endpoints) = ( + &validation.0, + &validation.1, + &validation.2, + &validation.3, + &validation.4, + ); let match_count = { repo_datastore.lock().unwrap().get_matches().len() }; if match_count > 0 { @@ -763,6 +775,7 @@ async fn run_parallel_scan( Some(0..match_count), access_map.clone(), rate_limiter.clone(), + provider_endpoints.clone(), Duration::from_secs(args.validation_timeout), args.validation_retries, effective_max_validation_body_len(&args), diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index 43f7e84..16078c9 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -23,6 +23,7 @@ use crate::{ findings_store::{FindingsStore, FindingsStoreMessage}, location::OffsetSpan, matcher::OwnedBlobMatch, + provider_endpoints::ProviderEndpointOverrides, rules::rule::Validation, validation::{ CachedResponse, collect_variables_and_dependencies, utils, validate_single_match, @@ -421,6 +422,7 @@ pub async fn run_secret_validation( range: Option>, access_map: Option, rate_limiter: Option>, + provider_endpoints: Arc, validation_timeout: Duration, validation_retries: u32, max_body_len: usize, @@ -536,6 +538,7 @@ pub async fn run_secret_validation( let pb = pb.clone(); let access_map = access_map.clone(); let rate_limiter = rate_limiter.clone(); + let provider_endpoints = provider_endpoints.clone(); let empty_dep_vars = &empty_dep_vars; let empty_missing = &empty_missing; let empty_cache = empty_cache.clone(); @@ -577,6 +580,7 @@ pub async fn run_secret_validation( &cache_glob, access_map.as_ref(), rate_limiter.as_deref(), + &provider_endpoints, validation_timeout, validation_retries, max_body_len, @@ -690,6 +694,7 @@ pub async fn run_secret_validation( let cache_glob = cache.clone(); let access_map = access_map.clone(); let rate_limiter = rate_limiter.clone(); + let provider_endpoints = 
provider_endpoints.clone(); let validation_timeout = validation_timeout; let validation_retries = validation_retries; @@ -730,6 +735,7 @@ pub async fn run_secret_validation( let cache_glob = cache_glob.clone(); let access_map = access_map.clone(); let rate_limiter = rate_limiter.clone(); + let provider_endpoints = provider_endpoints.clone(); async move { validate_single( &mut rep, @@ -744,6 +750,7 @@ pub async fn run_secret_validation( &cache_glob, access_map.as_ref(), rate_limiter.as_deref(), + &provider_endpoints, validation_timeout, validation_retries, max_body_len, @@ -839,6 +846,7 @@ async fn validate_single( cache2: &Arc>, access_map: Option<&AccessMapCollector>, rate_limiter: Option<&ValidationRateLimiter>, + provider_endpoints: &Arc, validation_timeout: Duration, validation_retries: u32, max_body_len: usize, @@ -905,6 +913,7 @@ async fn validate_single( validation_timeout, validation_retries, rate_limiter, + provider_endpoints.as_ref(), max_body_len, ) .boxed(), diff --git a/src/validation.rs b/src/validation.rs index 2418663..be12ecd 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -23,6 +23,9 @@ use crate::{ cli::global::TlsMode, location::OffsetSpan, matcher::{OwnedBlobMatch, SerializableCaptures}, + provider_endpoints::{ + ProviderEndpointOverrides, endpoint_var_names, hydrate_endpoint_globals_for_rule, + }, rules::rule::Validation, validation_body::{self}, }; @@ -441,6 +444,7 @@ pub async fn validate_single_match( validation_timeout: Duration, validation_retries: u32, rate_limiter: Option<&crate::validation_rate_limit::ValidationRateLimiter>, + provider_endpoints: &ProviderEndpointOverrides, max_body_len: usize, ) { let fp = validation_dedup_key(m); @@ -456,6 +460,7 @@ pub async fn validate_single_match( validation_timeout, validation_retries, rate_limiter, + provider_endpoints, max_body_len, ) .boxed(), @@ -499,6 +504,7 @@ async fn timed_validate_single_match<'a>( validation_timeout: Duration, validation_retries: u32, rate_limiter: 
Option<&crate::validation_rate_limit::ValidationRateLimiter>, + provider_endpoints: &ProviderEndpointOverrides, max_body_len: usize, ) { // Select the appropriate HTTP client based on rule's TLS mode preference @@ -595,6 +601,8 @@ async fn timed_validate_single_match<'a>( let mut globals = Object::new(); populate_globals_from_captures(&mut globals, &captured_values); + hydrate_endpoint_globals_for_rule(m.rule.id(), &mut globals); + provider_endpoints.apply_scan_overrides(&mut globals); // Persist named captures (non-TOKEN) for validate/revoke command generation. // This is especially important for gRPC validators like Modal where TOKEN_ID is required. @@ -604,6 +612,13 @@ async fn timed_validate_single_match<'a>( } m.dependent_captures.entry(k.to_uppercase()).or_insert_with(|| v.clone()); } + for endpoint_var in endpoint_var_names() { + if let Some(value) = globals.get(*endpoint_var).and_then(|v| v.as_scalar()) { + m.dependent_captures + .entry((*endpoint_var).to_string()) + .or_insert_with(|| value.to_kstr().to_string()); + } + } { let rule_syntax = m.rule.syntax(); From cafa97f8d16b7a2d61d110885db3a1a015d8be92 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 27 Apr 2026 14:26:07 -0700 Subject: [PATCH 3/5] Updated rule --- .../kingfisher-rules/data/rules/anypoint.yml | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/crates/kingfisher-rules/data/rules/anypoint.yml b/crates/kingfisher-rules/data/rules/anypoint.yml index e632809..91b4b65 100644 --- a/crates/kingfisher-rules/data/rules/anypoint.yml +++ b/crates/kingfisher-rules/data/rules/anypoint.yml @@ -41,3 +41,90 @@ rules: - 200 type: StatusMatch url: https://anypoint.mulesoft.com/accounts/api/me + + - name: Anypoint OAuth Client ID + id: kingfisher.anypoint.2 + pattern: | + (?xi) + \b + anypoint + (?:.|[\n\r]){0,48}? + (?: + client[\s_.-]*id + | + app[\s_.-]*id + | + connected[\s_.-]*app[\s_.-]*id + ) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [0-9a-f]{32} + ) + \b + pattern_requirements: + min_digits: 4 + min_lowercase: 4 + min_entropy: 3.0 + confidence: medium + visible: false + examples: + - 'ANYPOINT_CLIENT_ID="4f2c9d1081ab7c3de56f9081a2bc4d7e"' + - 'anypoint connected app id: 9c7a21e45d6f0b8c3a1d4e6f7b2c9a10' + references: + - https://docs.mulesoft.com/access-management/connected-apps-developers + - https://docs.mulesoft.com/access-management/connected-app-bearer-token-example + + - name: Anypoint OAuth Client Secret + id: kingfisher.anypoint.3 + pattern: | + (?xi) + \b + anypoint + (?:.|[\n\r]){0,48}? + (?: + client[\s_.-]*secret + | + app[\s_.-]*secret + | + connected[\s_.-]*app[\s_.-]*secret + | + oauth[\s_.-]*secret + ) + (?:.|[\n\r]){0,16}? + \b + ( + [0-9A-Fa-f]{32} + ) + \b + pattern_requirements: + min_digits: 4 + min_entropy: 3.0 + confidence: medium + examples: + - 'ANYPOINT_CLIENT_SECRET="3E7f94b18c2d6a40f1b7e9c24d6a8b0f"' + - 'anypoint oauth secret = 6a9c1e2f4b7d8a0c3e5f1b2d4a6c8e0f' + depends_on_rule: + - rule_id: kingfisher.anypoint.2 + variable: CLIENT_ID + validation: + type: Http + content: + request: + method: POST + url: https://anypoint.mulesoft.com/accounts/api/v2/oauth2/token + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: "client_id={{ CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&grant_type=client_credentials" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + words: + - '"access_token"' + references: + - https://docs.mulesoft.com/access-management/connected-app-bearer-token-example + - https://docs.mulesoft.com/api-manager/latest/manage-client-apps-connected-apps-concept From bf6c7da4a4f0c4f1cf7f89810c87c5adcd1c80ea Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 28 Apr 2026 15:28:48 -0700 Subject: [PATCH 4/5] added blog posts --- .github/dependabot.yml | 22 +++++++++++++++++-- CHANGELOG.md | 3 ++- 
...8-beyond-detection-validate-map-revoke.md} | 14 +++++++----- ...2026-04-28-scan-github-org-for-secrets.md} | 2 +- docs-site/docs/changelog.md | 2 +- 5 files changed, 33 insertions(+), 10 deletions(-) rename docs-site/docs/blog/posts/{2026-04-26-beyond-detection-validate-map-revoke.md => 2026-04-28-beyond-detection-validate-map-revoke.md} (94%) rename docs-site/docs/blog/posts/{2026-04-26-scan-github-org-for-secrets.md => 2026-04-28-scan-github-org-for-secrets.md} (99%) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 0c6b041..97258d5 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -4,13 +4,31 @@ updates: directory: "/" schedule: interval: "weekly" - open-pull-requests-limit: 10 + open-pull-requests-limit: 25 + cooldown: + default-days: 7 + include: + - "*" + groups: + cargo-patch: + update-types: ["patch"] + cargo-minor: + update-types: ["minor"] - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" - open-pull-requests-limit: 10 + open-pull-requests-limit: 25 + cooldown: + default-days: 7 + include: + - "*" + groups: + actions-patch: + update-types: ["patch"] + actions-minor: + update-types: ["minor"] ignore: - dependency-name: "actions/checkout" update-types: ["version-update:semver-major"] diff --git a/CHANGELOG.md b/CHANGELOG.md index c9797e2..d4b293d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,9 @@ All notable changes to this project will be documented in this file. -## [v1.98.0] +## [unreleased v1.98.0] - Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs__`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. 
+- Added provider endpoint overrides for validation and revocation via global `--endpoint PROVIDER=URL` and `--endpoint-config FILE`, with built-in support for self-hosted GitHub, GitLab, Gitea, Jira, Confluence, and Artifactory instances. ## [v1.97.0] - **Report viewer cross-tool triage:** when a Kingfisher report is loaded alongside a Gitleaks or TruffleHog report, matching imported findings are enriched with Kingfisher's validation verdict, validation response, validate command, and revoke command. Matching is keyed on `commit + file + line` with a `file + line` fallback, and enriched rows show an "Enriched by Kingfisher" callout in the detail panel plus an "Enriched" chip in the findings table. Added a **Source** column to the findings table; a new **Duplicates Removed by Tool** dashboard panel showing per-tool cards for Kingfisher / TruffleHog / Gitleaks; and an upload-time **Deduplicate findings** toggle (on by default) so users can inspect the raw rows before fingerprint dedup when needed. diff --git a/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md b/docs-site/docs/blog/posts/2026-04-28-beyond-detection-validate-map-revoke.md similarity index 94% rename from docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md rename to docs-site/docs/blog/posts/2026-04-28-beyond-detection-validate-map-revoke.md index 54f2608..6b72120 100644 --- a/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md +++ b/docs-site/docs/blog/posts/2026-04-28-beyond-detection-validate-map-revoke.md @@ -1,5 +1,5 @@ --- -date: 2026-04-26 +date: 2026-04-28 title: "Beyond Detection: Live Validation, Blast Radius, and One-Command Revocation" description: > Detection alone is noise. Kingfisher answers the three questions that @@ -32,18 +32,22 @@ Kingfisher answers the three questions that actually matter: ## 1. 
Live validation, not just pattern matching -Out of Kingfisher's 820 standalone detectors, **484 include live validation -logic**. When a provider exposes a safe check call, Kingfisher uses that +Kingfisher can drastically reduce false positives by identifying +secrets that are still active and valid. + +When a provider exposes a safe check call, Kingfisher uses that provider's own API to report each credential as `Active`, `Inactive`, or `NotAttempted`. That changes the output from "thousands of regex matches" to a much shorter list of findings that actually authenticate today. -Validation runs automatically when you scan: +Validation runs automatically when you run a scan: ```bash -kingfisher scan github --organization my-org +kingfisher scan github --organization my-org --view-report + +kingfisher scan https://github.com/leaktk/fake-leaks.git --view-report ``` Or you can run it standalone when you've already pulled a suspicious value diff --git a/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md b/docs-site/docs/blog/posts/2026-04-28-scan-github-org-for-secrets.md similarity index 99% rename from docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md rename to docs-site/docs/blog/posts/2026-04-28-scan-github-org-for-secrets.md index 488c9fb..d768dae 100644 --- a/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md +++ b/docs-site/docs/blog/posts/2026-04-28-scan-github-org-for-secrets.md @@ -1,5 +1,5 @@ --- -date: 2026-04-26 +date: 2026-04-28 title: "Scanning an Entire GitHub Organization for Leaked Secrets" description: > Step-by-step guide to scanning every repository in a GitHub organization diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index 4e4c84d..a17a783 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -7,7 +7,7 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im All notable changes to this project will be documented in this 
file. -## [v1.98.0] +## [unreleased v1.98.0] - Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs__`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. - Added provider endpoint overrides for validation and revocation via global `--endpoint PROVIDER=URL` and `--endpoint-config FILE`, with built-in support for self-hosted GitHub, GitLab, Gitea, Jira, Confluence, and Artifactory instances. From 0b89e4b02f21b547423544d680a0397c4324e417 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 28 Apr 2026 19:21:44 -0700 Subject: [PATCH 5/5] added blog posts --- README.md | 12 +- crates/kingfisher-rules/data/rules/github.yml | 1 - .../kingfisher-rules/data/rules/minimax.yml | 2 + ...28-beyond-detection-validate-map-revoke.md | 4 +- .../2026-04-28-scan-github-org-for-secrets.md | 150 +++++++++++++++++- docs-site/docs/changelog.md | 2 +- docs-site/docs/index.md | 2 +- docs-site/docs/rules/builtin-rules.md | 22 ++- docs-site/mkdocs.yml | 2 +- docs-site/overrides/home.html | 2 +- docs-site/overrides/main.html | 2 +- src/provider_endpoints.rs | 4 +- tests/int_allowlist.rs | 2 + tests/int_bitbucket.rs | 2 + tests/int_dedup.rs | 2 + tests/int_github.rs | 2 + tests/int_gitlab.rs | 4 + tests/int_redact.rs | 2 + tests/int_slack.rs | 2 + tests/int_teams.rs | 2 + tests/int_validation_cache.rs | 2 + tests/int_vulnerable_files.rs | 2 + 22 files changed, 203 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index da0c81a..6760690 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ License - Detection Rules + Detection Rules
@@ -20,7 +20,7 @@ Kingfisher is an open source secret scanner and **live secret validation** tool built in Rust. -It combines Intel's SIMD-accelerated regex engine (Hyperscan) with language-aware parsing to achieve high accuracy at massive scale, and ships with [942 built-in rules](https://mongodb.github.io/kingfisher/rules/builtin-rules/) to detect, **validate**, and triage leaked API keys, tokens, and credentials before they ever reach production. +It combines Intel's SIMD-accelerated regex engine (Hyperscan) with language-aware parsing to achieve high accuracy at massive scale, and ships with [945 built-in rules](https://mongodb.github.io/kingfisher/rules/builtin-rules/) to detect, **validate**, and triage leaked API keys, tokens, and credentials before they ever reach production. Kingfisher also ships a **browser-based report viewer** that visualizes and triages findings from Kingfisher **and** from Gitleaks and TruffleHog JSON reports — so you can import scans from other tools and triage them in the same UI. A [hosted copy of the viewer](https://mongodb.github.io/kingfisher/viewer/) is published on the Kingfisher docs site. 
@@ -54,9 +54,9 @@ Kingfisher is a high-performance, open source secret detection tool for source c -### Performance, Accuracy, and 942 Rules +### Performance, Accuracy, and 945 Rules - **Performance**: multithreaded, Hyperscan‑powered scanning built for huge codebases -- **Extensible rules**: 942 built-in rules (484 with live validation) plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) +- **Extensible rules**: 945 built-in rules (485 with live validation) plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) - **Validate & Revoke**: live validation of discovered secrets, plus direct revocation for supported platforms (GitHub, GitLab, Slack, AWS, GCP, and more) ([docs/USAGE.md](/docs/USAGE.md)) - **Revocation support matrix**: current built-in revocation coverage across providers and rule IDs ([docs/REVOCATION_PROVIDERS.md](/docs/REVOCATION_PROVIDERS.md)) - **Blast Radius Mapping**: instantly map leaked keys to their effective cloud identities and exposed resources with `--access-map`. Supports 42 providers (see table below). @@ -378,7 +378,7 @@ kingfisher scan /path/to/scan --access-map --view-report # Detection Rules -Kingfisher ships with [942 built-in rules](crates/kingfisher-rules/data/rules/) covering cloud keys, AI tokens, CI/CD secrets, database credentials, and SaaS API keys. Below is an overview — see the full list in [crates/kingfisher-rules/data/rules/](crates/kingfisher-rules/data/rules/): +Kingfisher ships with [945 built-in rules](crates/kingfisher-rules/data/rules/) covering cloud keys, AI tokens, CI/CD secrets, database credentials, and SaaS API keys. 
Below is an overview — see the full list in [crates/kingfisher-rules/data/rules/](crates/kingfisher-rules/data/rules/): | Category | What we catch | |----------|---------------| @@ -395,7 +395,7 @@ Kingfisher ships with [942 built-in rules](crates/kingfisher-rules/data/rules/) ## Write Custom Rules -Of Kingfisher's 942 built-in rules, 484 include HTTP and service-specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential. +Of Kingfisher's 945 built-in rules, 485 include HTTP and service-specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential. However, you may want to add your own custom rules, or modify a detection to better suit your needs / environment. diff --git a/crates/kingfisher-rules/data/rules/github.yml b/crates/kingfisher-rules/data/rules/github.yml index d59a574..2e33bb6 100644 --- a/crates/kingfisher-rules/data/rules/github.yml +++ b/crates/kingfisher-rules/data/rules/github.yml @@ -371,7 +371,6 @@ rules: ghs_[0-9]+_ [A-Za-z0-9_-]+ \. [A-Za-z0-9_-]+ \. 
[A-Za-z0-9_-]+ ) - \b min_entropy: 3.5 examples: - 'ghs_12345_eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJpYXQiOjE3NDU1NjgwMDAsImV4cCI6MTc0NTU2ODM2MCwiaXNzIjoiMTIzNDUiLCJzdWIiOiJnaXRodWJ8MTIzNDUifQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c' diff --git a/crates/kingfisher-rules/data/rules/minimax.yml b/crates/kingfisher-rules/data/rules/minimax.yml index 520b827..f517d28 100644 --- a/crates/kingfisher-rules/data/rules/minimax.yml +++ b/crates/kingfisher-rules/data/rules/minimax.yml @@ -28,3 +28,5 @@ rules: - type: StatusMatch status: [200] - type: JsonValid + - type: WordMatch + words: ['"status_code":0'] diff --git a/docs-site/docs/blog/posts/2026-04-28-beyond-detection-validate-map-revoke.md b/docs-site/docs/blog/posts/2026-04-28-beyond-detection-validate-map-revoke.md index 6b72120..3716609 100644 --- a/docs-site/docs/blog/posts/2026-04-28-beyond-detection-validate-map-revoke.md +++ b/docs-site/docs/blog/posts/2026-04-28-beyond-detection-validate-map-revoke.md @@ -26,7 +26,7 @@ Kingfisher answers the three questions that actually matter: 1. **Is this credential alive right now?** 2. **What can it reach?** -3. **Can we kill it from here?** +3. **Can we revoke it from here?** @@ -114,7 +114,7 @@ clickable tree: identity at the root, then services, then individual resources and permissions. It is a much faster way to explain severity to an incident commander or manager than pasting IAM JSON into chat. -## 3. Revocation — kill the token from where you found it +## 3. Revocation — revoke the token from where you found it Validation tells you a credential is live. Blast radius tells you why it's urgent. Revocation closes the loop. 
diff --git a/docs-site/docs/blog/posts/2026-04-28-scan-github-org-for-secrets.md b/docs-site/docs/blog/posts/2026-04-28-scan-github-org-for-secrets.md index d768dae..9726b7f 100644 --- a/docs-site/docs/blog/posts/2026-04-28-scan-github-org-for-secrets.md +++ b/docs-site/docs/blog/posts/2026-04-28-scan-github-org-for-secrets.md @@ -145,8 +145,8 @@ jq '.findings[] | select(.validation.status == "Active")' findings.json Then prioritize by blast radius. For AWS, GCP, GitHub, GitLab, and Slack tokens, Kingfisher can already map what each credential can access. Look at -the `access_map` field in JSON output, or the **Blast Radius** panel in the -HTML report. +the `access_map` field in JSON output, or the **Access Map** panel in the +HTML report (`kingfisher view ./report.json` or `kingfisher scan /path/to/code --view-report`). ## Revoke from the CLI @@ -165,9 +165,149 @@ for the schema and the current approach. The first scan gives you a baseline. The real value comes from running the same workflow continuously so new leaks are caught within hours instead of -months. A simple starting point is a nightly GitHub Action or scheduled CI -job that runs the org scan, diffs against yesterday's findings, and alerts on -net-new live credentials. +months. A practical starting point is a scheduled GitHub Action in a dedicated +security repository. For the token, prefer a fine-grained PAT scoped to the +target organization with read-only access to repository contents and +organization metadata, or a GitHub App installation token if you're operating +at scale — a classic PAT with `repo` works but grants more than the scan +needs.
Store it in `KF_GITHUB_TOKEN`, pin a specific Kingfisher image tag (a +floating `:latest` will silently change findings between runs as rules +update), and upload the JSON report as an artifact: + +```yaml +name: nightly-org-secret-scan + +on: + schedule: + - cron: "17 3 * * *" + workflow_dispatch: + +concurrency: + group: kingfisher-nightly + cancel-in-progress: false + +jobs: + scan: + runs-on: ubuntu-latest + timeout-minutes: 360 + permissions: {} + steps: + - name: Prepare output directory + run: mkdir -p reports + + - name: Scan the GitHub organization + env: + KF_GITHUB_TOKEN: ${{ secrets.KF_GITHUB_TOKEN }} + run: | + docker run --rm \ + -e KF_GITHUB_TOKEN \ + -v "$PWD/reports:/reports" \ + ghcr.io/mongodb/kingfisher:v \ + scan github --organization my-org \ + --git-history none \ + --format json \ + --output /reports/findings.json + + - name: Upload scan report + if: always() + uses: actions/upload-artifact@v4 + with: + name: kingfisher-findings-${{ github.run_id }} + path: reports/findings.json +``` + +A few notes on the choices above. `--git-history none` scans only what's +currently checked out at `HEAD` of each repo; for a midsize org this can be +the difference between a job that finishes in minutes and one that runs for +hours and exhausts the runner's ~14 GB of free disk. If you also need +historical coverage, run a *separate weekly* job with `--git-history full` +rather than paying that cost every night. The same goes for +`--repo-artifacts`, which fetches each repo's issues, wiki, and gists — it's +worth running, just not nightly. `concurrency` keeps a slow run from piling +up on the next cron tick, `timeout-minutes` makes the run-time budget explicit +(360 is the six-hour default; lower it to stop a hung run sooner), and `if: always()` on the upload step ensures you +still get the report even when the scan exits non-zero (e.g. once you start +gating the workflow on Active findings). The run-ID-suffixed artifact name +makes it easy to diff last night's report against tonight's.
+ +For larger orgs, consider sharding by feeding `gh repo list` into a job +matrix so several runners scan in parallel — the total minutes are similar, +but each runner gets its own disk budget and the wall-clock time drops +sharply. Above a certain size, a self-hosted runner (or a dedicated VM +running the same `docker run` command on cron) becomes cheaper and removes +the disk cap entirely. + +From there, add whatever response path fits your process: open an issue, post +to Slack, diff against the previous artifact, or fail the workflow if `jq` +finds any `Active` credentials in `findings.json`. + +### A weekly deep scan + +The nightly above is intentionally narrow: current `HEAD` content, no +ancillary artifacts. Pair it with a *weekly* job that pays the cost of full +history and `--repo-artifacts` so issues, wiki pages, and rewritten commits +don't slip through unnoticed: + +```yaml +name: weekly-org-deep-scan + +on: + schedule: + - cron: "23 4 * * 6" # Saturday 04:23 UTC + workflow_dispatch: + +concurrency: + group: kingfisher-weekly + cancel-in-progress: false + +jobs: + scan: + runs-on: ubuntu-latest + timeout-minutes: 1080 # GitHub-hosted runners stop jobs at 6h; values above 360 only help on self-hosted runners + permissions: {} + steps: + - name: Prepare output directory + run: mkdir -p reports + + - name: Deep-scan the GitHub organization + env: + KF_GITHUB_TOKEN: ${{ secrets.KF_GITHUB_TOKEN }} + run: | + docker run --rm \ + -e KF_GITHUB_TOKEN \ + -v "$PWD/reports:/reports" \ + ghcr.io/mongodb/kingfisher:v \ + scan github --organization my-org \ + --git-history full \ + --repo-artifacts \ + --format json \ + --output /reports/findings.json + + - name: Upload deep-scan report + if: always() + uses: actions/upload-artifact@v4 + with: + name: kingfisher-deep-${{ github.run_id }} + path: reports/findings.json +``` + +The deep scan is where `ubuntu-latest`'s ~14 GB disk limit will bite first.
+If your org is large enough that the weekly job fails on disk or runs past +its timeout, that's the signal to shard the repo list across a job matrix +or move this workload to a self-hosted runner. A simple matrix looks like: + +```yaml +strategy: + fail-fast: false + matrix: + shard: [0, 1, 2, 3] +# ...then in the scan step, list repos with `gh repo list my-org --limit 1000` +# and filter to those whose name hash mod 4 == matrix.shard, scanning each +# with `kingfisher scan --git-history full --repo-artifacts`. +``` + +Each shard gets its own runner and its own disk budget, and you can upload +one artifact per shard for triage. ## What's next diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index a17a783..30e9c0d 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -8,7 +8,7 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im All notable changes to this project will be documented in this file. ## [unreleased v1.98.0] -- Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs__`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. +- Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs__`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. - Added provider endpoint overrides for validation and revocation via global `--endpoint PROVIDER=URL` and `--endpoint-config FILE`, with built-in support for self-hosted GitHub, GitLab, Gitea, Jira, Confluence, and Artifactory instances. 
## [v1.97.0] diff --git a/docs-site/docs/index.md b/docs-site/docs/index.md index 56a1c6a..b4ae0e0 100644 --- a/docs-site/docs/index.md +++ b/docs-site/docs/index.md @@ -2,7 +2,7 @@ title: Kingfisher — Open Source Secret Scanner with Live Validation description: >- Kingfisher is an open source secret scanner with live validation, blast radius - mapping, and credential revocation. 943 detection rules (485 with live validation), + mapping, and credential revocation. 945 detection rules (485 with live validation), plus a browser-based report viewer that also triages Gitleaks and TruffleHog output. Built in Rust by MongoDB. template: home.html diff --git a/docs-site/docs/rules/builtin-rules.md b/docs-site/docs/rules/builtin-rules.md index e86cf5c..19308a8 100644 --- a/docs-site/docs/rules/builtin-rules.md +++ b/docs-site/docs/rules/builtin-rules.md @@ -1,12 +1,12 @@ --- title: "Built-in Rules List" -description: "Complete list of all 943 built-in secret detection rules in Kingfisher. Searchable and filterable by provider, confidence level, and validation support." +description: "Complete list of all 945 built-in secret detection rules in Kingfisher. Searchable and filterable by provider, confidence level, and validation support." --- # Built-in Rules -Kingfisher ships with **943 detection rules** across **581 providers** -(821 detectors + 122 dependent rules). +Kingfisher ships with **945 detection rules** across **581 providers** +(822 detectors + 123 dependent rules). Of these, **485** include live validation and **50** support direct revocation. !!! tip "Search" @@ -356,6 +356,22 @@ Of these, **485** include live validation and **50** support direct revocation. 
+Anypoint +Anypoint OAuth Client ID +kingfisher.anypoint.2 +Medium + + + + +Anypoint +Anypoint OAuth Client Secret +kingfisher.anypoint.3 +Medium +Yes + + + Apify Apify API Token kingfisher.apify.1 diff --git a/docs-site/mkdocs.yml b/docs-site/mkdocs.yml index 49c42b7..300d969 100644 --- a/docs-site/mkdocs.yml +++ b/docs-site/mkdocs.yml @@ -1,7 +1,7 @@ site_name: Kingfisher site_url: https://mongodb.github.io/kingfisher site_description: >- - Open source secret scanner with live validation. 943 detection rules, + Open source secret scanner with live validation. 945 detection rules, blast radius mapping, credential revocation, and a browser-based report viewer that also imports Gitleaks and TruffleHog output. Built in Rust by MongoDB. diff --git a/docs-site/overrides/home.html b/docs-site/overrides/home.html index 8d98d0a..fed8406 100644 --- a/docs-site/overrides/home.html +++ b/docs-site/overrides/home.html @@ -36,7 +36,7 @@
- 943 + 945 Detection Rules
diff --git a/docs-site/overrides/main.html b/docs-site/overrides/main.html index 4e08c9e..2804a17 100644 --- a/docs-site/overrides/main.html +++ b/docs-site/overrides/main.html @@ -7,7 +7,7 @@ "@context": "https://schema.org", "@type": "SoftwareApplication", "name": "Kingfisher", - "description": "Open source secret scanner with live validation. 943 detection rules, blast radius mapping, and credential revocation.", + "description": "Open source secret scanner with live validation. 945 detection rules, blast radius mapping, and credential revocation.", "applicationCategory": "DeveloperApplication", "operatingSystem": "Linux, macOS, Windows", "license": "https://opensource.org/licenses/Apache-2.0", diff --git a/src/provider_endpoints.rs b/src/provider_endpoints.rs index 6a2f3ca..2cde84a 100644 --- a/src/provider_endpoints.rs +++ b/src/provider_endpoints.rs @@ -55,7 +55,7 @@ impl ProviderEndpointOverrides { } pub fn apply_scan_overrides(&self, globals: &mut Object) { - self.config.apply(globals, false); + self.config.apply(globals, true); apply_builtin_defaults(globals); self.cli.apply(globals, true); } @@ -121,7 +121,7 @@ impl EndpointVars { ); } _ => bail!( - "Unsupported endpoint provider '{}'. Supported values: github, gitlab, gitea, jira, jira-cloud, confluence, artifactory", + "Unsupported endpoint provider '{}'. 
Supported values: github, gitlab, gitea, jira (alias: jira-dc), jira-cloud, confluence (alias: confluence-dc), artifactory (alias: jfrog)", provider ), } diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs index e4fcd6b..db74ecc 100644 --- a/tests/int_allowlist.rs +++ b/tests/int_allowlist.rs @@ -198,6 +198,8 @@ fn run_skiplist(skip_regex: Vec, skip_skipword: Vec) -> Result Result<()> { user_agent_suffix: None, tls_mode: TlsMode::Strict, allow_internal_ips: false, + endpoint: Vec::new(), + endpoint_config: None, }; let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 32b0955..76fa724 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -203,6 +203,8 @@ rules: user_agent_suffix: None, tls_mode: TlsMode::Strict, allow_internal_ips: false, + endpoint: Vec::new(), + endpoint_config: None, }; // ── load rules once ───────────────────────────────────────────── diff --git a/tests/int_github.rs b/tests/int_github.rs index b85c617..4954278 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -190,6 +190,8 @@ fn test_github_remote_scan() -> Result<()> { user_agent_suffix: None, tls_mode: TlsMode::Strict, allow_internal_ips: false, + endpoint: Vec::new(), + endpoint_config: None, }; // Create in-memory datastore let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index b916cb5..ce9fd52 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -188,6 +188,8 @@ fn test_gitlab_remote_scan() -> Result<()> { user_agent_suffix: None, tls_mode: TlsMode::Strict, allow_internal_ips: false, + endpoint: Vec::new(), + endpoint_config: None, }; let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); @@ -364,6 +366,8 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { user_agent_suffix: None, tls_mode: TlsMode::Strict, allow_internal_ips: false, + endpoint: Vec::new(), + endpoint_config: None, }; 
let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); diff --git a/tests/int_redact.rs b/tests/int_redact.rs index c69cb14..fb0e01c 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -166,6 +166,8 @@ async fn test_redact_hashes_finding_values() -> Result<()> { user_agent_suffix: None, tls_mode: TlsMode::Strict, allow_internal_ips: false, + endpoint: Vec::new(), + endpoint_config: None, }; let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; diff --git a/tests/int_slack.rs b/tests/int_slack.rs index 4aa0f4e..d655297 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -333,6 +333,8 @@ async fn test_scan_slack_messages() -> Result<()> { user_agent_suffix: None, tls_mode: TlsMode::Strict, allow_internal_ips: false, + endpoint: Vec::new(), + endpoint_config: None, }; let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); diff --git a/tests/int_teams.rs b/tests/int_teams.rs index 625ede8..be9734c 100644 --- a/tests/int_teams.rs +++ b/tests/int_teams.rs @@ -207,6 +207,8 @@ async fn test_scan_teams_messages() -> Result<()> { user_agent_suffix: None, tls_mode: TlsMode::Strict, allow_internal_ips: false, + endpoint: Vec::new(), + endpoint_config: None, }; let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index e4ade67..28ad0eb 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -264,6 +264,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> { user_agent_suffix: None, tls_mode: TlsMode::Strict, allow_internal_ips: true, + endpoint: Vec::new(), + endpoint_config: None, }; let update_status = UpdateStatus::default(); diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index f37eaf7..f7a1056 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -337,6 +337,8 @@ impl TestContext { user_agent_suffix: None, 
tls_mode: TlsMode::Strict, allow_internal_ips: false, + endpoint: Vec::new(), + endpoint_config: None, }; let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir)));