Added provider endpoint overrides for validation and revocation via global --endpoint PROVIDER=URL and --endpoint-config FILE, with built-in support for self-hosted GitHub, GitLab, Gitea, Jira, Confluence, and Artifactory instances.

This commit is contained in:
Mick Grove 2026-04-27 13:20:16 -07:00
commit 19dafa42ea
19 changed files with 790 additions and 141 deletions

View file

@ -32,7 +32,7 @@ rules:
- 200
type: StatusMatch
- type: JsonValid
url: https://{{ JFROGURL }}/artifactory/api/repositories
url: '{{ ARTIFACTORY_BASE_URL }}/artifactory/api/repositories'
references:
- https://jfrog.com/help/r/jfrog-platform-administration-documentation/access-tokens
- https://jfrog.com/help/r/jfrog-rest-apis/authentication
@ -93,7 +93,7 @@ rules:
- status:
- 200
type: StatusMatch
url: https://{{ JFROGURL }}/artifactory/api/repositories
url: '{{ ARTIFACTORY_BASE_URL }}/artifactory/api/repositories'
references:
- https://jfrog.com/help/r/jfrog-platform-administration-documentation/access-tokens
- https://jfrog.com/help/r/jfrog-rest-apis/authentication

View file

@ -40,7 +40,7 @@ rules:
- type: WordMatch
words:
- '"type":"known"'
url: https://{{ CONFLUENCEDCDOMAIN }}/rest/api/user/current
url: '{{ CONFLUENCE_BASE_URL }}/rest/api/user/current'
depends_on_rule:
- rule_id: kingfisher.confluence.2
variable: CONFLUENCEDCDOMAIN

View file

@ -34,7 +34,7 @@ rules:
content:
request:
method: GET
url: https://gitea.com/api/v1/user
url: '{{ GITEA_API_BASE_URL }}/user'
headers:
Authorization: token {{ TOKEN }}
Accept: application/json

View file

@ -28,7 +28,7 @@ rules:
content:
request:
method: GET
url: https://api.github.com/user
url: '{{ GITHUB_API_BASE_URL }}/user'
headers:
Authorization: token {{ TOKEN }}
Accept: application/vnd.github+json
@ -44,7 +44,7 @@ rules:
content:
request:
method: POST
url: https://api.github.com/credentials/revoke
url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke'
headers:
Accept: application/vnd.github+json
X-GitHub-Api-Version: 2026-03-10
@ -84,7 +84,7 @@ rules:
content:
request:
method: GET
url: https://api.github.com/user
url: '{{ GITHUB_API_BASE_URL }}/user'
headers:
Authorization: token {{ TOKEN }}
Accept: application/vnd.github+json
@ -100,7 +100,7 @@ rules:
content:
request:
method: POST
url: https://api.github.com/credentials/revoke
url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke'
headers:
Accept: application/vnd.github+json
X-GitHub-Api-Version: 2026-03-10
@ -137,7 +137,7 @@ rules:
content:
request:
method: GET
url: https://api.github.com/user
url: '{{ GITHUB_API_BASE_URL }}/user'
headers:
Authorization: token {{ TOKEN }}
Accept: application/vnd.github+json
@ -153,7 +153,7 @@ rules:
content:
request:
method: POST
url: https://api.github.com/credentials/revoke
url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke'
headers:
Accept: application/vnd.github+json
X-GitHub-Api-Version: 2026-03-10
@ -189,7 +189,7 @@ rules:
content:
request:
method: GET
url: https://api.github.com/user
url: '{{ GITHUB_API_BASE_URL }}/user'
headers:
Authorization: token {{ TOKEN }}
Accept: application/vnd.github+json
@ -205,7 +205,7 @@ rules:
content:
request:
method: POST
url: https://api.github.com/credentials/revoke
url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke'
headers:
Accept: application/vnd.github+json
X-GitHub-Api-Version: 2026-03-10
@ -234,7 +234,7 @@ rules:
content:
request:
method: GET
url: https://api.github.com/user
url: '{{ GITHUB_API_BASE_URL }}/user'
headers:
Authorization: token {{ TOKEN }}
Accept: application/vnd.github+json
@ -250,7 +250,7 @@ rules:
content:
request:
method: DELETE
url: https://api.github.com/installation/token
url: '{{ GITHUB_API_BASE_URL }}/installation/token'
headers:
Authorization: token {{ TOKEN }}
Accept: application/vnd.github+json
@ -281,7 +281,7 @@ rules:
content:
request:
method: GET
url: https://api.github.com/user
url: '{{ GITHUB_API_BASE_URL }}/user'
headers:
Authorization: token {{ TOKEN }}
Accept: application/vnd.github+json
@ -297,7 +297,7 @@ rules:
content:
request:
method: POST
url: https://api.github.com/credentials/revoke
url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke'
headers:
Accept: application/vnd.github+json
X-GitHub-Api-Version: 2026-03-10
@ -346,7 +346,7 @@ rules:
content:
request:
method: POST
url: "https://github.com/login/oauth/access_token"
url: '{{ GITHUB_WEB_BASE_URL }}/login/oauth/access_token'
headers:
Accept: "application/json"
Content-Type: "application/json"
@ -383,7 +383,7 @@ rules:
content:
request:
method: GET
url: https://api.github.com/user
url: '{{ GITHUB_API_BASE_URL }}/user'
headers:
Authorization: token {{ TOKEN }}
Accept: application/vnd.github+json
@ -399,11 +399,11 @@ rules:
content:
request:
method: DELETE
url: https://api.github.com/installation/token
url: '{{ GITHUB_API_BASE_URL }}/installation/token'
headers:
Authorization: token {{ TOKEN }}
Accept: application/vnd.github+json
response_matcher:
- report_response: true
- type: StatusMatch
status: [204]
status: [204]

View file

@ -34,7 +34,7 @@ rules:
- type: WordMatch
words:
- '"id"'
url: https://gitlab.com/api/v4/personal_access_tokens/self
url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self'
revocation:
type: Http
content:
@ -46,7 +46,7 @@ rules:
- report_response: true
- type: StatusMatch
status: [204]
url: https://gitlab.com/api/v4/personal_access_tokens/self
url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self'
- name: GitLab Runner Registration Token
id: kingfisher.gitlab.2
@ -92,7 +92,7 @@ rules:
- '"token is missing"'
- '"403 Forbidden"'
negative: true
url: https://gitlab.com/api/v4/runners/verify
url: '{{ GITLAB_API_BASE_URL }}/runners/verify'
- name: GitLab Pipeline Trigger Token
id: kingfisher.gitlab.3
@ -131,7 +131,7 @@ rules:
- '"token is missing"'
- '"403 Forbidden"'
negative: true
url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }}
url: '{{ GITLAB_API_BASE_URL }}/ci/pipeline_triggers/{{ TOKEN }}'
- name: GitLab Private Token - Routable Format
id: kingfisher.gitlab.4
pattern: |
@ -178,7 +178,7 @@ rules:
- type: WordMatch
words:
- '"id"'
url: https://gitlab.com/api/v4/personal_access_tokens/self
url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self'
revocation:
type: Http
content:
@ -190,7 +190,7 @@ rules:
- report_response: true
- type: StatusMatch
status: [204]
url: https://gitlab.com/api/v4/personal_access_tokens/self
url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self'
- name: GitLab CI/CD Job Token
id: kingfisher.gitlab.5
@ -219,7 +219,7 @@ rules:
content:
request:
method: GET
url: https://gitlab.com/api/v4/job
url: '{{ GITLAB_API_BASE_URL }}/job'
headers:
JOB-TOKEN: '{{ TOKEN }}'
response_matcher:
@ -393,7 +393,7 @@ rules:
- '"token is missing"'
- '"403 Forbidden"'
negative: true
url: https://gitlab.com/api/v4/runners/verify
url: '{{ GITLAB_API_BASE_URL }}/runners/verify'
- name: GitLab Runner Authentication Token - Routable Format
id: kingfisher.gitlab.13
@ -436,7 +436,7 @@ rules:
- '"token is missing"'
- '"403 Forbidden"'
negative: true
url: https://gitlab.com/api/v4/runners/verify
url: '{{ GITLAB_API_BASE_URL }}/runners/verify'
- name: GitLab SCIM Token
id: kingfisher.gitlab.14
@ -481,7 +481,7 @@ rules:
content:
request:
method: GET
url: https://gitlab.com/api/v4/user
url: '{{ GITLAB_API_BASE_URL }}/user'
headers:
Cookie: "_gitlab_session={{ TOKEN }}"
response_matcher:

View file

@ -53,7 +53,7 @@ rules:
- status:
- 200
type: StatusMatch
url: https://{{ DOMAIN }}/rest/api/3/dashboard
url: '{{ JIRA_CLOUD_BASE_URL }}/rest/api/3/dashboard'
references:
- https://developer.atlassian.com/cloud/jira/platform/basic-auth-for-rest-apis/
depends_on_rule:
@ -97,7 +97,7 @@ rules:
- 200
type: StatusMatch
- type: JsonValid
url: https://{{ JIRADCDOMAIN }}/rest/api/latest/myself
url: '{{ JIRA_BASE_URL }}/rest/api/latest/myself'
revocation:
type: HttpMultiStep
content:
@ -105,7 +105,7 @@ rules:
- name: lookup_token_id
request:
method: GET
url: https://{{ JIRADCDOMAIN }}/rest/pat/latest/tokens
url: '{{ JIRA_BASE_URL }}/rest/pat/latest/tokens'
headers:
Accept: application/json
Authorization: Bearer {{ TOKEN }}
@ -120,7 +120,7 @@ rules:
- name: revoke_token
request:
method: DELETE
url: https://{{ JIRADCDOMAIN }}/rest/pat/latest/tokens/{{ JIRA_TOKEN_ID }}
url: '{{ JIRA_BASE_URL }}/rest/pat/latest/tokens/{{ JIRA_TOKEN_ID }}'
headers:
Authorization: Bearer {{ TOKEN }}
response_matcher:
@ -152,4 +152,4 @@ rules:
- jira-staging.corp.mongodb.com
- https://jira.corp.internal:8443
references:
- https://confluence.atlassian.com/adminjiraserver/jira-applications-base-url-938846869.html
- https://confluence.atlassian.com/adminjiraserver/jira-applications-base-url-938846869.html

View file

@ -17,9 +17,10 @@ tags:
# Beyond Detection: Live Validation, Blast Radius, and One-Command Revocation
A regex match on `AKIA[0-9A-Z]{16}` is the easy part. Every secret scanner
finds those. The hard part — and the part that decides whether your Tuesday
afternoon turns into an incident — is what happens **after** the match.
A regex hit is the easy part. Any scanner can tell you that a string looks
like an AWS access key or a GitHub token. The harder question is what to do
next, and that is usually what turns a scan result into either a routine
cleanup task or a real incident.
Kingfisher answers the three questions that actually matter:
@ -32,9 +33,12 @@ Kingfisher answers the three questions that actually matter:
## 1. Live validation, not just pattern matching
Out of Kingfisher's 820 standalone detectors, **484 include live validation
logic**. Every one of those calls the provider's own API and reports the
credential as `Active`, `Inactive`, or `NotAttempted` — so a 4,000-finding
scan collapses to the dozen findings that are actually live.
logic**. When a provider exposes a safe check call, Kingfisher uses that
provider's own API to report each credential as `Active`, `Inactive`, or
`NotAttempted`.
That changes the output from "thousands of regex matches" to a much shorter
list of findings that actually authenticate today.
Validation runs automatically when you scan:
@ -61,15 +65,15 @@ kingfisher validate --rule gcp "$(cat service-account.json)"
kingfisher validate --rule postgres "$POSTGRES_URI"
```
Validation logic lives in the rule YAML, not in compiled Rust, which is
why coverage is high and growing — every new detector ships with a
validation block whenever the provider exposes a safe check call.
Most validation logic lives in the rule YAML rather than bespoke compiled
code. That makes it practical to grow coverage rule-by-rule instead of
treating validation as a separate engineering project.
## 2. Blast radius mapping — what does this token actually reach?
A leaked AWS key bound to a single read-only S3 bucket and a leaked AWS key
bound to organization-wide `AdministratorAccess` are not the same incident.
The first is a Friday afternoon ticket. The second is a war room.
The first is a ticket. The second is a war room.
Add `--access-map` to a scan and Kingfisher authenticates each live
credential, enumerates what it can do, and writes the result alongside
@ -82,10 +86,10 @@ kingfisher scan github --organization my-org \
--output findings.json
```
Each cloud finding gets an `access_map` block with the identity, the
permissions, and the concrete resources reachable. Today this is supported
for **AWS, GCP, Azure Storage, Azure DevOps, GitHub, GitLab, Slack, and
Microsoft Teams.**
Each supported finding gets an `access_map` block with the identity,
permissions, and concrete resources reachable. Today that includes
**AWS, GCP, Azure Storage, Azure DevOps, GitHub, GitLab, Slack, and
Microsoft Teams**.
You can also run it standalone — useful when triaging a single credential
you've fished out of a paste or a customer report:
@ -102,18 +106,17 @@ kingfisher access-map gcp ./service-account.json --json-out gcp.access-map.json
```
The HTML report viewer (`--format html`) renders the access map as a
clickable tree identity at the root, then services, then individual
resources and permissions. It's the fastest way to get a non-engineer
stakeholder to grasp severity in five seconds rather than five minutes.
clickable tree: identity at the root, then services, then individual
resources and permissions. It is a much faster way to explain severity to
an incident commander or manager than pasting IAM JSON into chat.
## 3. Revocation — kill the token from where you found it
Validation tells you a credential is live. Blast radius tells you why it's
urgent. Revocation tells you it's done.
urgent. Revocation closes the loop.
For every rule whose provider exposes a safe revocation API, Kingfisher
ships the revocation call as part of the rule definition. One command,
no console:
ships the revocation call as part of the rule definition:
```bash
# Revoke a GitHub PAT
@ -134,17 +137,17 @@ kingfisher revoke --rule aws \
kingfisher revoke --rule gcp "$(cat service-account.json)"
```
The same Liquid templating that powers the validation request handles
revocation — including multi-step flows for providers that need a separate
key-id lookup before disabling. (See
The same Liquid templating that powers validation also powers revocation,
including multi-step flows for providers that require a lookup before
disabling the credential. See
[`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md#multi-step-revocation)
for the schema.)
for the schema.
This matters in two scenarios:
- **Mass revocation after a leak.** A laptop or a CI runner gets popped and
you have a list of fingerprints. `kingfisher revoke` walks the list, no
human pivoting between five provider consoles.
you have a list of live credentials. `kingfisher revoke` walks that list
without forcing a human to pivot between provider consoles.
- **Automated response.** Wire `kingfisher revoke` into the same job that
scanned and validated, gated by an allow-list of rule IDs you've decided
are safe to auto-revoke (typically: short-lived CI tokens, dev-environment
@ -152,7 +155,7 @@ This matters in two scenarios:
## The combined workflow
In practice these three primitives chain into a single pipeline:
In practice, these three capabilities collapse into one response workflow:
```bash
# 1. Scan + validate + map blast radius in one call
@ -162,16 +165,16 @@ kingfisher scan github --organization my-org \
--output findings.json
# 2. Pull just the live, high-blast-radius findings
jq '[.[] | select(.validation.status == "Active")
| select(.access_map.permissions
| any(. == "*" or contains("Admin")))]' \
jq '.findings
| map(select(.validation.status == "Active"))
| map(select(.access_map != null))' \
findings.json > urgent.json
# 3. Triage in the HTML viewer (or revoke programmatically)
kingfisher view findings.json
```
Three commands, full incident workflow — find, prioritize, kill.
That is the full incident loop in three steps: find, prioritize, revoke.
## Why this is the right shape
@ -183,9 +186,9 @@ lives), reusing typed validators for the common families (AWS, GCP, JWT,
Postgres, MongoDB, MySQL, JDBC, Azure Storage, Coinbase), and letting rule
authors drop down to a `Raw` validator only for genuinely odd providers.
The upshot for users: when a new detector lands, you almost always get
validation, blast radius, and revocation along with it — not three
separate roadmaps.
The practical result is that new rules can ship with detection plus
post-detection response logic, instead of detection today and validation or
revocation on some later roadmap.
## Next up
@ -196,6 +199,5 @@ separate roadmaps.
- **Docker image scanning** — pulling and scanning every layer for
embedded secrets.
Got a provider you'd love to see validation or revocation support for?
Open an issue at
[mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues).
If there is a provider you want validation or revocation support for, open
an issue at [mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues).

View file

@ -16,11 +16,14 @@ tags:
# Scanning an Entire GitHub Organization for Leaked Secrets
Most organizations have hundreds of repositories — some abandoned, some active,
plenty inherited from acquisitions. A leaked AWS key in a five-year-old archived
repo is just as dangerous as one in `main` today. Kingfisher can enumerate every
repo in a GitHub organization, scan the full git history, and then **validate
which credentials are still live** so you know what to rotate first.
Most organizations have more GitHub surface area than they think: active
services, abandoned repositories, internal tooling, forks, experiments, and
projects inherited through acquisitions. A credential leaked in a five-year-old
archived repo can still be live today.
Kingfisher can enumerate every repository in a GitHub organization, scan the
full git history, and then **validate which credentials are still live** so
you can focus on what needs rotation first.
<!-- more -->
@ -42,14 +45,15 @@ export KF_GITHUB_TOKEN=ghp_yourTokenHere
kingfisher scan github --organization my-org
```
That's it — Kingfisher enumerates every repo, clones each one, scans the full
commit history, runs all 942 detection rules, and validates findings against
That single command enumerates the org, clones each repository, scans working
tree content plus git history, and validates supported findings against
provider APIs.
## Tuning for real-world orgs
Real orgs have huge monorepos, archived junk, and forks you don't care about.
Three flags do most of the work:
Real organizations have huge monorepos, archived junk, mirrored forks, and
repositories you already know are out of scope. Three flags handle most of
the tuning:
```bash
kingfisher scan github --organization my-org \
@ -61,8 +65,8 @@ kingfisher scan github --organization my-org \
--output kf-findings.sarif
```
- **`--repo-clone-limit`** caps the number of clones per scan. Useful for
staged rollouts ("first 500 repos by stars") or to stay under disk budget.
- **`--repo-clone-limit`** caps the number of clones per scan. It is useful
for staged rollouts or staying under a disk budget.
- **`--github-exclude`** accepts exact `OWNER/REPO` strings or gitignore-style
globs (`my-org/*-archive`). Repeat the flag for each pattern. Matching is
case-insensitive.
@ -72,24 +76,25 @@ kingfisher scan github --organization my-org \
## Pulling in issues, wikis, and gists
Secrets don't only live in code. Issues and pull request descriptions are a
common leak source someone pastes a stack trace with a JWT, or an
"oncall handoff" issue with a temporary token that never got rotated. Add
common leak source: someone pastes a stack trace with a JWT, or an
"on-call handoff" issue with a temporary token that never gets rotated. Add
`--repo-artifacts` to fetch these:
```bash
kingfisher scan github --organization my-org --repo-artifacts
```
This pulls each repo's issues (including PRs), wiki, and any **public** gists
owned by the repo owner, and scans them all. It does cost API calls, so plan
accordingly if you're near a rate limit.
This pulls each repo's issues, pull requests, wiki, and any **public** gists
owned by the repo owner, then scans that material as well. It does consume API
calls, so budget for that if the org is large or your token is already near a
rate limit.
## Following the people, not just the org
This is the trick that catches what every other scanner misses. Developers
leak secrets in *personal* repositories — side projects, dotfiles, throwaway
forks. If a contributor to your org has a public personal repo with an active
token that grants access to org infrastructure, that's a real incident.
Developers also leak secrets in *personal* repositories: side projects,
dotfiles, and throwaway forks. If a contributor to your org has a public repo
containing a still-live credential that reaches company infrastructure, that is
still your incident.
Pass a single repo URL with `--include-contributors` and Kingfisher will
enumerate the contributors, then clone and scan **every public repo they own**:
@ -100,14 +105,14 @@ kingfisher scan https://github.com/my-org/critical-service \
--repo-clone-limit 200
```
This is a noisy operation — start with one or two critical repos rather than
the whole org. GitHub will rate-limit aggressive enumeration, so a token
(`KF_GITHUB_TOKEN`) is required in practice.
This is a noisy operation. Start with one or two critical repositories rather
than the entire organization. GitHub will also rate-limit aggressive
enumeration, so `KF_GITHUB_TOKEN` is effectively required.
## Reading the output
The default `pretty` output is human-friendly for terminals. For automation,
pick the format that matches your downstream tool:
The default `pretty` output is fine for interactive terminal use. For
automation, pick a format that matches your downstream consumer:
```bash
# JSON for custom tooling
@ -120,9 +125,9 @@ kingfisher scan github --organization my-org --format sarif --output findings.sa
kingfisher scan github --organization my-org --format toon
```
The interactive HTML report is often the fastest way to triage a large scan
filter by rule, by validation status, or by repository, and click through to
the exact commit and line:
The interactive HTML report is often the fastest way to triage a large scan.
You can filter by rule, validation status, or repository, then click through
to the exact commit and line:
```bash
kingfisher scan github --organization my-org --format html --output kf-report.html
@ -130,40 +135,39 @@ kingfisher scan github --organization my-org --format html --output kf-report.ht
## Triage by validation status
The single most important column in the output is **validation**. A live
credential is a fire — a never-was-valid one is noise. Filter to live findings
first:
The single most important field in the output is **validation**. A live
credential should be triaged immediately; a value that never authenticated is
usually just cleanup work. Filter to live findings first:
```bash
jq '.[] | select(.validation.status == "Active")' findings.json
jq '.findings[] | select(.validation.status == "Active")' findings.json
```
Then walk those credentials in order of blast radius. For AWS, GCP, GitHub,
GitLab, and Slack tokens, Kingfisher already maps what each one can access —
look at the `access_map` field in the JSON output, or the **Blast Radius**
panel in the HTML report.
Then prioritize by blast radius. For AWS, GCP, GitHub, GitLab, and Slack
tokens, Kingfisher can already map what each credential can access. Look at
the `access_map` field in JSON output, or the **Blast Radius** panel in the
HTML report.
## Revoke from the CLI
For supported providers, you don't need to log into a console — Kingfisher can
revoke directly:
For supported providers, you do not need to pivot into the provider console.
Kingfisher can revoke directly:
```bash
kingfisher revoke --rule kingfisher.aws.access_key.1 AKIAEXAMPLE...
```
Each rule that supports revocation declares the API call in its YAML. Today
this works for AWS, GitHub, GitLab, Slack, and a growing list of SaaS
providers — see [`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md)
for the current list and how to add revocation to a custom rule.
Each rule that supports revocation declares the API call in its YAML. See
[`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md)
for the revocation schema, including multi-step flows.
## Wiring it into a recurring job
A first scan is the one-shot baseline. The real value is recurring scans
catching new leaks within hours, not months. The simplest pattern is a nightly
GitHub Action or scheduled CI job that runs the org scan, diffs against
yesterday's findings, and pages on net-new live credentials. We'll cover that
end-to-end in the next post.
The first scan gives you a baseline. The real value comes from running the
same workflow continuously so new leaks are caught within hours instead of
months. A simple starting point is a nightly GitHub Action or scheduled CI
job that runs the org scan, diffs against yesterday's findings, and alerts on
net-new live credentials.
## What's next
@ -174,5 +178,5 @@ end-to-end in the next post.
- **Docker image scanning** — pulling images directly and scanning every
layer for embedded secrets.
If there's a workflow you'd like us to cover, open an issue at
If there is a workflow you want us to cover, open an issue at
[mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues).

View file

@ -8,7 +8,8 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im
All notable changes to this project will be documented in this file.
## [v1.98.0]
- Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs_<APP_ID>_<JWT>`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. Bundled ruleset is now **943 rules** (821 standalone detectors + 122 dependent rules), with **485 standalone detectors** offering live validation.
- Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs_<APP_ID>_<JWT>`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation.
- Added provider endpoint overrides for validation and revocation via global `--endpoint PROVIDER=URL` and `--endpoint-config FILE`, with built-in support for self-hosted GitHub, GitLab, Gitea, Jira, Confluence, and Artifactory instances.
## [v1.97.0]
- **Report viewer cross-tool triage:** when a Kingfisher report is loaded alongside a Gitleaks or TruffleHog report, matching imported findings are enriched with Kingfisher's validation verdict, validation response, validate command, and revoke command. Matching is keyed on `commit + file + line` with a `file + line` fallback, and enriched rows show an "Enriched by Kingfisher" callout in the detail panel plus an "Enriched" chip in the findings table. Added a **Source** column to the findings table; a new **Duplicates Removed by Tool** dashboard panel showing per-tool cards for Kingfisher / TruffleHog / Gitleaks; and an upload-time **Deduplicate findings** toggle (on by default) so users can inspect the raw rows before fingerprint dedup when needed.

View file

@ -274,6 +274,57 @@ kingfisher validate --rule aws --arg AKIAEXAMPLE "secret_key"
kingfisher validate --rule aws --var AKID=AKIAEXAMPLE "secret_key"
```
**Provider endpoint overrides (`--endpoint` and `--endpoint-config`):**
Rules for providers that can be self-hosted (rather than used only as a public SaaS service) can be pointed at your own instance without editing the rule YAML.
- `--endpoint PROVIDER=URL` sets an endpoint for the current command. Repeat it for multiple providers.
- `--endpoint-config FILE` loads a YAML file with reusable endpoint overrides.
- For self-hosted instances on private IPs or `localhost`, combine endpoint overrides with `--allow-internal-ips`.
Supported provider keys for endpoint overrides are:
- `github`
- `gitlab`
- `gitea`
- `jira` (Jira Data Center / self-managed)
- `jira-cloud`
- `confluence`
- `artifactory`
```bash
# Validate a GitHub Enterprise token against a self-hosted instance
kingfisher validate --rule github \
--endpoint github=https://ghe.corp.example.com \
"ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
# Revoke a self-managed GitLab PAT
kingfisher revoke --rule gitlab \
--endpoint gitlab=https://gitlab.corp.example.com \
"glpat-xxxxxxxxxxxxxxxxxxxx"
# Scan a repo and validate Artifactory findings against an internal instance
kingfisher scan ./repo \
--endpoint artifactory=http://localhost:8071 \
--allow-internal-ips
```
Example endpoint config file:
```yaml
endpoints:
github: https://ghe.corp.example.com
gitlab: https://gitlab.corp.example.com
gitea: https://gitea.corp.example.com
jira: https://jira.corp.example.com
confluence: https://wiki.corp.example.com
artifactory: http://localhost:8071
```
```bash
kingfisher scan ./repo --endpoint-config ./kingfisher-endpoints.yml --allow-internal-ips
```
**Rule prefix matching:** Use partial rule IDs like `opsgenie` instead of the full `kingfisher.opsgenie.1`. If the prefix matches multiple rules, **all matching rules with compatible variables are tried**:
```bash
@ -1082,8 +1133,11 @@ If you are scanning infrastructure that uses internal endpoints for credential v
# Scan with SSRF protection disabled (allows requests to internal IPs)
kingfisher scan --allow-internal-ips ./repo
# Also works with the validate command
kingfisher validate --allow-internal-ips --rule kingfisher.artifactory.1
# Also works with direct validation against a self-hosted endpoint
kingfisher validate --allow-internal-ips \
--endpoint artifactory=http://localhost:8071 \
--rule kingfisher.artifactory.1 \
"AKCp..."
```
> **Warning:** Only use `--allow-internal-ips` when you trust the content being scanned. Malicious content could cause Kingfisher to make requests to internal services.

View file

@ -269,6 +269,57 @@ kingfisher validate --rule aws --arg AKIAEXAMPLE "secret_key"
kingfisher validate --rule aws --var AKID=AKIAEXAMPLE "secret_key"
```
**Provider endpoint overrides (`--endpoint` and `--endpoint-config`):**
Rules for providers that can be self-hosted (rather than used only as a public SaaS service) can be pointed at your own instance without editing the rule YAML.
- `--endpoint PROVIDER=URL` sets an endpoint for the current command. Repeat it for multiple providers.
- `--endpoint-config FILE` loads a YAML file with reusable endpoint overrides.
- For self-hosted instances on private IPs or `localhost`, combine endpoint overrides with `--allow-internal-ips`.
Supported provider keys for endpoint overrides are:
- `github`
- `gitlab`
- `gitea`
- `jira` (Jira Data Center / self-managed)
- `jira-cloud`
- `confluence`
- `artifactory`
```bash
# Validate a GitHub Enterprise token against a self-hosted instance
kingfisher validate --rule github \
--endpoint github=https://ghe.corp.example.com \
"ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
# Revoke a self-managed GitLab PAT
kingfisher revoke --rule gitlab \
--endpoint gitlab=https://gitlab.corp.example.com \
"glpat-xxxxxxxxxxxxxxxxxxxx"
# Scan a repo and validate Artifactory findings against an internal instance
kingfisher scan ./repo \
--endpoint artifactory=http://localhost:8071 \
--allow-internal-ips
```
Example endpoint config file:
```yaml
endpoints:
github: https://ghe.corp.example.com
gitlab: https://gitlab.corp.example.com
gitea: https://gitea.corp.example.com
jira: https://jira.corp.example.com
confluence: https://wiki.corp.example.com
artifactory: http://localhost:8071
```
```bash
kingfisher scan ./repo --endpoint-config ./kingfisher-endpoints.yml --allow-internal-ips
```
**Rule prefix matching:** Use partial rule IDs like `opsgenie` instead of the full `kingfisher.opsgenie.1`. If the prefix matches multiple rules, **all matching rules with compatible variables are tried**:
```bash
@ -1077,8 +1128,11 @@ If you are scanning infrastructure that uses internal endpoints for credential v
# Scan with SSRF protection disabled (allows requests to internal IPs)
kingfisher scan --allow-internal-ips ./repo
# Also works with the validate command
kingfisher validate --allow-internal-ips --rule kingfisher.artifactory.1
# Also works with direct validation against a self-hosted endpoint
kingfisher validate --allow-internal-ips \
--endpoint artifactory=http://localhost:8071 \
--rule kingfisher.artifactory.1 \
"AKCp..."
```
> **Warning:** Only use `--allow-internal-ips` when you trust the content being scanned. Malicious content could cause Kingfisher to make requests to internal services.

View file

@ -1,4 +1,5 @@
use std::io::IsTerminal;
use std::path::PathBuf;
use std::sync::LazyLock;
@ -144,6 +145,16 @@ pub struct GlobalArgs {
#[arg(global = true, long = "user-agent-suffix", value_name = "SUFFIX")]
pub user_agent_suffix: Option<String>,
/// Override provider API endpoints for validation/revocation (PROVIDER=URL), repeatable.
///
/// Supported providers: github, gitlab, gitea, jira, jira-cloud, confluence, artifactory.
#[arg(global = true, long = "endpoint", value_name = "PROVIDER=URL")]
pub endpoint: Vec<String>,
/// YAML file containing provider endpoint overrides.
#[arg(global = true, long = "endpoint-config", value_name = "FILE")]
pub endpoint_config: Option<PathBuf>,
// Internal fields (not CLI arguments)
#[clap(skip)]
pub color: Mode,
@ -163,6 +174,8 @@ impl Default for GlobalArgs {
self_update: false,
no_update_check: false,
user_agent_suffix: None,
endpoint: Vec::new(),
endpoint_config: None,
color: Mode::Auto,
progress: Mode::Auto,
}

View file

@ -20,6 +20,7 @@ use tracing::debug;
use crate::{
cli::{commands::revoke::RevokeArgs, global::GlobalArgs},
liquid_filters::register_all,
provider_endpoints::{ProviderEndpointOverrides, hydrate_endpoint_globals_for_rule},
rule_loader::RuleLoader,
template_vars::extract_template_vars,
validation::GLOBAL_USER_AGENT,
@ -138,15 +139,22 @@ fn get_global_var(globals: &Object, name: &str) -> Option<String> {
/// Build the globals object for Liquid template rendering.
fn build_globals(
rule_id: &str,
secret: &str,
args: &[String],
variables: &[String],
template_vars: &BTreeSet<String>,
endpoint_overrides: &ProviderEndpointOverrides,
) -> Result<Object> {
let mut globals = Object::new();
globals.insert("TOKEN".into(), Value::scalar(secret.to_string()));
let auto_assign_vars: Vec<&String> = template_vars.iter().filter(|v| *v != "TOKEN").collect();
endpoint_overrides.apply_defaults(&mut globals);
let auto_assign_vars: Vec<&String> = template_vars
.iter()
.filter(|v| *v != "TOKEN" && !globals.contains_key(v.as_str()))
.collect();
for (i, arg_value) in args.iter().enumerate() {
if i < auto_assign_vars.len() {
@ -171,6 +179,8 @@ fn build_globals(
globals.insert(name.into(), Value::scalar(value));
}
hydrate_endpoint_globals_for_rule(rule_id, &mut globals);
Ok(globals)
}
@ -553,6 +563,7 @@ pub async fn run_direct_revocation(
let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
let timeout = Duration::from_secs(args.timeout);
let endpoint_overrides = ProviderEndpointOverrides::from_global_args(global_args)?;
let mut results = Vec::new();
@ -597,7 +608,14 @@ pub async fn run_direct_revocation(
}
}
let globals = build_globals(&secret, &args.args, &args.variables, &template_vars)?;
let globals = build_globals(
&rule_id,
&secret,
&args.args,
&args.variables,
&template_vars,
&endpoint_overrides,
)?;
if !non_token_vars.is_empty() && !args.args.is_empty() {
debug!(
@ -1028,7 +1046,15 @@ mod tests {
#[test]
fn build_globals_sets_token() {
let template_vars = BTreeSet::from(["TOKEN".to_string()]);
let globals = build_globals("my-secret", &[], &[], &template_vars).unwrap();
let globals = build_globals(
"kingfisher.test.1",
"my-secret",
&[],
&[],
&template_vars,
&ProviderEndpointOverrides::default(),
)
.unwrap();
assert_eq!(globals.get("TOKEN"), Some(Value::scalar("my-secret".to_string())).as_ref());
}
@ -1037,7 +1063,15 @@ mod tests {
let template_vars =
BTreeSet::from(["TOKEN".to_string(), "AKID".to_string(), "REGION".to_string()]);
let args = vec!["my-akid".to_string(), "us-east-1".to_string()];
let globals = build_globals("secret", &args, &[], &template_vars).unwrap();
let globals = build_globals(
"kingfisher.test.1",
"secret",
&args,
&[],
&template_vars,
&ProviderEndpointOverrides::default(),
)
.unwrap();
assert_eq!(globals.get("TOKEN"), Some(Value::scalar("secret".to_string())).as_ref());
assert_eq!(globals.get("AKID"), Some(Value::scalar("my-akid".to_string())).as_ref());
@ -1048,7 +1082,15 @@ mod tests {
fn build_globals_explicit_variables() {
let template_vars = BTreeSet::from(["TOKEN".to_string(), "AKID".to_string()]);
let vars = vec!["AKID=explicit-value".to_string()];
let globals = build_globals("secret", &[], &vars, &template_vars).unwrap();
let globals = build_globals(
"kingfisher.test.1",
"secret",
&[],
&vars,
&template_vars,
&ProviderEndpointOverrides::default(),
)
.unwrap();
assert_eq!(globals.get("AKID"), Some(Value::scalar("explicit-value".to_string())).as_ref());
}
@ -1057,7 +1099,14 @@ mod tests {
fn build_globals_invalid_var_format() {
let template_vars = BTreeSet::new();
let vars = vec!["NO_EQUALS_SIGN".to_string()];
let result = build_globals("secret", &[], &vars, &template_vars);
let result = build_globals(
"kingfisher.test.1",
"secret",
&[],
&vars,
&template_vars,
&ProviderEndpointOverrides::default(),
);
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("Expected NAME=VALUE"));
}
@ -1066,7 +1115,14 @@ mod tests {
fn build_globals_empty_var_name() {
let template_vars = BTreeSet::new();
let vars = vec!["=value".to_string()];
let result = build_globals("secret", &[], &vars, &template_vars);
let result = build_globals(
"kingfisher.test.1",
"secret",
&[],
&vars,
&template_vars,
&ProviderEndpointOverrides::default(),
);
assert!(result.is_err());
assert!(result.unwrap_err().to_string().contains("cannot be empty"));
}

View file

@ -21,6 +21,7 @@ use tracing::debug;
use crate::{
cli::{commands::validate::ValidateArgs, global::GlobalArgs},
liquid_filters::register_all,
provider_endpoints::{ProviderEndpointOverrides, hydrate_endpoint_globals_for_rule},
rule_loader::RuleLoader,
rules::{HttpValidation, Validation, rule::Rule},
template_vars::extract_template_vars,
@ -210,18 +211,25 @@ fn extract_validation_vars(validation: &Validation) -> BTreeSet<String> {
/// - `variables`: Named variables in NAME=VALUE format (explicit overrides)
/// - `template_vars`: Set of variable names used in the validation template
fn build_globals(
rule_id: &str,
secret: &str,
args: &[String],
variables: &[String],
template_vars: &BTreeSet<String>,
endpoint_overrides: &ProviderEndpointOverrides,
) -> Result<Object> {
let mut globals = Object::new();
// Set TOKEN to the provided secret
globals.insert("TOKEN".into(), Value::scalar(secret.to_string()));
endpoint_overrides.apply_defaults(&mut globals);
// Get non-TOKEN variables in alphabetical order for auto-assignment
let auto_assign_vars: Vec<&String> = template_vars.iter().filter(|v| *v != "TOKEN").collect();
let auto_assign_vars: Vec<&String> = template_vars
.iter()
.filter(|v| *v != "TOKEN" && !globals.contains_key(v.as_str()))
.collect();
// Auto-assign --arg values to template variables
for (i, arg_value) in args.iter().enumerate() {
@ -248,6 +256,8 @@ fn build_globals(
globals.insert(name.into(), Value::scalar(value));
}
hydrate_endpoint_globals_for_rule(rule_id, &mut globals);
Ok(globals)
}
@ -469,6 +479,7 @@ pub async fn run_direct_validation(
// Build Liquid parser
let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
let endpoint_overrides = ProviderEndpointOverrides::from_global_args(global_args)?;
let timeout = Duration::from_secs(args.timeout);
let rate_limiter =
@ -525,7 +536,14 @@ pub async fn run_direct_validation(
}
}
let globals = build_globals(&secret, &args.args, &args.variables, &template_vars)?;
let globals = build_globals(
&rule_id,
&secret,
&args.args,
&args.variables,
&template_vars,
&endpoint_overrides,
)?;
// Log auto-assignment info for debugging
if !non_token_vars.is_empty() && !args.args.is_empty() {

View file

@ -40,6 +40,7 @@ pub mod location;
pub mod matcher;
pub mod origin;
pub mod parser;
pub mod provider_endpoints;
pub mod pyc;
pub mod reporter;
pub mod rule_loader;

409
src/provider_endpoints.rs Normal file
View file

@ -0,0 +1,409 @@
use std::{collections::BTreeMap, fs, path::Path};
use anyhow::{Context, Result, anyhow, bail};
use liquid::Object;
use liquid_core::{Value, ValueView};
use serde::Deserialize;
use url::Url;
use crate::cli::global::GlobalArgs;
// Names of the template globals that carry each provider's base URL. The same
// strings are used as keys when inserting into the Liquid `Object` in this file.
const GITHUB_API_BASE_URL: &str = "GITHUB_API_BASE_URL";
const GITHUB_WEB_BASE_URL: &str = "GITHUB_WEB_BASE_URL";
const GITLAB_API_BASE_URL: &str = "GITLAB_API_BASE_URL";
const GITEA_API_BASE_URL: &str = "GITEA_API_BASE_URL";
const JIRA_BASE_URL: &str = "JIRA_BASE_URL";
const JIRA_CLOUD_BASE_URL: &str = "JIRA_CLOUD_BASE_URL";
const CONFLUENCE_BASE_URL: &str = "CONFLUENCE_BASE_URL";
const ARTIFACTORY_BASE_URL: &str = "ARTIFACTORY_BASE_URL";
/// Provider endpoint overrides collected from the endpoint config file and from
/// `--endpoint` command-line values.
///
/// The two sources are stored separately because they are applied with different
/// precedence: config values only fill globals that are not already set, while
/// command-line values overwrite whatever is present.
#[derive(Debug, Clone, Default)]
pub struct ProviderEndpointOverrides {
// Values loaded from the `--endpoint-config` file (empty when no file was given).
config: EndpointVars,
// Values parsed from the repeatable `--endpoint PROVIDER=URL` argument.
cli: EndpointVars,
}
/// Normalized endpoint values keyed by template variable name
/// (for example `GITHUB_API_BASE_URL`), ready to be inserted into template globals.
#[derive(Debug, Clone, Default)]
struct EndpointVars {
// Template variable name -> normalized base URL.
values: BTreeMap<String, String>,
}
/// Deserialized shape of the endpoint config file.
///
/// Three top-level sections are accepted, each a map of provider name to URL and
/// each optional. `EndpointVars::from_config_path` merges them in the order
/// `endpoints`, `provider_endpoints`, `providers`, with later sections overwriting
/// earlier ones on duplicate keys.
#[derive(Debug, Deserialize, Default)]
struct EndpointConfigFile {
#[serde(default)]
endpoints: BTreeMap<String, String>,
#[serde(default)]
provider_endpoints: BTreeMap<String, String>,
#[serde(default)]
providers: BTreeMap<String, String>,
}
impl ProviderEndpointOverrides {
    /// Collects endpoint overrides from the global CLI arguments.
    ///
    /// The config file named by `endpoint_config` is read first (when present), then
    /// the `endpoint` pairs are parsed.
    ///
    /// # Errors
    /// Propagates any failure from reading/parsing the config file or from parsing
    /// and normalizing an `--endpoint` pair.
    pub fn from_global_args(global_args: &GlobalArgs) -> Result<Self> {
        let config = global_args
            .endpoint_config
            .as_deref()
            .map(EndpointVars::from_config_path)
            .transpose()?
            .unwrap_or_default();
        let cli = EndpointVars::from_pairs(&global_args.endpoint)?;
        Ok(Self { config, cli })
    }

    /// Seeds `globals` with endpoint variables for direct validation/revocation.
    ///
    /// Precedence, lowest to highest: built-in defaults, config file values (neither
    /// replaces a key that is already set), then command-line values (always replace).
    pub fn apply_defaults(&self, globals: &mut Object) {
        self.apply_layers(globals);
    }

    /// Applies endpoint variables to `globals` during a scan.
    ///
    /// Uses exactly the same layering as [`Self::apply_defaults`].
    pub fn apply_scan_overrides(&self, globals: &mut Object) {
        self.apply_layers(globals);
    }

    /// Shared layering: config (fill only), built-in defaults (fill only), CLI (overwrite).
    fn apply_layers(&self, globals: &mut Object) {
        self.config.apply(globals, false);
        apply_builtin_defaults(globals);
        self.cli.apply(globals, true);
    }
}
impl EndpointVars {
    /// Loads provider endpoints from the YAML file at `path`.
    ///
    /// Entries under `endpoints`, `provider_endpoints` and `providers` are merged into
    /// one map; when a key appears in several sections, the later section wins.
    ///
    /// # Errors
    /// Fails if the file cannot be read or parsed, or if any entry names an
    /// unsupported provider or carries a URL that cannot be normalized.
    fn from_config_path(path: &Path) -> Result<Self> {
        let raw = fs::read_to_string(path)
            .with_context(|| format!("Failed to read endpoint config from {}", path.display()))?;
        let parsed: EndpointConfigFile = serde_yaml::from_str(&raw)
            .with_context(|| format!("Failed to parse endpoint config {}", path.display()))?;
        let mut merged = parsed.endpoints;
        merged.extend(parsed.provider_endpoints);
        merged.extend(parsed.providers);
        Self::from_map(merged)
    }

    /// Builds endpoint variables from `PROVIDER=URL` command-line pairs.
    ///
    /// Pairs are applied in the order they were given, so the last value for a
    /// provider wins even when it is spelled differently (`GitHub` vs `github`) or
    /// given through an alias (`jfrog` vs `artifactory`). Routing the pairs through a
    /// sorted map would instead let key ordering pick the winner.
    ///
    /// # Errors
    /// All pairs are syntax-checked first; after that every pair must name a
    /// supported provider and carry a URL that can be normalized.
    fn from_pairs(pairs: &[String]) -> Result<Self> {
        let assignments = pairs
            .iter()
            .map(|pair| parse_assignment(pair))
            .collect::<Result<Vec<_>>>()?;
        let mut vars = Self::default();
        for (provider, endpoint) in &assignments {
            vars.set_provider(provider, endpoint)?;
        }
        Ok(vars)
    }

    /// Builds endpoint variables from a provider-name -> URL map.
    ///
    /// Entries are processed in the map's key order.
    ///
    /// # Errors
    /// Fails on an unsupported provider name or a URL that cannot be normalized.
    fn from_map(map: BTreeMap<String, String>) -> Result<Self> {
        let mut vars = Self::default();
        for (provider, endpoint) in &map {
            vars.set_provider(provider, endpoint)?;
        }
        Ok(vars)
    }

    /// Normalizes `endpoint` for `provider` and stores the resulting template
    /// variable(s), replacing any value stored earlier for the same variable.
    ///
    /// Provider names are matched after trimming, ASCII-lowercasing and treating
    /// `_` as `-`.
    fn set_provider(&mut self, provider: &str, endpoint: &str) -> Result<()> {
        let key = normalize_endpoint_key(provider);
        match key.as_str() {
            "github" => {
                // One GitHub endpoint yields both the API and the web base URL.
                let github = normalize_github_endpoint(endpoint)?;
                self.set(GITHUB_API_BASE_URL, github.api_base_url);
                self.set(GITHUB_WEB_BASE_URL, github.web_base_url);
            }
            "gitlab" => {
                self.set(GITLAB_API_BASE_URL, normalize_api_base_url(endpoint, "/api/v4")?);
            }
            "gitea" => {
                self.set(GITEA_API_BASE_URL, normalize_api_base_url(endpoint, "/api/v1")?);
            }
            "jira" | "jira-dc" => {
                self.set(JIRA_BASE_URL, normalize_base_url(endpoint)?);
            }
            "jira-cloud" => {
                self.set(JIRA_CLOUD_BASE_URL, normalize_base_url(endpoint)?);
            }
            "confluence" | "confluence-dc" => {
                self.set(CONFLUENCE_BASE_URL, normalize_base_url(endpoint)?);
            }
            "artifactory" | "jfrog" => {
                self.set(ARTIFACTORY_BASE_URL, normalize_artifactory_base_url(endpoint)?);
            }
            _ => bail!(
                "Unsupported endpoint provider '{}'. Supported values: github, gitlab, gitea, jira, jira-cloud, confluence, artifactory",
                provider
            ),
        }
        Ok(())
    }

    /// Stores `value` under the template variable `name`.
    fn set(&mut self, name: &str, value: String) {
        self.values.insert(name.to_string(), value);
    }

    /// Copies the stored variables into `globals`.
    ///
    /// With `overwrite_existing == false`, keys already present in `globals` are
    /// left untouched; with `true`, they are replaced.
    fn apply(&self, globals: &mut Object, overwrite_existing: bool) {
        for (name, value) in &self.values {
            if overwrite_existing || !globals.contains_key(name.as_str()) {
                globals.insert(name.clone().into(), Value::scalar(value.clone()));
            }
        }
    }
}
/// The pair of base URLs produced by `normalize_github_endpoint` from one GitHub endpoint.
#[derive(Debug)]
struct GitHubEndpoint {
// Base URL for API requests (e.g. `https://api.github.com` or `<host>/api/v3`).
api_base_url: String,
// Base URL of the web front end (e.g. `https://github.com` or `<host>`).
web_base_url: String,
}
pub fn hydrate_endpoint_globals_for_rule(rule_id: &str, globals: &mut Object) {
hydrate_github_globals(globals);
hydrate_artifactory_globals(globals);
hydrate_confluence_globals(globals);
hydrate_jira_dc_globals(globals);
if rule_id == "kingfisher.jira.2" {
hydrate_jira_cloud_globals(globals);
}
}
/// Returns the names of every endpoint template variable this module manages.
pub fn endpoint_var_names() -> &'static [&'static str] {
    const NAMES: &[&str] = &[
        GITHUB_API_BASE_URL,
        GITHUB_WEB_BASE_URL,
        GITLAB_API_BASE_URL,
        GITEA_API_BASE_URL,
        JIRA_BASE_URL,
        JIRA_CLOUD_BASE_URL,
        CONFLUENCE_BASE_URL,
        ARTIFACTORY_BASE_URL,
    ];
    NAMES
}
/// Derives the missing GitHub base URL when exactly one of the API/web pair is set.
///
/// The value that is present is re-normalized and both globals are written. Nothing
/// happens when both or neither are set, or when normalization fails.
fn hydrate_github_globals(globals: &mut Object) {
    let api = string_var(globals, GITHUB_API_BASE_URL);
    let web = string_var(globals, GITHUB_WEB_BASE_URL);
    let seed = match (api, web) {
        (Some(only), None) | (None, Some(only)) => only,
        _ => return,
    };
    let Ok(endpoint) = normalize_github_endpoint(&seed) else {
        return;
    };
    globals.insert(GITHUB_API_BASE_URL.into(), Value::scalar(endpoint.api_base_url));
    globals.insert(GITHUB_WEB_BASE_URL.into(), Value::scalar(endpoint.web_base_url));
}
/// Sets `ARTIFACTORY_BASE_URL` from the `JFROGURL` global when it is not already set.
///
/// A missing, empty or non-normalizable `JFROGURL` leaves `globals` unchanged.
fn hydrate_artifactory_globals(globals: &mut Object) {
    if globals.contains_key(ARTIFACTORY_BASE_URL) {
        return;
    }
    let derived = string_var(globals, "JFROGURL")
        .and_then(|legacy| normalize_artifactory_base_url(&legacy).ok());
    if let Some(base_url) = derived {
        globals.insert(ARTIFACTORY_BASE_URL.into(), Value::scalar(base_url));
    }
}
/// Sets `CONFLUENCE_BASE_URL` from the `CONFLUENCEDCDOMAIN` global when it is not already set.
///
/// A missing, empty or non-normalizable `CONFLUENCEDCDOMAIN` leaves `globals` unchanged.
fn hydrate_confluence_globals(globals: &mut Object) {
    if globals.contains_key(CONFLUENCE_BASE_URL) {
        return;
    }
    let derived = string_var(globals, "CONFLUENCEDCDOMAIN")
        .and_then(|legacy| normalize_base_url(&legacy).ok());
    if let Some(base_url) = derived {
        globals.insert(CONFLUENCE_BASE_URL.into(), Value::scalar(base_url));
    }
}
/// Sets `JIRA_BASE_URL` from the `JIRADCDOMAIN` global when it is not already set.
///
/// A missing, empty or non-normalizable `JIRADCDOMAIN` leaves `globals` unchanged.
fn hydrate_jira_dc_globals(globals: &mut Object) {
    if globals.contains_key(JIRA_BASE_URL) {
        return;
    }
    let derived = string_var(globals, "JIRADCDOMAIN")
        .and_then(|legacy| normalize_base_url(&legacy).ok());
    if let Some(base_url) = derived {
        globals.insert(JIRA_BASE_URL.into(), Value::scalar(base_url));
    }
}
/// Sets `JIRA_CLOUD_BASE_URL` from the `DOMAIN` global when it is not already set.
///
/// A missing, empty or non-normalizable `DOMAIN` leaves `globals` unchanged.
fn hydrate_jira_cloud_globals(globals: &mut Object) {
    if globals.contains_key(JIRA_CLOUD_BASE_URL) {
        return;
    }
    let derived =
        string_var(globals, "DOMAIN").and_then(|legacy| normalize_base_url(&legacy).ok());
    if let Some(base_url) = derived {
        globals.insert(JIRA_CLOUD_BASE_URL.into(), Value::scalar(base_url));
    }
}
/// Returns the global `name` rendered as a string, or `None` when it is absent or
/// renders to an empty string.
fn string_var(globals: &Object, name: &str) -> Option<String> {
    let rendered = globals.get(name)?.to_kstr().to_string();
    if rendered.is_empty() { None } else { Some(rendered) }
}
/// Inserts the public GitHub, GitLab and Gitea base URLs for any of those
/// variables that `globals` does not already contain.
fn apply_builtin_defaults(globals: &mut Object) {
    const BUILTIN: [(&str, &str); 4] = [
        (GITHUB_API_BASE_URL, "https://api.github.com"),
        (GITHUB_WEB_BASE_URL, "https://github.com"),
        (GITLAB_API_BASE_URL, "https://gitlab.com/api/v4"),
        (GITEA_API_BASE_URL, "https://gitea.com/api/v1"),
    ];
    for (name, value) in BUILTIN {
        if globals.contains_key(name) {
            continue;
        }
        globals.insert(name.into(), Value::scalar(value.to_string()));
    }
}
fn parse_assignment(raw: &str) -> Result<(String, String)> {
let (provider, endpoint) = raw
.split_once('=')
.ok_or_else(|| anyhow!("Invalid endpoint '{}'. Expected PROVIDER=URL", raw))?;
let provider = provider.trim();
let endpoint = endpoint.trim();
if provider.is_empty() {
bail!("Invalid endpoint '{}'. Provider name cannot be empty", raw);
}
if endpoint.is_empty() {
bail!("Invalid endpoint '{}'. URL cannot be empty", raw);
}
Ok((provider.to_string(), endpoint.to_string()))
}
/// Canonicalizes a provider name: trims whitespace, lowercases ASCII letters and
/// turns every `_` into `-`.
fn normalize_endpoint_key(key: &str) -> String {
    key.trim()
        .chars()
        .map(|ch| if ch == '_' { '-' } else { ch.to_ascii_lowercase() })
        .collect()
}
/// Parses `raw` and returns it as a base URL: query and fragment removed and no
/// trailing slash.
///
/// # Errors
/// Fails when `raw` cannot be parsed as a URL or hostname.
fn normalize_base_url(raw: &str) -> Result<String> {
    let parsed = parse_url_or_assume_https(raw)?;
    let trimmed_path = parsed.path().trim_end_matches('/');
    Ok(url_with_path(&parsed, trimmed_path))
}
/// Parses `raw` and returns a base URL whose path ends in `api_suffix`.
///
/// The suffix is appended unless the (slash-trimmed) path already ends with it.
///
/// # Errors
/// Fails when `raw` cannot be parsed as a URL or hostname.
fn normalize_api_base_url(raw: &str, api_suffix: &str) -> Result<String> {
    let parsed = parse_url_or_assume_https(raw)?;
    let full_path = match parsed.path().trim_end_matches('/') {
        "" => api_suffix.to_string(),
        existing if existing.ends_with(api_suffix) => existing.to_string(),
        existing => format!("{existing}{api_suffix}"),
    };
    Ok(url_with_path(&parsed, &full_path))
}
/// Parses `raw` and returns the Artifactory base URL with a trailing
/// `/artifactory` path segment (and any trailing slashes) removed.
///
/// # Errors
/// Fails when `raw` cannot be parsed as a URL or hostname.
fn normalize_artifactory_base_url(raw: &str) -> Result<String> {
    let parsed = parse_url_or_assume_https(raw)?;
    let trimmed = parsed.path().trim_end_matches('/');
    let base_path = trimmed.strip_suffix("/artifactory").unwrap_or(trimmed);
    Ok(url_with_path(&parsed, base_path))
}
fn normalize_github_endpoint(raw: &str) -> Result<GitHubEndpoint> {
let url = parse_url_or_assume_https(raw)?;
let host = url
.host_str()
.ok_or_else(|| anyhow!("Endpoint '{}' is missing a host", raw))?
.to_ascii_lowercase();
let path = url.path().trim_end_matches('/');
if host == "api.github.com" {
return Ok(GitHubEndpoint {
api_base_url: "https://api.github.com".to_string(),
web_base_url: "https://github.com".to_string(),
});
}
if host == "github.com" && path.is_empty() {
return Ok(GitHubEndpoint {
api_base_url: "https://api.github.com".to_string(),
web_base_url: "https://github.com".to_string(),
});
}
let (web_path, api_path) = if path.is_empty() {
("".to_string(), "/api/v3".to_string())
} else if let Some(prefix) = path.strip_suffix("/api/v3") {
(prefix.to_string(), path.to_string())
} else {
(path.to_string(), format!("{path}/api/v3"))
};
Ok(GitHubEndpoint {
api_base_url: url_with_path(&url, &api_path),
web_base_url: url_with_path(&url, &web_path),
})
}
/// Parses `raw` as a URL, assuming `https://` when no usable scheme was given.
///
/// Besides bare hostnames (which fail to parse as "relative URL without a base"),
/// this also covers `host:port` input such as `localhost:8071` or
/// `repo.example.com:8081`. The `url` crate accepts those as a URL whose *scheme* is
/// the hostname, producing a host-less cannot-be-a-base URL, so they are re-parsed
/// with an `https://` prefix instead of being returned as-is.
///
/// # Errors
/// Fails when `raw` is not a valid URL and is still invalid after the `https://`
/// prefix is added.
fn parse_url_or_assume_https(raw: &str) -> Result<Url> {
    let trimmed = raw.trim();
    match Url::parse(trimmed) {
        // A hierarchical URL (`scheme://...`) is taken as given.
        Ok(url) if !url.cannot_be_a_base() => Ok(url),
        // `host`, `host/path`, `host:port` and `host:port/path` all land here.
        Ok(_) | Err(url::ParseError::RelativeUrlWithoutBase) => {
            Url::parse(&format!("https://{trimmed}")).with_context(|| {
                format!("Invalid endpoint URL '{}'. Use a full URL or hostname", raw)
            })
        }
        Err(err) => Err(anyhow!("Invalid endpoint URL '{}': {}", raw, err)),
    }
}
/// Returns `url` with its path replaced by `path`, its query and fragment dropped,
/// and any trailing `/` removed from the serialized form.
fn url_with_path(url: &Url, path: &str) -> String {
    let mut rebuilt = url.clone();
    rebuilt.set_query(None);
    rebuilt.set_fragment(None);
    // An empty `path` is passed straight through.
    rebuilt.set_path(path);
    String::from(rebuilt.as_str().trim_end_matches('/'))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A bare hostname gains `https://` and the `/api/v3` API path.
    #[test]
    fn github_endpoint_normalizes_host_only() {
        let GitHubEndpoint { api_base_url, web_base_url } =
            normalize_github_endpoint("ghe.corp.example.com").unwrap();
        assert_eq!(api_base_url, "https://ghe.corp.example.com/api/v3");
        assert_eq!(web_base_url, "https://ghe.corp.example.com");
    }

    /// An endpoint that already ends in `/api/v3` keeps it and yields the web root.
    #[test]
    fn github_endpoint_normalizes_api_path() {
        let GitHubEndpoint { api_base_url, web_base_url } =
            normalize_github_endpoint("https://ghe.corp.example.com/api/v3").unwrap();
        assert_eq!(api_base_url, "https://ghe.corp.example.com/api/v3");
        assert_eq!(web_base_url, "https://ghe.corp.example.com");
    }

    /// The API suffix is appended after an existing sub-path.
    #[test]
    fn gitlab_endpoint_appends_api_path() {
        let api_base = normalize_api_base_url("gitlab.example.com/gitlab", "/api/v4").unwrap();
        assert_eq!(api_base, "https://gitlab.example.com/gitlab/api/v4");
    }

    /// A trailing `/artifactory` segment is removed from the base URL.
    #[test]
    fn artifactory_endpoint_strips_artifactory_suffix() {
        let base =
            normalize_artifactory_base_url("http://localhost:8071/artifactory").unwrap();
        assert_eq!(base, "http://localhost:8071");
    }

    /// For the Jira Cloud rule, `DOMAIN` is promoted to `JIRA_CLOUD_BASE_URL`.
    #[test]
    fn jira_cloud_hydrates_from_legacy_domain() {
        let mut globals = Object::new();
        globals.insert("DOMAIN".into(), Value::scalar("example.atlassian.net"));

        hydrate_endpoint_globals_for_rule("kingfisher.jira.2", &mut globals);

        let hydrated = string_var(&globals, JIRA_CLOUD_BASE_URL);
        assert_eq!(hydrated.as_deref(), Some("https://example.atlassian.net"));
    }

    /// `JFROGURL` is promoted to `ARTIFACTORY_BASE_URL`.
    #[test]
    fn artifactory_hydrates_from_legacy_host() {
        let mut globals = Object::new();
        globals.insert("JFROGURL".into(), Value::scalar("repo.example.com"));

        hydrate_endpoint_globals_for_rule("kingfisher.artifactory.1", &mut globals);

        let hydrated = string_var(&globals, ARTIFACTORY_BASE_URL);
        assert_eq!(hydrated.as_deref(), Some("https://repo.example.com"));
    }
}

View file

@ -23,6 +23,7 @@ use crate::{
gitea, github, gitlab,
liquid_filters::register_all,
matcher::MatcherStats,
provider_endpoints::ProviderEndpointOverrides,
reporter::styles::Styles,
rule_loader::RuleLoader,
rule_profiling::ConcurrentRuleProfiler,
@ -46,12 +47,14 @@ use crate::{
validation_rate_limit::ValidationRateLimiter,
};
/// Shared validation dependencies: (liquid parser, HTTP clients, validation cache, rate limiter).
/// Shared validation dependencies:
/// (liquid parser, HTTP clients, validation cache, rate limiter, provider endpoint overrides).
type ValidationDeps = Arc<(
liquid::Parser,
crate::validation::ValidationClients,
Arc<SkipMap<String, CachedResponse>>,
Option<Arc<ValidationRateLimiter>>,
Arc<ProviderEndpointOverrides>,
)>;
pub async fn run_scan(
@ -159,6 +162,7 @@ pub async fn run_async_scan(
let validation_rate_limiter =
ValidationRateLimiter::from_cli(args.validation_rps, &args.validation_rps_rule)?
.map(Arc::new);
let provider_endpoints = Arc::new(ProviderEndpointOverrides::from_global_args(global_args)?);
let validation_deps: Option<ValidationDeps> = if !args.no_validate {
info!("Starting secret validation phase...");
@ -170,6 +174,7 @@ pub async fn run_async_scan(
)?,
Arc::new(SkipMap::new()),
validation_rate_limiter.clone(),
Arc::clone(&provider_endpoints),
)))
} else {
None
@ -517,8 +522,8 @@ async fn run_validation_phase(
access_map_collector: Option<AccessMapCollector>,
) -> Result<()> {
if let Some(validation) = validation_deps {
let (parser, clients, cache, rate_limiter) =
(&validation.0, &validation.1, &validation.2, &validation.3);
let (parser, clients, cache, rate_limiter, provider_endpoints) =
(&validation.0, &validation.1, &validation.2, &validation.3, &validation.4);
run_secret_validation(
Arc::clone(datastore),
parser,
@ -528,6 +533,7 @@ async fn run_validation_phase(
match_range,
access_map_collector,
rate_limiter.clone(),
provider_endpoints.clone(),
Duration::from_secs(args.validation_timeout),
args.validation_retries,
effective_max_validation_body_len(args),
@ -661,8 +667,8 @@ async fn run_parallel_scan(
// Validate initial (non-repo) matches
if let Some(validation) = validation_deps {
let (parser, clients, cache, rate_limiter) =
(&validation.0, &validation.1, &validation.2, &validation.3);
let (parser, clients, cache, rate_limiter, provider_endpoints) =
(&validation.0, &validation.1, &validation.2, &validation.3, &validation.4);
let initial_match_count = { datastore.lock().unwrap().get_matches().len() };
if initial_match_count > 0 {
run_secret_validation(
@ -674,6 +680,7 @@ async fn run_parallel_scan(
Some(0..initial_match_count),
access_map_collector.clone(),
rate_limiter.clone(),
provider_endpoints.clone(),
Duration::from_secs(args.validation_timeout),
args.validation_retries,
effective_max_validation_body_len(args),
@ -749,8 +756,13 @@ async fn run_parallel_scan(
}
if let Some(validation) = validation_deps.clone() {
let (parser, clients, cache, rate_limiter) =
(&validation.0, &validation.1, &validation.2, &validation.3);
let (parser, clients, cache, rate_limiter, provider_endpoints) = (
&validation.0,
&validation.1,
&validation.2,
&validation.3,
&validation.4,
);
let match_count =
{ repo_datastore.lock().unwrap().get_matches().len() };
if match_count > 0 {
@ -763,6 +775,7 @@ async fn run_parallel_scan(
Some(0..match_count),
access_map.clone(),
rate_limiter.clone(),
provider_endpoints.clone(),
Duration::from_secs(args.validation_timeout),
args.validation_retries,
effective_max_validation_body_len(&args),

View file

@ -23,6 +23,7 @@ use crate::{
findings_store::{FindingsStore, FindingsStoreMessage},
location::OffsetSpan,
matcher::OwnedBlobMatch,
provider_endpoints::ProviderEndpointOverrides,
rules::rule::Validation,
validation::{
CachedResponse, collect_variables_and_dependencies, utils, validate_single_match,
@ -421,6 +422,7 @@ pub async fn run_secret_validation(
range: Option<std::ops::Range<usize>>,
access_map: Option<AccessMapCollector>,
rate_limiter: Option<Arc<ValidationRateLimiter>>,
provider_endpoints: Arc<ProviderEndpointOverrides>,
validation_timeout: Duration,
validation_retries: u32,
max_body_len: usize,
@ -536,6 +538,7 @@ pub async fn run_secret_validation(
let pb = pb.clone();
let access_map = access_map.clone();
let rate_limiter = rate_limiter.clone();
let provider_endpoints = provider_endpoints.clone();
let empty_dep_vars = &empty_dep_vars;
let empty_missing = &empty_missing;
let empty_cache = empty_cache.clone();
@ -577,6 +580,7 @@ pub async fn run_secret_validation(
&cache_glob,
access_map.as_ref(),
rate_limiter.as_deref(),
&provider_endpoints,
validation_timeout,
validation_retries,
max_body_len,
@ -690,6 +694,7 @@ pub async fn run_secret_validation(
let cache_glob = cache.clone();
let access_map = access_map.clone();
let rate_limiter = rate_limiter.clone();
let provider_endpoints = provider_endpoints.clone();
let validation_timeout = validation_timeout;
let validation_retries = validation_retries;
@ -730,6 +735,7 @@ pub async fn run_secret_validation(
let cache_glob = cache_glob.clone();
let access_map = access_map.clone();
let rate_limiter = rate_limiter.clone();
let provider_endpoints = provider_endpoints.clone();
async move {
validate_single(
&mut rep,
@ -744,6 +750,7 @@ pub async fn run_secret_validation(
&cache_glob,
access_map.as_ref(),
rate_limiter.as_deref(),
&provider_endpoints,
validation_timeout,
validation_retries,
max_body_len,
@ -839,6 +846,7 @@ async fn validate_single(
cache2: &Arc<SkipMap<String, CachedResponse>>,
access_map: Option<&AccessMapCollector>,
rate_limiter: Option<&ValidationRateLimiter>,
provider_endpoints: &Arc<ProviderEndpointOverrides>,
validation_timeout: Duration,
validation_retries: u32,
max_body_len: usize,
@ -905,6 +913,7 @@ async fn validate_single(
validation_timeout,
validation_retries,
rate_limiter,
provider_endpoints.as_ref(),
max_body_len,
)
.boxed(),

View file

@ -23,6 +23,9 @@ use crate::{
cli::global::TlsMode,
location::OffsetSpan,
matcher::{OwnedBlobMatch, SerializableCaptures},
provider_endpoints::{
ProviderEndpointOverrides, endpoint_var_names, hydrate_endpoint_globals_for_rule,
},
rules::rule::Validation,
validation_body::{self},
};
@ -441,6 +444,7 @@ pub async fn validate_single_match(
validation_timeout: Duration,
validation_retries: u32,
rate_limiter: Option<&crate::validation_rate_limit::ValidationRateLimiter>,
provider_endpoints: &ProviderEndpointOverrides,
max_body_len: usize,
) {
let fp = validation_dedup_key(m);
@ -456,6 +460,7 @@ pub async fn validate_single_match(
validation_timeout,
validation_retries,
rate_limiter,
provider_endpoints,
max_body_len,
)
.boxed(),
@ -499,6 +504,7 @@ async fn timed_validate_single_match<'a>(
validation_timeout: Duration,
validation_retries: u32,
rate_limiter: Option<&crate::validation_rate_limit::ValidationRateLimiter>,
provider_endpoints: &ProviderEndpointOverrides,
max_body_len: usize,
) {
// Select the appropriate HTTP client based on rule's TLS mode preference
@ -595,6 +601,8 @@ async fn timed_validate_single_match<'a>(
let mut globals = Object::new();
populate_globals_from_captures(&mut globals, &captured_values);
hydrate_endpoint_globals_for_rule(m.rule.id(), &mut globals);
provider_endpoints.apply_scan_overrides(&mut globals);
// Persist named captures (non-TOKEN) for validate/revoke command generation.
// This is especially important for gRPC validators like Modal where TOKEN_ID is required.
@ -604,6 +612,13 @@ async fn timed_validate_single_match<'a>(
}
m.dependent_captures.entry(k.to_uppercase()).or_insert_with(|| v.clone());
}
for endpoint_var in endpoint_var_names() {
if let Some(value) = globals.get(*endpoint_var).and_then(|v| v.as_scalar()) {
m.dependent_captures
.entry((*endpoint_var).to_string())
.or_insert_with(|| value.to_kstr().to_string());
}
}
{
let rule_syntax = m.rule.syntax();