forked from mirrors/kingfisher
Added provider endpoint overrides for validation and revocation via global --endpoint PROVIDER=URL and --endpoint-config FILE, with built-in support for self-hosted GitHub, GitLab, Gitea, Jira, Confluence, and Artifactory instances.
This commit is contained in:
parent
5465d903cf
commit
19dafa42ea
19 changed files with 790 additions and 141 deletions
|
|
@ -32,7 +32,7 @@ rules:
|
|||
- 200
|
||||
type: StatusMatch
|
||||
- type: JsonValid
|
||||
url: https://{{ JFROGURL }}/artifactory/api/repositories
|
||||
url: '{{ ARTIFACTORY_BASE_URL }}/artifactory/api/repositories'
|
||||
references:
|
||||
- https://jfrog.com/help/r/jfrog-platform-administration-documentation/access-tokens
|
||||
- https://jfrog.com/help/r/jfrog-rest-apis/authentication
|
||||
|
|
@ -93,7 +93,7 @@ rules:
|
|||
- status:
|
||||
- 200
|
||||
type: StatusMatch
|
||||
url: https://{{ JFROGURL }}/artifactory/api/repositories
|
||||
url: '{{ ARTIFACTORY_BASE_URL }}/artifactory/api/repositories'
|
||||
references:
|
||||
- https://jfrog.com/help/r/jfrog-platform-administration-documentation/access-tokens
|
||||
- https://jfrog.com/help/r/jfrog-rest-apis/authentication
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ rules:
|
|||
- type: WordMatch
|
||||
words:
|
||||
- '"type":"known"'
|
||||
url: https://{{ CONFLUENCEDCDOMAIN }}/rest/api/user/current
|
||||
url: '{{ CONFLUENCE_BASE_URL }}/rest/api/user/current'
|
||||
depends_on_rule:
|
||||
- rule_id: kingfisher.confluence.2
|
||||
variable: CONFLUENCEDCDOMAIN
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: https://gitea.com/api/v1/user
|
||||
url: '{{ GITEA_API_BASE_URL }}/user'
|
||||
headers:
|
||||
Authorization: token {{ TOKEN }}
|
||||
Accept: application/json
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: https://api.github.com/user
|
||||
url: '{{ GITHUB_API_BASE_URL }}/user'
|
||||
headers:
|
||||
Authorization: token {{ TOKEN }}
|
||||
Accept: application/vnd.github+json
|
||||
|
|
@ -44,7 +44,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: POST
|
||||
url: https://api.github.com/credentials/revoke
|
||||
url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke'
|
||||
headers:
|
||||
Accept: application/vnd.github+json
|
||||
X-GitHub-Api-Version: 2026-03-10
|
||||
|
|
@ -84,7 +84,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: https://api.github.com/user
|
||||
url: '{{ GITHUB_API_BASE_URL }}/user'
|
||||
headers:
|
||||
Authorization: token {{ TOKEN }}
|
||||
Accept: application/vnd.github+json
|
||||
|
|
@ -100,7 +100,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: POST
|
||||
url: https://api.github.com/credentials/revoke
|
||||
url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke'
|
||||
headers:
|
||||
Accept: application/vnd.github+json
|
||||
X-GitHub-Api-Version: 2026-03-10
|
||||
|
|
@ -137,7 +137,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: https://api.github.com/user
|
||||
url: '{{ GITHUB_API_BASE_URL }}/user'
|
||||
headers:
|
||||
Authorization: token {{ TOKEN }}
|
||||
Accept: application/vnd.github+json
|
||||
|
|
@ -153,7 +153,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: POST
|
||||
url: https://api.github.com/credentials/revoke
|
||||
url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke'
|
||||
headers:
|
||||
Accept: application/vnd.github+json
|
||||
X-GitHub-Api-Version: 2026-03-10
|
||||
|
|
@ -189,7 +189,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: https://api.github.com/user
|
||||
url: '{{ GITHUB_API_BASE_URL }}/user'
|
||||
headers:
|
||||
Authorization: token {{ TOKEN }}
|
||||
Accept: application/vnd.github+json
|
||||
|
|
@ -205,7 +205,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: POST
|
||||
url: https://api.github.com/credentials/revoke
|
||||
url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke'
|
||||
headers:
|
||||
Accept: application/vnd.github+json
|
||||
X-GitHub-Api-Version: 2026-03-10
|
||||
|
|
@ -234,7 +234,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: https://api.github.com/user
|
||||
url: '{{ GITHUB_API_BASE_URL }}/user'
|
||||
headers:
|
||||
Authorization: token {{ TOKEN }}
|
||||
Accept: application/vnd.github+json
|
||||
|
|
@ -250,7 +250,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: DELETE
|
||||
url: https://api.github.com/installation/token
|
||||
url: '{{ GITHUB_API_BASE_URL }}/installation/token'
|
||||
headers:
|
||||
Authorization: token {{ TOKEN }}
|
||||
Accept: application/vnd.github+json
|
||||
|
|
@ -281,7 +281,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: https://api.github.com/user
|
||||
url: '{{ GITHUB_API_BASE_URL }}/user'
|
||||
headers:
|
||||
Authorization: token {{ TOKEN }}
|
||||
Accept: application/vnd.github+json
|
||||
|
|
@ -297,7 +297,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: POST
|
||||
url: https://api.github.com/credentials/revoke
|
||||
url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke'
|
||||
headers:
|
||||
Accept: application/vnd.github+json
|
||||
X-GitHub-Api-Version: 2026-03-10
|
||||
|
|
@ -346,7 +346,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: POST
|
||||
url: "https://github.com/login/oauth/access_token"
|
||||
url: '{{ GITHUB_WEB_BASE_URL }}/login/oauth/access_token'
|
||||
headers:
|
||||
Accept: "application/json"
|
||||
Content-Type: "application/json"
|
||||
|
|
@ -383,7 +383,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: https://api.github.com/user
|
||||
url: '{{ GITHUB_API_BASE_URL }}/user'
|
||||
headers:
|
||||
Authorization: token {{ TOKEN }}
|
||||
Accept: application/vnd.github+json
|
||||
|
|
@ -399,11 +399,11 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: DELETE
|
||||
url: https://api.github.com/installation/token
|
||||
url: '{{ GITHUB_API_BASE_URL }}/installation/token'
|
||||
headers:
|
||||
Authorization: token {{ TOKEN }}
|
||||
Accept: application/vnd.github+json
|
||||
response_matcher:
|
||||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [204]
|
||||
status: [204]
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ rules:
|
|||
- type: WordMatch
|
||||
words:
|
||||
- '"id"'
|
||||
url: https://gitlab.com/api/v4/personal_access_tokens/self
|
||||
url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self'
|
||||
revocation:
|
||||
type: Http
|
||||
content:
|
||||
|
|
@ -46,7 +46,7 @@ rules:
|
|||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [204]
|
||||
url: https://gitlab.com/api/v4/personal_access_tokens/self
|
||||
url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self'
|
||||
|
||||
- name: GitLab Runner Registration Token
|
||||
id: kingfisher.gitlab.2
|
||||
|
|
@ -92,7 +92,7 @@ rules:
|
|||
- '"token is missing"'
|
||||
- '"403 Forbidden"'
|
||||
negative: true
|
||||
url: https://gitlab.com/api/v4/runners/verify
|
||||
url: '{{ GITLAB_API_BASE_URL }}/runners/verify'
|
||||
|
||||
- name: GitLab Pipeline Trigger Token
|
||||
id: kingfisher.gitlab.3
|
||||
|
|
@ -131,7 +131,7 @@ rules:
|
|||
- '"token is missing"'
|
||||
- '"403 Forbidden"'
|
||||
negative: true
|
||||
url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }}
|
||||
url: '{{ GITLAB_API_BASE_URL }}/ci/pipeline_triggers/{{ TOKEN }}'
|
||||
- name: GitLab Private Token - Routable Format
|
||||
id: kingfisher.gitlab.4
|
||||
pattern: |
|
||||
|
|
@ -178,7 +178,7 @@ rules:
|
|||
- type: WordMatch
|
||||
words:
|
||||
- '"id"'
|
||||
url: https://gitlab.com/api/v4/personal_access_tokens/self
|
||||
url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self'
|
||||
revocation:
|
||||
type: Http
|
||||
content:
|
||||
|
|
@ -190,7 +190,7 @@ rules:
|
|||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [204]
|
||||
url: https://gitlab.com/api/v4/personal_access_tokens/self
|
||||
url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self'
|
||||
|
||||
- name: GitLab CI/CD Job Token
|
||||
id: kingfisher.gitlab.5
|
||||
|
|
@ -219,7 +219,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: https://gitlab.com/api/v4/job
|
||||
url: '{{ GITLAB_API_BASE_URL }}/job'
|
||||
headers:
|
||||
JOB-TOKEN: '{{ TOKEN }}'
|
||||
response_matcher:
|
||||
|
|
@ -393,7 +393,7 @@ rules:
|
|||
- '"token is missing"'
|
||||
- '"403 Forbidden"'
|
||||
negative: true
|
||||
url: https://gitlab.com/api/v4/runners/verify
|
||||
url: '{{ GITLAB_API_BASE_URL }}/runners/verify'
|
||||
|
||||
- name: GitLab Runner Authentication Token - Routable Format
|
||||
id: kingfisher.gitlab.13
|
||||
|
|
@ -436,7 +436,7 @@ rules:
|
|||
- '"token is missing"'
|
||||
- '"403 Forbidden"'
|
||||
negative: true
|
||||
url: https://gitlab.com/api/v4/runners/verify
|
||||
url: '{{ GITLAB_API_BASE_URL }}/runners/verify'
|
||||
|
||||
- name: GitLab SCIM Token
|
||||
id: kingfisher.gitlab.14
|
||||
|
|
@ -481,7 +481,7 @@ rules:
|
|||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: https://gitlab.com/api/v4/user
|
||||
url: '{{ GITLAB_API_BASE_URL }}/user'
|
||||
headers:
|
||||
Cookie: "_gitlab_session={{ TOKEN }}"
|
||||
response_matcher:
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ rules:
|
|||
- status:
|
||||
- 200
|
||||
type: StatusMatch
|
||||
url: https://{{ DOMAIN }}/rest/api/3/dashboard
|
||||
url: '{{ JIRA_CLOUD_BASE_URL }}/rest/api/3/dashboard'
|
||||
references:
|
||||
- https://developer.atlassian.com/cloud/jira/platform/basic-auth-for-rest-apis/
|
||||
depends_on_rule:
|
||||
|
|
@ -97,7 +97,7 @@ rules:
|
|||
- 200
|
||||
type: StatusMatch
|
||||
- type: JsonValid
|
||||
url: https://{{ JIRADCDOMAIN }}/rest/api/latest/myself
|
||||
url: '{{ JIRA_BASE_URL }}/rest/api/latest/myself'
|
||||
revocation:
|
||||
type: HttpMultiStep
|
||||
content:
|
||||
|
|
@ -105,7 +105,7 @@ rules:
|
|||
- name: lookup_token_id
|
||||
request:
|
||||
method: GET
|
||||
url: https://{{ JIRADCDOMAIN }}/rest/pat/latest/tokens
|
||||
url: '{{ JIRA_BASE_URL }}/rest/pat/latest/tokens'
|
||||
headers:
|
||||
Accept: application/json
|
||||
Authorization: Bearer {{ TOKEN }}
|
||||
|
|
@ -120,7 +120,7 @@ rules:
|
|||
- name: revoke_token
|
||||
request:
|
||||
method: DELETE
|
||||
url: https://{{ JIRADCDOMAIN }}/rest/pat/latest/tokens/{{ JIRA_TOKEN_ID }}
|
||||
url: '{{ JIRA_BASE_URL }}/rest/pat/latest/tokens/{{ JIRA_TOKEN_ID }}'
|
||||
headers:
|
||||
Authorization: Bearer {{ TOKEN }}
|
||||
response_matcher:
|
||||
|
|
@ -152,4 +152,4 @@ rules:
|
|||
- jira-staging.corp.mongodb.com
|
||||
- https://jira.corp.internal:8443
|
||||
references:
|
||||
- https://confluence.atlassian.com/adminjiraserver/jira-applications-base-url-938846869.html
|
||||
- https://confluence.atlassian.com/adminjiraserver/jira-applications-base-url-938846869.html
|
||||
|
|
|
|||
|
|
@ -17,9 +17,10 @@ tags:
|
|||
|
||||
# Beyond Detection: Live Validation, Blast Radius, and One-Command Revocation
|
||||
|
||||
A regex match on `AKIA[0-9A-Z]{16}` is the easy part. Every secret scanner
|
||||
finds those. The hard part — and the part that decides whether your Tuesday
|
||||
afternoon turns into an incident — is what happens **after** the match.
|
||||
A regex hit is the easy part. Any scanner can tell you that a string looks
|
||||
like an AWS access key or a GitHub token. The harder question is what to do
|
||||
next, and that is usually what turns a scan result into either a routine
|
||||
cleanup task or a real incident.
|
||||
|
||||
Kingfisher answers the three questions that actually matter:
|
||||
|
||||
|
|
@ -32,9 +33,12 @@ Kingfisher answers the three questions that actually matter:
|
|||
## 1. Live validation, not just pattern matching
|
||||
|
||||
Out of Kingfisher's 820 standalone detectors, **484 include live validation
|
||||
logic**. Every one of those calls the provider's own API and reports the
|
||||
credential as `Active`, `Inactive`, or `NotAttempted` — so a 4,000-finding
|
||||
scan collapses to the dozen findings that are actually live.
|
||||
logic**. When a provider exposes a safe check call, Kingfisher uses that
|
||||
provider's own API to report each credential as `Active`, `Inactive`, or
|
||||
`NotAttempted`.
|
||||
|
||||
That changes the output from "thousands of regex matches" to a much shorter
|
||||
list of findings that actually authenticate today.
|
||||
|
||||
Validation runs automatically when you scan:
|
||||
|
||||
|
|
@ -61,15 +65,15 @@ kingfisher validate --rule gcp "$(cat service-account.json)"
|
|||
kingfisher validate --rule postgres "$POSTGRES_URI"
|
||||
```
|
||||
|
||||
Validation logic lives in the rule YAML, not in compiled Rust, which is
|
||||
why coverage is high and growing — every new detector ships with a
|
||||
validation block whenever the provider exposes a safe check call.
|
||||
Most validation logic lives in the rule YAML rather than bespoke compiled
|
||||
code. That makes it practical to grow coverage rule-by-rule instead of
|
||||
treating validation as a separate engineering project.
|
||||
|
||||
## 2. Blast radius mapping — what does this token actually reach?
|
||||
|
||||
A leaked AWS key bound to a single read-only S3 bucket and a leaked AWS key
|
||||
bound to organization-wide `AdministratorAccess` are not the same incident.
|
||||
The first is a Friday afternoon ticket. The second is a war room.
|
||||
The first is a ticket. The second is a war room.
|
||||
|
||||
Add `--access-map` to a scan and Kingfisher authenticates each live
|
||||
credential, enumerates what it can do, and writes the result alongside
|
||||
|
|
@ -82,10 +86,10 @@ kingfisher scan github --organization my-org \
|
|||
--output findings.json
|
||||
```
|
||||
|
||||
Each cloud finding gets an `access_map` block with the identity, the
|
||||
permissions, and the concrete resources reachable. Today this is supported
|
||||
for **AWS, GCP, Azure Storage, Azure DevOps, GitHub, GitLab, Slack, and
|
||||
Microsoft Teams.**
|
||||
Each supported finding gets an `access_map` block with the identity,
|
||||
permissions, and concrete resources reachable. Today that includes
|
||||
**AWS, GCP, Azure Storage, Azure DevOps, GitHub, GitLab, Slack, and
|
||||
Microsoft Teams**.
|
||||
|
||||
You can also run it standalone — useful when triaging a single credential
|
||||
you've fished out of a paste or a customer report:
|
||||
|
|
@ -102,18 +106,17 @@ kingfisher access-map gcp ./service-account.json --json-out gcp.access-map.json
|
|||
```
|
||||
|
||||
The HTML report viewer (`--format html`) renders the access map as a
|
||||
clickable tree — identity at the root, then services, then individual
|
||||
resources and permissions. It's the fastest way to get a non-engineer
|
||||
stakeholder to grasp severity in five seconds rather than five minutes.
|
||||
clickable tree: identity at the root, then services, then individual
|
||||
resources and permissions. It is a much faster way to explain severity to
|
||||
an incident commander or manager than pasting IAM JSON into chat.
|
||||
|
||||
## 3. Revocation — kill the token from where you found it
|
||||
|
||||
Validation tells you a credential is live. Blast radius tells you why it's
|
||||
urgent. Revocation tells you it's done.
|
||||
urgent. Revocation closes the loop.
|
||||
|
||||
For every rule whose provider exposes a safe revocation API, Kingfisher
|
||||
ships the revocation call as part of the rule definition. One command,
|
||||
no console:
|
||||
ships the revocation call as part of the rule definition:
|
||||
|
||||
```bash
|
||||
# Revoke a GitHub PAT
|
||||
|
|
@ -134,17 +137,17 @@ kingfisher revoke --rule aws \
|
|||
kingfisher revoke --rule gcp "$(cat service-account.json)"
|
||||
```
|
||||
|
||||
The same Liquid templating that powers the validation request handles
|
||||
revocation — including multi-step flows for providers that need a separate
|
||||
key-id lookup before disabling. (See
|
||||
The same Liquid templating that powers validation also powers revocation,
|
||||
including multi-step flows for providers that require a lookup before
|
||||
disabling the credential. See
|
||||
[`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md#multi-step-revocation)
|
||||
for the schema.)
|
||||
for the schema.
|
||||
|
||||
This matters in two scenarios:
|
||||
|
||||
- **Mass revocation after a leak.** A laptop or a CI runner gets popped and
|
||||
you have a list of fingerprints. `kingfisher revoke` walks the list, no
|
||||
human pivoting between five provider consoles.
|
||||
you have a list of live credentials. `kingfisher revoke` walks that list
|
||||
without forcing a human to pivot between provider consoles.
|
||||
- **Automated response.** Wire `kingfisher revoke` into the same job that
|
||||
scanned and validated, gated by an allow-list of rule IDs you've decided
|
||||
are safe to auto-revoke (typically: short-lived CI tokens, dev-environment
|
||||
|
|
@ -152,7 +155,7 @@ This matters in two scenarios:
|
|||
|
||||
## The combined workflow
|
||||
|
||||
In practice these three primitives chain into a single pipeline:
|
||||
In practice, these three capabilities collapse into one response workflow:
|
||||
|
||||
```bash
|
||||
# 1. Scan + validate + map blast radius in one call
|
||||
|
|
@ -162,16 +165,16 @@ kingfisher scan github --organization my-org \
|
|||
--output findings.json
|
||||
|
||||
# 2. Pull just the live findings that have an access map
|
||||
jq '[.[] | select(.validation.status == "Active")
|
||||
| select(.access_map.permissions
|
||||
| any(. == "*" or contains("Admin")))]' \
|
||||
jq '.findings
|
||||
| map(select(.validation.status == "Active"))
|
||||
| map(select(.access_map != null))' \
|
||||
findings.json > urgent.json
|
||||
|
||||
# 3. Triage in the HTML viewer (or revoke programmatically)
|
||||
kingfisher view findings.json
|
||||
```
|
||||
|
||||
Three commands, full incident workflow — find, prioritize, kill.
|
||||
That is the full incident loop in three steps: find, prioritize, revoke.
|
||||
|
||||
## Why this is the right shape
|
||||
|
||||
|
|
@ -183,9 +186,9 @@ lives), reusing typed validators for the common families (AWS, GCP, JWT,
|
|||
Postgres, MongoDB, MySQL, JDBC, Azure Storage, Coinbase), and letting rule
|
||||
authors drop down to a `Raw` validator only for genuinely odd providers.
|
||||
|
||||
The upshot for users: when a new detector lands, you almost always get
|
||||
validation, blast radius, and revocation along with it — not three
|
||||
separate roadmaps.
|
||||
The practical result is that new rules can ship with detection plus
|
||||
post-detection response logic, instead of detection today and validation or
|
||||
revocation on some later roadmap.
|
||||
|
||||
## Next up
|
||||
|
||||
|
|
@ -196,6 +199,5 @@ separate roadmaps.
|
|||
- **Docker image scanning** — pulling and scanning every layer for
|
||||
embedded secrets.
|
||||
|
||||
Got a provider you'd love to see validation or revocation support for?
|
||||
Open an issue at
|
||||
[mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues).
|
||||
If there is a provider you want validation or revocation support for, open
|
||||
an issue at [mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues).
|
||||
|
|
|
|||
|
|
@ -16,11 +16,14 @@ tags:
|
|||
|
||||
# Scanning an Entire GitHub Organization for Leaked Secrets
|
||||
|
||||
Most organizations have hundreds of repositories — some abandoned, some active,
|
||||
plenty inherited from acquisitions. A leaked AWS key in a five-year-old archived
|
||||
repo is just as dangerous as one in `main` today. Kingfisher can enumerate every
|
||||
repo in a GitHub organization, scan the full git history, and then **validate
|
||||
which credentials are still live** so you know what to rotate first.
|
||||
Most organizations have more GitHub surface area than they think: active
|
||||
services, abandoned repositories, internal tooling, forks, experiments, and
|
||||
projects inherited through acquisitions. A credential leaked in a five-year-old
|
||||
archived repo can still be live today.
|
||||
|
||||
Kingfisher can enumerate every repository in a GitHub organization, scan the
|
||||
full git history, and then **validate which credentials are still live** so
|
||||
you can focus on what needs rotation first.
|
||||
|
||||
<!-- more -->
|
||||
|
||||
|
|
@ -42,14 +45,15 @@ export KF_GITHUB_TOKEN=ghp_yourTokenHere
|
|||
kingfisher scan github --organization my-org
|
||||
```
|
||||
|
||||
That's it — Kingfisher enumerates every repo, clones each one, scans the full
|
||||
commit history, runs all 942 detection rules, and validates findings against
|
||||
That single command enumerates the org, clones each repository, scans working
|
||||
tree content plus git history, and validates supported findings against
|
||||
provider APIs.
|
||||
|
||||
## Tuning for real-world orgs
|
||||
|
||||
Real orgs have huge monorepos, archived junk, and forks you don't care about.
|
||||
Three flags do most of the work:
|
||||
Real organizations have huge monorepos, archived junk, mirrored forks, and
|
||||
repositories you already know are out of scope. Three flags handle most of
|
||||
the tuning:
|
||||
|
||||
```bash
|
||||
kingfisher scan github --organization my-org \
|
||||
|
|
@ -61,8 +65,8 @@ kingfisher scan github --organization my-org \
|
|||
--output kf-findings.sarif
|
||||
```
|
||||
|
||||
- **`--repo-clone-limit`** caps the number of clones per scan. Useful for
|
||||
staged rollouts ("first 500 repos by stars") or to stay under disk budget.
|
||||
- **`--repo-clone-limit`** caps the number of clones per scan. It is useful
|
||||
for staged rollouts or staying under a disk budget.
|
||||
- **`--github-exclude`** accepts exact `OWNER/REPO` strings or gitignore-style
|
||||
globs (`my-org/*-archive`). Repeat the flag for each pattern. Matching is
|
||||
case-insensitive.
|
||||
|
|
@ -72,24 +76,25 @@ kingfisher scan github --organization my-org \
|
|||
## Pulling in issues, wikis, and gists
|
||||
|
||||
Secrets don't only live in code. Issues and pull request descriptions are a
|
||||
common leak source — someone pastes a stack trace with a JWT, or an
|
||||
"oncall handoff" issue with a temporary token that never got rotated. Add
|
||||
common leak source: someone pastes a stack trace with a JWT, or an
|
||||
"on-call handoff" issue with a temporary token that never gets rotated. Add
|
||||
`--repo-artifacts` to fetch these:
|
||||
|
||||
```bash
|
||||
kingfisher scan github --organization my-org --repo-artifacts
|
||||
```
|
||||
|
||||
This pulls each repo's issues (including PRs), wiki, and any **public** gists
|
||||
owned by the repo owner, and scans them all. It does cost API calls, so plan
|
||||
accordingly if you're near a rate limit.
|
||||
This pulls each repo's issues, pull requests, wiki, and any **public** gists
|
||||
owned by the repo owner, then scans that material as well. It does consume API
|
||||
calls, so budget for that if the org is large or your token is already near a
|
||||
rate limit.
|
||||
|
||||
## Following the people, not just the org
|
||||
|
||||
This is the trick that catches what every other scanner misses. Developers
|
||||
leak secrets in *personal* repositories — side projects, dotfiles, throwaway
|
||||
forks. If a contributor to your org has a public personal repo with an active
|
||||
token that grants access to org infrastructure, that's a real incident.
|
||||
Developers also leak secrets in *personal* repositories: side projects,
|
||||
dotfiles, and throwaway forks. If a contributor to your org has a public repo
|
||||
containing a still-live credential that reaches company infrastructure, that is
|
||||
still your incident.
|
||||
|
||||
Pass a single repo URL with `--include-contributors` and Kingfisher will
|
||||
enumerate the contributors, then clone and scan **every public repo they own**:
|
||||
|
|
@ -100,14 +105,14 @@ kingfisher scan https://github.com/my-org/critical-service \
|
|||
--repo-clone-limit 200
|
||||
```
|
||||
|
||||
This is a noisy operation — start with one or two critical repos rather than
|
||||
the whole org. GitHub will rate-limit aggressive enumeration, so a token
|
||||
(`KF_GITHUB_TOKEN`) is required in practice.
|
||||
This is a noisy operation. Start with one or two critical repositories rather
|
||||
than the entire organization. GitHub will also rate-limit aggressive
|
||||
enumeration, so `KF_GITHUB_TOKEN` is effectively required.
|
||||
|
||||
## Reading the output
|
||||
|
||||
The default `pretty` output is human-friendly for terminals. For automation,
|
||||
pick the format that matches your downstream tool:
|
||||
The default `pretty` output is fine for interactive terminal use. For
|
||||
automation, pick a format that matches your downstream consumer:
|
||||
|
||||
```bash
|
||||
# JSON for custom tooling
|
||||
|
|
@ -120,9 +125,9 @@ kingfisher scan github --organization my-org --format sarif --output findings.sa
|
|||
kingfisher scan github --organization my-org --format toon
|
||||
```
|
||||
|
||||
The interactive HTML report is often the fastest way to triage a large scan —
|
||||
filter by rule, by validation status, or by repository, and click through to
|
||||
the exact commit and line:
|
||||
The interactive HTML report is often the fastest way to triage a large scan.
|
||||
You can filter by rule, validation status, or repository, then click through
|
||||
to the exact commit and line:
|
||||
|
||||
```bash
|
||||
kingfisher scan github --organization my-org --format html --output kf-report.html
|
||||
|
|
@ -130,40 +135,39 @@ kingfisher scan github --organization my-org --format html --output kf-report.ht
|
|||
|
||||
## Triage by validation status
|
||||
|
||||
The single most important column in the output is **validation**. A live
|
||||
credential is a fire — a never-was-valid one is noise. Filter to live findings
|
||||
first:
|
||||
The single most important field in the output is **validation**. A live
|
||||
credential should be triaged immediately; a value that never authenticated is
|
||||
usually just cleanup work. Filter to live findings first:
|
||||
|
||||
```bash
|
||||
jq '.[] | select(.validation.status == "Active")' findings.json
|
||||
jq '.findings[] | select(.validation.status == "Active")' findings.json
|
||||
```
|
||||
|
||||
Then walk those credentials in order of blast radius. For AWS, GCP, GitHub,
|
||||
GitLab, and Slack tokens, Kingfisher already maps what each one can access —
|
||||
look at the `access_map` field in the JSON output, or the **Blast Radius**
|
||||
panel in the HTML report.
|
||||
Then prioritize by blast radius. For AWS, GCP, GitHub, GitLab, and Slack
|
||||
tokens, Kingfisher can already map what each credential can access. Look at
|
||||
the `access_map` field in JSON output, or the **Blast Radius** panel in the
|
||||
HTML report.
|
||||
|
||||
## Revoke from the CLI
|
||||
|
||||
For supported providers, you don't need to log into a console — Kingfisher can
|
||||
revoke directly:
|
||||
For supported providers, you do not need to pivot into the provider console.
|
||||
Kingfisher can revoke directly:
|
||||
|
||||
```bash
|
||||
kingfisher revoke --rule kingfisher.aws.access_key.1 AKIAEXAMPLE...
|
||||
```
|
||||
|
||||
Each rule that supports revocation declares the API call in its YAML. Today
|
||||
this works for AWS, GitHub, GitLab, Slack, and a growing list of SaaS
|
||||
providers — see [`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md)
|
||||
for the current list and how to add revocation to a custom rule.
|
||||
Each rule that supports revocation declares the API call in its YAML. See
|
||||
[`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md)
|
||||
for the revocation schema and how to add revocation to a custom rule.
|
||||
|
||||
## Wiring it into a recurring job
|
||||
|
||||
A first scan is the one-shot baseline. The real value is recurring scans
|
||||
catching new leaks within hours, not months. The simplest pattern is a nightly
|
||||
GitHub Action or scheduled CI job that runs the org scan, diffs against
|
||||
yesterday's findings, and pages on net-new live credentials. We'll cover that
|
||||
end-to-end in the next post.
|
||||
The first scan gives you a baseline. The real value comes from running the
|
||||
same workflow continuously so new leaks are caught within hours instead of
|
||||
months. A simple starting point is a nightly GitHub Action or scheduled CI
|
||||
job that runs the org scan, diffs against yesterday's findings, and alerts on
|
||||
net-new live credentials.
|
||||
|
||||
## What's next
|
||||
|
||||
|
|
@ -174,5 +178,5 @@ end-to-end in the next post.
|
|||
- **Docker image scanning** — pulling images directly and scanning every
|
||||
layer for embedded secrets.
|
||||
|
||||
If there's a workflow you'd like us to cover, open an issue at
|
||||
If there is a workflow you want us to cover, open an issue at
|
||||
[mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues).
|
||||
|
|
|
|||
|
|
@ -8,7 +8,8 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im
|
|||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [v1.98.0]
|
||||
- Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs_<APP_ID>_<JWT>`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. Bundled ruleset is now **943 rules** (821 standalone detectors + 122 dependent rules), with **485 standalone detectors** offering live validation.
|
||||
- Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs_<APP_ID>_<JWT>`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation.
|
||||
- Added provider endpoint overrides for validation and revocation via global `--endpoint PROVIDER=URL` and `--endpoint-config FILE`, with built-in support for self-hosted GitHub, GitLab, Gitea, Jira, Confluence, and Artifactory instances.
|
||||
|
||||
## [v1.97.0]
|
||||
- **Report viewer cross-tool triage:** when a Kingfisher report is loaded alongside a Gitleaks or TruffleHog report, matching imported findings are enriched with Kingfisher's validation verdict, validation response, validate command, and revoke command. Matching is keyed on `commit + file + line` with a `file + line` fallback, and enriched rows show an "Enriched by Kingfisher" callout in the detail panel plus an "Enriched" chip in the findings table. Added a **Source** column to the findings table; a new **Duplicates Removed by Tool** dashboard panel showing per-tool cards for Kingfisher / TruffleHog / Gitleaks; and an upload-time **Deduplicate findings** toggle (on by default) so users can inspect the raw rows before fingerprint dedup when needed.
|
||||
|
|
|
|||
|
|
@ -274,6 +274,57 @@ kingfisher validate --rule aws --arg AKIAEXAMPLE "secret_key"
|
|||
kingfisher validate --rule aws --var AKID=AKIAEXAMPLE "secret_key"
|
||||
```
|
||||
|
||||
**Provider endpoint overrides (`--endpoint` and `--endpoint-config`):**
|
||||
|
||||
Validation and revocation rules for providers that can be self-hosted (rather than used only as public SaaS) can be pointed at your own instance without editing rule YAML.
|
||||
|
||||
- `--endpoint PROVIDER=URL` sets an endpoint for the current command. Repeat it for multiple providers.
|
||||
- `--endpoint-config FILE` loads a YAML file with reusable endpoint overrides.
|
||||
- For self-hosted instances on private IPs or `localhost`, combine endpoint overrides with `--allow-internal-ips`.
|
||||
|
||||
Supported provider keys for endpoint overrides are:
|
||||
|
||||
- `github`
|
||||
- `gitlab`
|
||||
- `gitea`
|
||||
- `jira` (Jira Data Center / self-managed)
|
||||
- `jira-cloud`
|
||||
- `confluence`
|
||||
- `artifactory`
|
||||
|
||||
```bash
|
||||
# Validate a GitHub Enterprise token against a self-hosted instance
|
||||
kingfisher validate --rule github \
|
||||
--endpoint github=https://ghe.corp.example.com \
|
||||
"ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
|
||||
# Revoke a self-managed GitLab PAT
|
||||
kingfisher revoke --rule gitlab \
|
||||
--endpoint gitlab=https://gitlab.corp.example.com \
|
||||
"glpat-xxxxxxxxxxxxxxxxxxxx"
|
||||
|
||||
# Scan with an internal Artifactory validator target
|
||||
kingfisher scan ./repo \
|
||||
--endpoint artifactory=http://localhost:8071 \
|
||||
--allow-internal-ips
|
||||
```
|
||||
|
||||
Example endpoint config file:
|
||||
|
||||
```yaml
|
||||
endpoints:
|
||||
github: https://ghe.corp.example.com
|
||||
gitlab: https://gitlab.corp.example.com
|
||||
gitea: https://gitea.corp.example.com
|
||||
jira: https://jira.corp.example.com
|
||||
confluence: https://wiki.corp.example.com
|
||||
artifactory: http://localhost:8071
|
||||
```
|
||||
|
||||
```bash
|
||||
kingfisher scan ./repo --endpoint-config ./kingfisher-endpoints.yml --allow-internal-ips
|
||||
```
|
||||
|
||||
**Rule prefix matching:** Use partial rule IDs like `opsgenie` instead of the full `kingfisher.opsgenie.1`. If the prefix matches multiple rules, **all matching rules with compatible variables are tried**:
|
||||
|
||||
```bash
|
||||
|
|
@ -1082,8 +1133,11 @@ If you are scanning infrastructure that uses internal endpoints for credential v
|
|||
# Scan with SSRF protection disabled (allows requests to internal IPs)
|
||||
kingfisher scan --allow-internal-ips ./repo
|
||||
|
||||
# Also works with the validate command
|
||||
kingfisher validate --allow-internal-ips --rule kingfisher.artifactory.1
|
||||
# Also works with direct validation against a self-hosted endpoint
|
||||
kingfisher validate --allow-internal-ips \
|
||||
--endpoint artifactory=http://localhost:8071 \
|
||||
--rule kingfisher.artifactory.1 \
|
||||
"AKCp..."
|
||||
```
|
||||
|
||||
> **Warning:** Only use `--allow-internal-ips` when you trust the content being scanned. Malicious content could cause Kingfisher to make requests to internal services.
|
||||
|
|
|
|||
|
|
@ -269,6 +269,57 @@ kingfisher validate --rule aws --arg AKIAEXAMPLE "secret_key"
|
|||
kingfisher validate --rule aws --var AKID=AKIAEXAMPLE "secret_key"
|
||||
```
|
||||
|
||||
**Provider endpoint overrides (`--endpoint` and `--endpoint-config`):**
|
||||
|
||||
Validation and revocation rules for providers that can be self-hosted (rather than used only as public SaaS) can be pointed at your own instance without editing rule YAML.
|
||||
|
||||
- `--endpoint PROVIDER=URL` sets an endpoint for the current command. Repeat it for multiple providers.
|
||||
- `--endpoint-config FILE` loads a YAML file with reusable endpoint overrides.
|
||||
- For self-hosted instances on private IPs or `localhost`, combine endpoint overrides with `--allow-internal-ips`.
|
||||
|
||||
Supported provider keys for endpoint overrides are:
|
||||
|
||||
- `github`
|
||||
- `gitlab`
|
||||
- `gitea`
|
||||
- `jira` (Jira Data Center / self-managed)
|
||||
- `jira-cloud`
|
||||
- `confluence`
|
||||
- `artifactory`
|
||||
|
||||
```bash
|
||||
# Validate a GitHub Enterprise token against a self-hosted instance
|
||||
kingfisher validate --rule github \
|
||||
--endpoint github=https://ghe.corp.example.com \
|
||||
"ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
|
||||
# Revoke a self-managed GitLab PAT
|
||||
kingfisher revoke --rule gitlab \
|
||||
--endpoint gitlab=https://gitlab.corp.example.com \
|
||||
"glpat-xxxxxxxxxxxxxxxxxxxx"
|
||||
|
||||
# Scan with an internal Artifactory validator target
|
||||
kingfisher scan ./repo \
|
||||
--endpoint artifactory=http://localhost:8071 \
|
||||
--allow-internal-ips
|
||||
```
|
||||
|
||||
Example endpoint config file:
|
||||
|
||||
```yaml
|
||||
endpoints:
|
||||
github: https://ghe.corp.example.com
|
||||
gitlab: https://gitlab.corp.example.com
|
||||
gitea: https://gitea.corp.example.com
|
||||
jira: https://jira.corp.example.com
|
||||
confluence: https://wiki.corp.example.com
|
||||
artifactory: http://localhost:8071
|
||||
```
|
||||
|
||||
```bash
|
||||
kingfisher scan ./repo --endpoint-config ./kingfisher-endpoints.yml --allow-internal-ips
|
||||
```
|
||||
|
||||
**Rule prefix matching:** Use partial rule IDs like `opsgenie` instead of the full `kingfisher.opsgenie.1`. If the prefix matches multiple rules, **all matching rules with compatible variables are tried**:
|
||||
|
||||
```bash
|
||||
|
|
@ -1077,8 +1128,11 @@ If you are scanning infrastructure that uses internal endpoints for credential v
|
|||
# Scan with SSRF protection disabled (allows requests to internal IPs)
|
||||
kingfisher scan --allow-internal-ips ./repo
|
||||
|
||||
# Also works with the validate command
|
||||
kingfisher validate --allow-internal-ips --rule kingfisher.artifactory.1
|
||||
# Also works with direct validation against a self-hosted endpoint
|
||||
kingfisher validate --allow-internal-ips \
|
||||
--endpoint artifactory=http://localhost:8071 \
|
||||
--rule kingfisher.artifactory.1 \
|
||||
"AKCp..."
|
||||
```
|
||||
|
||||
> **Warning:** Only use `--allow-internal-ips` when you trust the content being scanned. Malicious content could cause Kingfisher to make requests to internal services.
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use std::io::IsTerminal;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use std::sync::LazyLock;
|
||||
|
||||
|
|
@ -144,6 +145,16 @@ pub struct GlobalArgs {
|
|||
#[arg(global = true, long = "user-agent-suffix", value_name = "SUFFIX")]
|
||||
pub user_agent_suffix: Option<String>,
|
||||
|
||||
/// Override provider API endpoints for validation/revocation (PROVIDER=URL), repeatable.
|
||||
///
|
||||
/// Supported providers: github, gitlab, gitea, jira, jira-cloud, confluence, artifactory.
|
||||
#[arg(global = true, long = "endpoint", value_name = "PROVIDER=URL")]
|
||||
pub endpoint: Vec<String>,
|
||||
|
||||
/// YAML file containing provider endpoint overrides.
|
||||
#[arg(global = true, long = "endpoint-config", value_name = "FILE")]
|
||||
pub endpoint_config: Option<PathBuf>,
|
||||
|
||||
// Internal fields (not CLI arguments)
|
||||
#[clap(skip)]
|
||||
pub color: Mode,
|
||||
|
|
@ -163,6 +174,8 @@ impl Default for GlobalArgs {
|
|||
self_update: false,
|
||||
no_update_check: false,
|
||||
user_agent_suffix: None,
|
||||
endpoint: Vec::new(),
|
||||
endpoint_config: None,
|
||||
color: Mode::Auto,
|
||||
progress: Mode::Auto,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ use tracing::debug;
|
|||
use crate::{
|
||||
cli::{commands::revoke::RevokeArgs, global::GlobalArgs},
|
||||
liquid_filters::register_all,
|
||||
provider_endpoints::{ProviderEndpointOverrides, hydrate_endpoint_globals_for_rule},
|
||||
rule_loader::RuleLoader,
|
||||
template_vars::extract_template_vars,
|
||||
validation::GLOBAL_USER_AGENT,
|
||||
|
|
@ -138,15 +139,22 @@ fn get_global_var(globals: &Object, name: &str) -> Option<String> {
|
|||
|
||||
/// Build the globals object for Liquid template rendering.
|
||||
fn build_globals(
|
||||
rule_id: &str,
|
||||
secret: &str,
|
||||
args: &[String],
|
||||
variables: &[String],
|
||||
template_vars: &BTreeSet<String>,
|
||||
endpoint_overrides: &ProviderEndpointOverrides,
|
||||
) -> Result<Object> {
|
||||
let mut globals = Object::new();
|
||||
globals.insert("TOKEN".into(), Value::scalar(secret.to_string()));
|
||||
|
||||
let auto_assign_vars: Vec<&String> = template_vars.iter().filter(|v| *v != "TOKEN").collect();
|
||||
endpoint_overrides.apply_defaults(&mut globals);
|
||||
|
||||
let auto_assign_vars: Vec<&String> = template_vars
|
||||
.iter()
|
||||
.filter(|v| *v != "TOKEN" && !globals.contains_key(v.as_str()))
|
||||
.collect();
|
||||
|
||||
for (i, arg_value) in args.iter().enumerate() {
|
||||
if i < auto_assign_vars.len() {
|
||||
|
|
@ -171,6 +179,8 @@ fn build_globals(
|
|||
globals.insert(name.into(), Value::scalar(value));
|
||||
}
|
||||
|
||||
hydrate_endpoint_globals_for_rule(rule_id, &mut globals);
|
||||
|
||||
Ok(globals)
|
||||
}
|
||||
|
||||
|
|
@ -553,6 +563,7 @@ pub async fn run_direct_revocation(
|
|||
|
||||
let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
|
||||
let timeout = Duration::from_secs(args.timeout);
|
||||
let endpoint_overrides = ProviderEndpointOverrides::from_global_args(global_args)?;
|
||||
|
||||
let mut results = Vec::new();
|
||||
|
||||
|
|
@ -597,7 +608,14 @@ pub async fn run_direct_revocation(
|
|||
}
|
||||
}
|
||||
|
||||
let globals = build_globals(&secret, &args.args, &args.variables, &template_vars)?;
|
||||
let globals = build_globals(
|
||||
&rule_id,
|
||||
&secret,
|
||||
&args.args,
|
||||
&args.variables,
|
||||
&template_vars,
|
||||
&endpoint_overrides,
|
||||
)?;
|
||||
|
||||
if !non_token_vars.is_empty() && !args.args.is_empty() {
|
||||
debug!(
|
||||
|
|
@ -1028,7 +1046,15 @@ mod tests {
|
|||
#[test]
|
||||
fn build_globals_sets_token() {
|
||||
let template_vars = BTreeSet::from(["TOKEN".to_string()]);
|
||||
let globals = build_globals("my-secret", &[], &[], &template_vars).unwrap();
|
||||
let globals = build_globals(
|
||||
"kingfisher.test.1",
|
||||
"my-secret",
|
||||
&[],
|
||||
&[],
|
||||
&template_vars,
|
||||
&ProviderEndpointOverrides::default(),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(globals.get("TOKEN"), Some(Value::scalar("my-secret".to_string())).as_ref());
|
||||
}
|
||||
|
||||
|
|
@ -1037,7 +1063,15 @@ mod tests {
|
|||
let template_vars =
|
||||
BTreeSet::from(["TOKEN".to_string(), "AKID".to_string(), "REGION".to_string()]);
|
||||
let args = vec!["my-akid".to_string(), "us-east-1".to_string()];
|
||||
let globals = build_globals("secret", &args, &[], &template_vars).unwrap();
|
||||
let globals = build_globals(
|
||||
"kingfisher.test.1",
|
||||
"secret",
|
||||
&args,
|
||||
&[],
|
||||
&template_vars,
|
||||
&ProviderEndpointOverrides::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(globals.get("TOKEN"), Some(Value::scalar("secret".to_string())).as_ref());
|
||||
assert_eq!(globals.get("AKID"), Some(Value::scalar("my-akid".to_string())).as_ref());
|
||||
|
|
@ -1048,7 +1082,15 @@ mod tests {
|
|||
fn build_globals_explicit_variables() {
|
||||
let template_vars = BTreeSet::from(["TOKEN".to_string(), "AKID".to_string()]);
|
||||
let vars = vec!["AKID=explicit-value".to_string()];
|
||||
let globals = build_globals("secret", &[], &vars, &template_vars).unwrap();
|
||||
let globals = build_globals(
|
||||
"kingfisher.test.1",
|
||||
"secret",
|
||||
&[],
|
||||
&vars,
|
||||
&template_vars,
|
||||
&ProviderEndpointOverrides::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(globals.get("AKID"), Some(Value::scalar("explicit-value".to_string())).as_ref());
|
||||
}
|
||||
|
|
@ -1057,7 +1099,14 @@ mod tests {
|
|||
fn build_globals_invalid_var_format() {
|
||||
let template_vars = BTreeSet::new();
|
||||
let vars = vec!["NO_EQUALS_SIGN".to_string()];
|
||||
let result = build_globals("secret", &[], &vars, &template_vars);
|
||||
let result = build_globals(
|
||||
"kingfisher.test.1",
|
||||
"secret",
|
||||
&[],
|
||||
&vars,
|
||||
&template_vars,
|
||||
&ProviderEndpointOverrides::default(),
|
||||
);
|
||||
assert!(result.is_err());
|
||||
assert!(result.unwrap_err().to_string().contains("Expected NAME=VALUE"));
|
||||
}
|
||||
|
|
@ -1066,7 +1115,14 @@ mod tests {
|
|||
fn build_globals_empty_var_name() {
|
||||
let template_vars = BTreeSet::new();
|
||||
let vars = vec!["=value".to_string()];
|
||||
let result = build_globals("secret", &[], &vars, &template_vars);
|
||||
let result = build_globals(
|
||||
"kingfisher.test.1",
|
||||
"secret",
|
||||
&[],
|
||||
&vars,
|
||||
&template_vars,
|
||||
&ProviderEndpointOverrides::default(),
|
||||
);
|
||||
assert!(result.is_err());
|
||||
assert!(result.unwrap_err().to_string().contains("cannot be empty"));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ use tracing::debug;
|
|||
use crate::{
|
||||
cli::{commands::validate::ValidateArgs, global::GlobalArgs},
|
||||
liquid_filters::register_all,
|
||||
provider_endpoints::{ProviderEndpointOverrides, hydrate_endpoint_globals_for_rule},
|
||||
rule_loader::RuleLoader,
|
||||
rules::{HttpValidation, Validation, rule::Rule},
|
||||
template_vars::extract_template_vars,
|
||||
|
|
@ -210,18 +211,25 @@ fn extract_validation_vars(validation: &Validation) -> BTreeSet<String> {
|
|||
/// - `variables`: Named variables in NAME=VALUE format (explicit overrides)
|
||||
/// - `template_vars`: Set of variable names used in the validation template
|
||||
fn build_globals(
|
||||
rule_id: &str,
|
||||
secret: &str,
|
||||
args: &[String],
|
||||
variables: &[String],
|
||||
template_vars: &BTreeSet<String>,
|
||||
endpoint_overrides: &ProviderEndpointOverrides,
|
||||
) -> Result<Object> {
|
||||
let mut globals = Object::new();
|
||||
|
||||
// Set TOKEN to the provided secret
|
||||
globals.insert("TOKEN".into(), Value::scalar(secret.to_string()));
|
||||
|
||||
endpoint_overrides.apply_defaults(&mut globals);
|
||||
|
||||
// Get non-TOKEN variables in alphabetical order for auto-assignment
|
||||
let auto_assign_vars: Vec<&String> = template_vars.iter().filter(|v| *v != "TOKEN").collect();
|
||||
let auto_assign_vars: Vec<&String> = template_vars
|
||||
.iter()
|
||||
.filter(|v| *v != "TOKEN" && !globals.contains_key(v.as_str()))
|
||||
.collect();
|
||||
|
||||
// Auto-assign --arg values to template variables
|
||||
for (i, arg_value) in args.iter().enumerate() {
|
||||
|
|
@ -248,6 +256,8 @@ fn build_globals(
|
|||
globals.insert(name.into(), Value::scalar(value));
|
||||
}
|
||||
|
||||
hydrate_endpoint_globals_for_rule(rule_id, &mut globals);
|
||||
|
||||
Ok(globals)
|
||||
}
|
||||
|
||||
|
|
@ -469,6 +479,7 @@ pub async fn run_direct_validation(
|
|||
|
||||
// Build Liquid parser
|
||||
let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
|
||||
let endpoint_overrides = ProviderEndpointOverrides::from_global_args(global_args)?;
|
||||
|
||||
let timeout = Duration::from_secs(args.timeout);
|
||||
let rate_limiter =
|
||||
|
|
@ -525,7 +536,14 @@ pub async fn run_direct_validation(
|
|||
}
|
||||
}
|
||||
|
||||
let globals = build_globals(&secret, &args.args, &args.variables, &template_vars)?;
|
||||
let globals = build_globals(
|
||||
&rule_id,
|
||||
&secret,
|
||||
&args.args,
|
||||
&args.variables,
|
||||
&template_vars,
|
||||
&endpoint_overrides,
|
||||
)?;
|
||||
|
||||
// Log auto-assignment info for debugging
|
||||
if !non_token_vars.is_empty() && !args.args.is_empty() {
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ pub mod location;
|
|||
pub mod matcher;
|
||||
pub mod origin;
|
||||
pub mod parser;
|
||||
pub mod provider_endpoints;
|
||||
pub mod pyc;
|
||||
pub mod reporter;
|
||||
pub mod rule_loader;
|
||||
|
|
|
|||
409
src/provider_endpoints.rs
Normal file
409
src/provider_endpoints.rs
Normal file
|
|
@ -0,0 +1,409 @@
|
|||
use std::{collections::BTreeMap, fs, path::Path};
|
||||
|
||||
use anyhow::{Context, Result, anyhow, bail};
|
||||
use liquid::Object;
|
||||
use liquid_core::{Value, ValueView};
|
||||
use serde::Deserialize;
|
||||
use url::Url;
|
||||
|
||||
use crate::cli::global::GlobalArgs;
|
||||
|
||||
// Names of the Liquid globals that carry provider base URLs. Rule templates
// reference them directly, e.g. `{{ GITHUB_API_BASE_URL }}/user`.
const GITHUB_API_BASE_URL: &str = "GITHUB_API_BASE_URL";
const GITHUB_WEB_BASE_URL: &str = "GITHUB_WEB_BASE_URL";
const GITLAB_API_BASE_URL: &str = "GITLAB_API_BASE_URL";
const GITEA_API_BASE_URL: &str = "GITEA_API_BASE_URL";
const JIRA_BASE_URL: &str = "JIRA_BASE_URL";
const JIRA_CLOUD_BASE_URL: &str = "JIRA_CLOUD_BASE_URL";
const CONFLUENCE_BASE_URL: &str = "CONFLUENCE_BASE_URL";
const ARTIFACTORY_BASE_URL: &str = "ARTIFACTORY_BASE_URL";
|
||||
|
||||
/// Provider endpoint overrides collected from the endpoint config file and
/// from the repeatable `--endpoint PROVIDER=URL` flag.
///
/// The two sources are stored separately because they are applied with
/// different precedence: config values only fill globals that are not yet
/// set, while CLI values overwrite whatever is present.
#[derive(Debug, Clone, Default)]
pub struct ProviderEndpointOverrides {
    /// Overrides loaded from the `--endpoint-config` file.
    config: EndpointVars,
    /// Overrides given via `--endpoint`.
    cli: EndpointVars,
}
|
||||
|
||||
/// A set of normalized endpoint values, keyed by the Liquid global name
/// (for example `GITHUB_API_BASE_URL`) they are inserted under.
#[derive(Debug, Clone, Default)]
struct EndpointVars {
    /// Liquid global name -> normalized base URL.
    values: BTreeMap<String, String>,
}
|
||||
|
||||
/// On-disk shape of the endpoint config YAML.
///
/// Three top-level keys are accepted, each mapping a provider name to a URL.
/// All are optional; when the same provider appears under several keys, the
/// loader merges them in the order `endpoints`, `provider_endpoints`,
/// `providers`, with later sections replacing earlier ones.
#[derive(Debug, Deserialize, Default)]
struct EndpointConfigFile {
    /// Provider -> URL map (the key used in the documented example).
    #[serde(default)]
    endpoints: BTreeMap<String, String>,
    /// Alternative key for the same provider -> URL map.
    #[serde(default)]
    provider_endpoints: BTreeMap<String, String>,
    /// Alternative key for the same provider -> URL map.
    #[serde(default)]
    providers: BTreeMap<String, String>,
}
|
||||
|
||||
impl ProviderEndpointOverrides {
|
||||
pub fn from_global_args(global_args: &GlobalArgs) -> Result<Self> {
|
||||
let config = match &global_args.endpoint_config {
|
||||
Some(path) => EndpointVars::from_config_path(path)?,
|
||||
None => EndpointVars::default(),
|
||||
};
|
||||
let cli = EndpointVars::from_pairs(&global_args.endpoint)?;
|
||||
Ok(Self { config, cli })
|
||||
}
|
||||
|
||||
pub fn apply_defaults(&self, globals: &mut Object) {
|
||||
self.config.apply(globals, false);
|
||||
apply_builtin_defaults(globals);
|
||||
self.cli.apply(globals, true);
|
||||
}
|
||||
|
||||
pub fn apply_scan_overrides(&self, globals: &mut Object) {
|
||||
self.config.apply(globals, false);
|
||||
apply_builtin_defaults(globals);
|
||||
self.cli.apply(globals, true);
|
||||
}
|
||||
}
|
||||
|
||||
impl EndpointVars {
|
||||
fn from_config_path(path: &Path) -> Result<Self> {
|
||||
let raw = fs::read_to_string(path)
|
||||
.with_context(|| format!("Failed to read endpoint config from {}", path.display()))?;
|
||||
let parsed: EndpointConfigFile = serde_yaml::from_str(&raw)
|
||||
.with_context(|| format!("Failed to parse endpoint config {}", path.display()))?;
|
||||
|
||||
let mut merged = parsed.endpoints;
|
||||
merged.extend(parsed.provider_endpoints);
|
||||
merged.extend(parsed.providers);
|
||||
Self::from_map(merged)
|
||||
}
|
||||
|
||||
fn from_pairs(pairs: &[String]) -> Result<Self> {
|
||||
let mut map = BTreeMap::new();
|
||||
for pair in pairs {
|
||||
let (provider, endpoint) = parse_assignment(pair)?;
|
||||
map.insert(provider, endpoint);
|
||||
}
|
||||
Self::from_map(map)
|
||||
}
|
||||
|
||||
fn from_map(map: BTreeMap<String, String>) -> Result<Self> {
|
||||
let mut values = BTreeMap::new();
|
||||
for (provider, endpoint) in map {
|
||||
let normalized = normalize_endpoint_key(&provider);
|
||||
match normalized.as_str() {
|
||||
"github" => {
|
||||
let github = normalize_github_endpoint(&endpoint)?;
|
||||
values.insert(GITHUB_API_BASE_URL.to_string(), github.api_base_url);
|
||||
values.insert(GITHUB_WEB_BASE_URL.to_string(), github.web_base_url);
|
||||
}
|
||||
"gitlab" => {
|
||||
values.insert(
|
||||
GITLAB_API_BASE_URL.to_string(),
|
||||
normalize_api_base_url(&endpoint, "/api/v4")?,
|
||||
);
|
||||
}
|
||||
"gitea" => {
|
||||
values.insert(
|
||||
GITEA_API_BASE_URL.to_string(),
|
||||
normalize_api_base_url(&endpoint, "/api/v1")?,
|
||||
);
|
||||
}
|
||||
"jira" | "jira-dc" => {
|
||||
values.insert(JIRA_BASE_URL.to_string(), normalize_base_url(&endpoint)?);
|
||||
}
|
||||
"jira-cloud" => {
|
||||
values.insert(JIRA_CLOUD_BASE_URL.to_string(), normalize_base_url(&endpoint)?);
|
||||
}
|
||||
"confluence" | "confluence-dc" => {
|
||||
values.insert(CONFLUENCE_BASE_URL.to_string(), normalize_base_url(&endpoint)?);
|
||||
}
|
||||
"artifactory" | "jfrog" => {
|
||||
values.insert(
|
||||
ARTIFACTORY_BASE_URL.to_string(),
|
||||
normalize_artifactory_base_url(&endpoint)?,
|
||||
);
|
||||
}
|
||||
_ => bail!(
|
||||
"Unsupported endpoint provider '{}'. Supported values: github, gitlab, gitea, jira, jira-cloud, confluence, artifactory",
|
||||
provider
|
||||
),
|
||||
}
|
||||
}
|
||||
Ok(Self { values })
|
||||
}
|
||||
|
||||
fn apply(&self, globals: &mut Object, overwrite_existing: bool) {
|
||||
for (name, value) in &self.values {
|
||||
if overwrite_existing || !globals.contains_key(name.as_str()) {
|
||||
globals.insert(name.clone().into(), Value::scalar(value.clone()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The pair of base URLs derived from a single GitHub endpoint.
#[derive(Debug)]
struct GitHubEndpoint {
    /// REST API root, e.g. `https://api.github.com` or `https://<host>/api/v3`.
    api_base_url: String,
    /// Web UI root, e.g. `https://github.com` or `https://<host>`.
    web_base_url: String,
}
|
||||
|
||||
pub fn hydrate_endpoint_globals_for_rule(rule_id: &str, globals: &mut Object) {
|
||||
hydrate_github_globals(globals);
|
||||
hydrate_artifactory_globals(globals);
|
||||
hydrate_confluence_globals(globals);
|
||||
hydrate_jira_dc_globals(globals);
|
||||
if rule_id == "kingfisher.jira.2" {
|
||||
hydrate_jira_cloud_globals(globals);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn endpoint_var_names() -> &'static [&'static str] {
|
||||
&[
|
||||
GITHUB_API_BASE_URL,
|
||||
GITHUB_WEB_BASE_URL,
|
||||
GITLAB_API_BASE_URL,
|
||||
GITEA_API_BASE_URL,
|
||||
JIRA_BASE_URL,
|
||||
JIRA_CLOUD_BASE_URL,
|
||||
CONFLUENCE_BASE_URL,
|
||||
ARTIFACTORY_BASE_URL,
|
||||
]
|
||||
}
|
||||
|
||||
fn hydrate_github_globals(globals: &mut Object) {
|
||||
match (string_var(globals, GITHUB_API_BASE_URL), string_var(globals, GITHUB_WEB_BASE_URL)) {
|
||||
(Some(api), None) => {
|
||||
if let Ok(normalized) = normalize_github_endpoint(&api) {
|
||||
globals.insert(GITHUB_API_BASE_URL.into(), Value::scalar(normalized.api_base_url));
|
||||
globals.insert(GITHUB_WEB_BASE_URL.into(), Value::scalar(normalized.web_base_url));
|
||||
}
|
||||
}
|
||||
(None, Some(web)) => {
|
||||
if let Ok(normalized) = normalize_github_endpoint(&web) {
|
||||
globals.insert(GITHUB_API_BASE_URL.into(), Value::scalar(normalized.api_base_url));
|
||||
globals.insert(GITHUB_WEB_BASE_URL.into(), Value::scalar(normalized.web_base_url));
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn hydrate_artifactory_globals(globals: &mut Object) {
|
||||
if globals.contains_key(ARTIFACTORY_BASE_URL) {
|
||||
return;
|
||||
}
|
||||
if let Some(jfrog_url) = string_var(globals, "JFROGURL")
|
||||
&& let Ok(base_url) = normalize_artifactory_base_url(&jfrog_url)
|
||||
{
|
||||
globals.insert(ARTIFACTORY_BASE_URL.into(), Value::scalar(base_url));
|
||||
}
|
||||
}
|
||||
|
||||
fn hydrate_confluence_globals(globals: &mut Object) {
|
||||
if globals.contains_key(CONFLUENCE_BASE_URL) {
|
||||
return;
|
||||
}
|
||||
if let Some(domain) = string_var(globals, "CONFLUENCEDCDOMAIN")
|
||||
&& let Ok(base_url) = normalize_base_url(&domain)
|
||||
{
|
||||
globals.insert(CONFLUENCE_BASE_URL.into(), Value::scalar(base_url));
|
||||
}
|
||||
}
|
||||
|
||||
fn hydrate_jira_dc_globals(globals: &mut Object) {
|
||||
if globals.contains_key(JIRA_BASE_URL) {
|
||||
return;
|
||||
}
|
||||
if let Some(domain) = string_var(globals, "JIRADCDOMAIN")
|
||||
&& let Ok(base_url) = normalize_base_url(&domain)
|
||||
{
|
||||
globals.insert(JIRA_BASE_URL.into(), Value::scalar(base_url));
|
||||
}
|
||||
}
|
||||
|
||||
fn hydrate_jira_cloud_globals(globals: &mut Object) {
|
||||
if globals.contains_key(JIRA_CLOUD_BASE_URL) {
|
||||
return;
|
||||
}
|
||||
if let Some(domain) = string_var(globals, "DOMAIN")
|
||||
&& let Ok(base_url) = normalize_base_url(&domain)
|
||||
{
|
||||
globals.insert(JIRA_CLOUD_BASE_URL.into(), Value::scalar(base_url));
|
||||
}
|
||||
}
|
||||
|
||||
fn string_var(globals: &Object, name: &str) -> Option<String> {
|
||||
globals.get(name).map(|value| value.to_kstr().to_string()).filter(|s| !s.is_empty())
|
||||
}
|
||||
|
||||
fn apply_builtin_defaults(globals: &mut Object) {
|
||||
for (name, value) in [
|
||||
(GITHUB_API_BASE_URL, "https://api.github.com"),
|
||||
(GITHUB_WEB_BASE_URL, "https://github.com"),
|
||||
(GITLAB_API_BASE_URL, "https://gitlab.com/api/v4"),
|
||||
(GITEA_API_BASE_URL, "https://gitea.com/api/v1"),
|
||||
] {
|
||||
if !globals.contains_key(name) {
|
||||
globals.insert(name.into(), Value::scalar(value.to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_assignment(raw: &str) -> Result<(String, String)> {
|
||||
let (provider, endpoint) = raw
|
||||
.split_once('=')
|
||||
.ok_or_else(|| anyhow!("Invalid endpoint '{}'. Expected PROVIDER=URL", raw))?;
|
||||
let provider = provider.trim();
|
||||
let endpoint = endpoint.trim();
|
||||
if provider.is_empty() {
|
||||
bail!("Invalid endpoint '{}'. Provider name cannot be empty", raw);
|
||||
}
|
||||
if endpoint.is_empty() {
|
||||
bail!("Invalid endpoint '{}'. URL cannot be empty", raw);
|
||||
}
|
||||
Ok((provider.to_string(), endpoint.to_string()))
|
||||
}
|
||||
|
||||
/// Canonicalizes a provider key: surrounding whitespace removed, ASCII letters
/// lowercased, and `_` replaced by `-` (so `Jira_Cloud` becomes `jira-cloud`).
fn normalize_endpoint_key(key: &str) -> String {
    key.trim()
        .chars()
        .map(|ch| if ch == '_' { '-' } else { ch.to_ascii_lowercase() })
        .collect()
}
|
||||
|
||||
fn normalize_base_url(raw: &str) -> Result<String> {
|
||||
let url = parse_url_or_assume_https(raw)?;
|
||||
Ok(url_with_path(&url, url.path().trim_end_matches('/')))
|
||||
}
|
||||
|
||||
fn normalize_api_base_url(raw: &str, api_suffix: &str) -> Result<String> {
|
||||
let url = parse_url_or_assume_https(raw)?;
|
||||
let path = url.path().trim_end_matches('/');
|
||||
let full_path = if path.is_empty() {
|
||||
api_suffix.to_string()
|
||||
} else if path.ends_with(api_suffix) {
|
||||
path.to_string()
|
||||
} else {
|
||||
format!("{path}{api_suffix}")
|
||||
};
|
||||
Ok(url_with_path(&url, &full_path))
|
||||
}
|
||||
|
||||
fn normalize_artifactory_base_url(raw: &str) -> Result<String> {
|
||||
let url = parse_url_or_assume_https(raw)?;
|
||||
let mut path = url.path().trim_end_matches('/').to_string();
|
||||
if let Some(prefix) = path.strip_suffix("/artifactory") {
|
||||
path = prefix.to_string();
|
||||
}
|
||||
Ok(url_with_path(&url, &path))
|
||||
}
|
||||
|
||||
fn normalize_github_endpoint(raw: &str) -> Result<GitHubEndpoint> {
|
||||
let url = parse_url_or_assume_https(raw)?;
|
||||
let host = url
|
||||
.host_str()
|
||||
.ok_or_else(|| anyhow!("Endpoint '{}' is missing a host", raw))?
|
||||
.to_ascii_lowercase();
|
||||
let path = url.path().trim_end_matches('/');
|
||||
|
||||
if host == "api.github.com" {
|
||||
return Ok(GitHubEndpoint {
|
||||
api_base_url: "https://api.github.com".to_string(),
|
||||
web_base_url: "https://github.com".to_string(),
|
||||
});
|
||||
}
|
||||
if host == "github.com" && path.is_empty() {
|
||||
return Ok(GitHubEndpoint {
|
||||
api_base_url: "https://api.github.com".to_string(),
|
||||
web_base_url: "https://github.com".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
let (web_path, api_path) = if path.is_empty() {
|
||||
("".to_string(), "/api/v3".to_string())
|
||||
} else if let Some(prefix) = path.strip_suffix("/api/v3") {
|
||||
(prefix.to_string(), path.to_string())
|
||||
} else {
|
||||
(path.to_string(), format!("{path}/api/v3"))
|
||||
};
|
||||
|
||||
Ok(GitHubEndpoint {
|
||||
api_base_url: url_with_path(&url, &api_path),
|
||||
web_base_url: url_with_path(&url, &web_path),
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_url_or_assume_https(raw: &str) -> Result<Url> {
|
||||
match Url::parse(raw.trim()) {
|
||||
Ok(url) => Ok(url),
|
||||
Err(url::ParseError::RelativeUrlWithoutBase) => {
|
||||
Url::parse(&format!("https://{}", raw.trim())).with_context(|| {
|
||||
format!("Invalid endpoint URL '{}'. Use a full URL or hostname", raw)
|
||||
})
|
||||
}
|
||||
Err(err) => Err(anyhow!("Invalid endpoint URL '{}': {}", raw, err)),
|
||||
}
|
||||
}
|
||||
|
||||
fn url_with_path(url: &Url, path: &str) -> String {
|
||||
let mut out = url.clone();
|
||||
out.set_query(None);
|
||||
out.set_fragment(None);
|
||||
if path.is_empty() {
|
||||
out.set_path("");
|
||||
} else {
|
||||
out.set_path(path);
|
||||
}
|
||||
out.to_string().trim_end_matches('/').to_string()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A bare host is treated as https and gets the `/api/v3` API root.
    #[test]
    fn github_endpoint_normalizes_host_only() {
        let GitHubEndpoint { api_base_url, web_base_url } =
            normalize_github_endpoint("ghe.corp.example.com").unwrap();
        assert_eq!(api_base_url, "https://ghe.corp.example.com/api/v3");
        assert_eq!(web_base_url, "https://ghe.corp.example.com");
    }

    /// An input that already points at `/api/v3` yields the same pair.
    #[test]
    fn github_endpoint_normalizes_api_path() {
        let GitHubEndpoint { api_base_url, web_base_url } =
            normalize_github_endpoint("https://ghe.corp.example.com/api/v3").unwrap();
        assert_eq!(api_base_url, "https://ghe.corp.example.com/api/v3");
        assert_eq!(web_base_url, "https://ghe.corp.example.com");
    }

    /// The API suffix is appended after an existing sub-path.
    #[test]
    fn gitlab_endpoint_appends_api_path() {
        let api_base = normalize_api_base_url("gitlab.example.com/gitlab", "/api/v4").unwrap();
        assert_eq!(api_base, "https://gitlab.example.com/gitlab/api/v4");
    }

    /// A trailing `/artifactory` segment is removed from the base URL.
    #[test]
    fn artifactory_endpoint_strips_artifactory_suffix() {
        let base = normalize_artifactory_base_url("http://localhost:8071/artifactory").unwrap();
        assert_eq!(base, "http://localhost:8071");
    }

    /// For `kingfisher.jira.2`, `DOMAIN` seeds the Jira Cloud base URL.
    #[test]
    fn jira_cloud_hydrates_from_legacy_domain() {
        let mut vars = Object::new();
        vars.insert("DOMAIN".into(), Value::scalar("example.atlassian.net"));
        hydrate_endpoint_globals_for_rule("kingfisher.jira.2", &mut vars);

        let hydrated = string_var(&vars, JIRA_CLOUD_BASE_URL);
        assert_eq!(hydrated.as_deref(), Some("https://example.atlassian.net"));
    }

    /// `JFROGURL` seeds the Artifactory base URL.
    #[test]
    fn artifactory_hydrates_from_legacy_host() {
        let mut vars = Object::new();
        vars.insert("JFROGURL".into(), Value::scalar("repo.example.com"));
        hydrate_endpoint_globals_for_rule("kingfisher.artifactory.1", &mut vars);

        let hydrated = string_var(&vars, ARTIFACTORY_BASE_URL);
        assert_eq!(hydrated.as_deref(), Some("https://repo.example.com"));
    }
}
|
||||
|
|
@ -23,6 +23,7 @@ use crate::{
|
|||
gitea, github, gitlab,
|
||||
liquid_filters::register_all,
|
||||
matcher::MatcherStats,
|
||||
provider_endpoints::ProviderEndpointOverrides,
|
||||
reporter::styles::Styles,
|
||||
rule_loader::RuleLoader,
|
||||
rule_profiling::ConcurrentRuleProfiler,
|
||||
|
|
@ -46,12 +47,14 @@ use crate::{
|
|||
validation_rate_limit::ValidationRateLimiter,
|
||||
};
|
||||
|
||||
/// Shared validation dependencies: (liquid parser, HTTP clients, validation cache, rate limiter).
|
||||
/// Shared validation dependencies:
|
||||
/// (liquid parser, HTTP clients, validation cache, rate limiter, provider endpoint overrides).
|
||||
type ValidationDeps = Arc<(
|
||||
liquid::Parser,
|
||||
crate::validation::ValidationClients,
|
||||
Arc<SkipMap<String, CachedResponse>>,
|
||||
Option<Arc<ValidationRateLimiter>>,
|
||||
Arc<ProviderEndpointOverrides>,
|
||||
)>;
|
||||
|
||||
pub async fn run_scan(
|
||||
|
|
@ -159,6 +162,7 @@ pub async fn run_async_scan(
|
|||
let validation_rate_limiter =
|
||||
ValidationRateLimiter::from_cli(args.validation_rps, &args.validation_rps_rule)?
|
||||
.map(Arc::new);
|
||||
let provider_endpoints = Arc::new(ProviderEndpointOverrides::from_global_args(global_args)?);
|
||||
|
||||
let validation_deps: Option<ValidationDeps> = if !args.no_validate {
|
||||
info!("Starting secret validation phase...");
|
||||
|
|
@ -170,6 +174,7 @@ pub async fn run_async_scan(
|
|||
)?,
|
||||
Arc::new(SkipMap::new()),
|
||||
validation_rate_limiter.clone(),
|
||||
Arc::clone(&provider_endpoints),
|
||||
)))
|
||||
} else {
|
||||
None
|
||||
|
|
@ -517,8 +522,8 @@ async fn run_validation_phase(
|
|||
access_map_collector: Option<AccessMapCollector>,
|
||||
) -> Result<()> {
|
||||
if let Some(validation) = validation_deps {
|
||||
let (parser, clients, cache, rate_limiter) =
|
||||
(&validation.0, &validation.1, &validation.2, &validation.3);
|
||||
let (parser, clients, cache, rate_limiter, provider_endpoints) =
|
||||
(&validation.0, &validation.1, &validation.2, &validation.3, &validation.4);
|
||||
run_secret_validation(
|
||||
Arc::clone(datastore),
|
||||
parser,
|
||||
|
|
@ -528,6 +533,7 @@ async fn run_validation_phase(
|
|||
match_range,
|
||||
access_map_collector,
|
||||
rate_limiter.clone(),
|
||||
provider_endpoints.clone(),
|
||||
Duration::from_secs(args.validation_timeout),
|
||||
args.validation_retries,
|
||||
effective_max_validation_body_len(args),
|
||||
|
|
@ -661,8 +667,8 @@ async fn run_parallel_scan(
|
|||
|
||||
// Validate initial (non-repo) matches
|
||||
if let Some(validation) = validation_deps {
|
||||
let (parser, clients, cache, rate_limiter) =
|
||||
(&validation.0, &validation.1, &validation.2, &validation.3);
|
||||
let (parser, clients, cache, rate_limiter, provider_endpoints) =
|
||||
(&validation.0, &validation.1, &validation.2, &validation.3, &validation.4);
|
||||
let initial_match_count = { datastore.lock().unwrap().get_matches().len() };
|
||||
if initial_match_count > 0 {
|
||||
run_secret_validation(
|
||||
|
|
@ -674,6 +680,7 @@ async fn run_parallel_scan(
|
|||
Some(0..initial_match_count),
|
||||
access_map_collector.clone(),
|
||||
rate_limiter.clone(),
|
||||
provider_endpoints.clone(),
|
||||
Duration::from_secs(args.validation_timeout),
|
||||
args.validation_retries,
|
||||
effective_max_validation_body_len(args),
|
||||
|
|
@ -749,8 +756,13 @@ async fn run_parallel_scan(
|
|||
}
|
||||
|
||||
if let Some(validation) = validation_deps.clone() {
|
||||
let (parser, clients, cache, rate_limiter) =
|
||||
(&validation.0, &validation.1, &validation.2, &validation.3);
|
||||
let (parser, clients, cache, rate_limiter, provider_endpoints) = (
|
||||
&validation.0,
|
||||
&validation.1,
|
||||
&validation.2,
|
||||
&validation.3,
|
||||
&validation.4,
|
||||
);
|
||||
let match_count =
|
||||
{ repo_datastore.lock().unwrap().get_matches().len() };
|
||||
if match_count > 0 {
|
||||
|
|
@ -763,6 +775,7 @@ async fn run_parallel_scan(
|
|||
Some(0..match_count),
|
||||
access_map.clone(),
|
||||
rate_limiter.clone(),
|
||||
provider_endpoints.clone(),
|
||||
Duration::from_secs(args.validation_timeout),
|
||||
args.validation_retries,
|
||||
effective_max_validation_body_len(&args),
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ use crate::{
|
|||
findings_store::{FindingsStore, FindingsStoreMessage},
|
||||
location::OffsetSpan,
|
||||
matcher::OwnedBlobMatch,
|
||||
provider_endpoints::ProviderEndpointOverrides,
|
||||
rules::rule::Validation,
|
||||
validation::{
|
||||
CachedResponse, collect_variables_and_dependencies, utils, validate_single_match,
|
||||
|
|
@ -421,6 +422,7 @@ pub async fn run_secret_validation(
|
|||
range: Option<std::ops::Range<usize>>,
|
||||
access_map: Option<AccessMapCollector>,
|
||||
rate_limiter: Option<Arc<ValidationRateLimiter>>,
|
||||
provider_endpoints: Arc<ProviderEndpointOverrides>,
|
||||
validation_timeout: Duration,
|
||||
validation_retries: u32,
|
||||
max_body_len: usize,
|
||||
|
|
@ -536,6 +538,7 @@ pub async fn run_secret_validation(
|
|||
let pb = pb.clone();
|
||||
let access_map = access_map.clone();
|
||||
let rate_limiter = rate_limiter.clone();
|
||||
let provider_endpoints = provider_endpoints.clone();
|
||||
let empty_dep_vars = &empty_dep_vars;
|
||||
let empty_missing = &empty_missing;
|
||||
let empty_cache = empty_cache.clone();
|
||||
|
|
@ -577,6 +580,7 @@ pub async fn run_secret_validation(
|
|||
&cache_glob,
|
||||
access_map.as_ref(),
|
||||
rate_limiter.as_deref(),
|
||||
&provider_endpoints,
|
||||
validation_timeout,
|
||||
validation_retries,
|
||||
max_body_len,
|
||||
|
|
@ -690,6 +694,7 @@ pub async fn run_secret_validation(
|
|||
let cache_glob = cache.clone();
|
||||
let access_map = access_map.clone();
|
||||
let rate_limiter = rate_limiter.clone();
|
||||
let provider_endpoints = provider_endpoints.clone();
|
||||
let validation_timeout = validation_timeout;
|
||||
let validation_retries = validation_retries;
|
||||
|
||||
|
|
@ -730,6 +735,7 @@ pub async fn run_secret_validation(
|
|||
let cache_glob = cache_glob.clone();
|
||||
let access_map = access_map.clone();
|
||||
let rate_limiter = rate_limiter.clone();
|
||||
let provider_endpoints = provider_endpoints.clone();
|
||||
async move {
|
||||
validate_single(
|
||||
&mut rep,
|
||||
|
|
@ -744,6 +750,7 @@ pub async fn run_secret_validation(
|
|||
&cache_glob,
|
||||
access_map.as_ref(),
|
||||
rate_limiter.as_deref(),
|
||||
&provider_endpoints,
|
||||
validation_timeout,
|
||||
validation_retries,
|
||||
max_body_len,
|
||||
|
|
@ -839,6 +846,7 @@ async fn validate_single(
|
|||
cache2: &Arc<SkipMap<String, CachedResponse>>,
|
||||
access_map: Option<&AccessMapCollector>,
|
||||
rate_limiter: Option<&ValidationRateLimiter>,
|
||||
provider_endpoints: &Arc<ProviderEndpointOverrides>,
|
||||
validation_timeout: Duration,
|
||||
validation_retries: u32,
|
||||
max_body_len: usize,
|
||||
|
|
@ -905,6 +913,7 @@ async fn validate_single(
|
|||
validation_timeout,
|
||||
validation_retries,
|
||||
rate_limiter,
|
||||
provider_endpoints.as_ref(),
|
||||
max_body_len,
|
||||
)
|
||||
.boxed(),
|
||||
|
|
|
|||
|
|
@ -23,6 +23,9 @@ use crate::{
|
|||
cli::global::TlsMode,
|
||||
location::OffsetSpan,
|
||||
matcher::{OwnedBlobMatch, SerializableCaptures},
|
||||
provider_endpoints::{
|
||||
ProviderEndpointOverrides, endpoint_var_names, hydrate_endpoint_globals_for_rule,
|
||||
},
|
||||
rules::rule::Validation,
|
||||
validation_body::{self},
|
||||
};
|
||||
|
|
@ -441,6 +444,7 @@ pub async fn validate_single_match(
|
|||
validation_timeout: Duration,
|
||||
validation_retries: u32,
|
||||
rate_limiter: Option<&crate::validation_rate_limit::ValidationRateLimiter>,
|
||||
provider_endpoints: &ProviderEndpointOverrides,
|
||||
max_body_len: usize,
|
||||
) {
|
||||
let fp = validation_dedup_key(m);
|
||||
|
|
@ -456,6 +460,7 @@ pub async fn validate_single_match(
|
|||
validation_timeout,
|
||||
validation_retries,
|
||||
rate_limiter,
|
||||
provider_endpoints,
|
||||
max_body_len,
|
||||
)
|
||||
.boxed(),
|
||||
|
|
@ -499,6 +504,7 @@ async fn timed_validate_single_match<'a>(
|
|||
validation_timeout: Duration,
|
||||
validation_retries: u32,
|
||||
rate_limiter: Option<&crate::validation_rate_limit::ValidationRateLimiter>,
|
||||
provider_endpoints: &ProviderEndpointOverrides,
|
||||
max_body_len: usize,
|
||||
) {
|
||||
// Select the appropriate HTTP client based on rule's TLS mode preference
|
||||
|
|
@ -595,6 +601,8 @@ async fn timed_validate_single_match<'a>(
|
|||
|
||||
let mut globals = Object::new();
|
||||
populate_globals_from_captures(&mut globals, &captured_values);
|
||||
hydrate_endpoint_globals_for_rule(m.rule.id(), &mut globals);
|
||||
provider_endpoints.apply_scan_overrides(&mut globals);
|
||||
|
||||
// Persist named captures (non-TOKEN) for validate/revoke command generation.
|
||||
// This is especially important for gRPC validators like Modal where TOKEN_ID is required.
|
||||
|
|
@ -604,6 +612,13 @@ async fn timed_validate_single_match<'a>(
|
|||
}
|
||||
m.dependent_captures.entry(k.to_uppercase()).or_insert_with(|| v.clone());
|
||||
}
|
||||
for endpoint_var in endpoint_var_names() {
|
||||
if let Some(value) = globals.get(*endpoint_var).and_then(|v| v.as_scalar()) {
|
||||
m.dependent_captures
|
||||
.entry((*endpoint_var).to_string())
|
||||
.or_insert_with(|| value.to_kstr().to_string());
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
let rule_syntax = m.rule.syntax();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue