diff --git a/crates/kingfisher-rules/data/rules/artifactory.yml b/crates/kingfisher-rules/data/rules/artifactory.yml index 8116f0a..32d6200 100644 --- a/crates/kingfisher-rules/data/rules/artifactory.yml +++ b/crates/kingfisher-rules/data/rules/artifactory.yml @@ -32,7 +32,7 @@ rules: - 200 type: StatusMatch - type: JsonValid - url: https://{{ JFROGURL }}/artifactory/api/repositories + url: '{{ ARTIFACTORY_BASE_URL }}/artifactory/api/repositories' references: - https://jfrog.com/help/r/jfrog-platform-administration-documentation/access-tokens - https://jfrog.com/help/r/jfrog-rest-apis/authentication @@ -93,7 +93,7 @@ rules: - status: - 200 type: StatusMatch - url: https://{{ JFROGURL }}/artifactory/api/repositories + url: '{{ ARTIFACTORY_BASE_URL }}/artifactory/api/repositories' references: - https://jfrog.com/help/r/jfrog-platform-administration-documentation/access-tokens - https://jfrog.com/help/r/jfrog-rest-apis/authentication diff --git a/crates/kingfisher-rules/data/rules/confluence.yml b/crates/kingfisher-rules/data/rules/confluence.yml index 8ba3815..841aca7 100644 --- a/crates/kingfisher-rules/data/rules/confluence.yml +++ b/crates/kingfisher-rules/data/rules/confluence.yml @@ -40,7 +40,7 @@ rules: - type: WordMatch words: - '"type":"known"' - url: https://{{ CONFLUENCEDCDOMAIN }}/rest/api/user/current + url: '{{ CONFLUENCE_BASE_URL }}/rest/api/user/current' depends_on_rule: - rule_id: kingfisher.confluence.2 variable: CONFLUENCEDCDOMAIN diff --git a/crates/kingfisher-rules/data/rules/gitea.yml b/crates/kingfisher-rules/data/rules/gitea.yml index 8381e1f..6b404df 100644 --- a/crates/kingfisher-rules/data/rules/gitea.yml +++ b/crates/kingfisher-rules/data/rules/gitea.yml @@ -34,7 +34,7 @@ rules: content: request: method: GET - url: https://gitea.com/api/v1/user + url: '{{ GITEA_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/json diff --git a/crates/kingfisher-rules/data/rules/github.yml 
b/crates/kingfisher-rules/data/rules/github.yml index 657b14d..d59a574 100644 --- a/crates/kingfisher-rules/data/rules/github.yml +++ b/crates/kingfisher-rules/data/rules/github.yml @@ -28,7 +28,7 @@ rules: content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -44,7 +44,7 @@ rules: content: request: method: POST - url: https://api.github.com/credentials/revoke + url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke' headers: Accept: application/vnd.github+json X-GitHub-Api-Version: 2026-03-10 @@ -84,7 +84,7 @@ rules: content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -100,7 +100,7 @@ rules: content: request: method: POST - url: https://api.github.com/credentials/revoke + url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke' headers: Accept: application/vnd.github+json X-GitHub-Api-Version: 2026-03-10 @@ -137,7 +137,7 @@ rules: content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -153,7 +153,7 @@ rules: content: request: method: POST - url: https://api.github.com/credentials/revoke + url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke' headers: Accept: application/vnd.github+json X-GitHub-Api-Version: 2026-03-10 @@ -189,7 +189,7 @@ rules: content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -205,7 +205,7 @@ rules: content: request: method: POST - url: https://api.github.com/credentials/revoke + url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke' headers: Accept: application/vnd.github+json X-GitHub-Api-Version: 2026-03-10 @@ -234,7 +234,7 @@ rules: 
content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -250,7 +250,7 @@ rules: content: request: method: DELETE - url: https://api.github.com/installation/token + url: '{{ GITHUB_API_BASE_URL }}/installation/token' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -281,7 +281,7 @@ rules: content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -297,7 +297,7 @@ rules: content: request: method: POST - url: https://api.github.com/credentials/revoke + url: '{{ GITHUB_API_BASE_URL }}/credentials/revoke' headers: Accept: application/vnd.github+json X-GitHub-Api-Version: 2026-03-10 @@ -346,7 +346,7 @@ rules: content: request: method: POST - url: "https://github.com/login/oauth/access_token" + url: '{{ GITHUB_WEB_BASE_URL }}/login/oauth/access_token' headers: Accept: "application/json" Content-Type: "application/json" @@ -383,7 +383,7 @@ rules: content: request: method: GET - url: https://api.github.com/user + url: '{{ GITHUB_API_BASE_URL }}/user' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json @@ -399,11 +399,11 @@ rules: content: request: method: DELETE - url: https://api.github.com/installation/token + url: '{{ GITHUB_API_BASE_URL }}/installation/token' headers: Authorization: token {{ TOKEN }} Accept: application/vnd.github+json response_matcher: - report_response: true - type: StatusMatch - status: [204] \ No newline at end of file + status: [204] diff --git a/crates/kingfisher-rules/data/rules/gitlab.yml b/crates/kingfisher-rules/data/rules/gitlab.yml index 995bac0..632c4f3 100644 --- a/crates/kingfisher-rules/data/rules/gitlab.yml +++ b/crates/kingfisher-rules/data/rules/gitlab.yml @@ -34,7 +34,7 @@ rules: - type: WordMatch words: - '"id"' - url: 
https://gitlab.com/api/v4/personal_access_tokens/self + url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self' revocation: type: Http content: @@ -46,7 +46,7 @@ rules: - report_response: true - type: StatusMatch status: [204] - url: https://gitlab.com/api/v4/personal_access_tokens/self + url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self' - name: GitLab Runner Registration Token id: kingfisher.gitlab.2 @@ -92,7 +92,7 @@ rules: - '"token is missing"' - '"403 Forbidden"' negative: true - url: https://gitlab.com/api/v4/runners/verify + url: '{{ GITLAB_API_BASE_URL }}/runners/verify' - name: GitLab Pipeline Trigger Token id: kingfisher.gitlab.3 @@ -131,7 +131,7 @@ rules: - '"token is missing"' - '"403 Forbidden"' negative: true - url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }} + url: '{{ GITLAB_API_BASE_URL }}/ci/pipeline_triggers/{{ TOKEN }}' - name: GitLab Private Token - Routable Format id: kingfisher.gitlab.4 pattern: | @@ -178,7 +178,7 @@ rules: - type: WordMatch words: - '"id"' - url: https://gitlab.com/api/v4/personal_access_tokens/self + url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self' revocation: type: Http content: @@ -190,7 +190,7 @@ rules: - report_response: true - type: StatusMatch status: [204] - url: https://gitlab.com/api/v4/personal_access_tokens/self + url: '{{ GITLAB_API_BASE_URL }}/personal_access_tokens/self' - name: GitLab CI/CD Job Token id: kingfisher.gitlab.5 @@ -219,7 +219,7 @@ rules: content: request: method: GET - url: https://gitlab.com/api/v4/job + url: '{{ GITLAB_API_BASE_URL }}/job' headers: JOB-TOKEN: '{{ TOKEN }}' response_matcher: @@ -393,7 +393,7 @@ rules: - '"token is missing"' - '"403 Forbidden"' negative: true - url: https://gitlab.com/api/v4/runners/verify + url: '{{ GITLAB_API_BASE_URL }}/runners/verify' - name: GitLab Runner Authentication Token - Routable Format id: kingfisher.gitlab.13 @@ -436,7 +436,7 @@ rules: - '"token is missing"' - '"403 Forbidden"' negative: true - url: 
https://gitlab.com/api/v4/runners/verify + url: '{{ GITLAB_API_BASE_URL }}/runners/verify' - name: GitLab SCIM Token id: kingfisher.gitlab.14 @@ -481,7 +481,7 @@ rules: content: request: method: GET - url: https://gitlab.com/api/v4/user + url: '{{ GITLAB_API_BASE_URL }}/user' headers: Cookie: "_gitlab_session={{ TOKEN }}" response_matcher: diff --git a/crates/kingfisher-rules/data/rules/jira.yml b/crates/kingfisher-rules/data/rules/jira.yml index 23ef153..1ddb4f6 100644 --- a/crates/kingfisher-rules/data/rules/jira.yml +++ b/crates/kingfisher-rules/data/rules/jira.yml @@ -53,7 +53,7 @@ rules: - status: - 200 type: StatusMatch - url: https://{{ DOMAIN }}/rest/api/3/dashboard + url: '{{ JIRA_CLOUD_BASE_URL }}/rest/api/3/dashboard' references: - https://developer.atlassian.com/cloud/jira/platform/basic-auth-for-rest-apis/ depends_on_rule: @@ -97,7 +97,7 @@ rules: - 200 type: StatusMatch - type: JsonValid - url: https://{{ JIRADCDOMAIN }}/rest/api/latest/myself + url: '{{ JIRA_BASE_URL }}/rest/api/latest/myself' revocation: type: HttpMultiStep content: @@ -105,7 +105,7 @@ rules: - name: lookup_token_id request: method: GET - url: https://{{ JIRADCDOMAIN }}/rest/pat/latest/tokens + url: '{{ JIRA_BASE_URL }}/rest/pat/latest/tokens' headers: Accept: application/json Authorization: Bearer {{ TOKEN }} @@ -120,7 +120,7 @@ rules: - name: revoke_token request: method: DELETE - url: https://{{ JIRADCDOMAIN }}/rest/pat/latest/tokens/{{ JIRA_TOKEN_ID }} + url: '{{ JIRA_BASE_URL }}/rest/pat/latest/tokens/{{ JIRA_TOKEN_ID }}' headers: Authorization: Bearer {{ TOKEN }} response_matcher: @@ -152,4 +152,4 @@ rules: - jira-staging.corp.mongodb.com - https://jira.corp.internal:8443 references: - - https://confluence.atlassian.com/adminjiraserver/jira-applications-base-url-938846869.html \ No newline at end of file + - https://confluence.atlassian.com/adminjiraserver/jira-applications-base-url-938846869.html diff --git 
a/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md b/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md index c5507ee..54f2608 100644 --- a/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md +++ b/docs-site/docs/blog/posts/2026-04-26-beyond-detection-validate-map-revoke.md @@ -17,9 +17,10 @@ tags: # Beyond Detection: Live Validation, Blast Radius, and One-Command Revocation -A regex match on `AKIA[0-9A-Z]{16}` is the easy part. Every secret scanner -finds those. The hard part — and the part that decides whether your Tuesday -afternoon turns into an incident — is what happens **after** the match. +A regex hit is the easy part. Any scanner can tell you that a string looks +like an AWS access key or a GitHub token. The harder question is what to do +next, and that is usually what turns a scan result into either a routine +cleanup task or a real incident. Kingfisher answers the three questions that actually matter: @@ -32,9 +33,12 @@ Kingfisher answers the three questions that actually matter: ## 1. Live validation, not just pattern matching Out of Kingfisher's 820 standalone detectors, **484 include live validation -logic**. Every one of those calls the provider's own API and reports the -credential as `Active`, `Inactive`, or `NotAttempted` — so a 4,000-finding -scan collapses to the dozen findings that are actually live. +logic**. When a provider exposes a safe check call, Kingfisher uses that +provider's own API to report each credential as `Active`, `Inactive`, or +`NotAttempted`. + +That changes the output from "thousands of regex matches" to a much shorter +list of findings that actually authenticate today. 
Validation runs automatically when you scan: @@ -61,15 +65,15 @@ kingfisher validate --rule gcp "$(cat service-account.json)" kingfisher validate --rule postgres "$POSTGRES_URI" ``` -Validation logic lives in the rule YAML, not in compiled Rust, which is -why coverage is high and growing — every new detector ships with a -validation block whenever the provider exposes a safe check call. +Most validation logic lives in the rule YAML rather than bespoke compiled +code. That makes it practical to grow coverage rule-by-rule instead of +treating validation as a separate engineering project. ## 2. Blast radius mapping — what does this token actually reach? A leaked AWS key bound to a single read-only S3 bucket and a leaked AWS key bound to organization-wide `AdministratorAccess` are not the same incident. -The first is a Friday afternoon ticket. The second is a war room. +The first is a ticket. The second is a war room. Add `--access-map` to a scan and Kingfisher authenticates each live credential, enumerates what it can do, and writes the result alongside @@ -82,10 +86,10 @@ kingfisher scan github --organization my-org \ --output findings.json ``` -Each cloud finding gets an `access_map` block with the identity, the -permissions, and the concrete resources reachable. Today this is supported -for **AWS, GCP, Azure Storage, Azure DevOps, GitHub, GitLab, Slack, and -Microsoft Teams.** +Each supported finding gets an `access_map` block with the identity, +permissions, and concrete resources reachable. Today that includes +**AWS, GCP, Azure Storage, Azure DevOps, GitHub, GitLab, Slack, and +Microsoft Teams**. 
You can also run it standalone — useful when triaging a single credential you've fished out of a paste or a customer report: @@ -102,18 +106,17 @@ kingfisher access-map gcp ./service-account.json --json-out gcp.access-map.json ``` The HTML report viewer (`--format html`) renders the access map as a -clickable tree — identity at the root, then services, then individual -resources and permissions. It's the fastest way to get a non-engineer -stakeholder to grasp severity in five seconds rather than five minutes. +clickable tree: identity at the root, then services, then individual +resources and permissions. It is a much faster way to explain severity to +an incident commander or manager than pasting IAM JSON into chat. ## 3. Revocation — kill the token from where you found it Validation tells you a credential is live. Blast radius tells you why it's -urgent. Revocation tells you it's done. +urgent. Revocation closes the loop. For every rule whose provider exposes a safe revocation API, Kingfisher -ships the revocation call as part of the rule definition. One command, -no console: +ships the revocation call as part of the rule definition: ```bash # Revoke a GitHub PAT @@ -134,17 +137,17 @@ kingfisher revoke --rule aws \ kingfisher revoke --rule gcp "$(cat service-account.json)" ``` -The same Liquid templating that powers the validation request handles -revocation — including multi-step flows for providers that need a separate -key-id lookup before disabling. (See +The same Liquid templating that powers validation also powers revocation, +including multi-step flows for providers that require a lookup before +disabling the credential. See [`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md#multi-step-revocation) -for the schema.) +for the schema. This matters in two scenarios: - **Mass revocation after a leak.** A laptop or a CI runner gets popped and - you have a list of fingerprints. 
`kingfisher revoke` walks the list, no - human pivoting between five provider consoles. + you have a list of live credentials. `kingfisher revoke` walks that list + without forcing a human to pivot between provider consoles. - **Automated response.** Wire `kingfisher revoke` into the same job that scanned and validated, gated by an allow-list of rule IDs you've decided are safe to auto-revoke (typically: short-lived CI tokens, dev-environment @@ -152,7 +155,7 @@ This matters in two scenarios: ## The combined workflow -In practice these three primitives chain into a single pipeline: +In practice, these three capabilities collapse into one response workflow: ```bash # 1. Scan + validate + map blast radius in one call @@ -162,16 +165,16 @@ kingfisher scan github --organization my-org \ --output findings.json -# 2. Pull just the live, high-blast-radius findings +# 2. Pull just the live findings that have an access map -jq '[.[] | select(.validation.status == "Active") - | select(.access_map.permissions - | any(. == "*" or contains("Admin")))]' \ +jq '.findings + | map(select(.validation.status == "Active")) + | map(select(.access_map != null))' \ findings.json > urgent.json # 3. Triage in the HTML viewer (or revoke programmatically) kingfisher view findings.json ``` -Three commands, full incident workflow — find, prioritize, kill. +That is the full incident loop in three steps: find, prioritize, revoke. ## Why this is the right shape @@ -183,9 +186,9 @@ lives), reusing typed validators for the common families (AWS, GCP, JWT, Postgres, MongoDB, MySQL, JDBC, Azure Storage, Coinbase), and letting rule authors drop down to a `Raw` validator only for genuinely odd providers. -The upshot for users: when a new detector lands, you almost always get -validation, blast radius, and revocation along with it — not three -separate roadmaps. +The practical result is that new rules can ship with detection plus +post-detection response logic, instead of detection today and validation or +revocation on some later roadmap. 
## Next up @@ -196,6 +199,5 @@ separate roadmaps. - **Docker image scanning** — pulling and scanning every layer for embedded secrets. -Got a provider you'd love to see validation or revocation support for? -Open an issue at -[mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues). +If there is a provider you want validation or revocation support for, open +an issue at [mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues). diff --git a/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md b/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md index 5bad86c..488c9fb 100644 --- a/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md +++ b/docs-site/docs/blog/posts/2026-04-26-scan-github-org-for-secrets.md @@ -16,11 +16,14 @@ tags: # Scanning an Entire GitHub Organization for Leaked Secrets -Most organizations have hundreds of repositories — some abandoned, some active, -plenty inherited from acquisitions. A leaked AWS key in a five-year-old archived -repo is just as dangerous as one in `main` today. Kingfisher can enumerate every -repo in a GitHub organization, scan the full git history, and then **validate -which credentials are still live** so you know what to rotate first. +Most organizations have more GitHub surface area than they think: active +services, abandoned repositories, internal tooling, forks, experiments, and +projects inherited through acquisitions. A credential leaked in a five-year-old +archived repo can still be live today. + +Kingfisher can enumerate every repository in a GitHub organization, scan the +full git history, and then **validate which credentials are still live** so +you can focus on what needs rotation first. 
@@ -42,14 +45,15 @@ export KF_GITHUB_TOKEN=ghp_yourTokenHere kingfisher scan github --organization my-org ``` -That's it — Kingfisher enumerates every repo, clones each one, scans the full -commit history, runs all 942 detection rules, and validates findings against +That single command enumerates the org, clones each repository, scans working +tree content plus git history, and validates supported findings against provider APIs. ## Tuning for real-world orgs -Real orgs have huge monorepos, archived junk, and forks you don't care about. -Three flags do most of the work: +Real organizations have huge monorepos, archived junk, mirrored forks, and +repositories you already know are out of scope. Three flags handle most of +the tuning: ```bash kingfisher scan github --organization my-org \ @@ -61,8 +65,8 @@ kingfisher scan github --organization my-org \ --output kf-findings.sarif ``` -- **`--repo-clone-limit`** caps the number of clones per scan. Useful for - staged rollouts ("first 500 repos by stars") or to stay under disk budget. +- **`--repo-clone-limit`** caps the number of clones per scan. It is useful + for staged rollouts or staying under a disk budget. - **`--github-exclude`** accepts exact `OWNER/REPO` strings or gitignore-style globs (`my-org/*-archive`). Repeat the flag for each pattern. Matching is case-insensitive. @@ -72,24 +76,25 @@ kingfisher scan github --organization my-org \ ## Pulling in issues, wikis, and gists Secrets don't only live in code. Issues and pull request descriptions are a -common leak source — someone pastes a stack trace with a JWT, or an -"oncall handoff" issue with a temporary token that never got rotated. Add +common leak source: someone pastes a stack trace with a JWT, or an +"on-call handoff" issue with a temporary token that never gets rotated. 
Add `--repo-artifacts` to fetch these: ```bash kingfisher scan github --organization my-org --repo-artifacts ``` -This pulls each repo's issues (including PRs), wiki, and any **public** gists -owned by the repo owner, and scans them all. It does cost API calls, so plan -accordingly if you're near a rate limit. +This pulls each repo's issues, pull requests, wiki, and any **public** gists +owned by the repo owner, then scans that material as well. It does consume API +calls, so budget for that if the org is large or your token is already near a +rate limit. ## Following the people, not just the org -This is the trick that catches what every other scanner misses. Developers -leak secrets in *personal* repositories — side projects, dotfiles, throwaway -forks. If a contributor to your org has a public personal repo with an active -token that grants access to org infrastructure, that's a real incident. +Developers also leak secrets in *personal* repositories: side projects, +dotfiles, and throwaway forks. If a contributor to your org has a public repo +containing a still-live credential that reaches company infrastructure, that is +still your incident. Pass a single repo URL with `--include-contributors` and Kingfisher will enumerate the contributors, then clone and scan **every public repo they own**: @@ -100,14 +105,14 @@ kingfisher scan https://github.com/my-org/critical-service \ --repo-clone-limit 200 ``` -This is a noisy operation — start with one or two critical repos rather than -the whole org. GitHub will rate-limit aggressive enumeration, so a token -(`KF_GITHUB_TOKEN`) is required in practice. +This is a noisy operation. Start with one or two critical repositories rather +than the entire organization. GitHub will also rate-limit aggressive +enumeration, so `KF_GITHUB_TOKEN` is effectively required. ## Reading the output -The default `pretty` output is human-friendly for terminals. 
For automation, -pick the format that matches your downstream tool: +The default `pretty` output is fine for interactive terminal use. For +automation, pick a format that matches your downstream consumer: ```bash # JSON for custom tooling @@ -120,9 +125,9 @@ kingfisher scan github --organization my-org --format sarif --output findings.sa kingfisher scan github --organization my-org --format toon ``` -The interactive HTML report is often the fastest way to triage a large scan — -filter by rule, by validation status, or by repository, and click through to -the exact commit and line: +The interactive HTML report is often the fastest way to triage a large scan. +You can filter by rule, validation status, or repository, then click through +to the exact commit and line: ```bash kingfisher scan github --organization my-org --format html --output kf-report.html @@ -130,40 +135,39 @@ kingfisher scan github --organization my-org --format html --output kf-report.ht ## Triage by validation status -The single most important column in the output is **validation**. A live -credential is a fire — a never-was-valid one is noise. Filter to live findings -first: +The single most important field in the output is **validation**. A live +credential should be triaged immediately; a value that never authenticated is +usually just cleanup work. Filter to live findings first: ```bash -jq '.[] | select(.validation.status == "Active")' findings.json +jq '.findings[] | select(.validation.status == "Active")' findings.json ``` -Then walk those credentials in order of blast radius. For AWS, GCP, GitHub, -GitLab, and Slack tokens, Kingfisher already maps what each one can access — -look at the `access_map` field in the JSON output, or the **Blast Radius** -panel in the HTML report. +Then prioritize by blast radius. For AWS, GCP, GitHub, GitLab, and Slack +tokens, Kingfisher can already map what each credential can access. 
Look at +the `access_map` field in JSON output, or the **Blast Radius** panel in the +HTML report. ## Revoke from the CLI -For supported providers, you don't need to log into a console — Kingfisher can -revoke directly: +For supported providers, you do not need to pivot into the provider console. +Kingfisher can revoke directly: ```bash kingfisher revoke --rule kingfisher.aws.access_key.1 AKIAEXAMPLE... ``` -Each rule that supports revocation declares the API call in its YAML. Today -this works for AWS, GitHub, GitLab, Slack, and a growing list of SaaS -providers — see [`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md) -for the current list and how to add revocation to a custom rule. +Each rule that supports revocation declares the API call in its YAML. See +[`docs/RULES.md`](https://github.com/mongodb/kingfisher/blob/main/docs/RULES.md) +for the revocation schema, including multi-step flows. ## Wiring it into a recurring job -A first scan is the one-shot baseline. The real value is recurring scans -catching new leaks within hours, not months. The simplest pattern is a nightly -GitHub Action or scheduled CI job that runs the org scan, diffs against -yesterday's findings, and pages on net-new live credentials. We'll cover that -end-to-end in the next post. +The first scan gives you a baseline. The real value comes from running the +same workflow continuously so new leaks are caught within hours instead of +months. A simple starting point is a nightly GitHub Action or scheduled CI +job that runs the org scan, diffs against yesterday's findings, and alerts on +net-new live credentials. ## What's next @@ -174,5 +178,5 @@ end-to-end in the next post. - **Docker image scanning** — pulling images directly and scanning every layer for embedded secrets. -If there's a workflow you'd like us to cover, open an issue at +If there is a workflow you want us to cover, open an issue at [mongodb/kingfisher](https://github.com/mongodb/kingfisher/issues). 
diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index 317a695..4e4c84d 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -8,7 +8,8 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im All notable changes to this project will be documented in this file. ## [v1.98.0] -- Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs__`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. Bundled ruleset is now **943 rules** (821 standalone detectors + 122 dependent rules), with **485 standalone detectors** offering live validation. +- Fixed [#359](https://github.com/mongodb/kingfisher/issues/359): added `kingfisher.github.9` to detect the new ~520-character stateless GitHub App installation token format (`ghs__`). The legacy 36-character `ghs_` rule (`kingfisher.github.5`) is retained for older / GHES-issued tokens that are still in circulation. +- Added provider endpoint overrides for validation and revocation via global `--endpoint PROVIDER=URL` and `--endpoint-config FILE`, with built-in support for self-hosted GitHub, GitLab, Gitea, Jira, Confluence, and Artifactory instances. ## [v1.97.0] - **Report viewer cross-tool triage:** when a Kingfisher report is loaded alongside a Gitleaks or TruffleHog report, matching imported findings are enriched with Kingfisher's validation verdict, validation response, validate command, and revoke command. Matching is keyed on `commit + file + line` with a `file + line` fallback, and enriched rows show an "Enriched by Kingfisher" callout in the detail panel plus an "Enriched" chip in the findings table. 
Added a **Source** column to the findings table; a new **Duplicates Removed by Tool** dashboard panel showing per-tool cards for Kingfisher / TruffleHog / Gitleaks; and an upload-time **Deduplicate findings** toggle (on by default) so users can inspect the raw rows before fingerprint dedup when needed. diff --git a/docs-site/docs/usage/basic-scanning.md b/docs-site/docs/usage/basic-scanning.md index cf66382..218b36d 100644 --- a/docs-site/docs/usage/basic-scanning.md +++ b/docs-site/docs/usage/basic-scanning.md @@ -274,6 +274,57 @@ kingfisher validate --rule aws --arg AKIAEXAMPLE "secret_key" kingfisher validate --rule aws --var AKID=AKIAEXAMPLE "secret_key" ``` +**Provider endpoint overrides (`--endpoint` and `--endpoint-config`):** + +Rules for providers that can run outside the public SaaS control plane can be pointed at a different instance without editing rule YAML. + +- `--endpoint PROVIDER=URL` sets an endpoint for the current command. Repeat it for multiple providers. +- `--endpoint-config FILE` loads a YAML file with reusable endpoint overrides. +- For self-hosted instances on private IPs or `localhost`, combine endpoint overrides with `--allow-internal-ips`. 
+ +Supported provider keys for endpoint overrides are: + +- `github` +- `gitlab` +- `gitea` +- `jira` (Jira Data Center / self-managed) +- `jira-cloud` +- `confluence` +- `artifactory` + +```bash +# Validate a GitHub Enterprise token against a self-hosted instance +kingfisher validate --rule github \ + --endpoint github=https://ghe.corp.example.com \ + "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + +# Revoke a self-managed GitLab PAT +kingfisher revoke --rule gitlab \ + --endpoint gitlab=https://gitlab.corp.example.com \ + "glpat-xxxxxxxxxxxxxxxxxxxx" + +# Scan with an internal Artifactory validator target +kingfisher scan ./repo \ + --endpoint artifactory=http://localhost:8071 \ + --allow-internal-ips +``` + +Example endpoint config file: + +```yaml +endpoints: + github: https://ghe.corp.example.com + gitlab: https://gitlab.corp.example.com + gitea: https://gitea.corp.example.com + jira: https://jira.corp.example.com + confluence: https://wiki.corp.example.com + artifactory: http://localhost:8071 +``` + +```bash +kingfisher scan ./repo --endpoint-config ./kingfisher-endpoints.yml --allow-internal-ips +``` + **Rule prefix matching:** Use partial rule IDs like `opsgenie` instead of the full `kingfisher.opsgenie.1`. If the prefix matches multiple rules, **all matching rules with compatible variables are tried**: ```bash @@ -1082,8 +1133,11 @@ If you are scanning infrastructure that uses internal endpoints for credential v # Scan with SSRF protection disabled (allows requests to internal IPs) kingfisher scan --allow-internal-ips ./repo -# Also works with the validate command -kingfisher validate --allow-internal-ips --rule kingfisher.artifactory.1 +# Also works with direct validation against a self-hosted endpoint +kingfisher validate --allow-internal-ips \ + --endpoint artifactory=http://localhost:8071 \ + --rule kingfisher.artifactory.1 \ + "AKCp..." ``` > **Warning:** Only use `--allow-internal-ips` when you trust the content being scanned. 
Malicious content could cause Kingfisher to make requests to internal services. diff --git a/docs/USAGE.md b/docs/USAGE.md index 570b7fd..b4cd2cc 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -269,6 +269,57 @@ kingfisher validate --rule aws --arg AKIAEXAMPLE "secret_key" kingfisher validate --rule aws --var AKID=AKIAEXAMPLE "secret_key" ``` +**Provider endpoint overrides (`--endpoint` and `--endpoint-config`):** + +Rules for providers that can run outside the public SaaS control plane can be pointed at a different instance without editing rule YAML. + +- `--endpoint PROVIDER=URL` sets an endpoint for the current command. Repeat it for multiple providers. +- `--endpoint-config FILE` loads a YAML file with reusable endpoint overrides. +- For self-hosted instances on private IPs or `localhost`, combine endpoint overrides with `--allow-internal-ips`. + +Supported provider keys for endpoint overrides are: + +- `github` +- `gitlab` +- `gitea` +- `jira` (Jira Data Center / self-managed) +- `jira-cloud` +- `confluence` +- `artifactory` + +```bash +# Validate a GitHub Enterprise token against a self-hosted instance +kingfisher validate --rule github \ + --endpoint github=https://ghe.corp.example.com \ + "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + +# Revoke a self-managed GitLab PAT +kingfisher revoke --rule gitlab \ + --endpoint gitlab=https://gitlab.corp.example.com \ + "glpat-xxxxxxxxxxxxxxxxxxxx" + +# Scan with an internal Artifactory validator target +kingfisher scan ./repo \ + --endpoint artifactory=http://localhost:8071 \ + --allow-internal-ips +``` + +Example endpoint config file: + +```yaml +endpoints: + github: https://ghe.corp.example.com + gitlab: https://gitlab.corp.example.com + gitea: https://gitea.corp.example.com + jira: https://jira.corp.example.com + confluence: https://wiki.corp.example.com + artifactory: http://localhost:8071 +``` + +```bash +kingfisher scan ./repo --endpoint-config ./kingfisher-endpoints.yml --allow-internal-ips +``` + **Rule 
prefix matching:** Use partial rule IDs like `opsgenie` instead of the full `kingfisher.opsgenie.1`. If the prefix matches multiple rules, **all matching rules with compatible variables are tried**: ```bash @@ -1077,8 +1128,11 @@ If you are scanning infrastructure that uses internal endpoints for credential v # Scan with SSRF protection disabled (allows requests to internal IPs) kingfisher scan --allow-internal-ips ./repo -# Also works with the validate command -kingfisher validate --allow-internal-ips --rule kingfisher.artifactory.1 +# Also works with direct validation against a self-hosted endpoint +kingfisher validate --allow-internal-ips \ + --endpoint artifactory=http://localhost:8071 \ + --rule kingfisher.artifactory.1 \ + "AKCp..." ``` > **Warning:** Only use `--allow-internal-ips` when you trust the content being scanned. Malicious content could cause Kingfisher to make requests to internal services. diff --git a/src/cli/global.rs b/src/cli/global.rs index d912392..79f1bd4 100644 --- a/src/cli/global.rs +++ b/src/cli/global.rs @@ -1,4 +1,5 @@ use std::io::IsTerminal; +use std::path::PathBuf; use std::sync::LazyLock; @@ -144,6 +145,16 @@ pub struct GlobalArgs { #[arg(global = true, long = "user-agent-suffix", value_name = "SUFFIX")] pub user_agent_suffix: Option, + /// Override provider API endpoints for validation/revocation (PROVIDER=URL), repeatable. + /// + /// Supported providers: github, gitlab, gitea, jira, jira-cloud, confluence, artifactory. + #[arg(global = true, long = "endpoint", value_name = "PROVIDER=URL")] + pub endpoint: Vec, + + /// YAML file containing provider endpoint overrides. 
+ #[arg(global = true, long = "endpoint-config", value_name = "FILE")] + pub endpoint_config: Option, + // Internal fields (not CLI arguments) #[clap(skip)] pub color: Mode, @@ -163,6 +174,8 @@ impl Default for GlobalArgs { self_update: false, no_update_check: false, user_agent_suffix: None, + endpoint: Vec::new(), + endpoint_config: None, color: Mode::Auto, progress: Mode::Auto, } diff --git a/src/direct_revoke.rs b/src/direct_revoke.rs index e2ae76f..f8847dc 100644 --- a/src/direct_revoke.rs +++ b/src/direct_revoke.rs @@ -20,6 +20,7 @@ use tracing::debug; use crate::{ cli::{commands::revoke::RevokeArgs, global::GlobalArgs}, liquid_filters::register_all, + provider_endpoints::{ProviderEndpointOverrides, hydrate_endpoint_globals_for_rule}, rule_loader::RuleLoader, template_vars::extract_template_vars, validation::GLOBAL_USER_AGENT, @@ -138,15 +139,22 @@ fn get_global_var(globals: &Object, name: &str) -> Option { /// Build the globals object for Liquid template rendering. fn build_globals( + rule_id: &str, secret: &str, args: &[String], variables: &[String], template_vars: &BTreeSet, + endpoint_overrides: &ProviderEndpointOverrides, ) -> Result { let mut globals = Object::new(); globals.insert("TOKEN".into(), Value::scalar(secret.to_string())); - let auto_assign_vars: Vec<&String> = template_vars.iter().filter(|v| *v != "TOKEN").collect(); + endpoint_overrides.apply_defaults(&mut globals); + + let auto_assign_vars: Vec<&String> = template_vars + .iter() + .filter(|v| *v != "TOKEN" && !globals.contains_key(v.as_str())) + .collect(); for (i, arg_value) in args.iter().enumerate() { if i < auto_assign_vars.len() { @@ -171,6 +179,8 @@ fn build_globals( globals.insert(name.into(), Value::scalar(value)); } + hydrate_endpoint_globals_for_rule(rule_id, &mut globals); + Ok(globals) } @@ -553,6 +563,7 @@ pub async fn run_direct_revocation( let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; let timeout = Duration::from_secs(args.timeout); + let 
endpoint_overrides = ProviderEndpointOverrides::from_global_args(global_args)?; let mut results = Vec::new(); @@ -597,7 +608,14 @@ pub async fn run_direct_revocation( } } - let globals = build_globals(&secret, &args.args, &args.variables, &template_vars)?; + let globals = build_globals( + &rule_id, + &secret, + &args.args, + &args.variables, + &template_vars, + &endpoint_overrides, + )?; if !non_token_vars.is_empty() && !args.args.is_empty() { debug!( @@ -1028,7 +1046,15 @@ mod tests { #[test] fn build_globals_sets_token() { let template_vars = BTreeSet::from(["TOKEN".to_string()]); - let globals = build_globals("my-secret", &[], &[], &template_vars).unwrap(); + let globals = build_globals( + "kingfisher.test.1", + "my-secret", + &[], + &[], + &template_vars, + &ProviderEndpointOverrides::default(), + ) + .unwrap(); assert_eq!(globals.get("TOKEN"), Some(Value::scalar("my-secret".to_string())).as_ref()); } @@ -1037,7 +1063,15 @@ mod tests { let template_vars = BTreeSet::from(["TOKEN".to_string(), "AKID".to_string(), "REGION".to_string()]); let args = vec!["my-akid".to_string(), "us-east-1".to_string()]; - let globals = build_globals("secret", &args, &[], &template_vars).unwrap(); + let globals = build_globals( + "kingfisher.test.1", + "secret", + &args, + &[], + &template_vars, + &ProviderEndpointOverrides::default(), + ) + .unwrap(); assert_eq!(globals.get("TOKEN"), Some(Value::scalar("secret".to_string())).as_ref()); assert_eq!(globals.get("AKID"), Some(Value::scalar("my-akid".to_string())).as_ref()); @@ -1048,7 +1082,15 @@ mod tests { fn build_globals_explicit_variables() { let template_vars = BTreeSet::from(["TOKEN".to_string(), "AKID".to_string()]); let vars = vec!["AKID=explicit-value".to_string()]; - let globals = build_globals("secret", &[], &vars, &template_vars).unwrap(); + let globals = build_globals( + "kingfisher.test.1", + "secret", + &[], + &vars, + &template_vars, + &ProviderEndpointOverrides::default(), + ) + .unwrap(); 
assert_eq!(globals.get("AKID"), Some(Value::scalar("explicit-value".to_string())).as_ref()); } @@ -1057,7 +1099,14 @@ mod tests { fn build_globals_invalid_var_format() { let template_vars = BTreeSet::new(); let vars = vec!["NO_EQUALS_SIGN".to_string()]; - let result = build_globals("secret", &[], &vars, &template_vars); + let result = build_globals( + "kingfisher.test.1", + "secret", + &[], + &vars, + &template_vars, + &ProviderEndpointOverrides::default(), + ); assert!(result.is_err()); assert!(result.unwrap_err().to_string().contains("Expected NAME=VALUE")); } @@ -1066,7 +1115,14 @@ mod tests { fn build_globals_empty_var_name() { let template_vars = BTreeSet::new(); let vars = vec!["=value".to_string()]; - let result = build_globals("secret", &[], &vars, &template_vars); + let result = build_globals( + "kingfisher.test.1", + "secret", + &[], + &vars, + &template_vars, + &ProviderEndpointOverrides::default(), + ); assert!(result.is_err()); assert!(result.unwrap_err().to_string().contains("cannot be empty")); } diff --git a/src/direct_validate.rs b/src/direct_validate.rs index f68caff..489189c 100644 --- a/src/direct_validate.rs +++ b/src/direct_validate.rs @@ -21,6 +21,7 @@ use tracing::debug; use crate::{ cli::{commands::validate::ValidateArgs, global::GlobalArgs}, liquid_filters::register_all, + provider_endpoints::{ProviderEndpointOverrides, hydrate_endpoint_globals_for_rule}, rule_loader::RuleLoader, rules::{HttpValidation, Validation, rule::Rule}, template_vars::extract_template_vars, @@ -210,18 +211,25 @@ fn extract_validation_vars(validation: &Validation) -> BTreeSet { /// - `variables`: Named variables in NAME=VALUE format (explicit overrides) /// - `template_vars`: Set of variable names used in the validation template fn build_globals( + rule_id: &str, secret: &str, args: &[String], variables: &[String], template_vars: &BTreeSet, + endpoint_overrides: &ProviderEndpointOverrides, ) -> Result { let mut globals = Object::new(); // Set TOKEN to the provided 
secret globals.insert("TOKEN".into(), Value::scalar(secret.to_string())); + endpoint_overrides.apply_defaults(&mut globals); + // Get non-TOKEN variables in alphabetical order for auto-assignment - let auto_assign_vars: Vec<&String> = template_vars.iter().filter(|v| *v != "TOKEN").collect(); + let auto_assign_vars: Vec<&String> = template_vars + .iter() + .filter(|v| *v != "TOKEN" && !globals.contains_key(v.as_str())) + .collect(); // Auto-assign --arg values to template variables for (i, arg_value) in args.iter().enumerate() { @@ -248,6 +256,8 @@ fn build_globals( globals.insert(name.into(), Value::scalar(value)); } + hydrate_endpoint_globals_for_rule(rule_id, &mut globals); + Ok(globals) } @@ -469,6 +479,7 @@ pub async fn run_direct_validation( // Build Liquid parser let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; + let endpoint_overrides = ProviderEndpointOverrides::from_global_args(global_args)?; let timeout = Duration::from_secs(args.timeout); let rate_limiter = @@ -525,7 +536,14 @@ pub async fn run_direct_validation( } } - let globals = build_globals(&secret, &args.args, &args.variables, &template_vars)?; + let globals = build_globals( + &rule_id, + &secret, + &args.args, + &args.variables, + &template_vars, + &endpoint_overrides, + )?; // Log auto-assignment info for debugging if !non_token_vars.is_empty() && !args.args.is_empty() { diff --git a/src/lib.rs b/src/lib.rs index 14452d6..33d6490 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,6 +40,7 @@ pub mod location; pub mod matcher; pub mod origin; pub mod parser; +pub mod provider_endpoints; pub mod pyc; pub mod reporter; pub mod rule_loader; diff --git a/src/provider_endpoints.rs b/src/provider_endpoints.rs new file mode 100644 index 0000000..6a2f3ca --- /dev/null +++ b/src/provider_endpoints.rs @@ -0,0 +1,409 @@ +use std::{collections::BTreeMap, fs, path::Path}; + +use anyhow::{Context, Result, anyhow, bail}; +use liquid::Object; +use liquid_core::{Value, ValueView}; +use 
serde::Deserialize; +use url::Url; + +use crate::cli::global::GlobalArgs; + +const GITHUB_API_BASE_URL: &str = "GITHUB_API_BASE_URL"; +const GITHUB_WEB_BASE_URL: &str = "GITHUB_WEB_BASE_URL"; +const GITLAB_API_BASE_URL: &str = "GITLAB_API_BASE_URL"; +const GITEA_API_BASE_URL: &str = "GITEA_API_BASE_URL"; +const JIRA_BASE_URL: &str = "JIRA_BASE_URL"; +const JIRA_CLOUD_BASE_URL: &str = "JIRA_CLOUD_BASE_URL"; +const CONFLUENCE_BASE_URL: &str = "CONFLUENCE_BASE_URL"; +const ARTIFACTORY_BASE_URL: &str = "ARTIFACTORY_BASE_URL"; + +#[derive(Debug, Clone, Default)] +pub struct ProviderEndpointOverrides { + config: EndpointVars, + cli: EndpointVars, +} + +#[derive(Debug, Clone, Default)] +struct EndpointVars { + values: BTreeMap, +} + +#[derive(Debug, Deserialize, Default)] +struct EndpointConfigFile { + #[serde(default)] + endpoints: BTreeMap, + #[serde(default)] + provider_endpoints: BTreeMap, + #[serde(default)] + providers: BTreeMap, +} + +impl ProviderEndpointOverrides { + pub fn from_global_args(global_args: &GlobalArgs) -> Result { + let config = match &global_args.endpoint_config { + Some(path) => EndpointVars::from_config_path(path)?, + None => EndpointVars::default(), + }; + let cli = EndpointVars::from_pairs(&global_args.endpoint)?; + Ok(Self { config, cli }) + } + + pub fn apply_defaults(&self, globals: &mut Object) { + self.config.apply(globals, false); + apply_builtin_defaults(globals); + self.cli.apply(globals, true); + } + + pub fn apply_scan_overrides(&self, globals: &mut Object) { + self.config.apply(globals, false); + apply_builtin_defaults(globals); + self.cli.apply(globals, true); + } +} + +impl EndpointVars { + fn from_config_path(path: &Path) -> Result { + let raw = fs::read_to_string(path) + .with_context(|| format!("Failed to read endpoint config from {}", path.display()))?; + let parsed: EndpointConfigFile = serde_yaml::from_str(&raw) + .with_context(|| format!("Failed to parse endpoint config {}", path.display()))?; + + let mut merged = 
parsed.endpoints; + merged.extend(parsed.provider_endpoints); + merged.extend(parsed.providers); + Self::from_map(merged) + } + + fn from_pairs(pairs: &[String]) -> Result { + let mut map = BTreeMap::new(); + for pair in pairs { + let (provider, endpoint) = parse_assignment(pair)?; + map.insert(provider, endpoint); + } + Self::from_map(map) + } + + fn from_map(map: BTreeMap) -> Result { + let mut values = BTreeMap::new(); + for (provider, endpoint) in map { + let normalized = normalize_endpoint_key(&provider); + match normalized.as_str() { + "github" => { + let github = normalize_github_endpoint(&endpoint)?; + values.insert(GITHUB_API_BASE_URL.to_string(), github.api_base_url); + values.insert(GITHUB_WEB_BASE_URL.to_string(), github.web_base_url); + } + "gitlab" => { + values.insert( + GITLAB_API_BASE_URL.to_string(), + normalize_api_base_url(&endpoint, "/api/v4")?, + ); + } + "gitea" => { + values.insert( + GITEA_API_BASE_URL.to_string(), + normalize_api_base_url(&endpoint, "/api/v1")?, + ); + } + "jira" | "jira-dc" => { + values.insert(JIRA_BASE_URL.to_string(), normalize_base_url(&endpoint)?); + } + "jira-cloud" => { + values.insert(JIRA_CLOUD_BASE_URL.to_string(), normalize_base_url(&endpoint)?); + } + "confluence" | "confluence-dc" => { + values.insert(CONFLUENCE_BASE_URL.to_string(), normalize_base_url(&endpoint)?); + } + "artifactory" | "jfrog" => { + values.insert( + ARTIFACTORY_BASE_URL.to_string(), + normalize_artifactory_base_url(&endpoint)?, + ); + } + _ => bail!( + "Unsupported endpoint provider '{}'. 
Supported values: github, gitlab, gitea, jira, jira-cloud, confluence, artifactory", + provider + ), + } + } + Ok(Self { values }) + } + + fn apply(&self, globals: &mut Object, overwrite_existing: bool) { + for (name, value) in &self.values { + if overwrite_existing || !globals.contains_key(name.as_str()) { + globals.insert(name.clone().into(), Value::scalar(value.clone())); + } + } + } +} + +#[derive(Debug)] +struct GitHubEndpoint { + api_base_url: String, + web_base_url: String, +} + +pub fn hydrate_endpoint_globals_for_rule(rule_id: &str, globals: &mut Object) { + hydrate_github_globals(globals); + hydrate_artifactory_globals(globals); + hydrate_confluence_globals(globals); + hydrate_jira_dc_globals(globals); + if rule_id == "kingfisher.jira.2" { + hydrate_jira_cloud_globals(globals); + } +} + +pub fn endpoint_var_names() -> &'static [&'static str] { + &[ + GITHUB_API_BASE_URL, + GITHUB_WEB_BASE_URL, + GITLAB_API_BASE_URL, + GITEA_API_BASE_URL, + JIRA_BASE_URL, + JIRA_CLOUD_BASE_URL, + CONFLUENCE_BASE_URL, + ARTIFACTORY_BASE_URL, + ] +} + +fn hydrate_github_globals(globals: &mut Object) { + match (string_var(globals, GITHUB_API_BASE_URL), string_var(globals, GITHUB_WEB_BASE_URL)) { + (Some(api), None) => { + if let Ok(normalized) = normalize_github_endpoint(&api) { + globals.insert(GITHUB_API_BASE_URL.into(), Value::scalar(normalized.api_base_url)); + globals.insert(GITHUB_WEB_BASE_URL.into(), Value::scalar(normalized.web_base_url)); + } + } + (None, Some(web)) => { + if let Ok(normalized) = normalize_github_endpoint(&web) { + globals.insert(GITHUB_API_BASE_URL.into(), Value::scalar(normalized.api_base_url)); + globals.insert(GITHUB_WEB_BASE_URL.into(), Value::scalar(normalized.web_base_url)); + } + } + _ => {} + } +} + +fn hydrate_artifactory_globals(globals: &mut Object) { + if globals.contains_key(ARTIFACTORY_BASE_URL) { + return; + } + if let Some(jfrog_url) = string_var(globals, "JFROGURL") + && let Ok(base_url) = normalize_artifactory_base_url(&jfrog_url) 
+ { + globals.insert(ARTIFACTORY_BASE_URL.into(), Value::scalar(base_url)); + } +} + +fn hydrate_confluence_globals(globals: &mut Object) { + if globals.contains_key(CONFLUENCE_BASE_URL) { + return; + } + if let Some(domain) = string_var(globals, "CONFLUENCEDCDOMAIN") + && let Ok(base_url) = normalize_base_url(&domain) + { + globals.insert(CONFLUENCE_BASE_URL.into(), Value::scalar(base_url)); + } +} + +fn hydrate_jira_dc_globals(globals: &mut Object) { + if globals.contains_key(JIRA_BASE_URL) { + return; + } + if let Some(domain) = string_var(globals, "JIRADCDOMAIN") + && let Ok(base_url) = normalize_base_url(&domain) + { + globals.insert(JIRA_BASE_URL.into(), Value::scalar(base_url)); + } +} + +fn hydrate_jira_cloud_globals(globals: &mut Object) { + if globals.contains_key(JIRA_CLOUD_BASE_URL) { + return; + } + if let Some(domain) = string_var(globals, "DOMAIN") + && let Ok(base_url) = normalize_base_url(&domain) + { + globals.insert(JIRA_CLOUD_BASE_URL.into(), Value::scalar(base_url)); + } +} + +fn string_var(globals: &Object, name: &str) -> Option { + globals.get(name).map(|value| value.to_kstr().to_string()).filter(|s| !s.is_empty()) +} + +fn apply_builtin_defaults(globals: &mut Object) { + for (name, value) in [ + (GITHUB_API_BASE_URL, "https://api.github.com"), + (GITHUB_WEB_BASE_URL, "https://github.com"), + (GITLAB_API_BASE_URL, "https://gitlab.com/api/v4"), + (GITEA_API_BASE_URL, "https://gitea.com/api/v1"), + ] { + if !globals.contains_key(name) { + globals.insert(name.into(), Value::scalar(value.to_string())); + } + } +} + +fn parse_assignment(raw: &str) -> Result<(String, String)> { + let (provider, endpoint) = raw + .split_once('=') + .ok_or_else(|| anyhow!("Invalid endpoint '{}'. Expected PROVIDER=URL", raw))?; + let provider = provider.trim(); + let endpoint = endpoint.trim(); + if provider.is_empty() { + bail!("Invalid endpoint '{}'. Provider name cannot be empty", raw); + } + if endpoint.is_empty() { + bail!("Invalid endpoint '{}'. 
URL cannot be empty", raw); + } + Ok((provider.to_string(), endpoint.to_string())) +} + +fn normalize_endpoint_key(key: &str) -> String { + key.trim().to_ascii_lowercase().replace('_', "-") +} + +fn normalize_base_url(raw: &str) -> Result { + let url = parse_url_or_assume_https(raw)?; + Ok(url_with_path(&url, url.path().trim_end_matches('/'))) +} + +fn normalize_api_base_url(raw: &str, api_suffix: &str) -> Result { + let url = parse_url_or_assume_https(raw)?; + let path = url.path().trim_end_matches('/'); + let full_path = if path.is_empty() { + api_suffix.to_string() + } else if path.ends_with(api_suffix) { + path.to_string() + } else { + format!("{path}{api_suffix}") + }; + Ok(url_with_path(&url, &full_path)) +} + +fn normalize_artifactory_base_url(raw: &str) -> Result { + let url = parse_url_or_assume_https(raw)?; + let mut path = url.path().trim_end_matches('/').to_string(); + if let Some(prefix) = path.strip_suffix("/artifactory") { + path = prefix.to_string(); + } + Ok(url_with_path(&url, &path)) +} + +fn normalize_github_endpoint(raw: &str) -> Result { + let url = parse_url_or_assume_https(raw)?; + let host = url + .host_str() + .ok_or_else(|| anyhow!("Endpoint '{}' is missing a host", raw))? 
+ .to_ascii_lowercase(); + let path = url.path().trim_end_matches('/'); + + if host == "api.github.com" { + return Ok(GitHubEndpoint { + api_base_url: "https://api.github.com".to_string(), + web_base_url: "https://github.com".to_string(), + }); + } + if host == "github.com" && path.is_empty() { + return Ok(GitHubEndpoint { + api_base_url: "https://api.github.com".to_string(), + web_base_url: "https://github.com".to_string(), + }); + } + + let (web_path, api_path) = if path.is_empty() { + ("".to_string(), "/api/v3".to_string()) + } else if let Some(prefix) = path.strip_suffix("/api/v3") { + (prefix.to_string(), path.to_string()) + } else { + (path.to_string(), format!("{path}/api/v3")) + }; + + Ok(GitHubEndpoint { + api_base_url: url_with_path(&url, &api_path), + web_base_url: url_with_path(&url, &web_path), + }) +} + +fn parse_url_or_assume_https(raw: &str) -> Result { + match Url::parse(raw.trim()) { + Ok(url) => Ok(url), + Err(url::ParseError::RelativeUrlWithoutBase) => { + Url::parse(&format!("https://{}", raw.trim())).with_context(|| { + format!("Invalid endpoint URL '{}'. 
Use a full URL or hostname", raw) + }) + } + Err(err) => Err(anyhow!("Invalid endpoint URL '{}': {}", raw, err)), + } +} + +fn url_with_path(url: &Url, path: &str) -> String { + let mut out = url.clone(); + out.set_query(None); + out.set_fragment(None); + if path.is_empty() { + out.set_path(""); + } else { + out.set_path(path); + } + out.to_string().trim_end_matches('/').to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn github_endpoint_normalizes_host_only() { + let normalized = normalize_github_endpoint("ghe.corp.example.com").unwrap(); + assert_eq!(normalized.api_base_url, "https://ghe.corp.example.com/api/v3"); + assert_eq!(normalized.web_base_url, "https://ghe.corp.example.com"); + } + + #[test] + fn github_endpoint_normalizes_api_path() { + let normalized = normalize_github_endpoint("https://ghe.corp.example.com/api/v3").unwrap(); + assert_eq!(normalized.api_base_url, "https://ghe.corp.example.com/api/v3"); + assert_eq!(normalized.web_base_url, "https://ghe.corp.example.com"); + } + + #[test] + fn gitlab_endpoint_appends_api_path() { + assert_eq!( + normalize_api_base_url("gitlab.example.com/gitlab", "/api/v4").unwrap(), + "https://gitlab.example.com/gitlab/api/v4" + ); + } + + #[test] + fn artifactory_endpoint_strips_artifactory_suffix() { + assert_eq!( + normalize_artifactory_base_url("http://localhost:8071/artifactory").unwrap(), + "http://localhost:8071" + ); + } + + #[test] + fn jira_cloud_hydrates_from_legacy_domain() { + let mut globals = Object::new(); + globals.insert("DOMAIN".into(), Value::scalar("example.atlassian.net")); + hydrate_endpoint_globals_for_rule("kingfisher.jira.2", &mut globals); + assert_eq!( + string_var(&globals, JIRA_CLOUD_BASE_URL).as_deref(), + Some("https://example.atlassian.net") + ); + } + + #[test] + fn artifactory_hydrates_from_legacy_host() { + let mut globals = Object::new(); + globals.insert("JFROGURL".into(), Value::scalar("repo.example.com")); + 
hydrate_endpoint_globals_for_rule("kingfisher.artifactory.1", &mut globals); + assert_eq!( + string_var(&globals, ARTIFACTORY_BASE_URL).as_deref(), + Some("https://repo.example.com") + ); + } +} diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 4fde808..bbc25c5 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -23,6 +23,7 @@ use crate::{ gitea, github, gitlab, liquid_filters::register_all, matcher::MatcherStats, + provider_endpoints::ProviderEndpointOverrides, reporter::styles::Styles, rule_loader::RuleLoader, rule_profiling::ConcurrentRuleProfiler, @@ -46,12 +47,14 @@ use crate::{ validation_rate_limit::ValidationRateLimiter, }; -/// Shared validation dependencies: (liquid parser, HTTP clients, validation cache, rate limiter). +/// Shared validation dependencies: +/// (liquid parser, HTTP clients, validation cache, rate limiter, provider endpoint overrides). type ValidationDeps = Arc<( liquid::Parser, crate::validation::ValidationClients, Arc>, Option>, + Arc, )>; pub async fn run_scan( @@ -159,6 +162,7 @@ pub async fn run_async_scan( let validation_rate_limiter = ValidationRateLimiter::from_cli(args.validation_rps, &args.validation_rps_rule)? 
.map(Arc::new); + let provider_endpoints = Arc::new(ProviderEndpointOverrides::from_global_args(global_args)?); let validation_deps: Option = if !args.no_validate { info!("Starting secret validation phase..."); @@ -170,6 +174,7 @@ pub async fn run_async_scan( )?, Arc::new(SkipMap::new()), validation_rate_limiter.clone(), + Arc::clone(&provider_endpoints), ))) } else { None @@ -517,8 +522,8 @@ async fn run_validation_phase( access_map_collector: Option, ) -> Result<()> { if let Some(validation) = validation_deps { - let (parser, clients, cache, rate_limiter) = - (&validation.0, &validation.1, &validation.2, &validation.3); + let (parser, clients, cache, rate_limiter, provider_endpoints) = + (&validation.0, &validation.1, &validation.2, &validation.3, &validation.4); run_secret_validation( Arc::clone(datastore), parser, @@ -528,6 +533,7 @@ async fn run_validation_phase( match_range, access_map_collector, rate_limiter.clone(), + provider_endpoints.clone(), Duration::from_secs(args.validation_timeout), args.validation_retries, effective_max_validation_body_len(args), @@ -661,8 +667,8 @@ async fn run_parallel_scan( // Validate initial (non-repo) matches if let Some(validation) = validation_deps { - let (parser, clients, cache, rate_limiter) = - (&validation.0, &validation.1, &validation.2, &validation.3); + let (parser, clients, cache, rate_limiter, provider_endpoints) = + (&validation.0, &validation.1, &validation.2, &validation.3, &validation.4); let initial_match_count = { datastore.lock().unwrap().get_matches().len() }; if initial_match_count > 0 { run_secret_validation( @@ -674,6 +680,7 @@ async fn run_parallel_scan( Some(0..initial_match_count), access_map_collector.clone(), rate_limiter.clone(), + provider_endpoints.clone(), Duration::from_secs(args.validation_timeout), args.validation_retries, effective_max_validation_body_len(args), @@ -749,8 +756,13 @@ async fn run_parallel_scan( } if let Some(validation) = validation_deps.clone() { - let (parser, clients, 
cache, rate_limiter) = - (&validation.0, &validation.1, &validation.2, &validation.3); + let (parser, clients, cache, rate_limiter, provider_endpoints) = ( + &validation.0, + &validation.1, + &validation.2, + &validation.3, + &validation.4, + ); let match_count = { repo_datastore.lock().unwrap().get_matches().len() }; if match_count > 0 { @@ -763,6 +775,7 @@ async fn run_parallel_scan( Some(0..match_count), access_map.clone(), rate_limiter.clone(), + provider_endpoints.clone(), Duration::from_secs(args.validation_timeout), args.validation_retries, effective_max_validation_body_len(&args), diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index 43f7e84..16078c9 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -23,6 +23,7 @@ use crate::{ findings_store::{FindingsStore, FindingsStoreMessage}, location::OffsetSpan, matcher::OwnedBlobMatch, + provider_endpoints::ProviderEndpointOverrides, rules::rule::Validation, validation::{ CachedResponse, collect_variables_and_dependencies, utils, validate_single_match, @@ -421,6 +422,7 @@ pub async fn run_secret_validation( range: Option>, access_map: Option, rate_limiter: Option>, + provider_endpoints: Arc, validation_timeout: Duration, validation_retries: u32, max_body_len: usize, @@ -536,6 +538,7 @@ pub async fn run_secret_validation( let pb = pb.clone(); let access_map = access_map.clone(); let rate_limiter = rate_limiter.clone(); + let provider_endpoints = provider_endpoints.clone(); let empty_dep_vars = &empty_dep_vars; let empty_missing = &empty_missing; let empty_cache = empty_cache.clone(); @@ -577,6 +580,7 @@ pub async fn run_secret_validation( &cache_glob, access_map.as_ref(), rate_limiter.as_deref(), + &provider_endpoints, validation_timeout, validation_retries, max_body_len, @@ -690,6 +694,7 @@ pub async fn run_secret_validation( let cache_glob = cache.clone(); let access_map = access_map.clone(); let rate_limiter = rate_limiter.clone(); + let provider_endpoints = 
provider_endpoints.clone(); let validation_timeout = validation_timeout; let validation_retries = validation_retries; @@ -730,6 +735,7 @@ pub async fn run_secret_validation( let cache_glob = cache_glob.clone(); let access_map = access_map.clone(); let rate_limiter = rate_limiter.clone(); + let provider_endpoints = provider_endpoints.clone(); async move { validate_single( &mut rep, @@ -744,6 +750,7 @@ pub async fn run_secret_validation( &cache_glob, access_map.as_ref(), rate_limiter.as_deref(), + &provider_endpoints, validation_timeout, validation_retries, max_body_len, @@ -839,6 +846,7 @@ async fn validate_single( cache2: &Arc>, access_map: Option<&AccessMapCollector>, rate_limiter: Option<&ValidationRateLimiter>, + provider_endpoints: &Arc, validation_timeout: Duration, validation_retries: u32, max_body_len: usize, @@ -905,6 +913,7 @@ async fn validate_single( validation_timeout, validation_retries, rate_limiter, + provider_endpoints.as_ref(), max_body_len, ) .boxed(), diff --git a/src/validation.rs b/src/validation.rs index 2418663..be12ecd 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -23,6 +23,9 @@ use crate::{ cli::global::TlsMode, location::OffsetSpan, matcher::{OwnedBlobMatch, SerializableCaptures}, + provider_endpoints::{ + ProviderEndpointOverrides, endpoint_var_names, hydrate_endpoint_globals_for_rule, + }, rules::rule::Validation, validation_body::{self}, }; @@ -441,6 +444,7 @@ pub async fn validate_single_match( validation_timeout: Duration, validation_retries: u32, rate_limiter: Option<&crate::validation_rate_limit::ValidationRateLimiter>, + provider_endpoints: &ProviderEndpointOverrides, max_body_len: usize, ) { let fp = validation_dedup_key(m); @@ -456,6 +460,7 @@ pub async fn validate_single_match( validation_timeout, validation_retries, rate_limiter, + provider_endpoints, max_body_len, ) .boxed(), @@ -499,6 +504,7 @@ async fn timed_validate_single_match<'a>( validation_timeout: Duration, validation_retries: u32, rate_limiter: 
Option<&crate::validation_rate_limit::ValidationRateLimiter>, + provider_endpoints: &ProviderEndpointOverrides, max_body_len: usize, ) { // Select the appropriate HTTP client based on rule's TLS mode preference @@ -595,6 +601,8 @@ async fn timed_validate_single_match<'a>( let mut globals = Object::new(); populate_globals_from_captures(&mut globals, &captured_values); + hydrate_endpoint_globals_for_rule(m.rule.id(), &mut globals); + provider_endpoints.apply_scan_overrides(&mut globals); // Persist named captures (non-TOKEN) for validate/revoke command generation. // This is especially important for gRPC validators like Modal where TOKEN_ID is required. @@ -604,6 +612,13 @@ async fn timed_validate_single_match<'a>( } m.dependent_captures.entry(k.to_uppercase()).or_insert_with(|| v.clone()); } + for endpoint_var in endpoint_var_names() { + if let Some(value) = globals.get(*endpoint_var).and_then(|v| v.as_scalar()) { + m.dependent_captures + .entry((*endpoint_var).to_string()) + .or_insert_with(|| value.to_kstr().to_string()); + } + } { let rule_syntax = m.rule.syntax();