From a27f90d619353d2dabda5a567b3024e2ab7a70c8 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 16 Apr 2026 16:57:31 -0700 Subject: [PATCH] performance improvements and rule improvements --- CHANGELOG.md | 1 + crates/kingfisher-rules/data/rules/AGENTS.md | 17 +++ .../kingfisher-rules/data/rules/buttercms.yml | 2 +- .../data/rules/chartmogul.yml | 6 +- .../kingfisher-rules/data/rules/couchbase.yml | 2 - .../kingfisher-rules/data/rules/godaddy.yml | 2 +- .../kingfisher-rules/data/rules/langfuse.yml | 16 ++- .../data/rules/lemonsqueezy.yml | 1 + crates/kingfisher-rules/data/rules/miro.yml | 79 +++++++++++- .../kingfisher-rules/data/rules/mixpanel.yml | 116 ++++++++++++++++-- .../kingfisher-rules/data/rules/moralis.yml | 2 - .../kingfisher-rules/data/rules/nightfall.yml | 9 +- .../kingfisher-rules/data/rules/pandadoc.yml | 16 +-- crates/kingfisher-rules/data/rules/pinata.yml | 91 ++++++++++---- crates/kingfisher-rules/data/rules/ramp.yml | 33 ++++- .../data/rules/ringcentral.yml | 2 - crates/kingfisher-rules/data/rules/rootly.yml | 2 +- .../data/rules/shutterstock.yml | 9 +- docs-site/docs/changelog.md | 1 + src/validation.rs | 103 ++++++++++++++-- 20 files changed, 423 insertions(+), 87 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3e0fca..a6d1855 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ All notable changes to this project will be documented in this file. - **Access Map:** Alibaba Cloud long-lived and STS access key pairs (validated `kingfisher.alibabacloud.2` and `kingfisher.alibabacloud.5`): caller identity via STS GetCallerIdentity; standalone `kingfisher access-map alibaba` (alias `aliyun`). - **Report viewer:** Import Gitleaks and TruffleHog JSON into the bundled local viewer with deduplication for repeated imported findings, and publish a static upload-based viewer on the docs site for GitHub Pages hosting. See `docs/USAGE.md`. 
- Fixed parser-based context gating so assignment-style contextual secrets still scan in raw text when parser verification is unavailable, instead of being dropped. +- Fixed dependent-variable pairing for HTTP validation so rules use the nearest helper match in-file, and updated Pinata detection/validation to reliably catch API key IDs, API secrets, and JWTs, including key+secret validation. - Corrected several newly added SaaS rules and validators, including LiveKit (with dependent API secret validation), Tinybird, Inngest, Tolgee, Unkey, Composio, Hex.pm, Trigger.dev, Voiceflow, WorkOS, and Infisical. - Added 61 new detection rules across 46 providers: Axiom (API token + PAT), Trigger.dev (secret key + PAT), Dub.co, Svix webhook signing secret, Liveblocks, Inngest (signing key + event key), Seam, Courier, Cal.com, Arcjet, WarpStream, Mem0, Mintlify, Pirsch, Tinybird, Tolgee (project key + PAT), Ory (API key + session + OAuth2 tokens), Xendit, Xata, Crossmint (server + client keys), DeepL (Free + Pro), Flagsmith, E2B, Infisical, WooCommerce (consumer key + secret), Nightfall AI, Ramp (client ID + secret), Hex.pm (personal + workspace tokens), Convex deploy key, MiniMax, Mappedin (key + secret), Pollinations (secret + publishable), Fal.ai, Aikido, Hack Club, GuardSquare, Browser Use, Composio, Gamma, Hex.tech, Mastra, redirect.pizza, Upstash, and WorkOS. Also added new prefixed-token rules for Netlify (`nfp_`), Cloudflare (`cfut_`), and Supabase (`sb_publishable_`). Added live HTTP validation for 30 of these rules. 
- Added 32 new detection rules across 25 providers: Ghost CMS (admin + content keys), UpCloud (`ucat_`), Voiceflow (`VF.DM.`/`VF.WS.`), Robinhood Crypto (`rh-api-`), ClickUp (`pk_`), Unleash (client/admin + personal tokens), ConfigCat (standard + extended SDK keys), SaladCloud (`salad_cloud_`), Tigris (`tid_`/`tsec_`), Portainer (`ptr_`), Permit.io (`permit_key_`), Builder.io (`bpk-`), LiveKit (API key + secret), Close CRM (`api_`), Hetzner Cloud, Censys (API ID + secret), Wistia, PandaDoc, Pinata (key + secret), ZeroTier, Detectify, ChartMogul, Moralis, ButterCMS, and Loops. Includes HTTP validation for 19 of these rules. diff --git a/crates/kingfisher-rules/data/rules/AGENTS.md b/crates/kingfisher-rules/data/rules/AGENTS.md index 0287d66..5a83611 100644 --- a/crates/kingfisher-rules/data/rules/AGENTS.md +++ b/crates/kingfisher-rules/data/rules/AGENTS.md @@ -64,6 +64,23 @@ Strongly recommended fields: - When Rust validation is unavoidable for a one-off provider, prefer adding a raw validator instead of inventing a new typed validator. - Do not convert existing typed validators to `Raw` just for consistency. +## HTTP Validation Request Capabilities +The `validation.content.request` block under `type: Http` supports these fields: +- `method` (required): `GET`, `POST`, `DELETE`, `HEAD`, `PUT`, etc. +- `url` (required): target URL; supports Liquid templating (`{{ TOKEN }}`, filters, etc.) +- `headers` (optional): map of header name → value; supports Liquid templating. +- `body` (optional): request body string; supports Liquid templating. Use with `Content-Type: application/x-www-form-urlencoded` for form-encoded POST bodies or `application/json` for JSON bodies. +- `multipart` (optional): multipart form data; use for file-upload endpoints. +- `response_is_html` (optional, bool): allow HTML responses (default false). + +Useful Liquid filters for bodies and headers: `b64enc`, `url_encode`, `append`, `crc32`, `base62`. 
+ +**OAuth client credential validation pattern** — when a provider's token endpoint accepts `grant_type=authorization_code`, send an invalid code with real credentials. Valid credentials return `400` (bad code); invalid credentials return `401` (bad client). Example body: +``` +grant_type=authorization_code&client_id={{ CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&code=invalid&redirect_uri=https%3A%2F%2Fexample.com%2Fcallback +``` +Pair with `StatusMatch: [400]` and `JsonValid`. + ## Revocation Policy - If a rule has validation and the provider API safely supports revocation, add `revocation:` in the same YAML rule. - Prefer explicit success criteria in `response_matcher`. diff --git a/crates/kingfisher-rules/data/rules/buttercms.yml b/crates/kingfisher-rules/data/rules/buttercms.yml index 9f7c38b..1369836 100644 --- a/crates/kingfisher-rules/data/rules/buttercms.yml +++ b/crates/kingfisher-rules/data/rules/buttercms.yml @@ -25,7 +25,7 @@ rules: content: request: method: GET - url: "https://api.buttercms.com/v2/posts/?auth_token={{ TOKEN }}&page_size=1" + url: "https://api.buttercms.com/v2/authors/?auth_token={{ TOKEN }}" headers: Accept: application/json response_matcher: diff --git a/crates/kingfisher-rules/data/rules/chartmogul.yml b/crates/kingfisher-rules/data/rules/chartmogul.yml index 42cdb52..9edbd96 100644 --- a/crates/kingfisher-rules/data/rules/chartmogul.yml +++ b/crates/kingfisher-rules/data/rules/chartmogul.yml @@ -18,7 +18,7 @@ rules: min_entropy: 3.5 confidence: medium examples: - - 'CHARTMOGUL_API_KEY=a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6' + - 'CHARTMOGUL_API_KEY=a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6' # nosemgrep references: - https://dev.chartmogul.com/reference/authentication validation: @@ -26,7 +26,7 @@ rules: content: request: method: GET - url: https://api.chartmogul.com/v1/ping + url: https://api.chartmogul.com/v1/account headers: Accept: application/json Authorization: "Basic {{ TOKEN | append: ':' | b64enc }}" @@ -37,4 +37,4 @@ 
rules: - type: JsonValid - type: WordMatch words: - - '"pong"' + - '"currency"' diff --git a/crates/kingfisher-rules/data/rules/couchbase.yml b/crates/kingfisher-rules/data/rules/couchbase.yml index 407a177..9e5479e 100644 --- a/crates/kingfisher-rules/data/rules/couchbase.yml +++ b/crates/kingfisher-rules/data/rules/couchbase.yml @@ -31,8 +31,6 @@ rules: - 'COUCHBASE_API_KEY_SECRET="QktxVUtFU1dKV1FlJBYXdnTVlRemFZdmFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6MDJBQ0RF"' - | capella_api_secret = 'aB1cD2eF3gH4iJ5kL6mN7oP8qR9sT0uV1wX2yZ3aA4bB5cC6dD7eE8fF9gG0hH1iJ2kL3m==' - negative_examples: - - 'COUCHBASE_URL="https://cloud.couchbase.com"' references: - https://docs.couchbase.com/cloud/management-api-reference/index.html validation: diff --git a/crates/kingfisher-rules/data/rules/godaddy.yml b/crates/kingfisher-rules/data/rules/godaddy.yml index a71f7e8..e46ca52 100644 --- a/crates/kingfisher-rules/data/rules/godaddy.yml +++ b/crates/kingfisher-rules/data/rules/godaddy.yml @@ -38,5 +38,5 @@ rules: response_matcher: - report_response: true - type: StatusMatch - status: [200, 403] + status: [200] - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/langfuse.yml b/crates/kingfisher-rules/data/rules/langfuse.yml index 845e6cb..953a300 100644 --- a/crates/kingfisher-rules/data/rules/langfuse.yml +++ b/crates/kingfisher-rules/data/rules/langfuse.yml @@ -5,7 +5,12 @@ rules: (?x) \b ( - sk-lf-[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12} + sk-lf + -[0-9a-fA-F]{8} + -[0-9a-fA-F]{4} + -[0-9a-fA-F]{4} + -[0-9a-fA-F]{4} + -[0-9a-fA-F]{12} ) \b pattern_requirements: @@ -30,7 +35,7 @@ rules: content: request: method: GET - url: https://cloud.langfuse.com/api/public/projects + url: https://cloud.langfuse.com/api/public/organizations/memberships headers: Authorization: 'Basic {{ PUBLIC_KEY | append: ":" | append: TOKEN | b64enc }}' response_matcher: @@ -45,7 +50,12 @@ rules: (?x) \b ( - 
pk-lf-[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12} + pk-lf + -[0-9a-fA-F]{8} + -[0-9a-fA-F]{4} + -[0-9a-fA-F]{4} + -[0-9a-fA-F]{4} + -[0-9a-fA-F]{12} ) \b pattern_requirements: diff --git a/crates/kingfisher-rules/data/rules/lemonsqueezy.yml b/crates/kingfisher-rules/data/rules/lemonsqueezy.yml index 9ce3e62..f1d0970 100644 --- a/crates/kingfisher-rules/data/rules/lemonsqueezy.yml +++ b/crates/kingfisher-rules/data/rules/lemonsqueezy.yml @@ -23,6 +23,7 @@ rules: - 'LEMONSQUEEZY_URL="https://api.lemonsqueezy.com"' references: - https://docs.lemonsqueezy.com/api + - https://developer.samsung.com/iap/isn/jwt/header.html validation: type: Http content: diff --git a/crates/kingfisher-rules/data/rules/miro.yml b/crates/kingfisher-rules/data/rules/miro.yml index dbc7f30..c75a2c7 100644 --- a/crates/kingfisher-rules/data/rules/miro.yml +++ b/crates/kingfisher-rules/data/rules/miro.yml @@ -4,11 +4,16 @@ rules: pattern: | (?x) \b + (?i:miro) + (?:.|[\n\r]){0,32}? + (?i:(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|AUTH)) + (?:.|[\n\r]){0,32}? + \b ( eyJtaXJv - [A-Za-z0-9_=-]{10,} + [A-Za-z0-9-]{10,64} _ - [A-Za-z0-9_-]{20,} + [A-Za-z0-9_-]{20,64} ) \b pattern_requirements: @@ -18,11 +23,8 @@ rules: min_entropy: 3.5 confidence: medium examples: - - eyJtaXJvLm9yaWdpbiI6ImV1MDEifQ_o-P91OccaII0A63CDSK--x21xiI + - miro_key = eyJtaXJvLm9yaWdpbiI6ImV1MDEifQ_o-P91OccaII0A63CDSK--x21xiI - 'MIRO_TOKEN="eyJtaXJvLm9yaWdpbiI6InVzMDEifQ_kL8m2Nq7RpWxYz3AbCdEfGhI"' - negative_examples: - - eyJtaXJv - - eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9 references: - https://developers.miro.com/docs/rest-api-authorization - https://developers.miro.com/reference/get-token-info @@ -40,3 +42,68 @@ rules: - type: StatusMatch status: [200] - type: JsonValid + + - name: Miro Client Secret + id: kingfisher.miro.2 + pattern: | + (?xi) + \b + miro + (?:.|[\n\r]){0,40}? + (?:client[_-]?secret|app[_-]?secret) + (?:.|[\n\r]){0,20}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_lowercase: 2 + min_entropy: 3.5 + confidence: medium + examples: + - 'MIRO_CLIENT_SECRET=5VEWim0jDbaytgKXN7ReM7MkfpQ8Rm3d' # nosemgrep + references: + - https://developers.miro.com/docs/getting-started-with-oauth + depends_on_rule: + - rule_id: kingfisher.miro.3 + variable: CLIENT_ID + validation: + type: Http + content: + request: + method: POST + url: https://api.miro.com/v1/oauth/token + headers: + Content-Type: application/x-www-form-urlencoded + body: "grant_type=authorization_code&client_id={{ CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&code=invalid&redirect_uri=https%3A%2F%2Fexample.com%2Fcallback" + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: JsonValid + + - name: Miro Client ID + id: kingfisher.miro.3 + pattern: | + (?xi) + \b miro + (?:.|[\n\r]){0,40}? + (?:client[_-]?id|app[_-]?id) + (?:.|[\n\r]){0,20}? + \b + ( + [0-9]{15,21} + ) + \b + pattern_requirements: + min_digits: 15 + min_entropy: 2.5 + confidence: medium + visible: false + examples: + - 'MIRO_CLIENT_ID=3458764668142796369' + references: + - https://developers.miro.com/docs/getting-started-with-oauth diff --git a/crates/kingfisher-rules/data/rules/mixpanel.yml b/crates/kingfisher-rules/data/rules/mixpanel.yml index f49378c..56d371b 100644 --- a/crates/kingfisher-rules/data/rules/mixpanel.yml +++ b/crates/kingfisher-rules/data/rules/mixpanel.yml @@ -2,15 +2,15 @@ rules: - name: Mixpanel API Secret id: kingfisher.mixpanel.1 pattern: | - (?x) + (?xis) \b - (?i:mixpanel) - (?:.|[\n\r]){0,32}? - (?i:SECRET|API.?SECRET|PROJECT.?SECRET) - (?:.|[\n\r]){0,32}? + mixpanel + .{0,32}? + (?:SECRET|API.?SECRET|API.?KEY|PROJECT.?SECRET|TOKEN) + .{0,32}? 
\b ( - [a-zA-Z0-9]{32} + [a-f0-9]{32} ) \b pattern_requirements: @@ -19,13 +19,12 @@ rules: min_entropy: 3.5 confidence: medium examples: - - 'MIXPANEL_API_SECRET="a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6"' - - | - mixpanel_secret: 'Ab1Cd2Ef3Gh4Ij5Kl6Mn7Op8Qr9St0Uv' + - 'MIXPANEL_API_SECRET="a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6"' # nosemgrep + - 'MIXPANEL_API_KEY="0fff5f614cbcafefb9c1ba319b9905d2"' # nosemgrep + - "Mixpanel SECRET Token\na2813cd4502f94a77de470f2d7adc8d7" # nosemgrep negative_examples: - 'MIXPANEL_URL="https://api.mixpanel.com"' - 'mixpanel_project_id=12345' - - 'MIXPANEL_TOKEN="a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6"' references: - https://developer.mixpanel.com/reference/authentication - https://developer.mixpanel.com/reference/overview @@ -42,3 +41,100 @@ rules: - type: StatusMatch status: [200] - type: JsonValid + + - name: Mixpanel Service Account Secret + id: kingfisher.mixpanel.2 + pattern: | + (?xs) + mp-service-account + .{0,64}? + \b + ( + [A-Za-z0-9]{32} + ) + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_lowercase: 2 + min_entropy: 3.5 + confidence: medium + examples: + - "kingfisher-svc-1.b500ae.mp-service-account\nSecret: Vqprs8MMJm3XSpfWiuAFECFuyKxxrJL1" # nosemgrep + negative_examples: + - mp-service-account + - "mp-service-account\nsecret: a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6" # nosemgrep + references: + - https://developer.mixpanel.com/reference/service-accounts + depends_on_rule: + - rule_id: kingfisher.mixpanel.4 + variable: SVC_USERNAME + validation: + type: Http + content: + request: + method: GET + url: https://mixpanel.com/api/app/me + headers: + Authorization: 'Basic {{ SVC_USERNAME | append: ":" | append: TOKEN | b64enc }}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + + - name: Mixpanel API Key or Secret + id: kingfisher.mixpanel.3 + pattern: | + (?xis) + \b API \s+ (?:Key|Secret) \b + .{0,20}? 
+ \b + ( + [a-f0-9]{32} + ) + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 2 + min_entropy: 3.5 + confidence: medium + examples: + - "API Key\n0fff5f614cbcafefb9c1ba319b9905d2" # nosemgrep + - "API Secret\na2813cd4502f94a77de470f2d7adc8d7" # nosemgrep + references: + - https://developer.mixpanel.com/reference/authentication + validation: + type: Http + content: + request: + method: GET + url: https://api.mixpanel.com/import + headers: + Authorization: 'Basic {{ TOKEN | append: ":" | b64enc }}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: JsonValid + + - name: Mixpanel Service Account Username + id: kingfisher.mixpanel.4 + pattern: | + (?x) + \b + ( + [a-zA-Z0-9_-]+ \. [0-9a-f]{6} \. mp-service-account + ) + \b + pattern_requirements: + min_digits: 1 + min_lowercase: 1 + min_entropy: 2.5 + confidence: medium + visible: false + examples: + - 'MIXPANEL_USERNAME=kingfisher-svc-1.b500ae.mp-service-account' + - 'kingfisher-svc-2.6c6256.mp-service-account' + references: + - https://developer.mixpanel.com/reference/service-accounts diff --git a/crates/kingfisher-rules/data/rules/moralis.yml b/crates/kingfisher-rules/data/rules/moralis.yml index 3a34d78..5f6c184 100644 --- a/crates/kingfisher-rules/data/rules/moralis.yml +++ b/crates/kingfisher-rules/data/rules/moralis.yml @@ -6,8 +6,6 @@ rules: \b moralis (?:.|[\n\r]){0,32}? - (?:API[_-]?KEY|SECRET|TOKEN|KEY|WEB3) - (?:.|[\n\r]){0,16}? 
\b ( [0-9a-zA-Z]{64} diff --git a/crates/kingfisher-rules/data/rules/nightfall.yml b/crates/kingfisher-rules/data/rules/nightfall.yml index 8fe80e7..15f9400 100644 --- a/crates/kingfisher-rules/data/rules/nightfall.yml +++ b/crates/kingfisher-rules/data/rules/nightfall.yml @@ -14,18 +14,19 @@ rules: - 'NIGHTFALL_API_KEY=NF-a1B2c3D4e5F6g7H8i9J0k1L2m3N4o5P6' - 'Authorization: Bearer NF-xK8m2LpQr5nW0vYz3cJ7aB4dE6fG8h0i' references: - - https://docs.nightfall.ai/docs/creating-an-api-key + - https://help.nightfall.ai/developer-api/key-concepts/setup/api_key validation: type: Http content: request: method: POST - url: https://api.nightfall.ai/v3/upload + url: https://api.nightfall.ai/v3/scan headers: Authorization: "Bearer {{ TOKEN }}" + Accept: application/json Content-Type: application/json - body: '{"fileSizeBytes":256}' + body: '{"policy":{"detectionRules":[{"detectors":[{"minNumFindings":1,"minConfidence":"VERY_LIKELY","displayName":"US Social Security Number","detectorType":"NIGHTFALL_DETECTOR","nightfallDetector":"US_SOCIAL_SECURITY_NUMBER"}],"name":"Kingfisher Validation Rule","logicalOp":"ANY"}]},"payload":["my ssn is 458-02-6124"]}' response_matcher: - report_response: true - type: StatusMatch - status: [200, 201] + status: [200] diff --git a/crates/kingfisher-rules/data/rules/pandadoc.yml b/crates/kingfisher-rules/data/rules/pandadoc.yml index d0c4fa0..aa94ac9 100644 --- a/crates/kingfisher-rules/data/rules/pandadoc.yml +++ b/crates/kingfisher-rules/data/rules/pandadoc.yml @@ -2,11 +2,10 @@ rules: - name: PandaDoc API Key id: kingfisher.pandadoc.1 pattern: | - (?xi) - \b(?:pandadoc) + (?x) + \b + (?i:pandadoc) (?:.|[\n\r]){0,32}? - (?:API[_-]?KEY|SECRET|TOKEN|KEY) - (?:.|[\n\r]){0,16}? 
\b ( [a-zA-Z0-9]{40} @@ -14,20 +13,21 @@ rules: \b pattern_requirements: min_digits: 3 - min_uppercase: 3 min_lowercase: 3 min_entropy: 3.5 confidence: medium examples: - - 'PANDADOC_API_KEY=aBcDeFgHiJkLmNoPqRsTuVwXyZ01234567890abc' + - 'PANDADOC_API_KEY=3039ba033eb1410caa0a2227158d63c9d6502cd8' references: - - https://developers.pandadoc.com/reference/about + - https://developers.pandadoc.com/reference/api-key-authentication-process + - https://developers.pandadoc.com/openapi/pandadoc-public-api.json + - https://developers.pandadoc.com/reference/current-member-details validation: type: Http content: request: method: GET - url: https://api.pandadoc.com/public/v1/documents?count=1 + url: https://api.pandadoc.com/public/v1/members/current headers: Accept: application/json Authorization: "API-Key {{ TOKEN }}" diff --git a/crates/kingfisher-rules/data/rules/pinata.yml b/crates/kingfisher-rules/data/rules/pinata.yml index b44cb6a..0756bab 100644 --- a/crates/kingfisher-rules/data/rules/pinata.yml +++ b/crates/kingfisher-rules/data/rules/pinata.yml @@ -1,51 +1,56 @@ rules: - - name: Pinata API Key + - name: Pinata API Key ID id: kingfisher.pinata.1 - visible: false pattern: | - (?xi) - \b - pinata - (?:.|[\n\r]){0,32}? - (?:API[_-]?KEY|KEY) - (?:.|[\n\r]){0,16}? - \b - ( - [0-9a-f]{64} + (?x) + (?: + (?i:pinata)\s+API\s+Key + | + PINATA_API_KEY + | + API\s+Key + ) + \s*[:=]\s* + ( + [0-9a-f]{20} ) - \b pattern_requirements: min_digits: 4 - min_entropy: 3.5 + min_entropy: 3.3 + visible: false confidence: medium examples: - - 'PINATA_API_KEY=a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2' + - 'PINATA_API_KEY=017bb392dcfe1fcfe009' + - 'API Key: 017bb392dcfe1fcfe009' - name: Pinata API Secret id: kingfisher.pinata.2 pattern: | - (?xi) - \b - pinata - (?:.|[\n\r]){0,32}? - (?:API[_-]?SECRET|SECRET[_-]?KEY|SECRET) - (?:.|[\n\r]){0,16}? 
- \b + (?x) + (?: + (?i:pinata)\s+API\s+Secret + | + PINATA_API_SECRET + | + API\s+Secret + ) + \s*[:=]\s* ( [0-9a-f]{64} ) - \b pattern_requirements: min_digits: 4 min_entropy: 3.5 confidence: medium examples: - 'PINATA_API_SECRET=f0e1d2c3b4a5f6e7d8c9b0a1f2e3d4c5b6a7f8e9d0c1b2a3f4e5d6c7b8a9f0e1' + - 'API Secret: c9c7663470ffe6a11d2e9ef607ba32954e4ea3da7d7e662650d0eba8bcc2801f' references: - https://docs.pinata.cloud/api-reference/introduction + - https://docs.pinata.cloud/account-management/api-keys depends_on_rule: - rule_id: kingfisher.pinata.1 - variable: PINATA_API_KEY + variable: PINATA_API_KEY_ID validation: type: Http content: @@ -54,7 +59,7 @@ rules: url: https://api.pinata.cloud/data/testAuthentication headers: Accept: application/json - pinata_api_key: "{{ PINATA_API_KEY }}" + pinata_api_key: "{{ PINATA_API_KEY_ID }}" pinata_secret_api_key: "{{ TOKEN }}" response_matcher: - report_response: true @@ -64,3 +69,41 @@ rules: words: - '"message"' - 'Congratulations' + + - name: Pinata JWT + id: kingfisher.pinata.3 + pattern: | + (?x) + ( + (?:ey|ewogIC)[A-Za-z0-9_-]{12,512} + \. + ey[A-Za-z0-9_-]{0,512} + \. 
+ [A-Za-z0-9_-]{24,512} + ) + (?:[^A-Za-z0-9_-]|$) + min_entropy: 3.3 + confidence: medium + examples: + - 'PINATA_JWT=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE4MDc5MTQxNjAsImF1dGhlbnRpY2F0aW9uVHlwZSI6InNjb3BlZEtleSIsInNjb3BlZEtleUtleSI6IjAxN2JiMzkyZGNmZTFmY2ZlMDA5Iiwic2NvcGVkS2V5U2VjcmV0IjoiYzljNzY2MzQ3MGZmZTZhMTFkMmU5ZWY2MDdiYTMyOTU0ZTRlYTNkYTdkN2U2NjI2NTBkMGViYThiY2MyODAxZiJ9.abcdefghijklmnopqrstuvwxyzABCDEF' + - 'API Secret: c9c7663470ffe6a11d2e9ef607ba32954e4ea3da7d7e662650d0eba8bcc2801f JWT: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE4MDc5MTQxNjAsImF1dGhlbnRpY2F0aW9uVHlwZSI6InNjb3BlZEtleSIsInNjb3BlZEtleUtleSI6IjAxN2JiMzkyZGNmZTFmY2ZlMDA5Iiwic2NvcGVkS2V5U2VjcmV0IjoiYzljNzY2MzQ3MGZmZTZhMTFkMmU5ZWY2MDdiYTMyOTU0ZTRlYTNkYTdkN2U2NjI2NTBkMGViYThiY2MyODAxZiJ9.abcdefghijklmnopqrstuvwxyzABCDEF' + references: + - https://docs.pinata.cloud/account-management/api-keys + - https://docs.pinata.cloud/api-reference/endpoint/ipfs/test-authentication + validation: + type: Http + content: + request: + method: GET + url: https://api.pinata.cloud/data/testAuthentication + headers: + Accept: application/json + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"message"' + - 'Congratulations' diff --git a/crates/kingfisher-rules/data/rules/ramp.yml b/crates/kingfisher-rules/data/rules/ramp.yml index d053828..efb442c 100644 --- a/crates/kingfisher-rules/data/rules/ramp.yml +++ b/crates/kingfisher-rules/data/rules/ramp.yml @@ -13,7 +13,7 @@ rules: examples: - 'RAMP_CLIENT_ID=ramp_id_a1B2c3D4e5F6g7H8i9J0k1L2m3N4o5P6q7R8s9T0' references: - - https://docs.ramp.com/reference/authentication + - https://docs.ramp.com/developer-api/v1/authorization - name: Ramp Client Secret id: kingfisher.ramp.2 @@ -29,4 +29,33 @@ rules: examples: - 'RAMP_CLIENT_SECRET=ramp_sec_a1B2c3D4e5F6g7H8i9J0k1L2m3N4o5P6q7R8s9T0u1V2w3X4' references: - - https://docs.ramp.com/reference/authentication + - 
https://docs.ramp.com/developer-api/v1/authorization + depends_on_rule: + - rule_id: kingfisher.ramp.1 + variable: CLIENT_ID + validation: + type: Http + content: + request: + method: POST + url: https://api.ramp.com/developer/v1/token + headers: + Accept: application/json + Content-Type: application/x-www-form-urlencoded + Authorization: 'Basic {{ CLIENT_ID | append: ":" | append: TOKEN | b64enc }}' + body: 'grant_type=client_credentials&scope=users%3Aread' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200, 400, 401] + - type: JsonValid + - type: WordMatch + match_all_words: false + words: + - '"access_token"' + - invalid_scope + - unauthorized_client + - type: WordMatch + negative: true + words: + - invalid_client diff --git a/crates/kingfisher-rules/data/rules/ringcentral.yml b/crates/kingfisher-rules/data/rules/ringcentral.yml index 9244dc2..e959938 100644 --- a/crates/kingfisher-rules/data/rules/ringcentral.yml +++ b/crates/kingfisher-rules/data/rules/ringcentral.yml @@ -22,8 +22,6 @@ rules: visible: false examples: - 'RINGCENTRAL_CLIENT_ID="aB1cD2eF3gH4iJ5kL6mN7o"' - negative_examples: - - 'RINGCENTRAL_URL="https://platform.ringcentral.com"' references: - https://developers.ringcentral.com/api-reference/ diff --git a/crates/kingfisher-rules/data/rules/rootly.yml b/crates/kingfisher-rules/data/rules/rootly.yml index 8fe48ac..08e43fb 100644 --- a/crates/kingfisher-rules/data/rules/rootly.yml +++ b/crates/kingfisher-rules/data/rules/rootly.yml @@ -11,7 +11,7 @@ rules: pattern_requirements: min_digits: 4 min_entropy: 3.5 - confidence: high + confidence: medium categories: [api, key] examples: - 'ROOTLY_API_KEY=rootly_abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890' diff --git a/crates/kingfisher-rules/data/rules/shutterstock.yml b/crates/kingfisher-rules/data/rules/shutterstock.yml index f7df056..93e8930 100644 --- a/crates/kingfisher-rules/data/rules/shutterstock.yml +++ 
b/crates/kingfisher-rules/data/rules/shutterstock.yml @@ -2,14 +2,13 @@ rules: - name: Shutterstock OAuth Token id: kingfisher.shutterstock.1 pattern: | - (?xi) + (?x) \b - shutterstock - (?:.|[\n\r]){0,48}? - (?:ACCESS|TOKEN|BEARER) + (?i:shutterstock) (?:.|[\n\r]){0,32}? + \b ( - v2/[A-Za-z0-9+/]{40,}={0,2} + v2/[A-Za-z0-9+/]{40,512}={0,2} ) pattern_requirements: min_digits: 2 diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index 4cf5118..bc6b34e 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -14,6 +14,7 @@ All notable changes to this project will be documented in this file. - **Access Map:** Alibaba Cloud long-lived and STS access key pairs (validated `kingfisher.alibabacloud.2` and `kingfisher.alibabacloud.5`): caller identity via STS GetCallerIdentity; standalone `kingfisher access-map alibaba` (alias `aliyun`). - **Report viewer:** Import Gitleaks and TruffleHog JSON into the bundled local viewer with deduplication for repeated imported findings, and publish a static upload-based viewer on the docs site for GitHub Pages hosting. See `docs/USAGE.md`. - Fixed parser-based context gating so assignment-style contextual secrets still scan in raw text when parser verification is unavailable, instead of being dropped. +- Fixed dependent-variable pairing for HTTP validation so rules use the nearest helper match in-file, and updated Pinata detection/validation to reliably catch API key IDs, API secrets, and JWTs, including key+secret validation. - Corrected several newly added SaaS rules and validators, including LiveKit (with dependent API secret validation), Tinybird, Inngest, Tolgee, Unkey, Composio, Hex.pm, Trigger.dev, Voiceflow, WorkOS, and Infisical. 
- Added 61 new detection rules across 46 providers: Axiom (API token + PAT), Trigger.dev (secret key + PAT), Dub.co, Svix webhook signing secret, Liveblocks, Inngest (signing key + event key), Seam, Courier, Cal.com, Arcjet, WarpStream, Mem0, Mintlify, Pirsch, Tinybird, Tolgee (project key + PAT), Ory (API key + session + OAuth2 tokens), Xendit, Xata, Crossmint (server + client keys), DeepL (Free + Pro), Flagsmith, E2B, Infisical, WooCommerce (consumer key + secret), Nightfall AI, Ramp (client ID + secret), Hex.pm (personal + workspace tokens), Convex deploy key, MiniMax, Mappedin (key + secret), Pollinations (secret + publishable), Fal.ai, Aikido, Hack Club, GuardSquare, Browser Use, Composio, Gamma, Hex.tech, Mastra, redirect.pizza, Upstash, and WorkOS. Also added new prefixed-token rules for Netlify (`nfp_`), Cloudflare (`cfut_`), and Supabase (`sb_publishable_`). Added live HTTP validation for 30 of these rules. - Added 32 new detection rules across 25 providers: Ghost CMS (admin + content keys), UpCloud (`ucat_`), Voiceflow (`VF.DM.`/`VF.WS.`), Robinhood Crypto (`rh-api-`), ClickUp (`pk_`), Unleash (client/admin + personal tokens), ConfigCat (standard + extended SDK keys), SaladCloud (`salad_cloud_`), Tigris (`tid_`/`tsec_`), Portainer (`ptr_`), Permit.io (`permit_key_`), Builder.io (`bpk-`), LiveKit (API key + secret), Close CRM (`api_`), Hetzner Cloud, Censys (API ID + secret), Wistia, PandaDoc, Pinata (key + secret), ZeroTier, Detectify, ChartMogul, Moralis, ButterCMS, and Loops. Includes HTTP validation for 19 of these rules. 
diff --git a/src/validation.rs b/src/validation.rs index 46ec4ce..61503fb 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -581,21 +581,19 @@ async fn timed_validate_single_match<'a>( }; for dep in m.rule.syntax().depends_on_rule.iter().flatten() { - if let Some(vals) = dependent_variables.get(&dep.variable.to_uppercase()) { - for (val, span) in vals { - // Skip adding captured values for TOKEN dependencies - if dep.variable.eq_ignore_ascii_case("TOKEN") { - continue; - } - captured_values.push(( - dep.variable.to_uppercase(), - val.clone(), - span.start, - span.end, - )); + // Skip adding captured values for TOKEN dependencies + if dep.variable.eq_ignore_ascii_case("TOKEN") { + continue; + } + let dep_name = dep.variable.to_uppercase(); + if let Some(vals) = dependent_variables.get(&dep_name) { + if let Some((val, span)) = + select_closest_dependency_value(vals, m.matching_input_offset_span) + { + captured_values.push((dep_name.clone(), val.clone(), span.start, span.end)); // Store the dependent capture for later use in reporting // (e.g., generating validate/revoke commands) - m.dependent_captures.insert(dep.variable.to_uppercase(), val.clone()); + m.dependent_captures.insert(dep_name, val); } } } @@ -1626,6 +1624,48 @@ fn populate_globals_from_captures( } } +/// Picks the dependency capture to pair with the match located at `target_span`. +/// +/// Preference order: the nearest value ending at or before `target_span` starts, then the +/// first value overlapping `target_span`, then the nearest value starting at or after it +/// ends. Equidistant candidates keep the earliest entry in `values`. Returns `None` when +/// `values` is empty. +fn select_closest_dependency_value( + values: &[(String, OffsetSpan)], + target_span: OffsetSpan, +) -> Option<(String, OffsetSpan)> { + let mut best_before: Option<(usize, &(String, OffsetSpan))> = None; + let mut best_overlap: Option<&(String, OffsetSpan)> = None; + let mut best_after: Option<(usize, &(String, OffsetSpan))> = None; + + for candidate in values { + let span = candidate.1; + if span.end <= target_span.start { + let distance = target_span.start - span.end; + match best_before { + Some((best, _)) if best <= distance => {} + _ 
=> best_before = Some((distance, candidate)), + } + } else if span.start >= target_span.end { + let distance = span.start - target_span.end; + match best_after { + Some((best, _)) if best <= distance => {} + _ => best_after = Some((distance, candidate)), + } + } else if best_overlap.is_none() { + // Every overlapping span is at distance zero, so the first one seen is kept. + best_overlap = Some(candidate); + } + } + + // Only the winning candidate is cloned; the loop itself tracks borrowed entries. + best_before + .map(|(_, candidate)| candidate) + .or(best_overlap) + .or(best_after.map(|(_, candidate)| candidate)) + .cloned() +} + #[cfg(test)] mod tests { use super::*; @@ -1656,6 +1696,50 @@ mod tests { assert_eq!(globals.get("CHECKSUM"), Some(Value::scalar("123456")).as_ref()); } + #[test] + fn select_closest_dependency_value_prefers_nearest_preceding_dependency() { + let values = vec![ + ("first".to_string(), OffsetSpan::from_range(10..20)), + ("second".to_string(), OffsetSpan::from_range(40..50)), + ("third".to_string(), OffsetSpan::from_range(80..90)), + ]; + + let selected = + select_closest_dependency_value(&values, OffsetSpan::from_range(55..60)).unwrap(); + + assert_eq!(selected.0, "second"); + assert_eq!(selected.1, OffsetSpan::from_range(40..50)); + } + + #[test] + fn select_closest_dependency_value_falls_back_to_nearest_following_dependency() { + let values = vec![ + ("first".to_string(), OffsetSpan::from_range(70..80)), + ("second".to_string(), OffsetSpan::from_range(90..100)), + ]; + + let selected = + select_closest_dependency_value(&values, OffsetSpan::from_range(55..60)).unwrap(); + + assert_eq!(selected.0, "first"); + assert_eq!(selected.1, OffsetSpan::from_range(70..80)); + } + + #[test] + fn select_closest_dependency_value_keeps_first_overlapping_dependency() { + let values = vec![ + ("first".to_string(), OffsetSpan::from_range(50..58)), + ("second".to_string(), OffsetSpan::from_range(57..65)), + ("third".to_string(), OffsetSpan::from_range(70..80)), + ]; + + let selected = + select_closest_dependency_value(&values, OffsetSpan::from_range(55..60)).unwrap(); + + assert_eq!(selected.0, "first"); + assert_eq!(selected.1, OffsetSpan::from_range(50..58)); + } + #[test] fn truncate_to_char_boundary_handles_multibyte_characters() { let max_len = 2048;