From afee0b7181228fd5de87f3796d9a12db5a8b626f Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Tue, 7 Apr 2026 10:42:44 -0700 Subject: [PATCH] updated rules --- CHANGELOG.md | 2 +- crates/kingfisher-rules/data/rules/AGENTS.md | 4 + crates/kingfisher-rules/data/rules/adobe.yml | 51 ++++++++++- crates/kingfisher-rules/data/rules/asaas.yml | 29 ++++++ crates/kingfisher-rules/data/rules/asana.yml | 26 ++++++ crates/kingfisher-rules/data/rules/azure.yml | 24 +++++ .../kingfisher-rules/data/rules/azuremaps.yml | 21 +++++ .../kingfisher-rules/data/rules/branchio.yml | 14 +++ .../data/rules/cockroachlabs.yml | 23 +++++ .../data/rules/databricks.yml | 24 ++++- crates/kingfisher-rules/data/rules/gitlab.yml | 21 +++++ crates/kingfisher-rules/data/rules/google.yml | 90 ++++++++++++++++++- .../data/rules/googleoauth2.yml | 17 +++- .../kingfisher-rules/data/rules/highnote.yml | 30 +++++++ .../kingfisher-rules/data/rules/langfuse.yml | 11 +-- .../kingfisher-rules/data/rules/posthog.yml | 16 ++++ crates/kingfisher-rules/data/rules/proof.yml | 27 ++++++ .../kingfisher-rules/data/rules/tableau.yml | 17 ++-- .../kingfisher-scanner/src/validation/raw.rs | 27 ++++++ data/default/rule_cleanup/count_rules.py | 63 +++++++++---- docs-site/docs/changelog.md | 5 ++ src/direct_validate.rs | 1 + src/reporter.rs | 8 +- src/validation.rs | 3 +- 24 files changed, 513 insertions(+), 41 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f507c0..5ee1dde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. ## [v1.95.0] - Added 80+ built-in rules, bringing the bundled ruleset to 820 total. New coverage includes Amazon OAuth, Asaas, multiple Azure credential families, Bitrise, Canva, CockroachDB, eBay, Elastic, hCaptcha, Highnote, Lichess, MailerSend, Onfido, Paddle, Pangea, Persona, Pinterest, Proof, Rootly, Runpod, Telnyx, Thunderstore, Valtown, Volcengine, and more. - Added a `validation: type: Raw` exception path for provider-specific checks, with new raw validators for Azure Batch, FTP, Kraken, LDAP, RabbitMQ, and Redis. Also added stable request-scoped template values plus new Liquid filters for HMAC-SHA384 hex output and timestamp generation. -- Expanded live validation coverage for several built-in rules, including Agora, Bitfinex, DocuSign, Dwolla, GitLab, KuCoin, RingCentral, Snowflake, Tableau, Trello, and Webex, and fixed newly added rule patterns/examples so `kingfisher rules check` passes cleanly. +- Expanded live validation coverage for several built-in rules, including Agora, Bitfinex, DocuSign, Dwolla, GitLab, KuCoin, RingCentral, Snowflake, Tableau, Trello, and Webex. Also tightened newly added helper regex to avoid high-match scan regressions, and made preflight-blocked raw validations report as skipped/not attempted instead of failed. ## [v1.94.0] - Updated vendored `vectorscan-rs` from v0.0.5 (Vectorscan 5.4.11) to v0.0.6 (Vectorscan 5.4.12). The upstream crate now ships pre-extracted sources instead of a tarball+patch, and fixes the `cpu_native` feature flag. Local Windows and musl build patches have been re-applied. diff --git a/crates/kingfisher-rules/data/rules/AGENTS.md b/crates/kingfisher-rules/data/rules/AGENTS.md index 433d951..0287d66 100644 --- a/crates/kingfisher-rules/data/rules/AGENTS.md +++ b/crates/kingfisher-rules/data/rules/AGENTS.md @@ -30,6 +30,7 @@ Strongly recommended fields: ## Pattern Quality Rules - Prefer specific anchors/prefixes and provider context over broad generic regex. +- Keep helper/context regex narrow. Avoid patterns that match generic URLs, hostnames, query params, or assignments without strong provider-specific constraints; broad helpers can create huge match counts and cause major memory/time regressions on large repos and git history. - When the token format is generic or common-looking (for example bare 32-hex keys), prefer contextual patterns of the form: provider keyword -> short flexible gap -> key/secret label -> short flexible gap -> token. A good default is: - `\b` - provider identifier (for example `amplitude`, `azure`, `speech`, `translator`) @@ -83,6 +84,9 @@ Strongly recommended fields: - `cargo test -p kingfisher-rules` - Broader regression check: - `cargo test --workspace --all-targets` +- Match-volume check on a realistic large target: + - `kingfisher scan --rule-stats` + - Review unexpected high-match helper/generic rules before submitting. - **Warning-free build**: `cargo check` (or `make darwin` / `make linux`) must produce zero warnings. Address all `dead_code`, `unused_*`, and other warnings before submitting. Use `#[allow(dead_code)]` on individual struct fields kept for deserialization completeness, and remove truly unused code. - Behavioral check against sample content: - `kingfisher scan ./testdata --rule --rule-stats` diff --git a/crates/kingfisher-rules/data/rules/adobe.yml b/crates/kingfisher-rules/data/rules/adobe.yml index ec72a85..63a2062 100644 --- a/crates/kingfisher-rules/data/rules/adobe.yml +++ b/crates/kingfisher-rules/data/rules/adobe.yml @@ -73,4 +73,53 @@ rules: "client_credentials": { "client_id": "a65b0146769d433a835f36660881db50", "client_secret": "p8e-ibndcvsmAp9ZgPBZ606FSlYIZVlsZ-g5" - }, \ No newline at end of file + }, + depends_on_rule: + - rule_id: "kingfisher.adobe.4" + variable: ADOBE_CLIENT_ID + validation: + type: Http + content: + request: + method: POST + url: https://ims-na1.adobelogin.com/ims/token/v3 + headers: + Authorization: 'Basic {{ ADOBE_CLIENT_ID | append: ":" | append: TOKEN | b64enc }}' + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: 'code=invalid_code&grant_type=authorization_code' + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: WordMatch + words: + - invalid_client + negative: true + # Revocation not added: Adobe documents revocation for access and refresh + # tokens, not for the OAuth client secret itself. + references: + - https://developer.adobe.com/developer-console/docs/guides/authentication/UserAuthentication/ims + + - name: Adobe OAuth Client ID + id: kingfisher.adobe.4 + pattern: | + (?xi) + \b + adobe + (?:.|[\n\r]){0,64}? + client_id + (?:.|[\n\r]){0,16}? + ( + [a-f0-9]{32} + ) + \b + min_entropy: 3.0 + visible: false + examples: + - | + { + "client_credentials": { + "client_id": "a65b0146769d433a835f36660881db50", + "client_secret": "p8e-ibndcvsmAp9ZgPBZ606FSlYIZVlsZ-g5" + }, diff --git a/crates/kingfisher-rules/data/rules/asaas.yml b/crates/kingfisher-rules/data/rules/asaas.yml index 3b06e7b..1746083 100644 --- a/crates/kingfisher-rules/data/rules/asaas.yml +++ b/crates/kingfisher-rules/data/rules/asaas.yml @@ -14,5 +14,34 @@ rules: examples: - 'ASAAS_API_KEY=$aact_prod_abcdefghijklmnop1234567890ABCDEF' - 'api_token: $aact_hmlg_abcdefghijklmnop1234567890ABCDEF' + validation: + type: Http + content: + request: + method: GET + url: > + {%- if TOKEN contains "$aact_hmlg_" -%} + https://api-sandbox.asaas.com/v3/myAccount/commercialInfo/ + {%- else -%} + https://api.asaas.com/v3/myAccount/commercialInfo/ + {%- endif -%} + headers: + Accept: application/json + User-Agent: kingfisher + access_token: "{{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"object"' + - '"commercialInfo"' + # Revocation not added: Asaas documents key deletion in the dashboard and + # parent-driven sub-account key management, but not a self-revoke endpoint + # for the current access_token alone. references: - https://docs.asaas.com/docs/authentication-2 + - https://docs.asaas.com/docs/change-the-name-of-a-business-subaccount-via-api diff --git a/crates/kingfisher-rules/data/rules/asana.yml b/crates/kingfisher-rules/data/rules/asana.yml index 0def56b..0c767d9 100644 --- a/crates/kingfisher-rules/data/rules/asana.yml +++ b/crates/kingfisher-rules/data/rules/asana.yml @@ -41,6 +41,32 @@ rules: examples: - "asana :'20c2F0d03201af478ca1aBE9515A1A4FEfb'" - ASANA_PAT = 1234567890abcdef1234567890abcdef12 + depends_on_rule: + - rule_id: kingfisher.asana.1 + variable: ASANA_CLIENT_ID + validation: + type: Http + content: + request: + method: POST + url: https://app.asana.com/-/oauth_token + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: > + grant_type=authorization_code&client_id={{ ASANA_CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&redirect_uri={{ "https://example.com/oauth/callback" | url_encode }}&code=invalid_code + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: WordMatch + words: + - invalid_client + negative: true + # Revocation not added: Asana's revoke endpoint deauthorizes refresh tokens, + # not OAuth client secrets. + references: + - https://developers.asana.com/docs/oauth - name: Asana OAuth / Personal Access Token (Legacy) id: kingfisher.asana.3 diff --git a/crates/kingfisher-rules/data/rules/azure.yml b/crates/kingfisher-rules/data/rules/azure.yml index f909148..dd782e1 100644 --- a/crates/kingfisher-rules/data/rules/azure.yml +++ b/crates/kingfisher-rules/data/rules/azure.yml @@ -70,6 +70,30 @@ rules: - | if __name__ == "__main__": ado_pat = "iyfmob6xjrfmit67anxbot64umfx2clwx7dz5ynxi4q2z3uqegvq" + validation: + type: Http + content: + request: + method: GET + url: https://app.vssps.visualstudio.com/_apis/profile/profiles/me?api-version=7.1 + headers: + Authorization: 'Basic {{ ":" | append: TOKEN | b64enc }}' + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"id"' + - '"displayName"' + # Revocation not added: Azure DevOps PAT lifecycle management is documented + # separately and is not a self-revoke flow driven solely by the PAT itself. + references: + - https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops + - https://learn.microsoft.com/en-us/rest/api/azure/devops/profile/profiles/get?view=azure-devops-rest-7.1 - name: Azure Container Registry URL id: kingfisher.azure.4 pattern: | diff --git a/crates/kingfisher-rules/data/rules/azuremaps.yml b/crates/kingfisher-rules/data/rules/azuremaps.yml index 177a382..762e41e 100644 --- a/crates/kingfisher-rules/data/rules/azuremaps.yml +++ b/crates/kingfisher-rules/data/rules/azuremaps.yml @@ -17,5 +17,26 @@ rules: categories: [api, key] examples: - AZURE_MAPS_KEY=AbCdEfGhIjKlMnOpQrStUvWxYz123456 + validation: + type: Http + content: + request: + method: GET + url: https://atlas.microsoft.com/geocode?api-version=2025-01-01&addressLine=15127%20NE%2024th%20Street%20Redmond%20WA&countryRegion=US&subscription-key={{ TOKEN }} + headers: + Accept: application/geo+json, application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"FeatureCollection"' + - '"features"' + # Revocation not added: Azure Maps shared-key docs cover rotation and + # authentication, but I did not find a token self-revoke API. references: - https://learn.microsoft.com/en-us/azure/azure-maps/how-to-manage-authentication + - https://learn.microsoft.com/en-us/rest/api/maps/search/get-geocoding diff --git a/crates/kingfisher-rules/data/rules/branchio.yml b/crates/kingfisher-rules/data/rules/branchio.yml index 04cf378..7a0a655 100644 --- a/crates/kingfisher-rules/data/rules/branchio.yml +++ b/crates/kingfisher-rules/data/rules/branchio.yml @@ -45,6 +45,20 @@ rules: - 'branch.init("key_test_plqYW3Aq9Xija1cobGMieipndBzO5y7J");' references: - https://help.branch.io/developers-hub/docs/deep-linking-api + - https://help.branch.io/apidocs/app-api + depends_on_rule: + - rule_id: kingfisher.branchio.3 + variable: BRANCH_SECRET + validation: + type: Http + content: + request: + method: GET + url: "https://api2.branch.io/v1/app/{{ TOKEN }}?branch_secret={{ BRANCH_SECRET }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] - name: Branch.io Secret id: kingfisher.branchio.3 diff --git a/crates/kingfisher-rules/data/rules/cockroachlabs.yml b/crates/kingfisher-rules/data/rules/cockroachlabs.yml index 9fbcc40..a4b8d7e 100644 --- a/crates/kingfisher-rules/data/rules/cockroachlabs.yml +++ b/crates/kingfisher-rules/data/rules/cockroachlabs.yml @@ -24,5 +24,28 @@ rules: categories: [api, key] examples: - 'COCKROACHDB_API_KEY=B81649_8F7D11A_92BCE13_56782D_C53' + validation: + type: Http + content: + request: + method: GET + url: https://cockroachlabs.cloud/api/v1/clusters?show_inactive=true + headers: + Authorization: Bearer {{ TOKEN }} + Accept: application/json + Cc-Version: "2024-09-16" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"clusters"' + - '"pagination"' + # Revocation not added: the public Cloud API docs describe bearer-token + # authentication for service-account secret keys, but not a documented + # self-revocation endpoint for the current secret key value. references: - https://www.cockroachlabs.com/docs/cockroachcloud/cloud-api diff --git a/crates/kingfisher-rules/data/rules/databricks.yml b/crates/kingfisher-rules/data/rules/databricks.yml index 2405d5c..4190d22 100644 --- a/crates/kingfisher-rules/data/rules/databricks.yml +++ b/crates/kingfisher-rules/data/rules/databricks.yml @@ -22,6 +22,26 @@ rules: - secret references: - https://docs.databricks.com/dev-tools/api/latest/authentication.html + - https://docs.databricks.com/en/dev-tools/auth/pat.html + validation: + type: Http + content: + request: + headers: + Authorization: Bearer {{ TOKEN }} + method: GET + response_matcher: + - report_response: true + - status: + - 200 + type: StatusMatch + url: https://{{ DOMAIN }}/api/2.0/clusters/list + depends_on_rule: + - rule_id: "kingfisher.databricks.3" + variable: DOMAIN + # Revocation not added: Databricks PAT docs describe token creation and + # use, but I did not find a PAT-only self-revoke endpoint suitable for YAML + # revocation here. - name: Databricks API Token id: kingfisher.databricks.2 @@ -51,7 +71,7 @@ rules: type: StatusMatch url: https://{{ DOMAIN }}/api/2.0/clusters/list depends_on_rule: - - rule_id: "kingfisher.databricks.2" + - rule_id: "kingfisher.databricks.3" variable: DOMAIN - name: Databricks Domain @@ -83,4 +103,4 @@ rules: references: - https://docs.databricks.com/workspace/workspace-details.html - https://docs.gcp.databricks.com/workspace/workspace-details.html - - https://docs.microsoft.com/en-us/azure/databricks/scenarios/what-is-azure-databricks \ No newline at end of file + - https://docs.microsoft.com/en-us/azure/databricks/scenarios/what-is-azure-databricks diff --git a/crates/kingfisher-rules/data/rules/gitlab.yml b/crates/kingfisher-rules/data/rules/gitlab.yml index 2fa5320..2ac05bb 100644 --- a/crates/kingfisher-rules/data/rules/gitlab.yml +++ b/crates/kingfisher-rules/data/rules/gitlab.yml @@ -213,6 +213,27 @@ rules: - 'CI_JOB_TOKEN=glcbt-a1b2c_3dEfGhIjKlMnOpQrStUv' references: - https://docs.gitlab.com/ci/jobs/ci_job_token/ + - https://docs.gitlab.com/api/jobs/ + validation: + type: Http + content: + request: + method: GET + url: https://gitlab.com/api/v4/job + headers: + JOB-TOKEN: '{{ TOKEN }}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"id"' + - '"status"' + # Revocation not added: CI/CD job tokens are short-lived and automatically + # invalidated when the job finishes. - name: GitLab Deploy Token id: kingfisher.gitlab.6 diff --git a/crates/kingfisher-rules/data/rules/google.yml b/crates/kingfisher-rules/data/rules/google.yml index c6c9e66..4cfbde6 100644 --- a/crates/kingfisher-rules/data/rules/google.yml +++ b/crates/kingfisher-rules/data/rules/google.yml @@ -23,6 +23,32 @@ rules: confidence: medium examples: - 'const CLIENTSECRET = "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"' + depends_on_rule: + - rule_id: "kingfisher.google.1" + variable: GOOGLE_CLIENT_ID + validation: + type: Http + content: + request: + method: POST + url: https://oauth2.googleapis.com/token + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: > + code=invalid_code&client_id={{ GOOGLE_CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&redirect_uri={{ "https://example.com/oauth/callback" | url_encode }}&grant_type=authorization_code + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: WordMatch + words: + - invalid_client + negative: true + # Revocation not added: Google's OAuth revocation endpoint revokes tokens, + # not client secrets. + references: + - https://developers.google.com/identity/protocols/oauth2/web-server - name: Google OAuth Client Secret id: kingfisher.google.3 @@ -36,6 +62,32 @@ rules: examples: - " //$google_client_secret = 'fnhqAakzWrX-mtFQ4PRdMoy0';" - " 'clientSecret' : 'Ufvuj-d6alhwGKvvLh_8Nq0K'" + depends_on_rule: + - rule_id: "kingfisher.google.1" + variable: GOOGLE_CLIENT_ID + validation: + type: Http + content: + request: + method: POST + url: https://oauth2.googleapis.com/token + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: > + code=invalid_code&client_id={{ GOOGLE_CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&redirect_uri={{ "https://example.com/oauth/callback" | url_encode }}&grant_type=authorization_code + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: WordMatch + words: + - invalid_client + negative: true + # Revocation not added: Google's OAuth revocation endpoint revokes tokens, + # not client secrets. + references: + - https://developers.google.com/identity/protocols/oauth2/web-server - name: Google OAuth Access Token id: kingfisher.google.4 @@ -61,6 +113,42 @@ rules: - | -- Clear login if it's a new connection. --propertyTable.access_token = 'ya29.Ci_UA7aEsvT6-oVI8f96kvB6i8oO13WgdZUviLaCVtpEPYZqhQcQycR-u2X9xtmYGA' + validation: + type: Http + content: + request: + method: GET + url: https://www.googleapis.com/oauth2/v3/tokeninfo?access_token={{ TOKEN | url_encode }} + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"aud"' + - '"expires_in"' + revocation: + type: Http + content: + request: + method: POST + url: https://oauth2.googleapis.com/revoke + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: token={{ TOKEN | url_encode }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + references: + - https://developers.google.com/identity/openid-connect/openid-connect + - https://developers.google.com/data-portability/user-guide/quickstart + - https://developers.google.com/identity/protocols/oauth2/web-server - name: Google OAuth Credentials id: kingfisher.google.6 @@ -118,4 +206,4 @@ rules: match_all_words: true words: - '"models"' - - '"name"' \ No newline at end of file + - '"name"' diff --git a/crates/kingfisher-rules/data/rules/googleoauth2.yml b/crates/kingfisher-rules/data/rules/googleoauth2.yml index ddbd412..be4664b 100644 --- a/crates/kingfisher-rules/data/rules/googleoauth2.yml +++ b/crates/kingfisher-rules/data/rules/googleoauth2.yml @@ -30,5 +30,20 @@ rules: - type: WordMatch words: - '"email":' + revocation: + type: Http + content: + request: + method: POST + url: https://oauth2.googleapis.com/revoke + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: token={{ TOKEN | url_encode }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] references: - - https://developers.google.com/identity/protocols/oauth2 \ No newline at end of file + - https://developers.google.com/identity/protocols/oauth2 + - https://developers.google.com/identity/protocols/oauth2/web-server diff --git a/crates/kingfisher-rules/data/rules/highnote.yml b/crates/kingfisher-rules/data/rules/highnote.yml index cfdffcb..c8ddf35 100644 --- a/crates/kingfisher-rules/data/rules/highnote.yml +++ b/crates/kingfisher-rules/data/rules/highnote.yml @@ -19,5 +19,35 @@ rules: examples: - 'HIGHNOTE_API_KEY=sk_live_AbCdEfGhIjKlMnOpQrStUvWxYz1234' - 'highnote_key: rk_test_AbCdEfGhIjKlMnOpQrStUvWxYz1234' + validation: + type: Http + content: + request: + method: POST + url: > + {%- if TOKEN contains "_test_" -%} + https://api.us.test.highnote.com/graphql + {%- else -%} + https://api.us.highnote.com/graphql + {%- endif -%} + headers: + Authorization: "Basic {{ TOKEN | b64enc }}" + Content-Type: application/json + Accept: application/json + body: '{"query":"query { ping }"}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"data"' + - '"ping"' + - '"pong"' + # Revocation not added: the public Highnote docs I found describe API key + # usage and rotation guidance, but not an API endpoint to revoke the + # current key directly. references: - https://docs.highnote.com/docs/developers/api/using-the-api diff --git a/crates/kingfisher-rules/data/rules/langfuse.yml b/crates/kingfisher-rules/data/rules/langfuse.yml index 3a010d8..845e6cb 100644 --- a/crates/kingfisher-rules/data/rules/langfuse.yml +++ b/crates/kingfisher-rules/data/rules/langfuse.yml @@ -2,10 +2,10 @@ rules: - name: Langfuse Secret Key id: kingfisher.langfuse.1 pattern: | - (?xi) + (?x) \b ( - sk-lf-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} + sk-lf-[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12} ) \b pattern_requirements: @@ -42,10 +42,10 @@ rules: - name: Langfuse Public Key id: kingfisher.langfuse.2 pattern: | - (?xi) + (?x) \b ( - pk-lf-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} + pk-lf-[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12} ) \b pattern_requirements: @@ -57,9 +57,6 @@ rules: examples: - pk-lf-a1b2c3d4-e5f6-7890-abcd-ef1234567890 - 'LANGFUSE_PUBLIC_KEY="pk-lf-9f8e7d6c-5b4a-3210-fedc-ba0987654321"' - negative_examples: - - pk-lf-test - - pk-lf- references: - https://langfuse.com/docs/sdk/typescript - https://langfuse.com/docs/get-started diff --git a/crates/kingfisher-rules/data/rules/posthog.yml b/crates/kingfisher-rules/data/rules/posthog.yml index 904c202..6f65b0e 100644 --- a/crates/kingfisher-rules/data/rules/posthog.yml +++ b/crates/kingfisher-rules/data/rules/posthog.yml @@ -57,6 +57,22 @@ rules: examples: - "pha_XgrXUnvwyoPLmjwHES5lc8scZUtheBpa1QV1qmssutB" - "pha_35kHVLA1E068nvrwUTgabkh8xvGGTpSpsVjGcpVNfis" + validation: + type: Http + content: + request: + method: GET + url: https://app.posthog.com/api/users/@me/ + headers: + Authorization: "Bearer {{ TOKEN }}" + Content-Type: "application/json" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + # Revocation not added: I did not find a documented token self-revoke + # endpoint for OAuth access tokens in the public PostHog API docs. references: - https://posthog.com/docs/api - https://github.com/PostHog/posthog/blob/e408aac5debe02b39a6a67cfd028f16a2ca7bc90/posthog/models/utils.py#L260-L290 diff --git a/crates/kingfisher-rules/data/rules/proof.yml b/crates/kingfisher-rules/data/rules/proof.yml index 965926e..6fddb22 100644 --- a/crates/kingfisher-rules/data/rules/proof.yml +++ b/crates/kingfisher-rules/data/rules/proof.yml @@ -18,5 +18,32 @@ rules: - 'proof_key: prf_test_AbCdEfGhIjKlMnOpQrStUvWxYz123456' - 'proof_key: prf_cli_AbCdEfGhIjKlMnOpQrStUvWxYz123456' - 'proof_key: prf_cli_test_AbCdEfGhIjKlMnOpQrStUvWxYz123456' + validation: + type: Http + content: + request: + method: POST + url: > + {%- if TOKEN contains "_test_" -%} + https://api.fairfax.proof.com/v1/transactions + {%- else -%} + https://api.proof.com/v1/transactions + {%- endif -%} + headers: + ApiKey: "{{ TOKEN }}" + Content-Type: application/json + Accept: application/json + body: '{}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [422] + - type: WordMatch + words: + - signer + # Revocation not added: the public Proof docs describe dashboard key + # management and secret-scanning guidance, but not a self-revoke API. references: - https://dev.proof.com/docs/api-keys + - https://dev.proof.com/docs/environments + - https://dev.proof.com/reference/createtransaction diff --git a/crates/kingfisher-rules/data/rules/tableau.yml b/crates/kingfisher-rules/data/rules/tableau.yml index 803bbcf..14b0510 100644 --- a/crates/kingfisher-rules/data/rules/tableau.yml +++ b/crates/kingfisher-rules/data/rules/tableau.yml @@ -65,7 +65,11 @@ rules: (?xi) \b ( - https://[a-z0-9.-]{3,200} + https://(?: + (?:[a-z0-9-]+\.)?online\.tableau\.com + | + (?:[a-z0-9-]+\.)*tableau(?:\.[a-z0-9-]+)+ + ) ) (?: /api/\d+\.\d+ @@ -79,7 +83,7 @@ rules: examples: - https://tableau.example.com - https://10ax.online.tableau.com - - server="https://analytics.example.com" + - server="https://analytics.tableau.example.com" references: - https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_authentication.htm @@ -89,12 +93,11 @@ rules: (?xi) \b (?: + tableau[_-]?(?:site|content[_-]?url) + | tableau (?:.|[\n\r]){0,48}? - )? - (?: - site | - content[_-]?url + (?:site|content[_-]?url) ) (?:.|[\n\r]){0,12}? [=:"'\s] @@ -107,6 +110,6 @@ rules: visible: false examples: - tableau_site=companysite - - contentUrl="default" + - tableau_content_url="default" references: - https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_authentication.htm diff --git a/crates/kingfisher-scanner/src/validation/raw.rs b/crates/kingfisher-scanner/src/validation/raw.rs index dc006e6..3436272 100644 --- a/crates/kingfisher-scanner/src/validation/raw.rs +++ b/crates/kingfisher-scanner/src/validation/raw.rs @@ -29,6 +29,8 @@ use tokio::{ use tokio_rustls::TlsConnector; use url::Url; +use crate::validation::http_validation::check_url_resolvable; + pub struct RawValidationOutcome { pub valid: bool, pub status: StatusCode, @@ -104,7 +106,20 @@ pub async fn validate_raw( globals: &Object, client: &Client, use_lax_tls: bool, + allow_internal_ips: bool, ) -> Result { + if let Some(url) = raw_validation_target_url(kind, globals)? { + if let Err(e) = check_url_resolvable(&url, allow_internal_ips).await { + return Ok(RawValidationOutcome { + valid: false, + status: StatusCode::PRECONDITION_REQUIRED, + body: format!( + "Validation skipped - raw validation target blocked or not resolvable: {e}" + ), + }); + } + } + match kind { "azurebatch" => validate_azure_batch(globals, client).await, "ftp" => validate_ftp(globals, use_lax_tls).await, @@ -120,6 +135,18 @@ pub async fn validate_raw( } } +fn raw_validation_target_url(kind: &str, globals: &Object) -> Result> { + match kind { + "azurebatch" => string_var(globals, "BATCH_URL") + .map(|s| Url::parse(&s).context("invalid BATCH_URL")) + .transpose(), + "ftp" | "ldap" | "rabbitmq" | "redis" => string_var(globals, "TOKEN") + .map(|s| Url::parse(&s).context("invalid raw validation URI")) + .transpose(), + _ => Ok(None), + } +} + fn string_var(globals: &Object, name: &str) -> Option { globals.get(name).map(|v| v.to_kstr().to_string()).filter(|s| !s.is_empty()) } diff --git a/data/default/rule_cleanup/count_rules.py b/data/default/rule_cleanup/count_rules.py index 7a54ffe..f9363ac 100644 --- a/data/default/rule_cleanup/count_rules.py +++ b/data/default/rule_cleanup/count_rules.py @@ -27,8 +27,8 @@ DEFAULT_RULES_DIR = ( def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( description=( - "Count total rules and detector rules. " - "Detector rules are rules that do not " + "Count total rules and standalone detector rules. " + "Standalone detector rules are rules that do not " "declare depends_on_rule." ) ) @@ -41,7 +41,10 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--list-validators", action="store_true", - help="Print the names of detectors with and without a validator", + help=( + "Print the IDs of standalone detectors with and " + "without a validator" + ), ) return parser.parse_args() @@ -64,6 +67,14 @@ def iter_rule_entries(path: Path) -> list[dict]: return entries +def rule_identifier(rule: dict, path: Path, index: int) -> str: + if isinstance(rule.get("id"), str) and rule["id"].strip(): + return rule["id"] + if isinstance(rule.get("name"), str) and rule["name"].strip(): + return rule["name"] + return f"{path.stem}#{index}" + + def main() -> int: args = parse_args() rules_dir = args.rules_dir.resolve() @@ -79,8 +90,8 @@ def main() -> int: total_rules = 0 dependent_rules = 0 - with_validator: list[str] = [] - without_validator: list[str] = [] + standalone_with_validator: list[str] = [] + standalone_without_validator: list[str] = [] for path in rule_files: try: @@ -93,27 +104,43 @@ def main() -> int: dependent_rules += sum( 1 for rule in rules if rule.get("depends_on_rule") ) - if any(rule.get("validation") for rule in rules): - with_validator.append(path.stem) - else: - without_validator.append(path.stem) + for index, rule in enumerate(rules, start=1): + if rule.get("depends_on_rule"): + continue - detector_rules = total_rules - dependent_rules + identifier = rule_identifier(rule, path, index) + if rule.get("validation"): + standalone_with_validator.append(identifier) + else: + standalone_without_validator.append(identifier) + + standalone_detector_rules = total_rules - dependent_rules print(f"Rules directory: {rules_dir}") - print(f"Detectors: {len(rule_files)}") - print(f"Detectors with validator: {len(with_validator)}") - print(f"Detectors without validator: {len(without_validator)}") print(f"Total rules: {total_rules}") print(f"Dependent rules: {dependent_rules}") - print(f"Non-dependent rules: {detector_rules}") + print(f"Standalone detectors: {standalone_detector_rules}") + print( + "Standalone detectors with validator: " + f"{len(standalone_with_validator)}" + ) + print( + "Standalone detectors without validator: " + f"{len(standalone_without_validator)}" + ) if args.list_validators: - print(f"\nWith validator ({len(with_validator)}):") - for name in with_validator: + print( + "\nStandalone detectors with validator " + f"({len(standalone_with_validator)}):" + ) + for name in standalone_with_validator: print(f" {name}") - print(f"\nWithout validator ({len(without_validator)}):") - for name in without_validator: + print( + "\nStandalone detectors without validator " + f"({len(standalone_without_validator)}):" + ) + for name in standalone_without_validator: print(f" {name}") return 0 diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index 6439801..323e91b 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -7,6 +7,11 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im All notable changes to this project will be documented in this file. +## [v1.95.0] +- Added 80+ built-in rules, bringing the bundled ruleset to 820 total. New coverage includes Amazon OAuth, Asaas, multiple Azure credential families, Bitrise, Canva, CockroachDB, eBay, Elastic, hCaptcha, Highnote, Lichess, MailerSend, Onfido, Paddle, Pangea, Persona, Pinterest, Proof, Rootly, Runpod, Telnyx, Thunderstore, Valtown, Volcengine, and more. +- Added a `validation: type: Raw` exception path for provider-specific checks, with new raw validators for Azure Batch, FTP, Kraken, LDAP, RabbitMQ, and Redis. Also added stable request-scoped template values plus new Liquid filters for HMAC-SHA384 hex output and timestamp generation. +- Expanded live validation coverage for several built-in rules, including Agora, Bitfinex, DocuSign, Dwolla, GitLab, KuCoin, RingCentral, Snowflake, Tableau, Trello, and Webex. Also tightened newly added helper regex to avoid high-match scan regressions, and made preflight-blocked raw validations report as skipped/not attempted instead of failed. + ## [v1.94.0] - Updated vendored `vectorscan-rs` from v0.0.5 (Vectorscan 5.4.11) to v0.0.6 (Vectorscan 5.4.12). The upstream crate now ships pre-extracted sources instead of a tarball+patch, and fixes the `cpu_native` feature flag. Local Windows and musl build patches have been re-applied. - Added more built-in rules diff --git a/src/direct_validate.rs b/src/direct_validate.rs index 1cea920..9a03bbe 100644 --- a/src/direct_validate.rs +++ b/src/direct_validate.rs @@ -857,6 +857,7 @@ pub async fn run_direct_validation( &globals, &client, use_lax_tls, + global_args.allow_internal_ips, ) .await { diff --git a/src/reporter.rs b/src/reporter.rs index d1a5a24..9233cc7 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -942,7 +942,11 @@ impl DetailsReporter { let validation_status = if rm.validation_success { "Active Credential".to_string() - } else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() { + } else if matches!( + rm.validation_response_status, + status if status == StatusCode::CONTINUE.as_u16() + || status == StatusCode::PRECONDITION_REQUIRED.as_u16() + ) { "Not Attempted".to_string() } else { "Inactive Credential".to_string() @@ -1975,7 +1979,7 @@ mod tests { let (report_match, _) = sample_report_match( "(skip list entry) AWS validation not attempted for account 111122223333.", - StatusCode::CONTINUE.as_u16(), + StatusCode::PRECONDITION_REQUIRED.as_u16(), false, ); let scan_args = sample_scan_args(); diff --git a/src/validation.rs b/src/validation.rs index 3e90b07..5cdf823 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -1311,7 +1311,7 @@ async fn timed_validate_single_match<'a>( "(skip list entry) AWS validation not attempted for account {}.", account_id )); - m.validation_response_status = StatusCode::CONTINUE; + m.validation_response_status = StatusCode::PRECONDITION_REQUIRED; cache.insert( cache_key, CachedResponse { @@ -1488,6 +1488,7 @@ async fn timed_validate_single_match<'a>( &globals, client, clients.should_use_lax(rule_syntax.tls_mode), + clients.allow_internal_ips, ) .await {