updated rules

This commit is contained in:
Mick Grove 2026-04-07 10:42:44 -07:00
commit afee0b7181
24 changed files with 513 additions and 41 deletions

View file

@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file.
## [v1.95.0]
- Added 80+ built-in rules, bringing the bundled ruleset to 820 total. New coverage includes Amazon OAuth, Asaas, multiple Azure credential families, Bitrise, Canva, CockroachDB, eBay, Elastic, hCaptcha, Highnote, Lichess, MailerSend, Onfido, Paddle, Pangea, Persona, Pinterest, Proof, Rootly, Runpod, Telnyx, Thunderstore, Valtown, Volcengine, and more.
- Added a `validation: type: Raw` exception path for provider-specific checks, with new raw validators for Azure Batch, FTP, Kraken, LDAP, RabbitMQ, and Redis. Also added stable request-scoped template values plus new Liquid filters for HMAC-SHA384 hex output and timestamp generation.
- Expanded live validation coverage for several built-in rules, including Agora, Bitfinex, DocuSign, Dwolla, GitLab, KuCoin, RingCentral, Snowflake, Tableau, Trello, and Webex, and fixed newly added rule patterns/examples so `kingfisher rules check` passes cleanly.
- Expanded live validation coverage for several built-in rules, including Agora, Bitfinex, DocuSign, Dwolla, GitLab, KuCoin, RingCentral, Snowflake, Tableau, Trello, and Webex. Also tightened newly added helper regex to avoid high-match scan regressions, and made preflight-blocked raw validations report as skipped/not attempted instead of failed.
## [v1.94.0]
- Updated vendored `vectorscan-rs` from v0.0.5 (Vectorscan 5.4.11) to v0.0.6 (Vectorscan 5.4.12). The upstream crate now ships pre-extracted sources instead of a tarball+patch, and fixes the `cpu_native` feature flag. Local Windows and musl build patches have been re-applied.

View file

@ -30,6 +30,7 @@ Strongly recommended fields:
## Pattern Quality Rules
- Prefer specific anchors/prefixes and provider context over broad generic regex.
- Keep helper/context regex narrow. Avoid patterns that match generic URLs, hostnames, query params, or assignments without strong provider-specific constraints; broad helpers can create huge match counts and cause major memory/time regressions on large repos and git history.
- When the token format is generic or common-looking (for example bare 32-hex keys), prefer contextual patterns of the form: provider keyword -> short flexible gap -> key/secret label -> short flexible gap -> token. A good default is:
- `\b`
- provider identifier (for example `amplitude`, `azure`, `speech`, `translator`)
@ -83,6 +84,9 @@ Strongly recommended fields:
- `cargo test -p kingfisher-rules`
- Broader regression check:
- `cargo test --workspace --all-targets`
- Match-volume check on a realistic large target:
- `kingfisher scan <large-repo-or-test-corpus> --rule-stats`
- Review unexpected high-match helper/generic rules before submitting.
- **Warning-free build**: `cargo check` (or `make darwin` / `make linux`) must produce zero warnings. Address all `dead_code`, `unused_*`, and other warnings before submitting. Use `#[allow(dead_code)]` on individual struct fields kept for deserialization completeness, and remove truly unused code.
- Behavioral check against sample content:
- `kingfisher scan ./testdata --rule <rule-family-or-id> --rule-stats`

View file

@ -73,4 +73,53 @@ rules:
"client_credentials": {
"client_id": "a65b0146769d433a835f36660881db50",
"client_secret": "p8e-ibndcvsmAp9ZgPBZ606FSlYIZVlsZ-g5"
},
},
depends_on_rule:
- rule_id: "kingfisher.adobe.4"
variable: ADOBE_CLIENT_ID
validation:
type: Http
content:
request:
method: POST
url: https://ims-na1.adobelogin.com/ims/token/v3
headers:
Authorization: 'Basic {{ ADOBE_CLIENT_ID | append: ":" | append: TOKEN | b64enc }}'
Content-Type: application/x-www-form-urlencoded
Accept: application/json
body: 'code=invalid_code&grant_type=authorization_code'
response_matcher:
- report_response: true
- type: StatusMatch
status: [400]
- type: WordMatch
words:
- invalid_client
negative: true
# Revocation not added: Adobe documents revocation for access and refresh
# tokens, not for the OAuth client secret itself.
references:
- https://developer.adobe.com/developer-console/docs/guides/authentication/UserAuthentication/ims
- name: Adobe OAuth Client ID
id: kingfisher.adobe.4
pattern: |
(?xi)
\b
adobe
(?:.|[\n\r]){0,64}?
client_id
(?:.|[\n\r]){0,16}?
(
[a-f0-9]{32}
)
\b
min_entropy: 3.0
visible: false
examples:
- |
{
"client_credentials": {
"client_id": "a65b0146769d433a835f36660881db50",
"client_secret": "p8e-ibndcvsmAp9ZgPBZ606FSlYIZVlsZ-g5"
},

View file

@ -14,5 +14,34 @@ rules:
examples:
- 'ASAAS_API_KEY=$aact_prod_abcdefghijklmnop1234567890ABCDEF'
- 'api_token: $aact_hmlg_abcdefghijklmnop1234567890ABCDEF'
validation:
type: Http
content:
request:
method: GET
url: >
{%- if TOKEN contains "$aact_hmlg_" -%}
https://api-sandbox.asaas.com/v3/myAccount/commercialInfo/
{%- else -%}
https://api.asaas.com/v3/myAccount/commercialInfo/
{%- endif -%}
headers:
Accept: application/json
User-Agent: kingfisher
access_token: "{{ TOKEN }}"
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid
- type: WordMatch
match_all_words: true
words:
- '"object"'
- '"commercialInfo"'
# Revocation not added: Asaas documents key deletion in the dashboard and
# parent-driven sub-account key management, but not a self-revoke endpoint
# for the current access_token alone.
references:
- https://docs.asaas.com/docs/authentication-2
- https://docs.asaas.com/docs/change-the-name-of-a-business-subaccount-via-api

View file

@ -41,6 +41,32 @@ rules:
examples:
- "asana :'20c2F0d03201af478ca1aBE9515A1A4FEfb'"
- ASANA_PAT = 1234567890abcdef1234567890abcdef12
depends_on_rule:
- rule_id: kingfisher.asana.1
variable: ASANA_CLIENT_ID
validation:
type: Http
content:
request:
method: POST
url: https://app.asana.com/-/oauth_token
headers:
Content-Type: application/x-www-form-urlencoded
Accept: application/json
body: >
grant_type=authorization_code&client_id={{ ASANA_CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&redirect_uri={{ "https://example.com/oauth/callback" | url_encode }}&code=invalid_code
response_matcher:
- report_response: true
- type: StatusMatch
status: [400]
- type: WordMatch
words:
- invalid_client
negative: true
# Revocation not added: Asana's revoke endpoint deauthorizes refresh tokens,
# not OAuth client secrets.
references:
- https://developers.asana.com/docs/oauth
- name: Asana OAuth / Personal Access Token (Legacy)
id: kingfisher.asana.3

View file

@ -70,6 +70,30 @@ rules:
- |
if __name__ == "__main__":
ado_pat = "iyfmob6xjrfmit67anxbot64umfx2clwx7dz5ynxi4q2z3uqegvq"
validation:
type: Http
content:
request:
method: GET
url: https://app.vssps.visualstudio.com/_apis/profile/profiles/me?api-version=7.1
headers:
Authorization: 'Basic {{ ":" | append: TOKEN | b64enc }}'
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid
- type: WordMatch
match_all_words: true
words:
- '"id"'
- '"displayName"'
# Revocation not added: Azure DevOps PAT lifecycle management is documented
# separately and is not a self-revoke flow driven solely by the PAT itself.
references:
- https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops
- https://learn.microsoft.com/en-us/rest/api/azure/devops/profile/profiles/get?view=azure-devops-rest-7.1
- name: Azure Container Registry URL
id: kingfisher.azure.4
pattern: |

View file

@ -17,5 +17,26 @@ rules:
categories: [api, key]
examples:
- AZURE_MAPS_KEY=AbCdEfGhIjKlMnOpQrStUvWxYz123456
validation:
type: Http
content:
request:
method: GET
url: https://atlas.microsoft.com/geocode?api-version=2025-01-01&addressLine=15127%20NE%2024th%20Street%20Redmond%20WA&countryRegion=US&subscription-key={{ TOKEN }}
headers:
Accept: application/geo+json, application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid
- type: WordMatch
match_all_words: true
words:
- '"FeatureCollection"'
- '"features"'
# Revocation not added: Azure Maps shared-key docs cover rotation and
# authentication, but do not document a key self-revoke API.
references:
- https://learn.microsoft.com/en-us/azure/azure-maps/how-to-manage-authentication
- https://learn.microsoft.com/en-us/rest/api/maps/search/get-geocoding

View file

@ -45,6 +45,20 @@ rules:
- 'branch.init("key_test_plqYW3Aq9Xija1cobGMieipndBzO5y7J");'
references:
- https://help.branch.io/developers-hub/docs/deep-linking-api
- https://help.branch.io/apidocs/app-api
depends_on_rule:
- rule_id: kingfisher.branchio.3
variable: BRANCH_SECRET
validation:
type: Http
content:
request:
method: GET
url: "https://api2.branch.io/v1/app/{{ TOKEN }}?branch_secret={{ BRANCH_SECRET }}"
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- name: Branch.io Secret
id: kingfisher.branchio.3

View file

@ -24,5 +24,28 @@ rules:
categories: [api, key]
examples:
- 'COCKROACHDB_API_KEY=B81649_8F7D11A_92BCE13_56782D_C53'
validation:
type: Http
content:
request:
method: GET
url: https://cockroachlabs.cloud/api/v1/clusters?show_inactive=true
headers:
Authorization: Bearer {{ TOKEN }}
Accept: application/json
Cc-Version: "2024-09-16"
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid
- type: WordMatch
match_all_words: true
words:
- '"clusters"'
- '"pagination"'
# Revocation not added: the public Cloud API docs describe bearer-token
# authentication for service-account secret keys, but not a documented
# self-revocation endpoint for the current secret key value.
references:
- https://www.cockroachlabs.com/docs/cockroachcloud/cloud-api

View file

@ -22,6 +22,26 @@ rules:
- secret
references:
- https://docs.databricks.com/dev-tools/api/latest/authentication.html
- https://docs.databricks.com/en/dev-tools/auth/pat.html
validation:
type: Http
content:
request:
headers:
Authorization: Bearer {{ TOKEN }}
method: GET
response_matcher:
- report_response: true
- status:
- 200
type: StatusMatch
url: https://{{ DOMAIN }}/api/2.0/clusters/list
depends_on_rule:
- rule_id: "kingfisher.databricks.3"
variable: DOMAIN
# Revocation not added: Databricks PAT docs describe token creation and
# use, but do not document a PAT-only self-revoke endpoint suitable for YAML
# revocation here.
- name: Databricks API Token
id: kingfisher.databricks.2
@ -51,7 +71,7 @@ rules:
type: StatusMatch
url: https://{{ DOMAIN }}/api/2.0/clusters/list
depends_on_rule:
- rule_id: "kingfisher.databricks.2"
- rule_id: "kingfisher.databricks.3"
variable: DOMAIN
- name: Databricks Domain
@ -83,4 +103,4 @@ rules:
references:
- https://docs.databricks.com/workspace/workspace-details.html
- https://docs.gcp.databricks.com/workspace/workspace-details.html
- https://docs.microsoft.com/en-us/azure/databricks/scenarios/what-is-azure-databricks
- https://docs.microsoft.com/en-us/azure/databricks/scenarios/what-is-azure-databricks

View file

@ -213,6 +213,27 @@ rules:
- 'CI_JOB_TOKEN=glcbt-a1b2c_3dEfGhIjKlMnOpQrStUv'
references:
- https://docs.gitlab.com/ci/jobs/ci_job_token/
- https://docs.gitlab.com/api/jobs/
validation:
type: Http
content:
request:
method: GET
url: https://gitlab.com/api/v4/job
headers:
JOB-TOKEN: '{{ TOKEN }}'
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid
- type: WordMatch
match_all_words: true
words:
- '"id"'
- '"status"'
# Revocation not added: CI/CD job tokens are short-lived and automatically
# invalidated when the job finishes.
- name: GitLab Deploy Token
id: kingfisher.gitlab.6

View file

@ -23,6 +23,32 @@ rules:
confidence: medium
examples:
- 'const CLIENTSECRET = "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"'
depends_on_rule:
- rule_id: "kingfisher.google.1"
variable: GOOGLE_CLIENT_ID
validation:
type: Http
content:
request:
method: POST
url: https://oauth2.googleapis.com/token
headers:
Content-Type: application/x-www-form-urlencoded
Accept: application/json
body: >
code=invalid_code&client_id={{ GOOGLE_CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&redirect_uri={{ "https://example.com/oauth/callback" | url_encode }}&grant_type=authorization_code
response_matcher:
- report_response: true
- type: StatusMatch
status: [400]
- type: WordMatch
words:
- invalid_client
negative: true
# Revocation not added: Google's OAuth revocation endpoint revokes tokens,
# not client secrets.
references:
- https://developers.google.com/identity/protocols/oauth2/web-server
- name: Google OAuth Client Secret
id: kingfisher.google.3
@ -36,6 +62,32 @@ rules:
examples:
- " //$google_client_secret = 'fnhqAakzWrX-mtFQ4PRdMoy0';"
- " 'clientSecret' : 'Ufvuj-d6alhwGKvvLh_8Nq0K'"
depends_on_rule:
- rule_id: "kingfisher.google.1"
variable: GOOGLE_CLIENT_ID
validation:
type: Http
content:
request:
method: POST
url: https://oauth2.googleapis.com/token
headers:
Content-Type: application/x-www-form-urlencoded
Accept: application/json
body: >
code=invalid_code&client_id={{ GOOGLE_CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&redirect_uri={{ "https://example.com/oauth/callback" | url_encode }}&grant_type=authorization_code
response_matcher:
- report_response: true
- type: StatusMatch
status: [400]
- type: WordMatch
words:
- invalid_client
negative: true
# Revocation not added: Google's OAuth revocation endpoint revokes tokens,
# not client secrets.
references:
- https://developers.google.com/identity/protocols/oauth2/web-server
- name: Google OAuth Access Token
id: kingfisher.google.4
@ -61,6 +113,42 @@ rules:
- |
-- Clear login if it's a new connection.
--propertyTable.access_token = 'ya29.Ci_UA7aEsvT6-oVI8f96kvB6i8oO13WgdZUviLaCVtpEPYZqhQcQycR-u2X9xtmYGA'
validation:
type: Http
content:
request:
method: GET
url: https://www.googleapis.com/oauth2/v3/tokeninfo?access_token={{ TOKEN | url_encode }}
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid
- type: WordMatch
match_all_words: true
words:
- '"aud"'
- '"expires_in"'
revocation:
type: Http
content:
request:
method: POST
url: https://oauth2.googleapis.com/revoke
headers:
Content-Type: application/x-www-form-urlencoded
Accept: application/json
body: token={{ TOKEN | url_encode }}
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
references:
- https://developers.google.com/identity/openid-connect/openid-connect
- https://developers.google.com/data-portability/user-guide/quickstart
- https://developers.google.com/identity/protocols/oauth2/web-server
- name: Google OAuth Credentials
id: kingfisher.google.6
@ -118,4 +206,4 @@ rules:
match_all_words: true
words:
- '"models"'
- '"name"'
- '"name"'

View file

@ -30,5 +30,20 @@ rules:
- type: WordMatch
words:
- '"email":'
revocation:
type: Http
content:
request:
method: POST
url: https://oauth2.googleapis.com/revoke
headers:
Content-Type: application/x-www-form-urlencoded
Accept: application/json
body: token={{ TOKEN | url_encode }}
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
references:
- https://developers.google.com/identity/protocols/oauth2
- https://developers.google.com/identity/protocols/oauth2
- https://developers.google.com/identity/protocols/oauth2/web-server

View file

@ -19,5 +19,35 @@ rules:
examples:
- 'HIGHNOTE_API_KEY=sk_live_AbCdEfGhIjKlMnOpQrStUvWxYz1234'
- 'highnote_key: rk_test_AbCdEfGhIjKlMnOpQrStUvWxYz1234'
validation:
type: Http
content:
request:
method: POST
url: >
{%- if TOKEN contains "_test_" -%}
https://api.us.test.highnote.com/graphql
{%- else -%}
https://api.us.highnote.com/graphql
{%- endif -%}
headers:
Authorization: "Basic {{ TOKEN | b64enc }}"
Content-Type: application/json
Accept: application/json
body: '{"query":"query { ping }"}'
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid
- type: WordMatch
match_all_words: true
words:
- '"data"'
- '"ping"'
- '"pong"'
# Revocation not added: the public Highnote docs describe API key
# usage and rotation guidance, but not an API endpoint to revoke the
# current key directly.
references:
- https://docs.highnote.com/docs/developers/api/using-the-api

View file

@ -2,10 +2,10 @@ rules:
- name: Langfuse Secret Key
id: kingfisher.langfuse.1
pattern: |
(?xi)
(?x)
\b
(
sk-lf-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}
sk-lf-[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}
)
\b
pattern_requirements:
@ -42,10 +42,10 @@ rules:
- name: Langfuse Public Key
id: kingfisher.langfuse.2
pattern: |
(?xi)
(?x)
\b
(
pk-lf-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}
pk-lf-[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}
)
\b
pattern_requirements:
@ -57,9 +57,6 @@ rules:
examples:
- pk-lf-a1b2c3d4-e5f6-7890-abcd-ef1234567890
- 'LANGFUSE_PUBLIC_KEY="pk-lf-9f8e7d6c-5b4a-3210-fedc-ba0987654321"'
negative_examples:
- pk-lf-test
- pk-lf-
references:
- https://langfuse.com/docs/sdk/typescript
- https://langfuse.com/docs/get-started

View file

@ -57,6 +57,22 @@ rules:
examples:
- "pha_XgrXUnvwyoPLmjwHES5lc8scZUtheBpa1QV1qmssutB"
- "pha_35kHVLA1E068nvrwUTgabkh8xvGGTpSpsVjGcpVNfis"
validation:
type: Http
content:
request:
method: GET
url: https://app.posthog.com/api/users/@me/
headers:
Authorization: "Bearer {{ TOKEN }}"
Content-Type: "application/json"
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid
# Revocation not added: the public PostHog API docs do not document a token
# self-revoke endpoint for OAuth access tokens.
references:
- https://posthog.com/docs/api
- https://github.com/PostHog/posthog/blob/e408aac5debe02b39a6a67cfd028f16a2ca7bc90/posthog/models/utils.py#L260-L290

View file

@ -18,5 +18,32 @@ rules:
- 'proof_key: prf_test_AbCdEfGhIjKlMnOpQrStUvWxYz123456'
- 'proof_key: prf_cli_AbCdEfGhIjKlMnOpQrStUvWxYz123456'
- 'proof_key: prf_cli_test_AbCdEfGhIjKlMnOpQrStUvWxYz123456'
validation:
type: Http
content:
request:
method: POST
url: >
{%- if TOKEN contains "_test_" -%}
https://api.fairfax.proof.com/v1/transactions
{%- else -%}
https://api.proof.com/v1/transactions
{%- endif -%}
headers:
ApiKey: "{{ TOKEN }}"
Content-Type: application/json
Accept: application/json
body: '{}'
response_matcher:
- report_response: true
- type: StatusMatch
status: [422]
- type: WordMatch
words:
- signer
# Revocation not added: the public Proof docs describe dashboard key
# management and secret-scanning guidance, but not a self-revoke API.
references:
- https://dev.proof.com/docs/api-keys
- https://dev.proof.com/docs/environments
- https://dev.proof.com/reference/createtransaction

View file

@ -65,7 +65,11 @@ rules:
(?xi)
\b
(
https://[a-z0-9.-]{3,200}
https://(?:
(?:[a-z0-9-]+\.)?online\.tableau\.com
|
(?:[a-z0-9-]+\.)*tableau(?:\.[a-z0-9-]+)+
)
)
(?:
/api/\d+\.\d+
@ -79,7 +83,7 @@ rules:
examples:
- https://tableau.example.com
- https://10ax.online.tableau.com
- server="https://analytics.example.com"
- server="https://analytics.tableau.example.com"
references:
- https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_authentication.htm
@ -89,12 +93,11 @@ rules:
(?xi)
\b
(?:
tableau[_-]?(?:site|content[_-]?url)
|
tableau
(?:.|[\n\r]){0,48}?
)?
(?:
site |
content[_-]?url
(?:site|content[_-]?url)
)
(?:.|[\n\r]){0,12}?
[=:"'\s]
@ -107,6 +110,6 @@ rules:
visible: false
examples:
- tableau_site=companysite
- contentUrl="default"
- tableau_content_url="default"
references:
- https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_authentication.htm

View file

@ -29,6 +29,8 @@ use tokio::{
use tokio_rustls::TlsConnector;
use url::Url;
use crate::validation::http_validation::check_url_resolvable;
pub struct RawValidationOutcome {
pub valid: bool,
pub status: StatusCode,
@ -104,7 +106,20 @@ pub async fn validate_raw(
globals: &Object,
client: &Client,
use_lax_tls: bool,
allow_internal_ips: bool,
) -> Result<RawValidationOutcome> {
if let Some(url) = raw_validation_target_url(kind, globals)? {
if let Err(e) = check_url_resolvable(&url, allow_internal_ips).await {
return Ok(RawValidationOutcome {
valid: false,
status: StatusCode::PRECONDITION_REQUIRED,
body: format!(
"Validation skipped - raw validation target blocked or not resolvable: {e}"
),
});
}
}
match kind {
"azurebatch" => validate_azure_batch(globals, client).await,
"ftp" => validate_ftp(globals, use_lax_tls).await,
@ -120,6 +135,18 @@ pub async fn validate_raw(
}
}
/// Resolves the network target that a raw validator of the given `kind`
/// would contact, so callers can vet it before any connection is attempted.
///
/// * `"azurebatch"` reads the target from the `BATCH_URL` template variable.
/// * `"ftp"`, `"ldap"`, `"rabbitmq"` and `"redis"` treat `TOKEN` itself as
///   the connection URI.
/// * Any other kind has no URL-shaped target and yields `Ok(None)`.
///
/// Returns `Ok(None)` as well when the relevant variable is missing or empty.
///
/// # Errors
///
/// Fails when the variable is present but cannot be parsed as a URL.
fn raw_validation_target_url(kind: &str, globals: &Object) -> Result<Option<Url>> {
    // Pick the variable holding the target and the message used if it fails to parse.
    let (var_name, parse_error) = match kind {
        "azurebatch" => ("BATCH_URL", "invalid BATCH_URL"),
        "ftp" | "ldap" | "rabbitmq" | "redis" => ("TOKEN", "invalid raw validation URI"),
        _ => return Ok(None),
    };

    match string_var(globals, var_name) {
        Some(raw) => Url::parse(&raw).context(parse_error).map(Some),
        None => Ok(None),
    }
}
/// Looks up `name` in `globals` and returns its string rendering.
///
/// Yields `None` when the variable is absent or renders to an empty string,
/// so callers can treat "unset" and "blank" the same way.
fn string_var(globals: &Object, name: &str) -> Option<String> {
    let rendered = globals.get(name)?.to_kstr().to_string();
    if rendered.is_empty() {
        None
    } else {
        Some(rendered)
    }
}

View file

@ -27,8 +27,8 @@ DEFAULT_RULES_DIR = (
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
description=(
"Count total rules and detector rules. "
"Detector rules are rules that do not "
"Count total rules and standalone detector rules. "
"Standalone detector rules are rules that do not "
"declare depends_on_rule."
)
)
@ -41,7 +41,10 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--list-validators",
action="store_true",
help="Print the names of detectors with and without a validator",
help=(
"Print the IDs of standalone detectors with and "
"without a validator"
),
)
return parser.parse_args()
@ -64,6 +67,14 @@ def iter_rule_entries(path: Path) -> list[dict]:
return entries
def rule_identifier(rule: dict, path: Path, index: int) -> str:
    """Return a human-readable label for ``rule``.

    The rule's ``id`` is preferred, then its ``name``; either is used only
    when it is a string containing non-whitespace characters, and it is
    returned unmodified. When neither qualifies, the label falls back to
    ``"<file stem>#<index>"`` built from ``path`` and ``index``.
    """
    for key in ("id", "name"):
        candidate = rule.get(key)
        if isinstance(candidate, str) and candidate.strip():
            return candidate
    return f"{path.stem}#{index}"
def main() -> int:
args = parse_args()
rules_dir = args.rules_dir.resolve()
@ -79,8 +90,8 @@ def main() -> int:
total_rules = 0
dependent_rules = 0
with_validator: list[str] = []
without_validator: list[str] = []
standalone_with_validator: list[str] = []
standalone_without_validator: list[str] = []
for path in rule_files:
try:
@ -93,27 +104,43 @@ def main() -> int:
dependent_rules += sum(
1 for rule in rules if rule.get("depends_on_rule")
)
if any(rule.get("validation") for rule in rules):
with_validator.append(path.stem)
else:
without_validator.append(path.stem)
for index, rule in enumerate(rules, start=1):
if rule.get("depends_on_rule"):
continue
detector_rules = total_rules - dependent_rules
identifier = rule_identifier(rule, path, index)
if rule.get("validation"):
standalone_with_validator.append(identifier)
else:
standalone_without_validator.append(identifier)
standalone_detector_rules = total_rules - dependent_rules
print(f"Rules directory: {rules_dir}")
print(f"Detectors: {len(rule_files)}")
print(f"Detectors with validator: {len(with_validator)}")
print(f"Detectors without validator: {len(without_validator)}")
print(f"Total rules: {total_rules}")
print(f"Dependent rules: {dependent_rules}")
print(f"Non-dependent rules: {detector_rules}")
print(f"Standalone detectors: {standalone_detector_rules}")
print(
"Standalone detectors with validator: "
f"{len(standalone_with_validator)}"
)
print(
"Standalone detectors without validator: "
f"{len(standalone_without_validator)}"
)
if args.list_validators:
print(f"\nWith validator ({len(with_validator)}):")
for name in with_validator:
print(
"\nStandalone detectors with validator "
f"({len(standalone_with_validator)}):"
)
for name in standalone_with_validator:
print(f" {name}")
print(f"\nWithout validator ({len(without_validator)}):")
for name in without_validator:
print(
"\nStandalone detectors without validator "
f"({len(standalone_without_validator)}):"
)
for name in standalone_without_validator:
print(f" {name}")
return 0

View file

@ -7,6 +7,11 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im
All notable changes to this project will be documented in this file.
## [v1.95.0]
- Added 80+ built-in rules, bringing the bundled ruleset to 820 total. New coverage includes Amazon OAuth, Asaas, multiple Azure credential families, Bitrise, Canva, CockroachDB, eBay, Elastic, hCaptcha, Highnote, Lichess, MailerSend, Onfido, Paddle, Pangea, Persona, Pinterest, Proof, Rootly, Runpod, Telnyx, Thunderstore, Valtown, Volcengine, and more.
- Added a `validation: type: Raw` exception path for provider-specific checks, with new raw validators for Azure Batch, FTP, Kraken, LDAP, RabbitMQ, and Redis. Also added stable request-scoped template values plus new Liquid filters for HMAC-SHA384 hex output and timestamp generation.
- Expanded live validation coverage for several built-in rules, including Agora, Bitfinex, DocuSign, Dwolla, GitLab, KuCoin, RingCentral, Snowflake, Tableau, Trello, and Webex. Also tightened newly added helper regex to avoid high-match scan regressions, and made preflight-blocked raw validations report as skipped/not attempted instead of failed.
## [v1.94.0]
- Updated vendored `vectorscan-rs` from v0.0.5 (Vectorscan 5.4.11) to v0.0.6 (Vectorscan 5.4.12). The upstream crate now ships pre-extracted sources instead of a tarball+patch, and fixes the `cpu_native` feature flag. Local Windows and musl build patches have been re-applied.
- Added more built-in rules

View file

@ -857,6 +857,7 @@ pub async fn run_direct_validation(
&globals,
&client,
use_lax_tls,
global_args.allow_internal_ips,
)
.await
{

View file

@ -942,7 +942,11 @@ impl DetailsReporter {
let validation_status = if rm.validation_success {
"Active Credential".to_string()
} else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() {
} else if matches!(
rm.validation_response_status,
status if status == StatusCode::CONTINUE.as_u16()
|| status == StatusCode::PRECONDITION_REQUIRED.as_u16()
) {
"Not Attempted".to_string()
} else {
"Inactive Credential".to_string()
@ -1975,7 +1979,7 @@ mod tests {
let (report_match, _) = sample_report_match(
"(skip list entry) AWS validation not attempted for account 111122223333.",
StatusCode::CONTINUE.as_u16(),
StatusCode::PRECONDITION_REQUIRED.as_u16(),
false,
);
let scan_args = sample_scan_args();

View file

@ -1311,7 +1311,7 @@ async fn timed_validate_single_match<'a>(
"(skip list entry) AWS validation not attempted for account {}.",
account_id
));
m.validation_response_status = StatusCode::CONTINUE;
m.validation_response_status = StatusCode::PRECONDITION_REQUIRED;
cache.insert(
cache_key,
CachedResponse {
@ -1488,6 +1488,7 @@ async fn timed_validate_single_match<'a>(
&globals,
client,
clients.should_use_lax(rule_syntax.tls_mode),
clients.allow_internal_ips,
)
.await
{