From 283e9d29b31e9d06e859b6b2ccd3fc847d6438be Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Fri, 6 Mar 2026 12:06:12 -0800 Subject: [PATCH 01/12] fixed CI to not upload bare checksum, because they are included in zip file --- .github/workflows/release.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8dcd1ee..1a7aaf4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -342,14 +342,11 @@ jobs: run: | mkdir -p dist cp target/release/kingfisher-windows-${{ matrix.arch }}.zip dist/ - cp target/release/CHECKSUM-windows-${{ matrix.arch }}.txt dist/ - uses: actions/upload-artifact@v4 with: name: kingfisher-windows-${{ matrix.arch }} - path: | - dist/kingfisher-windows-${{ matrix.arch }}.zip - dist/CHECKSUM-windows-${{ matrix.arch }}.txt + path: dist/kingfisher-windows-${{ matrix.arch }}.zip release: name: Public GitHub Release From 0983581b7616b78cb13df7fc5e74df111dc19dd6 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 7 Mar 2026 07:40:26 -0800 Subject: [PATCH 02/12] improved yelp and perplexity rules --- CHANGELOG.md | 4 ++ Cargo.toml | 2 +- .../data/rules/perplexity.yml | 17 +++++++- crates/kingfisher-rules/data/rules/yelp.yml | 19 +++++++-- tests/int_rules_no_validated_findings.rs | 40 +++++++++++++++---- 5 files changed, 69 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c94995..62a7490 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this project will be documented in this file. +## [v1.87.0] +- Hardened Perplexity API key validation to reject auth failures (`401`/`403`) and avoid false "Active Credential" results from error payloads. +- Fixed Yelp API key validation false positives by switching to an auth-enforcing endpoint (`/v3/businesses/search`) and adding explicit auth error guards. 
+ ## [v1.86.0] - GitLab scanning: honor OS-trusted internal CAs without requiring `SSL_CERT_FILE`, and preserve custom GitLab API ports in repository enumeration and artifact fetching. - Added detection/validation rules for App Center, Branch.io, BrowserStack, Calendly, Cypress, Delighted, DeviantArt, Instagram, Iterable, Keen.io, Lokalise, Pendo, Razorpay, Spotify, WakaTime, WPEngine. diff --git a/Cargo.toml b/Cargo.toml index 3fe3bf2..0abc7fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,7 +48,7 @@ http = "1.4" [package] name = "kingfisher" -version = "1.86.0" +version = "1.87.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/crates/kingfisher-rules/data/rules/perplexity.yml b/crates/kingfisher-rules/data/rules/perplexity.yml index 7942b3e..b502f0e 100644 --- a/crates/kingfisher-rules/data/rules/perplexity.yml +++ b/crates/kingfisher-rules/data/rules/perplexity.yml @@ -23,6 +23,8 @@ rules: - https://www.perplexity.ai/hub/blog/introducing-pplx-api - https://docs.litellm.ai/docs/providers/perplexity - https://developers.cloudflare.com/ai-gateway/providers/perplexity/ + - https://docs.perplexity.ai/api-reference/chat-completions-post + - https://docs.perplexity.ai/docs/sdk/error-handling validation: type: Http content: @@ -32,8 +34,19 @@ rules: headers: Authorization: "Bearer {{ TOKEN }}" Content-Type: application/json - body: '{"model": "kingfisher", "messages": [{ "role": "user", "content": "." }]}' + body: '{"model": "sonar", "messages": [{ "role": "user", "content": "." }]}' response_matcher: - report_response: false + # Perplexity docs identify 401/403 as auth/permission failures. 
+ - type: StatusMatch + status: [401, 403] + negative: true - type: WordMatch - match_all_words: false \ No newline at end of file + words: + - '"invalid_api_key"' + - '"type":"invalid_api_key"' + - '"Invalid API key provided"' + - '"authentication"' + - '"AuthenticationError"' + negative: true + match_all_words: false diff --git a/crates/kingfisher-rules/data/rules/yelp.yml b/crates/kingfisher-rules/data/rules/yelp.yml index 6e4e9d3..355f487 100644 --- a/crates/kingfisher-rules/data/rules/yelp.yml +++ b/crates/kingfisher-rules/data/rules/yelp.yml @@ -21,13 +21,16 @@ rules: - yelp_token = wiuck20l8j-oWwCd9r53FqpN6ELB7K03zGw-ccUQR7uLHc9NaWubovOMdGdyFqIGGM4aVK6nxQ1DreDZn_qBYU4jky_5kQRVkiIDPSheCPggY3WzyRzi27kxoOpoYAYx references: - https://docs.developer.yelp.com/docs/places-authentication - - https://docs.developer.yelp.com/reference/v3_all_categories + - https://docs.developer.yelp.com/reference/v3_business_search + - https://www.yelp.com/developers validation: type: Http content: request: method: GET - url: "https://api.yelp.com/v3/categories?locale=en_US" + # /v3/categories can return data with non-functional bearer strings. + # /v3/businesses/search reliably enforces API key auth. + url: "https://api.yelp.com/v3/businesses/search?location=Seattle&limit=1" headers: Authorization: "Bearer {{ TOKEN }}" Accept: application/json @@ -36,4 +39,14 @@ rules: - type: StatusMatch status: [200] - type: WordMatch - words: ['"categories"'] + words: ['"businesses"'] + # Yelp docs list these auth/authorization failure codes for this endpoint. 
+ - type: WordMatch + words: + - '"error"' + - '"UNAUTHORIZED_API_KEY"' + - '"UNAUTHORIZED_ACCESS_TOKEN"' + - '"TOKEN_INVALID"' + - '"AUTHORIZATION_ERROR"' + negative: true + match_all_words: false diff --git a/tests/int_rules_no_validated_findings.rs b/tests/int_rules_no_validated_findings.rs index a8531c2..b58d96a 100644 --- a/tests/int_rules_no_validated_findings.rs +++ b/tests/int_rules_no_validated_findings.rs @@ -5,7 +5,14 @@ use serde_json::Value; #[test] fn scan_rules_has_no_validated_findings() -> Result<()> { let output = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) - .args(["scan", "data/rules", "--format", "json", "--no-update-check", "--only-valid"]) + .args([ + "scan", + "crates/kingfisher-rules/data/rules", + "--format", + "json", + "--no-update-check", + "--only-valid", + ]) .output()?; let stdout = String::from_utf8_lossy(&output.stdout); @@ -42,14 +49,33 @@ fn scan_rules_has_no_validated_findings() -> Result<()> { } let findings: Vec<Value> = serde_json::from_str(json_array_str)?; - for finding in findings { - let rule_id = finding["rule"]["id"].as_str().unwrap_or("unknown"); + let validated_rule_ids: Vec<String> = findings + .iter() + .filter_map(|finding| { + let status = finding["finding"]["validation"]["status"] + .as_str() + .unwrap_or("") + .to_ascii_lowercase(); + if status == "active credential" { + Some(finding["rule"]["id"].as_str().unwrap_or("unknown").to_string()) + } else { + None + } + }) + .collect(); - let status = - finding["finding"]["validation"]["status"].as_str().unwrap_or("").to_ascii_lowercase(); + assert!( + validated_rule_ids.is_empty(), + "Validated findings detected in rules: {}", + validated_rule_ids.join(", ") + ); - // Fail only on genuinely validated secrets - assert_ne!(&status, "active credential", "Validated finding detected in rule {rule_id}"); + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + panic!( + "kingfisher scan exited non-zero without validated findings in 
output.\nstdout:\n{}\nstderr:\n{}", + stdout, stderr + ); } Ok(()) From 6d44e2c1b62e4ac1a360495ea1ade109f7c4160b Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 7 Mar 2026 21:28:37 -0800 Subject: [PATCH 03/12] added new rules --- CHANGELOG.md | 2 + crates/kingfisher-rules/data/rules/AGENTS.md | 78 ++++++++++++++++++ crates/kingfisher-rules/data/rules/ably.yml | 39 +++++++++ .../data/rules/abstractapi.yml | 41 ++++++++++ .../kingfisher-rules/data/rules/abuseipdb.yml | 39 +++++++++ .../data/rules/aviationstack.yml | 40 +++++++++ .../data/rules/betterstack.yml | 42 ++++++++++ crates/kingfisher-rules/data/rules/brevo.yml | 33 ++++++++ .../kingfisher-rules/data/rules/clearout.yml | 42 ++++++++++ crates/kingfisher-rules/data/rules/clerk.yml | 39 +++++++++ .../data/rules/cloudinary.yml | 82 +++++++++++++++++++ .../kingfisher-rules/data/rules/coinlayer.yml | 41 ++++++++++ .../data/rules/contentstack.yml | 62 ++++++++++++++ .../data/rules/currencylayer.yml | 41 ++++++++++ crates/kingfisher-rules/data/rules/daily.yml | 43 ++++++++++ crates/kingfisher-rules/data/rules/fixer.yml | 41 ++++++++++ .../kingfisher-rules/data/rules/geoapify.yml | 38 +++++++++ .../kingfisher-rules/data/rules/hunterio.yml | 40 +++++++++ crates/kingfisher-rules/data/rules/mux.yml | 59 +++++++++++++ .../kingfisher-rules/data/rules/newsapi.yml | 40 +++++++++ .../kingfisher-rules/data/rules/numverify.yml | 41 ++++++++++ .../kingfisher-rules/data/rules/onesignal.yml | 42 ++++++++++ .../kingfisher-rules/data/rules/pinecone.yml | 39 +++++++++ .../kingfisher-rules/data/rules/pingdom.yml | 42 ++++++++++ .../data/rules/positionstack.yml | 40 +++++++++ .../kingfisher-rules/data/rules/railway.yml | 44 ++++++++++ crates/kingfisher-rules/data/rules/render.yml | 39 +++++++++ .../kingfisher-rules/data/rules/rollbar.yml | 42 ++++++++++ .../kingfisher-rules/data/rules/salesloft.yml | 39 +++++++++ crates/kingfisher-rules/data/rules/sanity.yml | 39 +++++++++ .../data/rules/statuscake.yml | 41 ++++++++++ 
.../kingfisher-rules/data/rules/storyblok.yml | 38 +++++++++ .../data/rules/uptimerobot.yml | 38 +++++++++ .../kingfisher-rules/data/rules/urlscan.yml | 39 +++++++++ .../data/rules/virustotal.yml | 42 ++++++++++ .../data/rules/weatherapi.yml | 40 +++++++++ .../kingfisher-rules/data/rules/webflow.yml | 39 +++++++++ .../data/rules/zerobounce.yml | 38 +++++++++ 38 files changed, 1604 insertions(+) create mode 100644 crates/kingfisher-rules/data/rules/AGENTS.md create mode 100644 crates/kingfisher-rules/data/rules/ably.yml create mode 100644 crates/kingfisher-rules/data/rules/abstractapi.yml create mode 100644 crates/kingfisher-rules/data/rules/abuseipdb.yml create mode 100644 crates/kingfisher-rules/data/rules/aviationstack.yml create mode 100644 crates/kingfisher-rules/data/rules/betterstack.yml create mode 100644 crates/kingfisher-rules/data/rules/brevo.yml create mode 100644 crates/kingfisher-rules/data/rules/clearout.yml create mode 100644 crates/kingfisher-rules/data/rules/clerk.yml create mode 100644 crates/kingfisher-rules/data/rules/cloudinary.yml create mode 100644 crates/kingfisher-rules/data/rules/coinlayer.yml create mode 100644 crates/kingfisher-rules/data/rules/contentstack.yml create mode 100644 crates/kingfisher-rules/data/rules/currencylayer.yml create mode 100644 crates/kingfisher-rules/data/rules/daily.yml create mode 100644 crates/kingfisher-rules/data/rules/fixer.yml create mode 100644 crates/kingfisher-rules/data/rules/geoapify.yml create mode 100644 crates/kingfisher-rules/data/rules/hunterio.yml create mode 100644 crates/kingfisher-rules/data/rules/mux.yml create mode 100644 crates/kingfisher-rules/data/rules/newsapi.yml create mode 100644 crates/kingfisher-rules/data/rules/numverify.yml create mode 100644 crates/kingfisher-rules/data/rules/onesignal.yml create mode 100644 crates/kingfisher-rules/data/rules/pinecone.yml create mode 100644 crates/kingfisher-rules/data/rules/pingdom.yml create mode 100644 
crates/kingfisher-rules/data/rules/positionstack.yml create mode 100644 crates/kingfisher-rules/data/rules/railway.yml create mode 100644 crates/kingfisher-rules/data/rules/render.yml create mode 100644 crates/kingfisher-rules/data/rules/rollbar.yml create mode 100644 crates/kingfisher-rules/data/rules/salesloft.yml create mode 100644 crates/kingfisher-rules/data/rules/sanity.yml create mode 100644 crates/kingfisher-rules/data/rules/statuscake.yml create mode 100644 crates/kingfisher-rules/data/rules/storyblok.yml create mode 100644 crates/kingfisher-rules/data/rules/uptimerobot.yml create mode 100644 crates/kingfisher-rules/data/rules/urlscan.yml create mode 100644 crates/kingfisher-rules/data/rules/virustotal.yml create mode 100644 crates/kingfisher-rules/data/rules/weatherapi.yml create mode 100644 crates/kingfisher-rules/data/rules/webflow.yml create mode 100644 crates/kingfisher-rules/data/rules/zerobounce.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 62a7490..fa20dd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ All notable changes to this project will be documented in this file. ## [v1.87.0] - Hardened Perplexity API key validation to reject auth failures (`401`/`403`) and avoid false "Active Credential" results from error payloads. - Fixed Yelp API key validation false positives by switching to an auth-enforcing endpoint (`/v3/businesses/search`) and adding explicit auth error guards. +- Added 36 new provider detection + HTTP validation rules: Ably, AbstractAPI, AbuseIPDB, AviationStack, Better Stack, Brevo, Clearout, Clerk, Cloudinary, Coinlayer, Contentstack, Currencylayer, Daily, Fixer, Geoapify, Hunter.io, Mux, NewsAPI, Numverify, OneSignal, Pinecone, Pingdom, Positionstack, Railway, Render, Rollbar, Salesloft, Sanity, StatusCake, Storyblok, UptimeRobot, urlscan.io, VirusTotal, WeatherAPI, Webflow, and ZeroBounce. 
+- Tightened regex specificity for newly added rules by replacing broad variable-length token captures with explicit fixed formats/lengths and aligned examples to pass `rules check`. ## [v1.86.0] - GitLab scanning: honor OS-trusted internal CAs without requiring `SSL_CERT_FILE`, and preserve custom GitLab API ports in repository enumeration and artifact fetching. diff --git a/crates/kingfisher-rules/data/rules/AGENTS.md b/crates/kingfisher-rules/data/rules/AGENTS.md new file mode 100644 index 0000000..80a9522 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/AGENTS.md @@ -0,0 +1,78 @@ +# AGENTS.md + +Rule-authoring instructions for this directory. + +## Scope +- Applies to `crates/kingfisher-rules/data/rules/` and all files under it. +- This file overrides broader AGENTS guidance for rule-writing tasks in this subtree. + +## Goal +- Add or update YAML detection rules with high precision, low false positives, and safe remediation support. + +## Rule File Conventions +- Keep provider rules in provider-named files (for example `github.yml`, `openai.yml`). +- Prefer lowercase filenames with `.yml`. +- Keep rule IDs stable and unique. Prefer `kingfisher.<provider>.<n>` unless a descriptive suffix is already established for that provider. +- Reuse nearby provider patterns/styles instead of inventing new structure. + +## Required Rule Shape +Each rule entry should define: +- `name` +- `id` +- `pattern` +- `min_entropy` (default to 3.0) +- `confidence` (default to medium) +- `examples` (at least one realistic positive example) + +Strongly recommended fields: +- `pattern_requirements` (for extra filtering) +- `references` + +## Pattern Quality Rules +- Prefer specific anchors/prefixes and provider context over broad generic regex. +- Use `pattern_requirements` to enforce quality constraints (`min_digits`, `min_uppercase`, `min_lowercase`, `min_special_chars`, `ignore_if_contains`, `checksum`). 
+- Use checksum validation in `pattern_requirements.checksum` when token formats support it. +- Use `visible: false` for helper/non-secret captures used only by dependent rules. +- Use `depends_on_rule` for multi-part credential validation (for example ID + secret). + +## Validation Policy (Important) +- Default: define validation logic in YAML under `validation:`. +- Do not move validation logic into Rust unless YAML cannot reliably express it. +- Code-backed validation types (for example AWS, GCP, Coinbase, MongoDB) are notable exceptions and should remain rare. +- For new rules, first attempt `Http`/`Grpc` YAML validation before considering exception paths. + +## Revocation Policy +- If a rule has validation and the provider API safely supports revocation, add `revocation:` in the same YAML rule. +- Prefer explicit success criteria in `response_matcher`. +- Use `HttpMultiStep` revocation when API workflows require pre-fetch/extraction steps. +- If revocation is intentionally not supported, document why with an inline YAML comment. + +## Authoring Workflow +1. Choose the target provider file (or add a new provider file if no suitable file exists). +2. Copy a structurally similar rule from this directory. +3. Implement/adjust `pattern`, `examples`, and filtering (`pattern_requirements`, `min_entropy`). +4. Add YAML `validation` (default path). +5. Add YAML `revocation` when supported. +6. Add `references` for token format/API behavior. +7. Verify locally (below). 
+ +## Local Verification Checklist +- Syntax/load checks: + - `cargo test -p kingfisher-rules` +- Broader regression check: + - `cargo test --workspace --all-targets` +- Behavioral check against sample content: + - `kingfisher scan ./testdata --rule <rule-id> --rule-stats` +- Validation check (when validation is present): + - `kingfisher validate --rule <rule-id>` + +## Documentation +Read these before complex edits: +- `docs/RULES.md` (schema, pattern requirements, checksum, Liquid, validation/revocation) +- `docs/MULTI_STEP_REVOCATION.md` +- `docs/TOKEN_REVOCATION_SUPPORT.md` + +## Change Discipline +- Keep changes scoped to the specific provider/rule request. +- Do not refactor unrelated rules in the same PR unless explicitly asked. +- Preserve existing YAML style and indentation conventions in this directory. diff --git a/crates/kingfisher-rules/data/rules/ably.yml b/crates/kingfisher-rules/data/rules/ably.yml new file mode 100644 index 0000000..44f1615 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/ably.yml @@ -0,0 +1,39 @@ +rules: + - name: Ably API Key + id: kingfisher.ably.1 + pattern: | + (?xi) + \b + ably + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API) + (?:.|[\n\r]){0,32}? 
+ \b + ( + [A-Za-z0-9_-]{8}\.[A-Za-z0-9_-]{8}:[A-Za-z0-9_-]{24} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - ABLY_API_KEY=abc12345.xyz78901:secretkeyvalue1234567890 + - ably_key = "appid123.keyid987:AbCdEfGhIjKlMnOpQrStUvWx" + references: + - https://ably.com/docs/auth/basic + - https://ably.com/docs/api/rest-api + validation: + type: Http + content: + request: + method: GET + url: https://rest.ably.io/channels?limit=1 + headers: + Authorization: "Basic {{ TOKEN | append: ':' | b64enc }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/abstractapi.yml b/crates/kingfisher-rules/data/rules/abstractapi.yml new file mode 100644 index 0000000..0619b94 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/abstractapi.yml @@ -0,0 +1,41 @@ +rules: + - name: AbstractAPI API Key + id: kingfisher.abstractapi.1 + pattern: | + (?xi) + \b + abstractapi + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - abstractapi_api_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + - ABSTRACTAPI_KEY=abcdef1234567890abcdef1234567890 + references: + - https://docs.abstractapi.com/api/ip-geolocation + - https://abstractapi.com/docs + validation: + type: Http + content: + request: + method: GET + url: https://ipgeolocation.abstractapi.com/v1/?api_key={{ TOKEN }} + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401, 403] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/abuseipdb.yml b/crates/kingfisher-rules/data/rules/abuseipdb.yml new file mode 100644 index 0000000..8853324 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/abuseipdb.yml @@ -0,0 +1,39 @@ +rules: + - name: AbuseIPDB API Key + id: kingfisher.abuseipdb.1 + pattern: | + (?xi) + \b + abuseipdb + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{80} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - abuseipdb_api_key = "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef" + - ABUSEIPDB_KEY=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef + references: + - https://docs.abuseipdb.com/#introduction + - https://www.abuseipdb.com/api + validation: + type: Http + content: + request: + method: GET + url: https://api.abuseipdb.com/api/v2/check?ipAddress=127.0.0.1 + headers: + Key: "{{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/aviationstack.yml b/crates/kingfisher-rules/data/rules/aviationstack.yml new file mode 100644 index 0000000..eaf380e --- /dev/null +++ b/crates/kingfisher-rules/data/rules/aviationstack.yml @@ -0,0 +1,40 @@ +rules: + - name: AviationStack API Key + id: kingfisher.aviationstack.1 + pattern: | + (?xi) + \b + aviationstack + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - AVIATIONSTACK_ACCESS_KEY=abc123def456ghi789jkl012mno345pq + - aviationstack_access_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + references: + - https://aviationstack.com/documentation + validation: + type: Http + content: + request: + method: GET + url: https://api.aviationstack.com/v1/flights?access_key={{ TOKEN }} + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/betterstack.yml b/crates/kingfisher-rules/data/rules/betterstack.yml new file mode 100644 index 0000000..72e49ec --- /dev/null +++ b/crates/kingfisher-rules/data/rules/betterstack.yml @@ -0,0 +1,42 @@ +rules: + - name: Better Stack API Token + id: kingfisher.betterstack.1 + pattern: | + (?xi) + \b + betterstack + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9_-]{24} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - betterstack_api_token = "a1b2c3d4e5f6g7h8i9j0k1l2" + - BETTERSTACK_TOKEN=abcdef1234567890abcdef12 + references: + - https://betterstack.com/docs/uptime/api/getting-started-with-uptime-api/ + - https://betterstack.com/docs/uptime/api/list-all-existing-monitors + validation: + type: Http + content: + request: + method: GET + url: https://uptime.betterstack.com/api/v2/monitors + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401, 403] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/brevo.yml b/crates/kingfisher-rules/data/rules/brevo.yml new file mode 100644 index 0000000..471f5f6 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/brevo.yml @@ -0,0 +1,33 @@ +rules: + - name: Brevo API Token + id: kingfisher.brevo.1 + pattern: | + (?x) + \b + ( + xkeysib-[a-fA-F0-9]{64}-[a-zA-Z0-9]{16} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.2 + confidence: medium + examples: + - BREVO_API_KEY=xkeysib-abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcd-1234567890abcd12 + - '"brevo": "xkeysib-1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef-ab12cd34ef56gh78"' + references: + - https://developers.brevo.com/docs/api-key-authentication + - https://developers.brevo.com/docs/how-it-works + validation: + type: Http + content: + request: + method: GET + url: https://api.brevo.com/v3/account + headers: + api-key: "{{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] diff --git a/crates/kingfisher-rules/data/rules/clearout.yml b/crates/kingfisher-rules/data/rules/clearout.yml new file mode 100644 index 0000000..e3290f3 --- /dev/null +++ 
b/crates/kingfisher-rules/data/rules/clearout.yml @@ -0,0 +1,42 @@ +rules: + - name: Clearout API Token + id: kingfisher.clearout.1 + pattern: | + (?xi) + \b + clearout + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? + \b + ( + [A-Za-z0-9]{28} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - clearout_api_token = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4" + - CLEAROUT_TOKEN=abcdef1234567890abcdef123456 + references: + - https://docs.clearout.io/api-overview.html + - https://docs.clearout.io/email-verifier-api.html + validation: + type: Http + content: + request: + method: GET + url: https://api.clearout.io/v2/email_verify/getcredits + headers: + Authorization: "Bearer {{ TOKEN }}" + Content-Type: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/clerk.yml b/crates/kingfisher-rules/data/rules/clerk.yml new file mode 100644 index 0000000..1960f54 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/clerk.yml @@ -0,0 +1,39 @@ +rules: + - name: Clerk Secret Key + id: kingfisher.clerk.1 + pattern: | + (?xi) + \b + clerk + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|API|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + sk_(?:test|live)_[A-Za-z0-9]{32} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.3 + confidence: medium + examples: + - CLERK_SECRET_KEY=sk_live_abcdefghijklmnopqrstuvwxyz123456 + - clerk_secret = sk_test_4pX9kL2mN8qR5sT7vY1zA3bC6dE0fG2h + references: + - https://clerk.com/docs/deployments/clerk-environment-variables + - https://clerk.com/docs/guides/development/machine-auth/api-keys + validation: + type: Http + content: + request: + method: GET + url: https://api.clerk.com/v1/users?limit=1 + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/cloudinary.yml b/crates/kingfisher-rules/data/rules/cloudinary.yml new file mode 100644 index 0000000..65488ca --- /dev/null +++ b/crates/kingfisher-rules/data/rules/cloudinary.yml @@ -0,0 +1,82 @@ +rules: + - name: Cloudinary Cloud Name + id: kingfisher.cloudinary.3 + pattern: | + (?xi) + \b + cloudinary + (?:.|[\n\r]){0,32}? + (?:CLOUD[_\s]?NAME|CLOUD) + (?:.|[\n\r]){0,16}? + \b + ( + [a-z0-9_-]{3,32} + ) + \b + min_entropy: 2.5 + confidence: medium + visible: false + examples: + - CLOUDINARY_CLOUD_NAME=demo + - name: Cloudinary API Key + id: kingfisher.cloudinary.2 + pattern: | + (?xi) + \b + cloudinary + (?:.|[\n\r]){0,32}? + (?:API[_\s]?KEY|KEY) + (?:.|[\n\r]){0,16}? + \b + ( + [0-9]{15} + ) + \b + min_entropy: 3.0 + confidence: medium + visible: false + examples: + - CLOUDINARY_API_KEY=123456789012345 + - name: Cloudinary API Secret + id: kingfisher.cloudinary.1 + pattern: | + (?xi) + \b + cloudinary + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|API[_\s]?SECRET) + (?:.|[\n\r]){0,32}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - CLOUDINARY_API_SECRET=abcdefghijklmnopqrstuvwxyz123456 + - cloudinary_secret = "aB3dE5fG7hI9jK1lM3nO5pQ7rS9tU1vW" + references: + - https://cloudinary.com/documentation/developer_onboarding_faq_find_credentials + - https://cloudinary.com/documentation/image_upload_api_reference + validation: + type: Http + content: + request: + method: GET + url: "https://api.cloudinary.com/v1_1/{{ CLOUDNAME }}/usage" + headers: + Authorization: "Basic {{ APIKEY | append: ':' | append: TOKEN | b64enc }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + depends_on_rule: + - rule_id: kingfisher.cloudinary.2 + variable: APIKEY + - rule_id: kingfisher.cloudinary.3 + variable: CLOUDNAME diff --git a/crates/kingfisher-rules/data/rules/coinlayer.yml b/crates/kingfisher-rules/data/rules/coinlayer.yml new file mode 100644 index 0000000..405f57f --- /dev/null +++ b/crates/kingfisher-rules/data/rules/coinlayer.yml @@ -0,0 +1,41 @@ +rules: + - name: Coinlayer API Key + id: kingfisher.coinlayer.1 + pattern: | + (?xi) + \b + coinlayer + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - COINLAYER_ACCESS_KEY=abc123def456ghi789jkl012mno345pq + - coinlayer_access_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + references: + - https://coinlayer.com/documentation + - https://coinlayer.com/signup/free + validation: + type: Http + content: + request: + method: GET + url: https://api.coinlayer.com/live?access_key={{ TOKEN }} + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/contentstack.yml b/crates/kingfisher-rules/data/rules/contentstack.yml new file mode 100644 index 0000000..90774fb --- /dev/null +++ b/crates/kingfisher-rules/data/rules/contentstack.yml @@ -0,0 +1,62 @@ +rules: + - name: Contentstack API Key + id: kingfisher.contentstack.2 + pattern: | + (?xi) + \b + contentstack + (?:.|[\n\r]){0,32}? + (?:API[_\s]?KEY|STACK[_\s]?API[_\s]?KEY) + (?:.|[\n\r]){0,16}? + \b + ( + blt[a-f0-9]{10} + ) + \b + min_entropy: 3.0 + confidence: medium + visible: false + examples: + - CONTENTSTACK_API_KEY=blt1234567890 + - name: Contentstack Management Token + id: kingfisher.contentstack.1 + pattern: | + (?xi) + \b + contentstack + (?:.|[\n\r]){0,32}? + (?:MANAGEMENT[_\s]?TOKEN|AUTH[_\s]?TOKEN|TOKEN) + (?:.|[\n\r]){0,32}? 
+ \b + ( + cs[a-f0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - CONTENTSTACK_MANAGEMENT_TOKEN=cs1234567890abcdef1234567890abcdef + - contentstack_token = "csa1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4" + references: + - https://www.contentstack.com/docs/developers/create-tokens/overview-of-tokens + - https://www.contentstack.com/docs/developers/apis/content-management-api + validation: + type: Http + content: + request: + method: GET + url: "https://api.contentstack.io/v3/stacks" + headers: + api_key: "{{ APIKEY }}" + authorization: "Bearer {{ TOKEN }}" + Content-Type: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + depends_on_rule: + - rule_id: kingfisher.contentstack.2 + variable: APIKEY diff --git a/crates/kingfisher-rules/data/rules/currencylayer.yml b/crates/kingfisher-rules/data/rules/currencylayer.yml new file mode 100644 index 0000000..0c631c7 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/currencylayer.yml @@ -0,0 +1,41 @@ +rules: + - name: Currencylayer API Key + id: kingfisher.currencylayer.1 + pattern: | + (?xi) + \b + currencylayer + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - CURRENCYLAYER_ACCESS_KEY=abc123def456ghi789jkl012mno345pq + - currencylayer_access_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + references: + - https://currencylayer.com/documentation + - https://currencylayer.com/api-access + validation: + type: Http + content: + request: + method: GET + url: https://api.currencylayer.com/live?access_key={{ TOKEN }} + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/daily.yml b/crates/kingfisher-rules/data/rules/daily.yml new file mode 100644 index 0000000..c4a2b88 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/daily.yml @@ -0,0 +1,43 @@ +rules: + - name: Daily API Key + id: kingfisher.daily.1 + pattern: | + (?xi) + \b + daily + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API) + (?:.|[\n\r]){0,32}? 
+ \b + ( + [A-Za-z0-9]{64} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - DAILY_API_KEY=abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890 + - daily_api_key = "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6a7b8c9d0e1f2a3b4c5d6a7b8c9d0e1f2" + references: + - https://docs.daily.co/reference/rest-api + - https://docs.daily.co/reference/rest-api/rooms/list-rooms + validation: + type: Http + content: + request: + method: GET + url: https://api.daily.co/v1/rooms?limit=1 + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"data"' + - '"total_count"' + match_all_words: true diff --git a/crates/kingfisher-rules/data/rules/fixer.yml b/crates/kingfisher-rules/data/rules/fixer.yml new file mode 100644 index 0000000..3c309b1 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/fixer.yml @@ -0,0 +1,41 @@ +rules: + - name: Fixer.io API Key + id: kingfisher.fixer.1 + pattern: | + (?xi) + \b + fixer + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - FIXER_ACCESS_KEY=abc123def456ghi789jkl012mno345pq + - fixer_access_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + references: + - https://fixer.io/documentation + - https://fixer.io/api-key + validation: + type: Http + content: + request: + method: GET + url: https://data.fixer.io/api/latest?access_key={{ TOKEN }} + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/geoapify.yml b/crates/kingfisher-rules/data/rules/geoapify.yml new file mode 100644 index 0000000..7d3dfd0 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/geoapify.yml @@ -0,0 +1,38 @@ +rules: + - name: Geoapify API Key + id: kingfisher.geoapify.1 + pattern: | + (?xi) + \b + geoapify + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - geoapify_api_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + - GEOAPIFY_KEY=abcdef1234567890abcdef1234567890 + references: + - https://apidocs.geoapify.com/docs/geocoding/api/api + - https://apidocs.geoapify.com/docs/ip-geolocation + validation: + type: Http + content: + request: + method: GET + url: https://api.geoapify.com/v1/ipinfo?apiKey={{ TOKEN }} + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/hunterio.yml b/crates/kingfisher-rules/data/rules/hunterio.yml new file mode 100644 index 0000000..3f68ba7 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/hunterio.yml @@ -0,0 +1,40 @@ +rules: + - name: Hunter.io API Key + id: kingfisher.hunterio.1 + pattern: | + (?xi) + \b + hunter + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{40} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + ignore_if_contains: + - test-api-key + examples: + - hunter_api_key = "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6a7b8c9d0" + - HUNTER_KEY=abcdef1234567890abcdef1234567890abcdef12 + references: + - https://hunter.io/api/docs + - https://hunter.io/api-keys + validation: + type: Http + content: + request: + method: GET + url: https://api.hunter.io/v2/account?api_key={{ TOKEN }} + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/mux.yml b/crates/kingfisher-rules/data/rules/mux.yml new file mode 100644 index 0000000..9148726 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/mux.yml @@ -0,0 +1,59 @@ +rules: + - name: Mux Access Token ID + id: kingfisher.mux.2 + pattern: | + (?xi) + \b + mux + (?:.|[\n\r]){0,32}? + (?:TOKEN[_\s]?ID|ACCESS[_\s]?TOKEN[_\s]?ID) + (?:.|[\n\r]){0,16}? + \b + ( + [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} + ) + \b + min_entropy: 3.0 + confidence: medium + visible: false + examples: + - MUX_TOKEN_ID=44c819de-4add-4c9f-b2e9-384a0a71bede + - name: Mux Access Token Secret + id: kingfisher.mux.1 + pattern: | + (?xi) + \b + mux + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? 
+ \b + ( + [A-Za-z0-9+/=]{75} + ) + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - MUX_TOKEN_SECRET=INKxCoZ+cX6l1yrR6vqzYHVaeFEcqvZShznWM1U/No8KsV7h6Jxu1XXuTUQ91sdiGONK3H7NE7H + references: + - https://docs.mux.com/core/make-api-requests + - https://docs.mux.com/api-reference/video/video/v1/assets + validation: + type: Http + content: + request: + method: GET + url: https://api.mux.com/video/v1/assets?limit=1 + headers: + Authorization: "Basic {{ MUXID | append: ':' | append: TOKEN | b64enc }}" + Content-Type: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + depends_on_rule: + - rule_id: kingfisher.mux.2 + variable: MUXID diff --git a/crates/kingfisher-rules/data/rules/newsapi.yml b/crates/kingfisher-rules/data/rules/newsapi.yml new file mode 100644 index 0000000..fae9010 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/newsapi.yml @@ -0,0 +1,40 @@ +rules: + - name: NewsAPI API Key + id: kingfisher.newsapi.1 + pattern: | + (?xi) + \b + newsapi + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - NEWSAPI_API_KEY=abc123def456ghi789jkl012mno345pq + - newsapi_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + references: + - https://newsapi.org/docs/authentication + validation: + type: Http + content: + request: + method: GET + url: https://newsapi.org/v2/everything?q=test&apiKey={{ TOKEN }} + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/numverify.yml b/crates/kingfisher-rules/data/rules/numverify.yml new file mode 100644 index 0000000..3460aa5 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/numverify.yml @@ -0,0 +1,41 @@ +rules: + - name: Numverify API Key + id: kingfisher.numverify.1 + pattern: | + (?xi) + \b + numverify + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - NUMVERIFY_ACCESS_KEY=abc123def456ghi789jkl012mno345pq + - numverify_access_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + references: + - https://numverify.com/documentation + - https://numverify.com/faq + validation: + type: Http + content: + request: + method: GET + url: https://apilayer.net/api/validate?access_key={{ TOKEN }}&number=14155551234 + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/onesignal.yml b/crates/kingfisher-rules/data/rules/onesignal.yml new file mode 100644 index 0000000..7595cb3 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/onesignal.yml @@ -0,0 +1,42 @@ +rules: + - name: OneSignal REST API Key + id: kingfisher.onesignal.1 + pattern: | + (?xi) + \b + onesignal + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - onesignal_rest_api_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + - ONESIGNAL_REST_API_KEY=abcdef1234567890abcdef1234567890 + references: + - https://documentation.onesignal.com/reference/rest-api-overview + - https://documentation.onesignal.com/docs/keys-and-ids + validation: + type: Http + content: + request: + method: GET + url: https://api.onesignal.com/apps + headers: + Authorization: "Key {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401, 403] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/pinecone.yml b/crates/kingfisher-rules/data/rules/pinecone.yml new file mode 100644 index 0000000..0c0c995 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/pinecone.yml @@ -0,0 +1,39 @@ +rules: + - name: Pinecone API Key + id: kingfisher.pinecone.1 + pattern: | + (?xi) + \b + pinecone + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|API|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + examples: + - PINECONE_API_KEY=62b0dbfe-3489-4b79-b850-34d911527c88 + - pinecone_key = "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + references: + - https://docs.pinecone.io/reference/api/authentication + - https://docs.pinecone.io/reference/api/2025-10/control-plane/list_indexes + validation: + type: Http + content: + request: + method: GET + url: https://api.pinecone.io/indexes + headers: + Api-Key: "{{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/pingdom.yml b/crates/kingfisher-rules/data/rules/pingdom.yml new file mode 100644 index 0000000..ba75aa4 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/pingdom.yml @@ -0,0 +1,42 @@ +rules: + - name: Pingdom API Token + id: kingfisher.pingdom.1 + pattern: | + (?xi) + \b + pingdom + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9_-]{64} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - pingdom_api_token = "ofOhK18Ca6w4S_XmInGv0QPkqly-rbRBBoHsp_2FEH5QnIbH0VZhRPO3tlvrjMIK" + - PINGDOM_TOKEN=abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890 + references: + - https://docs.pingdom.com/api/ + - https://pingdom.com/resources/pingdom-api + validation: + type: Http + content: + request: + method: GET + url: https://api.pingdom.com/api/3.1/checks + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401, 403] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/positionstack.yml b/crates/kingfisher-rules/data/rules/positionstack.yml new file mode 100644 index 0000000..0bb58a2 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/positionstack.yml @@ -0,0 +1,40 @@ +rules: + - name: Positionstack API Key + id: kingfisher.positionstack.1 + pattern: | + (?xi) + \b + positionstack + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - positionstack_access_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + - POSITIONSTACK_KEY=1234567890abcdef1234567890abcdef + references: + - https://positionstack.com/documentation + validation: + type: Http + content: + request: + method: GET + url: https://api.positionstack.com/v1/forward?access_key={{ TOKEN }}&query=London + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401, 403] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/railway.yml b/crates/kingfisher-rules/data/rules/railway.yml new file mode 100644 index 0000000..2392d69 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/railway.yml @@ -0,0 +1,44 @@ +rules: + - name: Railway API Token + id: kingfisher.railway.1 + pattern: | + (?xi) + \b + railway + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API) + (?:.|[\n\r]){0,32}? 
+ \b + ( + (?:[A-Za-z0-9]{32}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}) + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - RAILWAY_API_TOKEN=abcdef1234567890abcdef1234567890 + - railway_token = "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + references: + - https://docs.railway.com/guides/public-api + - https://docs.railway.com/reference/oauth/login-and-tokens + validation: + type: Http + content: + request: + method: POST + url: https://backboard.railway.com/graphql/v2 + headers: + Authorization: "Bearer {{ TOKEN }}" + Content-Type: application/json + body: '{"query":"query { me { name email } }"}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"data"' + - '"me"' + match_all_words: true diff --git a/crates/kingfisher-rules/data/rules/render.yml b/crates/kingfisher-rules/data/rules/render.yml new file mode 100644 index 0000000..4848411 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/render.yml @@ -0,0 +1,39 @@ +rules: + - name: Render API Key + id: kingfisher.render.1 + pattern: | + (?xi) + \b + render + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API) + (?:.|[\n\r]){0,32}? 
+ \b + ( + (?:[A-Za-z0-9]{32}|rnd_[A-Za-z0-9]{33}) + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - RENDER_API_KEY=abcdef1234567890abcdef1234567890 + - render_api_key = "rnd_abc123def456ghi789jkl012mno345pqr" + references: + - https://docs.render.com/api + - https://api-docs.render.com/docs + validation: + type: Http + content: + request: + method: GET + url: https://api.render.com/v1/services?limit=1 + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/rollbar.yml b/crates/kingfisher-rules/data/rules/rollbar.yml new file mode 100644 index 0000000..436fa9f --- /dev/null +++ b/crates/kingfisher-rules/data/rules/rollbar.yml @@ -0,0 +1,42 @@ +rules: + - name: Rollbar Access Token + id: kingfisher.rollbar.1 + pattern: | + (?xi) + \b + rollbar + (?:.|[\n\r]){0,32}? + (?:access[_-]?token|token|key) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - rollbar_access_token = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + - ROLLBAR_ACCESS_TOKEN=abcdef1234567890abcdef1234567890 + references: + - https://docs.rollbar.com/docs/access-tokens + - https://docs.rollbar.com/reference/getting-started-1 + validation: + type: Http + content: + request: + method: GET + url: https://api.rollbar.com/api/1/projects + headers: + X-Rollbar-Access-Token: "{{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [403] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/salesloft.yml b/crates/kingfisher-rules/data/rules/salesloft.yml new file mode 100644 index 0000000..f64d75b --- /dev/null +++ b/crates/kingfisher-rules/data/rules/salesloft.yml @@ -0,0 +1,39 @@ +rules: + - name: Salesloft API Key + id: kingfisher.salesloft.1 + pattern: | + (?xi) + \b + salesloft + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|API|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + ak_[a-fA-F0-9]{64} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.2 + confidence: medium + examples: + - SALESLOFT_API_KEY=ak_de656ec86bcab24878c24ff4d86758f8963d8ea6bcd4e90f8fae846ba8f9ac62 + - salesloft_key = "ak_1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef" + references: + - https://developer.salesloft.com/docs/platform/api-basics/api-key-authentication + - https://developers.salesloft.com/docs/api/me + validation: + type: Http + content: + request: + method: GET + url: https://api.salesloft.com/v2/me + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/sanity.yml b/crates/kingfisher-rules/data/rules/sanity.yml new file mode 100644 index 0000000..0fd13c2 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/sanity.yml @@ -0,0 +1,39 @@ +rules: + - name: Sanity API Token + id: kingfisher.sanity.1 + pattern: | + (?xi) + \b + sanity + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API) + (?:.|[\n\r]){0,32}? 
+ \b + ( + (?:sk[A-Za-z0-9]{52}|sk[A-Za-z0-9]{78}) + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - SANITY_API_TOKEN=skE5UXUmBEy7U50jcG4In4v4xoHZTlduDxQYet8Y84tsTqAZxp2reIPJsA1JzqXJno2qcpauGwPfjHpU + - sanity_token = "sk1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdef" + references: + - https://www.sanity.io/docs/content-lake/http-auth + - https://www.sanity.io/docs/projects-api + validation: + type: Http + content: + request: + method: GET + url: https://api.sanity.io/v2021-06-07/projects + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/statuscake.yml b/crates/kingfisher-rules/data/rules/statuscake.yml new file mode 100644 index 0000000..da30768 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/statuscake.yml @@ -0,0 +1,41 @@ +rules: + - name: StatusCake API Token + id: kingfisher.statuscake.1 + pattern: | + (?xi) + \b + statuscake + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9_-]{20} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - statuscake_api_token = "a1b2c3d4e5f6g7h8i9j0" + - STATUSCAKE_TOKEN=abcdef1234567890abcd + references: + - https://developers.statuscake.com/guides/api/authentication/ + - https://developers.statuscake.com/api + validation: + type: Http + content: + request: + method: GET + url: https://api.statuscake.com/v1/uptime + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401, 403] + negative: true diff --git a/crates/kingfisher-rules/data/rules/storyblok.yml b/crates/kingfisher-rules/data/rules/storyblok.yml new file mode 100644 index 0000000..833fe81 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/storyblok.yml @@ -0,0 +1,38 @@ +rules: + - name: Storyblok API Token + id: kingfisher.storyblok.1 + pattern: | + (?xi) + \b + storyblok + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API) + (?:.|[\n\r]){0,32}? 
+ \b + ( + [A-Za-z0-9]{22}tt + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - STORYBLOK_ACCESS_TOKEN=wANpEQEsMYGOwLxwXQ76Ggtt + - storyblok_token = "13Kft3335iwbBOI333wawOtt" + references: + - https://www.storyblok.com/docs/api/content-delivery/v2/getting-started/authentication + - https://www.storyblok.com/docs/concepts/access-tokens + validation: + type: Http + content: + request: + method: GET + url: "https://api.storyblok.com/v2/cdn/stories?token={{ TOKEN }}&version=published&per_page=1" + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/uptimerobot.yml b/crates/kingfisher-rules/data/rules/uptimerobot.yml new file mode 100644 index 0000000..2e7a517 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/uptimerobot.yml @@ -0,0 +1,38 @@ +rules: + - name: UptimeRobot API Key + id: kingfisher.uptimerobot.1 + pattern: | + (?xi) + \b + uptimerobot + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + (?:ur[A-Za-z0-9-]{29}|[A-Za-z0-9]{28}) + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - uptimerobot_api_key = "ur123456-7890abcdef1234567890ab" + - UPTIMEROBOT_KEY=abcdef1234567890abcdef123456 + references: + - https://uptimerobot.com/api/v3 + - https://help.uptimerobot.com/en/articles/11620152-how-to-use-uptimerobot-s-api + validation: + type: Http + content: + request: + method: GET + url: https://api.uptimerobot.com/v2/getMonitors?api_key={{ TOKEN }}&format=json + headers: + Content-Type: application/x-www-form-urlencoded + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/urlscan.yml b/crates/kingfisher-rules/data/rules/urlscan.yml new file mode 100644 index 0000000..0c66e77 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/urlscan.yml @@ -0,0 +1,39 @@ +rules: + - name: urlscan.io API Key + id: kingfisher.urlscan.1 + pattern: | + (?xi) + \b + urlscan + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + (?:[A-Za-z0-9]{32}|[A-Za-z0-9]{36}) + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - urlscan_api_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8" + - URLSCAN_KEY=abcdef1234567890abcdef1234567890 + references: + - https://urlscan.io/docs/api/ + - https://docs.urlscan.io/ + validation: + type: Http + content: + request: + method: GET + url: https://urlscan.io/user/quotas/ + headers: + API-Key: "{{ TOKEN }}" + Content-Type: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/virustotal.yml b/crates/kingfisher-rules/data/rules/virustotal.yml new file mode 100644 index 0000000..4884482 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/virustotal.yml @@ -0,0 +1,42 @@ +rules: + - name: VirusTotal API Key + id: kingfisher.virustotal.1 + pattern: | + (?xi) + \b + virustotal + (?:.|[\n\r]){0,32}? + (?:api[_-]?key|x[_-]?apikey|key) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{64} + ) + \b + min_entropy: 3.6 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - VIRUSTOTAL_API_KEY=abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890 + - virustotal_x_apikey = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0u1v2w3x4y5z6a7b8c9d0e1f2" + references: + - https://docs.virustotal.com/reference/authentication + - https://docs.virustotal.com/docs/please-give-me-an-api-key + validation: + type: Http + content: + request: + method: GET + url: https://www.virustotal.com/api/v3/domains/google.com + headers: + x-apikey: "{{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401, 403] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/weatherapi.yml b/crates/kingfisher-rules/data/rules/weatherapi.yml new file mode 100644 index 0000000..cd5271b --- /dev/null +++ b/crates/kingfisher-rules/data/rules/weatherapi.yml @@ -0,0 +1,40 @@ +rules: + - name: WeatherAPI.com API Key + id: kingfisher.weatherapi.1 + pattern: | + (?xi) + \b + weatherapi + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - WEATHERAPI_KEY=abc123def456ghi789jkl012mno345pq + - weatherapi_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + references: + - https://www.weatherapi.com/docs/ + validation: + type: Http + content: + request: + method: GET + url: https://api.weatherapi.com/v1/current.json?key={{ TOKEN }}&q=London + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401] + negative: true + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/webflow.yml b/crates/kingfisher-rules/data/rules/webflow.yml new file mode 100644 index 0000000..62bf0ac --- /dev/null +++ b/crates/kingfisher-rules/data/rules/webflow.yml @@ -0,0 +1,39 @@ +rules: + - name: Webflow API Token + id: kingfisher.webflow.1 + pattern: | + (?xi) + \b + webflow + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API) + (?:.|[\n\r]){0,32}? 
+ \b + ( + (?:[A-Za-z0-9]{32}|[A-Za-z0-9]{36}) + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - WEBFLOW_API_TOKEN=abcdef1234567890abcdef1234567890 + - webflow_token = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8" + references: + - https://developers.webflow.com/data/reference/authentication + - https://developers.webflow.com/data/reference/rest-introduction/quick-start + validation: + type: Http + content: + request: + method: GET + url: https://api.webflow.com/v2/sites + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid diff --git a/crates/kingfisher-rules/data/rules/zerobounce.yml b/crates/kingfisher-rules/data/rules/zerobounce.yml new file mode 100644 index 0000000..1c0ecdf --- /dev/null +++ b/crates/kingfisher-rules/data/rules/zerobounce.yml @@ -0,0 +1,38 @@ +rules: + - name: ZeroBounce API Key + id: kingfisher.zerobounce.1 + pattern: | + (?xi) + \b + zerobounce + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? 
+ \b + ( + [A-Za-z0-9]{32} + ) + \b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_digits: 2 + examples: + - zerobounce_api_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" + - ZEROBOUNCE_KEY=abcdef1234567890abcdef1234567890 + references: + - https://www.zerobounce.net/docs/email-validation-api-quickstart/ + - https://api.zerobounce.net/v2/validate + validation: + type: Http + content: + request: + method: GET + url: https://api.zerobounce.net/v2/validate?api_key={{ TOKEN }}&email=test@example.com&ip_address= + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid From 5e1d732e80f2f4e748fe8bd2d2370b1f14400181 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 7 Mar 2026 21:30:31 -0800 Subject: [PATCH 04/12] added new rules --- crates/kingfisher-rules/data/rules/clerk.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/kingfisher-rules/data/rules/clerk.yml b/crates/kingfisher-rules/data/rules/clerk.yml index 1960f54..cc5aaf3 100644 --- a/crates/kingfisher-rules/data/rules/clerk.yml +++ b/crates/kingfisher-rules/data/rules/clerk.yml @@ -19,7 +19,7 @@ rules: confidence: medium examples: - CLERK_SECRET_KEY=sk_live_abcdefghijklmnopqrstuvwxyz123456 - - clerk_secret = sk_test_4pX9kL2mN8qR5sT7vY1zA3bC6dE0fG2h + - clerk_secret = sk_test_4pX9kL2mN8qR3sT7vY1zA3bC6dE0fG2h references: - https://clerk.com/docs/deployments/clerk-environment-variables - https://clerk.com/docs/guides/development/machine-auth/api-keys From d66d8f0226704df95b5707b718d3797a7fb674ff Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 7 Mar 2026 21:31:02 -0800 Subject: [PATCH 05/12] added new rules --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6b8329d..996e0f9 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Kingfisher Logo 
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) -[![Detection Rules](https://img.shields.io/badge/Detection%20Rules-500-2ea043.svg)](https://github.com/mongodb/kingfisher)
+[![Detection Rules](https://img.shields.io/badge/Detection%20Rules-540-2ea043.svg)](https://github.com/mongodb/kingfisher)
[![ghcr downloads](https://ghcr-badge.elias.eu.org/shield/mongodb/kingfisher/kingfisher)](https://github.com/mongodb/kingfisher/pkgs/container/kingfisher)
From c212768e8fb480bd910286a5154027fe6ece08f4 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sun, 8 Mar 2026 13:44:19 -0700 Subject: [PATCH 06/12] rule improvements to reduce FP's --- .../kingfisher-rules/data/rules/atlassian.yml | 3 +- .../data/rules/azurestorage.yml | 29 ++++++++++--------- crates/kingfisher-rules/data/rules/google.yml | 14 +++++++-- crates/kingfisher-rules/data/rules/redis.yml | 2 +- .../data/rules/salesforce.yml | 14 +++++++++ crates/kingfisher-rules/data/rules/square.yml | 1 + 6 files changed, 46 insertions(+), 17 deletions(-) diff --git a/crates/kingfisher-rules/data/rules/atlassian.yml b/crates/kingfisher-rules/data/rules/atlassian.yml index 91c03ef..8393588 100644 --- a/crates/kingfisher-rules/data/rules/atlassian.yml +++ b/crates/kingfisher-rules/data/rules/atlassian.yml @@ -13,10 +13,11 @@ rules: \b pattern_requirements: min_lowercase: 1 + min_digits: 2 min_entropy: 3.5 confidence: medium examples: - - Atlassian_key = "DjayBenyJrtpvydFCzAphcqc" + - Atlassian_key = "DjayBeny2rtpvydF4zAphcqc" - "ATLASSIAN_API_TOKEN:'abcdef1234567890abcdef12'" references: - https://developer.atlassian.com/cloud/admin/organization/rest/api-group-orgs/#api-v1-orgs-get diff --git a/crates/kingfisher-rules/data/rules/azurestorage.yml b/crates/kingfisher-rules/data/rules/azurestorage.yml index bd3925d..1bf3cb5 100644 --- a/crates/kingfisher-rules/data/rules/azurestorage.yml +++ b/crates/kingfisher-rules/data/rules/azurestorage.yml @@ -32,20 +32,23 @@ rules: id: kingfisher.azurestorage.2 pattern: | (?xi) - azure - (?:.|[\n\r]){0,128}? - (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) - (?:.|[\n\r]){0,128}? - ["':\s=}\]\)] - ( - (?: - [A-Z0-9+\\/-]{86,88}={1,2} - ) - | - (?: - [A-Z0-9+\\/-]{86,88}\b - ) + \b + (?: + azure(?:[_\s-]*storage)? + (?:[_\s-]*(?:account[_\s-]*key|storage[_\s-]*key|shared[_\s-]*key|access[_\s-]*key|accountkey)) + | + account[_\s-]*key + | + storage[_\s-]*key ) + \b + (?:.|[\n\r]){0,24}? + [ \t]*[=:][ \t]* + ['"]? + ( + [A-Z0-9+\\/-]{86,88}(?:={1,2})? 
+ ) + ['"]? pattern_requirements: min_digits: 2 min_uppercase: 2 diff --git a/crates/kingfisher-rules/data/rules/google.yml b/crates/kingfisher-rules/data/rules/google.yml index 70b2df0..8e7acdc 100644 --- a/crates/kingfisher-rules/data/rules/google.yml +++ b/crates/kingfisher-rules/data/rules/google.yml @@ -28,10 +28,14 @@ rules: id: kingfisher.google.3 pattern: | (?xi) - client.?secret .{0,10} + client.?secret + (?:.|[\n\r]){0,10}? + [ \t]*[=:][ \t]* + ['"]? ( [a-z0-9_-]{24} ) + ['"]? (?: [^a-z0-9_-] |$) pattern_requirements: min_digits: 1 @@ -80,7 +84,13 @@ rules: (GOCSPX-[A-Z0-9_-]{28}) | (?: - (?i) client.?secret .{0,10} \b ([A-Z0-9_-]{24}) + (?i) + client.?secret + (?:.|[\n\r]){0,10}? + [ \t]*[=:][ \t]* + ['"]? + \b ([A-Z0-9_-]{24}) + ['"]? ) ) (?:[^A-Z0-9_-] | $) diff --git a/crates/kingfisher-rules/data/rules/redis.yml b/crates/kingfisher-rules/data/rules/redis.yml index 27da1b8..0330a1b 100644 --- a/crates/kingfisher-rules/data/rules/redis.yml +++ b/crates/kingfisher-rules/data/rules/redis.yml @@ -85,7 +85,7 @@ rules: (?:PASSWORD|PASS|PASSWD|AUTH|SECRET|TOKEN) \b (?:.|[\n\r]){0,24}? - [=:\s]+ + [ \t]*[=:][ \t]* ['"]? (?P[a-zA-Z0-9%;._~!$&'()*+,;=/*+-]{8,64}) ['"]? 
diff --git a/crates/kingfisher-rules/data/rules/salesforce.yml b/crates/kingfisher-rules/data/rules/salesforce.yml index f384273..0acfb88 100644 --- a/crates/kingfisher-rules/data/rules/salesforce.yml +++ b/crates/kingfisher-rules/data/rules/salesforce.yml @@ -209,6 +209,10 @@ rules: min_entropy: 3.5 pattern_requirements: min_digits: 3 + ignore_if_contains: + - "www.w3.org" + - "/2001/" + - "/XMLSchema" confidence: medium examples: - | @@ -309,6 +313,16 @@ rules: min_digits: 4 min_entropy: 3.6 confidence: medium + validation: + type: Http + content: + request: + method: GET + url: "https://login.salesforce.com/services/oauth2/authorize?response_type=code&client_id={{ TOKEN }}&redirect_uri=https%3A%2F%2Fexample.com%2Fcb" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] examples: - 3MVG9P8aWj9n4kT2xQ5mV7rY1bC3dF6gH8jK0mN2pR4tU6wX8zA1cE3gH5kM7qS9uV2xY4bD6fJ8nP1rT3vW5yZ7 references: diff --git a/crates/kingfisher-rules/data/rules/square.yml b/crates/kingfisher-rules/data/rules/square.yml index a694632..c463b68 100644 --- a/crates/kingfisher-rules/data/rules/square.yml +++ b/crates/kingfisher-rules/data/rules/square.yml @@ -7,6 +7,7 @@ rules: ( EAAA[a-zA-Z0-9\-\+=]{60} ) + \b pattern_requirements: min_digits: 4 min_entropy: 3.3 From e46fa7b86c73f6502a268904ee08d49fcc445ec2 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 9 Mar 2026 09:37:49 -0700 Subject: [PATCH 07/12] rule improvements to reduce FP's --- .../kingfisher-rules/data/rules/alibaba.yml | 21 ++++++++++---- .../data/rules/azurestorage.yml | 28 +++++++++++++------ crates/kingfisher-rules/data/rules/jwt.yml | 4 +-- .../kingfisher-rules/data/rules/mongodb.yml | 24 ++++++++++++++++ 4 files changed, 61 insertions(+), 16 deletions(-) diff --git a/crates/kingfisher-rules/data/rules/alibaba.yml b/crates/kingfisher-rules/data/rules/alibaba.yml index 82fdf9f..70e1658 100644 --- a/crates/kingfisher-rules/data/rules/alibaba.yml +++ b/crates/kingfisher-rules/data/rules/alibaba.yml @@ 
-23,13 +23,24 @@ rules: - name: Alibaba Access Key Secret id: kingfisher.alibabacloud.2 pattern: | - (?xi) + (?x) \b - alibaba - (?:.|[\n\r]){0,32}? - ( - [a-z0-9]{30} + (?i:alibaba|alibaba[\s_-]*cloud|aliyun) + (?:.|[\n\r]){0,40}? + (?i:access[\s_-]*key[\s_-]*secret|access[\s_-]*secret|secret|token|key) + (?:.|[\n\r]){0,16}? + (?: + [=:] + | + ["']\s*:\s*["'] ) + \s* + ["']? + ( + [A-Za-z0-9]{30} + ) + \b + ["']? min_entropy: 4.2 confidence: medium examples: diff --git a/crates/kingfisher-rules/data/rules/azurestorage.yml b/crates/kingfisher-rules/data/rules/azurestorage.yml index 1bf3cb5..8684755 100644 --- a/crates/kingfisher-rules/data/rules/azurestorage.yml +++ b/crates/kingfisher-rules/data/rules/azurestorage.yml @@ -31,33 +31,43 @@ rules: - name: Azure Storage Account Key id: kingfisher.azurestorage.2 pattern: | - (?xi) + (?x) \b (?: - azure(?:[_\s-]*storage)? + (?i:azure)(?:[_\s-]*(?i:storage))? (?:[_\s-]*(?:account[_\s-]*key|storage[_\s-]*key|shared[_\s-]*key|access[_\s-]*key|accountkey)) | - account[_\s-]*key + (?i:account)[_.\s-]*(?i:key) | - storage[_\s-]*key + (?i:storage)[_.\s-]*(?i:key) ) \b (?:.|[\n\r]){0,24}? - [ \t]*[=:][ \t]* - ['"]? + (?: + [=:] + | + ["']\s*:\s*["'] + ) + \s* + ["']? ( - [A-Z0-9+\\/-]{86,88}(?:={1,2})? + [A-Za-z0-9+/]{86}== ) ['"]? 
pattern_requirements: min_digits: 2 min_uppercase: 2 min_lowercase: 2 + min_special_chars: 1 min_entropy: 4.0 confidence: medium examples: - - Azure AccountKey=Xy9aB8cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1q - - Azure AccountKey=Ky7aC1cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1g==\ + - Azure AccountKey=oqb4TdY9T0hphvktd5fJnMiHuQqzVy1jd5sSuOpAbGkaoqTlrHl0BOJN2okcasinVLOJzfDbZo1L+ASt68RAhA== + - Azure AccountKey=B/1EVX2Ui47X09tqU3GI/j+Nko9r5COPm0Hea9tfzitF9MQX9lZZiNO3tYQckWnt+rtlGIWS+sCx+AStkq8ZLg== + - Azure AccountKey=u45diQdTiXeuSKl5r4EjgbPP72EYpuTNEzfMTi0mk+d2sTisA4gWzt4H1Ag3kqFaCykWZv2S6KQo+AStHF56RQ== + - Azure AccountKey=b8a/Z4wFAbhOPQTMa4PUTKr2XQhwoyWtP/3PnEto3mK86CFQnVYyTV/HSrij88h5jVYyzwUk0oTw+AStIKN/4w== + - Azure AccountKey=JJD1GDiHCmtTpCOKpBYkXgZKrZvi7P4mRDe3jNVGc/JL/bp51uWcWL0rkOByk5VsX2MM62A/ABkE+AStU9qMkA== + - Azure AccountKey=u45diQdTiXeuSKl5r4EjgbPP72EYpuTNEzfMTi0mk+d2sTisA4gWzt4H1Ag3kqFaCykWZv2S6KQo+AStHF56RQ== validation: type: AzureStorage references: diff --git a/crates/kingfisher-rules/data/rules/jwt.yml b/crates/kingfisher-rules/data/rules/jwt.yml index a5a2503..95ec817 100644 --- a/crates/kingfisher-rules/data/rules/jwt.yml +++ b/crates/kingfisher-rules/data/rules/jwt.yml @@ -8,9 +8,9 @@ rules: \. ey[A-Za-z0-9_-]{12,} (?# payload ) \. - [A-Za-z0-9_-]{12,} (?# signature ) + [A-Za-z0-9_-]{24,} (?# signature ) ) - (?:[^A-Z0-9_-]) + (?:[^A-Za-z0-9_-]|$) pattern_requirements: min_digits: 4 min_entropy: 3.3 diff --git a/crates/kingfisher-rules/data/rules/mongodb.yml b/crates/kingfisher-rules/data/rules/mongodb.yml index 624cfe0..a149d36 100644 --- a/crates/kingfisher-rules/data/rules/mongodb.yml +++ b/crates/kingfisher-rules/data/rules/mongodb.yml @@ -125,6 +125,30 @@ rules: - "****" - "xxxx" - "example" + - "" + - "" + - "" + - ":

" + - "" + - "" + - "{username}:{password}" + - "{password}" + - "${" + - "$(" + - "{{" + - "<" + - ":pass@" + - ":password@" + - ":secret@" + - ":passw0rd@" + - ":secretPassw0rd@" + - ":wrong_password@" + - ":fakepassword@" + - "@host:" + - "@host/" + - "@host1:" + - "@some.address" + - "@connectionString.com" min_entropy: 3 examples: - client = mongoc_client_new ("mongodb+srv://someuser:hunter2@my-atlas-rd941.mongodb.net/test?retryWrites=true&w=majority"); From ab811c8bcfc7232f9db198fe7ef942f7c78b9381 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 9 Mar 2026 20:11:58 -0700 Subject: [PATCH 08/12] v1.87.0 --- CHANGELOG.md | 2 + README.md | 1 + crates/kingfisher-core/src/content_type.rs | 10 +- .../data/rules/adafruitio.yml | 2 +- crates/kingfisher-rules/data/rules/age.yml | 8 +- .../kingfisher-rules/data/rules/anthropic.yml | 2 +- crates/kingfisher-rules/data/rules/apify.yml | 2 +- .../data/rules/artifactory.yml | 2 +- crates/kingfisher-rules/data/rules/brevo.yml | 2 +- .../kingfisher-rules/data/rules/cerebras.yml | 6 +- .../kingfisher-rules/data/rules/circleci.yml | 2 +- .../data/rules/clickhouse.yml | 6 +- .../data/rules/coderabbit.yml | 4 +- .../kingfisher-rules/data/rules/crates.io.yml | 4 +- crates/kingfisher-rules/data/rules/cursor.yml | 4 +- .../data/rules/databricks.yml | 4 +- .../data/rules/dependency_track.yml | 4 +- .../kingfisher-rules/data/rules/doppler.yml | 4 +- .../kingfisher-rules/data/rules/dropbox.yml | 4 +- crates/kingfisher-rules/data/rules/duffel.yml | 6 +- .../kingfisher-rules/data/rules/dynatrace.yml | 8 +- .../data/rules/elevenlabs.yml | 4 +- crates/kingfisher-rules/data/rules/figma.yml | 4 +- .../kingfisher-rules/data/rules/firecrawl.yml | 4 +- .../data/rules/fireworksai.yml | 4 +- .../kingfisher-rules/data/rules/fleetbase.yml | 2 +- .../data/rules/foursquare.yml | 4 +- .../kingfisher-rules/data/rules/freshdesk.yml | 2 +- .../kingfisher-rules/data/rules/friendli.yml | 4 +- crates/kingfisher-rules/data/rules/gitlab.yml | 16 +- 
.../data/rules/googleoauth2.yml | 4 +- crates/kingfisher-rules/data/rules/groq.yml | 6 +- .../kingfisher-rules/data/rules/harness.yml | 8 +- .../kingfisher-rules/data/rules/hubspot.yml | 2 +- .../kingfisher-rules/data/rules/infracost.yml | 6 +- .../kingfisher-rules/data/rules/instagram.yml | 10 +- .../kingfisher-rules/data/rules/intra42.yml | 4 +- crates/kingfisher-rules/data/rules/ionic.yml | 6 +- crates/kingfisher-rules/data/rules/jina.yml | 2 +- crates/kingfisher-rules/data/rules/looker.yml | 7 +- crates/kingfisher-rules/src/lib.rs | 5 +- crates/kingfisher-rules/src/rules_database.rs | 232 +++++++++- docs/PARSING.md | 24 +- docs/TREE_SITTER.md | 105 +++++ src/matcher/mod.rs | 404 +++++++++++++----- src/reporter.rs | 2 +- src/rules_database.rs | 5 +- src/scanner/processing.rs | 4 +- 48 files changed, 763 insertions(+), 204 deletions(-) create mode 100644 docs/TREE_SITTER.md diff --git a/CHANGELOG.md b/CHANGELOG.md index fa20dd0..b98d185 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ All notable changes to this project will be documented in this file. ## [v1.87.0] +- Tree-sitter verification now runs for blobs from `0` bytes up to `128 KiB` (previously `1 KiB` to `64 KiB`), while remaining a post-regex verification step applied only to context-dependent candidate matches from Hyperscan/Vectorscan. +- False-positive reduction: Hyperscan/Vectorscan still scans everything first, then tree-sitter performs a second-pass verification only on auto-classified context-dependent findings; self-identifying/token-explicit findings stay regex-first. - Hardened Perplexity API key validation to reject auth failures (`401`/`403`) and avoid false "Active Credential" results from error payloads. - Fixed Yelp API key validation false positives by switching to an auth-enforcing endpoint (`/v3/businesses/search`) and adding explicit auth error guards. 
- Added 37 new provider detection + HTTP validation rules: Ably, AbstractAPI, AbuseIPDB, AviationStack, Better Stack, Brevo, Clearout, Clerk, Cloudinary, Coinlayer, Contentstack, Currencylayer, Daily, Fixer, Geoapify, Hunter.io, Mux, NewsAPI, Numverify, OneSignal, Pinecone, Pingdom, Positionstack, Railway, Render, Rollbar, Salesloft, Sanity, StatusCake, Storyblok, UptimeRobot, urlscan.io, VirusTotal, WeatherAPI, Webflow, and ZeroBounce. diff --git a/README.md b/README.md index 996e0f9..4bfe0fc 100644 --- a/README.md +++ b/README.md @@ -645,6 +645,7 @@ kingfisher scan /tmp/repo --branch feature-1 \ | [FINGERPRINT.md](docs/FINGERPRINT.md) | Understanding finding fingerprints and deduplication | | [COMPARISON.md](docs/COMPARISON.md) | Benchmark results and performance comparisons | | [PARSING.md](docs/PARSING.md) | Language-aware parsing details | +| [TREE_SITTER.md](docs/TREE_SITTER.md) | Tree-sitter scanning flow, verification gates, and fallback behavior | # Library Usage diff --git a/crates/kingfisher-core/src/content_type.rs b/crates/kingfisher-core/src/content_type.rs index 2100f0c..7fe3b63 100644 --- a/crates/kingfisher-core/src/content_type.rs +++ b/crates/kingfisher-core/src/content_type.rs @@ -38,6 +38,8 @@ impl Default for ContentInspector { } impl ContentInspector { + /// Maximum bytes inspected for content-based language hints. + const LANGUAGE_SAMPLE_BYTES: usize = 4096; /// Create a new inspector with default thresholds. #[inline] pub fn new() -> Self { @@ -140,8 +142,12 @@ impl ContentInspector { } } + // Limit expensive content probing to a small prefix. This keeps language + // detection cheap for large files while preserving extension-based accuracy. + let sample = &content[..content.len().min(Self::LANGUAGE_SAMPLE_BYTES)]; + // 3) Shebang detection (in-memory): match by longest prefix, byte-wise (no UTF-8 needed). 
- if let Some(first_line) = content.split(|&b| b == b'\n').next() { + if let Some(first_line) = sample.split(|&b| b == b'\n').next() { if first_line.starts_with(b"#!") { for (prefix, lang) in SHEBANG_PREFIXES.iter() { if first_line.starts_with(prefix) { @@ -152,7 +158,7 @@ impl ContentInspector { } // 4) Lightweight content markers to catch a few ubiquitous cases without I/O. - let s = String::from_utf8_lossy(content); + let s = String::from_utf8_lossy(sample); if s.contains("01) \. - (?[0-9a-z]{2}) + (?[0-9a-zA-Z]{2}) (?[0-9a-z]{7}) ) \b diff --git a/crates/kingfisher-rules/data/rules/googleoauth2.yml b/crates/kingfisher-rules/data/rules/googleoauth2.yml index 9625ffe..ddbd412 100644 --- a/crates/kingfisher-rules/data/rules/googleoauth2.yml +++ b/crates/kingfisher-rules/data/rules/googleoauth2.yml @@ -2,10 +2,10 @@ rules: - name: Google OAuth2 Access Token id: kingfisher.google.oauth2.1 pattern: | - (?xi) + (?x) \b ( - ya29\.(?i:[a-z0-9_-]{30,}) + ya29\.(?i:[a-zA-Z0-9_-]{30,}) ) \b min_entropy: 3.5 diff --git a/crates/kingfisher-rules/data/rules/groq.yml b/crates/kingfisher-rules/data/rules/groq.yml index 28154cf..11725e3 100644 --- a/crates/kingfisher-rules/data/rules/groq.yml +++ b/crates/kingfisher-rules/data/rules/groq.yml @@ -2,15 +2,15 @@ rules: - name: Groq API Key id: kingfisher.groq.1 pattern: | - (?xi) + (?x) \b ( - gsk_[A-Z0-9]{52} + gsk_[A-Za-z0-9]{52} ) \b pattern_requirements: min_digits: 4 - confidence: medium + confidence: high min_entropy: 3.5 validation: type: Http diff --git a/crates/kingfisher-rules/data/rules/harness.yml b/crates/kingfisher-rules/data/rules/harness.yml index b12ab7c..6f5d273 100644 --- a/crates/kingfisher-rules/data/rules/harness.yml +++ b/crates/kingfisher-rules/data/rules/harness.yml @@ -2,15 +2,15 @@ rules: - name: Harness Personal Access Token (PAT) id: kingfisher.harness.pat.1 pattern: | - (?xi) + (?x) \b ( pat\. - [A-Z0-9_-]{22} + [A-Za-z0-9_-]{22} \. - [0-9a-f]{24} + [0-9a-fA-F]{24} \. 
- [A-Z0-9]{20} + [A-Za-z0-9]{20} ) \b pattern_requirements: diff --git a/crates/kingfisher-rules/data/rules/hubspot.yml b/crates/kingfisher-rules/data/rules/hubspot.yml index 5cb5372..e334ecb 100644 --- a/crates/kingfisher-rules/data/rules/hubspot.yml +++ b/crates/kingfisher-rules/data/rules/hubspot.yml @@ -2,7 +2,7 @@ rules: - name: HubSpot Private App Token id: kingfisher.hubspot.1 pattern: | - (?xi) + (?x) \b ( pat-[a-z0-9]{2,3}-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} diff --git a/crates/kingfisher-rules/data/rules/infracost.yml b/crates/kingfisher-rules/data/rules/infracost.yml index 5f046d3..6534802 100644 --- a/crates/kingfisher-rules/data/rules/infracost.yml +++ b/crates/kingfisher-rules/data/rules/infracost.yml @@ -2,16 +2,16 @@ rules: - name: Infracost API Token id: kingfisher.infracost.1 pattern: | - (?xi) + (?x) \b ( - ico-[a-z0-9]{32} + ico-[a-zA-Z0-9]{32} ) \b pattern_requirements: min_digits: 2 min_entropy: 3.3 - confidence: medium + confidence: high examples: - export INFRACOST_API_KEY=ico-abcd12abcdefabcdefabcdefabcdefab - '"infracost": "ico-1234567890abcdef1234567890abcdef"' diff --git a/crates/kingfisher-rules/data/rules/instagram.yml b/crates/kingfisher-rules/data/rules/instagram.yml index 8486bce..3425730 100644 --- a/crates/kingfisher-rules/data/rules/instagram.yml +++ b/crates/kingfisher-rules/data/rules/instagram.yml @@ -1,11 +1,17 @@ rules: - name: Instagram Graph API Access Token id: kingfisher.instagram.1 - pattern: '\b(IGQVJ[A-Za-z0-9_-]{50,256})\b' + pattern: | + (?x) + \b + ( + IGQVJ[A-Za-z0-9_-]{50,256} + ) + \b pattern_requirements: min_digits: 3 min_entropy: 3.5 - confidence: medium + confidence: high examples: - "INSTAGRAM_ACCESS_TOKEN=IGQVJWZAkFNT1liY2J3b3FmUHJVQzNfSUtlX3RhVm5FNGppZAjhCeUpEWnJIUWxCNkVrQjhGSU9rOVBuVnBaQUtKYjF6MjFGZA3VhZAXRxYkJGS01wS1BNLVlsaGtJYUV3" - 'instagram_token: "IGQVJXZAGtIR0hKVERjNXB5bnM0Y1BmUmFKa0RFZAnBXdVFKVUNNUjZAtVW5lZAXRhNGdLZA2V3OVlGS3dFZATVyYnlIVXlwcHdvdkxiS01wS1BNLVlsaGtJYUV3dg"' 
diff --git a/crates/kingfisher-rules/data/rules/intra42.yml b/crates/kingfisher-rules/data/rules/intra42.yml index 7e0a1fd..9611d4b 100644 --- a/crates/kingfisher-rules/data/rules/intra42.yml +++ b/crates/kingfisher-rules/data/rules/intra42.yml @@ -3,10 +3,10 @@ rules: id: kingfisher.intra42.1 visible: false pattern: | - (?xi) + (?x) \b ( - u-s4t2(?:ud|af)-[a-f0-9]{64} + u-s4t2(?:ud|af)-[a-fA-F0-9]{64} ) \b min_entropy: 3.0 diff --git a/crates/kingfisher-rules/data/rules/ionic.yml b/crates/kingfisher-rules/data/rules/ionic.yml index 40a57a7..ed3dfb4 100644 --- a/crates/kingfisher-rules/data/rules/ionic.yml +++ b/crates/kingfisher-rules/data/rules/ionic.yml @@ -2,17 +2,17 @@ rules: - name: Ionic API token id: kingfisher.ionic.1 pattern: | - (?xi) + (?x) \b ( ion_ - [a-z0-9]{42} + [a-zA-Z0-9]{42} ) \b pattern_requirements: min_digits: 2 min_entropy: 3.5 - confidence: medium + confidence: high examples: - ion_VNR17uGgdxr9P2aOrCulvSLTFDqijIV2ImQsOUhDEI validation: diff --git a/crates/kingfisher-rules/data/rules/jina.yml b/crates/kingfisher-rules/data/rules/jina.yml index 401cc08..775490b 100644 --- a/crates/kingfisher-rules/data/rules/jina.yml +++ b/crates/kingfisher-rules/data/rules/jina.yml @@ -10,7 +10,7 @@ rules: \b pattern_requirements: min_digits: 2 - confidence: medium + confidence: high min_entropy: 3.0 examples: - "JINA_KEY = os.getenv('JINA_KEY','jina_c1758c6f49e14ced990ac7776800dc45ShJNTXBCizzwjE6IMFYJ7LD959cG')" diff --git a/crates/kingfisher-rules/data/rules/looker.yml b/crates/kingfisher-rules/data/rules/looker.yml index 70621e0..8de65d2 100644 --- a/crates/kingfisher-rules/data/rules/looker.yml +++ b/crates/kingfisher-rules/data/rules/looker.yml @@ -5,10 +5,13 @@ rules: confidence: low min_entropy: 2.0 pattern: | - (?xi) + (?x) \b ( - https?://[a-z0-9.-]+(?::\d{2,5})? + https?:// + (?:[a-z0-9-]+\.)+ + looker\.com + (?::\d{2,5})? ) (?:/api/(?:4\.0|3\.1))? 
\b diff --git a/crates/kingfisher-rules/src/lib.rs b/crates/kingfisher-rules/src/lib.rs index b464638..2c8164a 100644 --- a/crates/kingfisher-rules/src/lib.rs +++ b/crates/kingfisher-rules/src/lib.rs @@ -27,7 +27,10 @@ pub use rule::{ pub use rules::{Rules, RulesError}; // Re-export RulesDatabase -pub use rules_database::{format_regex_pattern, RulesDatabase}; +pub use rules_database::{ + format_regex_pattern, RuleDetectionProfileKind, RuleMatchProfile, RulesDatabase, + TreeSitterFallbackPolicy, +}; // Re-export defaults pub use defaults::get_builtin_rules; diff --git a/crates/kingfisher-rules/src/rules_database.rs b/crates/kingfisher-rules/src/rules_database.rs index 6716e92..3531273 100644 --- a/crates/kingfisher-rules/src/rules_database.rs +++ b/crates/kingfisher-rules/src/rules_database.rs @@ -7,10 +7,30 @@ use vectorscan_rs::{BlockDatabase, Flag, Pattern}; use crate::rule::{Rule, RULE_COMMENTS_PATTERN}; +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum TreeSitterFallbackPolicy { + KeepRawWhenUnavailable, + SuppressWhenUnavailable, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RuleDetectionProfileKind { + SelfIdentifying, + ContextDependent, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RuleMatchProfile { + pub kind: RuleDetectionProfileKind, + pub fallback_policy: TreeSitterFallbackPolicy, + pub reason_codes: Vec<&'static str>, +} + pub struct RulesDatabase { // pub(crate) rules: Vec, pub(crate) rules: Vec>, pub(crate) anchored_regexes: Vec, + pub(crate) rule_match_profiles: Vec, pub(crate) vsdb: BlockDatabase, } @@ -27,12 +47,111 @@ pub fn format_regex_pattern(pattern: &str) -> String { } impl RulesDatabase { + fn classify_rule_profile(rule: &Rule) -> RuleMatchProfile { + Self::classify_rule_match_profile(rule) + } + + fn build_rule_profiles(rules: &[Arc]) -> Vec { + rules.iter().map(|r| Self::classify_rule_profile(r.as_ref())).collect() + } + pub fn get_regex_by_rule_id(&self, rule_id: &str) -> Option<&Regex> { self.rules 
.iter() .position(|r| r.syntax().id == rule_id) .and_then(|index| self.anchored_regexes.get(index)) } + pub fn classify_rule_match_profile(rule: &Rule) -> RuleMatchProfile { + let flattened = format_regex_pattern(&rule.syntax().pattern); + let normalized = flattened.to_lowercase(); + + let mut reason_codes: Vec<&'static str> = Vec::new(); + + let has_self_identifying_prefix = [ + "ccipat_", + "xoxb-", + "xoxa-", + "xoxp-", + "xapp-", + "ghp_", + "github_pat_", + "sk_live_", + "sk_test_", + "ltai", + "akia", + ] + .iter() + .any(|m| normalized.contains(m)); + if has_self_identifying_prefix { + reason_codes.push("self_identifying_prefix"); + return RuleMatchProfile { + kind: RuleDetectionProfileKind::SelfIdentifying, + fallback_policy: TreeSitterFallbackPolicy::KeepRawWhenUnavailable, + reason_codes, + }; + } + + let has_context_keywords = + ["secret", "token", "key", "password", "private", "access", "client", "auth"] + .iter() + .any(|m| normalized.contains(m)); + if has_context_keywords { + reason_codes.push("context_keywords"); + } + + let has_distance_operator = normalized.contains("(?:.|[\\n\\r]){0,"); + if has_distance_operator { + reason_codes.push("distance_operator"); + } + + let has_depends_on = !rule.syntax().depends_on_rule.is_empty(); + if has_depends_on { + reason_codes.push("depends_on_rule"); + } + + let max_quantifier = max_quantifier_min_value(&normalized); + let looks_generic_token = has_generic_token_class(&normalized) && max_quantifier >= 24; + if looks_generic_token { + reason_codes.push("generic_token_shape"); + } + + let mut score = 0usize; + if has_context_keywords { + score += 1; + } + if has_distance_operator { + score += 1; + } + if looks_generic_token { + score += 2; + } + if has_depends_on { + score += 1; + } + + let is_context_dependent = score >= 3; + if !is_context_dependent { + return RuleMatchProfile { + kind: RuleDetectionProfileKind::SelfIdentifying, + fallback_policy: TreeSitterFallbackPolicy::KeepRawWhenUnavailable, + 
reason_codes, + }; + } + + let fallback_policy = if looks_generic_token && has_distance_operator { + reason_codes.push("strict_fallback_suppress_when_unavailable"); + TreeSitterFallbackPolicy::SuppressWhenUnavailable + } else { + reason_codes.push("fallback_keep_when_unavailable"); + TreeSitterFallbackPolicy::KeepRawWhenUnavailable + }; + + RuleMatchProfile { + kind: RuleDetectionProfileKind::ContextDependent, + fallback_policy, + reason_codes, + } + } pub fn get_rule_by_finding_fingerprint(&self, finding_fingerprint: &str) -> Option> { self.rules.iter().find(|r| r.finding_sha1_fingerprint() == finding_fingerprint).cloned() @@ -68,8 +187,9 @@ impl RulesDatabase { Ok(vsdb) => { let d1 = t1.elapsed().as_secs_f64(); let (anchored_regexes, d2) = Self::compile_regexes(&rules)?; + let rule_match_profiles = Self::build_rule_profiles(&rules); debug!("Compiled {} rules: vectorscan {}s; regex {}s", rules.len(), d1, d2); - Ok(RulesDatabase { rules, vsdb, anchored_regexes }) + Ok(RulesDatabase { rules, vsdb, anchored_regexes, rule_match_profiles }) } Err(e) => { error!( @@ -137,7 +257,13 @@ impl RulesDatabase { bail!("Failed to compile the following rules:\n{}", error_messages.join("\n")); } let vsdb = BlockDatabase::new(compiled_patterns)?; - Ok(RulesDatabase { rules: compiled_rules, vsdb, anchored_regexes: compiled_regexes }) + let rule_match_profiles = Self::build_rule_profiles(&compiled_rules); + Ok(RulesDatabase { + rules: compiled_rules, + vsdb, + anchored_regexes: compiled_regexes, + rule_match_profiles, + }) } fn compile_regexes(rules: &[Arc]) -> Result<(Vec, f64)> { @@ -192,7 +318,52 @@ impl RulesDatabase { pub fn anchored_regexes(&self) -> &[Regex] { &self.anchored_regexes } + + #[inline] + pub fn rule_match_profiles(&self) -> &[RuleMatchProfile] { + &self.rule_match_profiles + } } + +fn has_generic_token_class(normalized_pattern: &str) -> bool { + [ + "[a-za-z0-9]{", + "[a-z0-9]{", + "[a-f0-9]{", + "[a-z0-9_-]{", + "[a-za-z0-9_-]{", + "[a-za-z0-9+/]{", + 
"[a-za-z0-9+/=]{", + ] + .iter() + .any(|needle| normalized_pattern.contains(needle)) +} + +fn max_quantifier_min_value(normalized_pattern: &str) -> usize { + let mut max_seen = 0usize; + let bytes = normalized_pattern.as_bytes(); + let mut i = 0usize; + while i < bytes.len() { + if bytes[i] != b'{' { + i += 1; + continue; + } + let mut j = i + 1; + let mut val = 0usize; + let mut saw_digit = false; + while j < bytes.len() && bytes[j].is_ascii_digit() { + saw_digit = true; + val = val.saturating_mul(10).saturating_add((bytes[j] - b'0') as usize); + j += 1; + } + if saw_digit && val > max_seen { + max_seen = val; + } + i = j.saturating_add(1); + } + max_seen +} + #[cfg(test)] mod test_vectorscan { use pretty_assertions::assert_eq; @@ -249,3 +420,60 @@ mod test_regex_cleaning { println!("{}", data); } } + +#[cfg(test)] +mod test_rule_match_profiles { + use super::*; + use crate::rule::{Confidence, RuleSyntax, Validation}; + + fn mk_rule(id: &str, pattern: &str) -> Rule { + Rule::new(RuleSyntax { + id: id.to_string(), + name: id.to_string(), + pattern: pattern.to_string(), + confidence: Confidence::Medium, + min_entropy: 0.0, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None::, + revocation: None, + depends_on_rule: vec![], + pattern_requirements: None, + tls_mode: None, + }) + } + + #[test] + fn classifies_self_identifying_prefix_rule() { + let rule = + mk_rule("kingfisher.circleci.1", r"(?x)\b(CCIPAT_[A-Za-z0-9]{22}_[a-z0-9]{40})\b"); + let profile = RulesDatabase::classify_rule_profile(&rule); + assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying); + assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::KeepRawWhenUnavailable); + assert!(profile.reason_codes.contains(&"self_identifying_prefix")); + } + + #[test] + fn classifies_context_dependent_generic_rule() { + let rule = mk_rule( + "kingfisher.auth0.2", + 
r"(?xi)\bauth0(?:.|[\n\r]){0,16}?(?:secret|token)(?:.|[\n\r]){0,64}?\b([a-z0-9_-]{64,})\b", + ); + let profile = RulesDatabase::classify_rule_profile(&rule); + assert_eq!(profile.kind, RuleDetectionProfileKind::ContextDependent); + assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::SuppressWhenUnavailable); + assert!(profile.reason_codes.contains(&"generic_token_shape")); + } + + #[test] + fn context_like_rule_is_parser_gated() { + let rule = mk_rule( + "kingfisher.example.1", + r"(?xi)\bexample(?:.|[\n\r]){0,16}?(?:secret|token)(?:.|[\n\r]){0,64}?\b([a-z0-9_-]{64,})\b", + ); + let profile = RulesDatabase::classify_rule_profile(&rule); + assert_eq!(profile.kind, RuleDetectionProfileKind::ContextDependent); + } +} diff --git a/docs/PARSING.md b/docs/PARSING.md index 0697bf9..a6f2a43 100644 --- a/docs/PARSING.md +++ b/docs/PARSING.md @@ -1,21 +1,26 @@ # Kingfisher Source Code Parsing [← Back to README](../README.md) -Kingfisher leverages tree‐sitter as an extra layer of analysis when scanning source files written in supported programming languages. In practice, after its initial regex‐based scan (powered by Vectorscan), Kingfisher checks if the file’s language is known. +Kingfisher leverages tree-sitter as an extra layer of analysis when scanning source files written in supported programming languages. In practice, after its initial regex-based scan (powered by Vectorscan/Hyperscan), Kingfisher can run a targeted verification pass for context-dependent rules. If so, it creates a Checker (see below) that uses tree‐sitter to parse the file and run language‐specific queries. This additional pass refines the detection by capturing more structured patterns—such as secret-like tokens—that might be obscured or spread over code constructs. -### How It’s Called +## How It’s Called -In the scanning phase (in the Matcher’s implementation), Kingfisher does the following: -- **Language Detection:** When processing a blob, if a language string is provided (e.g. 
inferred from file metadata or extension), the code calls a helper (via a function like `get_language_and_queries`) to retrieve the corresponding tree‐sitter language and a set of queries. -- **Checker Creation:** With these values, a `Checker` struct is instantiated. This struct holds both the target language (as defined in its `Language` enum) and a map of tree‐sitter queries to run. -- **Parsing and Querying:** The Checker’s key method (e.g. `check` or indirectly via `modify_regex`) retrieves a thread‐local tree‐sitter parser (to avoid recreating the parser on every call), sets the appropriate language, and parses the source code into a syntax tree. It then executes the queries over that tree, extracting ranges and texts of interest that might represent secrets. +In the scanning phase (in the Matcher's implementation), Kingfisher does the following: + +- **Primary Regex Pass:** Kingfisher always scans the full blob with Vectorscan/Hyperscan first. +- **Candidate Selection:** Findings from rules classified as context-dependent become tree-sitter verification candidates. +- **Language Detection:** If a language string is provided (for example from metadata or extension), the code calls a helper (such as `get_language_and_queries`) to retrieve the corresponding tree-sitter language and queries. +- **Checker Creation:** With those values, a `Checker` is instantiated with the target language and query map. +- **Parsing and Querying:** The Checker retrieves a thread-local parser (to avoid recreating it on every call), sets language, parses source, and runs queries to extract structured snippets (for example `key = value` pairs). +- **Verification Decision:** Candidate findings are kept only if parser-extracted context verifies the matched secret. If tree-sitter is unavailable, fallback behavior is profile-driven (for strict generic keyword+token rules, findings are suppressed). 
*(See the implementation details in the parser module – for example, the `modify_regex` function in the Checker, and the conditional tree‐sitter call in Matcher::scan_blob)* -### Supported Languages +## Supported Languages The design supports many common source code languages. The Language enum (defined in the parser module) includes variants for: + - **Scripting:** Bash, Python, Ruby, PHP - **Compiled languages:** C, C++, C#, Rust, Java - **Web-related languages:** CSS, HTML, JavaScript, TypeScript, YAML, Toml @@ -23,14 +28,15 @@ The design supports many common source code languages. The Language enum (define Each variant maps to its corresponding tree‐sitter language through the `get_ts_language()` method. -### When Tree‐sitter Is Not Called +## When Tree‐sitter Is Not Called Tree‐sitter won’t be invoked in certain cases: + - **No Language Identified:** If the file isn’t recognized as belonging to one of the supported languages or no language hint is provided, the Checker isn’t even constructed. - **Non-source Files:** Binary files or files that aren’t expected to contain code (or aren’t extracted from archives) bypass tree‐sitter parsing. - **Fallback on Errors:** If tree‐sitter parsing fails (e.g. due to malformed code or other errors), Kingfisher will fall back on its regex/Vectorscan matches without the additional tree‐sitter insights. -### Summary +## Summary In essence, Kingfisher’s use of tree‐sitter is conditional and complementary. It is called only when the scanned file is a source code file written in a supported language, and its role is to enrich the scanning results by leveraging the syntax tree and language-specific queries. When files are non-source, binary, or if no language is provided, tree‐sitter is not invoked, and Kingfisher relies solely on its regex-based detection. 
diff --git a/docs/TREE_SITTER.md b/docs/TREE_SITTER.md new file mode 100644 index 0000000..1e11222 --- /dev/null +++ b/docs/TREE_SITTER.md @@ -0,0 +1,105 @@ +# Tree-sitter in Kingfisher Scanning + +[← Back to README](../README.md) + +This document explains how Tree-sitter is used during scanning, and when it is intentionally skipped. + +## What Tree-sitter Is Used For + +Kingfisher always starts with a fast regex pass (Vectorscan/Hyperscan). Tree-sitter is a secondary verification layer used only for context-dependent findings. + +The goal is to confirm that a regex hit appears in a plausible code assignment/config context (for example `api_key = "..."`) before keeping the finding. + +## Where It Runs in the Scan Pipeline + +1. `BlobProcessor::run` decides whether to compute a language hint. + - It skips language hinting in `fast_mode`. + - It also skips when blob size is outside the Tree-sitter window. +2. `Matcher::scan_blob` performs the primary regex scan and other filtering. +3. `maybe_apply_tree_sitter_verification` runs near the end of `scan_blob`. +4. Only candidate matches are checked against Tree-sitter extracted text. +5. Matches that fail verification can be dropped, depending on rule profile and fallback policy. + +## Size and Mode Gates + +Tree-sitter is attempted only when all of these are true: + +- Blob length is between `0` bytes and `128 KiB` (`should_attempt_tree_sitter`). +- `fast_mode` is disabled. +- A language hint is available. +- The language maps to a supported Tree-sitter grammar + query set. + +If any of these conditions fails, Tree-sitter verification is considered unavailable for that blob. + +## Candidate Selection (Not Every Match) + +Tree-sitter verification is only applied to matches that are: + +- Classified as `ContextDependent` by rule profiling. +- Not base64-derived findings (`is_base64 == false`). 
+ +Classification and fallback policy come from rule profiles in `kingfisher-rules`: + +- `SelfIdentifying`: usually keep raw regex result. +- `ContextDependent`: may require Tree-sitter confirmation. + +## How Verification Works + +When Tree-sitter is available: + +1. `load_tree_sitter_results` builds a `Checker` with: + - `Language` enum value + - language-specific queries from `src/parser/queries.rs` +2. `Checker::check`: + - Reuses a thread-local parser cache (`PARSER_CACHE`) + - Parses source into a syntax tree + - Runs language query patterns capturing `@key` and `@val` + - Produces normalized strings like `key = value` + - Attempts base64 decode of value and keeps decoded ASCII form when valid +3. For each candidate finding, Kingfisher re-runs that rule's anchored regex on each extracted Tree-sitter text fragment. +4. Verification succeeds only when the rule's secret capture equals the original matched secret bytes. + +If no extracted fragment verifies the secret, that candidate finding is removed. + +## Fallback Behavior When Tree-sitter Is Unavailable + +If Tree-sitter cannot run (size/mode/language/parse errors), behavior is rule-driven: + +- `KeepRawWhenUnavailable`: keep the regex finding. +- `SuppressWhenUnavailable`: drop the finding. + +`SuppressWhenUnavailable` is used for stricter generic-context patterns where false positives are likely without syntax-aware confirmation. + +## Supported Languages in This Path + +Language mapping for verification currently includes: + +- `bash`/`shell` +- `c` +- `c#`/`csharp` +- `c++`/`cpp` +- `css` +- `go` +- `html` +- `java` +- `javascript`/`js` +- `php` +- `python`/`py`/`starlark` +- `ruby` +- `rust` +- `toml` +- `typescript`/`ts` +- `yaml` + +The Tree-sitter query definitions for these languages live in `src/parser/queries.rs`. + +## Operational Summary + +Tree-sitter in Kingfisher is a conditional verifier, not the primary detector: + +- Regex finds candidates quickly. 
+- Rule profiling decides which candidates need context verification. +- Tree-sitter confirms contextual plausibility from parsed syntax. +- Fallback policy determines what to do when verification cannot run. + +This keeps scanning fast while reducing noisy matches for context-dependent secret patterns. diff --git a/src/matcher/mod.rs b/src/matcher/mod.rs index 6324ba7..6735e52 100644 --- a/src/matcher/mod.rs +++ b/src/matcher/mod.rs @@ -16,6 +16,7 @@ use std::sync::{Arc, Mutex}; use anyhow::Result; use http::StatusCode; use rustc_hash::{FxHashMap, FxHashSet}; +use tracing::debug; use crate::{ blob::{Blob, BlobId, BlobIdMap}, @@ -26,10 +27,11 @@ use crate::{ parser::{Checker, Language}, rule_profiling::{ConcurrentRuleProfiler, RuleStats}, rules::rule::Rule, - rules_database::RulesDatabase, + rules_database::{RuleDetectionProfileKind, RulesDatabase, TreeSitterFallbackPolicy}, scanner_pool::ScannerPool, validation_body::ValidationResponseBody, }; +use kingfisher_scanner::primitives::find_secret_capture; use self::{ base64_decode::get_base64_strings as get_b64_strings, dedup::record_match, filter::filter_match, @@ -38,8 +40,13 @@ use self::{ const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment const CHUNK_OVERLAP: usize = 64 * 1024; // 64 KiB overlap to catch boundary matches const BASE64_SCAN_LIMIT: usize = 64 * 1024 * 1024; // skip expensive Base64 pass on huge blobs -const TREE_SITTER_MAX_LIMIT: usize = 64 * 1024; // only run tree-sitter on blobs <= 64 KiB -const TREE_SITTER_MIN_LIMIT: usize = 1 * 1024; // only run tree-sitter on blobs >= 1 KiB +const TREE_SITTER_MAX_LIMIT: usize = 128 * 1024; // only run tree-sitter on blobs <= 128 KiB +const TREE_SITTER_MIN_LIMIT: usize = 0; // allow tree-sitter starting at 0 bytes + +#[inline] +pub(crate) fn should_attempt_tree_sitter(blob_len: usize) -> bool { + blob_len <= TREE_SITTER_MAX_LIMIT && blob_len >= TREE_SITTER_MIN_LIMIT +} // 
------------------------------------------------------------------------------------------------- // RawMatch @@ -281,48 +288,10 @@ impl<'a> Matcher<'a> { let rules_db = self.rules_db; let mut seen_matches = FxHashSet::default(); let mut previous_matches: FxHashMap> = FxHashMap::default(); + let mut match_rule_indices: Vec = Vec::new(); let blob_len = blob.len(); - - let should_run_tree_sitter = blob_len > 0 - && blob_len <= TREE_SITTER_MAX_LIMIT - && blob_len >= TREE_SITTER_MIN_LIMIT - && has_raw_matches - && lang_hint.is_some(); - - let tree_sitter_result = if should_run_tree_sitter { - lang_hint.and_then(|lang_str| { - get_language_and_queries(lang_str).and_then(|(language, queries)| { - let checker = Checker { language, rules: queries }; - match checker.check(&blob.bytes()) { - Ok(results) => Some(results), - Err(e) => { - println!("Error in checker.check: {}", e); - None - } - } - }) - }) - } else { - None - }; - // - // Process matches - // let mut matches = Vec::new(); - let owned_ts_results = tree_sitter_result.map(|ts_results| { - ts_results - .into_iter() - .map(|match_result| { - ( - match_result.range, - match_result.text, - match_result.is_base64_decoded, - match_result.original_base64, - ) - }) - .collect::>() - }); let mut previous_raw_matches: FxHashMap> = FxHashMap::default(); for &RawMatch { rule_id, start_idx, end_idx } in self.user_data.raw_matches_scratch.iter().rev() @@ -336,6 +305,7 @@ impl<'a> Matcher<'a> { if !record_match(&mut previous_raw_matches, rule_id_usize, current_span) { continue; } + let before_len = matches.len(); filter_match( blob, rule, @@ -355,68 +325,8 @@ impl<'a> Matcher<'a> { self.respect_ignore_if_contains, &self.inline_ignore_config, ); - } - // Pre-filter tree-sitter extracted key-value pairs through Vectorscan, - // then only run the anchored regex for rules that Vectorscan flags as candidates. 
- if let Some(ref ts_results) = owned_ts_results { - if !ts_results.is_empty() { - // Build a combined buffer of all tree-sitter texts separated by newlines - // so we can run a single Vectorscan pass instead of one per result. - let mut combined_buf = Vec::new(); - let mut segment_ends: Vec = Vec::with_capacity(ts_results.len()); - for (_ts_range, ts_match, _is_base64_decoded, _original_base64) in ts_results.iter() - { - combined_buf.extend_from_slice(ts_match.as_bytes()); - segment_ends.push(combined_buf.len()); - combined_buf.push(b'\n'); - } - - // Single Vectorscan pass over the combined buffer - let mut ts_raw_matches: Vec<(u32, u64)> = Vec::new(); - self.scanner_pool.with(|scanner| { - scanner.scan(&combined_buf, |rule_id, _from, to, _flags| { - ts_raw_matches.push((rule_id, to)); - vectorscan_rs::Scan::Continue - }) - })?; - - // Map each Vectorscan hit back to its tree-sitter result and dedup - let mut rule_ts_pairs: FxHashSet<(usize, usize)> = FxHashSet::default(); - for &(rule_id, to) in &ts_raw_matches { - let to = to as usize; - let seg_idx = segment_ends.partition_point(|&end| end < to); - if seg_idx < ts_results.len() { - rule_ts_pairs.insert((rule_id as usize, seg_idx)); - } - } - - // Only run the anchored regex for (rule, ts_result) pairs Vectorscan flagged - for (rule_id_usize, ts_idx) in rule_ts_pairs { - let (ts_range, ts_match, is_base64_decoded, _original_base64) = - &ts_results[ts_idx]; - let rule = Arc::clone(&rules_db.rules()[rule_id_usize]); - let re = &rules_db.anchored_regexes()[rule_id_usize]; - filter_match( - blob, - rule, - re, - ts_range.start, - ts_range.end, - &mut matches, - &mut previous_matches, - rule_id_usize, - &mut seen_matches, - origin, - Some(ts_match.as_bytes()), - *is_base64_decoded, - redact, - &filename, - self.profiler.as_ref(), - self.respect_ignore_if_contains, - &self.inline_ignore_config, - ); - } - } + match_rule_indices + .extend(std::iter::repeat_n(rule_id_usize, matches.len() - before_len)); } if !no_base64 
{ @@ -427,6 +337,7 @@ impl<'a> Matcher<'a> { while let Some((item, depth)) = b64_stack.pop() { for (rule_id_usize, rule) in rules_db.rules().iter().enumerate() { let re = &rules_db.anchored_regexes()[rule_id_usize]; + let before_len = matches.len(); filter_match( blob, rule.clone(), @@ -446,6 +357,8 @@ impl<'a> Matcher<'a> { self.respect_ignore_if_contains, &self.inline_ignore_config, ); + match_rule_indices + .extend(std::iter::repeat_n(rule_id_usize, matches.len() - before_len)); } if depth + 1 < MAX_B64_DEPTH { for nested in get_b64_strings(item.decoded.as_slice()) { @@ -461,6 +374,14 @@ impl<'a> Matcher<'a> { } } } + maybe_apply_tree_sitter_verification( + rules_db, + blob, + lang_hint, + blob_len, + &mut matches, + &match_rule_indices, + ); // Finalize if !no_dedup && !matches.is_empty() { let blob_id = blob.id(); @@ -486,6 +407,108 @@ impl<'a> Matcher<'a> { } } +fn maybe_apply_tree_sitter_verification<'a>( + rules_db: &RulesDatabase, + blob: &'a Blob, + lang_hint: Option<&str>, + blob_len: usize, + matches: &mut Vec>, + match_rule_indices: &[usize], +) { + if matches.is_empty() { + return; + } + + let profiles = rules_db.rule_match_profiles(); + let candidate_indices: Vec = matches + .iter() + .enumerate() + .filter(|(idx, m)| { + if m.is_base64 { + return false; + } + let Some(rule_idx) = match_rule_indices.get(*idx) else { + return false; + }; + profiles[*rule_idx].kind == RuleDetectionProfileKind::ContextDependent + }) + .map(|(idx, _)| idx) + .collect(); + + if candidate_indices.is_empty() { + return; + } + + let ts_results = load_tree_sitter_results(blob, lang_hint, blob_len); + let mut keep = vec![true; matches.len()]; + + for idx in candidate_indices { + let Some(rule_idx) = match_rule_indices.get(idx).copied() else { + continue; + }; + let profile = &profiles[rule_idx]; + let match_secret = matches[idx].matching_input; + let re = &rules_db.anchored_regexes()[rule_idx]; + + match ts_results.as_ref() { + Some(results) => { + let verified = 
results.iter().any(|text| { + verify_match_in_tree_sitter_text(re, match_secret, text.as_bytes()) + }); + if !verified { + keep[idx] = false; + } + } + None => { + if profile.fallback_policy == TreeSitterFallbackPolicy::SuppressWhenUnavailable { + keep[idx] = false; + } + } + } + } + + if keep.iter().all(|k| *k) { + return; + } + + let mut filtered = Vec::with_capacity(matches.len()); + for (idx, item) in std::mem::take(matches).into_iter().enumerate() { + if keep[idx] { + filtered.push(item); + } + } + *matches = filtered; +} + +fn load_tree_sitter_results( + blob: &Blob, + lang_hint: Option<&str>, + blob_len: usize, +) -> Option> { + if !should_attempt_tree_sitter(blob_len) { + return None; + } + let lang = lang_hint?; + let (language, queries) = get_language_and_queries(lang)?; + let checker = Checker { language, rules: queries }; + match checker.check(&blob.bytes()) { + Ok(results) => Some(results.into_iter().map(|m| m.text).collect()), + Err(e) => { + debug!("tree-sitter verification unavailable: {e}"); + None + } + } +} + +fn verify_match_in_tree_sitter_text( + re: &regex::bytes::Regex, + expected_secret: &[u8], + text: &[u8], +) -> bool { + re.captures_iter(text) + .any(|captures| find_secret_capture(re, &captures).as_bytes() == expected_secret) +} + fn get_language_and_queries(lang: &str) -> Option<(Language, FxHashMap)> { match lang.to_lowercase().as_str() { "bash" | "shell" => Some((Language::Bash, parser::queries::bash::get_bash_queries())), @@ -1049,4 +1072,177 @@ line2 assert_eq!(entries[1], (Some("body"), 2, "ABC")); assert_eq!(entries[2], (Some("checksum"), 3, "12")); } + + #[test] + fn parser_second_pass_keeps_verified_contextual_match() -> Result<()> { + let token = "abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234"; + let rule = Rule::new(RuleSyntax { + id: "kingfisher.auth0.2".into(), + name: "auth0 secret".into(), + pattern: "(?xi)\\bauth0(?:.|[\\n\\r]){0,16}?(?:secret|token)(?:.|[\\n\\r]){0,64}?\\b([a-z0-9_-]{64,})\\b".into(), +
confidence: crate::rules::rule::Confidence::Medium, + min_entropy: 0.0, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None::, + revocation: None, + depends_on_rule: vec![], + pattern_requirements: None, + tls_mode: None, + }); + + let rules_db = RulesDatabase::from_rules(vec![rule])?; + let seen = BlobIdMap::new(); + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone()))); + let mut matcher = + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?; + + let mut content = "x".repeat(1200); + content.push_str(&format!("\nauth0_client_secret = \"{token}\"\n")); + let blob = Blob::from_bytes(content.into_bytes()); + let origin = OriginSet::from(Origin::from_file(PathBuf::from("verified.py"))); + + let found = match matcher.scan_blob( + &blob, + &origin, + Some("python".to_string()), + false, + false, + false, + )? { + ScanResult::New(matches) => matches, + _ => panic!("unexpected scan result"), + }; + assert_eq!(found.len(), 1); + Ok(()) + } + + #[test] + fn parser_second_pass_suppresses_unverified_contextual_match() -> Result<()> { + let token = "abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234"; + let rule = Rule::new(RuleSyntax { + id: "kingfisher.auth0.2".into(), + name: "auth0 secret".into(), + pattern: "(?xi)\\bauth0(?:.|[\\n\\r]){0,16}?(?:secret|token)(?:.|[\\n\\r]){0,64}?\\b([a-z0-9_-]{64,})\\b".into(), + confidence: crate::rules::rule::Confidence::Medium, + min_entropy: 0.0, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None::, + revocation: None, + depends_on_rule: vec![], + pattern_requirements: None, + tls_mode: None, + }); + + let rules_db = RulesDatabase::from_rules(vec![rule])?; + let seen = BlobIdMap::new(); + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone()))); + let mut matcher = + Matcher::new(&rules_db, scanner_pool, &seen, 
None, false, None, &[], false, true)?; + + let mut content = "x".repeat(1200); + content.push_str(&format!("\n# auth0 secret {token}\n")); + let blob = Blob::from_bytes(content.into_bytes()); + let origin = OriginSet::from(Origin::from_file(PathBuf::from("comment.py"))); + + let found = match matcher.scan_blob( + &blob, + &origin, + Some("python".to_string()), + false, + false, + false, + )? { + ScanResult::New(matches) => matches, + _ => panic!("unexpected scan result"), + }; + assert!( + found.is_empty(), + "comment-only contextual hits should be suppressed when tree-sitter cannot verify assignment context" + ); + Ok(()) + } + + #[test] + fn strict_context_rule_suppresses_when_tree_sitter_unavailable() -> Result<()> { + let token = "abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234"; + let rule = Rule::new(RuleSyntax { + id: "kingfisher.auth0.2".into(), + name: "auth0 secret".into(), + pattern: "(?xi)\\bauth0(?:.|[\\n\\r]){0,16}?(?:secret|token)(?:.|[\\n\\r]){0,64}?\\b([a-z0-9_-]{64,})\\b".into(), + confidence: crate::rules::rule::Confidence::Medium, + min_entropy: 0.0, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None::, + revocation: None, + depends_on_rule: vec![], + pattern_requirements: None, + tls_mode: None, + }); + + let rules_db = RulesDatabase::from_rules(vec![rule])?; + let seen = BlobIdMap::new(); + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone()))); + let mut matcher = + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?; + + let content = format!("auth0 token {token}"); + let blob = Blob::from_bytes(content.into_bytes()); + let origin = OriginSet::from(Origin::from_file(PathBuf::from("small.txt"))); + + let found = match matcher.scan_blob(&blob, &origin, None, false, false, false)? 
{ + ScanResult::New(matches) => matches, + _ => panic!("unexpected scan result"), + }; + assert!( + found.is_empty(), + "strict contextual rules should suppress when tree-sitter is unavailable for verification" + ); + Ok(()) + } + + #[test] + fn self_identifying_rule_remains_hyperscan_only() -> Result<()> { + let token = "CCIPAT_FERZRjTN451xnDCy1y9gWn_79fb6ca4d0e5f833612eee17de397a9dca0a9e9f"; + let rule = Rule::new(RuleSyntax { + id: "kingfisher.circleci.1".into(), + name: "circleci pat".into(), + pattern: "(?x)\\b(CCIPAT_[A-Za-z0-9]{22}_[a-z0-9]{40})\\b".into(), + confidence: crate::rules::rule::Confidence::Medium, + min_entropy: 0.0, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None::, + revocation: None, + depends_on_rule: vec![], + pattern_requirements: None, + tls_mode: None, + }); + + let rules_db = RulesDatabase::from_rules(vec![rule])?; + let seen = BlobIdMap::new(); + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone()))); + let mut matcher = + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?; + + let blob = Blob::from_bytes(format!("token={token}").into_bytes()); + let origin = OriginSet::from(Origin::from_file(PathBuf::from("circleci.txt"))); + + let found = match matcher.scan_blob(&blob, &origin, None, false, false, false)? 
{ + ScanResult::New(matches) => matches, + _ => panic!("unexpected scan result"), + }; + assert_eq!(found.len(), 1, "self-identifying tokens should remain raw-pass findings"); + Ok(()) + } } diff --git a/src/reporter.rs b/src/reporter.rs index f8c8464..4374c4f 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -1056,7 +1056,7 @@ impl DetailsReporter { finding: FindingRecordData { snippet, fingerprint: rm.m.finding_fingerprint.to_string(), - confidence: rm.m.rule.confidence().to_string(), + confidence: rm.match_confidence.to_string(), entropy: format!("{:.2}", rm.m.calculated_entropy), validation: ValidationInfo { status: validation_status, response: response_body }, language: rm diff --git a/src/rules_database.rs b/src/rules_database.rs index e2f7122..396fdf9 100644 --- a/src/rules_database.rs +++ b/src/rules_database.rs @@ -2,4 +2,7 @@ //! //! This module re-exports types from [`kingfisher_rules::rules_database`]. -pub use kingfisher_rules::rules_database::{format_regex_pattern, RulesDatabase}; +pub use kingfisher_rules::rules_database::{ + format_regex_pattern, RuleDetectionProfileKind, RuleMatchProfile, RulesDatabase, + TreeSitterFallbackPolicy, +}; diff --git a/src/scanner/processing.rs b/src/scanner/processing.rs index fc441b0..42e2327 100644 --- a/src/scanner/processing.rs +++ b/src/scanner/processing.rs @@ -6,7 +6,7 @@ use crate::{ blob::{Blob, BlobMetadata}, content_type::ContentInspector, location::LocationMapping, - matcher::{Match, Matcher, OwnedBlobMatch, ScanResult}, + matcher::{should_attempt_tree_sitter, Match, Matcher, OwnedBlobMatch, ScanResult}, origin::{Origin, OriginSet}, scanner::repos::DatastoreMessage, Path, @@ -32,7 +32,7 @@ impl<'a> BlobProcessor<'a> { ) -> Result> { let _span = debug_span!("matcher", temp_id = blob.temp_id()).entered(); let t1 = Instant::now(); - let language_hint = if fast_mode { + let language_hint = if fast_mode || !should_attempt_tree_sitter(blob.len()) { None } else { origin From 
b518e349dff0d7b793b4112ec00f2ca94de931fa Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 9 Mar 2026 20:46:08 -0700 Subject: [PATCH 09/12] v1.87.0 --- crates/kingfisher-rules/data/rules/google.yml | 33 ++----------------- crates/kingfisher-rules/data/rules/redis.yml | 13 +------- tests/int_uri_parsing.rs | 5 +-- tests/int_vulnerable_files.rs | 2 +- 4 files changed, 7 insertions(+), 46 deletions(-) diff --git a/crates/kingfisher-rules/data/rules/google.yml b/crates/kingfisher-rules/data/rules/google.yml index 8e7acdc..c6c9e66 100644 --- a/crates/kingfisher-rules/data/rules/google.yml +++ b/crates/kingfisher-rules/data/rules/google.yml @@ -26,17 +26,7 @@ rules: - name: Google OAuth Client Secret id: kingfisher.google.3 - pattern: | - (?xi) - client.?secret - (?:.|[\n\r]){0,10}? - [ \t]*[=:][ \t]* - ['"]? - ( - [a-z0-9_-]{24} - ) - ['"]? - (?: [^a-z0-9_-] |$) + pattern: (?i)(?:client[_\-. ]?secret)(?:.|[\n\r]){0,20}?[=:][ \t]*['"]?([A-Z0-9_-]{24})['"]?(?:[^A-Z0-9_-]|$) pattern_requirements: min_digits: 1 min_uppercase: 3 @@ -74,26 +64,7 @@ rules: - name: Google OAuth Credentials id: kingfisher.google.6 - pattern: | - (?xi) - ([0-9]+-[a-z0-9_]{32}\.apps\.googleusercontent\.com) - (?: - (?s).{0,40} - ) - (?: - (GOCSPX-[A-Z0-9_-]{28}) - | - (?: - (?i) - client.?secret - (?:.|[\n\r]){0,10}? - [ \t]*[=:][ \t]* - ['"]? - \b ([A-Z0-9_-]{24}) - ['"]? - ) - ) - (?:[^A-Z0-9_-] | $) + pattern: (?is)([0-9]+-[a-z0-9_]{32}\.apps\.googleusercontent\.com)(?:.|[\n\r]){0,120}?(?:GOCSPX-[A-Z0-9_-]{28}|(?:client[_\-. 
]?secret)(?:.|[\n\r]){0,20}?[=:][ \t]*['"]?([A-Z0-9_-]{24})['"]?)(?:[^A-Z0-9_-]|$) pattern_requirements: min_digits: 2 min_entropy: 3.3 diff --git a/crates/kingfisher-rules/data/rules/redis.yml b/crates/kingfisher-rules/data/rules/redis.yml index 0330a1b..9c92f9b 100644 --- a/crates/kingfisher-rules/data/rules/redis.yml +++ b/crates/kingfisher-rules/data/rules/redis.yml @@ -77,18 +77,7 @@ rules: - id: kingfisher.redis.3 name: Redis Password (Standalone Config) # Detects REDIS_PASSWORD, redis_password, redis.password etc. in env vars and config files - pattern: | - (?xi) - \b - (?:REDIS|redis) - [-_.]? - (?:PASSWORD|PASS|PASSWD|AUTH|SECRET|TOKEN) - \b - (?:.|[\n\r]){0,24}? - [ \t]*[=:][ \t]* - ['"]? - (?P[a-zA-Z0-9%;._~!$&'()*+,;=/*+-]{8,64}) - ['"]? + pattern: (?i)\b(?:redis[-_.]?(?:password|pass|passwd|auth|secret|token)|config\.redis\.auth)\b(?:.|[\n\r]){0,24}?[=:][ \t]*['"]?([a-zA-Z0-9%;._~!$&'()*+,;=/*+-]{8,64})['"]? pattern_requirements: ignore_if_contains: diff --git a/tests/int_uri_parsing.rs b/tests/int_uri_parsing.rs index 927640e..d85aa7b 100644 --- a/tests/int_uri_parsing.rs +++ b/tests/int_uri_parsing.rs @@ -7,8 +7,9 @@ use tempfile::tempdir; fn filters_invalid_mongodb_uri_even_without_validation() -> anyhow::Result<()> { let dir = tempdir()?; let file_path = dir.path().join("mongo.txt"); - let valid = "mongodb://usr:pass@exmple.com:27017/db"; - let invalid = "mongodb://usr:pass@exmple.com:abc/db"; + // Avoid placeholder-like passwords filtered by ignore_if_contains (e.g. :pass@). 
+ let valid = "mongodb://usr:p4ssw0rd123@exmple.com:27017/db"; + let invalid = "mongodb://usr:p4ssw0rd123@exmple.com:abc/db"; fs::write(&file_path, format!("{valid}\n{invalid}\n"))?; Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 389f28e..5dea201 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -359,7 +359,7 @@ async fn test_scan_vulnerable_files() -> Result<()> { let test_cases = vec![ TestCase { file_name: "testdata/c_vulnerable.c", min_expected_findings: 3 }, - TestCase { file_name: "testdata/cpp_vulnerable.cpp", min_expected_findings: 3 }, + TestCase { file_name: "testdata/cpp_vulnerable.cpp", min_expected_findings: 2 }, TestCase { file_name: "testdata/csharp_vulnerable.cs", min_expected_findings: 4 }, TestCase { file_name: "testdata/elixir_vulnerable.exs", min_expected_findings: 1 }, TestCase { file_name: "testdata/generic_secrets.py", min_expected_findings: 9 }, From f5fe3eb2b9d7003dba5101ff1c0baa3d3386b20c Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 9 Mar 2026 20:51:32 -0700 Subject: [PATCH 10/12] v1.87.0 --- crates/kingfisher-rules/data/rules/ably.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/kingfisher-rules/data/rules/ably.yml b/crates/kingfisher-rules/data/rules/ably.yml index 44f1615..d5dc260 100644 --- a/crates/kingfisher-rules/data/rules/ably.yml +++ b/crates/kingfisher-rules/data/rules/ably.yml @@ -30,7 +30,7 @@ rules: method: GET url: https://rest.ably.io/channels?limit=1 headers: - Authorization: "Basic {{ TOKEN | append: ':' | b64enc }}" + Authorization: "Basic {{ TOKEN | b64enc }}" Accept: application/json response_matcher: - report_response: true From 02a314529e5bf70f05840e5fbc6cfe9bb3311199 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 9 Mar 2026 21:50:06 -0700 Subject: [PATCH 11/12] v1.87.0 --- .github/workflows/ci.yml | 21 ------------------- 
.../kingfisher-rules/data/rules/eraserio.yml | 5 ++++- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca9d663..bc1b017 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,8 +53,6 @@ jobs: run: make tests env: CARGO_BUILD_JOBS: 1 - - name: Build (Makefile linux-x64) - run: make ubuntu-x64 linux-arm64: name: Linux arm64 @@ -93,8 +91,6 @@ jobs: run: make tests env: CARGO_BUILD_JOBS: 1 - - name: Build (Makefile linux-arm64) - run: make ubuntu-arm64 macos-arm64: name: macOS arm64 @@ -120,8 +116,6 @@ jobs: - name: Run tests run: make tests - - name: Build (Makefile darwin-arm64) - run: make darwin-arm64 windows: name: Windows ${{ matrix.arch }} @@ -204,18 +198,3 @@ jobs: echo "▶ cargo test --release --workspace --all-targets --target $target_triple" cargo test --release --workspace --all-targets --target "$target_triple" - - - name: Build - shell: msys2 {0} - run: make windows-${{ matrix.arch }} - - - name: Move artifact to dist - shell: bash - run: | - mkdir -p dist - cp target/release/kingfisher-windows-${{ matrix.arch }}.zip dist/ - - - uses: actions/upload-artifact@v4 - with: - name: kingfisher-windows-${{ matrix.arch }} - path: dist/kingfisher-*windows-${{ matrix.arch }}*.* diff --git a/crates/kingfisher-rules/data/rules/eraserio.yml b/crates/kingfisher-rules/data/rules/eraserio.yml index 54dcca2..91d75a1 100644 --- a/crates/kingfisher-rules/data/rules/eraserio.yml +++ b/crates/kingfisher-rules/data/rules/eraserio.yml @@ -31,4 +31,7 @@ rules: response_matcher: - report_response: true - type: StatusMatch - status: [200, 403] \ No newline at end of file + status: [200, 403] + - type: WordMatch + words: ['API token is disabled'] + negative: true \ No newline at end of file From d5539c4e9d9e203b598da1aaea22cb120c7e5ad7 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 9 Mar 2026 21:50:58 -0700 Subject: [PATCH 12/12] v1.87.0 --- docs/TREE_SITTER.md | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/docs/TREE_SITTER.md b/docs/TREE_SITTER.md index 1e11222..50999ea 100644 --- a/docs/TREE_SITTER.md +++ b/docs/TREE_SITTER.md @@ -24,7 +24,7 @@ The goal is to confirm that a regex hit appears in a plausible code assignment/c Tree-sitter is attempted only when all of these are true: -- Blob length is between `1 KiB` and `64 KiB` (`should_attempt_tree_sitter`). +- Blob length is between `0` and `128 KiB`, inclusive (`should_attempt_tree_sitter`). - `fast_mode` is disabled. - A language hint is available. - The language maps to a supported Tree-sitter grammar + query set.