From efa47ba140414eded96d9cab1d3b936dfa3aaaa8 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Wed, 15 Apr 2026 14:37:26 -0700 Subject: [PATCH] updates to new rules --- CHANGELOG.md | 4 +- README.md | 10 +- THREAT_MODEL_PROMPT.md | 45 -------- .../data/rules/cloudflare.yml | 5 +- .../kingfisher-rules/data/rules/composio.yml | 23 ++-- crates/kingfisher-rules/data/rules/hexpm.yml | 68 ++++------- .../kingfisher-rules/data/rules/infisical.yml | 2 +- .../kingfisher-rules/data/rules/inngest.yml | 59 ++++++++-- .../kingfisher-rules/data/rules/livekit.yml | 90 ++++++++++++--- .../kingfisher-rules/data/rules/tinybird.yml | 47 ++++++-- crates/kingfisher-rules/data/rules/tolgee.yml | 6 +- .../data/rules/triggerdev.yml | 49 +++++--- crates/kingfisher-rules/data/rules/unkey.yml | 4 +- .../kingfisher-rules/data/rules/voiceflow.yml | 44 +++++-- crates/kingfisher-rules/data/rules/workos.yml | 4 +- crates/kingfisher-rules/src/liquid_filters.rs | 40 +++++++ docs-site/docs/changelog.md | 4 +- docs-site/docs/features/parsing.md | 6 +- docs-site/docs/reference/library.md | 2 +- docs-site/docs/rules/builtin-rules.md | 48 +++++--- docs-site/docs/rules/overview.md | 5 + docs/CONTEXT_VERIFICATION.md | 9 +- docs/LIBRARY.md | 2 +- docs/PARSING.md | 6 +- docs/RULES.md | 5 + src/matcher/mod.rs | 109 +++++++++++++++++- 26 files changed, 486 insertions(+), 210 deletions(-) delete mode 100644 THREAT_MODEL_PROMPT.md diff --git a/CHANGELOG.md b/CHANGELOG.md index b5f84c6..3f42fbd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,9 @@ All notable changes to this project will be documented in this file. ## [v1.96.0] - Added live HTTP validation for 18 rules across 15 providers: Val Town, Polar, hCaptcha, Thunderstore, Elastic Cloud (2 rules), LlamaCloud, Gemfury (2 rules), Vonage, ThingsBoard, Zapier, Facebook Access Token, GitLab Session Cookie, PostHog Feature Flags, Unkey API Key, and Hop.io (2 rules). 
- Added revocation support for 7 rules across 6 providers: Discord webhooks (single-step DELETE), DigitalOcean PATs (self-revoke via OAuth), and multi-step HttpMultiStep revocation for LaunchDarkly, Resend, Linode, and Netlify (2 rules). Built-in revocation coverage is now 34 provider families with 53 revocation-enabled rules. -- Expanded Alibaba Cloud coverage with STS temporary credential detection for STS access key IDs, STS security tokens, and STS access key secrets. Built-in rule coverage is now 921 rules total. +- Expanded Alibaba Cloud coverage with STS temporary credential detection for STS access key IDs, STS security tokens, and STS access key secrets. Built-in rule coverage is now 923 rules total. +- Fixed parser-based context gating so assignment-style contextual secrets still scan in raw text when parser verification is unavailable, instead of being dropped. +- Corrected several newly added SaaS rules and validators, including LiveKit (with dependent API secret validation), Tinybird, Inngest, Tolgee, Unkey, Composio, Hex.pm, Trigger.dev, Voiceflow, WorkOS, and Infisical. - Added 61 new detection rules across 46 providers: Axiom (API token + PAT), Trigger.dev (secret key + PAT), Dub.co, Svix webhook signing secret, Liveblocks, Inngest (signing key + event key), Seam, Courier, Cal.com, Arcjet, WarpStream, Mem0, Mintlify, Pirsch, Tinybird, Tolgee (project key + PAT), Ory (API key + session + OAuth2 tokens), Xendit, Xata, Crossmint (server + client keys), DeepL (Free + Pro), Flagsmith, E2B, Infisical, WooCommerce (consumer key + secret), Nightfall AI, Ramp (client ID + secret), Hex.pm (personal + workspace tokens), Convex deploy key, MiniMax, Mappedin (key + secret), Pollinations (secret + publishable), Fal.ai, Aikido, Hack Club, GuardSquare, Browser Use, Composio, Gamma, Hex.tech, Mastra, redirect.pizza, Upstash, and WorkOS. Also added new prefixed-token rules for Netlify (`nfp_`), Cloudflare (`cfut_`), and Supabase (`sb_publishable_`). 
Added live HTTP validation for 30 of these rules. - Added 32 new detection rules across 25 providers: Ghost CMS (admin + content keys), UpCloud (`ucat_`), Voiceflow (`VF.DM.`/`VF.WS.`), Robinhood Crypto (`rh-api-`), ClickUp (`pk_`), Unleash (client/admin + personal tokens), ConfigCat (standard + extended SDK keys), SaladCloud (`salad_cloud_`), Tigris (`tid_`/`tsec_`), Portainer (`ptr_`), Permit.io (`permit_key_`), Builder.io (`bpk-`), LiveKit (API key + secret), Close CRM (`api_`), Hetzner Cloud, Censys (API ID + secret), Wistia, PandaDoc, Pinata (key + secret), ZeroTier, Detectify, ChartMogul, Moralis, ButterCMS, and Loops. Includes HTTP validation for 19 of these rules. - Removed 17 direct dependencies from the root crate by dropping unused deps (`p256`, `ed25519-dalek`, `jsonwebtoken`, `gitlab`, `lazy_static`, `base32`, `pem`, `byteorder`, `reqwest-middleware`, `sha1`, `time`, `ring`, `num_cpus`, `strum_macros`), replacing `once_cell` with `std::sync::{LazyLock, OnceLock}`, and using `std::thread::available_parallelism()` in place of `num_cpus`. Salt generation now uses `rand` instead of `ring`, and all `strum_macros::Display` imports are consolidated under `strum::Display`. diff --git a/README.md b/README.md index 2a27a56..d0f2075 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Kingfisher is an open source secret scanner and **live secret validation** tool built in Rust. -It combines Intel's SIMD-accelerated regex engine (Hyperscan) with language-aware parsing to achieve high accuracy at massive scale, and **ships with 921 built-in rules** to detect, **validate**, and triage leaked API keys, tokens, and credentials before they ever reach production. +It combines Intel's SIMD-accelerated regex engine (Hyperscan) with language-aware parsing to achieve high accuracy at massive scale, and **ships with 923 built-in rules** to detect, **validate**, and triage leaked API keys, tokens, and credentials before they ever reach production. 
Designed for offensive security engineers and blue-team defenders alike, Kingfisher helps you scan repositories, cloud storage, chat, docs, and CI pipelines to find and verify exposed secrets quickly. @@ -49,9 +49,9 @@ Kingfisher is a high-performance, open source secret detection tool for source c -### Performance, Accuracy, and 921 Rules +### Performance, Accuracy, and 923 Rules - **Performance**: multithreaded, Hyperscan‑powered scanning built for huge codebases -- **Extensible rules**: 921 built-in rules plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) +- **Extensible rules**: 923 built-in rules plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) - **Validate & Revoke**: live validation of discovered secrets, plus direct revocation for supported platforms (GitHub, GitLab, Slack, AWS, GCP, and more) ([docs/USAGE.md](/docs/USAGE.md)) - **Revocation support matrix**: current built-in revocation coverage across providers and rule IDs ([docs/REVOCATION_PROVIDERS.md](/docs/REVOCATION_PROVIDERS.md)) - **Blast Radius Mapping**: instantly map leaked keys to their effective cloud identities and exposed resources with `--access-map`. Supports 39 providers (see table below). @@ -345,7 +345,7 @@ gh attestation verify kingfisher-linux-x64.tgz --repo mongodb/kingfisher # Detection Rules -Kingfisher ships with [921 built-in rules](crates/kingfisher-rules/data/rules/) covering cloud keys, AI tokens, CI/CD secrets, database credentials, and SaaS API keys. Below is an overview — see the full list in [crates/kingfisher-rules/data/rules/](crates/kingfisher-rules/data/rules/): +Kingfisher ships with [923 built-in rules](crates/kingfisher-rules/data/rules/) covering cloud keys, AI tokens, CI/CD secrets, database credentials, and SaaS API keys. 
Below is an overview — see the full list in [crates/kingfisher-rules/data/rules/](crates/kingfisher-rules/data/rules/): | Category | What we catch | |----------|---------------| @@ -362,7 +362,7 @@ Kingfisher ships with [921 built-in rules](crates/kingfisher-rules/data/rules/) ## Write Custom Rules -Kingfisher ships with 921 rules with HTTP and service‑specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential. +Kingfisher ships with 923 rules with HTTP and service‑specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential. However, you may want to add your own custom rules, or modify a detection to better suit your needs / environment. diff --git a/THREAT_MODEL_PROMPT.md b/THREAT_MODEL_PROMPT.md deleted file mode 100644 index 5896a15..0000000 --- a/THREAT_MODEL_PROMPT.md +++ /dev/null @@ -1,45 +0,0 @@ -# Threat Model Prompt (STRIDE) - -Perform a practical STRIDE threat model of this system. - -**Principles:** -- Only realistic, relevant risks. No generic boilerplate. -- Depth over breadth. Be concrete and opinionated. -- Skip STRIDE categories that aren't meaningfully relevant — say so briefly and move on. 
- -## Step 1: Architecture Summary - -Summarize the system in short bullets: -- Purpose -- Main components -- Data flows -- Trust boundaries -- Sensitive assets - -## Step 2: STRIDE Analysis - -For each relevant threat: -- **STRIDE category** -- **Threat scenario** — specific, not generic -- **Impacted asset / component** -- **Why it matters** — not just "it's bad", but the concrete consequence -- **Severity:** Low / Medium / High / Critical -- **Recommended mitigations** - -Focus especially on: -- Authn / authz failures -- Trust boundary crossings -- Secret handling -- Multi-tenant isolation -- Injection / unsafe input handling -- Data exfiltration paths -- Abuse / privilege escalation -- Logging / detection gaps -- Supply chain and execution risks - -## Step 3: Prioritized Output - -End with: -1. **Top 5 risks to fix first** — ordered by impact, with brief rationale -2. **Assumptions made** — what you assumed about the deployment, environment, or usage -3. **Open questions** — things that, if answered differently, would change the threat model diff --git a/crates/kingfisher-rules/data/rules/cloudflare.yml b/crates/kingfisher-rules/data/rules/cloudflare.yml index 1ef2a1b..5871c06 100644 --- a/crates/kingfisher-rules/data/rules/cloudflare.yml +++ b/crates/kingfisher-rules/data/rules/cloudflare.yml @@ -117,15 +117,16 @@ rules: (?x) \b ( - cfut_[a-zA-Z0-9_-]{36,44} + cfut_[A-Za-z0-9_-]{41,64} ) \b min_entropy: 3.5 confidence: medium examples: - - 'CF_API_TOKEN=cfut_a1B2c3D4e5F6g7H8i9J0k1L2m3N4o5P6q7R8s9' + - 'CF_API_TOKEN=cfut_a1B2c3D4e5F6g7H8i9J0k1L2m3N4o5P6q7R8s9TuV' references: - https://developers.cloudflare.com/fundamentals/api/get-started/token-formats/ + # Cloudflare documents 40 characters plus a checksum, but not the checksum algorithm. 
validation: type: Http content: diff --git a/crates/kingfisher-rules/data/rules/composio.yml b/crates/kingfisher-rules/data/rules/composio.yml index af67b81..1c1d63b 100644 --- a/crates/kingfisher-rules/data/rules/composio.yml +++ b/crates/kingfisher-rules/data/rules/composio.yml @@ -1,5 +1,5 @@ rules: - - name: Composio API Key + - name: Composio Project API Key id: kingfisher.composio.1 pattern: | (?xi) @@ -8,11 +8,7 @@ rules: (?:.|[\n\r]){0,48}? \b ( - (?: - comp_[A-Za-z0-9_-]{32}(?:[A-Za-z0-9_-]{16}){0,3} - | - sk_(?:live|test)_[A-Za-z0-9_-]{16}(?:[A-Za-z0-9_-]{16}){0,3} - ) + ak_[A-Za-z0-9]{16,48} ) \b pattern_requirements: @@ -21,17 +17,17 @@ rules: min_entropy: 3.5 confidence: medium examples: - - 'COMPOSIO_API_KEY="comp_a7B2c9D4eF6gH8jK1mN3pQ5rS7tV9xY0"' - - 'composio x-api-key: sk_live_Qj7mN4vK8sL2xP6zT9aBcD3eF5gH1jK2' + - 'COMPOSIO_API_KEY="ak_a1b2c3d4e5f6g7h8i9j0"' + - 'composio x-api-key: ak_0f1e2d3c4b5a69788796' references: - - https://docs.composio.dev/reference/api-reference/authentication/getAuthSessionInfo - - https://composio.dev/content/secure-ai-agent-infrastructure-guide + - https://docs.composio.dev/reference/authentication + - https://docs.composio.dev/api-reference/projects/post-org-project-by-project-id-api-keys-create validation: type: Http content: request: method: GET - url: https://backend.composio.dev/api/v3/auth/session/info + url: https://backend.composio.dev/api/v3.1/tools headers: x-api-key: "{{ TOKEN }}" Accept: application/json @@ -40,9 +36,4 @@ rules: - type: StatusMatch status: [200] - type: JsonValid - - type: WordMatch - words: - - '"project"' - - '"api_key"' - match_all_words: true # Current Composio API docs expose session verification, but not a same-key revocation flow. 
diff --git a/crates/kingfisher-rules/data/rules/hexpm.yml b/crates/kingfisher-rules/data/rules/hexpm.yml index e9c5cca..3e1c56e 100644 --- a/crates/kingfisher-rules/data/rules/hexpm.yml +++ b/crates/kingfisher-rules/data/rules/hexpm.yml @@ -1,58 +1,32 @@ rules: - - name: Hex.pm Personal Token + - name: Hex.pm Organization Repository Key id: kingfisher.hexpm.1 pattern: | - (?x) + (?xi) \b + (?: + mix \s+ hex\.organization \s+ (?:auth|key) + | + HEX_ORGANIZATION_KEY + | + hexpm + | + hex\.pm + ) + (?:.|[\n\r]){0,64}? ( - hxtp_[a-f0-9]{64} + [a-f0-9]{32} ) \b + pattern_requirements: + min_digits: 4 + min_lowercase: 8 min_entropy: 3.5 confidence: medium examples: - - 'HEX_API_KEY=hxtp_a1b2c3d4e5f60708a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5f60708a9b0c1d2' + - 'mix hex.organization auth acme --key 126d49fb3014bd26457471ebae97c625' + - 'HEX_ORGANIZATION_KEY=126d49fb3014bd26457471ebae97c625' references: - - https://hex.pm/docs/faq - validation: - type: Http - content: - request: - method: GET - url: https://hex.pm/api/auth - headers: - Authorization: "{{ TOKEN }}" - Accept: application/json - response_matcher: - - report_response: true - - type: StatusMatch - status: [200] - - - name: Hex.pm Workspace Token - id: kingfisher.hexpm.2 - pattern: | - (?x) - \b - ( - hxtw_[a-f0-9]{64} - ) - \b - min_entropy: 3.5 - confidence: medium - examples: - - 'HEX_API_KEY=hxtw_a1b2c3d4e5f60708a9b0c1d2e3f4a5b6c7d8e9f0a1b2c3d4e5f60708a9b0c1d2' - references: - - https://hex.pm/docs/faq - validation: - type: Http - content: - request: - method: GET - url: https://hex.pm/api/auth - headers: - Authorization: "{{ TOKEN }}" - Accept: application/json - response_matcher: - - report_response: true - - type: StatusMatch - status: [200] + - https://hex.pm/docs/private + # Hex.pm docs only expose these keys as repository authentication hashes. + # A same-key validation or revocation endpoint is not publicly documented. 
diff --git a/crates/kingfisher-rules/data/rules/infisical.yml b/crates/kingfisher-rules/data/rules/infisical.yml index 0f7a5f9..1658dc3 100644 --- a/crates/kingfisher-rules/data/rules/infisical.yml +++ b/crates/kingfisher-rules/data/rules/infisical.yml @@ -5,7 +5,7 @@ rules: (?x) \b ( - st\.[a-f0-9]{8,32}\.[a-f0-9]{8,32}\.[a-f0-9]{16,48} + st\.[A-Za-z0-9_-]{3,48}\.[A-Za-z0-9_-]{3,48}\.[a-f0-9]{8,64} ) (?:\b|$) min_entropy: 3.5 diff --git a/crates/kingfisher-rules/data/rules/inngest.yml b/crates/kingfisher-rules/data/rules/inngest.yml index 60a8e5c..23fa12b 100644 --- a/crates/kingfisher-rules/data/rules/inngest.yml +++ b/crates/kingfisher-rules/data/rules/inngest.yml @@ -5,30 +5,75 @@ rules: (?x) \b ( - signkey-[A-Za-z0-9_]+-[a-f0-9]{32,68} + signkey-[a-z0-9_-]+-[a-f0-9]{64} ) \b min_entropy: 3.5 confidence: medium examples: - 'INNGEST_SIGNING_KEY=signkey-prod-b2ed992186a5cb19f6668aade821f502c1d00970dfd0e35128d51bac4649916c' - - 'INNGEST_SIGNING_KEY="signkey-branch-12345678abcdef0123456789abcdef01"' + - 'INNGEST_SIGNING_KEY="signkey-staging-12345678abcdef0123456789abcdef0123456789abcdef0123456789abcdef"' references: - https://www.inngest.com/docs/platform/signing-keys + validation: + type: Http + content: + request: + method: GET + url: https://api.inngest.com/v1/webhooks + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401, 403] + negative: true + - type: JsonValid - name: Inngest Event Key id: kingfisher.inngest.2 pattern: | - (?x) + (?xi) + \b + (?:inngest|INNGEST_EVENT_KEY) + (?:.|[\n\r]){0,48}? + \b + (?:EVENT(?:[\s_-]*KEY)?|KEY) + (?:.|[\n\r]){0,16}? 
\b ( - evtkey-(?:dev|prod|test|staging|branch)-[a-f0-9]{24,40} + [A-Za-z0-9_-]{86} ) \b + pattern_requirements: + min_digits: 4 + min_uppercase: 4 + min_lowercase: 4 min_entropy: 3.0 confidence: medium examples: - - 'INNGEST_EVENT_KEY=evtkey-dev-a1b2c3d4e5f60708a9b0c1d2' - - 'INNGEST_EVENT_KEY="evtkey-prod-deadbeefcafebabe1234567890abcdef"' + - 'INNGEST_EVENT_KEY=AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-AbCdEfGhIj' + - 'inngest event key: "ZyXwVuTsRqPoNmLkJiHgFeDcBa9876543210_-ZyXwVuTsRqPoNmLkJiHgFeDcBa9876543210_-ZyXwVuTsRq"' references: - - https://www.inngest.com/docs/events/creating-an-event + - https://www.inngest.com/docs/events/creating-an-event-key + # Validation sends an inert event (no function subscribes to this name). + # The event key is only accepted at the event ingestion endpoint. + validation: + type: Http + content: + request: + method: POST + url: 'https://inn.gs/e/{{ TOKEN }}' + headers: + Content-Type: application/json + body: '{"name": "kingfisher/validate", "data": {}}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401, 403] + negative: true diff --git a/crates/kingfisher-rules/data/rules/livekit.yml b/crates/kingfisher-rules/data/rules/livekit.yml index 9d50693..a5623af 100644 --- a/crates/kingfisher-rules/data/rules/livekit.yml +++ b/crates/kingfisher-rules/data/rules/livekit.yml @@ -3,10 +3,11 @@ rules: id: kingfisher.livekit.1 visible: false pattern: | - (?xi) - \b(?:livekit|LIVEKIT) + (?x) + \b + (?i:livekit) (?:.|[\n\r]){0,32}? - (?:API[_-]?KEY|KEY|ACCESS) + (?i:(?:API[_-]?KEY|KEY|ACCESS)) (?:.|[\n\r]){0,16}? 
\b ( @@ -14,39 +15,98 @@ rules: ) \b pattern_requirements: - min_digits: 1 min_uppercase: 3 min_entropy: 2.5 confidence: medium examples: - - 'LIVEKIT_API_KEY=APIa3B4c5D6e7F8' - - 'livekit_key: "API2K4m6N8p3R5s"' + - 'LIVEKIT_API_KEY=APIAb1Cd2Ef3Gh4' + - 'livekit_key: "APIQw2Er3Ty4Ui5"' references: - https://docs.livekit.io/home/get-started/authentication/ - name: LiveKit API Secret id: kingfisher.livekit.2 + visible: true pattern: | (?xi) - \b(?:livekit|LIVEKIT) - (?:.|[\n\r]){0,32}? - (?:SECRET|PRIVATE|ACCESS) - (?:.|[\n\r]){0,16}? \b - ( - [A-Za-z0-9]{40,48} + (?: + LIVEKIT_API_SECRET + | + livekit_api_secret + | + livekit[-_]?secret + | + livekitSecret ) + \s*[:=]\s* + ['"]? + ( + [A-Za-z0-9]{43,44} + ) + ['"]? \b pattern_requirements: min_digits: 3 min_uppercase: 3 min_lowercase: 3 - min_entropy: 4.0 + min_entropy: 3.0 confidence: medium examples: - - 'LIVEKIT_API_SECRET=aBcDeFgHiJkLmNoPqRsTuVwXyZ01234567890abcde' + - 'LIVEKIT_API_SECRET=AbCdEfGhIjKlMnOpQrStUvWxYz0123456789AaBbCcDd' references: - https://docs.livekit.io/home/get-started/authentication/ + - https://docs.livekit.io/reference/other/roomservice-api.md + - https://docs.livekit.io/frontends/reference/tokens-grants.md depends_on_rule: - rule_id: kingfisher.livekit.1 - variable: LIVEKIT_API_KEY + variable: API_KEY + - rule_id: kingfisher.livekit.3 + variable: LIVEKIT_URL + validation: + type: Http + content: + request: + method: POST + url: > + {%- assign base_url = LIVEKIT_URL | replace: "wss://", "https://" | replace: "ws://", "http://" -%} + {{ base_url }}/twirp/livekit.RoomService/ListRooms + headers: + Content-Type: application/json + Accept: application/json + Authorization: '{%- assign header = "HS256" | jwt_header -%}{%- assign now = "" | unix_timestamp -%}{%- assign exp = now | plus: 300 -%}{%- assign nbf = now | minus: 5 -%}{%- assign payload_json = ''{"iss":"'' | append: API_KEY | append: ''","sub":"kingfisher-validation","exp":'' | append: exp | append: '',"nbf":'' | append: nbf | append: 
'',"video":{"roomList":true}}'' -%}{%- assign payload = payload_json | b64url_enc -%}{%- assign signing_input = header | append: "." | append: payload -%}{%- assign sig_b64 = signing_input | hmac_sha256: TOKEN -%}{%- assign sig = sig_b64 | replace: "+", "-" | replace: "/", "_" | replace: "=", "" -%}Bearer {{ header }}.{{ payload }}.{{ sig }}' + body: | + {} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: StatusMatch + status: [401, 403] + negative: true + - type: JsonValid + - type: WordMatch + words: ['"rooms"'] + # LiveKit validation needs the URL and API key as well, so standalone API secrets must remain + # detectable even when contextual verification is unavailable. + + - name: LiveKit URL + id: kingfisher.livekit.3 + pattern: | + (?xi) + \b(?:livekit|LIVEKIT) + (?:.|[\n\r]){0,32}? + (?:URL|HOST|WS[_-]?URL|HTTP[_-]?URL) + (?:.|[\n\r]){0,16}? + ( + (?:wss?|https?)://[A-Za-z0-9.-]+(?:livekit\.(?:cloud|io)|[A-Za-z0-9.-]+) + ) + \b + min_entropy: 3.0 + confidence: medium + visible: false + examples: + - 'LIVEKIT_URL=wss://acme-demo.livekit.cloud' + - 'livekit_host=https://acme-voice.livekit.cloud' + references: + - https://docs.livekit.io/home/get-started/authentication/ diff --git a/crates/kingfisher-rules/data/rules/tinybird.yml b/crates/kingfisher-rules/data/rules/tinybird.yml index 6351e87..a71b606 100644 --- a/crates/kingfisher-rules/data/rules/tinybird.yml +++ b/crates/kingfisher-rules/data/rules/tinybird.yml @@ -1,34 +1,61 @@ rules: - - name: Tinybird Admin Token + - name: Tinybird Static Token id: kingfisher.tinybird.1 pattern: | (?x) \b - (?i:tinybird) - (?:.|[\n\r]){0,32}? - (?:ADMIN|SECRET|TOKEN|KEY) - (?:.|[\n\r]){0,16}? 
- \b ( - p\.[A-Za-z0-9]{40,100} + p\.eyJ1I[A-Za-z0-9_-]{80,260}\.[A-Za-z0-9_-]{43} ) - (?:\b|$) + (?:[^A-Za-z0-9_-]|$) pattern_requirements: min_digits: 1 min_uppercase: 1 min_lowercase: 1 + min_special_chars: 2 min_entropy: 3.5 confidence: medium examples: - - 'TINYBIRD_TOKEN=p.eyJ1IjogIjczMjA3MzYwLWVmMDAtNGIxOS1hZjQ3LWRlMmU3OTI2ZTdmOCJ9' + - 'workspace admin token: p.eyJ1IjogIjdmMmM5YTQxLWI2ZDgtNGUwMy05MWFhLTU4YzI0YjdkMGU5NiIsICJpZCI6ICI1YTBiOGM3MS1kM2U0LTRmMjktOGI2NS0wYzlkMmU3YTRmMTMiLCAiaG9zdCI6ICJnY3AtZXVyb3BlLXdlc3QyIn0.QgGQO4xKgNTfZ6GHe8_sBeTk4b2axbZLEdvzCQYvHnw' # nosemgrep: generic-jwt-token + - 'admin token: "p.eyJ1IjogImM5NGE3ZDUyLThmMDMtNDdiZS05YTE2LTJlODBkNWI3YzM0MSIsICJpZCI6ICIwZDdiMmY0OC02YTkxLTRjNWUtOGYzMC1iMmE2ZDU5ZTAxN2MiLCAiaG9zdCI6ICJnY3AtdXMtZWFzdDEifQ.rziyKeAZJYhenUJChcWxV1K8n_3ejHiS4XlzXK49jFI"' # nosemgrep: generic-jwt-token + - 'user token: p.eyJ1IjogIjNiOGU3MmQwLWE5MTQtNGY2Yy04NmQxLTA1YzdlMjlhNGI2OCIsICJpZCI6ICJlMTdjNmYzMC00YjhhLTQxZDktOTI3ZS02YTA1YjM4YzJmNzQiLCAiaG9zdCI6ICJhd3MtdXMtZWFzdC0xIn0.IIU-mUxglkE7kOSc8tweaZIcwACiBNCqoGAnoeluISA' # nosemgrep: generic-jwt-token references: - https://www.tinybird.co/docs/api-reference/token-api + - https://tinybird.co/docs/forward/administration/tokens/static-tokens + # Validation decodes the JWT payload to extract the "host" field, maps it to + # the correct regional API base URL, then calls GET /v0/pipes/ which requires + # only read scope and returns 200 for any valid token with pipe-read access. + # Write-only tokens (DATASOURCES:APPEND) may return 403 even if valid. validation: type: Http content: request: method: GET - url: https://api.tinybird.co/v0/datasources + url: > + {%- assign parts = TOKEN | split: "." 
-%} + {%- assign payload_enc = parts[1] -%} + {%- assign payload = payload_enc | b64url_dec -%} + {%- assign after_host_key = payload | split: '"host"' -%} + {%- assign after_host_val = after_host_key[1] -%} + {%- assign host_parts = after_host_val | split: '"' -%} + {%- assign host = host_parts[1] -%} + {%- if host == "gcp-europe-west3" or host == "eu_shared" -%} + {%- assign api_base = "api.tinybird.co" -%} + {%- elsif host == "gcp-europe-west2" -%} + {%- assign api_base = "api.europe-west2.gcp.tinybird.co" -%} + {%- elsif host == "gcp-us-east4" or host == "us_east" or host == "gcp-us-east1" -%} + {%- assign api_base = "api.us-east.tinybird.co" -%} + {%- elsif host == "gcp-northamerica-northeast2" or host == "northamerica-northeast2-gcp" -%} + {%- assign api_base = "api.northamerica-northeast2.gcp.tinybird.co" -%} + {%- elsif host == "us-east-aws" -%} + {%- assign api_base = "api.us-east.aws.tinybird.co" -%} + {%- elsif host contains "aws-" -%} + {%- assign region = host | remove: "aws-" -%} + {%- assign api_base = "api." 
| append: region | append: ".aws.tinybird.co" -%} + {%- else -%} + {%- assign api_base = "api.tinybird.co" -%} + {%- endif -%} + https://{{ api_base }}/v0/pipes/ headers: Authorization: "Bearer {{ TOKEN }}" Accept: application/json diff --git a/crates/kingfisher-rules/data/rules/tolgee.yml b/crates/kingfisher-rules/data/rules/tolgee.yml index c8d1da3..d01c77d 100644 --- a/crates/kingfisher-rules/data/rules/tolgee.yml +++ b/crates/kingfisher-rules/data/rules/tolgee.yml @@ -5,7 +5,7 @@ rules: (?x) \b ( - tgpak_[a-z2-7]{40,64} + tgpak_[a-z0-9]{40,64} ) \b min_entropy: 3.0 @@ -36,13 +36,13 @@ rules: (?x) \b ( - tgpat_[a-z2-7]{40,64} + tgpat_[a-z0-9]{40,64} ) \b min_entropy: 3.0 confidence: medium examples: - - 'TOLGEE_PAT=tgpat_gm2tgnrqgi3tsnrvheytomrvme4dgmrtgntdgntfmu3to' + - 'TOLGEE_PAT=tgpat_c813fppevpm6smlv8gld2fqq7knqqfht5ri1vgo57gj6ivkbhp5' references: - https://tolgee.io/platform/account_settings/api_keys_and_pat_tokens validation: diff --git a/crates/kingfisher-rules/data/rules/triggerdev.yml b/crates/kingfisher-rules/data/rules/triggerdev.yml index 14f2618..0a918f7 100644 --- a/crates/kingfisher-rules/data/rules/triggerdev.yml +++ b/crates/kingfisher-rules/data/rules/triggerdev.yml @@ -5,7 +5,7 @@ rules: (?x) \b ( - tr_(?:dev|prod|stg)_[A-Za-z0-9]{20,40} + tr_(?P<env>dev|prod|stg)_[A-Za-z0-9]{20,40} ) \b pattern_requirements: @@ -17,23 +17,24 @@ rules: - 'TRIGGER_SECRET_KEY=tr_prod_xK8m2LpQr5nW0vYz3cJ7' references: - https://trigger.dev/docs/management/authentication + - https://trigger.dev/docs/management/envvars/list + depends_on_rule: + - rule_id: kingfisher.triggerdev.3 + variable: TRIGGER_PROJECT_REF validation: type: Http content: request: method: GET - url: https://api.trigger.dev/api/v1/tasks + url: 'https://api.trigger.dev/api/v1/projects/{{ TRIGGER_PROJECT_REF }}/envvars/{{ env | replace: "stg", "staging" }}' headers: Authorization: "Bearer {{ TOKEN }}" Accept: application/json response_matcher: - report_response: true - type: StatusMatch - status: [200, 
401] - - type: WordMatch - words: - - '"Unauthorized"' - negative: true + status: [200] + - type: JsonValid - name: Trigger.dev Personal Access Token id: kingfisher.triggerdev.2 @@ -52,20 +53,42 @@ rules: - 'TRIGGER_ACCESS_TOKEN=tr_pat_xK8m2LpQr5nW0vYz3cJ7aB4d' references: - https://trigger.dev/docs/management/authentication + - https://trigger.dev/docs/management/envvars/list + depends_on_rule: + - rule_id: kingfisher.triggerdev.3 + variable: TRIGGER_PROJECT_REF validation: type: Http content: request: method: GET - url: https://api.trigger.dev/api/v1/tasks + url: 'https://api.trigger.dev/api/v1/projects/{{ TRIGGER_PROJECT_REF }}/envvars/dev' headers: Authorization: "Bearer {{ TOKEN }}" Accept: application/json response_matcher: - report_response: true - type: StatusMatch - status: [200, 401] - - type: WordMatch - words: - - '"Unauthorized"' - negative: true + status: [200] + - type: JsonValid + # Trigger.dev documents PAT creation in the dashboard, but no same-key revocation endpoint is publicly documented. + + - name: Trigger.dev Project Reference + id: kingfisher.triggerdev.3 + pattern: | + (?xi) + \b + (?:TRIGGER_PROJECT_REF|projectRef|trigger(?:\.config)?|trigger\.dev) + (?:.|[\n\r]){0,48}? 
+ ( + proj_[a-z0-9]{12,32} + ) + \b + min_entropy: 3.0 + confidence: medium + visible: false + examples: + - 'TRIGGER_PROJECT_REF=proj_yubjwjsfkxnylobaqvqz' + - 'projectRef: "proj_yubjwjsfkxnylobaqvqz"' + references: + - https://trigger.dev/docs/management/envvars/list diff --git a/crates/kingfisher-rules/data/rules/unkey.yml b/crates/kingfisher-rules/data/rules/unkey.yml index 858a192..da7a3e1 100644 --- a/crates/kingfisher-rules/data/rules/unkey.yml +++ b/crates/kingfisher-rules/data/rules/unkey.yml @@ -91,9 +91,11 @@ rules: - report_response: true - type: StatusMatch status: [200] + - type: JsonValid - type: WordMatch words: - - '"valid"' + - '"valid":true' + - '"valid": true' references: - https://www.unkey.com/docs/api-reference/v2/keys/create-api-key - https://www.unkey.com/docs/api-reference/v2/keys/verify-api-key diff --git a/crates/kingfisher-rules/data/rules/voiceflow.yml b/crates/kingfisher-rules/data/rules/voiceflow.yml index dbf372a..a4b7dd4 100644 --- a/crates/kingfisher-rules/data/rules/voiceflow.yml +++ b/crates/kingfisher-rules/data/rules/voiceflow.yml @@ -5,7 +5,7 @@ rules: (?xi) \b ( - VF\.(?:(?:DM|WS)\.) + VF\.DM\. [0-9a-f]{24} \. 
[0-9a-zA-Z]{16} @@ -18,26 +18,50 @@ rules: confidence: medium examples: - 'VOICEFLOW_API_KEY=VF.DM.6421e3d5b1e4a9001d2b7c8f.a1B2c3D4e5F6g7H8' - - 'voiceflow_key: "VF.WS.53a1b2c3d4e5f6001a2b3c4d.xY9zW8vU7tS6rQ5p"' + - 'voiceflow_key: "VF.DM.53a1b2c3d4e5f6001a2b3c4d.xY9zW8vU7tS6rQ5p"' references: - https://developer.voiceflow.com/reference/overview + - https://developer.voiceflow.com/api-reference/authentication + depends_on_rule: + - rule_id: kingfisher.voiceflow.2 + variable: VOICEFLOW_PROJECT_ID validation: type: Http content: request: - method: POST - url: https://general-runtime.voiceflow.com/knowledge-base/query + method: GET + url: https://api.voiceflow.com/v2/versions/development/export?prototype=true headers: Accept: application/json Authorization: "{{ TOKEN }}" - Content-Type: application/json - body: | - {"question": "test"} + projectID: "{{ VOICEFLOW_PROJECT_ID }}" response_matcher: - report_response: true - type: StatusMatch - status: [200, 400, 404] + status: [200] + - type: JsonValid - type: WordMatch - negative: true words: - - '"Unauthorized"' + - '"programs"' + - '"_id"' + # Voiceflow documents key rotation in the dashboard, but not a same-key revocation endpoint. + + - name: Voiceflow Project ID + id: kingfisher.voiceflow.2 + pattern: | + (?xi) + \b + (?:VOICEFLOW_PROJECT_ID|projectID|project_id|voiceflow \s+ project \s+ id) + (?:.|[\n\r]){0,48}? + ( + [0-9a-f]{24} + ) + \b + min_entropy: 3.0 + confidence: medium + visible: false + examples: + - 'VOICEFLOW_PROJECT_ID=63204daf2c4a550006873771' + - 'projectID: "63204daf2c4a550006873771"' + references: + - https://docs.voiceflow.com/reference/project-ids-and-versions diff --git a/crates/kingfisher-rules/data/rules/workos.yml b/crates/kingfisher-rules/data/rules/workos.yml index faa05b7..296a332 100644 --- a/crates/kingfisher-rules/data/rules/workos.yml +++ b/crates/kingfisher-rules/data/rules/workos.yml @@ -10,12 +10,14 @@ rules: (?:.|[\n\r]){0,16}? 
\b ( - sk_(?:live|test|staging|example)_[A-Za-z0-9_-]{16}(?:[A-Za-z0-9_-]{16}){0,3} + sk_[A-Za-z0-9_-]{16,64} ) \b pattern_requirements: min_digits: 4 min_lowercase: 4 + ignore_if_contains: + - example min_entropy: 3.5 confidence: medium examples: diff --git a/crates/kingfisher-rules/src/liquid_filters.rs b/crates/kingfisher-rules/src/liquid_filters.rs index cf6b73e..16219da 100644 --- a/crates/kingfisher-rules/src/liquid_filters.rs +++ b/crates/kingfisher-rules/src/liquid_filters.rs @@ -900,6 +900,36 @@ static_filter!( } ); +// {{ value | b64url_dec }} – URL-safe base64 decode (with or without padding) +#[derive(Debug, Clone, Default, FilterReflection, ParseFilter)] +#[filter( + name = "b64url_dec", + description = "Decodes a URL-safe Base64 string (with or without padding)", + parsed(B64UrlDecFilter) +)] +pub struct B64UrlDecFilter; + +impl std::fmt::Display for B64UrlDecFilter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "b64url_dec") + } +} + +impl Filter for B64UrlDecFilter { + fn evaluate( + &self, + input: &dyn ValueView, + _runtime: &dyn Runtime, + ) -> Result<Value, LiquidError> { + let input_str = input.to_kstr(); + general_purpose::URL_SAFE_NO_PAD + .decode(input_str.as_bytes()) + .or_else(|_| general_purpose::URL_SAFE.decode(input_str.as_bytes())) + .map(|bytes| Value::scalar(String::from_utf8_lossy(&bytes).to_string())) + .map_err(|e| LiquidError::with_msg(format!("b64url_dec: {e}"))) + } +} + // {{ algo | jwt_header }} – e.g. “HS256” -- Base64URL-encoded header static_filter!( /// Generate a minimal JWT header for the given alg. 
@@ -1012,6 +1042,7 @@ pub fn register_all(builder: liquid::ParserBuilder) -> liquid::ParserBuilder { // zero-arg helpers .filter(Replace::default()) .filter(B64UrlEncFilter::default()) + .filter(B64UrlDecFilter::default()) .filter(Sha256Filter::default()) .filter(UrlEncodeFilter::default()) .filter(JsonEscapeFilter::default()) @@ -1148,6 +1179,15 @@ mod tests { ); } + #[test] + fn b64url_dec_filter() { + let encoded = general_purpose::URL_SAFE_NO_PAD.encode("++??"); + assert_eq!(render(&format!("{{{{ \"{encoded}\" | b64url_dec }}}}")), "++??"); + // Also works with padding + let padded = general_purpose::URL_SAFE.encode("hello"); + assert_eq!(render(&format!("{{{{ \"{padded}\" | b64url_dec }}}}")), "hello"); + } + #[test] fn url_encode_filter() { assert_eq!( diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index b9084ad..5482d22 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -10,7 +10,9 @@ All notable changes to this project will be documented in this file. ## [v1.96.0] - Added live HTTP validation for 18 rules across 15 providers: Val Town, Polar, hCaptcha, Thunderstore, Elastic Cloud (2 rules), LlamaCloud, Gemfury (2 rules), Vonage, ThingsBoard, Zapier, Facebook Access Token, GitLab Session Cookie, PostHog Feature Flags, Unkey API Key, and Hop.io (2 rules). - Added revocation support for 7 rules across 6 providers: Discord webhooks (single-step DELETE), DigitalOcean PATs (self-revoke via OAuth), and multi-step HttpMultiStep revocation for LaunchDarkly, Resend, Linode, and Netlify (2 rules). Built-in revocation coverage is now 34 provider families with 53 revocation-enabled rules. -- Expanded Alibaba Cloud coverage with STS temporary credential detection for STS access key IDs, STS security tokens, and STS access key secrets. Built-in rule coverage is now 921 rules total. 
+- Expanded Alibaba Cloud coverage with STS temporary credential detection for STS access key IDs, STS security tokens, and STS access key secrets. Built-in rule coverage is now 923 rules total. +- Fixed parser-based context gating so assignment-style contextual secrets still scan in raw text when parser verification is unavailable, instead of being dropped. +- Corrected several newly added SaaS rules and validators, including LiveKit (with dependent API secret validation), Tinybird, Inngest, Tolgee, Unkey, Composio, Hex.pm, Trigger.dev, Voiceflow, WorkOS, and Infisical. - Added 61 new detection rules across 46 providers: Axiom (API token + PAT), Trigger.dev (secret key + PAT), Dub.co, Svix webhook signing secret, Liveblocks, Inngest (signing key + event key), Seam, Courier, Cal.com, Arcjet, WarpStream, Mem0, Mintlify, Pirsch, Tinybird, Tolgee (project key + PAT), Ory (API key + session + OAuth2 tokens), Xendit, Xata, Crossmint (server + client keys), DeepL (Free + Pro), Flagsmith, E2B, Infisical, WooCommerce (consumer key + secret), Nightfall AI, Ramp (client ID + secret), Hex.pm (personal + workspace tokens), Convex deploy key, MiniMax, Mappedin (key + secret), Pollinations (secret + publishable), Fal.ai, Aikido, Hack Club, GuardSquare, Browser Use, Composio, Gamma, Hex.tech, Mastra, redirect.pizza, Upstash, and WorkOS. Also added new prefixed-token rules for Netlify (`nfp_`), Cloudflare (`cfut_`), and Supabase (`sb_publishable_`). Added live HTTP validation for 30 of these rules. 
- Added 32 new detection rules across 25 providers: Ghost CMS (admin + content keys), UpCloud (`ucat_`), Voiceflow (`VF.DM.`/`VF.WS.`), Robinhood Crypto (`rh-api-`), ClickUp (`pk_`), Unleash (client/admin + personal tokens), ConfigCat (standard + extended SDK keys), SaladCloud (`salad_cloud_`), Tigris (`tid_`/`tsec_`), Portainer (`ptr_`), Permit.io (`permit_key_`), Builder.io (`bpk-`), LiveKit (API key + secret), Close CRM (`api_`), Hetzner Cloud, Censys (API ID + secret), Wistia, PandaDoc, Pinata (key + secret), ZeroTier, Detectify, ChartMogul, Moralis, ButterCMS, and Loops. Includes HTTP validation for 19 of these rules. - Removed 17 direct dependencies from the root crate by dropping unused deps (`p256`, `ed25519-dalek`, `jsonwebtoken`, `gitlab`, `lazy_static`, `base32`, `pem`, `byteorder`, `reqwest-middleware`, `sha1`, `time`, `ring`, `num_cpus`, `strum_macros`), replacing `once_cell` with `std::sync::{LazyLock, OnceLock}`, and using `std::thread::available_parallelism()` in place of `num_cpus`. Salt generation now uses `rand` instead of `ring`, and all `strum_macros::Display` imports are consolidated under `strum::Display`. diff --git a/docs-site/docs/features/parsing.md b/docs-site/docs/features/parsing.md index 0d53695..ac94fff 100644 --- a/docs-site/docs/features/parsing.md +++ b/docs-site/docs/features/parsing.md @@ -27,7 +27,7 @@ In the scanning phase (in the Matcher’s implementation), Kingfisher does the f - **Candidate Selection:** Findings from rules classified as context-dependent become parser-verification candidates. - **Language Detection:** If a language string is provided (for example from metadata or extension), the code maps it to a supported parser backend. - **Parsing and Querying:** The parser streams normalized snippets such as `key = value` without materializing a full syntax tree. -- **Verification Decision:** Candidate findings are kept only if parser-extracted context verifies the matched secret. 
+- **Verification Decision:** Strict contextual candidates are kept only if parser-extracted context verifies the matched secret. More explicit assignment-style rules can still survive on raw regex evidence when parser verification is unavailable. ## Supported Languages @@ -45,10 +45,10 @@ Context verification is skipped in certain cases: - **No Language Identified:** If the file isn’t recognized as belonging to one of the supported languages or no language hint is provided, the context verifier isn’t even constructed. - **Non-source Files:** Binary files or files that aren’t expected to contain code (or aren’t extracted from archives) bypass parser-based context verification. - **Large Blobs:** Files larger than 2 MiB skip context verification to avoid spending time on generated or minified content. -- **Verification Errors:** If extraction fails, context-dependent matches are suppressed instead of falling back to raw regex hits. +- **Verification Errors:** If extraction fails, rules whose match profile strictly requires parser confirmation are suppressed. Assignment-style contextual rules can still fall back to their raw regex hit. ## Summary -Parser-based context verification is conditional and complementary. It is called only when the scanned file is a supported source or config file, and its role is to reduce noisy context-dependent findings by checking them against extracted code/config structure. +Parser-based context verification is conditional and complementary. It is called only when the scanned file is a supported source or config file, and its role is to reduce noisy strict-context findings by checking them against extracted code/config structure without unnecessarily dropping clear assignment-style secrets from raw text inputs. This layered approach helps improve the accuracy of secret detection while maintaining high performance. 
diff --git a/docs-site/docs/reference/library.md b/docs-site/docs/reference/library.md index 98e7ee1..5eef2f3 100644 --- a/docs-site/docs/reference/library.md +++ b/docs-site/docs/reference/library.md @@ -389,7 +389,7 @@ let template = parser.parse("{{ secret | sha256 }}")?; Available filters: -- **Encoding**: `b64enc`, `b64dec`, `b64url_enc`, `url_encode`, `json_escape` +- **Encoding**: `b64enc`, `b64dec`, `b64url_enc`, `b64url_dec`, `url_encode`, `json_escape` - **Hashing**: `sha256`, `crc32`, `crc32_dec`, `crc32_hex`, `crc32_le_b64` - **HMAC**: `hmac_sha256`, `hmac_sha384`, `hmac_sha1`, `hmac_sha256_b64key` - **Encoding**: `base62`, `base36` diff --git a/docs-site/docs/rules/builtin-rules.md b/docs-site/docs/rules/builtin-rules.md index 075bd72..ec4497c 100644 --- a/docs-site/docs/rules/builtin-rules.md +++ b/docs-site/docs/rules/builtin-rules.md @@ -1,13 +1,11 @@ --- title: "Built-in Rules List" -description: "Complete list of all 921 built-in secret detection rules in Kingfisher. Searchable and filterable by provider, confidence level, and validation support." +description: "Complete list of all 923 built-in secret detection rules in Kingfisher. Searchable and filterable by provider, confidence level, and validation support." --- # Built-in Rules -Kingfisher ships with **921 detection rules** across **579 providers** -(808 detectors + 113 dependent rules). -Of these, **596** include live validation and **53** support direct revocation. +Kingfisher ships with **923 detection rules** across **579 providers** !!! tip "Search" Use the search box below to filter rules by provider name, rule ID, or confidence level. @@ -1517,7 +1515,7 @@ Of these, **596** include live validation and **53** support direct revocation. Composio -Composio API Key +Composio Project API Key kingfisher.composio.1 Medium Yes @@ -3349,18 +3347,10 @@ Of these, **596** include live validation and **53** support direct revocation. 
Hexpm -Hex.pm Personal Token +Hex.pm Organization Repository Key kingfisher.hexpm.1 Medium -Yes - - -Hexpm -Hex.pm Workspace Token -kingfisher.hexpm.2 -Medium -Yes @@ -3504,7 +3494,7 @@ Of these, **596** include live validation and **53** support direct revocation. Inngest Signing Key kingfisher.inngest.1 Medium - +Yes @@ -3512,7 +3502,7 @@ Of these, **596** include live validation and **53** support direct revocation. Inngest Event Key kingfisher.inngest.2 Medium - +Yes @@ -4000,6 +3990,14 @@ Of these, **596** include live validation and **53** support direct revocation. LiveKit API Secret kingfisher.livekit.2 Medium +Yes + + + +Livekit +LiveKit URL +kingfisher.livekit.3 +Medium @@ -6597,7 +6595,7 @@ Of these, **596** include live validation and **53** support direct revocation. Tinybird -Tinybird Admin Token +Tinybird Static Token kingfisher.tinybird.1 Medium Yes @@ -6700,6 +6698,14 @@ Of these, **596** include live validation and **53** support direct revocation. +Triggerdev +Trigger.dev Project Reference +kingfisher.triggerdev.3 +Medium + + + + Truenas TrueNAS API Key (WebSocket) kingfisher.truenas.1 @@ -7004,6 +7010,14 @@ Of these, **596** include live validation and **53** support direct revocation. +Voiceflow +Voiceflow Project ID +kingfisher.voiceflow.2 +Medium + + + + Volcengine VolcEngine Access Key ID kingfisher.volcengine.1 diff --git a/docs-site/docs/rules/overview.md b/docs-site/docs/rules/overview.md index 9de8718..d8d7676 100644 --- a/docs-site/docs/rules/overview.md +++ b/docs-site/docs/rules/overview.md @@ -485,6 +485,7 @@ Below is the complete list of Liquid filters available in Kingfisher, along with | `b64enc` | – | Base64-encodes the input using the standard alphabet. | `{{ TOKEN \| b64enc }}` | | `b64url_enc` | – | URL-safe Base64 (no padding). Useful for JWT headers & payloads. | `{{ TOKEN \| b64url_enc }}` | | `b64dec` | – | Decodes a Base64 string. 
| `{{ "aGVsbG8=" \| b64dec }}` | +| `b64url_dec` | – | Decodes a URL-safe Base64 string (with or without padding). | `{{ "Kys_Pw" \| b64url_dec }}` | | `sha256` | – | Computes the SHA-256 hex digest of the input. | `{{ TOKEN \| sha256 }}` | | `crc32` | – | Computes the CRC32 checksum of the input and returns a decimal value. | `{{ TOKEN \| crc32 }}` | | `crc32_dec` | `digits` (integer, optional) | Computes the CRC32 checksum and returns the last `digits` decimal characters (zero-padded). Defaults to the full value when omitted. | `{{ TOKEN \| crc32_dec: 6 }}` | @@ -538,6 +539,9 @@ Authorization: Basic {{ "api:" | append: TOKEN | b64enc }} - **Using the Captured Value:** This captured value can then be used during the validation phase. For instance, if you have a rule for an Algolia Admin API Key that depends on an Algolia Application ID (captured as `APPID`), the validation logic can incorporate the `APPID` value to confirm that the secret matches the expected pattern or format for that specific account. +- **Detection vs validation:** + `depends_on_rule` is for capture chaining and validation context. It does not automatically hide the main secret finding, and it does not by itself mean the rule must be parser-verified before it can be reported from raw text. + ### Use depends_on_rule to require one rule before another runs: ```yaml @@ -548,6 +552,7 @@ depends_on_rule: - **Capture flow**: First rule captures `APPID` → second rule injects `{{ APPID }}` into validation HTTP request or pattern - **Visible control:** set `visible: false` on the supporting rule so it doesn’t clutter your report for non-secret matches +- **Primary secret rule:** leave the secret rule visible unless it is also only a helper; helper rules should usually be the ones marked `visible: false` ## Algolia Example Consider this example rule for an Algolia Application ID and Admin Key combination. 
To validate that this is an active credential, both must be matched: diff --git a/docs/CONTEXT_VERIFICATION.md b/docs/CONTEXT_VERIFICATION.md index 1978027..1b1f37d 100644 --- a/docs/CONTEXT_VERIFICATION.md +++ b/docs/CONTEXT_VERIFICATION.md @@ -2,7 +2,7 @@ [← Back to README](../README.md) -Kingfisher starts with a fast regex pass powered by Vectorscan/Hyperscan. For rules classified as `ContextDependent`, it then runs a lightweight parser-based verification pass that extracts likely assignment-style snippets such as `api_key = secret`. +Kingfisher starts with a fast regex pass powered by Vectorscan/Hyperscan. For rules classified as `ContextDependent`, it can then run a lightweight parser-based verification pass that extracts likely assignment-style snippets such as `api_key = secret`. > **Why not a full AST parser?** Earlier implementations used statically linked > grammar crates for this step. That added roughly 20 MB to the binary and @@ -17,7 +17,7 @@ Kingfisher starts with a fast regex pass powered by Vectorscan/Hyperscan. For ru 2. `Matcher::scan_blob` performs the primary regex scan and other filtering. 3. `maybe_apply_context_verification` streams parser candidates near the end of `scan_blob`. 4. Only context-dependent, non-Base64 matches are checked. -5. Candidates that cannot be verified are removed. +5. Candidates whose match profile strictly requires parser confirmation are removed if they cannot be verified. ## Gates @@ -27,7 +27,7 @@ Context verification runs only when all of these are true: - Turbo mode is disabled. - A supported language hint is available. -If any gate fails, context-dependent matches are suppressed rather than falling back to raw regex hits. +If any gate fails, only strict contextual matches are suppressed. Assignment-style contextual rules may still fall back to their raw regex hit when the parser cannot run. 
## Backends @@ -42,8 +42,9 @@ Each lexer runs a comment-aware stripping pass (tracking string boundaries to av ## Verification Model - Rule profiling decides which matches are `ContextDependent`. +- A narrower subset of those profiles are treated as parser-mandatory (`strict_contextual_shape`). - The parser streams candidate text snippets like `secret_key = abcd1234`. - Kingfisher re-runs the rule's anchored regex against each candidate snippet. - Verification succeeds only when the regex secret capture exactly matches the original hit. -This keeps the fast regex engine on the hot path while still filtering noisy generic keyword+token matches with language-aware context. +This keeps the fast regex engine on the hot path while still filtering noisy generic keyword+token matches with language-aware context, without dropping clear assignment-style secrets from raw text files just because no parser backend is available. diff --git a/docs/LIBRARY.md b/docs/LIBRARY.md index f3ec288..2de14e2 100644 --- a/docs/LIBRARY.md +++ b/docs/LIBRARY.md @@ -386,7 +386,7 @@ let template = parser.parse("{{ secret | sha256 }}")?; Available filters: -- **Encoding**: `b64enc`, `b64dec`, `b64url_enc`, `url_encode`, `json_escape` +- **Encoding**: `b64enc`, `b64dec`, `b64url_enc`, `b64url_dec`, `url_encode`, `json_escape` - **Hashing**: `sha256`, `crc32`, `crc32_dec`, `crc32_hex`, `crc32_le_b64` - **HMAC**: `hmac_sha256`, `hmac_sha384`, `hmac_sha1`, `hmac_sha256_b64key` - **Encoding**: `base62`, `base36` diff --git a/docs/PARSING.md b/docs/PARSING.md index 740b0ea..7c35cae 100644 --- a/docs/PARSING.md +++ b/docs/PARSING.md @@ -24,7 +24,7 @@ In the scanning phase (in the Matcher’s implementation), Kingfisher does the f - **Candidate Selection:** Findings from rules classified as context-dependent become parser-verification candidates. - **Language Detection:** If a language string is provided (for example from metadata or extension), the code maps it to a supported parser backend. 
- **Parsing and Querying:** The parser streams normalized snippets such as `key = value` without materializing a full syntax tree. -- **Verification Decision:** Candidate findings are kept only if parser-extracted context verifies the matched secret. +- **Verification Decision:** Strict contextual candidates are kept only if parser-extracted context verifies the matched secret. More explicit assignment-style rules can still survive on raw regex evidence when parser verification is unavailable. ## Supported Languages @@ -42,10 +42,10 @@ Context verification is skipped in certain cases: - **No Language Identified:** If the file isn’t recognized as belonging to one of the supported languages or no language hint is provided, the context verifier isn’t even constructed. - **Non-source Files:** Binary files or files that aren’t expected to contain code (or aren’t extracted from archives) bypass parser-based context verification. - **Large Blobs:** Files larger than 2 MiB skip context verification to avoid spending time on generated or minified content. -- **Verification Errors:** If extraction fails, context-dependent matches are suppressed instead of falling back to raw regex hits. +- **Verification Errors:** If extraction fails, rules whose match profile strictly requires parser confirmation are suppressed. Assignment-style contextual rules can still fall back to their raw regex hit. ## Summary -Parser-based context verification is conditional and complementary. It is called only when the scanned file is a supported source or config file, and its role is to reduce noisy context-dependent findings by checking them against extracted code/config structure. +Parser-based context verification is conditional and complementary. 
It is called only when the scanned file is a supported source or config file, and its role is to reduce noisy strict-context findings by checking them against extracted code/config structure without unnecessarily dropping clear assignment-style secrets from raw text inputs. This layered approach helps improve the accuracy of secret detection while maintaining high performance. diff --git a/docs/RULES.md b/docs/RULES.md index 39982d5..957e435 100644 --- a/docs/RULES.md +++ b/docs/RULES.md @@ -480,6 +480,7 @@ Below is the complete list of Liquid filters available in Kingfisher, along with | `b64enc` | – | Base64-encodes the input using the standard alphabet. | `{{ TOKEN \| b64enc }}` | | `b64url_enc` | – | URL-safe Base64 (no padding). Useful for JWT headers & payloads. | `{{ TOKEN \| b64url_enc }}` | | `b64dec` | – | Decodes a Base64 string. | `{{ "aGVsbG8=" \| b64dec }}` | +| `b64url_dec` | – | Decodes a URL-safe Base64 string (with or without padding). | `{{ "Kys_Pw" \| b64url_dec }}` | | `sha256` | – | Computes the SHA-256 hex digest of the input. | `{{ TOKEN \| sha256 }}` | | `crc32` | – | Computes the CRC32 checksum of the input and returns a decimal value. | `{{ TOKEN \| crc32 }}` | | `crc32_dec` | `digits` (integer, optional) | Computes the CRC32 checksum and returns the last `digits` decimal characters (zero-padded). Defaults to the full value when omitted. | `{{ TOKEN \| crc32_dec: 6 }}` | @@ -533,6 +534,9 @@ Authorization: Basic {{ "api:" | append: TOKEN | b64enc }} - **Using the Captured Value:** This captured value can then be used during the validation phase. For instance, if you have a rule for an Algolia Admin API Key that depends on an Algolia Application ID (captured as `APPID`), the validation logic can incorporate the `APPID` value to confirm that the secret matches the expected pattern or format for that specific account. +- **Detection vs validation:** + `depends_on_rule` is for capture chaining and validation context. 
It does not automatically hide the main secret finding, and it does not by itself mean the rule must be parser-verified before it can be reported from raw text. + ### Use depends_on_rule to require one rule before another runs: ```yaml @@ -543,6 +547,7 @@ depends_on_rule: - **Capture flow**: First rule captures `APPID` → second rule injects `{{ APPID }}` into validation HTTP request or pattern - **Visible control:** set `visible: false` on the supporting rule so it doesn’t clutter your report for non-secret matches +- **Primary secret rule:** leave the secret rule visible unless it is also only a helper; helper rules should usually be the ones marked `visible: false` ## Algolia Example Consider this example rule for an Algolia Application ID and Admin Key combination. To validate that this is an active credential, both must be matched: diff --git a/src/matcher/mod.rs b/src/matcher/mod.rs index 1938b30..c23a5dd 100644 --- a/src/matcher/mod.rs +++ b/src/matcher/mod.rs @@ -27,7 +27,7 @@ use crate::{ parser::Language, rule_profiling::{ConcurrentRuleProfiler, RuleStats}, rules::rule::Rule, - rules_database::{RuleDetectionProfileKind, RulesDatabase}, + rules_database::{RuleDetectionProfileKind, RuleMatchProfile, RulesDatabase}, scanner_pool::ScannerPool, validation_body::ValidationResponseBody, }; @@ -473,8 +473,18 @@ fn maybe_apply_context_verification<'a>( } let mut keep = vec![true; matches.len()]; + let mandatory_candidate_indices: Vec = candidate_indices + .iter() + .copied() + .filter(|idx| { + let Some(rule_idx) = match_rule_indices.get(*idx) else { + return false; + }; + should_require_context_verification(&profiles[*rule_idx]) + }) + .collect(); let Some(language) = load_context_verifier_language(lang_hint, blob_len) else { - for idx in candidate_indices { + for idx in mandatory_candidate_indices { keep[idx] = false; } filter_kept_matches(matches, &keep); @@ -496,7 +506,7 @@ fn maybe_apply_context_verification<'a>( if let Err(e) = verification { debug!("context 
verification unavailable: {e}"); - remaining = candidate_indices; + remaining = mandatory_candidate_indices; } for idx in remaining { @@ -506,6 +516,10 @@ fn maybe_apply_context_verification<'a>( filter_kept_matches(matches, &keep); } +fn should_require_context_verification(profile: &RuleMatchProfile) -> bool { + profile.reason_codes.contains(&"strict_contextual_shape") +} + fn filter_kept_matches<'a>(matches: &mut Vec>, keep: &[bool]) { if keep.iter().all(|k| *k) { return; @@ -1208,6 +1222,95 @@ line2 Ok(()) } + #[test] + fn assignment_style_context_rule_survives_when_context_verification_is_unavailable( + ) -> Result<()> { + let token = "xcexacEQFtULkSTDCXejdWy5ew8NyU9QJoip5a97TE7A"; + let rule = Rule::new(RuleSyntax { + id: "kingfisher.livekit.2".into(), + name: "livekit api secret".into(), + pattern: "(?xi)\\b(?:LIVEKIT_API_SECRET|livekit_api_secret|livekit[-_]?secret|livekitSecret)\\s*[:=]\\s*['\"]?([A-Za-z0-9]{43,44})['\"]?\\b".into(), + confidence: crate::rules::rule::Confidence::Medium, + min_entropy: 0.0, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None::, + revocation: None, + depends_on_rule: vec![], + pattern_requirements: None, + tls_mode: None, + }); + + let rules_db = RulesDatabase::from_rules(vec![rule])?; + let seen = BlobIdMap::new(); + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone()))); + let mut matcher = + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?; + + let blob = Blob::from_bytes(format!("LIVEKIT_API_SECRET={token}").into_bytes()); + let origin = OriginSet::from(Origin::from_file(PathBuf::from("secrets.log"))); + + let found = match matcher.scan_blob(&blob, &origin, None, false, false, false)? 
{ + ScanResult::New(matches) => matches, + _ => panic!("unexpected scan result"), + }; + assert_eq!( + found.len(), + 1, + "assignment-style contextual rules should still scan raw text when parser context is unavailable" + ); + Ok(()) + } + + #[test] + fn depends_on_assignment_style_rule_survives_when_context_verification_is_unavailable( + ) -> Result<()> { + use crate::rules::rule::DependsOnRule; + + let token = "xcexacEQFtULkSTDCXejdWy5ew8NyU9QJoip5a97TE7A"; + let rule = Rule::new(RuleSyntax { + id: "kingfisher.livekit.2".into(), + name: "livekit api secret".into(), + pattern: "(?xi)\\b(?:LIVEKIT_API_SECRET|livekit_api_secret|livekit[-_]?secret|livekitSecret)\\s*[:=]\\s*['\"]?([A-Za-z0-9]{43,44})['\"]?\\b".into(), + confidence: crate::rules::rule::Confidence::Medium, + min_entropy: 0.0, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None::, + revocation: None, + depends_on_rule: vec![Some(DependsOnRule { + rule_id: "kingfisher.livekit.1".into(), + variable: "API_KEY".into(), + })], + pattern_requirements: None, + tls_mode: None, + }); + + let rules_db = RulesDatabase::from_rules(vec![rule])?; + let seen = BlobIdMap::new(); + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone()))); + let mut matcher = + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?; + + let blob = Blob::from_bytes(format!("LIVEKIT_API_SECRET={token}").into_bytes()); + let origin = OriginSet::from(Origin::from_file(PathBuf::from("secrets.log"))); + + let found = match matcher.scan_blob(&blob, &origin, None, false, false, false)? 
{ + ScanResult::New(matches) => matches, + _ => panic!("unexpected scan result"), + }; + assert_eq!( + found.len(), + 1, + "depends_on assignment-style rules should still scan raw text when parser context is unavailable" + ); + Ok(()) + } + #[test] fn self_identifying_rule_remains_hyperscan_only() -> Result<()> { let token = "CCIPAT_FERZRjTN451xnDCy1y9gWn_79fb6ca4d0e5f833612eee17de397a9dca0a9e9f";