diff --git a/CHANGELOG.md b/CHANGELOG.md index db2e20b..41373bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes to this project will be documented in this file. +## [1.46.0] +- Improved rules: AWS, pem +- Added rules for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, together.ai, zhipu +- Added `self-update` command to update the binary independently. It can also update a Homebrew-managed binary + ## [1.45.0] - Added `--repo-artifacts` flag to scan repository issues, gists/snippets, and wikis when cloning via `--git-url` - Added rules for sendbird, mattermost, langchain, notion diff --git a/Cargo.toml b/Cargo.toml index 9ebd262..b906c05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.45.0" +version = "1.46.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/README.md b/README.md index 56e5f18..10fdbf7 100644 --- a/README.md +++ b/README.md @@ -8,21 +8,12 @@ Kingfisher is a blazingly fast secret‑scanning and live validation tool built in Rust. It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware parsing via Tree‑Sitter, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production
-Kingfisher originated as a fork of Praetorian's Nosey Parker, and is built atop their incredible work and the work contributed by the Nosey Parker community. - -## What Kingfisher Adds -- **Live validation** via cloud-provider APIs -- **Extra targets**: GitLab repos, S3 buckets, Docker images, Jira issues, Confluence pages, and Slack messages -- **Compressed Files**: Supports extracting and scanning compressed files for secrets -- **Baseline mode**: ignore known secrets, flag only new ones -- **Allowlist support**: suppress false positives with custom regexes or words -- **Language-aware detection** (source-code parsing) for ~20 languages -- **Native Windows** binary - +Originally forked from Praetorian’s Nosey Parker, Kingfisher adds live cloud-API validation; many more targets (GitLab, S3, Docker, Jira, Confluence, Slack); compressed-file extraction and scanning; baseline and allowlist controls; language-aware detection (~20 languages); and a native Windows binary. See [Origins and Divergence](#origins-and-divergence) for details. 
## Key Features - **Performance**: multithreaded, Hyperscan‑powered scanning built for huge codebases - **Extensible rules**: hundreds of built-in detectors plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) + - **Broad AI SaaS coverage**: finds and validates tokens for OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), Ollama, Langchain, Perplexity, Weights & Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, Together.ai, Zhipu, and many more - **Multiple targets**: - **Git history**: local repos or GitHub/GitLab orgs/users - **Repository artifacts**: with `--repo-artifacts`, scan GitHub/GitLab repository artifacts such as issues, pull/merge requests, wikis, snippets, and owner gists in addition to code @@ -154,18 +145,18 @@ docker run --rm \ # 🔐 Detection Rules at a Glance -Kingfisher ships with hundreds of rules that cover everything from classic cloud keys to the latest LLM-API secrets. Below is an overview: +Kingfisher ships with [hundreds of rules](/data/rules/) that cover everything from classic cloud keys to the latest AI SaaS tokens. Below is an overview: | Category | What we catch | |----------|---------------| -| **AI / LLM APIs** | OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), and more -| **Cloud Providers** | AWS, Azure, GCP, Alibaba Cloud, DigitalOcean, IBM Cloud, Cloudflare, and more -| **Dev & CI/CD** | GitHub/GitLab tokens, CircleCI, TravisCI, TeamCity, Docker Hub, npm, PyPI, and more -| **Messaging & Comms** | Slack, Discord, Microsoft Teams, Twilio, Mailgun, SendGrid, Mailchimp, and more -| **Databases & Data Ops** | MongoDB Atlas, PlanetScale, Postgres DSNs, Grafana Cloud, Datadog, Dynatrace, and more -| **Payments & Billing** | Stripe, PayPal, Square, GoCardless, and more -| **Security & DevSecOps** | Snyk, Dependency-Track, CodeClimate, Codacy, OpsGenie, PagerDuty, and more -| **Misc. 
SaaS & Tools** | 1Password, Adobe, Atlassian/Jira, Asana, Netlify, Baremetrics, and more +| **AI SaaS APIs** | OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), Ollama, Langchain, Perplexity, Weights & Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, together.ai, Zhipu, and more | +| **Cloud Providers** | AWS, Azure, GCP, Alibaba Cloud, DigitalOcean, IBM Cloud, Cloudflare, and more | +| **Dev & CI/CD** | GitHub/GitLab tokens, CircleCI, TravisCI, TeamCity, Docker Hub, npm, PyPI, and more | +| **Messaging & Comms** | Slack, Discord, Microsoft Teams, Twilio, Mailgun, SendGrid, Mailchimp, and more | +| **Databases & Data Ops** | MongoDB Atlas, PlanetScale, Postgres DSNs, Grafana Cloud, Datadog, Dynatrace, and more | +| **Payments & Billing** | Stripe, PayPal, Square, GoCardless, and more | +| **Security & DevSecOps** | Snyk, Dependency-Track, CodeClimate, Codacy, OpsGenie, PagerDuty, and more | +| **Misc. SaaS & Tools** | 1Password, Adobe, Atlassian/Jira, Asana, Netlify, Baremetrics, and more | ## Write Custom Rules! @@ -543,9 +534,11 @@ Kingfisher automatically queries GitHub for a newer release when it starts and t - **Hands-free updates** – Add `--self-update` to any Kingfisher command - * If a newer version exists, Kingfisher will download it, replace the running binary, and re-launch itself with the **exact same arguments**. + * If a newer version exists, Kingfisher will download it, replace the running binary, and re-launch itself with the **exact same arguments**. * If the update fails or no newer release is found, the current run proceeds as normal +- **Manual update** – Run `kingfisher self-update` to update the binary without scanning + - **Disable version checks** – Pass `--no-update-check` to skip both the startup and shutdown checks entirely # Advanced Options @@ -661,6 +654,20 @@ Use `--rule-stats` to collect timing information for every rule. 
After scanning, kingfisher scan --help ``` + +## Origins and Divergence + +Kingfisher began as a fork of Praetorian’s Nosey Parker, started as an experiment in adding live validation support and embedding that validation directly inside each rule. + +Since that initial fork, it has diverged heavily from Nosey Parker: +- Replaced the SQLite datastore with an in-memory store and a Bloom filter +- Collapsed the workflow into a single scan-and-report phase with direct JSON/BSON/SARIF outputs +- Added Tree-Sitter parsing on top of Hyperscan for deeper language-aware detection +- Removed datastore-driven reporting/annotations in favor of live validation, baselines, allowlists, and compressed-file extraction +- Expanded support for new targets (GitLab, Jira, Confluence, Slack, S3, Docker, etc.) +- Delivered cross-platform builds, including native Windows + + # Roadmap - More rules diff --git a/data/rules/aws.yml b/data/rules/aws.yml index 49fcbe3..62041da 100644 --- a/data/rules/aws.yml +++ b/data/rules/aws.yml @@ -5,7 +5,7 @@ rules: (?xi) \b ( - (?:AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) + (?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) [2-7A-Z]{16} ) \b @@ -21,7 +21,7 @@ rules: (?xi) (?: \b - (?:AWS|AMAZON|AMZN|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) + (?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) (?:.|[\n\r]){0,32}? \b ( @@ -29,7 +29,7 @@ rules: ) \b | - \b(?:AWS|AMAZON|AMZN|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) + \b(?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) (?:.|[\n\r]){0,96}? (?:SECRET|PRIVATE|ACCESS) (?:.|[\n\r]){0,16}? 
diff --git a/data/rules/cerebras.yml b/data/rules/cerebras.yml new file mode 100644 index 0000000..73760a6 --- /dev/null +++ b/data/rules/cerebras.yml @@ -0,0 +1,36 @@ +rules: + - name: Cerebras AI API Key + id: kingfisher.cerebras.1 + pattern: | + (?xi) + \b + ( + csk-[a-z0-9]{48} + ) + \b + confidence: medium + min_entropy: 3.0 + validation: + type: Http + content: + request: + method: GET + url: "https://api.cerebras.ai/v1/models" + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"object"' + - '"data"' + match_all_words: true + references: + - https://docs.cerebras.net/ + examples: + - "csk-6nptf4w5cx36fw58t3hkx48jvm52wm693pex5tjm29kn55yt" + - "csk-e2knhj8h3h4erp6crfx6rh52tvecj4xnwmtjf3mtrvtt54et" + - "csk-rhw8npjrp6kpv9phm55n5nv5rkkm4492jepx3yh65dc9cwe9" + - "csk-w6p3nxk3dc5249mrpmv642fffert28rwdkepffrpn8rtfr9h" diff --git a/data/rules/fireworksai.yml b/data/rules/fireworksai.yml new file mode 100644 index 0000000..0933441 --- /dev/null +++ b/data/rules/fireworksai.yml @@ -0,0 +1,35 @@ +rules: + - name: Fireworks.ai API Key + id: kingfisher.fireworks.1 + pattern: | + (?xi) + \b + ( + fw_[A-Z0-9]{24} + ) + \b + confidence: medium + min_entropy: 3.5 + validation: + type: Http + content: + request: + method: GET + url: "https://api.fireworks.ai/inference/v1/models" + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"owned_by"' + - '"data"' + match_all_words: true + references: + - https://readme.fireworks.ai/reference/getting-started-with-the-api + examples: + - "fw_3ZL5ji26Tp7baYrW5S2pA5xi" + - "fw_3ZaW5fSpx5GTnHpRGb8CPu2V" + - "fw_3ZSU8ymvmZ38YPv8uwbZHAyW" diff --git a/data/rules/friendli.yml b/data/rules/friendli.yml new file mode 100644 index 0000000..ec5f3ec --- /dev/null +++ b/data/rules/friendli.yml @@ -0,0 +1,35 @@ +rules: + - 
name: Friendli.ai API Key + id: kingfisher.friendli.1 + pattern: | + (?xi) + \b + ( + flp_[A-Z0-9]{46} + ) + \b + confidence: medium + min_entropy: 3.0 + validation: + type: Http + content: + request: + method: GET + url: "https://api.friendli.ai/dedicated/beta/endpoint" + headers: + Authorization: "Bearer {{ TOKEN }}" + Content-Type: "application/json" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"data"' + - '"status"' + references: + - https://docs.friendli.ai/reference/authentication + examples: + - "flp_eb8CAc1OHdVISFraFZXFYQeH1CYtqM2VdYFvV1duniWw32" + - "flp_fYvncz2Ahh4YEfSKbNoT09DWlwPq5I7svZG2l1bdbpOg1c" + - "flp_kGcjWhZQ4zYQnY7b3O6nukAhflKZJeS7pNDhs79IRrfodc" diff --git a/data/rules/mailgun.yml b/data/rules/mailgun.yml index c17c40b..06a02c2 100644 --- a/data/rules/mailgun.yml +++ b/data/rules/mailgun.yml @@ -2,8 +2,8 @@ rules: - name: MailGun Token id: kingfisher.mailgun.1 pattern: | - (?xi) - \b + (?xi) + \b mailgun (?:.|[\n\r]){0,32}? 
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) diff --git a/data/rules/nvidia.yml b/data/rules/nvidia.yml new file mode 100644 index 0000000..1dc7b31 --- /dev/null +++ b/data/rules/nvidia.yml @@ -0,0 +1,30 @@ +rules: + - name: NVIDIA NIM API Key + id: kingfisher.nvidia.nim.1 + pattern: | + (?xi) + \b + ( + nvapi-[A-Z0-9_-]{60,70} + ) + \b + confidence: medium + min_entropy: 3.5 + examples: + - "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" + - "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" + - "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" + validation: + type: Http + content: + request: + method: GET + url: "https://api.nvcf.nvidia.com/v2/nvcf/functions" + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: ["id", "versionId"] diff --git a/data/rules/ollama.yml b/data/rules/ollama.yml new file mode 100644 index 0000000..a2df4ab --- /dev/null +++ b/data/rules/ollama.yml @@ -0,0 +1,47 @@ +rules: + - name: Ollama API Key + id: kingfisher.ollama.1 + pattern: | + (?xi) + \b + ollama + (?:.|[\n\r]){0,32}? + \b + ( + [a-f0-9]{32}\.[a-zA-Z0-9_-]{24} + ) + confidence: medium + min_entropy: 3.5 + validation: + type: Http + content: + request: + method: POST + url: https://ollama.com/api/generate + headers: + Content-Type: application/json + # Turbo keys are sent as the raw value in Authorization (no "Bearer " prefix) + # per working client behavior. 
+ Authorization: "{{ TOKEN }}" + body: | + { + "model": "gpt-oss:20b", + "prompt": "ping", + "stream": false + } + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"response":' + - '"done":true' + references: + - https://ollama.com/blog/turbo + examples: + - "ollama key = 8bcdd9b4e28e4e1b8bf14a2eb8701220.QH5p5TU2BDwzHu5_RCtvJXsj" + - "ollama key = e56714bd7c1146e4b4801244bc2bc67a.3GAswjZGZ5YY6Qdgt0xg56vM" + - "ollama key = 872658d00c284033a707abf1725d4b6c.-4JpTp0dQHmf0nb89xI-wgP-" + - "ollama key = 0c4e6bf1222c4ffc87025a7a9ffd5cac.z-fgt1JO9-LadzA2cL23qLH3" + - "ollama key = dae874a007d442cdb807910c4c57c6f5.B_aHUSdeAe42UR-X41StUFJq" \ No newline at end of file diff --git a/data/rules/pem.yml b/data/rules/pem.yml index 390171d..00d93c6 100644 --- a/data/rules/pem.yml +++ b/data/rules/pem.yml @@ -55,6 +55,7 @@ rules: (?: LS0tLS1CRUdJTiBSU0EgUFJJVkFURSBLRVktLS0t (?# prefix of base64 encoding of `-----BEGIN RSA PRIVATE KEY-----` ) | LS0tLS1CRUdJTiBEU0EgUFJJVkFURSBLRVktLS0t (?# prefix of base64 encoding of `-----BEGIN DSA PRIVATE KEY-----` ) | LS0tLS1CRUdJTiBFQyBQUklWQVRFIEtFWS0tLS0t (?# prefix of base64 encoding of `-----BEGIN EC PRIVATE KEY-----` ) + | LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0t (?# prefix of base64 encoding of `-----BEGIN PRIVATE KEY-----` ) ) [a-zA-Z0-9+/=]{50,} ) diff --git a/data/rules/togetherai.yml b/data/rules/togetherai.yml new file mode 100644 index 0000000..ee43097 --- /dev/null +++ b/data/rules/togetherai.yml @@ -0,0 +1,36 @@ +rules: + - name: Together.ai API Key + id: kingfisher.together.1 + pattern: | + (?xi) + \b + ( + tgp_v1_[A-Z0-9_-]{43} + ) + confidence: medium + min_entropy: 3.0 + examples: + - tgp_v1_Tctm6OfOeNkwLIKkyxJxUHIqNKx2AvFr65tQRIOMgzY + - tgp_v1_HgWU7iym2128y2Pdj-7-9kX4W_MSCcIT5EhuY_SmNqc + - tgp_v1_xeybrcbPy2c10JR9eAlkOq1qvPaBXT3ZbXp8yKq1VME + - tgp_v1_yanBH3171P6HAZ01LbzSDlnOiXM3lo_89kG2Gg5yzko + validation: + type: Http + content: + request: + method: GET + url: 
"https://api.together.xyz/v1/models" + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: "application/json" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"id":' + - '"object":' + references: + - https://docs.together.ai/reference/authentication + - https://docs.together.ai/reference/models-list diff --git a/data/rules/weightsandbiases.yml b/data/rules/weightsandbiases.yml new file mode 100644 index 0000000..bed9ca5 --- /dev/null +++ b/data/rules/weightsandbiases.yml @@ -0,0 +1,35 @@ +rules: + - name: Weights and Biases API Key + id: kingfisher.wandb.1 + pattern: | + (?xi) + \b + (?:wandb|weightsandbiases) + (?:.|[\n\r]){0,16}? + ( + [a-f0-9]{40} + ) + \b + confidence: medium + min_entropy: 3.5 + examples: + - "export WANDB_API_KEY=872ab943740b34157041da2529fb160d89632710" + - "wandb_api_key: 1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b" + - "WeightsandBiases token => 7f9d2e34c1a0b5d6e7f81234abcd5678ef901234" + validation: + type: Http + content: + request: + method: POST + url: "https://api.wandb.ai/graphql" + headers: + Authorization: "Basic {{ 'api:' | append: TOKEN | b64enc }}" + Content-Type: "application/json" + body: | + {"query":"query { viewer { email username } }"} + response_matcher: + - report_response: true + - type: JsonValid + - type: WordMatch + words: + - '"username"' diff --git a/data/rules/zhipu.yml b/data/rules/zhipu.yml new file mode 100644 index 0000000..bc1bffd --- /dev/null +++ b/data/rules/zhipu.yml @@ -0,0 +1,34 @@ +rules: + - name: Zhipu (BigModel) API Key + id: kingfisher.zhipu.1 + pattern: | + (?xi) + \b + ( + [A-F0-9]{32} + \. 
+ [A-Z0-9]{16} + ) + \b + confidence: medium + min_entropy: 4.0 + examples: + - "3494c505cf244a3fb17417d6894d404c.LLSZ2InjarUXEhNr" + - "a64cb6a9b4e840919351d041dbe65654.eh1YZt0SAhSTOsNR" + - "4d140d7d21c4477ab20d5090e530496c.A5pEbmgcid2deKNA" + validation: + type: Http + content: + request: + method: GET + url: "https://open.bigmodel.cn/api/paas/v4/files" + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: "application/json" + timeout_seconds: 12 + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: ["object", "data"] diff --git a/src/cli/global.rs b/src/cli/global.rs index 93599b7..8f761de 100644 --- a/src/cli/global.rs +++ b/src/cli/global.rs @@ -62,6 +62,10 @@ pub enum Command { /// Manage rules #[command(alias = "rule")] Rules(RulesArgs), + + /// Update the Kingfisher binary + #[command(name = "self-update")] + SelfUpdate, } pub static RAM_GB: Lazy