Merge pull request #253 from mongodb/development

v1.87.0
This commit is contained in:
Mick Grove 2026-03-09 22:51:28 -07:00 committed by GitHub
commit e9362d16ec
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
102 changed files with 2534 additions and 301 deletions

View file

@ -53,8 +53,6 @@ jobs:
run: make tests
env:
CARGO_BUILD_JOBS: 1
- name: Build (Makefile linux-x64)
run: make ubuntu-x64
linux-arm64:
name: Linux arm64
@ -93,8 +91,6 @@ jobs:
run: make tests
env:
CARGO_BUILD_JOBS: 1
- name: Build (Makefile linux-arm64)
run: make ubuntu-arm64
macos-arm64:
name: macOS arm64
@ -120,8 +116,6 @@ jobs:
- name: Run tests
run: make tests
- name: Build (Makefile darwin-arm64)
run: make darwin-arm64
windows:
name: Windows ${{ matrix.arch }}
@ -204,18 +198,3 @@ jobs:
echo "▶ cargo test --release --workspace --all-targets --target $target_triple"
cargo test --release --workspace --all-targets --target "$target_triple"
- name: Build
shell: msys2 {0}
run: make windows-${{ matrix.arch }}
- name: Move artifact to dist
shell: bash
run: |
mkdir -p dist
cp target/release/kingfisher-windows-${{ matrix.arch }}.zip dist/
- uses: actions/upload-artifact@v4
with:
name: kingfisher-windows-${{ matrix.arch }}
path: dist/kingfisher-*windows-${{ matrix.arch }}*.*

View file

@ -342,14 +342,11 @@ jobs:
run: |
mkdir -p dist
cp target/release/kingfisher-windows-${{ matrix.arch }}.zip dist/
cp target/release/CHECKSUM-windows-${{ matrix.arch }}.txt dist/
- uses: actions/upload-artifact@v4
with:
name: kingfisher-windows-${{ matrix.arch }}
path: |
dist/kingfisher-windows-${{ matrix.arch }}.zip
dist/CHECKSUM-windows-${{ matrix.arch }}.txt
path: dist/kingfisher-windows-${{ matrix.arch }}.zip
release:
name: Public GitHub Release

View file

@ -2,6 +2,14 @@
All notable changes to this project will be documented in this file.
## [v1.87.0]
- Tree-sitter verification now runs for blobs from `0` bytes up to `128 KiB` (previously `1 KiB` to `64 KiB`), while remaining a post-regex verification step applied only to context-dependent candidate matches from Hyperscan/Vectorscan.
- False-positive reduction: Hyperscan/Vectorscan still scans everything first, then tree-sitter performs a second-pass verification only on auto-classified context-dependent findings; self-identifying/token-explicit findings stay regex-first.
- Hardened Perplexity API key validation to reject auth failures (`401`/`403`) and avoid false "Active Credential" results from error payloads.
- Fixed Yelp API key validation false positives by switching to an auth-enforcing endpoint (`/v3/businesses/search`) and adding explicit auth error guards.
- Added 37 new provider detection + HTTP validation rules: Ably, AbstractAPI, AbuseIPDB, AviationStack, Better Stack, Brevo, Clearout, Clerk, Cloudinary, Coinlayer, Contentstack, Currencylayer, Daily, Fixer, Geoapify, Hunter.io, Mux, NewsAPI, Numverify, OneSignal, Pinecone, Pingdom, Positionstack, Railway, Render, Rollbar, Salesloft, Sanity, StatusCake, Storyblok, UptimeRobot, urlscan.io, VirusTotal, WeatherAPI, Webflow, and ZeroBounce.
- Tightened regex specificity for newly added rules by replacing broad variable-length token captures with explicit fixed formats/lengths and aligned examples to pass `rules check`.
## [v1.86.0]
- GitLab scanning: honor OS-trusted internal CAs without requiring `SSL_CERT_FILE`, and preserve custom GitLab API ports in repository enumeration and artifact fetching.
- Added detection/validation rules for App Center, Branch.io, BrowserStack, Calendly, Cypress, Delighted, DeviantArt, Instagram, Iterable, Keen.io, Lokalise, Pendo, Razorpay, Spotify, WakaTime, WPEngine.

View file

@ -48,7 +48,7 @@ http = "1.4"
[package]
name = "kingfisher"
version = "1.86.0"
version = "1.87.0"
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true

View file

@ -4,7 +4,7 @@
<img src="docs/kingfisher_logo.png" alt="Kingfisher Logo" width="126" height="173" style="vertical-align: right;" />
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![Detection Rules](https://img.shields.io/badge/Detection%20Rules-500-2ea043.svg)](https://github.com/mongodb/kingfisher)<br>
[![Detection Rules](https://img.shields.io/badge/Detection%20Rules-540-2ea043.svg)](https://github.com/mongodb/kingfisher)<br>
[![ghcr downloads](https://ghcr-badge.elias.eu.org/shield/mongodb/kingfisher/kingfisher)](https://github.com/mongodb/kingfisher/pkgs/container/kingfisher)<br>
@ -645,6 +645,7 @@ kingfisher scan /tmp/repo --branch feature-1 \
| [FINGERPRINT.md](docs/FINGERPRINT.md) | Understanding finding fingerprints and deduplication |
| [COMPARISON.md](docs/COMPARISON.md) | Benchmark results and performance comparisons |
| [PARSING.md](docs/PARSING.md) | Language-aware parsing details |
| [TREE_SITTER.md](docs/TREE_SITTER.md) | Tree-sitter scanning flow, verification gates, and fallback behavior |
# Library Usage

View file

@ -38,6 +38,8 @@ impl Default for ContentInspector {
}
impl ContentInspector {
/// Maximum bytes inspected for content-based language hints.
const LANGUAGE_SAMPLE_BYTES: usize = 4096;
/// Create a new inspector with default thresholds.
#[inline]
pub fn new() -> Self {
@ -140,8 +142,12 @@ impl ContentInspector {
}
}
// Limit expensive content probing to a small prefix. This keeps language
// detection cheap for large files while preserving extension-based accuracy.
let sample = &content[..content.len().min(Self::LANGUAGE_SAMPLE_BYTES)];
// 3) Shebang detection (in-memory): match by longest prefix, byte-wise (no UTF-8 needed).
if let Some(first_line) = content.split(|&b| b == b'\n').next() {
if let Some(first_line) = sample.split(|&b| b == b'\n').next() {
if first_line.starts_with(b"#!") {
for (prefix, lang) in SHEBANG_PREFIXES.iter() {
if first_line.starts_with(prefix) {
@ -152,7 +158,7 @@ impl ContentInspector {
}
// 4) Lightweight content markers to catch a few ubiquitous cases without I/O.
let s = String::from_utf8_lossy(content);
let s = String::from_utf8_lossy(sample);
if s.contains("<?php") {
return Some(LanguageType::Php.name().to_string());
}

View file

@ -0,0 +1,78 @@
# AGENTS.md
Rule-authoring instructions for this directory.
## Scope
- Applies to `crates/kingfisher-rules/data/rules/` and all files under it.
- This file overrides broader AGENTS guidance for rule-writing tasks in this subtree.
## Goal
- Add or update YAML detection rules with high precision, low false positives, and safe remediation support.
## Rule File Conventions
- Keep provider rules in provider-named files (for example `github.yml`, `openai.yml`).
- Prefer lowercase filenames with `.yml`.
- Keep rule IDs stable and unique. Prefer `kingfisher.<provider>.<number>` unless a descriptive suffix is already established for that provider.
- Reuse nearby provider patterns/styles instead of inventing new structure.
## Required Rule Shape
Each rule entry should define:
- `name`
- `id`
- `pattern`
- `min_entropy` (default to 3.0)
- `confidence` (default to medium)
- `examples` (at least one realistic positive example)
Strongly recommended fields:
- `pattern_requirements` (for extra filtering)
- `references`
## Pattern Quality Rules
- Prefer specific anchors/prefixes and provider context over broad generic regex.
- Use `pattern_requirements` to enforce quality constraints (`min_digits`, `min_uppercase`, `min_lowercase`, `min_special_chars`, `ignore_if_contains`, `checksum`).
- Use checksum validation in `pattern_requirements.checksum` when token formats support it.
- Use `visible: false` for helper/non-secret captures used only by dependent rules.
- Use `depends_on_rule` for multi-part credential validation (for example ID + secret).
## Validation Policy (Important)
- Default: define validation logic in YAML under `validation:`.
- Do not move validation logic into Rust unless YAML cannot reliably express it.
- Code-backed validation types (for example AWS, GCP, Coinbase, MongoDB) are notable exceptions and should remain rare.
- For new rules, first attempt `Http`/`Grpc` YAML validation before considering exception paths.
## Revocation Policy
- If a rule has validation and the provider API safely supports revocation, add `revocation:` in the same YAML rule.
- Prefer explicit success criteria in `response_matcher`.
- Use `HttpMultiStep` revocation when API workflows require pre-fetch/extraction steps.
- If revocation is intentionally not supported, document why with an inline YAML comment.
## Authoring Workflow
1. Choose the target provider file (or add a new provider file if no suitable file exists).
2. Copy a structurally similar rule from this directory.
3. Implement/adjust `pattern`, `examples`, and filtering (`pattern_requirements`, `min_entropy`).
4. Add YAML `validation` (default path).
5. Add YAML `revocation` when supported.
6. Add `references` for token format/API behavior.
7. Verify locally (below).
## Local Verification Checklist
- Syntax/load checks:
- `cargo test -p kingfisher-rules`
- Broader regression check:
- `cargo test --workspace --all-targets`
- Behavioral check against sample content:
- `kingfisher scan ./testdata --rule <rule-family-or-id> --rule-stats`
- Validation check (when validation is present):
- `kingfisher validate --rule <rule-id> <token-or-secret>`
## Documentation
Read these before complex edits:
- `docs/RULES.md` (schema, pattern requirements, checksum, Liquid, validation/revocation)
- `docs/MULTI_STEP_REVOCATION.md`
- `docs/TOKEN_REVOCATION_SUPPORT.md`
## Change Discipline
- Keep changes scoped to the specific provider/rule request.
- Do not refactor unrelated rules in the same PR unless explicitly asked.
- Preserve existing YAML style and indentation conventions in this directory.

View file

@ -0,0 +1,39 @@
# Ably API key detection rule.
# A match needs the keyword "ably", then (lazily, within 32 characters) one of
# the credential keywords, then (within another 32 characters) a captured token
# shaped like <8 chars>.<8 chars>:<24 chars>, each part drawn from [A-Za-z0-9_-].
rules:
- name: Ably API Key
id: kingfisher.ably.1
# (?xi) = extended (whitespace in the pattern is ignored) + case-insensitive.
pattern: |
(?xi)
\b
ably
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API)
(?:.|[\n\r]){0,32}?
\b
(
[A-Za-z0-9_-]{8}\.[A-Za-z0-9_-]{8}:[A-Za-z0-9_-]{24}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- ABLY_API_KEY=abc12345.xyz78901:secretkeyvalue1234567890
- ably_key = "appid123.keyid987:AbCdEfGhIjKlMnOpQrStUvWx"
references:
- https://ably.com/docs/auth/basic
- https://ably.com/docs/api/rest-api
# Live validation: GET the channel listing with the whole captured key passed
# through the b64enc filter as the HTTP Basic credential. The matchers below
# require HTTP 200 and the JsonValid check.
validation:
type: Http
content:
request:
method: GET
url: https://rest.ably.io/channels?limit=1
headers:
Authorization: "Basic {{ TOKEN | b64enc }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -0,0 +1,41 @@
# AbstractAPI key detection rule.
# Context-anchored: the keyword "abstractapi" must be followed (within 16
# characters) by a credential keyword and then (within 16 more characters) by a
# 32-character alphanumeric token, which is the captured secret.
rules:
- name: AbstractAPI API Key
id: kingfisher.abstractapi.1
# (?xi) = extended + case-insensitive, so ABSTRACTAPI_KEY and abstractapi_key both match.
pattern: |
(?xi)
\b
abstractapi
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- abstractapi_api_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
- ABSTRACTAPI_KEY=abcdef1234567890abcdef1234567890
references:
- https://docs.abstractapi.com/api/ip-geolocation
- https://abstractapi.com/docs
# Live validation: the token is sent as the `api_key` query parameter to the
# IP-geolocation endpoint. Matchers require HTTP 200, a negated StatusMatch on
# 401/403 (explicit auth-failure guard), and the JsonValid check.
validation:
type: Http
content:
request:
method: GET
url: https://ipgeolocation.abstractapi.com/v1/?api_key={{ TOKEN }}
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401, 403]
negative: true
- type: JsonValid

View file

@ -0,0 +1,39 @@
# AbuseIPDB API key detection rule.
# Requires the keyword "abuseipdb", a credential keyword within 16 characters,
# and then (within 16 more characters) an 80-character alphanumeric token.
rules:
- name: AbuseIPDB API Key
id: kingfisher.abuseipdb.1
# (?xi) = extended + case-insensitive.
pattern: |
(?xi)
\b
abuseipdb
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{80}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- abuseipdb_api_key = "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"
- ABUSEIPDB_KEY=1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef
references:
- https://docs.abuseipdb.com/#introduction
- https://www.abuseipdb.com/api
# Live validation: runs a `check` lookup for 127.0.0.1 with the token in the
# `Key` request header. Matchers require HTTP 200 and the JsonValid check.
validation:
type: Http
content:
request:
method: GET
url: https://api.abuseipdb.com/api/v2/check?ipAddress=127.0.0.1
headers:
Key: "{{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -12,7 +12,7 @@ rules:
)
\b
min_entropy: 3.5
confidence: medium
confidence: high
examples:
- ADAFRUIT_AIO_KEY = "aio_giXk31KzM05IVxHRwJwtpNGClUE5"
validation:

View file

@ -12,7 +12,7 @@ rules:
min_lowercase: 1
min_entropy: 3.3
visible: false
confidence: medium
confidence: high
examples:
- 'age1zvkyg2lqzraa2lnjvqej32nkuu0ues2s82hzrye869xeexvn73equnujwj'
references:
@ -28,7 +28,7 @@ rules:
AGE-SECRET-KEY-1[QPZRY9X8GF2TVDW0S3JN54KHCE6MUA7L]{58}
)
min_entropy: 3.3
confidence: medium
confidence: high
examples:
- |
# created: 2022-09-26T21:55:47-05:00
@ -53,7 +53,7 @@ rules:
\b
min_entropy: 3.3
visible: false
confidence: medium
confidence: high
examples:
- 'age1pq1x34nzsvr0rxjsgdn8zgyhfe8j7ceq5r9rdelkjuh3y235jzxshfg87pzf5zrqtzdxz95paef6caq5aapdmwjjqpjfdyxnzr2zampc3uxy0dg4z2n2gm9su72p0pc3u0jvev55l694v78snxg3yzvcl7yda0eyytqj6a0ec477lnhcy5hzpz4zq3pxanve4cn62gqj3pjy5lqj9c6kyj4v2z8alktn8zh99970x79gjkv7522hv9kfz35zsnxhsx8wwtmu9cy3ftzjgwcp4sshn3llnylnpdsyz5jm72vefv4x5vfwytrefxg4wq3mv42wcrvkj742479zrxzpvp2p3e9fed9f0739vcu80r7ma28qfhnvlv4gfzel9q654dj3zmuvvz893azhxdvs9fxd0r7jzchzcfcs5mkyyjxhw0n2z6dvp9yn9qfdp29h0azxqyjw6v7fhyuzj7zel0uq6j9rd7wgrpz7mf5dnj43jwsgvrc8qcnhy7tu6dkdujuxzkp9xj43xe8h92ktre2a3u3s8mm5mrp9nr9pwkgtz4mdlq9hgn4fps4k57ff6wddn2fy23t47sm20r8km8sd2pcyyafnet8f0dajsrlyjeah4n3mssr6aseevuuskdvq5lzguyvpgwpta742c6698vgutzqgny8usfg0w2he7kq5vyxjd0f9hqg8xk26y9e4th0gezq92q4cpp5p2y9hf5f2cje5l0c3sa3a2qxmm38pxxvhxh99yzmfz0zk7r2s64nnwjhkfgfr3gf8xnmppcgmaykvh5sh6g7vk9790rf8ws0axmr2t7z8aae5fq2029uvcn2ghgt4fu4wgwdc0k0cz52qkvwmuzj8p8k5jgf3xzk5zmrkavjekjrpeq408xz3zxazwkc6tyfmhayrkfpjhwtz5mp8j8guqe43k2q6m2kte03vrw27y3wmqyu5etmt9dnkwcnnpmu9gz9dekfhdevf42ucshphnrk38ra6hx8w5f8q5ru0xdhrjxmwqf6cused7zc5xvq43r0zscjglpwlptpwydhqw64xz7ptjdyeyzpq2zkxtmzg29gzjpvzva4d3l0cenn9xs297wf4y4ukwrunf57xj6pm7nvrkwvtrt8hwcmgv8x7ajw7258ugf9wvkmk4052ekg87tw5vnx8nq2swyzv77v8yqlwsenvamr0zssknwts8rrhfuwj7ykysnq9jxy0uv3kuyt22djszjdtvpz6d0s0kwh8ryynddzud92emeyvvyqktd0jtj7rvvg5gch25v8smlvny3kvn5gagyz475ze2y6q466xqmz2n3hs77lddeqyta2nch5k2u5yacuk9ywnwfdzvyejnucz724hj77hrrmakm7pr3kxsrxq22ejexlud9fy2kdqmkg5yncz7jm5wv2qjk5w5kvcpqsry2yqffh2la52dxfjkjq5rzhjzeyn6dupn0qwtyv7s4lwg3xdarsdlwe2y3tujy480y7z39q259fzx6jhd2j0f5hagqpcpees7hzc2yrk5cy788uk3s7qvp5cpepx24gvws3m2g433exgwppnkjscec8qu4y9z9r7vccexjcjaen42245lmgmxmuavg9alej92322gvvyy2t6267v09ch64y0m53jff0vjj96s0ypk60hr3jw4myd6m5hpn3xjstx7tl2szhpr5qe8jj08ydjc4wy2rch2fhuy3pdfjax5awe9j99ly5hkntzz9fe5zatgjvzdd0kgtxs25njnajyf6ssekp7gelxquusn4pt25czh3scj68kq79wdn5tgm6yvm9nzavrg043x3msnygf8dweknw5jmqd0uvny6ttsn09508k0c55zfnegrm9efhxpfqdkmhh6gjtqmwze9pyyzk3tlhl53k2ykx3qheyty7saeq0d3fzv49zc0k'
references:
@ -71,7 +71,7 @@ rules:
)
\b
min_entropy: 3.3
confidence: medium
confidence: high
examples:
- |
# created: 2025-11-17T12:15:17+01:00

View file

@ -23,13 +23,24 @@ rules:
- name: Alibaba Access Key Secret
id: kingfisher.alibabacloud.2
pattern: |
(?xi)
(?x)
\b
alibaba
(?:.|[\n\r]){0,32}?
(
[a-z0-9]{30}
(?i:alibaba|alibaba[\s_-]*cloud|aliyun)
(?:.|[\n\r]){0,40}?
(?i:access[\s_-]*key[\s_-]*secret|access[\s_-]*secret|secret|token|key)
(?:.|[\n\r]){0,16}?
(?:
[=:]
|
["']\s*:\s*["']
)
\s*
["']?
(
[A-Za-z0-9]{30}
)
\b
["']?
min_entropy: 4.2
confidence: medium
examples:

View file

@ -15,7 +15,7 @@ rules:
min_uppercase: 1
min_lowercase: 1
min_entropy: 3.3
confidence: medium
confidence: high
examples:
- sk-ant-api668-Clm512odot9WDD7itfUU9R880nefA1EtYZDbpE-C9b0XQEWpqFKf9DQUo03vOfXl16oSmyar1CLF1SzV3YzpZJ6bahcpLAA
references:

View file

@ -11,7 +11,7 @@ rules:
min_digits: 2
min_uppercase: 1
min_lowercase: 1
confidence: medium
confidence: high
min_entropy: 3.5
validation:
type: Http

View file

@ -13,7 +13,7 @@ rules:
min_uppercase: 1
min_lowercase: 1
min_entropy: 3.5
confidence: medium
confidence: high
examples:
- |
export HOMEBREW_ARTIFACTORY_API_TOKEN=AKCp8igrDNFerC357m4422e4tmu7xB983QLPxJhKFcSMfoux2RFvp8rc4jC8t9ncdmYCMFD8W

View file

@ -13,10 +13,11 @@ rules:
\b
pattern_requirements:
min_lowercase: 1
min_digits: 2
min_entropy: 3.5
confidence: medium
examples:
- Atlassian_key = "DjayBenyJrtpvydFCzAphcqc"
- Atlassian_key = "DjayBeny2rtpvydF4zAphcqc"
- "ATLASSIAN_API_TOKEN:'abcdef1234567890abcdef12'"
references:
- https://developer.atlassian.com/cloud/admin/organization/rest/api-group-orgs/#api-v1-orgs-get

View file

@ -0,0 +1,40 @@
# AviationStack API key detection rule.
# Requires the keyword "aviationstack", a credential keyword within 16
# characters, and then (within 16 more characters) a 32-character alphanumeric
# token, which is the captured secret.
rules:
- name: AviationStack API Key
id: kingfisher.aviationstack.1
# (?xi) = extended + case-insensitive.
pattern: |
(?xi)
\b
aviationstack
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- AVIATIONSTACK_ACCESS_KEY=abc123def456ghi789jkl012mno345pq
- aviationstack_access_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
references:
- https://aviationstack.com/documentation
# Live validation: the token is sent as the `access_key` query parameter to
# the flights endpoint. Matchers require HTTP 200, a negated StatusMatch on
# 401, and the JsonValid check.
validation:
type: Http
content:
request:
method: GET
url: https://api.aviationstack.com/v1/flights?access_key={{ TOKEN }}
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401]
negative: true
- type: JsonValid

View file

@ -31,30 +31,43 @@ rules:
- name: Azure Storage Account Key
id: kingfisher.azurestorage.2
pattern: |
(?xi)
azure
(?:.|[\n\r]){0,128}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,128}?
["':\s=}\]\)]
(
(?:
[A-Z0-9+\\/-]{86,88}={1,2}
)
|
(?:
[A-Z0-9+\\/-]{86,88}\b
)
(?x)
\b
(?:
(?i:azure)(?:[_\s-]*(?i:storage))?
(?:[_\s-]*(?:account[_\s-]*key|storage[_\s-]*key|shared[_\s-]*key|access[_\s-]*key|accountkey))
|
(?i:account)[_.\s-]*(?i:key)
|
(?i:storage)[_.\s-]*(?i:key)
)
\b
(?:.|[\n\r]){0,24}?
(?:
[=:]
|
["']\s*:\s*["']
)
\s*
["']?
(
[A-Za-z0-9+/]{86}==
)
['"]?
pattern_requirements:
min_digits: 2
min_uppercase: 2
min_lowercase: 2
min_special_chars: 1
min_entropy: 4.0
confidence: medium
examples:
- Azure AccountKey=Xy9aB8cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1q
- Azure AccountKey=Ky7aC1cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1g==\
- Azure AccountKey=oqb4TdY9T0hphvktd5fJnMiHuQqzVy1jd5sSuOpAbGkaoqTlrHl0BOJN2okcasinVLOJzfDbZo1L+ASt68RAhA==
- Azure AccountKey=B/1EVX2Ui47X09tqU3GI/j+Nko9r5COPm0Hea9tfzitF9MQX9lZZiNO3tYQckWnt+rtlGIWS+sCx+AStkq8ZLg==
- Azure AccountKey=u45diQdTiXeuSKl5r4EjgbPP72EYpuTNEzfMTi0mk+d2sTisA4gWzt4H1Ag3kqFaCykWZv2S6KQo+AStHF56RQ==
- Azure AccountKey=b8a/Z4wFAbhOPQTMa4PUTKr2XQhwoyWtP/3PnEto3mK86CFQnVYyTV/HSrij88h5jVYyzwUk0oTw+AStIKN/4w==
- Azure AccountKey=JJD1GDiHCmtTpCOKpBYkXgZKrZvi7P4mRDe3jNVGc/JL/bp51uWcWL0rkOByk5VsX2MM62A/ABkE+AStU9qMkA==
- Azure AccountKey=u45diQdTiXeuSKl5r4EjgbPP72EYpuTNEzfMTi0mk+d2sTisA4gWzt4H1Ag3kqFaCykWZv2S6KQo+AStHF56RQ==
validation:
type: AzureStorage
references:

View file

@ -0,0 +1,42 @@
# Better Stack API token detection rule.
# Requires the keyword "betterstack", a credential keyword within 16
# characters, and then (within 16 more characters) a 24-character token drawn
# from [A-Za-z0-9_-].
rules:
- name: Better Stack API Token
id: kingfisher.betterstack.1
# (?xi) = extended + case-insensitive.
pattern: |
(?xi)
\b
betterstack
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9_-]{24}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- betterstack_api_token = "a1b2c3d4e5f6g7h8i9j0k1l2"
- BETTERSTACK_TOKEN=abcdef1234567890abcdef12
references:
- https://betterstack.com/docs/uptime/api/getting-started-with-uptime-api/
- https://betterstack.com/docs/uptime/api/list-all-existing-monitors
# Live validation: lists uptime monitors with the token as a Bearer
# credential. Matchers require HTTP 200, a negated StatusMatch on 401/403, and
# the JsonValid check.
validation:
type: Http
content:
request:
method: GET
url: https://uptime.betterstack.com/api/v2/monitors
headers:
Authorization: "Bearer {{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401, 403]
negative: true
- type: JsonValid

View file

@ -0,0 +1,33 @@
# Brevo API token detection rule.
# Self-identifying token: no surrounding keyword context is required. The
# capture is the literal prefix `xkeysib-`, 64 hex characters, a hyphen, and a
# 16-character alphanumeric suffix.
rules:
- name: Brevo API Token
id: kingfisher.brevo.1
# (?x) only: extended syntax, case-sensitive (the `xkeysib-` prefix must be lowercase).
pattern: |
(?x)
\b
(
xkeysib-[a-fA-F0-9]{64}-[a-zA-Z0-9]{16}
)
\b
pattern_requirements:
min_digits: 2
min_entropy: 3.2
confidence: high
examples:
- BREVO_API_KEY=xkeysib-abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcd-1234567890abcd12
- '"brevo": "xkeysib-1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef-ab12cd34ef56gh78"'
references:
- https://developers.brevo.com/docs/api-key-authentication
- https://developers.brevo.com/docs/how-it-works
# Live validation: fetches the account resource with the token in the
# `api-key` header. The only status matcher requires HTTP 200.
validation:
type: Http
content:
request:
method: GET
url: https://api.brevo.com/v3/account
headers:
api-key: "{{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]

View file

@ -2,15 +2,15 @@ rules:
- name: Cerebras AI API Key
id: kingfisher.cerebras.1
pattern: |
(?xi)
(?x)
(
csk-[a-z0-9]{48}
csk-[a-zA-Z0-9]{48}
)
\b
pattern_requirements:
min_digits: 2
min_lowercase: 2
confidence: medium
confidence: high
min_entropy: 3.0
validation:
type: Http

View file

@ -14,7 +14,7 @@ rules:
pattern_requirements:
min_digits: 2
min_entropy: 3.5
confidence: medium
confidence: high
examples:
- |
export CIRCLECI_TOKEN=CCIPAT_FERZRjTN451xnDCy1y9gWn_79fb6ca4d0e5f833612eee17de397a9dca0a9e9f

View file

@ -0,0 +1,42 @@
# Clearout API token detection rule.
# Requires the keyword "clearout", a credential keyword within 16 characters,
# and then (within 16 more characters) a 28-character alphanumeric token.
rules:
- name: Clearout API Token
id: kingfisher.clearout.1
# (?xi) = extended + case-insensitive.
pattern: |
(?xi)
\b
clearout
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{28}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- clearout_api_token = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4"
- CLEAROUT_TOKEN=abcdef1234567890abcdef123456
references:
- https://docs.clearout.io/api-overview.html
- https://docs.clearout.io/email-verifier-api.html
# Live validation: queries the credits endpoint with the token as a Bearer
# credential. Matchers require HTTP 200, a negated StatusMatch on 401, and the
# JsonValid check.
validation:
type: Http
content:
request:
method: GET
url: https://api.clearout.io/v2/email_verify/getcredits
headers:
Authorization: "Bearer {{ TOKEN }}"
Content-Type: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401]
negative: true
- type: JsonValid

View file

@ -0,0 +1,39 @@
# Clerk secret key detection rule.
# Requires the keyword "clerk", a credential keyword within 32 characters, and
# then (within 16 more characters) a token of the form `sk_test_` or `sk_live_`
# followed by exactly 32 alphanumeric characters.
rules:
- name: Clerk Secret Key
id: kingfisher.clerk.1
# (?xi) = extended + case-insensitive; the `sk_` prefix is therefore matched in any case.
pattern: |
(?xi)
\b
clerk
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|API|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
sk_(?:test|live)_[A-Za-z0-9]{32}
)
\b
pattern_requirements:
min_digits: 2
min_entropy: 3.3
confidence: medium
examples:
- CLERK_SECRET_KEY=sk_live_abcdefghijklmnopqrstuvwxyz123456
- clerk_secret = sk_test_4pX9kL2mN8qR3sT7vY1zA3bC6dE0fG2h
references:
- https://clerk.com/docs/deployments/clerk-environment-variables
- https://clerk.com/docs/guides/development/machine-auth/api-keys
# Live validation: lists at most one user with the key as a Bearer credential.
# Matchers require HTTP 200 and the JsonValid check.
validation:
type: Http
content:
request:
method: GET
url: https://api.clerk.com/v1/users?limit=1
headers:
Authorization: "Bearer {{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -2,15 +2,15 @@ rules:
- name: ClickHouse Cloud Secret Key
id: kingfisher.clickhouse.1
pattern: |
(?xi)
(?x)
\b
(
4b1d[a-z0-9]{38}
4b1d[a-zA-Z0-9]{38}
)
\b
pattern_requirements:
min_digits: 2
confidence: medium
confidence: high
min_entropy: 3.5
examples:
- "4b1dwEZ8aNo1U9ODBqffSci1INBrltLHM2d1bHF4dq"

View file

@ -0,0 +1,82 @@
# Cloudinary credential rules.
# Three rules cooperate: two hidden helper rules capture the cloud name (.3)
# and the numeric API key (.2); the visible rule (.1) captures the API secret
# and consumes both helpers through `depends_on_rule` to build its validation
# request.
rules:
# Helper (visible: false): captures a 3-32 character cloud name that follows
# "cloudinary" and a CLOUD / CLOUD_NAME keyword. Exposed to rule .1 as CLOUDNAME.
# NOTE(review): the example value `demo` has only four distinct characters
# (2.0 bits if entropy is measured as per-character Shannon entropy) — confirm
# it clears `min_entropy: 2.5` as the engine computes it.
- name: Cloudinary Cloud Name
id: kingfisher.cloudinary.3
pattern: |
(?xi)
\b
cloudinary
(?:.|[\n\r]){0,32}?
(?:CLOUD[_\s]?NAME|CLOUD)
(?:.|[\n\r]){0,16}?
\b
(
[a-z0-9_-]{3,32}
)
\b
min_entropy: 2.5
confidence: medium
visible: false
examples:
- CLOUDINARY_CLOUD_NAME=demo
# Helper (visible: false): captures a 15-digit API key that follows
# "cloudinary" and an API_KEY / KEY keyword. Exposed to rule .1 as APIKEY.
- name: Cloudinary API Key
id: kingfisher.cloudinary.2
pattern: |
(?xi)
\b
cloudinary
(?:.|[\n\r]){0,32}?
(?:API[_\s]?KEY|KEY)
(?:.|[\n\r]){0,16}?
\b
(
[0-9]{15}
)
\b
min_entropy: 3.0
confidence: medium
visible: false
examples:
- CLOUDINARY_API_KEY=123456789012345
# Primary rule: captures a 32-character alphanumeric API secret that follows
# "cloudinary" and a SECRET / PRIVATE / API_SECRET keyword.
- name: Cloudinary API Secret
id: kingfisher.cloudinary.1
pattern: |
(?xi)
\b
cloudinary
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|API[_\s]?SECRET)
(?:.|[\n\r]){0,32}?
\b
(
[A-Za-z0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- CLOUDINARY_API_SECRET=abcdefghijklmnopqrstuvwxyz123456
- cloudinary_secret = "aB3dE5fG7hI9jK1lM3nO5pQ7rS9tU1vW"
references:
- https://cloudinary.com/documentation/developer_onboarding_faq_find_credentials
- https://cloudinary.com/documentation/image_upload_api_reference
# Live validation: requests the `usage` resource for CLOUDNAME. The Basic
# credential is built as "<APIKEY>:<TOKEN>" and passed through b64enc.
# Matchers require HTTP 200 and the JsonValid check.
validation:
type: Http
content:
request:
method: GET
url: "https://api.cloudinary.com/v1_1/{{ CLOUDNAME }}/usage"
headers:
Authorization: "Basic {{ APIKEY | append: ':' | append: TOKEN | b64enc }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid
# Binds the helper rules' captures to the template variables used above.
depends_on_rule:
- rule_id: kingfisher.cloudinary.2
variable: APIKEY
- rule_id: kingfisher.cloudinary.3
variable: CLOUDNAME

View file

@ -2,10 +2,10 @@ rules:
- name: CodeRabbit API Key
id: kingfisher.coderabbit.1
pattern: |
(?xi)
(?x)
\b
(
cr-[a-f0-9]{58}
cr-[a-fA-F0-9]{58}
)
\b
pattern_requirements:

View file

@ -0,0 +1,41 @@
# Coinlayer API key detection rule.
# Requires the keyword "coinlayer", a credential keyword within 16 characters,
# and then (within 16 more characters) a 32-character alphanumeric token.
rules:
- name: Coinlayer API Key
id: kingfisher.coinlayer.1
# (?xi) = extended + case-insensitive.
pattern: |
(?xi)
\b
coinlayer
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- COINLAYER_ACCESS_KEY=abc123def456ghi789jkl012mno345pq
- coinlayer_access_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
references:
- https://coinlayer.com/documentation
- https://coinlayer.com/signup/free
# Live validation: the token is sent as the `access_key` query parameter to
# the `live` endpoint. Matchers require HTTP 200, a negated StatusMatch on 401,
# and the JsonValid check.
validation:
type: Http
content:
request:
method: GET
url: https://api.coinlayer.com/live?access_key={{ TOKEN }}
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401]
negative: true
- type: JsonValid

View file

@ -0,0 +1,62 @@
# Contentstack credential rules.
# A hidden helper rule (.2) captures the stack API key; the visible rule (.1)
# captures the management token and uses the helper's capture (as APIKEY) in
# its validation request.
rules:
# Helper (visible: false): captures `blt` followed by exactly 10 hex characters
# after "contentstack" and an API_KEY / STACK_API_KEY keyword. Because of
# (?xi), uppercase hex is accepted too.
# NOTE(review): the capture is fixed at 10 hex characters with a trailing \b,
# so a longer `blt…` value would not match — confirm this length against real
# stack API keys.
- name: Contentstack API Key
id: kingfisher.contentstack.2
pattern: |
(?xi)
\b
contentstack
(?:.|[\n\r]){0,32}?
(?:API[_\s]?KEY|STACK[_\s]?API[_\s]?KEY)
(?:.|[\n\r]){0,16}?
\b
(
blt[a-f0-9]{10}
)
\b
min_entropy: 3.0
confidence: medium
visible: false
examples:
- CONTENTSTACK_API_KEY=blt1234567890
# Primary rule: captures `cs` followed by 32 hex characters after
# "contentstack" and a MANAGEMENT_TOKEN / AUTH_TOKEN / TOKEN keyword.
- name: Contentstack Management Token
id: kingfisher.contentstack.1
pattern: |
(?xi)
\b
contentstack
(?:.|[\n\r]){0,32}?
(?:MANAGEMENT[_\s]?TOKEN|AUTH[_\s]?TOKEN|TOKEN)
(?:.|[\n\r]){0,32}?
\b
(
cs[a-f0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- CONTENTSTACK_MANAGEMENT_TOKEN=cs1234567890abcdef1234567890abcdef
- contentstack_token = "csa1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4"
references:
- https://www.contentstack.com/docs/developers/create-tokens/overview-of-tokens
- https://www.contentstack.com/docs/developers/apis/content-management-api
# Live validation: GET /v3/stacks with the helper's capture in the `api_key`
# header and the token in the `authorization` header. Matchers require HTTP 200
# and the JsonValid check.
# NOTE(review): the token is sent with a `Bearer ` prefix here; verify against
# the Content Management API docs whether management tokens are expected in
# `authorization` with or without that prefix.
validation:
type: Http
content:
request:
method: GET
url: "https://api.contentstack.io/v3/stacks"
headers:
api_key: "{{ APIKEY }}"
authorization: "Bearer {{ TOKEN }}"
Content-Type: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid
# Binds the helper rule's capture to the APIKEY template variable used above.
depends_on_rule:
- rule_id: kingfisher.contentstack.2
variable: APIKEY

View file

@ -2,10 +2,10 @@ rules:
- name: crates.io API Key
id: kingfisher.cratesio.1
pattern: |
(?xi)
(?x)
\b
(
cio[A-Z0-9]{32}
cio[A-Za-z0-9]{32}
)
\b
pattern_requirements:

View file

@ -0,0 +1,41 @@
# Currencylayer API key detection rule.
# Requires the keyword "currencylayer", a credential keyword within 16
# characters, and then (within 16 more characters) a 32-character alphanumeric
# token.
rules:
- name: Currencylayer API Key
id: kingfisher.currencylayer.1
# (?xi) = extended + case-insensitive.
pattern: |
(?xi)
\b
currencylayer
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- CURRENCYLAYER_ACCESS_KEY=abc123def456ghi789jkl012mno345pq
- currencylayer_access_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
references:
- https://currencylayer.com/documentation
- https://currencylayer.com/api-access
# Live validation: the token is sent as the `access_key` query parameter to
# the `live` endpoint. Matchers require HTTP 200, a negated StatusMatch on 401,
# and the JsonValid check.
validation:
type: Http
content:
request:
method: GET
url: https://api.currencylayer.com/live?access_key={{ TOKEN }}
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401]
negative: true
- type: JsonValid

View file

@ -2,11 +2,11 @@ rules:
- name: Cursor Integrations (User) API Key
id: kingfisher.cursor.1
pattern: |
(?xi)
(?x)
\b
(
key_
[0-9a-f]{64}
[0-9a-fA-F]{64}
)
\b
min_entropy: 3.8

View file

@ -0,0 +1,43 @@
# Daily API key detection rule.
# Requires the keyword "daily", a credential keyword within 32 characters, and
# then (within 32 more characters) a 64-character alphanumeric token.
rules:
- name: Daily API Key
id: kingfisher.daily.1
# (?xi) = extended + case-insensitive.
pattern: |
(?xi)
\b
daily
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API)
(?:.|[\n\r]){0,32}?
\b
(
[A-Za-z0-9]{64}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- DAILY_API_KEY=abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890
- daily_api_key = "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6a7b8c9d0e1f2a3b4c5d6a7b8c9d0e1f2"
references:
- https://docs.daily.co/reference/rest-api
- https://docs.daily.co/reference/rest-api/rooms/list-rooms
# Live validation: lists at most one room with the key as a Bearer credential.
# Matchers require HTTP 200 and a response body containing both `"data"` and
# `"total_count"` (match_all_words).
validation:
type: Http
content:
request:
method: GET
url: https://api.daily.co/v1/rooms?limit=1
headers:
Authorization: "Bearer {{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words:
- '"data"'
- '"total_count"'
match_all_words: true

View file

@ -2,11 +2,11 @@ rules:
- name: Databricks API token
id: kingfisher.databricks.1
pattern: |
(?xi)
(?x)
\b
(
dapi
[a-f0-9]{32}
[a-fA-F0-9]{32}
)
\b
pattern_requirements:

View file

@ -2,10 +2,10 @@ rules:
- name: Dependency-Track API Key
id: kingfisher.dtrack.1
pattern: |
(?xi)
(?x)
\b
(
odt_[A-Z0-9]{32,255}
odt_[A-Za-z0-9]{32,255}
)
\b
pattern_requirements:

View file

@ -2,10 +2,10 @@ rules:
- name: Doppler CLI Token
id: kingfisher.doppler.1
pattern: |
(?xi)
(?x)
\b
(
dp\.ct\.[A-Z0-9]{40,44}
dp\.ct\.[A-Za-z0-9]{40,44}
)
\b
pattern_requirements:

View file

@ -2,10 +2,10 @@ rules:
- name: Dropbox API secret/key
id: kingfisher.dropbox.1
pattern: |
(?xi)
(?x)
\b
(
sl\.[A-Z0-9\-\_]{130,152}
sl\.[A-Za-z0-9\-\_]{130,152}
)
\b
pattern_requirements:

View file

@ -2,16 +2,16 @@ rules:
- name: Duffel API Token
id: kingfisher.duffel.1
pattern: |
(?xi)
(?x)
\b
(
duffel_(?:test|live)_[a-z0-9_\-=]{43}
duffel_(?:test|live)_[a-zA-Z0-9_\-=]{43}
)
\b
pattern_requirements:
min_digits: 2
min_entropy: 3.2
confidence: medium
confidence: high
examples:
- DUFFEL_TOKEN=duffel_test_qwertyuiopasdfghjklzxcvbnm123456789abcdefgh
- 'Authorization: "Bearer duffel_live_abcd1234efgh5678ijkl9012mnop3456qrstuvwxyza"'

View file

@ -2,14 +2,14 @@ rules:
- name: Dynatrace Token
id: kingfisher.dynatrace.1
pattern: |
(?xi)
(?x)
\b
(
dt0[a-z][0-9]{2}
dt0[a-zA-Z][0-9]{2}
\.
[A-Z0-9]{24}
[A-Za-z0-9]{24}
\.
[A-Z0-9]{64}
[A-Za-z0-9]{64}
)
\b
pattern_requirements:

View file

@ -2,11 +2,11 @@ rules:
- name: ElevenLabs API Key
id: kingfisher.elevenlabs.1
pattern: |
(?xi)
(?x)
\b
(
sk_
[0-9a-f]{48}
[0-9a-fA-F]{48}
)
\b
pattern_requirements:

View file

@ -31,4 +31,7 @@ rules:
response_matcher:
- report_response: true
- type: StatusMatch
status: [200, 403]
status: [200, 403]
- type: WordMatch
words: ['API token is disabled']
negative: true

View file

@ -2,10 +2,10 @@ rules:
- name: Figma Personal Access Token
id: kingfisher.figma.1
pattern: |
(?xi)
(?x)
\b
(
figd_[A-Z0-9_-]{38,42}
figd_[A-Za-z0-9_-]{38,42}
)
\b
pattern_requirements:

View file

@ -2,10 +2,10 @@ rules:
- name: Firecrawl API Key
id: kingfisher.firecrawl.1
pattern: |
(?xi)
(?x)
\b
(
fc-[a-f0-9]{32}
fc-[a-fA-F0-9]{32}
)
\b
pattern_requirements:

View file

@ -2,10 +2,10 @@ rules:
- name: Fireworks.ai API Key
id: kingfisher.fireworks.1
pattern: |
(?xi)
(?x)
\b
(
fw_[A-Z0-9]{24}
fw_[A-Za-z0-9]{24}
)
\b
pattern_requirements:

View file

@ -0,0 +1,41 @@
# Fixer.io API key detection rule.
# Requires the keyword "fixer", a credential keyword within 16 characters, and
# then (within 16 more characters) a 32-character alphanumeric token.
rules:
- name: Fixer.io API Key
id: kingfisher.fixer.1
# (?xi) = extended + case-insensitive.
pattern: |
(?xi)
\b
fixer
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- FIXER_ACCESS_KEY=abc123def456ghi789jkl012mno345pq
- fixer_access_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
references:
- https://fixer.io/documentation
- https://fixer.io/api-key
# Live validation: the token is sent as the `access_key` query parameter to
# the `latest` endpoint. Matchers require HTTP 200, a negated StatusMatch on
# 401, and the JsonValid check.
validation:
type: Http
content:
request:
method: GET
url: https://data.fixer.io/api/latest?access_key={{ TOKEN }}
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401]
negative: true
- type: JsonValid

View file

@ -2,7 +2,7 @@ rules:
- name: Fleetbase API Key
id: kingfisher.fleetbase.1
pattern: |
(?xi)
(?x)
\b
(
flb_(?:live|test)_[0-9a-zA-Z]{20,64}

View file

@ -2,8 +2,8 @@ rules:
- name: Foursquare Client ID
id: kingfisher.foursquare.1
visible: false
confidence: low
min_entropy: 0.0
confidence: medium
min_entropy: 1.0
pattern: |
(?xi)
(?:

View file

@ -2,7 +2,7 @@ rules:
- name: Freshdesk Domain
id: kingfisher.freshdesk.1
visible: false
confidence: low
confidence: medium
min_entropy: 0.0
pattern: |
(?xi)

View file

@ -2,10 +2,10 @@ rules:
- name: Friendli.ai API Key
id: kingfisher.friendli.1
pattern: |
(?xi)
(?x)
\b
(
flp_[A-Z0-9]{46}
flp_[A-Za-z0-9]{46}
)
\b
pattern_requirements:

View file

@ -0,0 +1,38 @@
rules:
- name: Geoapify API Key
id: kingfisher.geoapify.1
pattern: |
(?xi)
\b
geoapify
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- geoapify_api_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
- GEOAPIFY_KEY=abcdef1234567890abcdef1234567890
references:
- https://apidocs.geoapify.com/docs/geocoding/api/api
- https://apidocs.geoapify.com/docs/ip-geolocation
validation:
type: Http
content:
request:
method: GET
url: https://api.geoapify.com/v1/ipinfo?apiKey={{ TOKEN }}
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -2,11 +2,11 @@ rules:
- name: GitLab Private Token
id: kingfisher.gitlab.1
pattern: |
(?xi)
(?x)
\b
(
glpat-
[0-9A-Z_-]{20}
[0-9A-Za-z_-]{20}
)
\b
pattern_requirements:
@ -51,10 +51,10 @@ rules:
- name: GitLab Runner Registration Token
id: kingfisher.gitlab.2
pattern: |
(?xi)
(?x)
\b
(
GR1348941[0-9A-Z_-]{20}
GR1348941[0-9A-Za-z_-]{20}
)
\b
pattern_requirements:
@ -97,10 +97,10 @@ rules:
- name: GitLab Pipeline Trigger Token
id: kingfisher.gitlab.3
pattern: |
(?xi)
(?x)
\b
(
glptt-[0-9a-f]{40}
glptt-[0-9a-fA-F]{40}
)
pattern_requirements:
min_digits: 2
@ -135,7 +135,7 @@ rules:
- name: GitLab Private Token - Routable Format
id: kingfisher.gitlab.4
pattern: |
(?xi)
(?x)
\b
(
glpat-
@ -143,7 +143,7 @@ rules:
\.
(?<version>01)
\.
(?<base36_payload_length>[0-9a-z]{2})
(?<base36_payload_length>[0-9a-zA-Z]{2})
(?<crc32>[0-9a-z]{7})
)
\b

View file

@ -26,13 +26,7 @@ rules:
- name: Google OAuth Client Secret
id: kingfisher.google.3
pattern: |
(?xi)
client.?secret .{0,10}
(
[a-z0-9_-]{24}
)
(?: [^a-z0-9_-] |$)
pattern: (?i)(?:client[_\-. ]?secret)(?:.|[\n\r]){0,20}?[=:][ \t]*['"]?([A-Z0-9_-]{24})['"]?(?:[^A-Z0-9_-]|$)
pattern_requirements:
min_digits: 1
min_uppercase: 3
@ -70,20 +64,7 @@ rules:
- name: Google OAuth Credentials
id: kingfisher.google.6
pattern: |
(?xi)
([0-9]+-[a-z0-9_]{32}\.apps\.googleusercontent\.com)
(?:
(?s).{0,40}
)
(?:
(GOCSPX-[A-Z0-9_-]{28})
|
(?:
(?i) client.?secret .{0,10} \b ([A-Z0-9_-]{24})
)
)
(?:[^A-Z0-9_-] | $)
pattern: (?is)([0-9]+-[a-z0-9_]{32}\.apps\.googleusercontent\.com)(?:.|[\n\r]){0,120}?(?:GOCSPX-[A-Z0-9_-]{28}|(?:client[_\-. ]?secret)(?:.|[\n\r]){0,20}?[=:][ \t]*['"]?([A-Z0-9_-]{24})['"]?)(?:[^A-Z0-9_-]|$)
pattern_requirements:
min_digits: 2
min_entropy: 3.3

View file

@ -2,10 +2,10 @@ rules:
- name: Google OAuth2 Access Token
id: kingfisher.google.oauth2.1
pattern: |
(?xi)
(?x)
\b
(
ya29\.(?i:[a-z0-9_-]{30,})
ya29\.(?i:[a-zA-Z0-9_-]{30,})
)
\b
min_entropy: 3.5

View file

@ -2,15 +2,15 @@ rules:
- name: Groq API Key
id: kingfisher.groq.1
pattern: |
(?xi)
(?x)
\b
(
gsk_[A-Z0-9]{52}
gsk_[A-Za-z0-9]{52}
)
\b
pattern_requirements:
min_digits: 4
confidence: medium
confidence: high
min_entropy: 3.5
validation:
type: Http

View file

@ -2,15 +2,15 @@ rules:
- name: Harness Personal Access Token (PAT)
id: kingfisher.harness.pat.1
pattern: |
(?xi)
(?x)
\b
(
pat\.
[A-Z0-9_-]{22}
[A-Za-z0-9_-]{22}
\.
[0-9a-f]{24}
[0-9a-fA-F]{24}
\.
[A-Z0-9]{20}
[A-Za-z0-9]{20}
)
\b
pattern_requirements:

View file

@ -2,7 +2,7 @@ rules:
- name: HubSpot Private App Token
id: kingfisher.hubspot.1
pattern: |
(?xi)
(?x)
\b
(
pat-[a-z0-9]{2,3}-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}

View file

@ -0,0 +1,40 @@
rules:
- name: Hunter.io API Key
id: kingfisher.hunterio.1
pattern: |
(?xi)
\b
hunter
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{40}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
ignore_if_contains:
- test-api-key
examples:
- hunter_api_key = "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6a7b8c9d0"
- HUNTER_KEY=abcdef1234567890abcdef1234567890abcdef12
references:
- https://hunter.io/api/docs
- https://hunter.io/api-keys
validation:
type: Http
content:
request:
method: GET
url: https://api.hunter.io/v2/account?api_key={{ TOKEN }}
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -2,16 +2,16 @@ rules:
- name: Infracost API Token
id: kingfisher.infracost.1
pattern: |
(?xi)
(?x)
\b
(
ico-[a-z0-9]{32}
ico-[a-zA-Z0-9]{32}
)
\b
pattern_requirements:
min_digits: 2
min_entropy: 3.3
confidence: medium
confidence: high
examples:
- export INFRACOST_API_KEY=ico-abcd12abcdefabcdefabcdefabcdefab
- '"infracost": "ico-1234567890abcdef1234567890abcdef"'

View file

@ -1,11 +1,17 @@
rules:
- name: Instagram Graph API Access Token
id: kingfisher.instagram.1
pattern: '\b(IGQVJ[A-Za-z0-9_-]{50,256})\b'
pattern: |
(?x)
\b
(
IGQVJ[A-Za-z0-9_-]{50,256}
)
\b
pattern_requirements:
min_digits: 3
min_entropy: 3.5
confidence: medium
confidence: high
examples:
- "INSTAGRAM_ACCESS_TOKEN=IGQVJWZAkFNT1liY2J3b3FmUHJVQzNfSUtlX3RhVm5FNGppZAjhCeUpEWnJIUWxCNkVrQjhGSU9rOVBuVnBaQUtKYjF6MjFGZA3VhZAXRxYkJGS01wS1BNLVlsaGtJYUV3"
- 'instagram_token: "IGQVJXZAGtIR0hKVERjNXB5bnM0Y1BmUmFKa0RFZAnBXdVFKVUNNUjZAtVW5lZAXRhNGdLZA2V3OVlGS3dFZATVyYnlIVXlwcHdvdkxiS01wS1BNLVlsaGtJYUV3dg"'

View file

@ -3,10 +3,10 @@ rules:
id: kingfisher.intra42.1
visible: false
pattern: |
(?xi)
(?x)
\b
(
u-s4t2(?:ud|af)-[a-f0-9]{64}
u-s4t2(?:ud|af)-[a-fA-F0-9]{64}
)
\b
min_entropy: 3.0

View file

@ -2,17 +2,17 @@ rules:
- name: Ionic API token
id: kingfisher.ionic.1
pattern: |
(?xi)
(?x)
\b
(
ion_
[a-z0-9]{42}
[a-zA-Z0-9]{42}
)
\b
pattern_requirements:
min_digits: 2
min_entropy: 3.5
confidence: medium
confidence: high
examples:
- ion_VNR17uGgdxr9P2aOrCulvSLTFDqijIV2ImQsOUhDEI
validation:

View file

@ -10,7 +10,7 @@ rules:
\b
pattern_requirements:
min_digits: 2
confidence: medium
confidence: high
min_entropy: 3.0
examples:
- "JINA_KEY = os.getenv('JINA_KEY','jina_c1758c6f49e14ced990ac7776800dc45ShJNTXBCizzwjE6IMFYJ7LD959cG')"

View file

@ -8,9 +8,9 @@ rules:
\.
ey[A-Za-z0-9_-]{12,} (?# payload )
\.
[A-Za-z0-9_-]{12,} (?# signature )
[A-Za-z0-9_-]{24,} (?# signature )
)
(?:[^A-Z0-9_-])
(?:[^A-Za-z0-9_-]|$)
pattern_requirements:
min_digits: 4
min_entropy: 3.3

View file

@ -5,10 +5,13 @@ rules:
confidence: low
min_entropy: 2.0
pattern: |
(?xi)
(?x)
\b
(
https?://[a-z0-9.-]+(?::\d{2,5})?
https?://
(?:[a-z0-9-]+\.)+
looker\.com
(?::\d{2,5})?
)
(?:/api/(?:4\.0|3\.1))?
\b

View file

@ -125,6 +125,30 @@ rules:
- "****"
- "xxxx"
- "example"
- "<password>"
- "<db_password>"
- "<username>"
- "<u>:<p>"
- "<redacted>"
- "<DO_NOT_UPDATE_AUTH>"
- "{username}:{password}"
- "{password}"
- "${"
- "$("
- "{{"
- "&lt;"
- ":pass@"
- ":password@"
- ":secret@"
- ":passw0rd@"
- ":secretPassw0rd@"
- ":wrong_password@"
- ":fakepassword@"
- "@host:"
- "@host/"
- "@host1:"
- "@some.address"
- "@connectionString.com"
min_entropy: 3
examples:
- client = mongoc_client_new ("mongodb+srv://someuser:hunter2@my-atlas-rd941.mongodb.net/test?retryWrites=true&w=majority");

View file

@ -0,0 +1,59 @@
rules:
- name: Mux Access Token ID
id: kingfisher.mux.2
pattern: |
(?xi)
\b
mux
(?:.|[\n\r]){0,32}?
(?:TOKEN[_\s]?ID|ACCESS[_\s]?TOKEN[_\s]?ID)
(?:.|[\n\r]){0,16}?
\b
(
[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}
)
\b
min_entropy: 3.0
confidence: medium
visible: false
examples:
- MUX_TOKEN_ID=44c819de-4add-4c9f-b2e9-384a0a71bede
- name: Mux Access Token Secret
id: kingfisher.mux.1
pattern: |
(?xi)
\b
mux
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,32}?
\b
(
[A-Za-z0-9+/=]{75}
)
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- MUX_TOKEN_SECRET=INKxCoZ+cX6l1yrR6vqzYHVaeFEcqvZShznWM1U/No8KsV7h6Jxu1XXuTUQ91sdiGONK3H7NE7H
references:
- https://docs.mux.com/core/make-api-requests
- https://docs.mux.com/api-reference/video/video/v1/assets
validation:
type: Http
content:
request:
method: GET
url: https://api.mux.com/video/v1/assets?limit=1
headers:
Authorization: "Basic {{ MUXID | append: ':' | append: TOKEN | b64enc }}"
Content-Type: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid
depends_on_rule:
- rule_id: kingfisher.mux.2
variable: MUXID

View file

@ -0,0 +1,40 @@
rules:
- name: NewsAPI API Key
id: kingfisher.newsapi.1
pattern: |
(?xi)
\b
newsapi
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- NEWSAPI_API_KEY=abc123def456ghi789jkl012mno345pq
- newsapi_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
references:
- https://newsapi.org/docs/authentication
validation:
type: Http
content:
request:
method: GET
url: https://newsapi.org/v2/everything?q=test&apiKey={{ TOKEN }}
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401]
negative: true
- type: JsonValid

View file

@ -0,0 +1,41 @@
rules:
  # Numverify keys have no vendor prefix, so the match is anchored on the
  # keyword "numverify" followed (within 16 chars) by a
  # SECRET/PRIVATE/ACCESS/KEY/TOKEN label and a bare 32-character value.
  - name: Numverify API Key
    id: kingfisher.numverify.1
    pattern: |
      (?xi)
      \b
      numverify
      (?:.|[\n\r]){0,16}?
      (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
      (?:.|[\n\r]){0,16}?
      \b
      (
        [A-Za-z0-9]{32}
      )
      \b
    min_entropy: 3.5
    confidence: medium
    pattern_requirements:
      min_digits: 2
    examples:
      - NUMVERIFY_ACCESS_KEY=abc123def456ghi789jkl012mno345pq
      - numverify_access_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
    references:
      - https://numverify.com/documentation
      - https://numverify.com/faq
    validation:
      type: Http
      content:
        request:
          method: GET
          url: https://apilayer.net/api/validate?access_key={{ TOKEN }}&number=14155551234
          headers:
            Accept: application/json
          response_matcher:
            - report_response: true
            - type: StatusMatch
              status: [200]
            - type: StatusMatch
              status: [401]
              negative: true
            - type: JsonValid
            # The API reports bad keys as a JSON error object
            # ("success": false, "error": {...}) rather than via the HTTP
            # status. Only a real validation result contains the "valid" field,
            # so require it to avoid marking every candidate as active.
            - type: WordMatch
              words: ['"valid"']

View file

@ -0,0 +1,42 @@
rules:
- name: OneSignal REST API Key
id: kingfisher.onesignal.1
pattern: |
(?xi)
\b
onesignal
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- onesignal_rest_api_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
- ONESIGNAL_REST_API_KEY=abcdef1234567890abcdef1234567890
references:
- https://documentation.onesignal.com/reference/rest-api-overview
- https://documentation.onesignal.com/docs/keys-and-ids
validation:
type: Http
content:
request:
method: GET
url: https://api.onesignal.com/apps
headers:
Authorization: "Key {{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401, 403]
negative: true
- type: JsonValid

View file

@ -23,6 +23,8 @@ rules:
- https://www.perplexity.ai/hub/blog/introducing-pplx-api
- https://docs.litellm.ai/docs/providers/perplexity
- https://developers.cloudflare.com/ai-gateway/providers/perplexity/
- https://docs.perplexity.ai/api-reference/chat-completions-post
- https://docs.perplexity.ai/docs/sdk/error-handling
validation:
type: Http
content:
@ -32,8 +34,19 @@ rules:
headers:
Authorization: "Bearer {{ TOKEN }}"
Content-Type: application/json
body: '{"model": "kingfisher", "messages": [{ "role": "user", "content": "." }]}'
body: '{"model": "sonar", "messages": [{ "role": "user", "content": "." }]}'
response_matcher:
- report_response: false
# Perplexity docs identify 401/403 as auth/permission failures.
- type: StatusMatch
status: [401, 403]
negative: true
- type: WordMatch
match_all_words: false
words:
- '"invalid_api_key"'
- '"type":"invalid_api_key"'
- '"Invalid API key provided"'
- '"authentication"'
- '"AuthenticationError"'
negative: true
match_all_words: false

View file

@ -0,0 +1,39 @@
rules:
- name: Pinecone API Key
id: kingfisher.pinecone.1
pattern: |
(?xi)
\b
pinecone
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|API|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}
)
\b
pattern_requirements:
min_digits: 2
min_entropy: 3.0
confidence: medium
examples:
- PINECONE_API_KEY=62b0dbfe-3489-4b79-b850-34d911527c88
- pinecone_key = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
references:
- https://docs.pinecone.io/reference/api/authentication
- https://docs.pinecone.io/reference/api/2025-10/control-plane/list_indexes
validation:
type: Http
content:
request:
method: GET
url: https://api.pinecone.io/indexes
headers:
Api-Key: "{{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -0,0 +1,42 @@
rules:
- name: Pingdom API Token
id: kingfisher.pingdom.1
pattern: |
(?xi)
\b
pingdom
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9_-]{64}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- pingdom_api_token = "ofOhK18Ca6w4S_XmInGv0QPkqly-rbRBBoHsp_2FEH5QnIbH0VZhRPO3tlvrjMIK"
- PINGDOM_TOKEN=abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890
references:
- https://docs.pingdom.com/api/
- https://pingdom.com/resources/pingdom-api
validation:
type: Http
content:
request:
method: GET
url: https://api.pingdom.com/api/3.1/checks
headers:
Authorization: "Bearer {{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401, 403]
negative: true
- type: JsonValid

View file

@ -0,0 +1,40 @@
rules:
- name: Positionstack API Key
id: kingfisher.positionstack.1
pattern: |
(?xi)
\b
positionstack
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- positionstack_access_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
- POSITIONSTACK_KEY=1234567890abcdef1234567890abcdef
references:
- https://positionstack.com/documentation
validation:
type: Http
content:
request:
method: GET
url: https://api.positionstack.com/v1/forward?access_key={{ TOKEN }}&query=London
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401, 403]
negative: true
- type: JsonValid

View file

@ -0,0 +1,44 @@
rules:
- name: Railway API Token
id: kingfisher.railway.1
pattern: |
(?xi)
\b
railway
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API)
(?:.|[\n\r]){0,32}?
\b
(
(?:[A-Za-z0-9]{32}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- RAILWAY_API_TOKEN=abcdef1234567890abcdef1234567890
- railway_token = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
references:
- https://docs.railway.com/guides/public-api
- https://docs.railway.com/reference/oauth/login-and-tokens
validation:
type: Http
content:
request:
method: POST
url: https://backboard.railway.com/graphql/v2
headers:
Authorization: "Bearer {{ TOKEN }}"
Content-Type: application/json
body: '{"query":"query { me { name email } }"}'
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words:
- '"data"'
- '"me"'
match_all_words: true

View file

@ -77,18 +77,7 @@ rules:
- id: kingfisher.redis.3
name: Redis Password (Standalone Config)
# Detects REDIS_PASSWORD, redis_password, redis.password etc. in env vars and config files
pattern: |
(?xi)
\b
(?:REDIS|redis)
[-_.]?
(?:PASSWORD|PASS|PASSWD|AUTH|SECRET|TOKEN)
\b
(?:.|[\n\r]){0,24}?
[=:\s]+
['"]?
(?P<password>[a-zA-Z0-9%;._~!$&'()*+,;=/*+-]{8,64})
['"]?
pattern: (?i)\b(?:redis[-_.]?(?:password|pass|passwd|auth|secret|token)|config\.redis\.auth)\b(?:.|[\n\r]){0,24}?[=:][ \t]*['"]?([a-zA-Z0-9%;._~!$&'()*+,;=/*+-]{8,64})['"]?
pattern_requirements:
ignore_if_contains:

View file

@ -0,0 +1,39 @@
rules:
- name: Render API Key
id: kingfisher.render.1
pattern: |
(?xi)
\b
render
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API)
(?:.|[\n\r]){0,32}?
\b
(
(?:[A-Za-z0-9]{32}|rnd_[A-Za-z0-9]{33})
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- RENDER_API_KEY=abcdef1234567890abcdef1234567890
- render_api_key = "rnd_abc123def456ghi789jkl012mno345pqr"
references:
- https://docs.render.com/api
- https://api-docs.render.com/docs
validation:
type: Http
content:
request:
method: GET
url: https://api.render.com/v1/services?limit=1
headers:
Authorization: "Bearer {{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -0,0 +1,42 @@
rules:
- name: Rollbar Access Token
id: kingfisher.rollbar.1
pattern: |
(?xi)
\b
rollbar
(?:.|[\n\r]){0,32}?
(?:access[_-]?token|token|key)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- rollbar_access_token = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
- ROLLBAR_ACCESS_TOKEN=abcdef1234567890abcdef1234567890
references:
- https://docs.rollbar.com/docs/access-tokens
- https://docs.rollbar.com/reference/getting-started-1
validation:
type: Http
content:
request:
method: GET
url: https://api.rollbar.com/api/1/projects
headers:
X-Rollbar-Access-Token: "{{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [403]
negative: true
- type: JsonValid

View file

@ -209,6 +209,10 @@ rules:
min_entropy: 3.5
pattern_requirements:
min_digits: 3
ignore_if_contains:
- "www.w3.org"
- "/2001/"
- "/XMLSchema"
confidence: medium
examples:
- |
@ -309,6 +313,16 @@ rules:
min_digits: 4
min_entropy: 3.6
confidence: medium
validation:
type: Http
content:
request:
method: GET
url: "https://login.salesforce.com/services/oauth2/authorize?response_type=code&client_id={{ TOKEN }}&redirect_uri=https%3A%2F%2Fexample.com%2Fcb"
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
examples:
- 3MVG9P8aWj9n4kT2xQ5mV7rY1bC3dF6gH8jK0mN2pR4tU6wX8zA1cE3gH5kM7qS9uV2xY4bD6fJ8nP1rT3vW5yZ7
references:

View file

@ -0,0 +1,39 @@
rules:
- name: Salesloft API Key
id: kingfisher.salesloft.1
pattern: |
(?xi)
\b
salesloft
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|API|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
ak_[a-fA-F0-9]{64}
)
\b
pattern_requirements:
min_digits: 2
min_entropy: 3.2
confidence: medium
examples:
- SALESLOFT_API_KEY=ak_de656ec86bcab24878c24ff4d86758f8963d8ea6bcd4e90f8fae846ba8f9ac62
- salesloft_key = "ak_1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"
references:
- https://developer.salesloft.com/docs/platform/api-basics/api-key-authentication
- https://developers.salesloft.com/docs/api/me
validation:
type: Http
content:
request:
method: GET
url: https://api.salesloft.com/v2/me
headers:
Authorization: "Bearer {{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -0,0 +1,39 @@
rules:
- name: Sanity API Token
id: kingfisher.sanity.1
pattern: |
(?xi)
\b
sanity
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API)
(?:.|[\n\r]){0,32}?
\b
(
(?:sk[A-Za-z0-9]{52}|sk[A-Za-z0-9]{78})
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- SANITY_API_TOKEN=skE5UXUmBEy7U50jcG4In4v4xoHZTlduDxQYet8Y84tsTqAZxp2reIPJsA1JzqXJno2qcpauGwPfjHpU
- sanity_token = "sk1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdef"
references:
- https://www.sanity.io/docs/content-lake/http-auth
- https://www.sanity.io/docs/projects-api
validation:
type: Http
content:
request:
method: GET
url: https://api.sanity.io/v2021-06-07/projects
headers:
Authorization: "Bearer {{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -7,6 +7,7 @@ rules:
(
EAAA[a-zA-Z0-9\-\+=]{60}
)
\b
pattern_requirements:
min_digits: 4
min_entropy: 3.3

View file

@ -0,0 +1,41 @@
rules:
- name: StatusCake API Token
id: kingfisher.statuscake.1
pattern: |
(?xi)
\b
statuscake
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9_-]{20}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- statuscake_api_token = "a1b2c3d4e5f6g7h8i9j0"
- STATUSCAKE_TOKEN=abcdef1234567890abcd
references:
- https://developers.statuscake.com/guides/api/authentication/
- https://developers.statuscake.com/api
validation:
type: Http
content:
request:
method: GET
url: https://api.statuscake.com/v1/uptime
headers:
Authorization: "Bearer {{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401, 403]
negative: true

View file

@ -0,0 +1,38 @@
rules:
- name: Storyblok API Token
id: kingfisher.storyblok.1
pattern: |
(?xi)
\b
storyblok
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API)
(?:.|[\n\r]){0,32}?
\b
(
[A-Za-z0-9]{22}tt
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- STORYBLOK_ACCESS_TOKEN=wANpEQEsMYGOwLxwXQ76Ggtt
- storyblok_token = "13Kft3335iwbBOI333wawOtt"
references:
- https://www.storyblok.com/docs/api/content-delivery/v2/getting-started/authentication
- https://www.storyblok.com/docs/concepts/access-tokens
validation:
type: Http
content:
request:
method: GET
url: "https://api.storyblok.com/v2/cdn/stories?token={{ TOKEN }}&version=published&per_page=1"
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -0,0 +1,38 @@
rules:
  # Matches the keyword "uptimerobot" followed (within 16 chars) by a
  # SECRET/PRIVATE/ACCESS/KEY/TOKEN label and then either a 31-character
  # "ur…" key (may contain hyphens) or a bare 28-character alphanumeric key.
  - name: UptimeRobot API Key
    id: kingfisher.uptimerobot.1
    pattern: |
      (?xi)
      \b
      uptimerobot
      (?:.|[\n\r]){0,16}?
      (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
      (?:.|[\n\r]){0,16}?
      \b
      (
        (?:ur[A-Za-z0-9-]{29}|[A-Za-z0-9]{28})
      )
      \b
    min_entropy: 3.5
    confidence: medium
    pattern_requirements:
      min_digits: 2
    examples:
      - uptimerobot_api_key = "ur123456-7890abcdef1234567890ab"
      - UPTIMEROBOT_KEY=abcdef1234567890abcdef123456
    references:
      - https://uptimerobot.com/api/v3
      - https://help.uptimerobot.com/en/articles/11620152-how-to-use-uptimerobot-s-api
    validation:
      type: Http
      content:
        request:
          # The v2 API documents every method as POST with form-encoded
          # parameters, which is also what the Content-Type header declares.
          method: POST
          url: https://api.uptimerobot.com/v2/getMonitors
          headers:
            Content-Type: application/x-www-form-urlencoded
          body: 'api_key={{ TOKEN }}&format=json'
          response_matcher:
            - report_response: true
            - type: StatusMatch
              status: [200]
            - type: JsonValid
            # The v2 API answers HTTP 200 with {"stat":"fail", ...} for an
            # invalid key, so status + valid JSON alone would validate every
            # candidate. Only a successful call returns the "monitors" array.
            - type: WordMatch
              words: ['"monitors"']

View file

@ -0,0 +1,39 @@
rules:
- name: urlscan.io API Key
id: kingfisher.urlscan.1
pattern: |
(?xi)
\b
urlscan
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
(?:[A-Za-z0-9]{32}|[A-Za-z0-9]{36})
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- urlscan_api_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8"
- URLSCAN_KEY=abcdef1234567890abcdef1234567890
references:
- https://urlscan.io/docs/api/
- https://docs.urlscan.io/
validation:
type: Http
content:
request:
method: GET
url: https://urlscan.io/user/quotas/
headers:
API-Key: "{{ TOKEN }}"
Content-Type: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -0,0 +1,42 @@
rules:
- name: VirusTotal API Key
id: kingfisher.virustotal.1
pattern: |
(?xi)
\b
virustotal
(?:.|[\n\r]){0,32}?
(?:api[_-]?key|x[_-]?apikey|key)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{64}
)
\b
min_entropy: 3.6
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- VIRUSTOTAL_API_KEY=abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890
- virustotal_x_apikey = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0u1v2w3x4y5z6a7b8c9d0e1f2"
references:
- https://docs.virustotal.com/reference/authentication
- https://docs.virustotal.com/docs/please-give-me-an-api-key
validation:
type: Http
content:
request:
method: GET
url: https://www.virustotal.com/api/v3/domains/google.com
headers:
x-apikey: "{{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401, 403]
negative: true
- type: JsonValid

View file

@ -0,0 +1,40 @@
rules:
- name: WeatherAPI.com API Key
id: kingfisher.weatherapi.1
pattern: |
(?xi)
\b
weatherapi
(?:.|[\n\r]){0,16}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
[A-Za-z0-9]{32}
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- WEATHERAPI_KEY=abc123def456ghi789jkl012mno345pq
- weatherapi_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
references:
- https://www.weatherapi.com/docs/
validation:
type: Http
content:
request:
method: GET
url: https://api.weatherapi.com/v1/current.json?key={{ TOKEN }}&q=London
headers:
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: StatusMatch
status: [401]
negative: true
- type: JsonValid

View file

@ -0,0 +1,39 @@
rules:
- name: Webflow API Token
id: kingfisher.webflow.1
pattern: |
(?xi)
\b
webflow
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN|API)
(?:.|[\n\r]){0,32}?
\b
(
(?:[A-Za-z0-9]{32}|[A-Za-z0-9]{36})
)
\b
min_entropy: 3.5
confidence: medium
pattern_requirements:
min_digits: 2
examples:
- WEBFLOW_API_TOKEN=abcdef1234567890abcdef1234567890
- webflow_token = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8"
references:
- https://developers.webflow.com/data/reference/authentication
- https://developers.webflow.com/data/reference/rest-introduction/quick-start
validation:
type: Http
content:
request:
method: GET
url: https://api.webflow.com/v2/sites
headers:
Authorization: "Bearer {{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: JsonValid

View file

@ -21,13 +21,16 @@ rules:
- yelp_token = wiuck20l8j-oWwCd9r53FqpN6ELB7K03zGw-ccUQR7uLHc9NaWubovOMdGdyFqIGGM4aVK6nxQ1DreDZn_qBYU4jky_5kQRVkiIDPSheCPggY3WzyRzi27kxoOpoYAYx
references:
- https://docs.developer.yelp.com/docs/places-authentication
- https://docs.developer.yelp.com/reference/v3_all_categories
- https://docs.developer.yelp.com/reference/v3_business_search
- https://www.yelp.com/developers
validation:
type: Http
content:
request:
method: GET
url: "https://api.yelp.com/v3/categories?locale=en_US"
# /v3/categories can return data with non-functional bearer strings.
# /v3/businesses/search reliably enforces API key auth.
url: "https://api.yelp.com/v3/businesses/search?location=Seattle&limit=1"
headers:
Authorization: "Bearer {{ TOKEN }}"
Accept: application/json
@ -36,4 +39,14 @@ rules:
- type: StatusMatch
status: [200]
- type: WordMatch
words: ['"categories"']
words: ['"businesses"']
# Yelp docs list these auth/authorization failure codes for this endpoint.
- type: WordMatch
words:
- '"error"'
- '"UNAUTHORIZED_API_KEY"'
- '"UNAUTHORIZED_ACCESS_TOKEN"'
- '"TOKEN_INVALID"'
- '"AUTHORIZATION_ERROR"'
negative: true
match_all_words: false

View file

@ -0,0 +1,38 @@
rules:
  # ZeroBounce keys have no vendor prefix, so the match is anchored on the
  # keyword "zerobounce" followed (within 16 chars) by a
  # SECRET/PRIVATE/ACCESS/KEY/TOKEN label and a bare 32-character value.
  - name: ZeroBounce API Key
    id: kingfisher.zerobounce.1
    pattern: |
      (?xi)
      \b
      zerobounce
      (?:.|[\n\r]){0,16}?
      (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
      (?:.|[\n\r]){0,16}?
      \b
      (
        [A-Za-z0-9]{32}
      )
      \b
    min_entropy: 3.5
    confidence: medium
    pattern_requirements:
      min_digits: 2
    examples:
      - zerobounce_api_key = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"
      - ZEROBOUNCE_KEY=abcdef1234567890abcdef1234567890
    references:
      - https://www.zerobounce.net/docs/email-validation-api-quickstart/
      - https://api.zerobounce.net/v2/validate
    validation:
      type: Http
      content:
        request:
          method: GET
          # Use the credit-balance endpoint instead of /v2/validate: validating
          # an e-mail address spends one of the key owner's credits on every
          # check, while reading the balance is free.
          url: https://api.zerobounce.net/v2/getcredits?api_key={{ TOKEN }}
          headers:
            Accept: application/json
          response_matcher:
            - report_response: true
            - type: StatusMatch
              status: [200]
            - type: JsonValid
            - type: WordMatch
              words: ['"Credits"']
            # getcredits answers HTTP 200 with {"Credits":"-1"} for an invalid
            # key, so a -1 balance must be treated as a failed validation.
            - type: WordMatch
              words: ['"-1"']
              negative: true

View file

@ -27,7 +27,10 @@ pub use rule::{
pub use rules::{Rules, RulesError};
// Re-export RulesDatabase
pub use rules_database::{format_regex_pattern, RulesDatabase};
pub use rules_database::{
format_regex_pattern, RuleDetectionProfileKind, RuleMatchProfile, RulesDatabase,
TreeSitterFallbackPolicy,
};
// Re-export defaults
pub use defaults::get_builtin_rules;

View file

@ -7,10 +7,30 @@ use vectorscan_rs::{BlockDatabase, Flag, Pattern};
use crate::rule::{Rule, RULE_COMMENTS_PATTERN};
/// Fallback policy attached to every [`RuleMatchProfile`].
///
/// `RulesDatabase::classify_rule_match_profile` is the only place in this file
/// that picks a variant.
// NOTE(review): the variant names suggest this decides whether a raw regex match
// is kept or dropped when tree-sitter verification cannot run; the consumer of
// this value is not visible in this file — confirm against the matcher.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TreeSitterFallbackPolicy {
/// Chosen for every self-identifying rule and for context-dependent rules
/// that lack either the generic token shape or the distance operator.
KeepRawWhenUnavailable,
/// Chosen only for context-dependent rules whose pattern has both a generic
/// token shape and a `(?:.|[\n\r]){0,…}` distance operator.
SuppressWhenUnavailable,
}
/// Coarse classification of a rule's pattern, as computed by
/// `RulesDatabase::classify_rule_match_profile`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleDetectionProfileKind {
/// The pattern contains a known vendor prefix (e.g. `ghp_`, `xoxb-`), or its
/// heuristic context score stayed below 3.
SelfIdentifying,
/// The pattern's heuristic context score reached 3 or more (context keywords,
/// distance operator, generic token shape, rule dependencies).
ContextDependent,
}
/// Result of classifying a single rule's pattern; one profile is built per rule
/// when a `RulesDatabase` is constructed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RuleMatchProfile {
/// Whether the rule was judged self-identifying or context-dependent.
pub kind: RuleDetectionProfileKind,
/// Fallback policy selected together with `kind`.
pub fallback_policy: TreeSitterFallbackPolicy,
/// Static labels naming each heuristic signal that fired (for example
/// `"context_keywords"` or `"generic_token_shape"`), in evaluation order.
pub reason_codes: Vec<&'static str>,
}
/// Compiled rule set: the rules themselves plus per-rule data derived from them.
///
/// `anchored_regexes` and `rule_match_profiles` are index-aligned with `rules`
/// (entry `i` belongs to `rules[i]`); `get_regex_by_rule_id` relies on that.
pub struct RulesDatabase {
// pub(crate) rules: Vec<Rule,>,
/// The rules, shared via `Arc`.
pub(crate) rules: Vec<Arc<Rule>>,
/// One compiled `Regex` per rule, in the same order as `rules`.
pub(crate) anchored_regexes: Vec<Regex>,
/// One classification per rule, in the same order as `rules`.
pub(crate) rule_match_profiles: Vec<RuleMatchProfile>,
/// Vectorscan block database built from the rules' compiled patterns.
pub(crate) vsdb: BlockDatabase,
}
@ -27,12 +47,111 @@ pub fn format_regex_pattern(pattern: &str) -> String {
}
impl RulesDatabase {
/// Private shorthand that forwards to [`Self::classify_rule_match_profile`];
/// used by `build_rule_profiles` when the database is constructed.
fn classify_rule_profile(rule: &Rule) -> RuleMatchProfile {
Self::classify_rule_match_profile(rule)
}
/// Classifies every rule, returning one profile per rule in input order so the
/// result stays index-aligned with `rules`.
fn build_rule_profiles(rules: &[Arc<Rule>]) -> Vec<RuleMatchProfile> {
    let mut profiles = Vec::with_capacity(rules.len());
    for rule in rules {
        profiles.push(Self::classify_rule_profile(rule.as_ref()));
    }
    profiles
}
/// Returns the compiled regex for the first rule whose id equals `rule_id`.
///
/// Yields `None` when no rule has that id, or when `anchored_regexes` has no
/// entry at the matching rule's index.
pub fn get_regex_by_rule_id(&self, rule_id: &str) -> Option<&Regex> {
    // The regex vector is index-aligned with `rules`, so the rule's position
    // doubles as the regex index.
    let index = self.rules.iter().position(|rule| rule.syntax().id == rule_id)?;
    self.anchored_regexes.get(index)
}
/// Heuristically classifies a rule's pattern as self-identifying or
/// context-dependent and picks the matching fallback policy.
///
/// The pattern is flattened with `format_regex_pattern` and lower-cased, then:
///
/// 1. If it contains a known vendor prefix (`ghp_`, `xoxb-`, `akia`, …) the rule
///    is self-identifying and classification stops.
/// 2. Otherwise four signals are scored: context keywords (+1), the
///    `(?:.|[\n\r]){0,…}` distance operator (+1), a `depends_on_rule` entry (+1)
///    and a generic token shape — a generic character class together with a
///    quantifier bound of at least 24 (+2).
/// 3. A score below 3 is still self-identifying; 3 or more is context-dependent.
///    Context-dependent rules get `SuppressWhenUnavailable` only when both the
///    generic token shape and the distance operator are present.
///
/// `reason_codes` records every signal that fired, plus the fallback decision
/// for context-dependent rules.
pub fn classify_rule_match_profile(rule: &Rule) -> RuleMatchProfile {
    const SELF_IDENTIFYING_PREFIXES: [&str; 11] = [
        "ccipat_",
        "xoxb-",
        "xoxa-",
        "xoxp-",
        "xapp-",
        "ghp_",
        "github_pat_",
        "sk_live_",
        "sk_test_",
        "ltai",
        "akia",
    ];
    const CONTEXT_KEYWORDS: [&str; 8] =
        ["secret", "token", "key", "password", "private", "access", "client", "auth"];

    let pattern = format_regex_pattern(&rule.syntax().pattern).to_lowercase();

    // A recognisable vendor prefix short-circuits every other heuristic.
    if SELF_IDENTIFYING_PREFIXES.iter().any(|prefix| pattern.contains(prefix)) {
        return RuleMatchProfile {
            kind: RuleDetectionProfileKind::SelfIdentifying,
            fallback_policy: TreeSitterFallbackPolicy::KeepRawWhenUnavailable,
            reason_codes: vec!["self_identifying_prefix"],
        };
    }

    let context_keywords = CONTEXT_KEYWORDS.iter().any(|keyword| pattern.contains(keyword));
    let distance_operator = pattern.contains("(?:.|[\\n\\r]){0,");
    let depends_on = !rule.syntax().depends_on_rule.is_empty();
    let generic_token =
        has_generic_token_class(&pattern) && max_quantifier_min_value(&pattern) >= 24;

    // (signal fired, reason code, score weight) — listed in the order the
    // reason codes must appear in the result.
    let signals = [
        (context_keywords, "context_keywords", 1usize),
        (distance_operator, "distance_operator", 1),
        (depends_on, "depends_on_rule", 1),
        (generic_token, "generic_token_shape", 2),
    ];

    let mut reason_codes: Vec<&'static str> = Vec::new();
    let mut score = 0usize;
    for &(fired, code, weight) in signals.iter() {
        if fired {
            reason_codes.push(code);
            score += weight;
        }
    }

    if score < 3 {
        return RuleMatchProfile {
            kind: RuleDetectionProfileKind::SelfIdentifying,
            fallback_policy: TreeSitterFallbackPolicy::KeepRawWhenUnavailable,
            reason_codes,
        };
    }

    let (fallback_policy, fallback_reason) = if generic_token && distance_operator {
        (
            TreeSitterFallbackPolicy::SuppressWhenUnavailable,
            "strict_fallback_suppress_when_unavailable",
        )
    } else {
        (TreeSitterFallbackPolicy::KeepRawWhenUnavailable, "fallback_keep_when_unavailable")
    };
    reason_codes.push(fallback_reason);

    RuleMatchProfile {
        kind: RuleDetectionProfileKind::ContextDependent,
        fallback_policy,
        reason_codes,
    }
}
pub fn get_rule_by_finding_fingerprint(&self, finding_fingerprint: &str) -> Option<Arc<Rule>> {
self.rules.iter().find(|r| r.finding_sha1_fingerprint() == finding_fingerprint).cloned()
@ -68,8 +187,9 @@ impl RulesDatabase {
Ok(vsdb) => {
let d1 = t1.elapsed().as_secs_f64();
let (anchored_regexes, d2) = Self::compile_regexes(&rules)?;
let rule_match_profiles = Self::build_rule_profiles(&rules);
debug!("Compiled {} rules: vectorscan {}s; regex {}s", rules.len(), d1, d2);
Ok(RulesDatabase { rules, vsdb, anchored_regexes })
Ok(RulesDatabase { rules, vsdb, anchored_regexes, rule_match_profiles })
}
Err(e) => {
error!(
@ -137,7 +257,13 @@ impl RulesDatabase {
bail!("Failed to compile the following rules:\n{}", error_messages.join("\n"));
}
let vsdb = BlockDatabase::new(compiled_patterns)?;
Ok(RulesDatabase { rules: compiled_rules, vsdb, anchored_regexes: compiled_regexes })
let rule_match_profiles = Self::build_rule_profiles(&compiled_rules);
Ok(RulesDatabase {
rules: compiled_rules,
vsdb,
anchored_regexes: compiled_regexes,
rule_match_profiles,
})
}
fn compile_regexes(rules: &[Arc<Rule>]) -> Result<(Vec<Regex>, f64)> {
@ -192,7 +318,52 @@ impl RulesDatabase {
pub fn anchored_regexes(&self) -> &[Regex] {
// Borrow the compiled regex list stored on the database; the matcher looks
// entries up by rule index (e.g. `anchored_regexes()[rule_idx]`).
&self.anchored_regexes
}
/// Returns the per-rule match profiles computed by `build_rule_profiles` when
/// the database was constructed.
///
/// The matcher indexes this slice with the same rule index it uses for
/// `anchored_regexes()`.
#[inline]
pub fn rule_match_profiles(&self) -> &[RuleMatchProfile] {
&self.rule_match_profiles
}
}
/// Reports whether `normalized_pattern` contains a quantified "generic token"
/// character class such as `[a-z0-9]{` or `[a-f0-9]{`.
///
/// The needles are all lowercase (`[a-za-z0-9]{` is the lowercased form of
/// `[A-Za-z0-9]{`), so the caller is presumably expected to pass an already
/// lowercased pattern -- NOTE(review): confirm against the normalisation step.
fn has_generic_token_class(normalized_pattern: &str) -> bool {
    // Each entry is a character class immediately followed by the opening
    // brace of a counted quantifier.
    const GENERIC_CLASS_OPENERS: [&str; 7] = [
        "[a-za-z0-9]{",
        "[a-z0-9]{",
        "[a-f0-9]{",
        "[a-z0-9_-]{",
        "[a-za-z0-9_-]{",
        "[a-za-z0-9+/]{",
        "[a-za-z0-9+/=]{",
    ];

    for opener in &GENERIC_CLASS_OPENERS {
        if normalized_pattern.contains(*opener) {
            return true;
        }
    }
    false
}
/// Returns the largest lower bound found in any counted quantifier
/// (`{N}`, `{N,}` or `{N,M}`) of `normalized_pattern`, or `0` when the pattern
/// has no `{` that is directly followed by a digit.
///
/// Only the run of ASCII digits immediately after each `{` is read, so for
/// `{0,64}` the value considered is `0` and for `{64,}` it is `64`. Values too
/// large for `usize` saturate at `usize::MAX` instead of overflowing.
///
/// Every `{` is inspected, including one that directly follows another brace
/// (for example the quantifier in `\{{24}`); the previous hand-rolled scanner
/// stepped over the byte after each digit run and therefore missed those.
fn max_quantifier_min_value(normalized_pattern: &str) -> usize {
    normalized_pattern
        .split('{')
        // The first piece precedes any brace; every later piece starts right
        // after a `{`.
        .skip(1)
        .filter_map(|after_brace| {
            let mut digits = after_brace.bytes().take_while(u8::is_ascii_digit).peekable();
            // No leading digit means this brace does not open a counted quantifier.
            digits.peek()?;
            Some(digits.fold(0usize, |acc, d| {
                acc.saturating_mul(10).saturating_add(usize::from(d - b'0'))
            }))
        })
        .max()
        .unwrap_or(0)
}
#[cfg(test)]
mod test_vectorscan {
use pretty_assertions::assert_eq;
@ -249,3 +420,60 @@ mod test_regex_cleaning {
println!("{}", data);
}
}
// Unit tests for rule profile classification (`RulesDatabase::classify_rule_profile`).
#[cfg(test)]
mod test_rule_match_profiles {
use super::*;
use crate::rule::{Confidence, RuleSyntax, Validation};
// Builds a minimal rule whose id and name are both `id` and whose regex is
// `pattern`; every other field is left empty/neutral (no dependencies, no
// validation) so only the pattern text varies between tests.
fn mk_rule(id: &str, pattern: &str) -> Rule {
Rule::new(RuleSyntax {
id: id.to_string(),
name: id.to_string(),
pattern: pattern.to_string(),
confidence: Confidence::Medium,
min_entropy: 0.0,
visible: true,
examples: vec![],
negative_examples: vec![],
references: vec![],
validation: None::<Validation>,
revocation: None,
depends_on_rule: vec![],
pattern_requirements: None,
tls_mode: None,
})
}
// A pattern carrying a fixed vendor prefix must be profiled as
// self-identifying, keep raw matches when tree-sitter is unavailable, and
// record the `self_identifying_prefix` reason code.
#[test]
fn classifies_self_identifying_prefix_rule() {
let rule =
mk_rule("kingfisher.circleci.1", r"(?x)\b(CCIPAT_[A-Za-z0-9]{22}_[a-z0-9]{40})\b");
let profile = RulesDatabase::classify_rule_profile(&rule);
assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying);
assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::KeepRawWhenUnavailable);
assert!(profile.reason_codes.contains(&"self_identifying_prefix"));
}
// Keyword + distance operator + long generic character class: expected to be
// context-dependent with the strict "suppress when unavailable" fallback.
#[test]
fn classifies_context_dependent_generic_rule() {
let rule = mk_rule(
"kingfisher.auth0.2",
r"(?xi)\bauth0(?:.|[\n\r]){0,16}?(?:secret|token)(?:.|[\n\r]){0,64}?\b([a-z0-9_-]{64,})\b",
);
let profile = RulesDatabase::classify_rule_profile(&rule);
assert_eq!(profile.kind, RuleDetectionProfileKind::ContextDependent);
assert_eq!(profile.fallback_policy, TreeSitterFallbackPolicy::SuppressWhenUnavailable);
assert!(profile.reason_codes.contains(&"generic_token_shape"));
}
// Same pattern shape as above with a different leading keyword and rule id;
// only the profile kind is asserted here.
#[test]
fn context_like_rule_is_parser_gated() {
let rule = mk_rule(
"kingfisher.example.1",
r"(?xi)\bexample(?:.|[\n\r]){0,16}?(?:secret|token)(?:.|[\n\r]){0,64}?\b([a-z0-9_-]{64,})\b",
);
let profile = RulesDatabase::classify_rule_profile(&rule);
assert_eq!(profile.kind, RuleDetectionProfileKind::ContextDependent);
}
}

View file

@ -1,21 +1,26 @@
# Kingfisher Source Code Parsing
[← Back to README](../README.md)
Kingfisher leverages treesitter as an extra layer of analysis when scanning source files written in supported programming languages. In practice, after its initial regexbased scan (powered by Vectorscan), Kingfisher checks if the files language is known.
Kingfisher leverages tree-sitter as an extra layer of analysis when scanning source files written in supported programming languages. In practice, after its initial regex-based scan (powered by Vectorscan/Hyperscan), Kingfisher can run a targeted verification pass for context-dependent rules.
If so, it creates a Checker (see below) that uses tree-sitter to parse the file and run language-specific queries. This additional pass refines the detection by capturing more structured patterns—such as secret-like tokens—that might be obscured or spread over code constructs.
### How Its Called
## How It's Called
In the scanning phase (in the Matchers implementation), Kingfisher does the following:
- **Language Detection:** When processing a blob, if a language string is provided (e.g. inferred from file metadata or extension), the code calls a helper (via a function like `get_language_and_queries`) to retrieve the corresponding treesitter language and a set of queries.
- **Checker Creation:** With these values, a `Checker` struct is instantiated. This struct holds both the target language (as defined in its `Language` enum) and a map of treesitter queries to run.
- **Parsing and Querying:** The Checkers key method (e.g. `check` or indirectly via `modify_regex`) retrieves a threadlocal treesitter parser (to avoid recreating the parser on every call), sets the appropriate language, and parses the source code into a syntax tree. It then executes the queries over that tree, extracting ranges and texts of interest that might represent secrets.
In the scanning phase (in the Matcher's implementation), Kingfisher does the following:
- **Primary Regex Pass:** Kingfisher always scans the full blob with Vectorscan/Hyperscan first.
- **Candidate Selection:** Findings from rules classified as context-dependent become tree-sitter verification candidates.
- **Language Detection:** If a language string is provided (for example from metadata or extension), the code calls a helper (such as `get_language_and_queries`) to retrieve the corresponding tree-sitter language and queries.
- **Checker Creation:** With those values, a `Checker` is instantiated with the target language and query map.
- **Parsing and Querying:** The Checker retrieves a thread-local parser (to avoid recreating it on every call), sets language, parses source, and runs queries to extract structured snippets (for example `key = value` pairs).
- **Verification Decision:** Candidate findings are kept only if parser-extracted context verifies the matched secret. If tree-sitter is unavailable, fallback behavior is profile-driven (for strict generic keyword+token rules, findings are suppressed).
*(See the implementation details in the parser module for example, the `modify_regex` function in the Checker, and the conditional treesitter call in Matcher::scan_blob)*
### Supported Languages
## Supported Languages
The design supports many common source code languages. The Language enum (defined in the parser module) includes variants for:
- **Scripting:** Bash, Python, Ruby, PHP
- **Compiled languages:** C, C++, C#, Rust, Java
- **Web-related languages:** CSS, HTML, JavaScript, TypeScript, YAML, Toml
@ -23,14 +28,15 @@ The design supports many common source code languages. The Language enum (define
Each variant maps to its corresponding tree-sitter language through the `get_ts_language()` method.
### When Treesitter Is Not Called
## When Tree-sitter Is Not Called
Tree-sitter won't be invoked in certain cases:
- **No Language Identified:** If the file isn't recognized as belonging to one of the supported languages or no language hint is provided, the Checker isn't even constructed.
- **Non-source Files:** Binary files or files that aren't expected to contain code (or aren't extracted from archives) bypass tree-sitter parsing.
- **Fallback on Errors:** If tree-sitter parsing fails (e.g. due to malformed code or other errors), Kingfisher falls back on its regex/Vectorscan matches without the additional tree-sitter insights, except for strict generic keyword+token rules, whose findings are suppressed when verification is unavailable.
### Summary
## Summary
In essence, Kingfisher's use of tree-sitter is conditional and complementary. It is called only when the scanned file is a source code file written in a supported language, and its role is to refine the scanning results by leveraging the syntax tree and language-specific queries. When files are non-source, binary, or if no language is provided, tree-sitter is not invoked, and Kingfisher relies on its regex-based detection together with each rule's fallback policy.

105
docs/TREE_SITTER.md Normal file
View file

@ -0,0 +1,105 @@
# Tree-sitter in Kingfisher Scanning
[← Back to README](../README.md)
This document explains how Tree-sitter is used during scanning, and when it is intentionally skipped.
## What Tree-sitter Is Used For
Kingfisher always starts with a fast regex pass (Vectorscan/Hyperscan). Tree-sitter is a secondary verification layer used only for context-dependent findings.
The goal is to confirm that a regex hit appears in a plausible code assignment/config context (for example `api_key = "..."`) before keeping the finding.
## Where It Runs in the Scan Pipeline
1. `BlobProcessor::run` decides whether to compute a language hint.
- It skips language hinting in `fast_mode`.
- It also skips when blob size is outside the Tree-sitter window.
2. `Matcher::scan_blob` performs the primary regex scan and other filtering.
3. `maybe_apply_tree_sitter_verification` runs near the end of `scan_blob`.
4. Only candidate matches are checked against Tree-sitter extracted text.
5. Matches that fail verification can be dropped, depending on rule profile and fallback policy.
## Size and Mode Gates
Tree-sitter is attempted only when all of these are true:
- Blob length is between `0` bytes and `128 KiB`, inclusive (`should_attempt_tree_sitter`).
- `fast_mode` is disabled.
- A language hint is available.
- The language maps to a supported Tree-sitter grammar + query set.
If any of these conditions fails, Tree-sitter verification is considered unavailable for that blob.
## Candidate Selection (Not Every Match)
Tree-sitter verification is only applied to matches that are:
- Classified as `ContextDependent` by rule profiling.
- Not base64-derived findings (`is_base64 == false`).
Classification and fallback policy come from rule profiles in `kingfisher-rules`:
- `SelfIdentifying`: usually keep raw regex result.
- `ContextDependent`: may require Tree-sitter confirmation.
## How Verification Works
When Tree-sitter is available:
1. `load_tree_sitter_results` builds a `Checker` with:
- `Language` enum value
- language-specific queries from `src/parser/queries.rs`
2. `Checker::check`:
- Reuses a thread-local parser cache (`PARSER_CACHE`)
- Parses source into a syntax tree
- Runs language query patterns capturing `@key` and `@val`
- Produces normalized strings like `key = value`
- Attempts base64 decode of value and keeps decoded ASCII form when valid
3. For each candidate finding, Kingfisher re-runs that rule's anchored regex on each extracted Tree-sitter text fragment.
4. Verification succeeds only when the rule's secret capture equals the original matched secret bytes.
If no extracted fragment verifies the secret, that candidate finding is removed.
## Fallback Behavior When Tree-sitter Is Unavailable
If Tree-sitter cannot run (size/mode/language/parse errors), behavior is rule-driven:
- `KeepRawWhenUnavailable`: keep the regex finding.
- `SuppressWhenUnavailable`: drop the finding.
`SuppressWhenUnavailable` is used for stricter generic-context patterns where false positives are likely without syntax-aware confirmation.
## Supported Languages in This Path
Language mapping for verification currently includes:
- `bash`/`shell`
- `c`
- `c#`/`csharp`
- `c++`/`cpp`
- `css`
- `go`
- `html`
- `java`
- `javascript`/`js`
- `php`
- `python`/`py`/`starlark`
- `ruby`
- `rust`
- `toml`
- `typescript`/`ts`
- `yaml`
The Tree-sitter query definitions for these languages live in `src/parser/queries.rs`.
## Operational Summary
Tree-sitter in Kingfisher is a conditional verifier, not the primary detector:
- Regex finds candidates quickly.
- Rule profiling decides which candidates need context verification.
- Tree-sitter confirms contextual plausibility from parsed syntax.
- Fallback policy determines what to do when verification cannot run.
This keeps scanning fast while reducing noisy matches for context-dependent secret patterns.

View file

@ -16,6 +16,7 @@ use std::sync::{Arc, Mutex};
use anyhow::Result;
use http::StatusCode;
use rustc_hash::{FxHashMap, FxHashSet};
use tracing::debug;
use crate::{
blob::{Blob, BlobId, BlobIdMap},
@ -26,10 +27,11 @@ use crate::{
parser::{Checker, Language},
rule_profiling::{ConcurrentRuleProfiler, RuleStats},
rules::rule::Rule,
rules_database::RulesDatabase,
rules_database::{RuleDetectionProfileKind, RulesDatabase, TreeSitterFallbackPolicy},
scanner_pool::ScannerPool,
validation_body::ValidationResponseBody,
};
use kingfisher_scanner::primitives::find_secret_capture;
use self::{
base64_decode::get_base64_strings as get_b64_strings, dedup::record_match, filter::filter_match,
@ -38,8 +40,13 @@ use self::{
const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment
const CHUNK_OVERLAP: usize = 64 * 1024; // 64 KiB overlap to catch boundary matches
const BASE64_SCAN_LIMIT: usize = 64 * 1024 * 1024; // skip expensive Base64 pass on huge blobs
const TREE_SITTER_MAX_LIMIT: usize = 64 * 1024; // only run tree-sitter on blobs <= 64 KiB
const TREE_SITTER_MIN_LIMIT: usize = 1 * 1024; // only run tree-sitter on blobs >= 1 KiB
const TREE_SITTER_MAX_LIMIT: usize = 128 * 1024; // only run tree-sitter on blobs <= 128 KiB
const TREE_SITTER_MIN_LIMIT: usize = 0; // allow tree-sitter starting at 0 bytes
/// Reports whether a blob of `blob_len` bytes lies inside the inclusive
/// `TREE_SITTER_MIN_LIMIT..=TREE_SITTER_MAX_LIMIT` size window in which
/// tree-sitter verification is attempted.
#[inline]
pub(crate) fn should_attempt_tree_sitter(blob_len: usize) -> bool {
    (TREE_SITTER_MIN_LIMIT..=TREE_SITTER_MAX_LIMIT).contains(&blob_len)
}
// -------------------------------------------------------------------------------------------------
// RawMatch
@ -281,48 +288,10 @@ impl<'a> Matcher<'a> {
let rules_db = self.rules_db;
let mut seen_matches = FxHashSet::default();
let mut previous_matches: FxHashMap<usize, Vec<OffsetSpan>> = FxHashMap::default();
let mut match_rule_indices: Vec<usize> = Vec::new();
let blob_len = blob.len();
let should_run_tree_sitter = blob_len > 0
&& blob_len <= TREE_SITTER_MAX_LIMIT
&& blob_len >= TREE_SITTER_MIN_LIMIT
&& has_raw_matches
&& lang_hint.is_some();
let tree_sitter_result = if should_run_tree_sitter {
lang_hint.and_then(|lang_str| {
get_language_and_queries(lang_str).and_then(|(language, queries)| {
let checker = Checker { language, rules: queries };
match checker.check(&blob.bytes()) {
Ok(results) => Some(results),
Err(e) => {
println!("Error in checker.check: {}", e);
None
}
}
})
})
} else {
None
};
//
// Process matches
//
let mut matches = Vec::new();
let owned_ts_results = tree_sitter_result.map(|ts_results| {
ts_results
.into_iter()
.map(|match_result| {
(
match_result.range,
match_result.text,
match_result.is_base64_decoded,
match_result.original_base64,
)
})
.collect::<Vec<_>>()
});
let mut previous_raw_matches: FxHashMap<usize, Vec<OffsetSpan>> = FxHashMap::default();
for &RawMatch { rule_id, start_idx, end_idx } in
self.user_data.raw_matches_scratch.iter().rev()
@ -336,6 +305,7 @@ impl<'a> Matcher<'a> {
if !record_match(&mut previous_raw_matches, rule_id_usize, current_span) {
continue;
}
let before_len = matches.len();
filter_match(
blob,
rule,
@ -355,68 +325,8 @@ impl<'a> Matcher<'a> {
self.respect_ignore_if_contains,
&self.inline_ignore_config,
);
}
// Pre-filter tree-sitter extracted key-value pairs through Vectorscan,
// then only run the anchored regex for rules that Vectorscan flags as candidates.
if let Some(ref ts_results) = owned_ts_results {
if !ts_results.is_empty() {
// Build a combined buffer of all tree-sitter texts separated by newlines
// so we can run a single Vectorscan pass instead of one per result.
let mut combined_buf = Vec::new();
let mut segment_ends: Vec<usize> = Vec::with_capacity(ts_results.len());
for (_ts_range, ts_match, _is_base64_decoded, _original_base64) in ts_results.iter()
{
combined_buf.extend_from_slice(ts_match.as_bytes());
segment_ends.push(combined_buf.len());
combined_buf.push(b'\n');
}
// Single Vectorscan pass over the combined buffer
let mut ts_raw_matches: Vec<(u32, u64)> = Vec::new();
self.scanner_pool.with(|scanner| {
scanner.scan(&combined_buf, |rule_id, _from, to, _flags| {
ts_raw_matches.push((rule_id, to));
vectorscan_rs::Scan::Continue
})
})?;
// Map each Vectorscan hit back to its tree-sitter result and dedup
let mut rule_ts_pairs: FxHashSet<(usize, usize)> = FxHashSet::default();
for &(rule_id, to) in &ts_raw_matches {
let to = to as usize;
let seg_idx = segment_ends.partition_point(|&end| end < to);
if seg_idx < ts_results.len() {
rule_ts_pairs.insert((rule_id as usize, seg_idx));
}
}
// Only run the anchored regex for (rule, ts_result) pairs Vectorscan flagged
for (rule_id_usize, ts_idx) in rule_ts_pairs {
let (ts_range, ts_match, is_base64_decoded, _original_base64) =
&ts_results[ts_idx];
let rule = Arc::clone(&rules_db.rules()[rule_id_usize]);
let re = &rules_db.anchored_regexes()[rule_id_usize];
filter_match(
blob,
rule,
re,
ts_range.start,
ts_range.end,
&mut matches,
&mut previous_matches,
rule_id_usize,
&mut seen_matches,
origin,
Some(ts_match.as_bytes()),
*is_base64_decoded,
redact,
&filename,
self.profiler.as_ref(),
self.respect_ignore_if_contains,
&self.inline_ignore_config,
);
}
}
match_rule_indices
.extend(std::iter::repeat_n(rule_id_usize, matches.len() - before_len));
}
if !no_base64 {
@ -427,6 +337,7 @@ impl<'a> Matcher<'a> {
while let Some((item, depth)) = b64_stack.pop() {
for (rule_id_usize, rule) in rules_db.rules().iter().enumerate() {
let re = &rules_db.anchored_regexes()[rule_id_usize];
let before_len = matches.len();
filter_match(
blob,
rule.clone(),
@ -446,6 +357,8 @@ impl<'a> Matcher<'a> {
self.respect_ignore_if_contains,
&self.inline_ignore_config,
);
match_rule_indices
.extend(std::iter::repeat_n(rule_id_usize, matches.len() - before_len));
}
if depth + 1 < MAX_B64_DEPTH {
for nested in get_b64_strings(item.decoded.as_slice()) {
@ -461,6 +374,14 @@ impl<'a> Matcher<'a> {
}
}
}
maybe_apply_tree_sitter_verification(
rules_db,
blob,
lang_hint,
blob_len,
&mut matches,
&match_rule_indices,
);
// Finalize
if !no_dedup && !matches.is_empty() {
let blob_id = blob.id();
@ -486,6 +407,108 @@ impl<'a> Matcher<'a> {
}
}
/// Post-filters `matches` by verifying context-dependent findings against
/// tree-sitter extracted text.
///
/// `match_rule_indices[i]` is the rule index that produced `matches[i]`.
/// A match is a verification candidate when it is not base64-derived and its
/// rule profile is `ContextDependent`; all other matches are left untouched.
///
/// For each candidate:
/// * if tree-sitter results are available, the match is kept only when the
///   rule's anchored regex captures the same secret bytes in at least one
///   extracted text fragment;
/// * if they are unavailable, the match is dropped only when the rule's
///   fallback policy is `SuppressWhenUnavailable`.
///
/// Tree-sitter results are loaded at most once, and only when at least one
/// candidate exists.
fn maybe_apply_tree_sitter_verification<'a>(
    rules_db: &RulesDatabase,
    blob: &'a Blob,
    lang_hint: Option<&str>,
    blob_len: usize,
    matches: &mut Vec<BlobMatch<'a>>,
    match_rule_indices: &[usize],
) {
    if matches.is_empty() {
        return;
    }

    let profiles = rules_db.rule_match_profiles();

    // (match index, rule index) for every match that needs verification.
    // Zipping stops at the shorter sequence, so a match without a recorded
    // rule index is never treated as a candidate.
    let candidates: Vec<(usize, usize)> = matches
        .iter()
        .zip(match_rule_indices.iter().copied())
        .enumerate()
        .filter(|(_, (found, rule_idx))| {
            !found.is_base64
                && profiles[*rule_idx].kind == RuleDetectionProfileKind::ContextDependent
        })
        .map(|(match_idx, (_, rule_idx))| (match_idx, rule_idx))
        .collect();
    if candidates.is_empty() {
        return;
    }

    let ts_texts = load_tree_sitter_results(blob, lang_hint, blob_len);

    let mut keep = vec![true; matches.len()];
    for (match_idx, rule_idx) in candidates {
        let profile = &profiles[rule_idx];
        let secret = matches[match_idx].matching_input;
        let re = &rules_db.anchored_regexes()[rule_idx];
        keep[match_idx] = match ts_texts.as_deref() {
            // Parser output available: require the secret to reappear in it.
            Some(texts) => texts
                .iter()
                .any(|text| verify_match_in_tree_sitter_text(re, secret, text.as_bytes())),
            // No parser output: defer to the rule's fallback policy.
            None => profile.fallback_policy != TreeSitterFallbackPolicy::SuppressWhenUnavailable,
        };
    }

    // `retain` visits elements once, in order, so the flags line up with the
    // original match positions.
    let mut flags = keep.into_iter();
    matches.retain(|_| flags.next().unwrap_or(true));
}
/// Runs the tree-sitter checker over `blob` and returns the text of every
/// extracted result.
///
/// Returns `None` (verification unavailable) when the blob size is outside the
/// tree-sitter window, when no language hint is given, when the hint does not
/// map to a language/query set, or when the checker reports an error (which is
/// logged at debug level).
fn load_tree_sitter_results(
    blob: &Blob,
    lang_hint: Option<&str>,
    blob_len: usize,
) -> Option<Vec<String>> {
    let lang = lang_hint.filter(|_| should_attempt_tree_sitter(blob_len))?;
    let (language, queries) = get_language_and_queries(lang)?;
    let ts_checker = Checker { language, rules: queries };
    match ts_checker.check(&blob.bytes()) {
        Err(e) => {
            debug!("tree-sitter verification unavailable: {e}");
            None
        }
        Ok(extracted) => {
            let texts: Vec<String> = extracted.into_iter().map(|item| item.text).collect();
            Some(texts)
        }
    }
}
/// Returns `true` when applying `re` to `text` yields at least one match whose
/// secret capture (as selected by `find_secret_capture`) is byte-for-byte
/// equal to `expected_secret`.
fn verify_match_in_tree_sitter_text(
    re: &regex::bytes::Regex,
    expected_secret: &[u8],
    text: &[u8],
) -> bool {
    for captures in re.captures_iter(text) {
        if find_secret_capture(re, &captures).as_bytes() == expected_secret {
            return true;
        }
    }
    false
}
fn get_language_and_queries(lang: &str) -> Option<(Language, FxHashMap<String, String>)> {
match lang.to_lowercase().as_str() {
"bash" | "shell" => Some((Language::Bash, parser::queries::bash::get_bash_queries())),
@ -1049,4 +1072,177 @@ line2
assert_eq!(entries[1], (Some("body"), 2, "ABC"));
assert_eq!(entries[2], (Some("checksum"), 3, "12"));
}
// A context-dependent rule (keyword, distance operator, 64+ char generic
// token) must still report its finding when the secret sits in a real Python
// assignment and the blob is scanned with a "python" language hint.
#[test]
fn parser_second_pass_keeps_verified_contextual_match() -> Result<()> {
let token = "abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234";
let rule = Rule::new(RuleSyntax {
id: "kingfisher.auth0.2".into(),
name: "auth0 secret".into(),
pattern: "(?xi)\\bauth0(?:.|[\\n\\r]){0,16}?(?:secret|token)(?:.|[\\n\\r]){0,64}?\\b([a-z0-9_-]{64,})\\b".into(),
confidence: crate::rules::rule::Confidence::Medium,
min_entropy: 0.0,
visible: true,
examples: vec![],
negative_examples: vec![],
references: vec![],
validation: None::<Validation>,
revocation: None,
depends_on_rule: vec![],
pattern_requirements: None,
tls_mode: None,
});
let rules_db = RulesDatabase::from_rules(vec![rule])?;
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
let mut matcher =
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
// 1200 filler bytes precede the assignment -- presumably to push the blob
// past the former 1 KiB tree-sitter minimum; TODO confirm it is still needed.
let mut content = "x".repeat(1200);
content.push_str(&format!("\nauth0_client_secret = \"{token}\"\n"));
let blob = Blob::from_bytes(content.into_bytes());
let origin = OriginSet::from(Origin::from_file(PathBuf::from("verified.py")));
let found = match matcher.scan_blob(
&blob,
&origin,
Some("python".to_string()),
false,
false,
false,
)? {
ScanResult::New(matches) => matches,
_ => panic!("unexpected scan result"),
};
// Exactly one finding survives the verification pass.
assert_eq!(found.len(), 1);
Ok(())
}
// Same rule and language hint as the "keeps verified" test, but the keyword
// and token appear only inside a `#` comment line, so the scan is expected to
// return no findings.
#[test]
fn parser_second_pass_suppresses_unverified_contextual_match() -> Result<()> {
let token = "abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234";
let rule = Rule::new(RuleSyntax {
id: "kingfisher.auth0.2".into(),
name: "auth0 secret".into(),
pattern: "(?xi)\\bauth0(?:.|[\\n\\r]){0,16}?(?:secret|token)(?:.|[\\n\\r]){0,64}?\\b([a-z0-9_-]{64,})\\b".into(),
confidence: crate::rules::rule::Confidence::Medium,
min_entropy: 0.0,
visible: true,
examples: vec![],
negative_examples: vec![],
references: vec![],
validation: None::<Validation>,
revocation: None,
depends_on_rule: vec![],
pattern_requirements: None,
tls_mode: None,
});
let rules_db = RulesDatabase::from_rules(vec![rule])?;
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
let mut matcher =
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
// Filler bytes followed by a comment-only mention of the secret.
let mut content = "x".repeat(1200);
content.push_str(&format!("\n# auth0 secret {token}\n"));
let blob = Blob::from_bytes(content.into_bytes());
let origin = OriginSet::from(Origin::from_file(PathBuf::from("comment.py")));
let found = match matcher.scan_blob(
&blob,
&origin,
Some("python".to_string()),
false,
false,
false,
)? {
ScanResult::New(matches) => matches,
_ => panic!("unexpected scan result"),
};
assert!(
found.is_empty(),
"comment-only contextual hits should be suppressed when tree-sitter cannot verify assignment context"
);
Ok(())
}
// With no language hint, tree-sitter results cannot be loaded; a rule whose
// profile uses the strict fallback policy must then yield no findings even
// though the raw regex matches the blob.
#[test]
fn strict_context_rule_suppresses_when_tree_sitter_unavailable() -> Result<()> {
let token = "abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234";
let rule = Rule::new(RuleSyntax {
id: "kingfisher.auth0.2".into(),
name: "auth0 secret".into(),
pattern: "(?xi)\\bauth0(?:.|[\\n\\r]){0,16}?(?:secret|token)(?:.|[\\n\\r]){0,64}?\\b([a-z0-9_-]{64,})\\b".into(),
confidence: crate::rules::rule::Confidence::Medium,
min_entropy: 0.0,
visible: true,
examples: vec![],
negative_examples: vec![],
references: vec![],
validation: None::<Validation>,
revocation: None,
depends_on_rule: vec![],
pattern_requirements: None,
tls_mode: None,
});
let rules_db = RulesDatabase::from_rules(vec![rule])?;
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
let mut matcher =
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
let content = format!("auth0 token {token}");
let blob = Blob::from_bytes(content.into_bytes());
let origin = OriginSet::from(Origin::from_file(PathBuf::from("small.txt")));
// `None` is passed as the language hint, which makes verification unavailable.
let found = match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
ScanResult::New(matches) => matches,
_ => panic!("unexpected scan result"),
};
assert!(
found.is_empty(),
"strict contextual rules should suppress when tree-sitter is unavailable for verification"
);
Ok(())
}
// A prefixed (self-identifying) token must be reported from the raw regex
// pass alone: no language hint is supplied, yet one finding is expected.
#[test]
fn self_identifying_rule_remains_hyperscan_only() -> Result<()> {
let token = "CCIPAT_FERZRjTN451xnDCy1y9gWn_79fb6ca4d0e5f833612eee17de397a9dca0a9e9f";
let rule = Rule::new(RuleSyntax {
id: "kingfisher.circleci.1".into(),
name: "circleci pat".into(),
pattern: "(?x)\\b(CCIPAT_[A-Za-z0-9]{22}_[a-z0-9]{40})\\b".into(),
confidence: crate::rules::rule::Confidence::Medium,
min_entropy: 0.0,
visible: true,
examples: vec![],
negative_examples: vec![],
references: vec![],
validation: None::<Validation>,
revocation: None,
depends_on_rule: vec![],
pattern_requirements: None,
tls_mode: None,
});
let rules_db = RulesDatabase::from_rules(vec![rule])?;
let seen = BlobIdMap::new();
let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
let mut matcher =
Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
let blob = Blob::from_bytes(format!("token={token}").into_bytes());
let origin = OriginSet::from(Origin::from_file(PathBuf::from("circleci.txt")));
// No language hint: the finding must not depend on tree-sitter.
let found = match matcher.scan_blob(&blob, &origin, None, false, false, false)? {
ScanResult::New(matches) => matches,
_ => panic!("unexpected scan result"),
};
assert_eq!(found.len(), 1, "self-identifying tokens should remain raw-pass findings");
Ok(())
}
}

View file

@ -1056,7 +1056,7 @@ impl DetailsReporter {
finding: FindingRecordData {
snippet,
fingerprint: rm.m.finding_fingerprint.to_string(),
confidence: rm.m.rule.confidence().to_string(),
confidence: rm.match_confidence.to_string(),
entropy: format!("{:.2}", rm.m.calculated_entropy),
validation: ValidationInfo { status: validation_status, response: response_body },
language: rm

View file

@ -2,4 +2,7 @@
//!
//! This module re-exports types from [`kingfisher_rules::rules_database`].
pub use kingfisher_rules::rules_database::{format_regex_pattern, RulesDatabase};
pub use kingfisher_rules::rules_database::{
format_regex_pattern, RuleDetectionProfileKind, RuleMatchProfile, RulesDatabase,
TreeSitterFallbackPolicy,
};

View file

@ -6,7 +6,7 @@ use crate::{
blob::{Blob, BlobMetadata},
content_type::ContentInspector,
location::LocationMapping,
matcher::{Match, Matcher, OwnedBlobMatch, ScanResult},
matcher::{should_attempt_tree_sitter, Match, Matcher, OwnedBlobMatch, ScanResult},
origin::{Origin, OriginSet},
scanner::repos::DatastoreMessage,
Path,
@ -32,7 +32,7 @@ impl<'a> BlobProcessor<'a> {
) -> Result<Option<DatastoreMessage>> {
let _span = debug_span!("matcher", temp_id = blob.temp_id()).entered();
let t1 = Instant::now();
let language_hint = if fast_mode {
let language_hint = if fast_mode || !should_attempt_tree_sitter(blob.len()) {
None
} else {
origin

View file

@ -5,7 +5,14 @@ use serde_json::Value;
#[test]
fn scan_rules_has_no_validated_findings() -> Result<()> {
let output = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
.args(["scan", "data/rules", "--format", "json", "--no-update-check", "--only-valid"])
.args([
"scan",
"crates/kingfisher-rules/data/rules",
"--format",
"json",
"--no-update-check",
"--only-valid",
])
.output()?;
let stdout = String::from_utf8_lossy(&output.stdout);
@ -42,14 +49,33 @@ fn scan_rules_has_no_validated_findings() -> Result<()> {
}
let findings: Vec<Value> = serde_json::from_str(json_array_str)?;
for finding in findings {
let rule_id = finding["rule"]["id"].as_str().unwrap_or("unknown");
let validated_rule_ids: Vec<String> = findings
.iter()
.filter_map(|finding| {
let status = finding["finding"]["validation"]["status"]
.as_str()
.unwrap_or("")
.to_ascii_lowercase();
if status == "active credential" {
Some(finding["rule"]["id"].as_str().unwrap_or("unknown").to_string())
} else {
None
}
})
.collect();
let status =
finding["finding"]["validation"]["status"].as_str().unwrap_or("").to_ascii_lowercase();
assert!(
validated_rule_ids.is_empty(),
"Validated findings detected in rules: {}",
validated_rule_ids.join(", ")
);
// Fail only on genuinely validated secrets
assert_ne!(&status, "active credential", "Validated finding detected in rule {rule_id}");
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
panic!(
"kingfisher scan exited non-zero without validated findings in output.\nstdout:\n{}\nstderr:\n{}",
stdout, stderr
);
}
Ok(())

Some files were not shown because too many files have changed in this diff Show more